summaryrefslogtreecommitdiff
path: root/libc/string/arc/memset.S
diff options
context:
space:
mode:
authorAlexey Brodkin <Alexey.Brodkin@synopsys.com>2017-08-18 01:25:25 +0300
committerWaldemar Brodkorb <wbx@uclibc-ng.org>2017-08-20 12:53:17 +0200
commit26cc89d99cc9d783859eb9d38e067fad5d6bbb60 (patch)
treebb2f9a637c0f492eeb66465e975aa26f6207eb3c /libc/string/arc/memset.S
parentd9f7022736fd429c6c62c93441804dc87900ed6f (diff)
arc: Merge ARCv2 string routines in generic ARC .S files
In cde74b83f9b2 "ARC: remove special CFLAGS/LDFLAGS handling" we got rid of CONFIG_ARC_CPU_HS which was used to select ARCv2-specific implementation of optimized string routines. So now ARCv2-tuned memset/memcpy/strcmp are not used, instead those for ARC700 used for both ARC700 and ARCHS. Without uClibc config option we may only tell which CPU type we're targeting by built-in defines of GCC. I.e. no more conditional file inclusion in Makefiles. That leaves us only one option - merge both implementations in 1 file and use ifdefs. Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Diffstat (limited to 'libc/string/arc/memset.S')
-rw-r--r--libc/string/arc/memset.S115
1 files changed, 113 insertions, 2 deletions
diff --git a/libc/string/arc/memset.S b/libc/string/arc/memset.S
index f4048455a..0b74ddc7f 100644
--- a/libc/string/arc/memset.S
+++ b/libc/string/arc/memset.S
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2013, 2014-2015, 2017 Synopsys, Inc. (www.synopsys.com)
* Copyright (C) 2007 ARC International (UK) LTD
*
* Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
@@ -7,10 +7,15 @@
#include <sysdep.h>
-#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */
+#if !defined(__ARC700__) && !defined(__ARCHS__)
+#error "Neither ARC700 nor ARCHS is defined!"
+#endif
ENTRY(memset)
+#ifdef __ARC700__
+#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */
+
mov_s r4,r0
or r12,r0,r2
bmsk.f r12,r12,1
@@ -47,5 +52,111 @@ ENTRY(memset)
stb.ab r1,[r4,1]
.Ltiny_end:
j_s [blink]
+#endif /* __ARC700__ */
+
+#ifdef __ARCHS__
+#ifdef DONT_USE_PREALLOC
+#define PREWRITE(A,B) prefetchw [(A),(B)]
+#else
+#define PREWRITE(A,B) prealloc [(A),(B)]
+#endif
+
+ prefetchw [r0] ; Prefetch the write location
+ mov.f 0, r2
+;;; if size is zero
+ jz.d [blink]
+ mov r3, r0 ; don't clobber ret val
+
+;;; if length < 8
+ brls.d.nt r2, 8, .Lsmallchunk
+ mov.f lp_count,r2
+
+ and.f r4, r0, 0x03
+ rsub lp_count, r4, 4
+ lpnz @.Laligndestination
+ ;; LOOP BEGIN
+ stb.ab r1, [r3,1]
+ sub r2, r2, 1
+.Laligndestination:
+
+;;; Destination is aligned
+ and r1, r1, 0xFF
+ asl r4, r1, 8
+ or r4, r4, r1
+ asl r5, r4, 16
+ or r5, r5, r4
+ mov r4, r5
+
+ sub3 lp_count, r2, 8
+ cmp r2, 64
+ bmsk.hi r2, r2, 5
+ mov.ls lp_count, 0
+ add3.hi r2, r2, 8
+
+;;; Convert len to Dwords, unfold x8
+ lsr.f lp_count, lp_count, 6
+ lpnz @.Lset64bytes
+ ;; LOOP START
+ PREWRITE(r3, 64) ;Prefetch the next write location
+#if defined(__LL64__) || defined(__ARC_LL64__)
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+#else
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+#endif
+.Lset64bytes:
+
+ lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes
+ lpnz .Lset32bytes
+ ;; LOOP START
+ prefetchw [r3, 32] ;Prefetch the next write location
+#if defined(__LL64__) || defined(__ARC_LL64__)
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+#else
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+#endif
+.Lset32bytes:
+
+ and.f lp_count, r2, 0x1F ;Last remaining 31 bytes
+.Lsmallchunk:
+ lpnz .Lcopy3bytes
+ ;; LOOP START
+ stb.ab r1, [r3, 1]
+.Lcopy3bytes:
+
+ j [blink]
+#endif /* __ARCHS__ */
+
END(memset)
libc_hidden_def(memset)