diff options
author | Alexey Brodkin <Alexey.Brodkin@synopsys.com> | 2017-08-18 01:25:25 +0300 |
---|---|---|
committer | Waldemar Brodkorb <wbx@uclibc-ng.org> | 2017-08-20 12:53:17 +0200 |
commit | 26cc89d99cc9d783859eb9d38e067fad5d6bbb60 (patch) | |
tree | bb2f9a637c0f492eeb66465e975aa26f6207eb3c /libc/string/arc/memset.S | |
parent | d9f7022736fd429c6c62c93441804dc87900ed6f (diff) |
arc: Merge ARCv2 string routines in generic ARC .S files
In cde74b83f9b2 "ARC: remove special CFLAGS/LDFLAGS handling" we
got rid of CONFIG_ARC_CPU_HS which was used to select ARCv2-specific
implementation of optimized string routines. So now ARCv2-tuned
memset/memcpy/strcmp are not used, instead those for ARC700 used for
both ARC700 and ARCHS.
Without uClibc config option we may only tell which CPU type we're
targeting by built-in defines of GCC. I.e. no more conditional file
inclusion in Makefiles. That leaves us only one option - merge both
implementations in 1 file and use ifdefs.
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Diffstat (limited to 'libc/string/arc/memset.S')
-rw-r--r-- | libc/string/arc/memset.S | 115 |
1 files changed, 113 insertions, 2 deletions
diff --git a/libc/string/arc/memset.S b/libc/string/arc/memset.S index f4048455a..0b74ddc7f 100644 --- a/libc/string/arc/memset.S +++ b/libc/string/arc/memset.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2013, 2014-2015, 2017 Synopsys, Inc. (www.synopsys.com) * Copyright (C) 2007 ARC International (UK) LTD * * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. @@ -7,10 +7,15 @@ #include <sysdep.h> -#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */ +#if !defined(__ARC700__) && !defined(__ARCHS__) +#error "Neither ARC700 nor ARCHS is defined!" +#endif ENTRY(memset) +#ifdef __ARC700__ +#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */ + mov_s r4,r0 or r12,r0,r2 bmsk.f r12,r12,1 @@ -47,5 +52,111 @@ ENTRY(memset) stb.ab r1,[r4,1] .Ltiny_end: j_s [blink] +#endif /* __ARC700__ */ + +#ifdef __ARCHS__ +#ifdef DONT_USE_PREALLOC +#define PREWRITE(A,B) prefetchw [(A),(B)] +#else +#define PREWRITE(A,B) prealloc [(A),(B)] +#endif + + prefetchw [r0] ; Prefetch the write location + mov.f 0, r2 +;;; if size is zero + jz.d [blink] + mov r3, r0 ; don't clobber ret val + +;;; if length < 8 + brls.d.nt r2, 8, .Lsmallchunk + mov.f lp_count,r2 + + and.f r4, r0, 0x03 + rsub lp_count, r4, 4 + lpnz @.Laligndestination + ;; LOOP BEGIN + stb.ab r1, [r3,1] + sub r2, r2, 1 +.Laligndestination: + +;;; Destination is aligned + and r1, r1, 0xFF + asl r4, r1, 8 + or r4, r4, r1 + asl r5, r4, 16 + or r5, r5, r4 + mov r4, r5 + + sub3 lp_count, r2, 8 + cmp r2, 64 + bmsk.hi r2, r2, 5 + mov.ls lp_count, 0 + add3.hi r2, r2, 8 + +;;; Convert len to Dwords, unfold x8 + lsr.f lp_count, lp_count, 6 + lpnz @.Lset64bytes + ;; LOOP START + PREWRITE(r3, 64) ;Prefetch the next write location +#if defined(__LL64__) || defined(__ARC_LL64__) + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] +#else + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] +#endif +.Lset64bytes: + + lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes + lpnz .Lset32bytes + ;; LOOP START + prefetchw [r3, 32] ;Prefetch the next write location +#if defined(__LL64__) || defined(__ARC_LL64__) + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] +#else + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] +#endif +.Lset32bytes: + + and.f lp_count, r2, 0x1F ;Last remaining 31 bytes +.Lsmallchunk: + lpnz .Lcopy3bytes + ;; LOOP START + stb.ab r1, [r3, 1] +.Lcopy3bytes: + + j [blink] +#endif /* __ARCHS__ */ + END(memset) libc_hidden_def(memset) |