diff options
author | Alexey Brodkin <Alexey.Brodkin@synopsys.com> | 2017-08-18 01:25:25 +0300 |
---|---|---|
committer | Waldemar Brodkorb <wbx@uclibc-ng.org> | 2017-08-20 12:53:17 +0200 |
commit | 26cc89d99cc9d783859eb9d38e067fad5d6bbb60 (patch) | |
tree | bb2f9a637c0f492eeb66465e975aa26f6207eb3c /libc/string/arc/strcmp.S | |
parent | d9f7022736fd429c6c62c93441804dc87900ed6f (diff) |
arc: Merge ARCv2 string routines in generic ARC .S files
In cde74b83f9b2 "ARC: remove special CFLAGS/LDFLAGS handling" we
got rid of CONFIG_ARC_CPU_HS which was used to select ARCv2-specific
implementation of optimized string routines. So now ARCv2-tuned
memset/memcpy/strcmp are not used, instead those for ARC700 used for
both ARC700 and ARCHS.
Without uClibc config option we may only tell which CPU type we're
targeting by built-in defines of GCC. I.e. no more conditional file
inclusion in Makefiles. That leaves us only one option - merge both
implementations in 1 file and use ifdefs.
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Diffstat (limited to 'libc/string/arc/strcmp.S')
-rw-r--r-- | libc/string/arc/strcmp.S | 81 |
1 files changed, 79 insertions, 2 deletions
diff --git a/libc/string/arc/strcmp.S b/libc/string/arc/strcmp.S index 5a0e56045..ad38d9e00 100644 --- a/libc/string/arc/strcmp.S +++ b/libc/string/arc/strcmp.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2013, 2014-2015, 2017 Synopsys, Inc. (www.synopsys.com) * Copyright (C) 2007 ARC International (UK) LTD * * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. @@ -8,6 +8,13 @@ #include <features.h> #include <sysdep.h> +#if !defined(__ARC700__) && !defined(__ARCHS__) +#error "Neither ARC700 nor ARCHS is defined!" +#endif + +ENTRY(strcmp) + +#ifdef __ARC700__ /* This is optimized primarily for the ARC700. It would be possible to speed up the loops by one cycle / word respective one cycle / byte by forcing double source 1 alignment, unrolling @@ -15,7 +22,6 @@ source 1; however, that would increase the overhead for loop setup / finish, and strcmp might often terminate early. */ -ENTRY(strcmp) or r2,r0,r1 bmsk_s r2,r2,1 brne r2,0,.Lcharloop @@ -93,6 +99,77 @@ ENTRY(strcmp) .Lcmpend: j_s.d [blink] sub r0,r2,r3 +#endif /* __ARC700__ */ + +#ifdef __ARCHS__ + or r2, r0, r1 + bmsk_s r2, r2, 1 + brne r2, 0, @.Lcharloop + +;;; s1 and s2 are word aligned + ld.ab r2, [r0, 4] + + mov_s r12, 0x01010101 + ror r11, r12 + .align 4 +.LwordLoop: + ld.ab r3, [r1, 4] + ;; Detect NULL char in str1 + sub r4, r2, r12 + ld.ab r5, [r0, 4] + bic r4, r4, r2 + and r4, r4, r11 + brne.d.nt r4, 0, .LfoundNULL + ;; Check if the read locations are the same + cmp r2, r3 + beq.d .LwordLoop + mov.eq r2, r5 + + ;; A match is found, spot it out +#ifdef __LITTLE_ENDIAN__ + swape r3, r3 + mov_s r0, 1 + swape r2, r2 +#else + mov_s r0, 1 +#endif + cmp_s r2, r3 + j_s.d [blink] + bset.lo r0, r0, 31 + + .align 4 +.LfoundNULL: +#ifdef __BIG_ENDIAN__ + swape r4, r4 + swape r2, r2 + swape r3, r3 +#endif + ;; Find null byte + ffs r0, r4 + bmsk r2, r2, r0 + bmsk r3, r3, r0 + swape r2, r2 + swape r3, r3 + ;; make the return value + sub.f r0, r2, r3 + mov.hi r0, 1 + j_s.d [blink] + bset.lo r0, r0, 31 + + .align 4 +.Lcharloop: + ldb.ab r2, [r0, 1] + ldb.ab r3, [r1, 1] + nop + breq r2, 0, .Lcmpend + breq r2, r3, .Lcharloop + + .align 4 +.Lcmpend: + j_s.d [blink] + sub r0, r2, r3 +#endif /* __ARCHS__ */ + END(strcmp) libc_hidden_def(strcmp) |