diff options
Diffstat (limited to 'libc/string/arc/strcmp.S')
-rw-r--r-- | libc/string/arc/strcmp.S | 81 |
1 files changed, 79 insertions, 2 deletions
diff --git a/libc/string/arc/strcmp.S b/libc/string/arc/strcmp.S index 5a0e56045..ad38d9e00 100644 --- a/libc/string/arc/strcmp.S +++ b/libc/string/arc/strcmp.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2013, 2014-2015, 2017 Synopsys, Inc. (www.synopsys.com) * Copyright (C) 2007 ARC International (UK) LTD * * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. @@ -8,6 +8,13 @@ #include <features.h> #include <sysdep.h> +#if !defined(__ARC700__) && !defined(__ARCHS__) +#error "Neither ARC700 nor ARCHS is defined!" +#endif + +ENTRY(strcmp) + +#ifdef __ARC700__ /* This is optimized primarily for the ARC700. It would be possible to speed up the loops by one cycle / word respective one cycle / byte by forcing double source 1 alignment, unrolling @@ -15,7 +22,6 @@ source 1; however, that would increase the overhead for loop setup / finish, and strcmp might often terminate early. */ -ENTRY(strcmp) or r2,r0,r1 bmsk_s r2,r2,1 brne r2,0,.Lcharloop @@ -93,6 +99,77 @@ ENTRY(strcmp) .Lcmpend: j_s.d [blink] sub r0,r2,r3 +#endif /* __ARC700__ */ + +#ifdef __ARCHS__ + or r2, r0, r1 + bmsk_s r2, r2, 1 + brne r2, 0, @.Lcharloop + +;;; s1 and s2 are word aligned + ld.ab r2, [r0, 4] + + mov_s r12, 0x01010101 + ror r11, r12 + .align 4 +.LwordLoop: + ld.ab r3, [r1, 4] + ;; Detect NULL char in str1 + sub r4, r2, r12 + ld.ab r5, [r0, 4] + bic r4, r4, r2 + and r4, r4, r11 + brne.d.nt r4, 0, .LfoundNULL + ;; Check if the read locations are the same + cmp r2, r3 + beq.d .LwordLoop + mov.eq r2, r5 + + ;; A match is found, spot it out +#ifdef __LITTLE_ENDIAN__ + swape r3, r3 + mov_s r0, 1 + swape r2, r2 +#else + mov_s r0, 1 +#endif + cmp_s r2, r3 + j_s.d [blink] + bset.lo r0, r0, 31 + + .align 4 +.LfoundNULL: +#ifdef __BIG_ENDIAN__ + swape r4, r4 + swape r2, r2 + swape r3, r3 +#endif + ;; Find null byte + ffs r0, r4 + bmsk r2, r2, r0 + bmsk r3, r3, r0 + swape r2, r2 + swape r3, r3 + ;; make the return value + sub.f r0, r2, r3 + mov.hi r0, 1 + j_s.d [blink] + bset.lo r0, r0, 31 + + .align 4 +.Lcharloop: + ldb.ab r2, [r0, 1] + ldb.ab r3, [r1, 1] + nop + breq r2, 0, .Lcmpend + breq r2, r3, .Lcharloop + + .align 4 +.Lcmpend: + j_s.d [blink] + sub r0, r2, r3 +#endif /* __ARCHS__ */ + END(strcmp) libc_hidden_def(strcmp) |