From 663b8a0497c40a20668258bd69db13924c569c41 Mon Sep 17 00:00:00 2001 From: Pavel Kozlov Date: Fri, 7 Oct 2022 13:43:45 +0400 Subject: arc: add optimized string functions for ARCv3 Add ability to use optimized versions of string functions for ARCv3 32-bit CPUs with UCLIBC_HAS_STRING_ARCH_OPT option. Add optimized memcpy/memset/memcmp code for ARCv3 CPUs based on the code from newlib and adapt for ARCv3 existed optimized strchr/strcmp/strcpy/strlen. Link to the Synopsys newlib repo with code for ARCv3 on GitHub: https://github.com/foss-for-synopsys-dwc-arc-processors/newlib Signed-off-by: Pavel Kozlov --- libc/string/arc/strcmp.S | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'libc/string/arc/strcmp.S') diff --git a/libc/string/arc/strcmp.S b/libc/string/arc/strcmp.S index ad38d9e00..3f64ac421 100644 --- a/libc/string/arc/strcmp.S +++ b/libc/string/arc/strcmp.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013, 2014-2015, 2017 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2013, 2014-2015, 2017, 2022 Synopsys, Inc. (www.synopsys.com) * Copyright (C) 2007 ARC International (UK) LTD * * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. @@ -7,14 +7,11 @@ #include #include - -#if !defined(__ARC700__) && !defined(__ARCHS__) -#error "Neither ARC700 nor ARCHS is defined!" -#endif +#include ENTRY(strcmp) -#ifdef __ARC700__ +#if defined(__ARC700__) || defined(__ARC64_ARCH32__) /* This is optimized primarily for the ARC700. It would be possible to speed up the loops by one cycle / word respective one cycle / byte by forcing double source 1 alignment, unrolling @@ -38,7 +35,7 @@ ENTRY(strcmp) breq r2,r3,.Lwordloop #ifdef __LITTLE_ENDIAN__ xor r0,r2,r3 ; mask for difference - sub_s r1,r0,1 + SUBR_S r1,r0,1 bic_s r0,r0,r1 ; mask for least significant difference bit sub r1,r5,r0 xor r0,r5,r1 ; mask for least significant difference byte @@ -55,7 +52,7 @@ ENTRY(strcmp) .Lfound0: xor r0,r2,r3 ; mask for difference or r0,r0,r4 ; or in zero indicator - sub_s r1,r0,1 + SUBR_S r1,r0,1 bic_s r0,r0,r1 ; mask for least significant difference bit sub r1,r5,r0 xor r0,r5,r1 ; mask for least significant difference byte @@ -99,9 +96,8 @@ ENTRY(strcmp) .Lcmpend: j_s.d [blink] sub r0,r2,r3 -#endif /* __ARC700__ */ -#ifdef __ARCHS__ +#elif defined(__ARCHS__) or r2, r0, r1 bmsk_s r2, r2, 1 brne r2, 0, @.Lcharloop @@ -168,7 +164,10 @@ ENTRY(strcmp) .Lcmpend: j_s.d [blink] sub r0, r2, r3 -#endif /* __ARCHS__ */ + +#else +#error "Unsupported ARC CPU type" +#endif END(strcmp) libc_hidden_def(strcmp) -- cgit v1.2.3