From 26cc89d99cc9d783859eb9d38e067fad5d6bbb60 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Fri, 18 Aug 2017 01:25:25 +0300 Subject: arc: Merge ARCv2 string routines in generic ARC .S files In cde74b83f9b2 "ARC: remove special CFLAGS/LDFLAGS handling" we got rid of CONFIG_ARC_CPU_HS, which was used to select the ARCv2-specific implementation of optimized string routines. So now the ARCv2-tuned memset/memcpy/strcmp are not used; instead, those for ARC700 are used for both ARC700 and ARCHS. Without a uClibc config option, we can only tell which CPU type we're targeting from GCC's built-in defines. I.e., no more conditional file inclusion in Makefiles. That leaves us only one option: merge both implementations into one file and use ifdefs. Signed-off-by: Alexey Brodkin --- libc/string/arc/arcv2/memcpy.S | 236 ----------------------------------------- libc/string/arc/arcv2/memset.S | 115 -------------------- libc/string/arc/arcv2/strcmp.S | 83 --------------- 3 files changed, 434 deletions(-) delete mode 100644 libc/string/arc/arcv2/memcpy.S delete mode 100644 libc/string/arc/arcv2/memset.S delete mode 100644 libc/string/arc/arcv2/strcmp.S (limited to 'libc/string/arc/arcv2') diff --git a/libc/string/arc/arcv2/memcpy.S b/libc/string/arc/arcv2/memcpy.S deleted file mode 100644 index ba29e8790..000000000 --- a/libc/string/arc/arcv2/memcpy.S +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) - * - * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. 
- */ - -#include -#include - -#ifdef __LITTLE_ENDIAN__ -# define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; << -# define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >> -# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM -# define MERGE_2(RX,RY,IMM) -# define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF -# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM -#else -# define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >> -# define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; << -# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; << -# define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; << -# define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM -# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08 -#endif - -#if defined(__LL64__) || defined(__ARC_LL64__) -# define PREFETCH_READ(RX) prefetch [RX, 56] -# define PREFETCH_WRITE(RX) prefetchw [RX, 64] -# define LOADX(DST,RX) ldd.ab DST, [RX, 8] -# define STOREX(SRC,RX) std.ab SRC, [RX, 8] -# define ZOLSHFT 5 -# define ZOLAND 0x1F -#else -# define PREFETCH_READ(RX) prefetch [RX, 28] -# define PREFETCH_WRITE(RX) prefetchw [RX, 32] -# define LOADX(DST,RX) ld.ab DST, [RX, 4] -# define STOREX(SRC,RX) st.ab SRC, [RX, 4] -# define ZOLSHFT 4 -# define ZOLAND 0xF -#endif - -ENTRY(memcpy) - prefetch [r1] ; Prefetch the read location - prefetchw [r0] ; Prefetch the write location - mov.f 0, r2 -;;; if size is zero - jz.d [blink] - mov r3, r0 ; don't clobber ret val - -;;; if size <= 8 - cmp r2, 8 - bls.d @.Lsmallchunk - mov.f lp_count, r2 - - and.f r4, r0, 0x03 - rsub lp_count, r4, 4 - lpnz @.Laligndestination - ;; LOOP BEGIN - ldb.ab r5, [r1,1] - sub r2, r2, 1 - stb.ab r5, [r3,1] -.Laligndestination: - -;;; Check the alignment of the source - and.f r4, r1, 0x03 - bnz.d @.Lsourceunaligned - -;;; CASE 0: Both source and destination are 32bit aligned -;;; Convert len to Dwords, unfold x4 - lsr.f lp_count, r2, ZOLSHFT - lpnz @.Lcopy32_64bytes - ;; LOOP START - LOADX (r6, r1) - PREFETCH_READ (r1) - PREFETCH_WRITE (r3) - LOADX (r8, r1) - LOADX (r10, r1) - LOADX (r4, r1) - STOREX (r6, r3) - STOREX (r8, r3) - 
STOREX (r10, r3) - STOREX (r4, r3) -.Lcopy32_64bytes: - - and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes -.Lsmallchunk: - lpnz @.Lcopyremainingbytes - ;; LOOP START - ldb.ab r5, [r1,1] - stb.ab r5, [r3,1] -.Lcopyremainingbytes: - - j [blink] -;;; END CASE 0 - -.Lsourceunaligned: - cmp r4, 2 - beq.d @.LunalignedOffby2 - sub r2, r2, 1 - - bhi.d @.LunalignedOffby3 - ldb.ab r5, [r1, 1] - -;;; CASE 1: The source is unaligned, off by 1 - ;; Hence I need to read 1 byte for a 16bit alignment - ;; and 2bytes to reach 32bit alignment - ldh.ab r6, [r1, 2] - sub r2, r2, 2 - ;; Convert to words, unfold x2 - lsr.f lp_count, r2, 3 - MERGE_1 (r6, r6, 8) - MERGE_2 (r5, r5, 24) - or r5, r5, r6 - - ;; Both src and dst are aligned - lpnz @.Lcopy8bytes_1 - ;; LOOP START - ld.ab r6, [r1, 4] - prefetch [r1, 28] ;Prefetch the next read location - ld.ab r8, [r1,4] - prefetchw [r3, 32] ;Prefetch the next write location - - SHIFT_1 (r7, r6, 24) - or r7, r7, r5 - SHIFT_2 (r5, r6, 8) - - SHIFT_1 (r9, r8, 24) - or r9, r9, r5 - SHIFT_2 (r5, r8, 8) - - st.ab r7, [r3, 4] - st.ab r9, [r3, 4] -.Lcopy8bytes_1: - - ;; Write back the remaining 16bits - EXTRACT_1 (r6, r5, 16) - sth.ab r6, [r3, 2] - ;; Write back the remaining 8bits - EXTRACT_2 (r5, r5, 16) - stb.ab r5, [r3, 1] - - and.f lp_count, r2, 0x07 ;Last 8bytes - lpnz @.Lcopybytewise_1 - ;; LOOP START - ldb.ab r6, [r1,1] - stb.ab r6, [r3,1] -.Lcopybytewise_1: - j [blink] - -.LunalignedOffby2: -;;; CASE 2: The source is unaligned, off by 2 - ldh.ab r5, [r1, 2] - sub r2, r2, 1 - - ;; Both src and dst are aligned - ;; Convert to words, unfold x2 - lsr.f lp_count, r2, 3 -#ifdef __BIG_ENDIAN__ - asl.nz r5, r5, 16 -#endif - lpnz @.Lcopy8bytes_2 - ;; LOOP START - ld.ab r6, [r1, 4] - prefetch [r1, 28] ;Prefetch the next read location - ld.ab r8, [r1,4] - prefetchw [r3, 32] ;Prefetch the next write location - - SHIFT_1 (r7, r6, 16) - or r7, r7, r5 - SHIFT_2 (r5, r6, 16) - - SHIFT_1 (r9, r8, 16) - or r9, r9, r5 - SHIFT_2 (r5, r8, 16) - - st.ab r7, 
[r3, 4] - st.ab r9, [r3, 4] -.Lcopy8bytes_2: - -#ifdef __BIG_ENDIAN__ - lsr.nz r5, r5, 16 -#endif - sth.ab r5, [r3, 2] - - and.f lp_count, r2, 0x07 ;Last 8bytes - lpnz @.Lcopybytewise_2 - ;; LOOP START - ldb.ab r6, [r1,1] - stb.ab r6, [r3,1] -.Lcopybytewise_2: - j [blink] - -.LunalignedOffby3: -;;; CASE 3: The source is unaligned, off by 3 -;;; Hence, I need to read 1byte for achieve the 32bit alignment - - ;; Both src and dst are aligned - ;; Convert to words, unfold x2 - lsr.f lp_count, r2, 3 -#ifdef __BIG_ENDIAN__ - asl.ne r5, r5, 24 -#endif - lpnz @.Lcopy8bytes_3 - ;; LOOP START - ld.ab r6, [r1, 4] - prefetch [r1, 28] ;Prefetch the next read location - ld.ab r8, [r1,4] - prefetchw [r3, 32] ;Prefetch the next write location - - SHIFT_1 (r7, r6, 8) - or r7, r7, r5 - SHIFT_2 (r5, r6, 24) - - SHIFT_1 (r9, r8, 8) - or r9, r9, r5 - SHIFT_2 (r5, r8, 24) - - st.ab r7, [r3, 4] - st.ab r9, [r3, 4] -.Lcopy8bytes_3: - -#ifdef __BIG_ENDIAN__ - lsr.nz r5, r5, 24 -#endif - stb.ab r5, [r3, 1] - - and.f lp_count, r2, 0x07 ;Last 8bytes - lpnz @.Lcopybytewise_3 - ;; LOOP START - ldb.ab r6, [r1,1] - stb.ab r6, [r3,1] -.Lcopybytewise_3: - j [blink] - -END(memcpy) -libc_hidden_def(memcpy) diff --git a/libc/string/arc/arcv2/memset.S b/libc/string/arc/arcv2/memset.S deleted file mode 100644 index 343cfaf81..000000000 --- a/libc/string/arc/arcv2/memset.S +++ /dev/null @@ -1,115 +0,0 @@ - -/* - * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) - * - * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. 
- */ - -#include -#include - -#ifdef DONT_USE_PREALLOC -#define PREWRITE(A,B) prefetchw [(A),(B)] -#else -#define PREWRITE(A,B) prealloc [(A),(B)] -#endif - -ENTRY(memset) - prefetchw [r0] ; Prefetch the write location - mov.f 0, r2 -;;; if size is zero - jz.d [blink] - mov r3, r0 ; don't clobber ret val - -;;; if length < 8 - brls.d.nt r2, 8, .Lsmallchunk - mov.f lp_count,r2 - - and.f r4, r0, 0x03 - rsub lp_count, r4, 4 - lpnz @.Laligndestination - ;; LOOP BEGIN - stb.ab r1, [r3,1] - sub r2, r2, 1 -.Laligndestination: - -;;; Destination is aligned - and r1, r1, 0xFF - asl r4, r1, 8 - or r4, r4, r1 - asl r5, r4, 16 - or r5, r5, r4 - mov r4, r5 - - sub3 lp_count, r2, 8 - cmp r2, 64 - bmsk.hi r2, r2, 5 - mov.ls lp_count, 0 - add3.hi r2, r2, 8 - -;;; Convert len to Dwords, unfold x8 - lsr.f lp_count, lp_count, 6 - lpnz @.Lset64bytes - ;; LOOP START - PREWRITE(r3, 64) ;Prefetch the next write location -#if defined(__LL64__) || defined(__ARC_LL64__) - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] -#else - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] -#endif -.Lset64bytes: - - lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes - lpnz .Lset32bytes - ;; LOOP START - prefetchw [r3, 32] ;Prefetch the next write location -#if defined(__LL64__) || defined(__ARC_LL64__) - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] - std.ab r4, [r3, 8] -#else - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] - st.ab r4, [r3, 4] -#endif -.Lset32bytes: - - and.f lp_count, r2, 0x1F ;Last remaining 31 bytes 
-.Lsmallchunk: - lpnz .Lcopy3bytes - ;; LOOP START - stb.ab r1, [r3, 1] -.Lcopy3bytes: - - j [blink] - -END(memset) -libc_hidden_def(memset) diff --git a/libc/string/arc/arcv2/strcmp.S b/libc/string/arc/arcv2/strcmp.S deleted file mode 100644 index 2e0e64a0c..000000000 --- a/libc/string/arc/arcv2/strcmp.S +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) - * - * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. - */ - -#include -#include - -ENTRY(strcmp) - or r2, r0, r1 - bmsk_s r2, r2, 1 - brne r2, 0, @.Lcharloop - -;;; s1 and s2 are word aligned - ld.ab r2, [r0, 4] - - mov_s r12, 0x01010101 - ror r11, r12 - .align 4 -.LwordLoop: - ld.ab r3, [r1, 4] - ;; Detect NULL char in str1 - sub r4, r2, r12 - ld.ab r5, [r0, 4] - bic r4, r4, r2 - and r4, r4, r11 - brne.d.nt r4, 0, .LfoundNULL - ;; Check if the read locations are the same - cmp r2, r3 - beq.d .LwordLoop - mov.eq r2, r5 - - ;; A match is found, spot it out -#ifdef __LITTLE_ENDIAN__ - swape r3, r3 - mov_s r0, 1 - swape r2, r2 -#else - mov_s r0, 1 -#endif - cmp_s r2, r3 - j_s.d [blink] - bset.lo r0, r0, 31 - - .align 4 -.LfoundNULL: -#ifdef __BIG_ENDIAN__ - swape r4, r4 - swape r2, r2 - swape r3, r3 -#endif - ;; Find null byte - ffs r0, r4 - bmsk r2, r2, r0 - bmsk r3, r3, r0 - swape r2, r2 - swape r3, r3 - ;; make the return value - sub.f r0, r2, r3 - mov.hi r0, 1 - j_s.d [blink] - bset.lo r0, r0, 31 - - .align 4 -.Lcharloop: - ldb.ab r2, [r0, 1] - ldb.ab r3, [r1, 1] - nop - breq r2, 0, .Lcmpend - breq r2, r3, .Lcharloop - - .align 4 -.Lcmpend: - j_s.d [blink] - sub r0, r2, r3 -END(strcmp) -libc_hidden_def(strcmp) - -#ifndef __UCLIBC_HAS_LOCALE__ -strong_alias(strcmp,strcoll) -libc_hidden_def(strcoll) -#endif -- cgit v1.2.3