Diffstat (limited to 'libc/string')
204 files changed, 7290 insertions, 7373 deletions
diff --git a/libc/string/Makefile.in b/libc/string/Makefile.in index 2f14cc0e6..e7f2ccde1 100644 --- a/libc/string/Makefile.in +++ b/libc/string/Makefile.in @@ -1,10 +1,12 @@ # Makefile for uClibc # -# Copyright (C) 2000-2006 Erik Andersen <andersen@uclibc.org> +# Copyright (C) 2000-2008 Erik Andersen <andersen@uclibc.org> # # Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. # +subdirs += libc/string/$(TARGET_ARCH) libc/string/generic + # # Arch specific fun # @@ -16,7 +18,10 @@ STRING_SUBARCH_OUT := $(top_builddir)libc/string/$(TARGET_ARCH)/$(TARGET_SUBARCH STRING_SUBARCH_SSRC := $(wildcard $(STRING_SUBARCH_OUT)/*.S) STRING_SUBARCH_SOBJ := $(patsubst $(STRING_SUBARCH_DIR)/%.S,$(STRING_SUBARCH_OUT)/%.o,$(STRING_SUBARCH_SSRC)) -STRING_SUBARCH_OBJS := $(STRING_SUBARCH_SOBJ) +STRING_SUBARCH_CSRC := $(wildcard $(STRING_SUBARCH_OUT)/*.c) +STRING_SUBARCH_COBJ := $(patsubst $(STRING_SUBARCH_DIR)/%.c,$(STRING_SUBARCH_OUT)/%.o,$(STRING_SUBARCH_CSRC)) + +STRING_SUBARCH_OBJS := $(STRING_SUBARCH_SOBJ) $(STRING_SUBARCH_COBJ) endif # Collect the arch specific implementation (asm, c files) @@ -133,7 +138,7 @@ libc-y += $(STRING_COBJ) libc-nomulti-$(UCLIBC_HAS_XLOCALE) += $(STRING_OUT)/wcsxfrm_l.o libc-nomulti-y += $(STRING_OUT)/__xpg_strerror_r.o -objclean-y += string_objclean +objclean-y += CLEAN_libc/string -string_objclean: - $(RM) $(STRING_OUT)/{,*/}{,*/}*.{o,os,oS} +CLEAN_libc/string: + $(do_rm) $(addprefix $(STRING_OUT)/,$(addprefix *., o os oS) $(addprefix */*., o os oS) $(addprefix */*/*., o os oS)) diff --git a/libc/string/__glibc_strerror_r.c b/libc/string/__glibc_strerror_r.c index 0f9cd16a9..96b881700 100644 --- a/libc/string/__glibc_strerror_r.c +++ b/libc/string/__glibc_strerror_r.c @@ -5,11 +5,13 @@ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. */ +/* get rid of REDIRECT */ +#define strerror_r __hide_strerror_r + #include <features.h> #include <string.h> -libc_hidden_proto(__glibc_strerror_r) -libc_hidden_proto(__xpg_strerror_r) +#undef strerror_r char *__glibc_strerror_r(int errnum, char *strerrbuf, size_t buflen) { @@ -18,3 +20,6 @@ char *__glibc_strerror_r(int errnum, char *strerrbuf, size_t buflen) return strerrbuf; } libc_hidden_def(__glibc_strerror_r) +#if !defined __USE_XOPEN2K || defined __USE_GNU +strong_alias(__glibc_strerror_r,strerror_r) +#endif diff --git a/libc/string/__xpg_basename.c b/libc/string/__xpg_basename.c index 2449d1d42..2e7ade913 100644 --- a/libc/string/__xpg_basename.c +++ b/libc/string/__xpg_basename.c @@ -5,7 +5,6 @@ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. */ -#include "_string.h" #include <libgen.h> char *__xpg_basename(register char *path) @@ -34,3 +33,7 @@ char *__xpg_basename(register char *path) return first; } +#ifndef __USE_GNU +# undef basename +weak_alias(__xpg_basename,basename) +#endif diff --git a/libc/string/__xpg_strerror_r.c b/libc/string/__xpg_strerror_r.c index ff41192e5..3e78da1be 100644 --- a/libc/string/__xpg_strerror_r.c +++ b/libc/string/__xpg_strerror_r.c @@ -5,8 +5,8 @@ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. 
*/ -/* Make sure we get proper strerror_r() prototype */ -#define strerror_r _hidestrerror_r +/* get rid of REDIRECT */ +#define strerror_r __hide_strerror_r #include <features.h> #include <errno.h> @@ -15,10 +15,6 @@ #undef strerror_r -libc_hidden_proto(__xpg_strerror_r) -/* Experimentally off - libc_hidden_proto(memcpy) */ -/* Experimentally off - libc_hidden_proto(strlen) */ - #ifdef __UCLIBC_HAS_ERRNO_MESSAGES__ extern const char _string_syserrmsgs[] attribute_hidden; @@ -276,4 +272,6 @@ int __xpg_strerror_r(int errnum, char *strerrbuf, size_t buflen) #endif /* __UCLIBC_HAS_ERRNO_MESSAGES__ */ libc_hidden_def(__xpg_strerror_r) -weak_alias(__xpg_strerror_r, strerror_r) +#if defined __USE_XOPEN2K && !defined __USE_GNU +strong_alias(__xpg_strerror_r,strerror_r) +#endif diff --git a/libc/string/_collate.c b/libc/string/_collate.c index 64b5d9608..93501b85e 100644 --- a/libc/string/_collate.c +++ b/libc/string/_collate.c @@ -19,15 +19,6 @@ #include <errno.h> #include <assert.h> -/* Experimentally off - libc_hidden_proto(memset) */ -/* Experimentally off - libc_hidden_proto(memcpy) */ -/* Experimentally off - libc_hidden_proto(strlcpy) */ -/* Experimentally off - libc_hidden_proto(strcmp) */ -#ifdef WANT_WIDE -libc_hidden_proto(wcsxfrm) -libc_hidden_proto(wcscmp) -#endif - #ifdef __UCLIBC_HAS_LOCALE__ #if defined(L_strxfrm) || defined(L_strxfrm_l) || defined(L_wcsxfrm) || defined(L_wcsxfrm_l) @@ -59,29 +50,24 @@ libc_hidden_proto(wcscmp) #if defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) -libc_hidden_proto(wcscoll_l) -libc_hidden_proto(wcscoll) int wcscoll (const Wchar *s0, const Wchar *s1) { return wcscoll_l(s0, s1, __UCLIBC_CURLOCALE ); } libc_hidden_def(wcscoll) -libc_hidden_proto(wcsxfrm_l) -libc_hidden_proto(wcsxfrm) size_t wcsxfrm(Wchar *__restrict ws1, const Wchar *__restrict ws2, size_t n) { return wcsxfrm_l(ws1, ws2, n, __UCLIBC_CURLOCALE ); } -libc_hidden_def(wcsxfrm) #else /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */ #if 0 -#define CUR_COLLATE (&__UCLIBC_CURLOCALE_DATA.collate) +#define CUR_COLLATE (&__UCLIBC_CURLOCALE->collate) #else #define CUR_COLLATE (& __LOCALE_PTR->collate) #endif @@ -173,7 +159,7 @@ static void next_weight(col_state_t *cs, int pass __LOCALE_PARAM ) #define N (1) #else /* WANT_WIDE */ wchar_t WC; - size_t n0, nx; + size_t n0, nx = 0; #define N n0 #endif /* WANT_WIDE */ @@ -381,7 +367,7 @@ static void next_weight(col_state_t *cs, int pass __LOCALE_PARAM ) if (cs->back_buf == cs->ibb) { /* was using internal buffer */ cs->bp = malloc(cs->bb_size + 128); if (!cs->bp) { - __set_errno(ENOMEM); + /* __set_errno(ENOMEM); */ #ifdef __UCLIBC_MJN3_ONLY__ #warning what to do here? #endif @@ -393,7 +379,7 @@ static void next_weight(col_state_t *cs, int pass __LOCALE_PARAM ) } else { cs->bp = realloc(cs->back_buf, cs->bb_size + 128); if (!cs->bp) { - __set_errno(ENOMEM); + /* __set_errno(ENOMEM); */ #ifdef __UCLIBC_MJN3_ONLY__ #warning what to do here? 
#endif @@ -513,7 +499,6 @@ static void next_weight(col_state_t *cs, int pass __LOCALE_PARAM ) } while (1); } -libc_hidden_proto(__XL_NPP(wcscoll)) int __XL_NPP(wcscoll) (const Wchar *s0, const Wchar *s1 __LOCALE_PARAM ) { col_state_t ws[2]; @@ -522,9 +507,9 @@ int __XL_NPP(wcscoll) (const Wchar *s0, const Wchar *s1 __LOCALE_PARAM ) if (!CUR_COLLATE->num_weights) { /* C locale */ #ifdef WANT_WIDE return wcscmp(s0, s1); -#else /* WANT_WIDE */ +#else return strcmp(s0, s1); -#endif /* WANT_WIDE */ +#endif } pass = 0; @@ -551,10 +536,6 @@ libc_hidden_def(__XL_NPP(wcscoll)) #ifdef WANT_WIDE -extern size_t __wcslcpy(wchar_t *__restrict dst, - const wchar_t *__restrict src, size_t n); - -libc_hidden_proto(__XL_NPP(wcsxfrm)) size_t __XL_NPP(wcsxfrm)(wchar_t *__restrict ws1, const wchar_t *__restrict ws2, size_t n __LOCALE_PARAM ) { @@ -592,7 +573,9 @@ size_t __XL_NPP(wcsxfrm)(wchar_t *__restrict ws1, const wchar_t *__restrict ws2, } return count-1; } +#if defined L_strxfrm_l || defined L_wcsxfrm_l libc_hidden_def(__XL_NPP(wcsxfrm)) +#endif #else /* WANT_WIDE */ @@ -636,7 +619,6 @@ static size_t store(unsigned char *s, size_t count, size_t n, __uwchar_t weight) return r; } -libc_hidden_proto(__XL_NPP(strxfrm)) size_t __XL_NPP(strxfrm)(char *__restrict ws1, const char *__restrict ws2, size_t n __LOCALE_PARAM ) { @@ -674,7 +656,9 @@ size_t __XL_NPP(strxfrm)(char *__restrict ws1, const char *__restrict ws2, size_ } return count-1; } +#ifdef L_strxfrm_l libc_hidden_def(__XL_NPP(strxfrm)) +#endif #endif /* WANT_WIDE */ diff --git a/libc/string/arc/Makefile b/libc/string/arc/Makefile new file mode 100755 index 000000000..523cf6842 --- /dev/null +++ b/libc/string/arc/Makefile @@ -0,0 +1,13 @@ +# Makefile for uClibc +# +# Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org> +# +# Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. +# + +top_srcdir:=../../../ +top_builddir:=../../../ +all: objs +include $(top_builddir)Rules.mak +include ../Makefile.in +include $(top_srcdir)Makerules diff --git a/libc/string/arc/arcv2/memcpy.S b/libc/string/arc/arcv2/memcpy.S new file mode 100644 index 000000000..7573daf51 --- /dev/null +++ b/libc/string/arc/arcv2/memcpy.S @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. 
+ */ + +#include <features.h> +#include <sysdep.h> + +#ifdef __LITTLE_ENDIAN__ +# define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; << +# define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >> +# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM +# define MERGE_2(RX,RY,IMM) +# define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF +# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM +#else +# define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >> +# define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; << +# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; << +# define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; << +# define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM +# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08 +#endif + +#ifdef __LL64__ +# define PREFETCH_READ(RX) prefetch [RX, 56] +# define PREFETCH_WRITE(RX) prefetchw [RX, 64] +# define LOADX(DST,RX) ldd.ab DST, [RX, 8] +# define STOREX(SRC,RX) std.ab SRC, [RX, 8] +# define ZOLSHFT 5 +# define ZOLAND 0x1F +#else +# define PREFETCH_READ(RX) prefetch [RX, 28] +# define PREFETCH_WRITE(RX) prefetchw [RX, 32] +# define LOADX(DST,RX) ld.ab DST, [RX, 4] +# define STOREX(SRC,RX) st.ab SRC, [RX, 4] +# define ZOLSHFT 4 +# define ZOLAND 0xF +#endif + +ENTRY(memcpy) + prefetch [r1] ; Prefetch the read location + prefetchw [r0] ; Prefetch the write location + mov.f 0, r2 +;;; if size is zero + jz.d [blink] + mov r3, r0 ; don't clobber ret val + +;;; if size <= 8 + cmp r2, 8 + bls.d @.Lsmallchunk + mov.f lp_count, r2 + + and.f r4, r0, 0x03 + rsub lp_count, r4, 4 + lpnz @.Laligndestination + ;; LOOP BEGIN + ldb.ab r5, [r1,1] + sub r2, r2, 1 + stb.ab r5, [r3,1] +.Laligndestination: + +;;; Check the alignment of the source + and.f r4, r1, 0x03 + bnz.d @.Lsourceunaligned + +;;; CASE 0: Both source and destination are 32bit aligned +;;; Convert len to Dwords, unfold x4 + lsr.f lp_count, r2, ZOLSHFT + lpnz @.Lcopy32_64bytes + ;; LOOP START + LOADX (r6, r1) + PREFETCH_READ (r1) + PREFETCH_WRITE (r3) + LOADX (r8, r1) + LOADX (r10, r1) + LOADX (r4, r1) + STOREX (r6, r3) + STOREX (r8, r3) + STOREX (r10, r3) + STOREX (r4, r3) +.Lcopy32_64bytes: + + and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes +.Lsmallchunk: + lpnz @.Lcopyremainingbytes + ;; LOOP START + ldb.ab r5, [r1,1] + stb.ab r5, [r3,1] +.Lcopyremainingbytes: + + j [blink] +;;; END CASE 0 + +.Lsourceunaligned: + cmp r4, 2 + beq.d @.LunalignedOffby2 + sub r2, r2, 1 + + bhi.d @.LunalignedOffby3 + ldb.ab r5, [r1, 1] + +;;; CASE 1: The source is unaligned, off by 1 + ;; Hence I need to read 1 byte for a 16bit alignment + ;; and 2bytes to reach 32bit alignment + ldh.ab r6, [r1, 2] + sub r2, r2, 2 + ;; Convert to words, unfold x2 + lsr.f lp_count, r2, 3 + MERGE_1 (r6, r6, 8) + MERGE_2 (r5, r5, 24) + or r5, r5, r6 + + ;; Both src and dst are aligned + lpnz @.Lcopy8bytes_1 + ;; LOOP START + ld.ab r6, [r1, 4] + prefetch [r1, 28] ;Prefetch the next read location + ld.ab r8, [r1,4] + prefetchw [r3, 32] ;Prefetch the next write location + + SHIFT_1 (r7, r6, 24) + or r7, r7, r5 + SHIFT_2 (r5, r6, 8) + + SHIFT_1 (r9, r8, 24) + or r9, r9, r5 + SHIFT_2 (r5, r8, 8) + + st.ab r7, [r3, 4] + st.ab r9, [r3, 4] +.Lcopy8bytes_1: + + ;; Write back the remaining 16bits + EXTRACT_1 (r6, r5, 16) + sth.ab r6, [r3, 2] + ;; Write back the remaining 8bits + EXTRACT_2 (r5, r5, 16) + stb.ab r5, [r3, 1] + + and.f lp_count, r2, 0x07 ;Last 8bytes + lpnz @.Lcopybytewise_1 + ;; LOOP START + ldb.ab r6, [r1,1] + stb.ab r6, [r3,1] +.Lcopybytewise_1: + j [blink] + +.LunalignedOffby2: +;;; CASE 2: The source is unaligned, off by 2 + ldh.ab r5, [r1, 2] + sub r2, r2, 1 + + ;; Both src and dst are aligned + ;; 
Convert to words, unfold x2 + lsr.f lp_count, r2, 3 +#ifdef __BIG_ENDIAN__ + asl.nz r5, r5, 16 +#endif + lpnz @.Lcopy8bytes_2 + ;; LOOP START + ld.ab r6, [r1, 4] + prefetch [r1, 28] ;Prefetch the next read location + ld.ab r8, [r1,4] + prefetchw [r3, 32] ;Prefetch the next write location + + SHIFT_1 (r7, r6, 16) + or r7, r7, r5 + SHIFT_2 (r5, r6, 16) + + SHIFT_1 (r9, r8, 16) + or r9, r9, r5 + SHIFT_2 (r5, r8, 16) + + st.ab r7, [r3, 4] + st.ab r9, [r3, 4] +.Lcopy8bytes_2: + +#ifdef __BIG_ENDIAN__ + lsr.nz r5, r5, 16 +#endif + sth.ab r5, [r3, 2] + + and.f lp_count, r2, 0x07 ;Last 8bytes + lpnz @.Lcopybytewise_2 + ;; LOOP START + ldb.ab r6, [r1,1] + stb.ab r6, [r3,1] +.Lcopybytewise_2: + j [blink] + +.LunalignedOffby3: +;;; CASE 3: The source is unaligned, off by 3 +;;; Hence, I need to read 1byte for achieve the 32bit alignment + + ;; Both src and dst are aligned + ;; Convert to words, unfold x2 + lsr.f lp_count, r2, 3 +#ifdef __BIG_ENDIAN__ + asl.ne r5, r5, 24 +#endif + lpnz @.Lcopy8bytes_3 + ;; LOOP START + ld.ab r6, [r1, 4] + prefetch [r1, 28] ;Prefetch the next read location + ld.ab r8, [r1,4] + prefetchw [r3, 32] ;Prefetch the next write location + + SHIFT_1 (r7, r6, 8) + or r7, r7, r5 + SHIFT_2 (r5, r6, 24) + + SHIFT_1 (r9, r8, 8) + or r9, r9, r5 + SHIFT_2 (r5, r8, 24) + + st.ab r7, [r3, 4] + st.ab r9, [r3, 4] +.Lcopy8bytes_3: + +#ifdef __BIG_ENDIAN__ + lsr.nz r5, r5, 24 +#endif + stb.ab r5, [r3, 1] + + and.f lp_count, r2, 0x07 ;Last 8bytes + lpnz @.Lcopybytewise_3 + ;; LOOP START + ldb.ab r6, [r1,1] + stb.ab r6, [r3,1] +.Lcopybytewise_3: + j [blink] + +END(memcpy) +libc_hidden_def(memcpy) diff --git a/libc/string/arc/arcv2/memset.S b/libc/string/arc/arcv2/memset.S new file mode 100644 index 000000000..0918d3774 --- /dev/null +++ b/libc/string/arc/arcv2/memset.S @@ -0,0 +1,115 @@ + +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. 
+ */ + +#include <features.h> +#include <sysdep.h> + +#ifdef DONT_USE_PREALLOC +#define PREWRITE(A,B) prefetchw [(A),(B)] +#else +#define PREWRITE(A,B) prealloc [(A),(B)] +#endif + +ENTRY(memset) + prefetchw [r0] ; Prefetch the write location + mov.f 0, r2 +;;; if size is zero + jz.d [blink] + mov r3, r0 ; don't clobber ret val + +;;; if length < 8 + brls.d.nt r2, 8, .Lsmallchunk + mov.f lp_count,r2 + + and.f r4, r0, 0x03 + rsub lp_count, r4, 4 + lpnz @.Laligndestination + ;; LOOP BEGIN + stb.ab r1, [r3,1] + sub r2, r2, 1 +.Laligndestination: + +;;; Destination is aligned + and r1, r1, 0xFF + asl r4, r1, 8 + or r4, r4, r1 + asl r5, r4, 16 + or r5, r5, r4 + mov r4, r5 + + sub3 lp_count, r2, 8 + cmp r2, 64 + bmsk.hi r2, r2, 5 + mov.ls lp_count, 0 + add3.hi r2, r2, 8 + +;;; Convert len to Dwords, unfold x8 + lsr.f lp_count, lp_count, 6 + lpnz @.Lset64bytes + ;; LOOP START + PREWRITE(r3, 64) ;Prefetch the next write location +#ifdef __LL64__ + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] +#else + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] +#endif +.Lset64bytes: + + lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes + lpnz .Lset32bytes + ;; LOOP START + prefetchw [r3, 32] ;Prefetch the next write location +#ifdef __LL64__ + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] +#else + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] + st.ab r4, [r3, 4] +#endif +.Lset32bytes: + + and.f lp_count, r2, 0x1F ;Last remaining 31 bytes +.Lsmallchunk: + lpnz .Lcopy3bytes + ;; LOOP START + stb.ab r1, [r3, 1] +.Lcopy3bytes: + + j [blink] + +END(memset) +libc_hidden_def(memset) diff --git a/libc/string/arc/arcv2/strcmp.S b/libc/string/arc/arcv2/strcmp.S new file mode 100644 index 000000000..2e0e64a0c --- /dev/null +++ b/libc/string/arc/arcv2/strcmp.S @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. 
+ */ + +#include <features.h> +#include <sysdep.h> + +ENTRY(strcmp) + or r2, r0, r1 + bmsk_s r2, r2, 1 + brne r2, 0, @.Lcharloop + +;;; s1 and s2 are word aligned + ld.ab r2, [r0, 4] + + mov_s r12, 0x01010101 + ror r11, r12 + .align 4 +.LwordLoop: + ld.ab r3, [r1, 4] + ;; Detect NULL char in str1 + sub r4, r2, r12 + ld.ab r5, [r0, 4] + bic r4, r4, r2 + and r4, r4, r11 + brne.d.nt r4, 0, .LfoundNULL + ;; Check if the read locations are the same + cmp r2, r3 + beq.d .LwordLoop + mov.eq r2, r5 + + ;; A match is found, spot it out +#ifdef __LITTLE_ENDIAN__ + swape r3, r3 + mov_s r0, 1 + swape r2, r2 +#else + mov_s r0, 1 +#endif + cmp_s r2, r3 + j_s.d [blink] + bset.lo r0, r0, 31 + + .align 4 +.LfoundNULL: +#ifdef __BIG_ENDIAN__ + swape r4, r4 + swape r2, r2 + swape r3, r3 +#endif + ;; Find null byte + ffs r0, r4 + bmsk r2, r2, r0 + bmsk r3, r3, r0 + swape r2, r2 + swape r3, r3 + ;; make the return value + sub.f r0, r2, r3 + mov.hi r0, 1 + j_s.d [blink] + bset.lo r0, r0, 31 + + .align 4 +.Lcharloop: + ldb.ab r2, [r0, 1] + ldb.ab r3, [r1, 1] + nop + breq r2, 0, .Lcmpend + breq r2, r3, .Lcharloop + + .align 4 +.Lcmpend: + j_s.d [blink] + sub r0, r2, r3 +END(strcmp) +libc_hidden_def(strcmp) + +#ifndef __UCLIBC_HAS_LOCALE__ +strong_alias(strcmp,strcoll) +libc_hidden_def(strcoll) +#endif diff --git a/libc/string/arc/memcmp.S b/libc/string/arc/memcmp.S new file mode 100644 index 000000000..a60757e7a --- /dev/null +++ b/libc/string/arc/memcmp.S @@ -0,0 +1,157 @@ +/* + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2007 ARC International (UK) LTD + * + * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + */ + +#include <sysdep.h> +#include <features.h> + +#ifdef __LITTLE_ENDIAN__ +#define WORD2 r2 +#define SHIFT r3 +#else /* BIG ENDIAN */ +#define WORD2 r3 +#define SHIFT r2 +#endif + +ENTRY(memcmp) + or r12,r0,r1 + asl_s r12,r12,30 + sub r3,r2,1 + brls r2,r12,.Lbytewise + ld r4,[r0,0] + ld r5,[r1,0] + lsr.f lp_count,r3,3 +#ifdef __HS__ + /* In ARCv2 a branch can't be the last instruction in a zero overhead + * loop. + * So we move the branch to the start of the loop, duplicate it + * after the end, and set up r12 so that the branch isn't taken + * initially. 
+ */ + mov_s r12,WORD2 + lpne .Loop_end + brne WORD2,r12,.Lodd + ld WORD2,[r0,4] +#else + lpne .Loop_end + ld_s WORD2,[r0,4] +#endif + ld_s r12,[r1,4] + brne r4,r5,.Leven + ld.a r4,[r0,8] + ld.a r5,[r1,8] +#ifdef __HS__ +.Loop_end: + brne WORD2,r12,.Lodd +#else + brne WORD2,r12,.Lodd +.Loop_end: +#endif + asl_s SHIFT,SHIFT,3 + bhs_s .Last_cmp + brne r4,r5,.Leven + ld r4,[r0,4] + ld r5,[r1,4] +#ifdef __LITTLE_ENDIAN__ + nop_s + ; one more load latency cycle +.Last_cmp: + xor r0,r4,r5 + bset r0,r0,SHIFT + sub_s r1,r0,1 + bic_s r1,r1,r0 + norm r1,r1 + b.d .Leven_cmp + and r1,r1,24 +.Leven: + xor r0,r4,r5 + sub_s r1,r0,1 + bic_s r1,r1,r0 + norm r1,r1 + ; slow track insn + and r1,r1,24 +.Leven_cmp: + asl r2,r4,r1 + asl r12,r5,r1 + lsr_s r2,r2,1 + lsr_s r12,r12,1 + j_s.d [blink] + sub r0,r2,r12 + .balign 4 +.Lodd: + xor r0,WORD2,r12 + sub_s r1,r0,1 + bic_s r1,r1,r0 + norm r1,r1 + ; slow track insn + and r1,r1,24 + asl_s r2,r2,r1 + asl_s r12,r12,r1 + lsr_s r2,r2,1 + lsr_s r12,r12,1 + j_s.d [blink] + sub r0,r2,r12 +#else /* BIG ENDIAN */ +.Last_cmp: + neg_s SHIFT,SHIFT + lsr r4,r4,SHIFT + lsr r5,r5,SHIFT + ; slow track insn +.Leven: + sub.f r0,r4,r5 + mov.ne r0,1 + j_s.d [blink] + bset.cs r0,r0,31 +.Lodd: + cmp_s WORD2,r12 + mov_s r0,1 + j_s.d [blink] + bset.cs r0,r0,31 +#endif /* ENDIAN */ + .balign 4 +.Lbytewise: + breq r2,0,.Lnil + ldb r4,[r0,0] + ldb r5,[r1,0] + lsr.f lp_count,r3 +#ifdef __HS__ + mov r12,r3 + lpne .Lbyte_end + brne r3,r12,.Lbyte_odd +#else + lpne .Lbyte_end +#endif + ldb_s r3,[r0,1] + ldb r12,[r1,1] + brne r4,r5,.Lbyte_even + ldb.a r4,[r0,2] + ldb.a r5,[r1,2] +#ifdef __HS__ +.Lbyte_end: + brne r3,r12,.Lbyte_odd +#else + brne r3,r12,.Lbyte_odd +.Lbyte_end: +#endif + bcc .Lbyte_even + brne r4,r5,.Lbyte_even + ldb_s r3,[r0,1] + ldb_s r12,[r1,1] +.Lbyte_odd: + j_s.d [blink] + sub r0,r3,r12 +.Lbyte_even: + j_s.d [blink] + sub r0,r4,r5 +.Lnil: + j_s.d [blink] + mov r0,0 +END(memcmp) +libc_hidden_def(memcmp) + +#ifdef __UCLIBC_SUSV3_LEGACY__ +strong_alias(memcmp,bcmp) +#endif diff --git a/libc/string/arc/memcpy.S b/libc/string/arc/memcpy.S new file mode 100644 index 000000000..1c11951e4 --- /dev/null +++ b/libc/string/arc/memcpy.S @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2007 ARC International (UK) LTD + * + * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + */ + +#include <sysdep.h> + +/* This memcpy implementation does not support objects of 1GB or larger - + the check for alignment does not work then. */ +/* We assume that most sources and destinations are aligned, and + that also lengths are mostly a multiple of four, although to a lesser + extent. 
*/ +ENTRY(memcpy) + or r3,r0,r1 + asl_s r3,r3,30 + mov_s r5,r0 + brls.d r2,r3,.Lcopy_bytewise + sub.f r3,r2,1 + ld_s r12,[r1,0] + asr.f lp_count,r3,3 + bbit0.d r3,2,.Lnox4 + bmsk_s r2,r2,1 + st.ab r12,[r5,4] + ld.a r12,[r1,4] +.Lnox4: + lppnz .Lendloop + ld_s r3,[r1,4] + st.ab r12,[r5,4] + ld.a r12,[r1,8] + st.ab r3,[r5,4] +.Lendloop: + breq r2,0,.Last_store + ld r3,[r5,0] +#ifdef __LITTLE_ENDIAN__ + add3 r2,-1,r2 + ; uses long immediate + xor_s r12,r12,r3 + bmsk r12,r12,r2 + xor_s r12,r12,r3 +#else /* BIG ENDIAN */ + sub3 r2,31,r2 + ; uses long immediate + xor_s r3,r3,r12 + bmsk r3,r3,r2 + xor_s r12,r12,r3 +#endif /* ENDIAN */ +.Last_store: + j_s.d [blink] + st r12,[r5,0] + + .balign 4 +.Lcopy_bytewise: + jcs [blink] + ldb_s r12,[r1,0] + lsr.f lp_count,r3 + bhs_s .Lnox1 + stb.ab r12,[r5,1] + ldb.a r12,[r1,1] +.Lnox1: + lppnz .Lendbloop + ldb_s r3,[r1,1] + stb.ab r12,[r5,1] + ldb.a r12,[r1,2] + stb.ab r3,[r5,1] +.Lendbloop: + j_s.d [blink] + stb r12,[r5,0] +END(memcpy) +libc_hidden_def(memcpy) diff --git a/libc/string/arc/memset.S b/libc/string/arc/memset.S new file mode 100644 index 000000000..f4048455a --- /dev/null +++ b/libc/string/arc/memset.S @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2007 ARC International (UK) LTD + * + * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + */ + +#include <sysdep.h> + +#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */ + +ENTRY(memset) + + mov_s r4,r0 + or r12,r0,r2 + bmsk.f r12,r12,1 + extb_s r1,r1 + asl r3,r1,8 + beq.d .Laligned + or_s r1,r1,r3 + brls r2,SMALL,.Ltiny + add r3,r2,r0 + stb r1,[r3,-1] + bclr_s r3,r3,0 + stw r1,[r3,-2] + bmsk.f r12,r0,1 + add_s r2,r2,r12 + sub.ne r2,r2,4 + stb.ab r1,[r4,1] + and r4,r4,-2 + stw.ab r1,[r4,2] + and r4,r4,-4 +.Laligned: ; This code address should be aligned for speed. + asl r3,r1,16 + lsr.f lp_count,r2,2 + or_s r1,r1,r3 + lpne .Loop_end + st.ab r1,[r4,4] +.Loop_end: + j_s [blink] + + + .balign 4 +.Ltiny: + mov.f lp_count,r2 + lpne .Ltiny_end + stb.ab r1,[r4,1] +.Ltiny_end: + j_s [blink] +END(memset) +libc_hidden_def(memset) diff --git a/libc/string/arc/strchr.S b/libc/string/arc/strchr.S new file mode 100644 index 000000000..443993589 --- /dev/null +++ b/libc/string/arc/strchr.S @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2007 ARC International (UK) LTD + * + * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + */ + +#include <sysdep.h> +#include <features.h> + +/* ARC700 has a relatively long pipeline and branch prediction, so we want + to avoid branches that are hard to predict. On the other hand, the + presence of the norm instruction makes it easier to operate on whole + words branch-free. */ + +ENTRY(strchr) + extb_s r1,r1 + asl r5,r1,8 + bmsk r2,r0,1 + or r5,r5,r1 + mov_s r3,0x01010101 + breq.d r2,r0,.Laligned + asl r4,r5,16 + sub_s r0,r0,r2 + asl r7,r2,3 + ld_s r2,[r0] +#ifdef __LITTLE_ENDIAN__ + asl r7,r3,r7 +#else + lsr r7,r3,r7 +#endif + or r5,r5,r4 + ror r4,r3 + sub r12,r2,r7 + bic_s r12,r12,r2 + and r12,r12,r4 + brne.d r12,0,.Lfound0_ua + xor r6,r2,r5 + ld.a r2,[r0,4] + sub r12,r6,r7 + bic r12,r12,r6 +#ifdef __LITTLE_ENDIAN__ + and r7,r12,r4 + breq r7,0,.Loop ; For speed, we want this branch to be unaligned. + b .Lfound_char ; Likewise this one. +#else + and r12,r12,r4 + breq r12,0,.Loop ; For speed, we want this branch to be unaligned. 
+ lsr_s r12,r12,7 + bic r2,r7,r6 + b.d .Lfound_char_b + and_s r2,r2,r12 +#endif +; /* We require this code address to be unaligned for speed... */ +.Laligned: + ld_s r2,[r0] + or r5,r5,r4 + ror r4,r3 +; /* ... so that this code address is aligned, for itself and ... */ +.Loop: + sub r12,r2,r3 + bic_s r12,r12,r2 + and r12,r12,r4 + brne.d r12,0,.Lfound0 + xor r6,r2,r5 + ld.a r2,[r0,4] + sub r12,r6,r3 + bic r12,r12,r6 + and r7,r12,r4 + breq r7,0,.Loop /* ... so that this branch is unaligned. */ + ; Found searched-for character. r0 has already advanced to next word. +#ifdef __LITTLE_ENDIAN__ +/* We only need the information about the first matching byte + (i.e. the least significant matching byte) to be exact, + hence there is no problem with carry effects. */ +.Lfound_char: + sub r3,r7,1 + bic r3,r3,r7 + norm r2,r3 + sub_s r0,r0,1 + asr_s r2,r2,3 + j.d [blink] + sub_s r0,r0,r2 + + .balign 4 +.Lfound0_ua: + mov r3,r7 +.Lfound0: + sub r3,r6,r3 + bic r3,r3,r6 + and r2,r3,r4 + or_s r12,r12,r2 + sub_s r3,r12,1 + bic_s r3,r3,r12 + norm r3,r3 + add_s r0,r0,3 + asr_s r12,r3,3 + asl.f 0,r2,r3 + sub_s r0,r0,r12 + j_s.d [blink] + mov.pl r0,0 +#else /* BIG ENDIAN */ +.Lfound_char: + lsr r7,r7,7 + + bic r2,r7,r6 +.Lfound_char_b: + norm r2,r2 + sub_s r0,r0,4 + asr_s r2,r2,3 + j.d [blink] + add_s r0,r0,r2 + +.Lfound0_ua: + mov_s r3,r7 +.Lfound0: + asl_s r2,r2,7 + or r7,r6,r4 + bic_s r12,r12,r2 + sub r2,r7,r3 + or r2,r2,r6 + bic r12,r2,r12 + bic.f r3,r4,r12 + norm r3,r3 + + add.pl r3,r3,1 + asr_s r12,r3,3 + asl.f 0,r2,r3 + add_s r0,r0,r12 + j_s.d [blink] + mov.mi r0,0 +#endif /* ENDIAN */ +END(strchr) +libc_hidden_def(strchr) + +#ifdef __UCLIBC_SUSV3_LEGACY__ +strong_alias(strchr,index) +#endif diff --git a/libc/string/arc/strcmp.S b/libc/string/arc/strcmp.S new file mode 100644 index 000000000..5a0e56045 --- /dev/null +++ b/libc/string/arc/strcmp.S @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2007 ARC International (UK) LTD + * + * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + */ + +#include <features.h> +#include <sysdep.h> + +/* This is optimized primarily for the ARC700. + It would be possible to speed up the loops by one cycle / word + respective one cycle / byte by forcing double source 1 alignment, unrolling + by a factor of two, and speculatively loading the second word / byte of + source 1; however, that would increase the overhead for loop setup / finish, + and strcmp might often terminate early. 
*/ + +ENTRY(strcmp) + or r2,r0,r1 + bmsk_s r2,r2,1 + brne r2,0,.Lcharloop + mov_s r12,0x01010101 + ror r5,r12 +.Lwordloop: + ld.ab r2,[r0,4] + ld.ab r3,[r1,4] + nop_s + sub r4,r2,r12 + bic r4,r4,r2 + and r4,r4,r5 + brne r4,0,.Lfound0 + breq r2,r3,.Lwordloop +#ifdef __LITTLE_ENDIAN__ + xor r0,r2,r3 ; mask for difference + sub_s r1,r0,1 + bic_s r0,r0,r1 ; mask for least significant difference bit + sub r1,r5,r0 + xor r0,r5,r1 ; mask for least significant difference byte + and_s r2,r2,r0 + and_s r3,r3,r0 +#endif /* LITTLE ENDIAN */ + cmp_s r2,r3 + mov_s r0,1 + j_s.d [blink] + bset.lo r0,r0,31 + + .balign 4 +#ifdef __LITTLE_ENDIAN__ +.Lfound0: + xor r0,r2,r3 ; mask for difference + or r0,r0,r4 ; or in zero indicator + sub_s r1,r0,1 + bic_s r0,r0,r1 ; mask for least significant difference bit + sub r1,r5,r0 + xor r0,r5,r1 ; mask for least significant difference byte + and_s r2,r2,r0 + and_s r3,r3,r0 + sub.f r0,r2,r3 + mov.hi r0,1 + j_s.d [blink] + bset.lo r0,r0,31 +#else /* BIG ENDIAN */ + /* The zero-detection above can mis-detect 0x01 bytes as zeroes + because of carry-propagateion from a lower significant zero byte. + We can compensate for this by checking that bit0 is zero. + This compensation is not necessary in the step where we + get a low estimate for r2, because in any affected bytes + we already have 0x00 or 0x01, which will remain unchanged + when bit 7 is cleared. */ + .balign 4 +.Lfound0: + lsr r0,r4,8 + lsr_s r1,r2 + bic_s r2,r2,r0 ; get low estimate for r2 and get ... + bic_s r0,r0,r1 ; <this is the adjusted mask for zeros> + or_s r3,r3,r0 ; ... high estimate r3 so that r2 > r3 will ... + cmp_s r3,r2 ; ... be independent of trailing garbage + or_s r2,r2,r0 ; likewise for r3 > r2 + bic_s r3,r3,r0 + rlc r0,0 ; r0 := r2 > r3 ? 1 : 0 + cmp_s r2,r3 + j_s.d [blink] + bset.lo r0,r0,31 +#endif /* ENDIAN */ + + .balign 4 +.Lcharloop: + ldb.ab r2,[r0,1] + ldb.ab r3,[r1,1] + nop_s + breq r2,0,.Lcmpend + breq r2,r3,.Lcharloop +.Lcmpend: + j_s.d [blink] + sub r0,r2,r3 +END(strcmp) +libc_hidden_def(strcmp) + +#ifndef __UCLIBC_HAS_LOCALE__ +strong_alias(strcmp,strcoll) +libc_hidden_def(strcoll) +#endif diff --git a/libc/string/arc/strcpy.S b/libc/string/arc/strcpy.S new file mode 100644 index 000000000..241bf3ee6 --- /dev/null +++ b/libc/string/arc/strcpy.S @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2007 ARC International (UK) LTD + * + * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + */ + + +#include <sysdep.h> + +/* If dst and src are 4 byte aligned, copy 8 bytes at a time. + If the src is 4, but not 8 byte aligned, we first read 4 bytes to get + it 8 byte aligned. Thus, we can do a little read-ahead, without + dereferencing a cache line that we should not touch. + Note that short and long instructions have been scheduled to avoid + branch stalls. + The beq_s to r3z could be made unaligned & long to avoid a stall + there, but the it is not likely to be taken often, and it + would also be likey to cost an unaligned mispredict at the next call. 
*/ + +ENTRY(strcpy) + or r2,r0,r1 + bmsk_s r2,r2,1 + brne.d r2,0,charloop + mov_s r10,r0 + ld_s r3,[r1,0] + mov r8,0x01010101 + bbit0.d r1,2,loop_start + ror r12,r8 + sub r2,r3,r8 + bic_s r2,r2,r3 + tst_s r2,r12 + bne r3z + mov_s r4,r3 + .balign 4 +loop: + ld.a r3,[r1,4] + st.ab r4,[r10,4] +loop_start: + ld.a r4,[r1,4] + sub r2,r3,r8 + bic_s r2,r2,r3 + tst_s r2,r12 + bne_s r3z + st.ab r3,[r10,4] + sub r2,r4,r8 + bic r2,r2,r4 + tst r2,r12 + beq loop + mov_s r3,r4 +#ifdef __LITTLE_ENDIAN__ +r3z: bmsk.f r1,r3,7 + lsr_s r3,r3,8 +#else +r3z: lsr.f r1,r3,24 + asl_s r3,r3,8 +#endif + bne.d r3z + stb.ab r1,[r10,1] + j_s [blink] + + .balign 4 +charloop: + ldb.ab r3,[r1,1] + + + brne.d r3,0,charloop + stb.ab r3,[r10,1] + j [blink] +END(strcpy) +libc_hidden_def(strcpy) diff --git a/libc/string/arc/strlen.S b/libc/string/arc/strlen.S new file mode 100644 index 000000000..0b9b93815 --- /dev/null +++ b/libc/string/arc/strlen.S @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2007 ARC International (UK) LTD + * + * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + */ + + +#include <sysdep.h> + +ENTRY(strlen) + or r3,r0,7 + ld r2,[r3,-7] + ld.a r6,[r3,-3] + mov r4,0x01010101 + ; uses long immediate +#ifdef __LITTLE_ENDIAN__ + asl_s r1,r0,3 + btst_s r0,2 + asl r7,r4,r1 + ror r5,r4 + sub r1,r2,r7 + bic_s r1,r1,r2 + mov.eq r7,r4 + sub r12,r6,r7 + bic r12,r12,r6 + or.eq r12,r12,r1 + and r12,r12,r5 + brne r12,0,.Learly_end +#else /* BIG ENDIAN */ + ror r5,r4 + btst_s r0,2 + mov_s r1,31 + sub3 r7,r1,r0 + sub r1,r2,r4 + bic_s r1,r1,r2 + bmsk r1,r1,r7 + sub r12,r6,r4 + bic r12,r12,r6 + bmsk.ne r12,r12,r7 + or.eq r12,r12,r1 + and r12,r12,r5 + brne r12,0,.Learly_end +#endif /* ENDIAN */ + +.Loop: + ld_s r2,[r3,4] + ld.a r6,[r3,8] + ; stall for load result + sub r1,r2,r4 + bic_s r1,r1,r2 + sub r12,r6,r4 + bic r12,r12,r6 + or r12,r12,r1 + and r12,r12,r5 + breq r12,0,.Loop +.Lend: + and.f r1,r1,r5 + sub.ne r3,r3,4 + mov.eq r1,r12 +#ifdef __LITTLE_ENDIAN__ + sub_s r2,r1,1 + bic_s r2,r2,r1 + norm r1,r2 + sub_s r0,r0,3 + lsr_s r1,r1,3 + sub r0,r3,r0 + j_s.d [blink] + sub r0,r0,r1 +#else /* BIG ENDIAN */ + lsr_s r1,r1,7 + mov.eq r2,r6 + bic_s r1,r1,r2 + norm r1,r1 + sub r0,r3,r0 + lsr_s r1,r1,3 + j_s.d [blink] + add r0,r0,r1 +#endif /* ENDIAN */ +.Learly_end: + b.d .Lend + sub_s.ne r1,r1,r1 +END(strlen) +libc_hidden_def(strlen) diff --git a/libc/string/arm/_memcpy.S b/libc/string/arm/_memcpy.S index 103580a0c..2999e8ee6 100644 --- a/libc/string/arm/_memcpy.S +++ b/libc/string/arm/_memcpy.S @@ -40,6 +40,7 @@ #include <features.h> #include <endian.h> #include <bits/arm_asm.h> +#include <bits/arm_bx.h> #if !defined(THUMB1_ONLY) /* @@ -67,8 +68,9 @@ * a time where possible. * * Note: r12 (aka ip) can be trashed during the function along with - * r0-r3 although r0-r2 have defined uses i.e. src, dest, len through out. + * r0-r3 although r0-r2 have defined uses i.e. dest, src, len throughout. * Additional registers are preserved prior to use i.e. r4, r5 & lr + * The return value in r0 must be the destination address. 
* * Apologies for the state of the comments ;-) */ @@ -108,12 +110,8 @@ _memcpy: cmp r1, r0 bcc .Lmemcpy_backwards - IT(tt, eq) /* Quick abort for src=dst */ -#if defined(__USE_BX__) - bxeq lr -#else - moveq pc, lr -#endif + IT(t, eq) /* Quick abort for src=dst */ + BXC(eq, lr) stmdb sp!, {r0, lr} /* memcpy() returns dest addr */ subs r2, r2, #4 blt .Lmemcpy_fl4 /* less than 4 bytes */ @@ -453,11 +451,7 @@ _memcpy: /* less than 4 bytes to go */ adds r2, r2, #4 IT(t, eq) -#if defined(__USE_BX__) - bxeq lr -#else - moveq pc, lr /* done */ -#endif + BXC(eq, lr) /* done */ /* copy the crud byte at a time */ cmp r2, #2 ldrb r3, [r1, #-1]! @@ -475,11 +469,7 @@ _memcpy: ldrgtb r3, [r1, #-1]! strgtb r3, [r0, #-1]! #endif -#if defined(__USE_BX__) - bx lr -#else - mov pc, lr -#endif + BX(lr) /* erg - unaligned destination */ .Lmemcpy_bdestul: cmp r12, #2 diff --git a/libc/string/arm/memcmp.S b/libc/string/arm/memcmp.S index 65409f43a..5b9473cd0 100644 --- a/libc/string/arm/memcmp.S +++ b/libc/string/arm/memcmp.S @@ -31,6 +31,7 @@ #include <features.h> #include <bits/arm_asm.h> +#include <bits/arm_bx.h> .text .global memcmp @@ -66,11 +67,7 @@ memcmp: subs r2, r2, #1 IT(tt, mi) movmi r0, #0 -#if defined(__USE_BX__) - bxmi lr -#else - movmi pc, lr -#endif + BXC(mi, lr) /* ip == last src address to compare */ add ip, r0, r2 1: @@ -81,11 +78,7 @@ memcmp: cmpcs r2, r3 beq 1b sub r0, r2, r3 -#if defined(__USE_BX__) - bx lr -#else - mov pc, lr -#endif + BX(lr) #endif .size memcmp,.-memcmp diff --git a/libc/string/arm/memset.S b/libc/string/arm/memset.S index 66aa6039c..2be4850e4 100644 --- a/libc/string/arm/memset.S +++ b/libc/string/arm/memset.S @@ -13,13 +13,12 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <features.h> -#include <sys/syscall.h> #include <bits/arm_asm.h> +#include <bits/arm_bx.h> .text .global memset @@ -109,11 +108,7 @@ memset: 2: movs a3, a3 @ anything left? IT(t, eq) -#if defined(__USE_BX__) - bxeq lr -#else - moveq pc, lr @ nope -#endif + BXC(eq, lr) @ nope #if defined (__thumb2__) 1: strb a2, [a4], #1 @@ -131,11 +126,7 @@ memset: strb a2, [a4], $1 strb a2, [a4], $1 strb a2, [a4], $1 -#if defined(__USE_BX__) - bx lr -#else - mov pc, lr -#endif + BX(lr) #endif #endif diff --git a/libc/string/arm/strcmp.S b/libc/string/arm/strcmp.S index 97363c1c2..81416a9a5 100644 --- a/libc/string/arm/strcmp.S +++ b/libc/string/arm/strcmp.S @@ -31,6 +31,7 @@ #include <features.h> #include <bits/arm_asm.h> +#include <bits/arm_bx.h> .text .global strcmp @@ -62,11 +63,7 @@ strcmp: cmpcs r2, r3 beq 1b sub r0, r2, r3 -#if defined(__USE_BX__) - bx lr -#else - mov pc, lr -#endif + BX(lr) #endif .size strcmp,.-strcmp diff --git a/libc/string/arm/strlen.S b/libc/string/arm/strlen.S index 949e918f4..9995d768c 100644 --- a/libc/string/arm/strlen.S +++ b/libc/string/arm/strlen.S @@ -13,14 +13,13 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ #include <features.h> #include <endian.h> -#include <sys/syscall.h> #include <bits/arm_asm.h> +#include <bits/arm_bx.h> /* size_t strlen(const char *S) * entry: r0 -> string @@ -99,11 +98,7 @@ Llastword: @ drop through to here once we find a IT(t, ne) addne r0, r0, $1 @ must be zero) #endif -#if defined(__USE_BX__) - bx lr -#else - mov pc,lr -#endif + BX(lr) #endif .size strlen,.-strlen diff --git a/libc/string/arm/strncmp.S b/libc/string/arm/strncmp.S deleted file mode 100644 index 8487639c8..000000000 --- a/libc/string/arm/strncmp.S +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2002 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Adapted for uClibc from NetBSD strncmp.S, version 1.2 2003/04/05 - * by Erik Andersen <andersen@codepoet.org> - */ - -#include <features.h> -#include <bits/arm_asm.h> - -.text -.global strncmp -.type strncmp,%function -.align 4 - -#if defined(THUMB1_ONLY) -.thumb_func -strncmp: - /* if (len == 0) return 0 */ - cmp r2, #0 - bne 1f - mov r0, #0 - bx lr -1: - push {r4} - - /* ip == last src address to compare */ - add r4, r0, r2 -2: - cmp r4, r0 - beq 3f - ldrb r2, [r0] - add r0, r0, #1 - ldrb r3, [r1] - add r1, r1, #1 - cmp r2, #0 - beq 3f - cmp r2, r3 - beq 2b -3: - sub r0, r2, r3 - pop {r4} - bx lr -#else -strncmp: - /* if (len == 0) return 0 */ - cmp r2, #0 - IT(tt, eq) - moveq r0, #0 -#if defined(__USE_BX__) - bxeq lr -#else - moveq pc, lr -#endif - subs r2, r2, #1 - - /* ip == last src address to compare */ - add ip, r0, r2 -1: - ldrb r2, [r0], #1 - ldrb r3, [r1], #1 - cmp ip, r0 - IT(tt, cs) - cmpcs r2, #1 - cmpcs r2, r3 - beq 1b - sub r0, r2, r3 -#if defined(__USE_BX__) - bx lr -#else - mov pc, lr -#endif -#endif - -.size strncmp,.-strncmp - -libc_hidden_weak(strncmp) diff --git a/libc/string/avr32/Makefile b/libc/string/avr32/Makefile index e19e9d9ec..385cf085e 100644 --- a/libc/string/avr32/Makefile +++ b/libc/string/avr32/Makefile @@ -13,8 +13,7 @@ # details. 
# # You should have received a copy of the GNU Library General Public License -# along with this program; if not, write to the Free Software Foundation, Inc., -# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# along with this program; if not, see <http://www.gnu.org/licenses/>. top_srcdir := ../../../ top_builddir := ../../../ diff --git a/libc/string/basename.c b/libc/string/basename.c index a076c20e9..abc9d89db 100644 --- a/libc/string/basename.c +++ b/libc/string/basename.c @@ -5,10 +5,9 @@ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. */ -#include "_string.h" +#include <string.h> #ifdef __USE_GNU -/* Experimentally off - libc_hidden_proto(basename) */ char *basename(const char *path) { @@ -25,5 +24,4 @@ char *basename(const char *path) return (char *) p; } -libc_hidden_def(basename) #endif diff --git a/libc/string/bcopy.c b/libc/string/bcopy.c index 3aa7eab1e..e16ba241d 100644 --- a/libc/string/bcopy.c +++ b/libc/string/bcopy.c @@ -5,35 +5,14 @@ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. */ -#include "_string.h" +#include <string.h> #ifdef __UCLIBC_SUSV3_LEGACY__ - -/* Experimentally off - libc_hidden_proto(memmove) */ - void bcopy(const void *s2, void *s1, size_t n) { #if 1 memmove(s1, s2, n); #else -#ifdef __BCC__ - register char *s; - register const char *p; - - s = s1; - p = s2; - if (p >= s) { - while (n--) { - *s++ = *p++; - } - } else { - s += n; - p += n; - while (n--) { - *--s = *--p; - } - } -#else register char *s; register const char *p; @@ -51,6 +30,5 @@ void bcopy(const void *s2, void *s1, size_t n) } } #endif -#endif } #endif diff --git a/libc/string/bfin/memchr.S b/libc/string/bfin/memchr.S index 88e46bef6..26d419f7c 100644 --- a/libc/string/bfin/memchr.S +++ b/libc/string/bfin/memchr.S @@ -25,8 +25,8 @@ .weak _memchr ENTRY(_memchr) - P0 = R0; // P0 = address - P2 = R2; // P2 = count + P0 = R0; /* P0 = address */ + P2 = R2; /* P2 = count */ R1 = R1.B(Z); CC = R2 == 0; IF CC JUMP .Lfailed; diff --git a/libc/string/bfin/strcmp.S b/libc/string/bfin/strcmp.S index 12e8c53c6..ef23aa9ab 100644 --- a/libc/string/bfin/strcmp.S +++ b/libc/string/bfin/strcmp.S @@ -29,66 +29,66 @@ ENTRY(_strcmp) p1 = r0; p2 = r1; - p0 = -1; // (need for loop counter init) + p0 = -1; /* (need for loop counter init) */ - // check if byte aligned - r0 = r0 | r1; // check both pointers at same time - r0 <<= 30; // dump all but last 2 bits - cc = az; // are they zero? - if !cc jump .Lunaligned; // no; use unaligned code. - // fall-thru for aligned case.. + /* check if byte aligned */ + r0 = r0 | r1; /* check both pointers at same time */ + r0 <<= 30; /* dump all but last 2 bits */ + cc = az; /* are they zero? */ + if !cc jump .Lunaligned; /* no; use unaligned code. */ + /* fall-thru for aligned case.. */ - // note that r0 is zero from the previous... - // p0 set to -1 + /* note that r0 is zero from the previous... 
*/ + /* p0 set to -1 */ LSETUP (.Lbeginloop, .Lendloop) lc0=p0; - // pick up first words + /* pick up first words */ r1 = [p1++]; r2 = [p2++]; - // make up mask: 0FF0FF + /* make up mask: 0FF0FF */ r7 = 0xFF; r7.h = 0xFF; - // loop : 9 cycles to check 4 characters + /* loop : 9 cycles to check 4 characters */ cc = r1 == r2; .Lbeginloop: - if !cc jump .Lnotequal4; // compare failure, exit loop + if !cc jump .Lnotequal4; /* compare failure, exit loop */ - // starting with 44332211 - // see if char 3 or char 1 is 0 - r3 = r1 & r7; // form 00330011 - // add to zero, and (r2 is free, reload) + /* starting with 44332211 */ + /* see if char 3 or char 1 is 0 */ + r3 = r1 & r7; /* form 00330011 */ + /* add to zero, and (r2 is free, reload) */ r6 = r3 +|+ r0 || r2 = [p2++] || nop; - cc = az; // true if either is zero - r3 = r1 ^ r3; // form 44002200 (4321^0301 => 4020) - // (trick, saves having another mask) - // add to zero, and (r1 is free, reload) + cc = az; /* true if either is zero */ + r3 = r1 ^ r3; /* form 44002200 (4321^0301 => 4020) */ + /* (trick, saves having another mask) */ + /* add to zero, and (r1 is free, reload) */ r6 = r3 +|+ r0 || r1 = [p1++] || nop; - cc |= az; // true if either is zero - if cc jump .Lzero4; // leave if a zero somewhere + cc |= az; /* true if either is zero */ + if cc jump .Lzero4; /* leave if a zero somewhere */ .Lendloop: cc = r1 == r2; - // loop exits -.Lnotequal4: // compare failure on 4-char compare - // address pointers are one word ahead; - // faster to use zero4 exit code + /* loop exits */ +.Lnotequal4: /* compare failure on 4-char compare */ + /* address pointers are one word ahead; */ + /* faster to use zero4 exit code */ p1 += 4; p2 += 4; -.Lzero4: // one of the bytes in word 1 is zero - // but we've already fetched the next word; so - // backup two to look at failing word again +.Lzero4: /* one of the bytes in word 1 is zero */ + /* but we've already fetched the next word; so */ + /* backup two to look at failing word again */ p1 += -8; p2 += -8; - // here when pointers are unaligned: checks one - // character at a time. Also use at the end of - // the word-check algorithm to figure out what happened + /* here when pointers are unaligned: checks one */ + /* character at a time. Also use at the end of */ + /* the word-check algorithm to figure out what happened */ .Lunaligned: - // R0 is non-zero from before. - // p0 set to -1 + /* R0 is non-zero from before. */ + /* p0 set to -1 */ r0 = 0 (Z); r1 = B[p1++] (Z); @@ -96,18 +96,18 @@ ENTRY(_strcmp) LSETUP (.Lbeginloop1, .Lendloop1) lc0=p0; .Lbeginloop1: - cc = r1; // first char must be non-zero - // chars must be the same + cc = r1; /* first char must be non-zero */ + /* chars must be the same */ r3 = r2 - r1 (NS) || r1 = B[p1++] (Z) || nop; cc &= az; - r3 = r0 - r2; // second char must be non-zero + r3 = r0 - r2; /* second char must be non-zero */ cc &= an; if !cc jump .Lexitloop1; .Lendloop1: r2 = B[p2++] (Z); -.Lexitloop1: // here means we found a zero or a difference. - // we have r2(N), p2(N), r1(N+1), p1(N+2) +.Lexitloop1: /* here means we found a zero or a difference. */ + /* we have r2(N), p2(N), r1(N+1), p1(N+2) */ r1=B[p1+ -2] (Z); r0 = r1 - r2; (r7:4) = [sp++]; diff --git a/libc/string/bzero.c b/libc/string/bzero.c index 7498f795f..32dce416e 100644 --- a/libc/string/bzero.c +++ b/libc/string/bzero.c @@ -5,30 +5,20 @@ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. 
*/ -#include "_string.h" +#include <string.h> #ifdef __UCLIBC_SUSV3_LEGACY__ - -/* Experimentally off - libc_hidden_proto(memset) */ - void bzero(void *s, size_t n) { #if 1 (void)memset(s, 0, n); #else register unsigned char *p = s; -#ifdef __BCC__ - /* bcc can optimize the counter if it thinks it is a pointer... */ - register const char *np = (const char *) n; -#else -#define np n -#endif - while (np) { + while (n) { *p++ = 0; - --np; + --n; } #endif } -#undef np #endif diff --git a/libc/string/cris/memcopy.h b/libc/string/cris/memcopy.h index 449c75641..ccd447c83 100644 --- a/libc/string/cris/memcopy.h +++ b/libc/string/cris/memcopy.h @@ -16,8 +16,7 @@ You should have received a copy of the GNU Library General Public License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ + see <http://www.gnu.org/licenses/>. */ #include "../generic/memcopy.h" diff --git a/libc/string/cris/memcpy.c b/libc/string/cris/memcpy.c index cc14188b8..94e576f4f 100644 --- a/libc/string/cris/memcpy.c +++ b/libc/string/cris/memcpy.c @@ -1,264 +1,242 @@ -/* Copyright (C) 2001, 2003 Free Software Foundation, Inc. - Copyright (C) 1994, 1995, 2000 Axis Communications AB. - - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -/*#************************************************************************#*/ -/*#-------------------------------------------------------------------------*/ -/*# */ -/*# FUNCTION NAME: memcpy() */ -/*# */ -/*# PARAMETERS: void* dst; Destination address. */ -/*# void* src; Source address. */ -/*# int len; Number of bytes to copy. */ -/*# */ -/*# RETURNS: dst. */ -/*# */ -/*# DESCRIPTION: Copies len bytes of memory from src to dst. No guarantees */ -/*# about copying of overlapping memory areas. This routine is */ -/*# very sensitive to compiler changes in register allocation. */ -/*# Should really be rewritten to avoid this problem. */ -/*# */ -/*#-------------------------------------------------------------------------*/ -/*# */ -/*# HISTORY */ -/*# */ -/*# DATE NAME CHANGES */ -/*# ---- ---- ------- */ -/*# 941007 Kenny R Creation */ -/*# 941011 Kenny R Lots of optimizations and inlining. */ -/*# 941129 Ulf A Adapted for use in libc. */ -/*# 950216 HP N==0 forgotten if non-aligned src/dst. */ -/*# Added some optimizations. */ -/*# 001025 HP Make src and dst char *. Align dst to */ -/*# dword, not just word-if-both-src-and-dst- */ -/*# are-misaligned. */ -/*# 070806 RW Modified for uClibc */ -/*# (__arch_v32 -> __CONFIG_CRISV32__, */ -/*# include features.h to reach it.) 
*/ -/*# */ -/*#-------------------------------------------------------------------------*/ - -#include <features.h> - -#ifdef __CONFIG_CRISV32__ +/* A memcpy for CRIS. + Copyright (C) 1994-2008 Axis Communications. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Neither the name of Axis Communications nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS + COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. */ + +/* FIXME: This file should really only be used for reference, as the + result is somewhat depending on gcc generating what we expect rather + than what we describe. An assembly file should be used instead. */ + +#include <string.h> + +#ifdef __arch_v32 /* For CRISv32, movem is very cheap. */ -#define MEMCPY_BLOCK_THRESHOLD (44) +#define MEMCPY_BY_BLOCK_THRESHOLD (44) #else -/* Break even between movem and move16 is at 38.7*2, but modulo 44. */ -#define MEMCPY_BLOCK_THRESHOLD (44*2) +/* Break even between movem and move16 is really at 38.7 * 2, but + modulo 44, so up to the next multiple of 44, we use ordinary code. */ +#define MEMCPY_BY_BLOCK_THRESHOLD (44 * 2) #endif -void *memcpy(void *, const void *, unsigned int); +/* No name ambiguities in this file. */ +__asm__ (".syntax no_register_prefix"); -/* Experimentally off - libc_hidden_proto(memcpy) */ -void *memcpy(void *pdst, - const void *psrc, - unsigned int pn) +void * +memcpy(void *pdst, const void *psrc, size_t pn) { - /* Ok. Now we want the parameters put in special registers. + /* Now we want the parameters put in special registers. Make sure the compiler is able to make something useful of this. - As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). + As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). If gcc was allright, it really would need no temporaries, and no - stack space to save stuff on. */ + stack space to save stuff on. */ -#ifndef MEMPCPY register void *return_dst __asm__ ("r10") = pdst; -#else - /* FIXME: Use R10 for something. */ -# define return_dst dst -#endif - - register char *dst __asm__ ("r13") = pdst; - register char *src __asm__ ("r11") = (char *) psrc; + register unsigned char *dst __asm__ ("r13") = pdst; + register unsigned const char *src __asm__ ("r11") = psrc; register int n __asm__ ("r12") = pn; - /* When src is aligned but not dst, this makes a few extra needless cycles. I believe it would take as many to check that the re-alignment was unnecessary. 
*/ if (((unsigned long) dst & 3) != 0 /* Don't align if we wouldn't copy more than a few bytes; so we - don't have to check further for overflows. */ + don't have to check further for overflows. */ && n >= 3) { if ((unsigned long) dst & 1) - { - n--; - *(char*)dst = *(char*)src; - src++; - dst++; - } + { + n--; + *dst = *src; + src++; + dst++; + } if ((unsigned long) dst & 2) - { - n -= 2; - *(short*)dst = *(short*)src; - src += 2; - dst += 2; - } + { + n -= 2; + *(short *) dst = *(short *) src; + src += 2; + dst += 2; + } } - /* Decide which copying method to use. */ - if (n >= MEMCPY_BLOCK_THRESHOLD) - { - /* For large copies we use 'movem' */ - - /* It is not optimal to tell the compiler about clobbering any - registers; that will move the saving/restoring of those registers - to the function prologue/epilogue, and make non-movem sizes - suboptimal. - - This method is not foolproof; it assumes that the "register asm" - declarations at the beginning of the function really are used - here (beware: they may be moved to temporary registers). - This way, we do not have to save/move the registers around into - temporaries; we can safely use them straight away. */ - __asm__ __volatile__ ("\ - .syntax no_register_prefix \n\ - \n\ - ;; Check that the register asm declaration got right. \n\ - ;; The GCC manual explicitly says TRT will happen. \n\ - .ifnc %0-%1-%2,$r13-$r11-$r12 \n\ - .err \n\ - .endif \n\ - \n\ - ;; Save the registers we'll use in the movem process \n\ - ;; on the stack. \n\ - subq 11*4,sp \n\ - movem r10,[sp] \n\ - \n\ - ;; Now we've got this: \n\ - ;; r11 - src \n\ - ;; r13 - dst \n\ - ;; r12 - n \n\ - \n\ - ;; Update n for the first loop \n\ - subq 44,r12 \n\ -0: \n\ - movem [r11+],r10 \n\ - subq 44,r12 \n\ - bge 0b \n\ - movem r10,[r13+] \n\ - \n\ - addq 44,r12 ;; compensate for last loop underflowing n \n\ - \n\ - ;; Restore registers from stack \n\ - movem [sp+],r10" - - /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n) - /* Inputs */ : "0" (dst), "1" (src), "2" (n)); - } + /* Decide which copying method to use. */ + if (n >= MEMCPY_BY_BLOCK_THRESHOLD) + { + /* It is not optimal to tell the compiler about clobbering any + registers; that will move the saving/restoring of those registers + to the function prologue/epilogue, and make non-movem sizes + suboptimal. */ + __asm__ __volatile__ + ("\ + ;; GCC does promise correct register allocations, but let's \n\ + ;; make sure it keeps its promises. \n\ + .ifnc %0-%1-%2,$r13-$r11-$r12 \n\ + .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\" \n\ + .endif \n\ + \n\ + ;; Save the registers we'll use in the movem process \n\ + ;; on the stack. \n\ + subq 11*4,sp \n\ + movem r10,[sp] \n\ + \n\ + ;; Now we've got this: \n\ + ;; r11 - src \n\ + ;; r13 - dst \n\ + ;; r12 - n \n\ + \n\ + ;; Update n for the first loop. \n\ + subq 44,r12 \n\ +0: \n\ +" +#ifdef __arch_common_v10_v32 + /* Cater to branch offset difference between v32 and v10. We + assume the branch below has an 8-bit offset. */ +" setf\n" +#endif +" movem [r11+],r10 \n\ + subq 44,r12 \n\ + bge 0b \n\ + movem r10,[r13+] \n\ + \n\ + ;; Compensate for last loop underflowing n. \n\ + addq 44,r12 \n\ + \n\ + ;; Restore registers from stack. \n\ + movem [sp+],r10" + + /* Outputs. */ + : "=r" (dst), "=r" (src), "=r" (n) + + /* Inputs. */ + : "0" (dst), "1" (src), "2" (n)); + } - /* Either we directly starts copying, using dword copying - in a loop, or we copy as much as possible with 'movem' - and then the last block (<44 bytes) is copied here. 
- This will work since 'movem' will have updated src,dst,n. */ + while (n >= 16) + { + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; - while ( n >= 16 ) - { - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - n -= 16; - } + n -= 16; + } - /* A switch() is definitely the fastest although it takes a LOT of code. - * Particularly if you inline code this. - */ switch (n) - { + { case 0: break; + case 1: - *((char*)dst)++ = *((char*)src)++; + *dst = *src; break; + case 2: - *((short*)dst)++ = *((short*)src)++; + *(short *) dst = *(short *) src; break; + case 3: - *((short*)dst)++ = *((short*)src)++; - *((char*)dst)++ = *((char*)src)++; + *(short *) dst = *(short *) src; dst += 2; src += 2; + *dst = *src; break; + case 4: - *((long*)dst)++ = *((long*)src)++; + *(long *) dst = *(long *) src; break; + case 5: - *((long*)dst)++ = *((long*)src)++; - *((char*)dst)++ = *((char*)src)++; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *dst = *src; break; + case 6: - *((long*)dst)++ = *((long*)src)++; - *((short*)dst)++ = *((short*)src)++; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; break; + case 7: - *((long*)dst)++ = *((long*)src)++; - *((short*)dst)++ = *((short*)src)++; - *((char*)dst)++ = *((char*)src)++; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; dst += 2; src += 2; + *dst = *src; break; + case 8: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; break; + case 9: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((char*)dst)++ = *((char*)src)++; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *dst = *src; break; + case 10: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((short*)dst)++ = *((short*)src)++; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; break; + case 11: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((short*)dst)++ = *((short*)src)++; - *((char*)dst)++ = *((char*)src)++; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; dst += 2; src += 2; + *dst = *src; break; + case 12: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; break; + case 13: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((char*)dst)++ = *((char*)src)++; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *dst = *src; break; + case 14: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((short*)dst)++ = *((short*)src)++; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; 
dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; break; + case 15: - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((long*)dst)++ = *((long*)src)++; - *((short*)dst)++ = *((short*)src)++; - *((char*)dst)++ = *((char*)src)++; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(long *) dst = *(long *) src; dst += 4; src += 4; + *(short *) dst = *(short *) src; dst += 2; src += 2; + *dst = *src; break; - } + } - return return_dst; /* destination pointer. */ -} /* memcpy() */ + return return_dst; +} libc_hidden_def(memcpy) diff --git a/libc/string/cris/memmove.c b/libc/string/cris/memmove.c index fa495eba4..906ef8e74 100644 --- a/libc/string/cris/memmove.c +++ b/libc/string/cris/memmove.c @@ -18,16 +18,14 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> #include "memcopy.h" #include "../generic/pagecopy.h" -/* Experimentally off - libc_hidden_proto(memmove) */ void *memmove (void *dest, const void *src, size_t len) { unsigned long int dstp = (long int) dest; diff --git a/libc/string/cris/memset.c b/libc/string/cris/memset.c index b578aac5d..fab4e8b66 100644 --- a/libc/string/cris/memset.c +++ b/libc/string/cris/memset.c @@ -1,271 +1,262 @@ -/* Copyright (C) 2001, 2003 Free Software Foundation, Inc. - Copyright (C) 1999, 2000 Axis Communications AB. - - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -/*#************************************************************************#*/ -/*#-------------------------------------------------------------------------*/ -/*# */ -/*# FUNCTION NAME: memset() */ -/*# */ -/*# PARAMETERS: void* dst; Destination address. */ -/*# int c; Value of byte to write. */ -/*# int len; Number of bytes to write. */ -/*# */ -/*# RETURNS: dst. */ -/*# */ -/*# DESCRIPTION: Sets the memory dst of length len bytes to c, as standard. */ -/*# Framework taken from memcpy. This routine is */ -/*# very sensitive to compiler changes in register allocation. */ -/*# Should really be rewritten to avoid this problem. */ -/*# */ -/*#-------------------------------------------------------------------------*/ -/*# */ -/*# HISTORY */ -/*# */ -/*# DATE NAME CHANGES */ -/*# ---- ---- ------- */ -/*# 990713 HP Tired of watching this function (or */ -/*# really, the nonoptimized generic */ -/*# implementation) take up 90% of simulator */ -/*# output. Measurements needed. 
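
The 16-way switch that finishes memcpy above (and the analogous one in memset below) simply enumerates every decomposition of the 0..15 remaining bytes into word, halfword and byte stores. Spelled out as ordinary control flow it is equivalent to the sketch below; illustration only, with the same unaligned-access assumptions the patch itself makes.

#include <stdint.h>

/* Equivalent of the 0..15 byte tail switch: n/4 word stores, then an
   optional halfword, then an optional byte.  Fixed-width types are
   used here; the CRIS code uses "long"/"short" directly. */
static void copy_tail(unsigned char *dst, const unsigned char *src, unsigned n)
{
    while (n >= 4) {
        *(uint32_t *) dst = *(const uint32_t *) src;
        dst += 4; src += 4; n -= 4;
    }
    if (n & 2) {
        *(uint16_t *) dst = *(const uint16_t *) src;
        dst += 2; src += 2;
    }
    if (n & 1)
        *dst = *src;
}
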
*/ -/*# */ -/*#-------------------------------------------------------------------------*/ - -/* No, there's no macro saying 12*4, since it is "hard" to get it into - the asm in a good way. Thus better to expose the problem everywhere. - */ - -/* Assuming 1 cycle per dword written or read (ok, not really true), and - one per instruction, then 43+3*(n/48-1) <= 24+24*(n/48-1) - so n >= 45.7; n >= 0.9; we win on the first full 48-byte block to set. */ - -#define ZERO_BLOCK_SIZE (1*12*4) - -void *memset(void *, int, unsigned long); - -/* Experimentally off - libc_hidden_proto(memset) */ -void *memset(void *pdst, - int c, - unsigned long plen) +/* A memset for CRIS. + Copyright (C) 1999-2008 Axis Communications. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Neither the name of Axis Communications nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS + COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. */ + +/* FIXME: This file should really only be used for reference, as the + result is somewhat depending on gcc generating what we expect rather + than what we describe. An assembly file should be used instead. */ + +#include <string.h> + +/* Note the multiple occurrence of the expression "12*4", including the + asm. It is hard to get it into the asm in a good way. Thus better to + expose the problem everywhere: no macro. */ + +/* Assuming one cycle per dword written or read (ok, not really true; the + world is not ideal), and one cycle per instruction, then 43+3*(n/48-1) + <= 24+24*(n/48-1) so n >= 45.7; n >= 0.9; we win on the first full + 48-byte block to set. */ + +#define MEMSET_BY_BLOCK_THRESHOLD (1 * 48) + +/* No name ambiguities in this file. */ +__asm__ (".syntax no_register_prefix"); + +void *memset(void *pdst, int c, unsigned int plen) { - /* Ok. Now we want the parameters put in special registers. - Make sure the compiler is able to make something useful of this. */ + /* Now we want the parameters in special registers. Make sure the + compiler does something usable with this. */ register char *return_dst __asm__ ("r10") = pdst; - register long n __asm__ ("r12") = plen; + register int n __asm__ ("r12") = plen; register int lc __asm__ ("r11") = c; - /* Most apps use memset sanely. Only those memsetting about 3..4 - bytes or less get penalized compared to the generic implementation - - and that's not really sane use. */ + /* Most apps use memset sanely. 
Memsetting about 3..4 bytes or less get + penalized here compared to the generic implementation. */ - /* Ugh. This is fragile at best. Check with newer GCC releases, if - they compile cascaded "x |= x << 8" sanely! */ - __asm__("movu.b %0,$r13 \n\ - lslq 8,$r13 \n\ - move.b %0,$r13 \n\ - move.d $r13,%0 \n\ - lslq 16,$r13 \n\ - or.d $r13,%0" - : "=r" (lc) : "0" (lc) : "r13"); + /* This is fragile performancewise at best. Check with newer GCC + releases, if they compile cascaded "x |= x << 8" to sane code. */ + __asm__("movu.b %0,r13 \n\ + lslq 8,r13 \n\ + move.b %0,r13 \n\ + move.d r13,%0 \n\ + lslq 16,r13 \n\ + or.d r13,%0" + : "=r" (lc) /* Inputs. */ + : "0" (lc) /* Outputs. */ + : "r13"); /* Trash. */ { register char *dst __asm__ ("r13") = pdst; - if (((unsigned long) pdst & 3) != 0 - /* Oops! n=0 must be a legal call, regardless of alignment. */ - && n >= 3) - { - if ((unsigned long)dst & 1) - { - *dst = (char) lc; - n--; - dst++; - } - - if ((unsigned long)dst & 2) - { - *(short *)dst = lc; - n -= 2; - dst += 2; - } - } + if (((unsigned long) pdst & 3) != 0 + /* Oops! n = 0 must be a valid call, regardless of alignment. */ + && n >= 3) + { + if ((unsigned long) dst & 1) + { + *dst = (char) lc; + n--; + dst++; + } - /* Now the fun part. For the threshold value of this, check the equation - above. */ - /* Decide which copying method to use. */ - if (n >= ZERO_BLOCK_SIZE) - { - /* For large copies we use 'movem' */ - - /* It is not optimal to tell the compiler about clobbering any - registers; that will move the saving/restoring of those registers - to the function prologue/epilogue, and make non-movem sizes - suboptimal. - - This method is not foolproof; it assumes that the "asm reg" - declarations at the beginning of the function really are used - here (beware: they may be moved to temporary registers). - This way, we do not have to save/move the registers around into - temporaries; we can safely use them straight away. */ - __asm__ __volatile__ (" \n\ - .syntax no_register_prefix \n\ - \n\ - ;; Check that the register asm declaration got right. \n\ - ;; The GCC manual explicitly says there's no warranty for that (too). \n\ - .ifnc %0-%1-%4,$r13-$r12-$r11 \n\ - .err \n\ - .endif \n\ - \n\ - ;; Save the registers we'll clobber in the movem process \n\ - ;; on the stack. Don't mention them to gcc, it will only be \n\ - ;; upset. \n\ - subq 11*4,sp \n\ - movem r10,[sp] \n\ - \n\ - move.d r11,r0 \n\ - move.d r11,r1 \n\ - move.d r11,r2 \n\ - move.d r11,r3 \n\ - move.d r11,r4 \n\ - move.d r11,r5 \n\ - move.d r11,r6 \n\ - move.d r11,r7 \n\ - move.d r11,r8 \n\ - move.d r11,r9 \n\ - move.d r11,r10 \n\ - \n\ - ;; Now we've got this: \n\ - ;; r13 - dst \n\ - ;; r12 - n \n\ - \n\ - ;; Update n for the first loop \n\ - subq 12*4,r12 \n\ -0: \n\ - subq 12*4,r12 \n\ - bge 0b \n\ - movem r11,[r13+] \n\ - \n\ - addq 12*4,r12 ;; compensate for last loop underflowing n \n\ - \n\ - ;; Restore registers from stack \n\ - movem [sp+],r10" - - /* Outputs */ : "=r" (dst), "=r" (n) - /* Inputs */ : "0" (dst), "1" (n), "r" (lc)); + if ((unsigned long) dst & 2) + { + *(short *) dst = lc; + n -= 2; + dst += 2; + } + } - } + /* Decide which setting method to use. */ + if (n >= MEMSET_BY_BLOCK_THRESHOLD) + { + /* It is not optimal to tell the compiler about clobbering any + registers; that will move the saving/restoring of those registers + to the function prologue/epilogue, and make non-block sizes + suboptimal. 
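
The inline asm near the top of this hunk replicates the fill byte into all four bytes of a 32-bit word (the cascaded shift-and-or sequence the comment worries about). The same value can be produced in portable C as below; this is a sketch, not part of the patch. The i386 memset later in this patch computes the same word with a single imul by 0x01010101.

#include <stdint.h>

/* Replicate the low byte of c into all four bytes of a word, so whole
   words can be stored at a time. */
static uint32_t fill_word(int c)
{
    uint32_t b = (uint8_t) c;   /* like "movu.b %0,r13"                 */
    b |= b << 8;                /* c in both low bytes                  */
    b |= b << 16;               /* c in all four bytes                  */
    return b;                   /* equals 0x01010101u * (uint8_t) c     */
}
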
*/ + __asm__ __volatile__ + ("\ + ;; GCC does promise correct register allocations, but let's \n\ + ;; make sure it keeps its promises. \n\ + .ifnc %0-%1-%4,$r13-$r12-$r11 \n\ + .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\" \n\ + .endif \n\ + \n\ + ;; Save the registers we'll clobber in the movem process \n\ + ;; on the stack. Don't mention them to gcc, it will only be \n\ + ;; upset. \n\ + subq 11*4,sp \n\ + movem r10,[sp] \n\ + \n\ + move.d r11,r0 \n\ + move.d r11,r1 \n\ + move.d r11,r2 \n\ + move.d r11,r3 \n\ + move.d r11,r4 \n\ + move.d r11,r5 \n\ + move.d r11,r6 \n\ + move.d r11,r7 \n\ + move.d r11,r8 \n\ + move.d r11,r9 \n\ + move.d r11,r10 \n\ + \n\ + ;; Now we've got this: \n\ + ;; r13 - dst \n\ + ;; r12 - n \n\ + \n\ + ;; Update n for the first loop \n\ + subq 12*4,r12 \n\ +0: \n\ +" +#ifdef __arch_common_v10_v32 + /* Cater to branch offset difference between v32 and v10. We + assume the branch below has an 8-bit offset. */ +" setf\n" +#endif +" subq 12*4,r12 \n\ + bge 0b \n\ + movem r11,[r13+] \n\ + \n\ + ;; Compensate for last loop underflowing n. \n\ + addq 12*4,r12 \n\ + \n\ + ;; Restore registers from stack. \n\ + movem [sp+],r10" + + /* Outputs. */ + : "=r" (dst), "=r" (n) + + /* Inputs. */ + : "0" (dst), "1" (n), "r" (lc)); + } + + /* An ad-hoc unroll, used for 4*12-1..16 bytes. */ + while (n >= 16) + { + *(long *) dst = lc; dst += 4; + *(long *) dst = lc; dst += 4; + *(long *) dst = lc; dst += 4; + *(long *) dst = lc; dst += 4; + n -= 16; + } - /* Either we directly starts copying, using dword copying - in a loop, or we copy as much as possible with 'movem' - and then the last block (<44 bytes) is copied here. - This will work since 'movem' will have updated src,dst,n. */ - - while ( n >= 16 ) - { - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - n -= 16; - } - - /* A switch() is definitely the fastest although it takes a LOT of code. - * Particularly if you inline code this. 
- */ switch (n) - { + { case 0: break; + case 1: - *(char*)dst = (char) lc; + *dst = (char) lc; break; + case 2: - *(short*)dst = (short) lc; + *(short *) dst = (short) lc; break; + case 3: - *((short*)dst)++ = (short) lc; - *(char*)dst = (char) lc; + *(short *) dst = (short) lc; dst += 2; + *dst = (char) lc; break; + case 4: - *((long*)dst)++ = lc; + *(long *) dst = lc; break; + case 5: - *((long*)dst)++ = lc; - *(char*)dst = (char) lc; + *(long *) dst = lc; dst += 4; + *dst = (char) lc; break; + case 6: - *((long*)dst)++ = lc; - *(short*)dst = (short) lc; + *(long *) dst = lc; dst += 4; + *(short *) dst = (short) lc; break; + case 7: - *((long*)dst)++ = lc; - *((short*)dst)++ = (short) lc; - *(char*)dst = (char) lc; + *(long *) dst = lc; dst += 4; + *(short *) dst = (short) lc; dst += 2; + *dst = (char) lc; break; + case 8: - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; + *(long *) dst = lc; dst += 4; + *(long *) dst = lc; break; + case 9: - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *(char*)dst = (char) lc; + *(long *) dst = lc; dst += 4; + *(long *) dst = lc; dst += 4; + *dst = (char) lc; break; + case 10: - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *(short*)dst = (short) lc; + *(long *) dst = lc; dst += 4; + *(long *) dst = lc; dst += 4; + *(short *) dst = (short) lc; break; + case 11: - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *((short*)dst)++ = (short) lc; - *(char*)dst = (char) lc; + *(long *) dst = lc; dst += 4; + *(long *) dst = lc; dst += 4; + *(short *) dst = (short) lc; dst += 2; + *dst = (char) lc; break; + case 12: - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; + *(long *) dst = lc; dst += 4; + *(long *) dst = lc; dst += 4; + *(long *) dst = lc; break; + case 13: - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *(char*)dst = (char) lc; + *(long *) dst = lc; dst += 4; + *(long *) dst = lc; dst += 4; + *(long *) dst = lc; dst += 4; + *dst = (char) lc; break; + case 14: - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *(short*)dst = (short) lc; + *(long *) dst = lc; dst += 4; + *(long *) dst = lc; dst += 4; + *(long *) dst = lc; dst += 4; + *(short *) dst = (short) lc; break; + case 15: - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *((long*)dst)++ = lc; - *((short*)dst)++ = (short) lc; - *(char*)dst = (char) lc; + *(long *) dst = lc; dst += 4; + *(long *) dst = lc; dst += 4; + *(long *) dst = lc; dst += 4; + *(short *) dst = (short) lc; dst += 2; + *dst = (char) lc; break; - } + } } - return return_dst; /* destination pointer. 
*/ -} /* memset() */ + return return_dst; +} libc_hidden_def(memset) diff --git a/libc/string/cris/strcpy.c b/libc/string/cris/strcpy.c index 955a990b7..40e6389b9 100644 --- a/libc/string/cris/strcpy.c +++ b/libc/string/cris/strcpy.c @@ -6,7 +6,6 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(strcpy) */ char *strcpy(char *dest, const char *src) { char *ret = dest; diff --git a/libc/string/cris/strncpy.c b/libc/string/cris/strncpy.c index 3f2775bdd..8d074071a 100644 --- a/libc/string/cris/strncpy.c +++ b/libc/string/cris/strncpy.c @@ -6,9 +6,7 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(memset) */ -/* Experimentally off - libc_hidden_proto(strncpy) */ char *strncpy(char *dest, const char *src, size_t count) { char *ret = dest; diff --git a/libc/string/dirname.c b/libc/string/dirname.c index 6265e562e..c7f4dec1f 100644 --- a/libc/string/dirname.c +++ b/libc/string/dirname.c @@ -5,7 +5,8 @@ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. */ -#include "_string.h" +#define __need_NULL +#include <stddef.h> #include <libgen.h> char *dirname(char *path) diff --git a/libc/string/ffs.c b/libc/string/ffs.c index 241b7456f..f39d304b7 100644 --- a/libc/string/ffs.c +++ b/libc/string/ffs.c @@ -5,12 +5,9 @@ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. */ -/* ffsl,ffsll */ - -#include "_string.h" - -/* Experimentally off - libc_hidden_proto(ffs) */ - +#include <limits.h> +#include <string.h> + int ffs(int i) { #if 1 @@ -53,3 +50,6 @@ int ffs(int i) #endif } libc_hidden_def(ffs) +#if ULONG_MAX == UINT_MAX +strong_alias_untyped(ffs, ffsl) +#endif diff --git a/libc/string/ffsll.c b/libc/string/ffsll.c new file mode 100644 index 000000000..967fc5168 --- /dev/null +++ b/libc/string/ffsll.c @@ -0,0 +1,35 @@ +/* Copyright (C) 1991, 1992, 1997, 1998 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Torbjorn Granlund (tege@sics.se). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <limits.h> +#include <string.h> + +/* Find the first bit set in I. */ +int ffsll (long long int i) +{ + unsigned long long int x = i & -i; + + if (x <= 0xffffffff) + return ffs (i); + else + return 32 + ffs (i >> 32); +} + +#if ULONG_MAX != UINT_MAX +strong_alias_untyped(ffsll, ffsl) +#endif diff --git a/libc/string/frv/memcpy.S b/libc/string/frv/memcpy.S index ae843797d..47773726a 100644 --- a/libc/string/frv/memcpy.S +++ b/libc/string/frv/memcpy.S @@ -14,8 +14,8 @@ * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public - * License along with this library; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * License along with this library; if not, see + * <http://www.gnu.org/licenses/>. 
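
A small usage check for the new ffsll() above (and the ffsl alias set up next to it): the return value is the 1-based position of the least significant set bit, with 0 meaning no bit is set. Sketch only; the prototype is declared directly here to keep it self-contained (on glibc/uClibc it is normally available from <string.h> in a GNU-source build).

#include <stdio.h>

extern int ffsll(long long int i);

int main(void)
{
    printf("%d\n", ffsll(0));           /* 0                     */
    printf("%d\n", ffsll(1));           /* 1                     */
    printf("%d\n", ffsll(12));          /* 3  (binary 1100)      */
    printf("%d\n", ffsll(1LL << 40));   /* 41                    */
    return 0;
}
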
*/ #include <features.h> diff --git a/libc/string/frv/memset.S b/libc/string/frv/memset.S index 477597dcd..17013672e 100644 --- a/libc/string/frv/memset.S +++ b/libc/string/frv/memset.S @@ -14,8 +14,8 @@ * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public - * License along with this library; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * License along with this library; if not, see + * <http://www.gnu.org/licenses/>. */ #include <features.h> @@ -155,4 +155,4 @@ memset: bralr .size memset, .-memset -/* Experimentally off - libc_hidden_proto(memset) */ +libc_hidden_def(memset) diff --git a/libc/string/generic/bp-checks.h b/libc/string/generic/bp-checks.h deleted file mode 100644 index 08c70aa5d..000000000 --- a/libc/string/generic/bp-checks.h +++ /dev/null @@ -1,129 +0,0 @@ -/* Bounded-pointer checking macros for C. - Copyright (C) 2000 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Greg McGary <greg@mcgary.org> - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#ifndef _bp_checks_h_ -#define _bp_checks_h_ 1 - -#if __BOUNDED_POINTERS__ - -# define BOUNDS_VIOLATED (__builtin_trap (), 0) - -/* Verify that pointer's value >= low. Return pointer value. */ -# define CHECK_BOUNDS_LOW(ARG) \ - (((__ptrvalue (ARG) < __ptrlow (ARG)) && BOUNDS_VIOLATED), \ - __ptrvalue (ARG)) - -/* Verify that pointer's value < high. Return pointer value. */ -# define CHECK_BOUNDS_HIGH(ARG) \ - (((__ptrvalue (ARG) > __ptrhigh (ARG)) && BOUNDS_VIOLATED), \ - __ptrvalue (ARG)) - -# define _CHECK_N(ARG, N, COND) \ - (((COND) \ - && (__ptrvalue (ARG) < __ptrlow (ARG) \ - || __ptrvalue (ARG) + (N) > __ptrhigh (ARG)) \ - && BOUNDS_VIOLATED), \ - __ptrvalue (ARG)) - -extern void *__unbounded __ubp_memchr (const void *__unbounded, int, unsigned); - -# define _CHECK_STRING(ARG, COND) \ - (((COND) \ - && (__ptrvalue (ARG) < __ptrlow (ARG) \ - || !__ubp_memchr (__ptrvalue (ARG), '\0', \ - (__ptrhigh (ARG) - __ptrvalue (ARG)))) \ - && BOUNDS_VIOLATED), \ - __ptrvalue (ARG)) - -/* Check bounds of a pointer seated to an array of N objects. */ -# define CHECK_N(ARG, N) _CHECK_N ((ARG), (N), 1) -/* Same as CHECK_N, but tolerate ARG == NULL. */ -# define CHECK_N_NULL_OK(ARG, N) _CHECK_N ((ARG), (N), __ptrvalue (ARG)) - -/* Check bounds of a pointer seated to a single object. */ -# define CHECK_1(ARG) CHECK_N ((ARG), 1) -/* Same as CHECK_1, but tolerate ARG == NULL. */ -# define CHECK_1_NULL_OK(ARG) CHECK_N_NULL_OK ((ARG), 1) - -/* Check for NUL-terminator within string's bounds. */ -# define CHECK_STRING(ARG) _CHECK_STRING ((ARG), 1) -/* Same as CHECK_STRING, but tolerate ARG == NULL. 
*/ -# define CHECK_STRING_NULL_OK(ARG) _CHECK_STRING ((ARG), __ptrvalue (ARG)) - -/* Check bounds of signal syscall args with type sigset_t. */ -# define CHECK_SIGSET(SET) CHECK_N ((SET), _NSIG / (8 * sizeof *(SET))) -/* Same as CHECK_SIGSET, but tolerate SET == NULL. */ -# define CHECK_SIGSET_NULL_OK(SET) CHECK_N_NULL_OK ((SET), _NSIG / (8 * sizeof *(SET))) - -# if defined (_IOC_SIZESHIFT) && defined (_IOC_SIZEBITS) -/* Extract the size of the ioctl data and check its bounds. */ -# define CHECK_IOCTL(ARG, CMD) \ - CHECK_N ((const char *) (ARG), \ - (((CMD) >> _IOC_SIZESHIFT) & ((1 << _IOC_SIZEBITS) - 1))) -# else -/* We don't know the size of the ioctl data, so the best we can do - is check that the first byte is within bounds. */ -# define CHECK_IOCTL(ARG, CMD) CHECK_1 ((const char *) ARG) -# endif - -/* Check bounds of `struct flock *' for the locking fcntl commands. */ -# define CHECK_FCNTL(ARG, CMD) \ - (((CMD) == F_GETLK || (CMD) == F_SETLK || (CMD) == F_SETLKW) \ - ? CHECK_1 ((struct flock *) ARG) : (unsigned long) (ARG)) - -/* Check bounds of an array of mincore residency-status flags that - cover a region of NBYTES. Such a vector occupies one byte per page - of memory. */ -# define CHECK_N_PAGES(ARG, NBYTES) \ - ({ int _page_size_ = __sysconf (_SC_PAGE_SIZE); \ - CHECK_N ((const char *) (ARG), \ - ((NBYTES) + _page_size_ - 1) / _page_size_); }) - -/* Return a bounded pointer with value PTR that satisfies CHECK_N (PTR, N). */ -# define BOUNDED_N(PTR, N) \ - ({ __typeof (PTR) __bounded _p_; \ - __ptrvalue _p_ = __ptrlow _p_ = __ptrvalue (PTR); \ - __ptrhigh _p_ = __ptrvalue _p_ + (N); \ - _p_; }) - -#else /* !__BOUNDED_POINTERS__ */ - -/* Do nothing if not compiling with -fbounded-pointers. */ - -# define BOUNDS_VIOLATED -# define CHECK_BOUNDS_LOW(ARG) (ARG) -# define CHECK_BOUNDS_HIGH(ARG) (ARG) -# define CHECK_1(ARG) (ARG) -# define CHECK_1_NULL_OK(ARG) (ARG) -# define CHECK_N(ARG, N) (ARG) -# define CHECK_N_NULL_OK(ARG, N) (ARG) -# define CHECK_STRING(ARG) (ARG) -# define CHECK_SIGSET(SET) (SET) -# define CHECK_SIGSET_NULL_OK(SET) (SET) -# define CHECK_IOCTL(ARG, CMD) (ARG) -# define CHECK_FCNTL(ARG, CMD) (ARG) -# define CHECK_N_PAGES(ARG, NBYTES) (ARG) -# define BOUNDED_N(PTR, N) (PTR) - -#endif /* !__BOUNDED_POINTERS__ */ - -#define BOUNDED_1(PTR) BOUNDED_N (PTR, 1) - -#endif /* _bp_checks_h_ */ diff --git a/libc/string/generic/memchr.c b/libc/string/generic/memchr.c index 3c7c997bc..967ae51ea 100644 --- a/libc/string/generic/memchr.c +++ b/libc/string/generic/memchr.c @@ -17,22 +17,19 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> #include <stdlib.h> #include <limits.h> -/* Experimentally off - libc_hidden_proto(memchr) */ -libc_hidden_proto(abort) - #include "memcopy.h" #define LONG_MAX_32_BITS 2147483647 /* Search no more than N bytes of S for C. */ +#undef memchr void *memchr (const void * s, int c_in, size_t n) { const unsigned char *char_ptr; diff --git a/libc/string/generic/memcmp.c b/libc/string/generic/memcmp.c index fc63a2eae..170c50997 100644 --- a/libc/string/generic/memcmp.c +++ b/libc/string/generic/memcmp.c @@ -14,22 +14,16 @@ Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> #include "memcopy.h" -/* Experimentally off - libc_hidden_proto(memcmp) */ #include <endian.h> #if __BYTE_ORDER == __BIG_ENDIAN -# define WORDS_BIGENDIAN -#endif - -#ifdef WORDS_BIGENDIAN # define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1) #else # define CMP_LT_OR_GT(a, b) memcmp_bytes ((a), (b)) @@ -48,17 +42,12 @@ 3. Compare the few remaining bytes. */ -#ifndef WORDS_BIGENDIAN +#if __BYTE_ORDER != __BIG_ENDIAN /* memcmp_bytes -- Compare A and B bytewise in the byte order of the machine. A and B are known to be different. This is needed only on little-endian machines. */ -static int memcmp_bytes __P((op_t, op_t)); - -# ifdef __GNUC__ -__inline -# endif -static int +static __inline__ int memcmp_bytes (op_t a, op_t b) { long int srcp1 = (long int) &a; @@ -77,8 +66,6 @@ memcmp_bytes (op_t a, op_t b) } #endif -static int memcmp_common_alignment __P((long, long, size_t)); - /* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t' objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for memory operations on `op_t's. */ @@ -161,8 +148,6 @@ memcmp_common_alignment (long int srcp1, long int srcp2, size_t len) return 0; } -static int memcmp_not_common_alignment __P((long, long, size_t)); - /* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory operations on `op_t', but SRCP1 *should be unaligned*. */ diff --git a/libc/string/generic/memcopy.h b/libc/string/generic/memcopy.h index fab4da764..031557ac8 100644 --- a/libc/string/generic/memcopy.h +++ b/libc/string/generic/memcopy.h @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* The strategy of the memory functions is: @@ -107,7 +106,6 @@ typedef unsigned char byte; } \ } while (0) -#ifdef __ARCH_HAS_BWD_MEMCPY__ /* Copy *up to* NBYTES bytes from SRC_BP to DST_BP, with the assumption that DST_BP is aligned on an OPSIZ multiple. If not all bytes could be easily copied, store remaining number of bytes @@ -126,8 +124,6 @@ typedef unsigned char byte; (nbytes_left) = (nbytes) % OPSIZ; \ } while (0) -#endif - /* Copy *up to* NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR, beginning at the words (of type op_t) right before the pointers and continuing towards smaller addresses. May take advantage of that diff --git a/libc/string/generic/memcpy.c b/libc/string/generic/memcpy.c index 4284f2fe5..ca2e7e0f9 100644 --- a/libc/string/generic/memcpy.c +++ b/libc/string/generic/memcpy.c @@ -15,15 +15,14 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
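
The reason the generic memcmp only returns the word difference directly on big-endian hosts is that little-endian word values do not order the same way as their bytes; the small program below (illustration only, not part of the patch) shows a case where the two orders disagree on a little-endian machine, which is exactly what memcmp_bytes() is there to handle.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
    const unsigned char a[4] = { 0x01, 0xff, 0x00, 0x00 };
    const unsigned char b[4] = { 0x02, 0x00, 0x00, 0x00 };
    uint32_t wa, wb;

    memcpy(&wa, a, 4);
    memcpy(&wb, b, 4);

    /* memcmp orders by bytes: a < b.  On a little-endian host the word
       values order the other way (wa = 0x0000ff01 > wb = 0x00000002). */
    printf("memcmp sign      : %d\n", memcmp(a, b, 4) < 0 ? -1 : 1);
    printf("word-compare sign: %d\n", wa < wb ? -1 : 1);
    return 0;
}
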
*/ #include <string.h> #include "memcopy.h" #include "pagecopy.h" +#include "_memcpy_fwd.c" -/* Experimentally off - libc_hidden_proto(memcpy) */ void *memcpy (void *dstpp, const void *srcpp, size_t len) { diff --git a/libc/string/generic/memmem.c b/libc/string/generic/memmem.c index c75bb2426..753e43ae5 100644 --- a/libc/string/generic/memmem.c +++ b/libc/string/generic/memmem.c @@ -12,16 +12,13 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> #include <stddef.h> #ifdef __USE_GNU -/* Experimentally off - libc_hidden_proto(memmem) */ -/* Experimentally off - libc_hidden_proto(memcmp) */ /* Return the first occurrence of NEEDLE in HAYSTACK. */ void *memmem (const void *haystack, size_t haystack_len, @@ -50,5 +47,4 @@ void *memmem (const void *haystack, size_t haystack_len, return NULL; } -libc_hidden_def(memmem) #endif diff --git a/libc/string/generic/memmove.c b/libc/string/generic/memmove.c index 7f945b150..bf78c4778 100644 --- a/libc/string/generic/memmove.c +++ b/libc/string/generic/memmove.c @@ -15,9 +15,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> @@ -29,8 +28,6 @@ #include "_memcpy_fwd.c" #endif -/* Experimentally off - libc_hidden_proto(memmove) */ -/* Experimentally off - libc_hidden_proto(memcpy) */ static void _wordcopy_bwd_aligned (long int dstp, long int srcp, size_t len) { diff --git a/libc/string/generic/mempcpy.c b/libc/string/generic/mempcpy.c index 8d7356486..bb5563a6a 100644 --- a/libc/string/generic/mempcpy.c +++ b/libc/string/generic/mempcpy.c @@ -8,13 +8,13 @@ #include <string.h> #ifdef __USE_GNU -/* Experimentally off - libc_hidden_proto(mempcpy) */ -/* Experimentally off - libc_hidden_proto(memcpy) */ +# undef mempcpy void *mempcpy (void *dstpp, const void *srcpp, size_t len) { memcpy(dstpp, srcpp, len); return (void *)(((char *)dstpp) + len); } libc_hidden_weak(mempcpy) +strong_alias(mempcpy,__mempcpy) #endif diff --git a/libc/string/generic/memrchr.c b/libc/string/generic/memrchr.c index 9ab805cf7..b74cf5152 100644 --- a/libc/string/generic/memrchr.c +++ b/libc/string/generic/memrchr.c @@ -18,17 +18,14 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> #include <stdlib.h> #include <limits.h> #ifdef __USE_GNU -/* Experimentally off - libc_hidden_proto(memrchr) */ -libc_hidden_proto(abort) #include "memcopy.h" diff --git a/libc/string/generic/memset.c b/libc/string/generic/memset.c index 62cc36fe3..5644e2522 100644 --- a/libc/string/generic/memset.c +++ b/libc/string/generic/memset.c @@ -12,14 +12,12 @@ Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> #include "memcopy.h" -/* Experimentally off - libc_hidden_proto(memset) */ void *memset (void *dstpp, int c, size_t len) { long int dstp = (long int) dstpp; diff --git a/libc/string/generic/pagecopy.h b/libc/string/generic/pagecopy.h index 5a0ada1fa..16aaacab6 100644 --- a/libc/string/generic/pagecopy.h +++ b/libc/string/generic/pagecopy.h @@ -13,9 +13,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* This file defines the macro: @@ -40,7 +39,7 @@ */ -#if PAGE_COPY_THRESHOLD +#if defined PAGE_COPY_THRESHOLD && PAGE_COPY_THRESHOLD #include <assert.h> @@ -48,7 +47,7 @@ do \ { \ if ((nbytes) >= PAGE_COPY_THRESHOLD && \ - PAGE_OFFSET ((dstp) - (srcp)) == 0) \ + PAGE_OFFSET ((dstp) - (srcp)) == 0) \ { \ /* The amount to copy is past the threshold for copying \ pages virtually with kernel VM operations, and the \ diff --git a/libc/string/generic/rawmemchr.c b/libc/string/generic/rawmemchr.c index f8b97a61d..816589649 100644 --- a/libc/string/generic/rawmemchr.c +++ b/libc/string/generic/rawmemchr.c @@ -17,17 +17,14 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> #include <stdlib.h> #include <limits.h> #ifdef __USE_GNU -/* Experimentally off - libc_hidden_proto(rawmemchr) */ -libc_hidden_proto(abort) #include "memcopy.h" diff --git a/libc/string/generic/strcat.c b/libc/string/generic/strcat.c index e00494038..68fc2a289 100644 --- a/libc/string/generic/strcat.c +++ b/libc/string/generic/strcat.c @@ -12,14 +12,12 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> #include "memcopy.h" -/* Experimentally off - libc_hidden_proto(strcat) */ /* Append SRC on the end of DEST. */ char *strcat (char *dest, const char *src) { diff --git a/libc/string/generic/strchr.c b/libc/string/generic/strchr.c index 66aed1e25..321d2b8c3 100644 --- a/libc/string/generic/strchr.c +++ b/libc/string/generic/strchr.c @@ -17,15 +17,12 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
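
mempcpy(), whose generic version appears a little earlier in this patch along with the new __mempcpy alias, returns the end of the destination rather than its start, so copies can be chained without recomputing offsets. Illustrative usage, not part of the patch:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>

int main(void)
{
    char buf[32];
    char *p = buf;

    p = mempcpy(p, "hello", 5);   /* p now points just past "hello" */
    p = mempcpy(p, ", ", 2);
    p = mempcpy(p, "world", 5);
    *p = '\0';

    puts(buf);                    /* prints "hello, world" */
    return 0;
}
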
*/ #include <string.h> #include <stdlib.h> -/* Experimentally off - libc_hidden_proto(strchr) */ -libc_hidden_proto(abort) #include "memcopy.h" diff --git a/libc/string/generic/strchrnul.c b/libc/string/generic/strchrnul.c index 72cab2891..d11d9e00d 100644 --- a/libc/string/generic/strchrnul.c +++ b/libc/string/generic/strchrnul.c @@ -17,16 +17,13 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> #include <stdlib.h> #ifdef __USE_GNU -/* Experimentally off - libc_hidden_proto(strchrnul) */ -libc_hidden_proto(abort) #include "memcopy.h" diff --git a/libc/string/generic/strcmp.c b/libc/string/generic/strcmp.c index 50acd3548..24ad14382 100644 --- a/libc/string/generic/strcmp.c +++ b/libc/string/generic/strcmp.c @@ -12,15 +12,13 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> #include "memcopy.h" -/* Experimentally off - libc_hidden_proto(strcmp) */ /* Compare S1 and S2, returning less than, equal to or greater than zero if S1 is lexicographically less than, equal to or greater than S2. */ @@ -44,7 +42,6 @@ int strcmp (const char *p1, const char *p2) libc_hidden_weak(strcmp) #ifndef __UCLIBC_HAS_LOCALE__ -/* Experimentally off - libc_hidden_proto(strcoll) */ strong_alias(strcmp,strcoll) libc_hidden_def(strcoll) #endif diff --git a/libc/string/generic/strcpy.c b/libc/string/generic/strcpy.c index 99e077139..924615fca 100644 --- a/libc/string/generic/strcpy.c +++ b/libc/string/generic/strcpy.c @@ -12,36 +12,21 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> -#include <stddef.h> -#include "memcopy.h" -#include "bp-checks.h" - -/* Experimentally off - libc_hidden_proto(strcpy) */ /* Copy SRC to DEST. */ -char *strcpy (char *dest, const char *src) +char *strcpy(char *dest, const char *src) { - reg_char c; - char *__unbounded s = (char *__unbounded) CHECK_BOUNDS_LOW (src); - const ptrdiff_t off = CHECK_BOUNDS_LOW (dest) - s - 1; - size_t n; - - do - { - c = *s++; - s[off] = c; - } - while (c != '\0'); + char *dst = dest; - n = s - src; - (void) CHECK_BOUNDS_HIGH (src + n); - (void) CHECK_BOUNDS_HIGH (dest + n); + while ((*dst = *src) != '\0') { + src++; + dst++; + } - return dest; + return dest; } libc_hidden_def(strcpy) diff --git a/libc/string/generic/strcspn.c b/libc/string/generic/strcspn.c index b65b3b995..ca9506bdd 100644 --- a/libc/string/generic/strcspn.c +++ b/libc/string/generic/strcspn.c @@ -12,14 +12,11 @@ Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> -/* Experimentally off - libc_hidden_proto(strcspn) */ -/* Experimentally off - libc_hidden_proto(strchr) */ /* Return the length of the maximum initial segment of S which contains no characters from REJECT. */ diff --git a/libc/string/generic/strlen.c b/libc/string/generic/strlen.c index 764dae18d..dc383398b 100644 --- a/libc/string/generic/strlen.c +++ b/libc/string/generic/strlen.c @@ -15,15 +15,12 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> #include <stdlib.h> -/* Experimentally off - libc_hidden_proto(strlen) */ -libc_hidden_proto(abort) /* Return the length of the null-terminated string STR. Scan for the null terminator quickly by testing four bytes at a time. */ diff --git a/libc/string/generic/strncat.c b/libc/string/generic/strncat.c index 8e3423e49..f0cf8f995 100644 --- a/libc/string/generic/strncat.c +++ b/libc/string/generic/strncat.c @@ -12,15 +12,13 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> #include "memcopy.h" -/* Experimentally off - libc_hidden_proto(strncat) */ char *strncat (char *s1, const char *s2, size_t n) { reg_char c; diff --git a/libc/string/generic/strncmp.c b/libc/string/generic/strncmp.c index c49f36d8b..ca980415e 100644 --- a/libc/string/generic/strncmp.c +++ b/libc/string/generic/strncmp.c @@ -12,14 +12,12 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> #include "memcopy.h" -/* Experimentally off - libc_hidden_proto(strncmp) */ /* Compare no more than N characters of S1 and S2, returning less than, equal to or greater than zero if S1 is lexicographically less than, equal to or diff --git a/libc/string/generic/strncpy.c b/libc/string/generic/strncpy.c index d2d693f2b..0256bcc6b 100644 --- a/libc/string/generic/strncpy.c +++ b/libc/string/generic/strncpy.c @@ -12,14 +12,12 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ #include <string.h> #include "memcopy.h" -/* Experimentally off - libc_hidden_proto(strncpy) */ char *strncpy (char *s1, const char *s2, size_t n) { reg_char c; diff --git a/libc/string/generic/strnlen.c b/libc/string/generic/strnlen.c index d9ba76129..4d4cde84f 100644 --- a/libc/string/generic/strnlen.c +++ b/libc/string/generic/strnlen.c @@ -17,16 +17,13 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ + License along with the GNU C Library; see the file COPYING.LIB. If + not, see <http://www.gnu.org/licenses/>. */ #include <string.h> #include <stdlib.h> #ifdef __USE_GNU -/* Experimentally off - libc_hidden_proto(strnlen) */ -libc_hidden_proto(abort) /* Find the length of S, but scan at most MAXLEN characters. If no '\0' terminator is found in that many characters, return MAXLEN. */ @@ -34,7 +31,7 @@ size_t strnlen (const char *str, size_t maxlen) { const char *char_ptr, *end_ptr = str + maxlen; const unsigned long int *longword_ptr; - unsigned long int longword, magic_bits, himagic, lomagic; + unsigned long int longword, himagic, lomagic; if (maxlen == 0) return 0; @@ -68,14 +65,12 @@ size_t strnlen (const char *str, size_t maxlen) The 1-bits make sure that carries propagate to the next 0-bit. The 0-bits provide holes for carries to fall into. */ - magic_bits = 0x7efefeffL; himagic = 0x80808080L; lomagic = 0x01010101L; if (sizeof (longword) > 4) { /* 64-bit version of the magic. */ /* Do the shift in two steps to avoid a warning if long has 32 bits. */ - magic_bits = ((0x7efefefeL << 16) << 16) | 0xfefefeffL; himagic = ((himagic << 16) << 16) | himagic; lomagic = ((lomagic << 16) << 16) | lomagic; } diff --git a/libc/string/generic/strrchr.c b/libc/string/generic/strrchr.c index c85707241..8ca404843 100644 --- a/libc/string/generic/strrchr.c +++ b/libc/string/generic/strrchr.c @@ -12,14 +12,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> -/* Experimentally off - libc_hidden_proto(strrchr) */ -/* Experimentally off - libc_hidden_proto(strchr) */ /* Find the last occurrence of C in S. */ char *strrchr (const char *s, int c) diff --git a/libc/string/generic/strsep.c b/libc/string/generic/strsep.c index e02e57068..bbdaf8849 100644 --- a/libc/string/generic/strsep.c +++ b/libc/string/generic/strsep.c @@ -12,18 +12,14 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
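
The himagic/lomagic constants kept in strnlen above implement the usual zero-byte-in-a-word probe; for a 32-bit word it boils down to the helper below (sketch only, not part of the patch). The test never misses a zero byte, but it can report false positives for bytes with the high bit set, which is why the code then inspects the four bytes individually.

#include <stdint.h>

/* (word - 0x01010101) & 0x80808080 is nonzero whenever some byte of
   word is zero -- and possibly also when a byte has its high bit set,
   so a hit still has to be confirmed byte by byte. */
static int may_contain_zero_byte(uint32_t word)
{
    const uint32_t himagic = 0x80808080UL;
    const uint32_t lomagic = 0x01010101UL;

    return ((word - lomagic) & himagic) != 0;
}
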
*/ #include <string.h> #ifdef __USE_BSD -/* Experimentally off - libc_hidden_proto(strchr) */ -/* Experimentally off - libc_hidden_proto(strpbrk) */ -/* Experimentally off - libc_hidden_proto(strsep) */ char *strsep (char **stringp, const char *delim) { char *begin, *end; diff --git a/libc/string/generic/strspn.c b/libc/string/generic/strspn.c index 010567744..86bcdcb70 100644 --- a/libc/string/generic/strspn.c +++ b/libc/string/generic/strspn.c @@ -12,13 +12,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> -/* Experimentally off - libc_hidden_proto(strspn) */ /* Return the length of the maximum initial segment of S which contains only characters in ACCEPT. */ size_t strspn (const char *s, const char *accept) diff --git a/libc/string/generic/strstr.c b/libc/string/generic/strstr.c index c12dceb33..dd101768b 100644 --- a/libc/string/generic/strstr.c +++ b/libc/string/generic/strstr.c @@ -13,9 +13,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* * My personal strstr() implementation that beats most other algorithms. @@ -28,7 +27,6 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(strstr) */ typedef unsigned chartype; diff --git a/libc/string/generic/strtok_r.c b/libc/string/generic/strtok_r.c index d082d226e..253964395 100644 --- a/libc/string/generic/strtok_r.c +++ b/libc/string/generic/strtok_r.c @@ -13,33 +13,27 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <string.h> -/* Experimentally off - libc_hidden_proto(strtok_r) */ -/* Experimentally off - libc_hidden_proto(strspn) */ -/* Experimentally off - libc_hidden_proto(strpbrk) */ #ifdef __USE_GNU # define __rawmemchr rawmemchr -/* Experimentally off - libc_hidden_proto(rawmemchr) */ #else # define __rawmemchr strchr -/* Experimentally off - libc_hidden_proto(strchr) */ #endif - -/* Parse S into tokens separated by characters in DELIM. +#if 0 + Parse S into tokens separated by characters in DELIM. If S is NULL, the saved pointer in SAVE_PTR is used as the next starting point. 
For example: char s[] = "-abc-=-def"; char *sp; - x = strtok_r(s, "-", &sp); // x = "abc", sp = "=-def" - x = strtok_r(NULL, "-=", &sp); // x = "def", sp = NULL - x = strtok_r(NULL, "=", &sp); // x = NULL - // s = "abc\0-def\0" -*/ + x = strtok_r(s, "-", &sp); /* x = "abc", sp = "=-def" */ + x = strtok_r(NULL, "-=", &sp); /* x = "def", sp = NULL */ + x = strtok_r(NULL, "=", &sp); /* x = NULL */ + /* s = "abc\0-def\0" */ +#endif char *strtok_r (char *s, const char *delim, char **save_ptr) { char *token; diff --git a/libc/string/i386/memchr.c b/libc/string/i386/memchr.c index fe4537914..1960f6ba4 100644 --- a/libc/string/i386/memchr.c +++ b/libc/string/i386/memchr.c @@ -32,20 +32,44 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(memchr) */ -void *memchr(const void *cs, int c, size_t count) +#undef memchr +/*#define memchr TESTING*/ +void *memchr(const void *s, int c, size_t count) { - int d0; - register void * __res; - if (!count) - return NULL; - __asm__ __volatile__( - "repne\n\t" - "scasb\n\t" - "je 1f\n\t" - "movl $1,%0\n" - "1:\tdecl %0" - :"=D" (__res), "=&c" (d0) : "a" (c),"0" (cs),"1" (count)); - return __res; + void *edi; + int ecx; + __asm__ __volatile__( + " jecxz 1f\n" + " repne; scasb\n" + " leal -1(%%edi), %%edi\n" + " je 2f\n" + "1:\n" + " xorl %%edi, %%edi\n" /* NULL */ + "2:\n" + : "=&D" (edi), "=&c" (ecx) + : "a" (c), "0" (s), "1" (count) + /* : no clobbers */ + ); + return edi; } +#ifndef memchr libc_hidden_def(memchr) +#else +/* Uncomment TESTING, gcc -D_GNU_SOURCE -m32 -Os memchr.c -o memchr + * and run ./memchr + */ +int main() +{ + static const char str[] = "abc.def"; + printf((char*)memchr(str, '.',-2) - str == 3 ? "ok\n" : "BAD!\n"); + printf((char*)memchr(str, '.',-1) - str == 3 ? "ok\n" : "BAD!\n"); + printf((char*)memchr(str, '.', 0) == NULL ? "ok\n" : "BAD!\n"); + printf((char*)memchr(str, '.', 1) == NULL ? "ok\n" : "BAD!\n"); + printf((char*)memchr(str, '.', 2) == NULL ? "ok\n" : "BAD!\n"); + printf((char*)memchr(str, '.', 3) == NULL ? "ok\n" : "BAD!\n"); + printf((char*)memchr(str, '.', 4) - str == 3 ? "ok\n" : "BAD!\n"); + printf((char*)memchr(str, '.', 5) - str == 3 ? "ok\n" : "BAD!\n"); + printf((char*)memchr(str+3, '.', 0) == NULL ? "ok\n" : "BAD!\n"); + printf((char*)memchr(str+3, '.', 5) - str == 3 ? "ok\n" : "BAD!\n"); +} +#endif diff --git a/libc/string/i386/memcpy.c b/libc/string/i386/memcpy.c index 285583f3b..697d0bdc2 100644 --- a/libc/string/i386/memcpy.c +++ b/libc/string/i386/memcpy.c @@ -32,22 +32,23 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(memcpy) */ +#undef memcpy void *memcpy(void * to, const void * from, size_t n) { - int d0, d1, d2; - __asm__ __volatile__( - "rep ; movsl\n\t" - "testb $2,%b4\n\t" - "je 1f\n\t" - "movsw\n" - "1:\ttestb $1,%b4\n\t" - "je 2f\n\t" - "movsb\n" - "2:" - : "=&c" (d0), "=&D" (d1), "=&S" (d2) - :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from) - : "memory"); - return (to); + int d0, d1, d2; + __asm__ __volatile__( + " rep; movsl\n" + " movl %4, %%ecx\n" + " andl $3, %%ecx\n" + /* jz is optional. 
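
For reference, the strtok_r() usage example quoted a little earlier (in the strtok_r.c hunk) as a runnable program, with the token values that comment gives:

#include <stdio.h>
#include <string.h>

int main(void)
{
    char s[] = "-abc-=-def";
    char *sp;
    char *x;

    x = strtok_r(s, "-", &sp);      /* "abc"                */
    printf("%s\n", x);
    x = strtok_r(NULL, "-=", &sp);  /* "def"                */
    printf("%s\n", x);
    x = strtok_r(NULL, "=", &sp);   /* no more tokens: NULL */
    printf("%s\n", x ? x : "(null)");
    return 0;
}
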
avoids "rep; movsb" with ecx == 0, + * but adds a branch, which is currently (2008) faster */ + " jz 1f\n" + " rep; movsb\n" + "1:\n" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from) + : "memory" + ); + return to; } libc_hidden_def(memcpy) diff --git a/libc/string/i386/memmove.c b/libc/string/i386/memmove.c index a924efcbc..0ec8016a5 100644 --- a/libc/string/i386/memmove.c +++ b/libc/string/i386/memmove.c @@ -32,28 +32,40 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(memmove) */ +#undef memmove +/*#define memmove TESTING*/ void *memmove(void *dest, const void *src, size_t n) { - int d0, d1, d2; - if (dest<src) + int eax, ecx, esi, edi; __asm__ __volatile__( - "rep\n\t" - "movsb" - : "=&c" (d0), "=&S" (d1), "=&D" (d2) - :"0" (n),"1" (src),"2" (dest) - : "memory"); - else - __asm__ __volatile__( - "std\n\t" - "rep\n\t" - "movsb\n\t" - "cld" - : "=&c" (d0), "=&S" (d1), "=&D" (d2) - :"0" (n), - "1" (n-1+(const char *)src), - "2" (n-1+(char *)dest) - :"memory"); - return dest; + " movl %%eax, %%edi\n" + " cmpl %%esi, %%eax\n" + " je 2f\n" /* (optional) src == dest -> NOP */ + " jb 1f\n" /* src > dest -> simple copy */ + " leal -1(%%esi,%%ecx), %%esi\n" + " leal -1(%%eax,%%ecx), %%edi\n" + " std\n" + "1: rep; movsb\n" + " cld\n" + "2:\n" + : "=&c" (ecx), "=&S" (esi), "=&a" (eax), "=&D" (edi) + : "0" (n), "1" (src), "2" (dest) + : "memory" + ); + return (void*)eax; } +#ifndef memmove libc_hidden_def(memmove) +#else +/* Uncomment TESTING, gcc -D_GNU_SOURCE -m32 -Os memmove.c -o memmove + * and run ./memmove + */ +int main() +{ + static char str[] = "abcdef.123"; + memmove(str + 1, str, 5); + printf(strcmp(str, "aabcde.123") == 0 ? "ok\n" : "BAD!\n"); + memmove(str, str + 1, 5); + printf(strcmp(str, "abcdee.123") == 0 ? "ok\n" : "BAD!\n"); +} +#endif diff --git a/libc/string/i386/memset.c b/libc/string/i386/memset.c index bbaa45215..9f51f3c60 100644 --- a/libc/string/i386/memset.c +++ b/libc/string/i386/memset.c @@ -28,20 +28,68 @@ * More importantly, these should provide a good example for * others to follow when adding arch specific optimizations. * -Erik + * + * 2009-04: modified by Denys Vlasenko <vda.linux@googlemail.com> + * Fill byte-by-byte is a bit too slow. I prefer 46 byte function + * which fills x4 faster than 21 bytes one. 
*/ #include <string.h> -/* Experimentally off - libc_hidden_proto(memset) */ +#undef memset void *memset(void *s, int c, size_t count) { - int d0, d1; - __asm__ __volatile__( - "rep\n\t" - "stosb" - : "=&c" (d0), "=&D" (d1) - :"a" (c),"1" (s),"0" (count) - :"memory"); - return s; + int reg, edi; + __asm__ __volatile__( + + /* Most of the time, count is divisible by 4 and nonzero */ + /* It's better to make this case faster */ + /* " jecxz 9f\n" - (optional) count == 0: goto ret */ + " mov %%ecx, %1\n" + " shr $2, %%ecx\n" + " jz 1f\n" /* zero words: goto fill_bytes */ + /* extend 8-bit fill to 32 bits */ + " movzx %%al, %%eax\n" /* 3 bytes */ + /* or: " and $0xff, %%eax\n" - 5 bytes */ + " imul $0x01010101, %%eax\n" /* 6 bytes */ + /* fill full words */ + " rep; stosl\n" + /* fill 0-3 bytes */ + "1: and $3, %1\n" + " jz 9f\n" /* (count & 3) == 0: goto end */ + "2: stosb\n" + " dec %1\n" + " jnz 2b\n" + /* end */ + "9:\n" + + : "=&D" (edi), "=&r" (reg) + : "0" (s), "a" (c), "c" (count) + : "memory" + ); + return s; } libc_hidden_def(memset) + +/* +gcc 4.3.1 +========= +57 push %edi +8b 7c 24 08 mov 0x8(%esp),%edi +8b 4c 24 10 mov 0x10(%esp),%ecx +8b 44 24 0c mov 0xc(%esp),%eax +89 ca mov %ecx,%edx +c1 e9 02 shr $0x2,%ecx +74 0b je 1f <__GI_memset+0x1f> +0f b6 c0 movzbl %al,%eax +69 c0 01 01 01 01 imul $0x1010101,%eax,%eax +f3 ab rep stos %eax,%es:(%edi) +83 e2 03 and $0x3,%edx +74 04 je 28 <__GI_memset+0x28> +aa stos %al,%es:(%edi) +4a dec %edx +75 fc jne 24 <__GI_memset+0x24> +8b 44 24 08 mov 0x8(%esp),%eax +5f pop %edi +c3 ret +*/ diff --git a/libc/string/i386/rawmemchr.c b/libc/string/i386/rawmemchr.c new file mode 100644 index 000000000..be0b142c3 --- /dev/null +++ b/libc/string/i386/rawmemchr.c @@ -0,0 +1,24 @@ +/* + * Adapted from strlen.c code + * + * Copyright (C) 2008 Denys Vlasenko <vda.linux@googlemail.com> + * + * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. 
+ */ + +#include <string.h> + +#undef rawmemchr +void *rawmemchr(const void *s, int c) +{ + void *eax; + int ecx, edi; + __asm__ __volatile__( + " repne; scasb\n" + " leal -1(%%edi), %%eax\n" + : "=&c" (ecx), "=&D" (edi), "=&a" (eax) + : "0" (0xffffffff), "1" (s), "2" (c) + ); + return eax; +} +libc_hidden_def(rawmemchr) diff --git a/libc/string/i386/strcat.c b/libc/string/i386/strcat.c index 2cf0237a6..e71aad4f7 100644 --- a/libc/string/i386/strcat.c +++ b/libc/string/i386/strcat.c @@ -32,7 +32,6 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(strcat) */ char *strcat(char * dest, const char * src) { int d0, d1, d2, d3; diff --git a/libc/string/i386/strchr.c b/libc/string/i386/strchr.c index 46b1dfb6e..93cc9583e 100644 --- a/libc/string/i386/strchr.c +++ b/libc/string/i386/strchr.c @@ -32,23 +32,25 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(strchr) */ +#undef strchr char *strchr(const char *s, int c) { - int d0; - register char * __res; - __asm__ __volatile__( - "movb %%al,%%ah\n" - "1:\tlodsb\n\t" - "cmpb %%ah,%%al\n\t" - "je 2f\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n\t" - "movl $1,%1\n" - "2:\tmovl %1,%0\n\t" - "decl %0" - :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c)); - return __res; + int esi; + register char * eax; + __asm__ __volatile__( + " movb %%al, %%ah\n" + "1: lodsb\n" + " cmpb %%ah, %%al\n" + " je 2f\n" + " testb %%al, %%al\n" + " jnz 1b\n" + " movl $1, %%esi\n" /* can use shorter xor + inc */ + "2: leal -1(%%esi), %%eax\n" + : "=a" (eax), "=&S" (esi) + : "0" (c), "1" (s) + /* no clobbers */ + ); + return eax; } libc_hidden_def(strchr) #ifdef __UCLIBC_SUSV3_LEGACY__ diff --git a/libc/string/i386/strchrnul.c b/libc/string/i386/strchrnul.c new file mode 100644 index 000000000..d48427214 --- /dev/null +++ b/libc/string/i386/strchrnul.c @@ -0,0 +1,47 @@ +/* + * Adapted from strchr.c code + * + * Copyright (C) 2008 Denys Vlasenko <vda.linux@googlemail.com> + * + * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. + */ + +#include <string.h> + +#undef strchrnul +/*#define strchrnul TESTING*/ +char *strchrnul(const char *s, int c) +{ + int esi; + char *eax; + __asm__ __volatile__( + " movb %%al, %%ah\n" + "1: lodsb\n" + " cmpb %%ah, %%al\n" + " je 2f\n" + " testb %%al, %%al\n" + " jnz 1b\n" + /* with this, we'd get strchr(): */ + /* " movl $1, %%esi\n" */ + "2: leal -1(%%esi), %%eax\n" + : "=a" (eax), "=&S" (esi) + : "0" (c), "1" (s) + /* no clobbers */ + ); + return eax; +} +#ifndef strchrnul +libc_hidden_def(strchrnul) +#else +/* Uncomment TESTING, gcc -D_GNU_SOURCE -m32 -Os strchrnul.c -o strchrnul + * and run ./strchrnul + */ +int main() +{ + static const char str[] = "abc.def"; + printf((char*)strchrnul(str, '.') - str == 3 ? "ok\n" : "BAD!\n"); + printf((char*)strchrnul(str, '*') - str == 7 ? "ok\n" : "BAD!\n"); + printf((char*)strchrnul(str, 0) - str == 7 ? "ok\n" : "BAD!\n"); + printf((char*)strchrnul(str+3, '.') - str == 3 ? 
"ok\n" : "BAD!\n"); +} +#endif diff --git a/libc/string/i386/strcmp.c b/libc/string/i386/strcmp.c index eff230c5c..9621f66f8 100644 --- a/libc/string/i386/strcmp.c +++ b/libc/string/i386/strcmp.c @@ -32,7 +32,6 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(strcmp) */ int strcmp(const char *cs, const char *ct) { int d0, d1; @@ -55,7 +54,6 @@ int strcmp(const char *cs, const char *ct) libc_hidden_def(strcmp) #ifndef __UCLIBC_HAS_LOCALE__ -/* Experimentally off - libc_hidden_proto(strcoll) */ strong_alias(strcmp,strcoll) libc_hidden_def(strcoll) #endif diff --git a/libc/string/i386/strcpy.c b/libc/string/i386/strcpy.c index 09065a9b7..fff1bd006 100644 --- a/libc/string/i386/strcpy.c +++ b/libc/string/i386/strcpy.c @@ -32,7 +32,7 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(strcpy) */ +#undef strcpy char *strcpy(char * dest, const char * src) { int d0, d1, d2; diff --git a/libc/string/i386/string.h b/libc/string/i386/string.h new file mode 100644 index 000000000..cf4333dec --- /dev/null +++ b/libc/string/i386/string.h @@ -0,0 +1,338 @@ +/* + * Copyright (C) 2008 Denys Vlasenko <vda.linux@googlemail.com> + * + * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball + */ + +#if !defined _STRING_H +#error "Never use <libc-string_i386.h> directly; include <string.h> instead" +#endif + +#ifndef _LIBC_STRING_i386_H +#define _LIBC_STRING_i386_H 1 + +static __always_inline +void *inlined_memset_const_c_count4(void *s, unsigned eax, unsigned count) +{ + int ecx, edi; + + if (count == 0) + return s; + + /* Very small (2 stores or less) are best done with direct + * mov <const>,<mem> instructions (they do not clobber registers) */ + if (count == 1) { + *(char *)(s + 0) = eax; + return s; + } + + /* You wonder why & 0xff is needed? Try memset(p, '\xff', size). + * If char is signed, '\xff' == -1! 
*/ + eax = (eax & 0xff) * 0x01010101; /* done at compile time */ + + if (count == 2) { + *(short *)(s + 0) = eax; + return s; + } + if (count == 3) { + *(short *)(s + 0) = eax; + *(char *) (s + 2) = eax; + return s; + } + if (count == 1*4 + 0) { + *(int *)(s + 0) = eax; + return s; + } + if (count == 1*4 + 1) { + *(int *) (s + 0) = eax; + *(char *)(s + 4) = eax; + return s; + } + if (count == 1*4 + 2) { + *(int *) (s + 0) = eax; + *(short *)(s + 4) = eax; + return s; + } + + /* Small string stores: don't clobber ecx + * (clobbers only eax and edi) */ +#define small_store(arg) { \ + __asm__ __volatile__( \ + arg \ + : "=&D" (edi) \ + : "a" (eax), "0" (s) \ + : "memory" \ + ); \ + return s; \ +} + if (count == 1*4 + 3) small_store("stosl; stosw; stosb"); + if (count == 2*4 + 0) { + ((int *)s)[0] = eax; + ((int *)s)[1] = eax; + return s; + } + if (count == 2*4 + 1) small_store("stosl; stosl; stosb"); + if (count == 2*4 + 2) small_store("stosl; stosl; stosw"); + if (count == 2*4 + 3) small_store("stosl; stosl; stosw; stosb"); + if (count == 3*4 + 0) small_store("stosl; stosl; stosl"); + if (count == 3*4 + 1) small_store("stosl; stosl; stosl; stosb"); + if (count == 3*4 + 2) small_store("stosl; stosl; stosl; stosw"); + if (count == 3*4 + 3) small_store("stosl; stosl; stosl; stosw; stosb"); + if (count == 4*4 + 0) small_store("stosl; stosl; stosl; stosl"); + if (count == 4*4 + 1) small_store("stosl; stosl; stosl; stosl; stosb"); + /* going over 7 bytes is suboptimal */ + /* stosw is 2-byte insn, so this one takes 6 bytes: */ + if (count == 4*4 + 2) small_store("stosl; stosl; stosl; stosl; stosw"); + /* 7 bytes */ + if (count == 4*4 + 3) small_store("stosl; stosl; stosl; stosl; stosw; stosb"); + /* 5 bytes */ + if (count == 5*4 + 0) small_store("stosl; stosl; stosl; stosl; stosl"); + /* 6 bytes */ + if (count == 5*4 + 1) small_store("stosl; stosl; stosl; stosl; stosl; stosb"); + /* 7 bytes */ + if (count == 5*4 + 2) small_store("stosl; stosl; stosl; stosl; stosl; stosw"); + /* 8 bytes, but oh well... */ + if (count == 5*4 + 3) small_store("stosl; stosl; stosl; stosl; stosl; stosw; stosb"); + /* 6 bytes */ + if (count == 6*4 + 0) small_store("stosl; stosl; stosl; stosl; stosl; stosl"); + /* the rest would be 7+ bytes and is handled below instead */ +#undef small_store + + /* Not small, but multiple-of-4 store. + * "mov <const>,%ecx; rep; stosl" sequence is 7 bytes */ + __asm__ __volatile__( + " rep; stosl\n" + : "=&c" (ecx), "=&D" (edi) + : "a" (eax), "0" (count / 4), "1" (s) + : "memory" + ); + return s; +} +#if 1 /* -51 bytes on shared i386 build with gcc 4.3.0 */ +#define memset(s, c, count) ( \ + ( !(__builtin_constant_p(c) && __builtin_constant_p(count)) \ + || ((count) > (6*4 + 0) && ((count) % 4) != 0) \ + ) \ + ? 
memset((s), (c), (count)) \ + : inlined_memset_const_c_count4((s), (c), (count)) \ + ) +#endif + + +static __always_inline +void *inlined_mempcpy_const_count4(void *d, const void *s, unsigned count) +{ + int ecx; + char *esi, *edi; + + if (count == 0) + return d; + + if (count == 1) { + *(char *)d = *(char *)s; + return d + 1; + } + if (count == 2) { + *(short *)d = *(short *)s; + return d + 2; + } + /* Small string moves: don't clobber ecx + * (clobbers only esi and edi) */ +#define small_move(arg) { \ + __asm__ __volatile__( \ + arg \ + : "=&S" (esi), "=&D" (edi) \ + : "0" (s), "1" (d) \ + : "memory" \ + ); \ + return edi; \ +} + if (count == 3) small_move("movsw; movsb"); + if (count == 1*4 + 0) { + *(int *)d = *(int *)s; + return d + 4; + } + if (count == 1*4 + 1) small_move("movsl; movsb"); + if (count == 1*4 + 2) small_move("movsl; movsw"); + if (count == 1*4 + 3) small_move("movsl; movsw; movsb"); + if (count == 2*4 + 0) small_move("movsl; movsl"); + if (count == 2*4 + 1) small_move("movsl; movsl; movsb"); + if (count == 2*4 + 2) small_move("movsl; movsl; movsw"); + if (count == 2*4 + 3) small_move("movsl; movsl; movsw; movsb"); + if (count == 3*4 + 0) small_move("movsl; movsl; movsl"); + if (count == 3*4 + 1) small_move("movsl; movsl; movsl; movsb"); + if (count == 3*4 + 2) small_move("movsl; movsl; movsl; movsw"); + if (count == 3*4 + 3) small_move("movsl; movsl; movsl; movsw; movsb"); + if (count == 4*4 + 0) small_move("movsl; movsl; movsl; movsl"); + if (count == 4*4 + 1) small_move("movsl; movsl; movsl; movsl; movsb"); + /* going over 7 bytes is suboptimal */ + /* movsw is 2-byte insn, so this one takes 6 bytes: */ + if (count == 4*4 + 2) small_move("movsl; movsl; movsl; movsl; movsw"); + /* 7 bytes */ + if (count == 4*4 + 3) small_move("movsl; movsl; movsl; movsl; movsw; movsb"); + /* 5 bytes */ + if (count == 5*4 + 0) small_move("movsl; movsl; movsl; movsl; movsl"); + /* 6 bytes */ + if (count == 5*4 + 1) small_move("movsl; movsl; movsl; movsl; movsl; movsb"); + /* 7 bytes */ + if (count == 5*4 + 2) small_move("movsl; movsl; movsl; movsl; movsl; movsw"); + /* 8 bytes, but oh well... */ + if (count == 5*4 + 3) small_move("movsl; movsl; movsl; movsl; movsl; movsw; movsb"); + /* 6 bytes */ + if (count == 6*4 + 0) small_move("movsl; movsl; movsl; movsl; movsl; movsl"); + /* the rest would be 7+ bytes and is handled below instead */ +#undef small_move + + /* Not small, but multiple-of-4 move. + * "mov <const>,%ecx; rep; movsl" sequence is 7 bytes */ + __asm__ __volatile__( + " rep; movsl\n" + : "=&c" (ecx), "=&S" (esi), "=&D" (edi) + : "0" (count / 4), "1" (s), "2" (d) + : "memory" + ); + return edi; +} +static __always_inline +void *inlined_memcpy_const_count4(void *d, const void *s, unsigned count) +{ + inlined_mempcpy_const_count4(d, s, count); + return d; +} +#if 1 /* +34 bytes on shared i386 build with gcc 4.3.0 */ +#define mempcpy(d, s, count) ( \ + ( !(__builtin_constant_p(count)) \ + || ((count) > (6*4 + 0) && ((count) % 4) != 0) \ + ) \ + ? mempcpy((d), (s), (count)) \ + : inlined_mempcpy_const_count4((d), (s), (count)) \ + ) +#define memcpy(d, s, count) ( \ + ( !(__builtin_constant_p(count)) \ + || ((count) > (6*4 + 0) && ((count) % 4) != 0) \ + ) \ + ? 
memcpy((d), (s), (count)) \ + : inlined_memcpy_const_count4((d), (s), (count)) \ + ) +#endif + + +static __always_inline +size_t inlined_strlen(const char *s) +{ + int edi; + int ecx; + __asm__ __volatile__( + " repne; scasb\n" + /* " notl %0\n" */ + /* " decl %0\n" */ + : "=c" (ecx), "=&D" (edi) + : "1" (s), "a" (0), "0" (0xffffffffu) + /* : no clobbers */ + ); + return -ecx - 1; +} +#if 0 /* +1108 bytes on shared i386 build with gcc 4.3.0 */ +#define strlen(s) inlined_strlen(s) +#endif + + +static __always_inline +char *inlined_stpcpy(char *dest, const char *src) +{ + char *esi, *edi; + int eax; + __asm__ __volatile__( + "1: lodsb\n" + " stosb\n" + " testb %%al, %%al\n" + " jnz 1b\n" + : "=&S" (esi), "=&D" (edi), "=&a" (eax) + : "0" (src), "1" (dest) + : "memory" + ); + return edi - 1; +} +static __always_inline +char *inlined_strcpy(char *dest, const char *src) +{ + inlined_stpcpy(dest, src); + return dest; +} +#if 0 /* +562 bytes on shared i386 build with gcc 4.3.0 */ +#define stpcpy(dest, src) inlined_stpcpy(dest, src) +#define strcpy(dest, src) inlined_strcpy(dest, src) +#endif + + +static __always_inline +void *inlined_memchr(const void *s, int c, size_t count) +{ + void *edi; + int ecx; + /* Unfortunately, c gets loaded to %eax (wide insn), not %al */ + __asm__ __volatile__( + " jecxz 1f\n" + " repne; scasb\n" + " leal -1(%%edi), %%edi\n" + " je 2f\n" + "1:\n" + " xorl %%edi, %%edi\n" + "2:\n" + : "=&D" (edi), "=&c" (ecx) + : "a" (c), "0" (s), "1" (count) + /* : no clobbers */ + ); + return edi; +} +static __always_inline +void *inlined_memchr_const_c(const void *s, int c, size_t count) +{ +#if defined __OPTIMIZE__ + void *edi; + int ecx, eax; + __asm__ __volatile__( + " jecxz 1f\n" + " movb %4, %%al\n" /* const c to %%al */ + " repne; scasb\n" + " leal -1(%%edi), %%edi\n" + " je 2f\n" + "1:\n" + " xorl %%edi, %%edi\n" + "2:\n" + : "=&D" (edi), "=&c" (ecx), "=&a" (eax) + : "0" (s), "i" (c), "1" (count) + /* : no clobbers */ + ); + return edi; +#else + /* With -O0, gcc can't figure out how to encode CONST c + * as an immediate operand. Generating slightly bigger code + * (usually "movl CONST,%eax", 3 bytes bigger than needed): + */ + void *edi; + int ecx, eax; + __asm__ __volatile__( + " jecxz 1f\n" + " repne; scasb\n" + " leal -1(%%edi), %%edi\n" + " je 2f\n" + "1:\n" + " xorl %%edi, %%edi\n" + "2:\n" + : "=&D" (edi), "=&c" (ecx), "=&a" (eax) + : "0" (s), "2" (c), "1" (count) + /* : no clobbers */ + ); + return edi; +#endif +} +#if 1 /* +2 bytes on shared i386 build with gcc 4.3.0 */ +#define memchr(s, c, count) ( \ + __builtin_constant_p(c) \ + ? 
inlined_memchr_const_c(s, (c) & 0xff, count) \ + : inlined_memchr(s, c, count) \ + ) +#endif + +#endif /* _LIBC_STRING_i386_H */ diff --git a/libc/string/i386/strlen.c b/libc/string/i386/strlen.c index 61a178393..ff2baeb38 100644 --- a/libc/string/i386/strlen.c +++ b/libc/string/i386/strlen.c @@ -32,17 +32,17 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(strlen) */ +#undef strlen size_t strlen(const char *s) { - int d0; - register int __res; - __asm__ __volatile__( - "repne\n\t" - "scasb\n\t" - "notl %0\n\t" - "decl %0" - :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffff)); - return __res; + int eax, ecx, edi; + __asm__ __volatile__( + " repne; scasb\n" + " notl %%ecx\n" + " leal -1(%%ecx), %%eax\n" + : "=&c" (ecx), "=&D" (edi), "=&a" (eax) + : "0" (0xffffffff), "1" (s), "2" (0) + ); + return eax; } libc_hidden_def(strlen) diff --git a/libc/string/i386/strncat.c b/libc/string/i386/strncat.c index 3872679d5..12f0a302b 100644 --- a/libc/string/i386/strncat.c +++ b/libc/string/i386/strncat.c @@ -32,30 +32,55 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(strncat) */ -char *strncat(char * dest, - const char * src, size_t count) +#undef strncat +/*#define strncat TESTING*/ +char *strncat(char * dest, const char * src, size_t count) { - int d0, d1, d2, d3; - __asm__ __volatile__( - "repne\n\t" - "scasb\n\t" - "decl %1\n\t" - "movl %8,%3\n" - "incl %3\n" - "1:\tdecl %3\n\t" - "jz 2f\n" - "lodsb\n\t" - "stosb\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n" - "jmp 3f\n" - "2:\txorl %2,%2\n\t" - "stosb\n" - "3:" - : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) - : "0" (src),"1" (dest),"2" (0),"3" (0xffffffff), "g" (count) - : "memory"); - return dest; + int esi, edi, eax, ecx, edx; + __asm__ __volatile__( + " xorl %%eax, %%eax\n" + " incl %%edx\n" + " pushl %%edi\n" /* save dest */ + " repne; scasb\n" + " decl %%edi\n" /* edi => NUL in dest */ + /* count-- */ + "1: decl %%edx\n" + /* if count reached 0, store NUL and bail out */ + " movl %%edx, %%eax\n" + " jz 2f\n" + /* else copy a char */ + " lodsb\n" + "2: stosb\n" + " testb %%al, %%al\n" + " jnz 1b\n" + /* end of loop */ + " popl %%eax\n" /* restore dest into eax */ + : "=&S" (esi), "=&D" (edi), "=&a" (eax), "=&c" (ecx), "=&d" (edx) + : "0" (src), "1" (dest), "3" (0xffffffff), "4" (count) + : "memory" + ); + return (char *)eax; } +#ifndef strncat libc_hidden_def(strncat) +#else +/* Uncomment TESTING, gcc -m32 -Os strncat.c -o strncat + * and run ./strncat + */ +int main() +{ + char buf[99]; + + strcpy(buf, "abc"); buf[4] = '*'; strncat(buf, "def", 0); + printf(strcmp(buf, "abc") == 0 && buf[4] == '*' ? "ok\n" : "BAD!\n"); + + strcpy(buf, "abc"); buf[6] = 1; buf[7] = '*'; strncat(buf, "def", 50); + printf(strcmp(buf, "abcdef") == 0 && buf[7] == '*' ? "ok\n" : "BAD!\n"); + + strcpy(buf, "abc"); buf[6] = 1; buf[7] = '*'; strncat(buf, "def", -1); + printf(strcmp(buf, "abcdef") == 0 && buf[7] == '*' ? "ok\n" : "BAD!\n"); + + strcpy(buf, "abc"); buf[6] = 1; buf[7] = '*'; strncat(buf, "def123", 3); + printf(strcmp(buf, "abcdef") == 0 && buf[7] == '*' ? 
"ok\n" : "BAD!\n"); +} +#endif diff --git a/libc/string/i386/strncmp.c b/libc/string/i386/strncmp.c index a14bb503b..bfb20c307 100644 --- a/libc/string/i386/strncmp.c +++ b/libc/string/i386/strncmp.c @@ -32,27 +32,28 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(strncmp) */ +#undef strncmp int strncmp(const char *cs, const char *ct, size_t count) { - register int __res; - int d0, d1, d2; - __asm__ __volatile__( - "incl %3\n" - "1:\tdecl %3\n\t" - "jz 2f\n" - "lodsb\n\t" - "scasb\n\t" - "jne 3f\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n" - "2:\txorl %%eax,%%eax\n\t" - "jmp 4f\n" - "3:\tsbbl %%eax,%%eax\n\t" - "orb $1,%%al\n" - "4:" - :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2) - :"1" (cs),"2" (ct),"3" (count)); - return __res; + int eax; + int esi, edi, ecx; + __asm__ __volatile__( + " incl %%ecx\n" + "1: decl %%ecx\n" + " jz 2f\n" + " lodsb\n" + " scasb\n" + " jne 3f\n" + " testb %%al, %%al\n" + " jnz 1b\n" + "2: xorl %%eax, %%eax\n" + " jmp 4f\n" + "3: sbbl %%eax, %%eax\n" + " orb $1, %%al\n" + "4:\n" + : "=a" (eax), "=&S" (esi), "=&D" (edi), "=&c" (ecx) + : "1" (cs), "2" (ct), "3" (count) + ); + return eax; } libc_hidden_weak(strncmp) diff --git a/libc/string/i386/strncpy.c b/libc/string/i386/strncpy.c index 76aa6ae1b..99d104b0d 100644 --- a/libc/string/i386/strncpy.c +++ b/libc/string/i386/strncpy.c @@ -32,25 +32,44 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(strncpy) */ +#undef strncpy +/*#define strncpy TESTING*/ char *strncpy(char * dest, const char * src, size_t count) { - int d0, d1, d2, d3; - __asm__ __volatile__( - "incl %2\n" - "1:\n" - "decl %2\n" - "jz 2f\n" - "lodsb\n\t" - "stosb\n\t" - "testb %%al,%%al\n\t" - "jne 1b\n\t" - "decl %2\n" - "rep\n\t" - "stosb\n" - "2:" - : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) - :"0" (src),"1" (dest),"2" (count) : "memory"); - return dest; + int esi, edi, ecx, eax; + __asm__ __volatile__( + "1: subl $1, %%ecx\n" /* not dec! it doesnt set CF */ + " jc 2f\n" + " lodsb\n" + " stosb\n" + " testb %%al, %%al\n" + " jnz 1b\n" + " rep; stosb\n" + "2:\n" + : "=&S" (esi), "=&D" (edi), "=&c" (ecx), "=&a" (eax) + : "0" (src), "1" (dest), "2" (count) + : "memory" + ); + return dest; } +#ifndef strncpy libc_hidden_def(strncpy) +#else +/* Uncomment TESTING, gcc -D_GNU_SOURCE -m32 -Os strncpy.c -o strncpy + * and run ./strncpy + */ +int main() +{ + static char str[99]; + + str[3] = '*'; str[4] = 0; strncpy(str, "abc", 3); + printf(strcmp(str, "abc*") == 0 ? "ok\n" : "BAD!\n"); + + str[4] = '*'; str[5] = '+'; strncpy(str, "abc", 5); + printf(strcmp(str, "abc") == 0 && str[4] == 0 && str[5] == '+' ? + "ok\n" : "BAD!\n"); + strncpy(str, "def", 0); /* should do nothing */ + printf(strcmp(str, "abc") == 0 && str[4] == 0 && str[5] == '+' ? 
+ "ok\n" : "BAD!\n"); +} +#endif diff --git a/libc/string/i386/strnlen.c b/libc/string/i386/strnlen.c index 02c72f530..f58f698d1 100644 --- a/libc/string/i386/strnlen.c +++ b/libc/string/i386/strnlen.c @@ -33,24 +33,43 @@ #include <string.h> #ifdef __USE_GNU -/* Experimentally off - libc_hidden_proto(strnlen) */ + +#undef strnlen +/*#define strnlen TESTING*/ size_t strnlen(const char *s, size_t count) { - int d0; - register int __res; - __asm__ __volatile__( - "movl %2,%0\n\t" - "incl %1\n" - "jmp 2f\n" - "1:\tcmpb $0,(%0)\n\t" - "je 3f\n\t" - "incl %0\n" - "2:\tdecl %1\n\t" - "jne 1b\n" - "3:\tsubl %2,%0" - :"=a" (__res), "=&d" (d0) - :"c" (s),"1" (count)); - return __res; + int edx; + int eax; + __asm__ __volatile__( + " leal -1(%%ecx), %%eax\n" + "1: incl %%eax\n" + " decl %%edx\n" + " jz 3f\n" + " cmpb $0, (%%eax)\n" + " jnz 1b\n" + "3: subl %%ecx, %%eax" + : "=a" (eax), "=&d" (edx) + : "c" (s), "1" (count + 1) + ); + return eax; } +#ifndef strnlen libc_hidden_def(strnlen) +#else +/* Uncomment TESTING, gcc -D_GNU_SOURCE -m32 -Os strnlen.c -o strnlen + * and run ./strnlen + */ +int main() +{ + printf(strnlen("abc\0def", -2) == 3 ? "ok\n" : "BAD!\n"); + printf(strnlen("abc\0def", -1) == 3 ? "ok\n" : "BAD!\n"); + printf(strnlen("abc\0def", 0) == 0 ? "ok\n" : "BAD!\n"); + printf(strnlen("abc\0def", 1) == 1 ? "ok\n" : "BAD!\n"); + printf(strnlen("abc\0def", 2) == 2 ? "ok\n" : "BAD!\n"); + printf(strnlen("abc\0def", 3) == 3 ? "ok\n" : "BAD!\n"); + printf(strnlen("abc\0def", 4) == 3 ? "ok\n" : "BAD!\n"); + printf(strnlen("abc\0def", 5) == 3 ? "ok\n" : "BAD!\n"); +} +#endif + #endif diff --git a/libc/string/i386/strrchr.c b/libc/string/i386/strrchr.c index ef378685b..5c349f683 100644 --- a/libc/string/i386/strrchr.c +++ b/libc/string/i386/strrchr.c @@ -32,21 +32,25 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(strrchr) */ char *strrchr(const char *s, int c) { - int d0, d1; - register char * __res; - __asm__ __volatile__( - "movb %%al,%%ah\n" - "1:\tlodsb\n\t" - "cmpb %%ah,%%al\n\t" - "jne 2f\n\t" - "leal -1(%%esi),%0\n" - "2:\ttestb %%al,%%al\n\t" - "jne 1b" - :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c)); - return __res; + char *eax; + + __asm__ __volatile__( + " movb %%cl, %%ch\n" + "1: movb (%1), %%cl\n" /* load char */ + " cmpb %%cl, %%ch\n" /* char == c? */ + " jne 2f\n" + " movl %1, %%eax\n" + "2: incl %1\n" + " testb %%cl, %%cl\n" /* char == NUL? */ + " jnz 1b\n" + /* "=c": use ecx, not ebx (-fpic uses it). */ + : "=a" (eax), "=r" (s), "=c" (c) + : "0" (0), "1" (s), "2" (c) + /* : no clobbers */ + ); + return eax; } libc_hidden_def(strrchr) #ifdef __UCLIBC_SUSV3_LEGACY__ diff --git a/libc/string/ia64/bcopy.S b/libc/string/ia64/bcopy.S index c5637c369..62da68d74 100644 --- a/libc/string/ia64/bcopy.S +++ b/libc/string/ia64/bcopy.S @@ -1,4 +1,4 @@ -#include "sysdep.h" +#include <sysdep.h> #ifdef __UCLIBC_SUSV3_LEGACY__ diff --git a/libc/string/ia64/bzero.S b/libc/string/ia64/bzero.S index d390838a6..79419579a 100644 --- a/libc/string/ia64/bzero.S +++ b/libc/string/ia64/bzero.S @@ -15,9 +15,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ /* Return: dest @@ -32,7 +31,7 @@ Since a stf.spill f0 can store 16B in one go, we use this instruction to get peak speed. */ -#include "sysdep.h" +#include <sysdep.h> #ifdef __UCLIBC_SUSV3_LEGACY__ @@ -47,13 +46,13 @@ #define ptr1 r28 #define ptr2 r27 #define ptr3 r26 -#define ptr9 r24 +#define ptr9 r24 #define loopcnt r23 #define linecnt r22 #define bytecnt r21 -// This routine uses only scratch predicate registers (p6 - p15) -#define p_scr p6 // default register for same-cycle branches +/* This routine uses only scratch predicate registers (p6 - p15) */ +#define p_scr p6 /* default register for same-cycle branches */ #define p_unalgn p9 #define p_y p11 #define p_n p12 @@ -65,7 +64,7 @@ #define MIN1 15 #define MIN1P1HALF 8 #define LINE_SIZE 128 -#define LSIZE_SH 7 // shift amount +#define LSIZE_SH 7 /* shift amount */ #define PREF_AHEAD 8 #define USE_FLP @@ -87,49 +86,49 @@ ENTRY(bzero) movi0 save_lc = ar.lc } { .mmi .body - mov ret0 = dest // return value + mov ret0 = dest /* return value */ nop.m 0 cmp.eq p_scr, p0 = cnt, r0 ;; } { .mmi - and ptr2 = -(MIN1+1), dest // aligned address - and tmp = MIN1, dest // prepare to check for alignment - tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U) + and ptr2 = -(MIN1+1), dest /* aligned address */ + and tmp = MIN1, dest /* prepare to check for alignment */ + tbit.nz p_y, p_n = dest, 0 /* Do we have an odd address? (M_B_U) */ } { .mib mov ptr1 = dest nop.i 0 -(p_scr) br.ret.dpnt.many rp // return immediately if count = 0 +(p_scr) br.ret.dpnt.many rp /* return immediately if count = 0 */ ;; } { .mib cmp.ne p_unalgn, p0 = tmp, r0 -} { .mib // NB: # of bytes to move is 1 - sub bytecnt = (MIN1+1), tmp // higher than loopcnt - cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task? -(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U) +} { .mib /* NB: # of bytes to move is 1 */ + sub bytecnt = (MIN1+1), tmp /* higher than loopcnt */ + cmp.gt p_scr, p0 = 16, cnt /* is it a minimalistic task? */ +(p_scr) br.cond.dptk.many .move_bytes_unaligned /* go move just a few (M_B_U) */ ;; } { .mmi -(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment -(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ? +(p_unalgn) add ptr1 = (MIN1+1), ptr2 /* after alignment */ +(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 /* after alignment */ +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 /* should we do a st8 ? */ ;; } { .mib (p_y) add cnt = -8, cnt -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ? +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 /* should we do a st4 ? */ } { .mib (p_y) st8 [ptr2] = r0,-4 (p_n) add ptr2 = 4, ptr2 ;; } { .mib (p_yy) add cnt = -4, cnt -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ? +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 /* should we do a st2 ? */ } { .mib (p_yy) st4 [ptr2] = r0,-2 (p_nn) add ptr2 = 2, ptr2 ;; } { .mmi - mov tmp = LINE_SIZE+1 // for compare + mov tmp = LINE_SIZE+1 /* for compare */ (p_y) add cnt = -2, cnt -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ? +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 /* should we do a st1 ? */ } { .mmi nop.m 0 (p_y) st2 [ptr2] = r0,-1 @@ -138,44 +137,44 @@ ENTRY(bzero) { .mmi (p_yy) st1 [ptr2] = r0 - cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task? + cmp.gt p_scr, p0 = tmp, cnt /* is it a minimalistic task? 
*/ } { .mbb (p_yy) add cnt = -1, cnt -(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few +(p_scr) br.cond.dpnt.many .fraction_of_line /* go move just a few */ ;; } { .mib - nop.m 0 + nop.m 0 shr.u linecnt = cnt, LSIZE_SH nop.b 0 ;; } .align 32 -.l1b: // ------------------// L1B: store ahead into cache lines; fill later +.l1b: /* ------------------ L1B: store ahead into cache lines; fill later */ { .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder + and tmp = -(LINE_SIZE), cnt /* compute end of range */ + mov ptr9 = ptr1 /* used for prefetching */ + and cnt = (LINE_SIZE-1), cnt /* remainder */ } { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value + mov loopcnt = PREF_AHEAD-1 /* default prefetch loop */ + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt /* check against actual value */ ;; } { .mmi (p_scr) add loopcnt = -1, linecnt - add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total range + add ptr2 = 16, ptr1 /* start of stores (beyond prefetch stores) */ + add ptr1 = tmp, ptr1 /* first address beyond total range */ ;; } { .mmi - add tmp = -1, linecnt // next loop count + add tmp = -1, linecnt /* next loop count */ movi0 ar.lc = loopcnt ;; } .pref_l1b: { .mib - stf.spill [ptr9] = f0, 128 // Do stores one cache line apart + stf.spill [ptr9] = f0, 128 /* Do stores one cache line apart */ nop.i 0 br.cloop.dptk.few .pref_l1b ;; } { .mmi - add ptr0 = 16, ptr2 // Two stores in parallel + add ptr0 = 16, ptr2 /* Two stores in parallel */ movi0 ar.lc = tmp ;; } .l1bx: @@ -190,7 +189,7 @@ ENTRY(bzero) { .mmi stf.spill [ptr2] = f0, 32 stf.spill [ptr0] = f0, 64 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + cmp.lt p_scr, p0 = ptr9, ptr1 /* do we need more prefetching? */ ;; } { .mmb stf.spill [ptr2] = f0, 32 @@ -198,14 +197,14 @@ ENTRY(bzero) br.cloop.dptk.few .l1bx ;; } { .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? + cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? */ (p_scr) br.cond.dpnt.many .move_bytes_from_alignment ;; } .fraction_of_line: { .mib add ptr2 = 16, ptr1 - shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32 + shr.u loopcnt = cnt, 5 /* loopcnt = cnt / 32 */ ;; } { .mib cmp.eq p_scr, p0 = loopcnt, r0 @@ -213,11 +212,11 @@ ENTRY(bzero) (p_scr) br.cond.dpnt.many .store_words ;; } { .mib - and cnt = 0x1f, cnt // compute the remaining cnt + and cnt = 0x1f, cnt /* compute the remaining cnt */ movi0 ar.lc = loopcnt ;; } .align 32 -.l2: // -----------------------------// L2A: store 32B in 2 cycles +.l2: /* ----------------------------- L2A: store 32B in 2 cycles */ { .mmb store [ptr1] = myval, 8 store [ptr2] = myval, 8 @@ -228,38 +227,38 @@ ENTRY(bzero) ;; } .store_words: { .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch + cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? 
*/ +(p_scr) br.cond.dpnt.many .move_bytes_from_alignment /* Branch */ ;; } { .mmi - store [ptr1] = myval, 8 // store - cmp.le p_y, p_n = 16, cnt // - add cnt = -8, cnt // subtract + store [ptr1] = myval, 8 /* store */ + cmp.le p_y, p_n = 16, cnt /* */ + add cnt = -8, cnt /* subtract */ ;; } { .mmi -(p_y) store [ptr1] = myval, 8 // store +(p_y) store [ptr1] = myval, 8 /* store */ (p_y) cmp.le.unc p_yy, p_nn = 16, cnt -(p_y) add cnt = -8, cnt // subtract +(p_y) add cnt = -8, cnt /* subtract */ ;; } -{ .mmi // store +{ .mmi /* store */ (p_yy) store [ptr1] = myval, 8 -(p_yy) add cnt = -8, cnt // subtract +(p_yy) add cnt = -8, cnt /* subtract */ ;; } .move_bytes_from_alignment: { .mib cmp.eq p_scr, p0 = cnt, r0 - tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ? + tbit.nz.unc p_y, p0 = cnt, 2 /* should we terminate with a st4 ? */ (p_scr) br.cond.dpnt.few .restore_and_exit ;; } { .mib (p_y) st4 [ptr1] = r0,4 - tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ? + tbit.nz.unc p_yy, p0 = cnt, 1 /* should we terminate with a st2 ? */ ;; } { .mib (p_yy) st2 [ptr1] = r0,2 - tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ? + tbit.nz.unc p_y, p0 = cnt, 0 /* should we terminate with a st1 ? */ ;; } { .mib @@ -281,38 +280,38 @@ ENTRY(bzero) (p_n) add ptr2 = 2, ptr1 } { .mmi (p_y) add ptr2 = 3, ptr1 -(p_y) st1 [ptr1] = r0, 1 // fill 1 (odd-aligned) byte -(p_y) add cnt = -1, cnt // [15, 14 (or less) left] +(p_y) st1 [ptr1] = r0, 1 /* fill 1 (odd-aligned) byte */ +(p_y) add cnt = -1, cnt /* [15, 14 (or less) left] */ ;; } { .mmi (p_yy) cmp.le.unc p_y, p0 = 8, cnt - add ptr3 = ptr1, cnt // prepare last store + add ptr3 = ptr1, cnt /* prepare last store */ movi0 ar.lc = save_lc } { .mmi -(p_yy) st2 [ptr1] = r0, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = r0, 4 // fill 2 (aligned) bytes -(p_yy) add cnt = -4, cnt // [11, 10 (o less) left] +(p_yy) st2 [ptr1] = r0, 4 /* fill 2 (aligned) bytes */ +(p_yy) st2 [ptr2] = r0, 4 /* fill 2 (aligned) bytes */ +(p_yy) add cnt = -4, cnt /* [11, 10 (o less) left] */ ;; } { .mmi (p_y) cmp.le.unc p_yy, p0 = 8, cnt - add ptr3 = -1, ptr3 // last store - tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ? + add ptr3 = -1, ptr3 /* last store */ + tbit.nz p_scr, p0 = cnt, 1 /* will there be a st2 at the end ? */ } { .mmi -(p_y) st2 [ptr1] = r0, 4 // fill 2 (aligned) bytes -(p_y) st2 [ptr2] = r0, 4 // fill 2 (aligned) bytes -(p_y) add cnt = -4, cnt // [7, 6 (or less) left] +(p_y) st2 [ptr1] = r0, 4 /* fill 2 (aligned) bytes */ +(p_y) st2 [ptr2] = r0, 4 /* fill 2 (aligned) bytes */ +(p_y) add cnt = -4, cnt /* [7, 6 (or less) left] */ ;; } { .mmi -(p_yy) st2 [ptr1] = r0, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = r0, 4 // fill 2 (aligned) bytes - // [3, 2 (or less) left] - tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ? +(p_yy) st2 [ptr1] = r0, 4 /* fill 2 (aligned) bytes */ +(p_yy) st2 [ptr2] = r0, 4 /* fill 2 (aligned) bytes */ + /* [3, 2 (or less) left] */ + tbit.nz p_y, p0 = cnt, 0 /* will there be a st1 at the end ? 
*/ } { .mmi (p_yy) add cnt = -4, cnt ;; } { .mmb -(p_scr) st2 [ptr1] = r0 // fill 2 (aligned) bytes -(p_y) st1 [ptr3] = r0 // fill last byte (using ptr3) +(p_scr) st2 [ptr1] = r0 /* fill 2 (aligned) bytes */ +(p_y) st1 [ptr3] = r0 /* fill last byte (using ptr3) */ br.ret.sptk.many rp ;; } END(bzero) diff --git a/libc/string/ia64/memccpy.S b/libc/string/ia64/memccpy.S index 1afba3637..5c4d7e3c2 100644 --- a/libc/string/ia64/memccpy.S +++ b/libc/string/ia64/memccpy.S @@ -14,16 +14,15 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Return: a pointer to the next byte after char in dest or NULL Inputs: in0: dest in1: src - in2: char + in2: char in3: byte count This implementation assumes little endian mode (UM.be = 0). @@ -31,7 +30,7 @@ This implementation assumes that it is safe to do read ahead in the src block, without getting beyond its limit. */ -#include "sysdep.h" +#include <sysdep.h> #undef ret #define OP_T_THRES 16 @@ -69,75 +68,75 @@ ENTRY(memccpy) .rotr r[MEMLAT + 7], tmp1[4], tmp2[4], val[4], tmp3[2], pos0[2] .rotp p[MEMLAT + 6 + 1] - mov ret0 = r0 // return NULL if no match + mov ret0 = r0 /* return NULL if no match */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers - mov dest = in0 // dest + mov saved_pr = pr /* save the predicate registers */ + mov dest = in0 /* dest */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter - mov saved_ec = ar.ec // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ + mov saved_ec = ar.ec /* save the loop counter */ .body - mov src = in1 // src - extr.u char = in2, 0, 8 // char - mov len = in3 // len - sub tmp = r0, in0 // tmp = -dest - cmp.ne p7, p0 = r0, r0 // clear p7 + mov src = in1 /* src */ + extr.u char = in2, 0, 8 /* char */ + mov len = in3 /* len */ + sub tmp = r0, in0 /* tmp = -dest */ + cmp.ne p7, p0 = r0, r0 /* clear p7 */ ;; - and loopcnt = 7, tmp // loopcnt = -dest % 8 - cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES - mov ar.ec = 0 // ec not guaranteed zero on entry -(p6) br.cond.spnt .cpyfew // copy byte by byte + and loopcnt = 7, tmp /* loopcnt = -dest % 8 */ + cmp.ge p6, p0 = OP_T_THRES, len /* is len <= OP_T_THRES */ + mov ar.ec = 0 /* ec not guaranteed zero on entry */ +(p6) br.cond.spnt .cpyfew /* copy byte by byte */ ;; cmp.eq p6, p0 = loopcnt, r0 mux1 charx8 = char, @brcst (p6) br.cond.sptk .dest_aligned - sub len = len, loopcnt // len -= -dest % 8 - adds loopcnt = -1, loopcnt // --loopcnt + sub len = len, loopcnt /* len -= -dest % 8 */ + adds loopcnt = -1, loopcnt /* --loopcnt */ ;; mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes - ld1 value = [src], 1 // value = *src++ +.l1: /* copy -dest % 8 bytes */ + ld1 value = [src], 1 /* value = *src++ */ ;; - st1 [dest] = value, 1 // *dest++ = value + st1 [dest] = value, 1 /* *dest++ = value */ cmp.eq p6, p0 = value, char (p6) br.cond.spnt .foundit br.cloop.dptk .l1 .dest_aligned: - and sh1 = 7, src // sh1 = src % 8 - and tmp = -8, len // tmp = len & -OPSIZ - and asrc = -8, src // asrc = src & -OPSIZ -- align src - shr.u loopcnt = len, 3 // loopcnt = len / 8 - and len = 7, len ;; // len = len % 8 - shl sh1 = sh1, 3 // sh1 = 8 * (src % 8) - adds loopcnt = -1, loopcnt // --loopcnt - mov pr.rot = 1 << 16 
;; // set rotating predicates - sub sh2 = 64, sh1 // sh2 = 64 - sh1 - mov ar.lc = loopcnt // set LC - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + and sh1 = 7, src /* sh1 = src % 8 */ + and tmp = -8, len /* tmp = len & -OPSIZ */ + and asrc = -8, src /* asrc = src & -OPSIZ -- align src */ + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + and len = 7, len ;; /* len = len % 8 */ + shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */ + adds loopcnt = -1, loopcnt /* --loopcnt */ + mov pr.rot = 1 << 16 ;; /* set rotating predicates */ + sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */ + mov ar.lc = loopcnt /* set LC */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned ;; - add src = src, tmp // src += len & -OPSIZ - mov ar.ec = MEMLAT + 6 + 1 // six more passes needed - ld8 r[1] = [asrc], 8 // r[1] = w0 - cmp.ne p6, p0 = r0, r0 ;; // clear p6 + add src = src, tmp /* src += len & -OPSIZ */ + mov ar.ec = MEMLAT + 6 + 1 /* six more passes needed */ + ld8 r[1] = [asrc], 8 /* r[1] = w0 */ + cmp.ne p6, p0 = r0, r0 ;; /* clear p6 */ ALIGN(32) .l2: -(p[0]) ld8.s r[0] = [asrc], 8 // r[0] = w1 -(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 >> sh1 -(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 << sh2 +(p[0]) ld8.s r[0] = [asrc], 8 /* r[0] = w1 */ +(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 /* tmp1 = w0 >> sh1 */ +(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 /* tmp2 = w1 << sh2 */ (p[MEMLAT+4]) xor tmp3[0] = val[1], charx8 (p[MEMLAT+5]) czx1.r pos0[0] = tmp3[1] -(p[MEMLAT+6]) chk.s r[6 + MEMLAT], .recovery1 // our data isn't - // valid - rollback! +(p[MEMLAT+6]) chk.s r[6 + MEMLAT], .recovery1 /* our data isn't */ + /* valid - rollback! */ (p[MEMLAT+6]) cmp.ne p6, p0 = 8, pos0[1] (p6) br.cond.spnt .gotit -(p[MEMLAT+6]) st8 [dest] = val[3], 8 // store val to dest -(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] // val = tmp1 | tmp2 +(p[MEMLAT+6]) st8 [dest] = val[3], 8 /* store val to dest */ +(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] /* val = tmp1 | tmp2 */ br.ctop.sptk .l2 br.cond.sptk .cpyfew .src_aligned: - cmp.ne p6, p0 = r0, r0 // clear p6 - mov ar.ec = MEMLAT + 2 + 1 ;; // set EC + cmp.ne p6, p0 = r0, r0 /* clear p6 */ + mov ar.ec = MEMLAT + 2 + 1 ;; /* set EC */ .l3: (p[0]) ld8.s r[0] = [src], 8 (p[MEMLAT]) xor tmp3[0] = r[MEMLAT], charx8 @@ -149,8 +148,8 @@ ENTRY(memccpy) (p[MEMLAT+2]) st8 [dest] = r[MEMLAT+2], 8 br.ctop.dptk .l3 .cpyfew: - cmp.eq p6, p0 = len, r0 // is len == 0 ? - adds len = -1, len // --len; + cmp.eq p6, p0 = len, r0 /* is len == 0 ? 
*/ + adds len = -1, len /* --len; */ (p6) br.cond.spnt .restore_and_exit ;; mov ar.lc = len .l4: @@ -163,14 +162,14 @@ ENTRY(memccpy) .foundit: (p6) mov ret0 = dest .restore_and_exit: - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter - mov ar.ec = saved_ec ;; // restore the epilog counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ + mov ar.ec = saved_ec ;; /* restore the epilog counter */ br.ret.sptk.many b0 .gotit: .pred.rel "mutex" p6, p7 -(p6) mov value = val[3] // if coming from l2 -(p7) mov value = r[MEMLAT+2] // if coming from l3 +(p6) mov value = val[3] /* if coming from l2 */ +(p7) mov value = r[MEMLAT+2] /* if coming from l3 */ mov ar.lc = pos0[1] ;; .l5: extr.u tmp = value, 0, 8 ;; diff --git a/libc/string/ia64/memchr.S b/libc/string/ia64/memchr.S index 2bf078fe6..fcd9f9305 100644 --- a/libc/string/ia64/memchr.S +++ b/libc/string/ia64/memchr.S @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Return: the address of the first occurence of chr in str or NULL @@ -40,7 +39,7 @@ All the loops in this function could have had the internal branch removed if br.ctop and br.cloop could be predicated :-(. */ -#include "sysdep.h" +#include <sysdep.h> #undef ret #define saved_pr r15 @@ -62,18 +61,18 @@ ENTRY(__memchr) .rotr value[MEMLAT+1], addr[MEMLAT+3], aux[2], poschr[2] .rotp p[MEMLAT+3] .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .save pr, saved_pr - mov saved_pr = pr // save the predicates + mov saved_pr = pr /* save the predicates */ .body mov ret0 = str - and tmp = 7, str // tmp = str % 8 - cmp.ne p7, p0 = r0, r0 // clear p7 - extr.u chr = in1, 0, 8 // chr = (unsigned char) in1 + and tmp = 7, str /* tmp = str % 8 */ + cmp.ne p7, p0 = r0, r0 /* clear p7 */ + extr.u chr = in1, 0, 8 /* chr = (unsigned char) in1 */ mov len = in2 - cmp.gtu p6, p0 = 16, in2 // use a simple loop for short -(p6) br.cond.spnt .srchfew ;; // searches - sub loopcnt = 8, tmp // loopcnt = 8 - tmp + cmp.gtu p6, p0 = 16, in2 /* use a simple loop for short */ +(p6) br.cond.spnt .srchfew ;; /* searches */ + sub loopcnt = 8, tmp /* loopcnt = 8 - tmp */ cmp.eq p6, p0 = tmp, r0 (p6) br.cond.sptk .str_aligned;; sub len = len, loopcnt @@ -86,12 +85,12 @@ ENTRY(__memchr) (p6) br.cond.spnt .foundit br.cloop.sptk .l1 ;; .str_aligned: - cmp.ne p6, p0 = r0, r0 // clear p6 - shr.u loopcnt = len, 3 // loopcnt = len / 8 - and len = 7, len ;; // remaining len = len & 7 + cmp.ne p6, p0 = r0, r0 /* clear p6 */ + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + and len = 7, len ;; /* remaining len = len & 7 */ adds loopcnt = -1, loopcnt mov ar.ec = MEMLAT + 3 - mux1 chrx8 = chr, @brcst ;; // get a word full of chr + mux1 chrx8 = chr, @brcst ;; /* get a word full of chr */ mov ar.lc = loopcnt mov pr.rot = 1 << 16 ;; .l2: @@ -114,20 +113,18 @@ ENTRY(__memchr) (p6) br.cond.dpnt .foundit br.cloop.sptk .l3 ;; .notfound: - cmp.ne p6, p0 = r0, r0 // clear p6 (p7 was already 0 when we got here) - mov ret0 = r0 ;; // return NULL + cmp.ne p6, p0 = r0, r0 /* clear p6 (p7 was already 0 when we got here) */ + mov ret0 = 
r0 ;; /* return NULL */ .foundit: .pred.rel "mutex" p6, p7 -(p6) adds ret0 = -1, ret0 // if we got here from l1 or l3 -(p7) add ret0 = addr[MEMLAT+2], poschr[1] // if we got here from l2 +(p6) adds ret0 = -1, ret0 /* if we got here from l1 or l3 */ +(p7) add ret0 = addr[MEMLAT+2], poschr[1] /* if we got here from l2 */ mov pr = saved_pr, -1 mov ar.lc = saved_lc br.ret.sptk.many b0 END(__memchr) -weak_alias (__memchr, memchr) -#if !__BOUNDED_POINTERS__ -weak_alias (__memchr, __ubp_memchr) -#endif -libc_hidden_def (memchr) +weak_alias(__memchr, memchr) +weak_alias(__memchr, __ubp_memchr) +libc_hidden_def(memchr) diff --git a/libc/string/ia64/memcmp.S b/libc/string/ia64/memcmp.S index 8b0c096ce..0cf54e7db 100644 --- a/libc/string/ia64/memcmp.S +++ b/libc/string/ia64/memcmp.S @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Return: the result of the comparison @@ -28,16 +27,16 @@ In this form, it assumes little endian mode. For big endian mode, the the two shifts in .l2 must be inverted: - shl tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 << sh1 + shl tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 << sh1 shr.u tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 >> sh2 and all the mux1 instructions should be replaced by plain mov's. */ -#include "sysdep.h" +#include <sysdep.h> #undef ret -#define OP_T_THRES 16 -#define OPSIZ 8 +#define OP_T_THRES 16 +#define OPSIZ 8 #define MEMLAT 2 #define start r15 @@ -56,85 +55,85 @@ ENTRY(memcmp) .prologue - alloc r2 = ar.pfs, 3, 37, 0, 40 + alloc r2 = ar.pfs, 3, 37, 0, 40 .rotr r[MEMLAT + 2], q[MEMLAT + 5], tmp1[4], tmp2[4], val[2] .rotp p[MEMLAT + 4 + 1] - mov ret0 = r0 // by default return value = 0 + mov ret0 = r0 /* by default return value = 0 */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers + mov saved_pr = pr /* save the predicate registers */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body - mov dest = in0 // dest - mov src = in1 // src - mov len = in2 // len - sub tmp = r0, in0 // tmp = -dest + mov dest = in0 /* dest */ + mov src = in1 /* src */ + mov len = in2 /* len */ + sub tmp = r0, in0 /* tmp = -dest */ ;; - and loopcnt = 7, tmp // loopcnt = -dest % 8 - cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES -(p6) br.cond.spnt .cmpfew // compare byte by byte + and loopcnt = 7, tmp /* loopcnt = -dest % 8 */ + cmp.ge p6, p0 = OP_T_THRES, len /* is len <= OP_T_THRES */ +(p6) br.cond.spnt .cmpfew /* compare byte by byte */ ;; cmp.eq p6, p0 = loopcnt, r0 (p6) br.cond.sptk .dest_aligned - sub len = len, loopcnt // len -= -dest % 8 - adds loopcnt = -1, loopcnt // --loopcnt + sub len = len, loopcnt /* len -= -dest % 8 */ + adds loopcnt = -1, loopcnt /* --loopcnt */ ;; mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes - ld1 value1 = [src], 1 // value = *src++ +.l1: /* copy -dest % 8 bytes */ + ld1 value1 = [src], 1 /* value = *src++ */ ld1 value2 = [dest], 1 ;; cmp.ne p6, p0 = value1, value2 (p6) br.cond.spnt .done br.cloop.dptk .l1 .dest_aligned: - and sh1 = 7, src // sh1 = src % 8 - and tmp = -8, len // tmp = len & -OPSIZ - and asrc = -8, src // asrc = src & -OPSIZ -- align src - shr.u loopcnt = len, 3 // loopcnt = len / 8 - and len = 7, len ;; 
// len = len % 8 - shl sh1 = sh1, 3 // sh1 = 8 * (src % 8) - adds loopcnt = -1, loopcnt // --loopcnt - mov pr.rot = 1 << 16 ;; // set rotating predicates - sub sh2 = 64, sh1 // sh2 = 64 - sh1 - mov ar.lc = loopcnt // set LC - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + and sh1 = 7, src /* sh1 = src % 8 */ + and tmp = -8, len /* tmp = len & -OPSIZ */ + and asrc = -8, src /* asrc = src & -OPSIZ -- align src */ + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + and len = 7, len ;; /* len = len % 8 */ + shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */ + adds loopcnt = -1, loopcnt /* --loopcnt */ + mov pr.rot = 1 << 16 ;; /* set rotating predicates */ + sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */ + mov ar.lc = loopcnt /* set LC */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned - add src = src, tmp // src += len & -OPSIZ - mov ar.ec = MEMLAT + 4 + 1 // four more passes needed - ld8 r[1] = [asrc], 8 ;; // r[1] = w0 + add src = src, tmp /* src += len & -OPSIZ */ + mov ar.ec = MEMLAT + 4 + 1 /* four more passes needed */ + ld8 r[1] = [asrc], 8 ;; /* r[1] = w0 */ .align 32 -// We enter this loop with p6 cleared by the above comparison +/* We enter this loop with p6 cleared by the above comparison */ .l2: -(p[0]) ld8 r[0] = [asrc], 8 // r[0] = w1 +(p[0]) ld8 r[0] = [asrc], 8 /* r[0] = w1 */ (p[0]) ld8 q[0] = [dest], 8 -(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 >> sh1 -(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 << sh2 +(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 /* tmp1 = w0 >> sh1 */ +(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 /* tmp2 = w1 << sh2 */ (p[MEMLAT+4]) cmp.ne p6, p0 = q[MEMLAT + 4], val[1] -(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] // val = tmp1 | tmp2 +(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] /* val = tmp1 | tmp2 */ (p6) br.cond.spnt .l2exit br.ctop.sptk .l2 br.cond.sptk .cmpfew .l3exit: mux1 value1 = r[MEMLAT], @rev mux1 value2 = q[MEMLAT], @rev - cmp.ne p6, p0 = r0, r0 ;; // clear p6 + cmp.ne p6, p0 = r0, r0 ;; /* clear p6 */ .l2exit: (p6) mux1 value1 = val[1], @rev (p6) mux1 value2 = q[MEMLAT + 4], @rev ;; cmp.ltu p6, p7 = value2, value1 ;; (p6) mov ret0 = -1 (p7) mov ret0 = 1 - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 .src_aligned: - cmp.ne p6, p0 = r0, r0 // clear p6 - mov ar.ec = MEMLAT + 1 ;; // set EC + cmp.ne p6, p0 = r0, r0 /* clear p6 */ + mov ar.ec = MEMLAT + 1 ;; /* set EC */ .l3: (p[0]) ld8 r[0] = [src], 8 (p[0]) ld8 q[0] = [dest], 8 @@ -142,8 +141,8 @@ ENTRY(memcmp) (p6) br.cond.spnt .l3exit br.ctop.dptk .l3 ;; .cmpfew: - cmp.eq p6, p0 = len, r0 // is len == 0 ? - adds len = -1, len // --len; + cmp.eq p6, p0 = len, r0 /* is len == 0 ? 
*/ + adds len = -1, len /* --len; */ (p6) br.cond.spnt .restore_and_exit ;; mov ar.lc = len .l4: @@ -154,10 +153,10 @@ ENTRY(memcmp) (p6) br.cond.spnt .done br.cloop.dptk .l4 ;; .done: -(p6) sub ret0 = value2, value1 // don't execute it if falling thru +(p6) sub ret0 = value2, value1 /* don't execute it if falling thru */ .restore_and_exit: - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 END(memcmp) libc_hidden_def (memcmp) diff --git a/libc/string/ia64/memcpy.S b/libc/string/ia64/memcpy.S index 810eb0c0e..5f2e79414 100644 --- a/libc/string/ia64/memcpy.S +++ b/libc/string/ia64/memcpy.S @@ -15,9 +15,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Return: dest @@ -37,13 +36,13 @@ #define USE_LFETCH #define USE_FLP -#include "sysdep.h" +#include <sysdep.h> #undef ret #define LFETCH_DIST 500 -#define ALIGN_UNROLL_no 4 // no. of elements -#define ALIGN_UNROLL_sh 2 // (shift amount) +#define ALIGN_UNROLL_no 4 /* no. of elements */ +#define ALIGN_UNROLL_sh 2 /* (shift amount) */ #define MEMLAT 8 #define Nrot ((4*(MEMLAT+2) + 7) & ~7) @@ -168,76 +167,76 @@ ENTRY(memcpy) .rotr r[MEMLAT+1], s[MEMLAT+2], q[MEMLAT+1], t[MEMLAT+1] .rotp p[MEMLAT+2] .rotf fr[MEMLAT+1], fq[MEMLAT+1], fs[MEMLAT+1], ft[MEMLAT+1] - mov ret0 = in0 // return tmp2 = dest + mov ret0 = in0 /* return tmp2 = dest */ .save pr, saved_pr - movi0 saved_pr = pr // save the predicate registers + movi0 saved_pr = pr /* save the predicate registers */ } { .mmi - and tmp4 = 7, in0 // check if destination is aligned - mov dest = in0 // dest - mov src = in1 // src + and tmp4 = 7, in0 /* check if destination is aligned */ + mov dest = in0 /* dest */ + mov src = in1 /* src */ ;; } { .mii - cmp.eq p_scr, p0 = in2, r0 // if (len == 0) + cmp.eq p_scr, p0 = in2, r0 /* if (len == 0) */ .save ar.lc, saved_lc - movi0 saved_lc = ar.lc // save the loop counter + movi0 saved_lc = ar.lc /* save the loop counter */ .body - cmp.ge p_few, p0 = OP_T_THRES, in2 // is len <= OP_T_THRESH + cmp.ge p_few, p0 = OP_T_THRES, in2 /* is len <= OP_T_THRESH */ } { .mbb - mov len = in2 // len -(p_scr) br.cond.dpnt.few .restore_and_exit // Branch no. 1: return dest -(p_few) br.cond.dpnt.many .copy_bytes // Branch no. 2: copy byte by byte + mov len = in2 /* len */ +(p_scr) br.cond.dpnt.few .restore_and_exit /* Branch no. 1: return dest */ +(p_few) br.cond.dpnt.many .copy_bytes /* Branch no. 2: copy byte by byte */ ;; } { .mmi #if defined(USE_LFETCH) - lfetch.nt1 [dest] // - lfetch.nt1 [src] // + lfetch.nt1 [dest] /* */ + lfetch.nt1 [src] /* */ #endif - shr.u elemcnt = len, 3 // elemcnt = len / 8 + shr.u elemcnt = len, 3 /* elemcnt = len / 8 */ } { .mib - cmp.eq p_scr, p0 = tmp4, r0 // is destination aligned? - sub loopcnt = 7, tmp4 // + cmp.eq p_scr, p0 = tmp4, r0 /* is destination aligned? 
*/ + sub loopcnt = 7, tmp4 /* */ (p_scr) br.cond.dptk.many .dest_aligned ;; } { .mmi - ld1 tmp2 = [src], 1 // - sub len = len, loopcnt, 1 // reduce len - movi0 ar.lc = loopcnt // + ld1 tmp2 = [src], 1 /* */ + sub len = len, loopcnt, 1 /* reduce len */ + movi0 ar.lc = loopcnt /* */ } { .mib - cmp.ne p_scr, p0 = 0, loopcnt // avoid loading beyond end-point + cmp.ne p_scr, p0 = 0, loopcnt /* avoid loading beyond end-point */ ;; } -.l0: // ---------------------------- // L0: Align src on 8-byte boundary +.l0: /* ---------------------------- L0: Align src on 8-byte boundary */ { .mmi - st1 [dest] = tmp2, 1 // -(p_scr) ld1 tmp2 = [src], 1 // + st1 [dest] = tmp2, 1 /* */ +(p_scr) ld1 tmp2 = [src], 1 /* */ } { .mib - cmp.lt p_scr, p0 = 1, loopcnt // avoid load beyond end-point + cmp.lt p_scr, p0 = 1, loopcnt /* avoid load beyond end-point */ add loopcnt = -1, loopcnt - br.cloop.dptk.few .l0 // + br.cloop.dptk.few .l0 /* */ ;; } .dest_aligned: { .mmi - and tmp4 = 7, src // ready for alignment check - shr.u elemcnt = len, 3 // elemcnt = len / 8 + and tmp4 = 7, src /* ready for alignment check */ + shr.u elemcnt = len, 3 /* elemcnt = len / 8 */ ;; } { .mib - cmp.ne p_scr, p0 = tmp4, r0 // is source also aligned - tbit.nz p_xtr, p_nxtr = src, 3 // prepare a separate move if src -} { .mib // is not 16B aligned - add ptr2 = LFETCH_DIST, dest // prefetch address + cmp.ne p_scr, p0 = tmp4, r0 /* is source also aligned */ + tbit.nz p_xtr, p_nxtr = src, 3 /* prepare a separate move if src */ +} { .mib /* is not 16B aligned */ + add ptr2 = LFETCH_DIST, dest /* prefetch address */ add ptr1 = LFETCH_DIST, src (p_scr) br.cond.dptk.many .src_not_aligned ;; } -// The optimal case, when dest, and src are aligned +/* The optimal case, when dest, and src are aligned */ .both_aligned: { .mmi .pred.rel "mutex",p_xtr,p_nxtr -(p_xtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no+1, elemcnt // Need N + 1 to qualify -(p_nxtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no, elemcnt // Need only N to qualify - movi0 pr.rot = 1 << 16 // set rotating predicates +(p_xtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no+1, elemcnt /* Need N + 1 to qualify */ +(p_nxtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no, elemcnt /* Need only N to qualify */ + movi0 pr.rot = 1 << 16 /* set rotating predicates */ } { .mib (p_scr) br.cond.dpnt.many .copy_full_words ;; } @@ -245,21 +244,21 @@ ENTRY(memcpy) { .mmi (p_xtr) load tempreg = [src], 8 (p_xtr) add elemcnt = -1, elemcnt - movi0 ar.ec = MEMLAT + 1 // set the epilog counter + movi0 ar.ec = MEMLAT + 1 /* set the epilog counter */ ;; } { .mmi -(p_xtr) add len = -8, len // - add asrc = 16, src // one bank apart (for USE_INT) - shr.u loopcnt = elemcnt, ALIGN_UNROLL_sh // cater for unrolling +(p_xtr) add len = -8, len /* */ + add asrc = 16, src /* one bank apart (for USE_INT) */ + shr.u loopcnt = elemcnt, ALIGN_UNROLL_sh /* cater for unrolling */ ;;} { .mmi add loopcnt = -1, loopcnt -(p_xtr) store [dest] = tempreg, 8 // copy the "extra" word +(p_xtr) store [dest] = tempreg, 8 /* copy the "extra" word */ nop.i 0 ;; } { .mib add adest = 16, dest - movi0 ar.lc = loopcnt // set the loop counter + movi0 ar.lc = loopcnt /* set the loop counter */ ;; } #ifdef GAS_ALIGN_BREAKS_UNWIND_INFO @@ -268,7 +267,7 @@ ENTRY(memcpy) .align 32 #endif #if defined(USE_FLP) -.l1: // ------------------------------- // L1: Everything a multiple of 8 +.l1: /* ------------------------------- L1: Everything a multiple of 8 */ { .mmi #if defined(USE_LFETCH) (p[0]) lfetch.nt1 [ptr2],32 @@ -290,7 +289,7 @@ ENTRY(memcpy) br.ctop.dptk.many .l1 ;; } #elif 
defined(USE_INT) -.l1: // ------------------------------- // L1: Everything a multiple of 8 +.l1: /* ------------------------------- L1: Everything a multiple of 8 */ { .mmi (p[0]) load the_r[0] = [src], 8 (p[0]) load the_q[0] = [asrc], 8 @@ -317,58 +316,58 @@ ENTRY(memcpy) .copy_full_words: { .mib - cmp.gt p_scr, p0 = 8, len // - shr.u elemcnt = len, 3 // + cmp.gt p_scr, p0 = 8, len /* */ + shr.u elemcnt = len, 3 /* */ (p_scr) br.cond.dpnt.many .copy_bytes ;; } { .mii load tempreg = [src], 8 - add loopcnt = -1, elemcnt // + add loopcnt = -1, elemcnt /* */ ;; } { .mii - cmp.ne p_scr, p0 = 0, loopcnt // - mov ar.lc = loopcnt // + cmp.ne p_scr, p0 = 0, loopcnt /* */ + mov ar.lc = loopcnt /* */ ;; } -.l2: // ------------------------------- // L2: Max 4 words copied separately +.l2: /* ------------------------------- L2: Max 4 words copied separately */ { .mmi store [dest] = tempreg, 8 -(p_scr) load tempreg = [src], 8 // +(p_scr) load tempreg = [src], 8 /* */ add len = -8, len } { .mib - cmp.lt p_scr, p0 = 1, loopcnt // avoid load beyond end-point + cmp.lt p_scr, p0 = 1, loopcnt /* avoid load beyond end-point */ add loopcnt = -1, loopcnt br.cloop.dptk.few .l2 ;; } .copy_bytes: { .mib - cmp.eq p_scr, p0 = len, r0 // is len == 0 ? - add loopcnt = -1, len // len--; + cmp.eq p_scr, p0 = len, r0 /* is len == 0 ? */ + add loopcnt = -1, len /* len--; */ (p_scr) br.cond.spnt .restore_and_exit ;; } { .mii ld1 tmp2 = [src], 1 movi0 ar.lc = loopcnt - cmp.ne p_scr, p0 = 0, loopcnt // avoid load beyond end-point + cmp.ne p_scr, p0 = 0, loopcnt /* avoid load beyond end-point */ ;; } -.l3: // ------------------------------- // L3: Final byte move +.l3: /* ------------------------------- L3: Final byte move */ { .mmi st1 [dest] = tmp2, 1 (p_scr) ld1 tmp2 = [src], 1 } { .mib - cmp.lt p_scr, p0 = 1, loopcnt // avoid load beyond end-point + cmp.lt p_scr, p0 = 1, loopcnt /* avoid load beyond end-point */ add loopcnt = -1, loopcnt br.cloop.dptk.few .l3 ;; } .restore_and_exit: { .mmi - movi0 pr = saved_pr, -1 // restore the predicate registers + movi0 pr = saved_pr, -1 /* restore the predicate registers */ ;; } { .mib - movi0 ar.lc = saved_lc // restore the loop counter + movi0 ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 ;; } @@ -376,41 +375,41 @@ ENTRY(memcpy) .src_not_aligned: { .mmi cmp.gt p_scr, p0 = 16, len - and sh1 = 7, src // sh1 = src % 8 - shr.u loopcnt = len, 4 // element-cnt = len / 16 + and sh1 = 7, src /* sh1 = src % 8 */ + shr.u loopcnt = len, 4 /* element-cnt = len / 16 */ } { .mib add tmp4 = @ltoff(.table), gp add tmp3 = @ltoff(.loop56), gp -(p_scr) br.cond.dpnt.many .copy_bytes // do byte by byte if too few +(p_scr) br.cond.dpnt.many .copy_bytes /* do byte by byte if too few */ ;; } { .mmi - and asrc = -8, src // asrc = (-8) -- align src for loop - add loopcnt = -1, loopcnt // loopcnt-- - shl sh1 = sh1, 3 // sh1 = 8 * (src % 8) + and asrc = -8, src /* asrc = (-8) -- align src for loop */ + add loopcnt = -1, loopcnt /* loopcnt-- */ + shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */ } { .mmi - ld8 ptable = [tmp4] // ptable = &table - ld8 ploop56 = [tmp3] // ploop56 = &loop56 - and tmp2 = -16, len // tmp2 = len & -OPSIZ + ld8 ptable = [tmp4] /* ptable = &table */ + ld8 ploop56 = [tmp3] /* ploop56 = &loop56 */ + and tmp2 = -16, len /* tmp2 = len & -OPSIZ */ ;; } { .mmi - add tmp3 = ptable, sh1 // tmp3 = &table + sh1 - add src = src, tmp2 // src += len & (-16) - movi0 ar.lc = loopcnt // set LC + add tmp3 = ptable, sh1 /* tmp3 = &table + sh1 */ + add src = src, tmp2 /* src += len & 
(-16) */ + movi0 ar.lc = loopcnt /* set LC */ ;; } { .mmi - ld8 tmp4 = [tmp3] // tmp4 = loop offset - sub len = len, tmp2 // len -= len & (-16) - movi0 ar.ec = MEMLAT + 2 // one more pass needed + ld8 tmp4 = [tmp3] /* tmp4 = loop offset */ + sub len = len, tmp2 /* len -= len & (-16) */ + movi0 ar.ec = MEMLAT + 2 /* one more pass needed */ ;; } { .mmi - ld8 s[1] = [asrc], 8 // preload - sub loopaddr = ploop56,tmp4 // loopadd = &loop56 - loop offset - movi0 pr.rot = 1 << 16 // set rotating predicates + ld8 s[1] = [asrc], 8 /* preload */ + sub loopaddr = ploop56,tmp4 /* loopadd = &loop56 - loop offset */ + movi0 pr.rot = 1 << 16 /* set rotating predicates */ ;; } { .mib nop.m 0 movi0 b6 = loopaddr - br b6 // jump to the appropriate loop + br b6 /* jump to the appropriate loop */ ;; } LOOP(8) @@ -426,7 +425,7 @@ libc_hidden_def (memcpy) .rodata .align 8 .table: - data8 0 // dummy entry + data8 0 /* dummy entry */ data8 .loop56 - .loop8 data8 .loop56 - .loop16 data8 .loop56 - .loop24 diff --git a/libc/string/ia64/memmove.S b/libc/string/ia64/memmove.S index 00342d8e0..7d830f912 100644 --- a/libc/string/ia64/memmove.S +++ b/libc/string/ia64/memmove.S @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Return: dest @@ -33,7 +32,7 @@ sh1 must be computed using an extra instruction: sub sh1 = 64, sh1 or the UM.be bit should be cleared at the beginning and set at the end. */ -#include "sysdep.h" +#include <sysdep.h> #undef ret #define OP_T_THRES 16 @@ -81,48 +80,48 @@ ENTRY(memmove) alloc r2 = ar.pfs, 3, Nrot - 3, 0, Nrot .rotr r[MEMLAT + 2], q[MEMLAT + 1] .rotp p[MEMLAT + 2] - mov ret0 = in0 // return value = dest + mov ret0 = in0 /* return value = dest */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers + mov saved_pr = pr /* save the predicate registers */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body - or tmp3 = in0, in1 ;; // tmp3 = dest | src - or tmp3 = tmp3, in2 // tmp3 = dest | src | len - mov dest = in0 // dest - mov src = in1 // src - mov len = in2 // len - sub tmp2 = r0, in0 // tmp2 = -dest - cmp.eq p6, p0 = in2, r0 // if (len == 0) -(p6) br.cond.spnt .restore_and_exit;;// return dest; - and tmp4 = 7, tmp3 // tmp4 = (dest | src | len) & 7 - cmp.le p6, p0 = dest, src // if dest <= src it's always safe -(p6) br.cond.spnt .forward // to copy forward + or tmp3 = in0, in1 ;; /* tmp3 = dest | src */ + or tmp3 = tmp3, in2 /* tmp3 = dest | src | len */ + mov dest = in0 /* dest */ + mov src = in1 /* src */ + mov len = in2 /* len */ + sub tmp2 = r0, in0 /* tmp2 = -dest */ + cmp.eq p6, p0 = in2, r0 /* if (len == 0) */ +(p6) br.cond.spnt .restore_and_exit;;/* return dest; */ + and tmp4 = 7, tmp3 /* tmp4 = (dest | src | len) & 7 */ + cmp.le p6, p0 = dest, src /* if dest <= src it's always safe */ +(p6) br.cond.spnt .forward /* to copy forward */ add tmp3 = src, len;; - cmp.lt p6, p0 = dest, tmp3 // if dest > src && dest < src + len -(p6) br.cond.spnt .backward // we have to copy backward + cmp.lt p6, p0 = dest, tmp3 /* if dest > src && dest < src + len */ +(p6) br.cond.spnt .backward /* we have to copy backward */ .forward: - shr.u loopcnt = len, 4 ;; // loopcnt = len / 16 - 
cmp.ne p6, p0 = tmp4, r0 // if ((dest | src | len) & 7 != 0) -(p6) br.cond.sptk .next // goto next; + shr.u loopcnt = len, 4 ;; /* loopcnt = len / 16 */ + cmp.ne p6, p0 = tmp4, r0 /* if ((dest | src | len) & 7 != 0) */ +(p6) br.cond.sptk .next /* goto next; */ -// The optimal case, when dest, src and len are all multiples of 8 +/* The optimal case, when dest, src and len are all multiples of 8 */ and tmp3 = 0xf, len - mov pr.rot = 1 << 16 // set rotating predicates - mov ar.ec = MEMLAT + 1 ;; // set the epilog counter - cmp.ne p6, p0 = tmp3, r0 // do we have to copy an extra word? - adds loopcnt = -1, loopcnt;; // --loopcnt + mov pr.rot = 1 << 16 /* set rotating predicates */ + mov ar.ec = MEMLAT + 1 ;; /* set the epilog counter */ + cmp.ne p6, p0 = tmp3, r0 /* do we have to copy an extra word? */ + adds loopcnt = -1, loopcnt;; /* --loopcnt */ (p6) ld8 value = [src], 8;; -(p6) st8 [dest] = value, 8 // copy the "odd" word - mov ar.lc = loopcnt // set the loop counter +(p6) st8 [dest] = value, 8 /* copy the "odd" word */ + mov ar.lc = loopcnt /* set the loop counter */ cmp.eq p6, p0 = 8, len -(p6) br.cond.spnt .restore_and_exit;;// the one-word special case - adds adest = 8, dest // set adest one word ahead of dest - adds asrc = 8, src ;; // set asrc one word ahead of src - nop.b 0 // get the "golden" alignment for - nop.b 0 // the next loop +(p6) br.cond.spnt .restore_and_exit;;/* the one-word special case */ + adds adest = 8, dest /* set adest one word ahead of dest */ + adds asrc = 8, src ;; /* set asrc one word ahead of src */ + nop.b 0 /* get the "golden" alignment for */ + nop.b 0 /* the next loop */ .l0: (p[0]) ld8 r[0] = [src], 16 (p[0]) ld8 q[0] = [asrc], 16 @@ -130,50 +129,50 @@ ENTRY(memmove) (p[MEMLAT]) st8 [adest] = q[MEMLAT], 16 br.ctop.dptk .l0 ;; - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 .next: - cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES - and loopcnt = 7, tmp2 // loopcnt = -dest % 8 -(p6) br.cond.spnt .cpyfew // copy byte by byte + cmp.ge p6, p0 = OP_T_THRES, len /* is len <= OP_T_THRES */ + and loopcnt = 7, tmp2 /* loopcnt = -dest % 8 */ +(p6) br.cond.spnt .cpyfew /* copy byte by byte */ ;; cmp.eq p6, p0 = loopcnt, r0 (p6) br.cond.sptk .dest_aligned - sub len = len, loopcnt // len -= -dest % 8 - adds loopcnt = -1, loopcnt // --loopcnt + sub len = len, loopcnt /* len -= -dest % 8 */ + adds loopcnt = -1, loopcnt /* --loopcnt */ ;; mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes - ld1 value = [src], 1 // value = *src++ +.l1: /* copy -dest % 8 bytes */ + ld1 value = [src], 1 /* value = *src++ */ ;; - st1 [dest] = value, 1 // *dest++ = value + st1 [dest] = value, 1 /* *dest++ = value */ br.cloop.dptk .l1 .dest_aligned: - and sh1 = 7, src // sh1 = src % 8 - and tmp2 = -8, len // tmp2 = len & -OPSIZ - and asrc = -8, src // asrc = src & -OPSIZ -- align src - shr.u loopcnt = len, 3 // loopcnt = len / 8 - and len = 7, len;; // len = len % 8 - adds loopcnt = -1, loopcnt // --loopcnt + and sh1 = 7, src /* sh1 = src % 8 */ + and tmp2 = -8, len /* tmp2 = len & -OPSIZ */ + and asrc = -8, src /* asrc = src & -OPSIZ -- align src */ + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + and len = 7, len;; /* len = len % 8 */ + adds loopcnt = -1, loopcnt /* --loopcnt */ addl tmp4 = @ltoff(.table), gp addl tmp3 = @ltoff(.loop56), gp - mov ar.ec = MEMLAT + 1 // set EC - mov pr.rot = 
1 << 16;; // set rotating predicates - mov ar.lc = loopcnt // set LC - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + mov ar.ec = MEMLAT + 1 /* set EC */ + mov pr.rot = 1 << 16;; /* set rotating predicates */ + mov ar.lc = loopcnt /* set LC */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned - add src = src, tmp2 // src += len & -OPSIZ - shl sh1 = sh1, 3 // sh1 = 8 * (src % 8) - ld8 ploop56 = [tmp3] // ploop56 = &loop56 - ld8 ptable = [tmp4];; // ptable = &table - add tmp3 = ptable, sh1;; // tmp3 = &table + sh1 - mov ar.ec = MEMLAT + 1 + 1 // one more pass needed - ld8 tmp4 = [tmp3];; // tmp4 = loop offset - sub loopaddr = ploop56,tmp4 // loopadd = &loop56 - loop offset - ld8 r[1] = [asrc], 8;; // w0 + add src = src, tmp2 /* src += len & -OPSIZ */ + shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */ + ld8 ploop56 = [tmp3] /* ploop56 = &loop56 */ + ld8 ptable = [tmp4];; /* ptable = &table */ + add tmp3 = ptable, sh1;; /* tmp3 = &table + sh1 */ + mov ar.ec = MEMLAT + 1 + 1 /* one more pass needed */ + ld8 tmp4 = [tmp3];; /* tmp4 = loop offset */ + sub loopaddr = ploop56,tmp4 /* loopadd = &loop56 - loop offset */ + ld8 r[1] = [asrc], 8;; /* w0 */ mov b6 = loopaddr;; - br b6 // jump to the appropriate loop + br b6 /* jump to the appropriate loop */ LOOP(8) LOOP(16) @@ -189,8 +188,8 @@ ENTRY(memmove) (p[MEMLAT]) st8 [dest] = r[MEMLAT], 8 br.ctop.dptk .l3 .cpyfew: - cmp.eq p6, p0 = len, r0 // is len == 0 ? - adds len = -1, len // --len; + cmp.eq p6, p0 = len, r0 /* is len == 0 ? */ + adds len = -1, len /* --len; */ (p6) br.cond.spnt .restore_and_exit ;; mov ar.lc = len .l4: @@ -199,36 +198,36 @@ ENTRY(memmove) st1 [dest] = value, 1 br.cloop.dptk .l4 ;; .restore_and_exit: - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 -// In the case of a backward copy, optimise only the case when everything -// is a multiple of 8, otherwise copy byte by byte. The backward copy is -// used only when the blocks are overlapping and dest > src. - +/* In the case of a backward copy, optimise only the case when everything + is a multiple of 8, otherwise copy byte by byte. The backward copy is + used only when the blocks are overlapping and dest > src. 
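
For readers following the overlap handling in this memmove hunk: the branches above pick a forward copy when dest <= src and fall back to a backward copy only when dest lands inside [src, src + len). A minimal C sketch of that same decision, with an invented helper name that is not part of the patch:

#include <stddef.h>

/* Sketch of the direction choice made by the assembly: a forward copy is
   safe unless dest overlaps the tail of src. */
static void *overlap_safe_copy(void *s1, const void *s2, size_t n)
{
	char *d = (char *) s1;
	const char *s = (const char *) s2;

	if (d <= s || d >= s + n) {
		while (n--)		/* forward, byte by byte */
			*d++ = *s++;
	} else {
		d += n;			/* backward, starting past the end */
		s += n;
		while (n--)
			*--d = *--s;
	}
	return s1;
}

The assembly additionally special-cases the all-multiple-of-8 situation with 8-byte loads and stores; the sketch only shows the direction test.
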
+*/ .backward: - shr.u loopcnt = len, 3 // loopcnt = len / 8 - add src = src, len // src points one byte past the end - add dest = dest, len ;; // dest points one byte past the end - mov ar.ec = MEMLAT + 1 // set the epilog counter - mov pr.rot = 1 << 16 // set rotating predicates - adds loopcnt = -1, loopcnt // --loopcnt - cmp.ne p6, p0 = tmp4, r0 // if ((dest | src | len) & 7 != 0) -(p6) br.cond.sptk .bytecopy ;; // copy byte by byte backward - adds src = -8, src // src points to the last word - adds dest = -8, dest // dest points to the last word - mov ar.lc = loopcnt;; // set the loop counter + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + add src = src, len /* src points one byte past the end */ + add dest = dest, len ;; /* dest points one byte past the end */ + mov ar.ec = MEMLAT + 1 /* set the epilog counter */ + mov pr.rot = 1 << 16 /* set rotating predicates */ + adds loopcnt = -1, loopcnt /* --loopcnt */ + cmp.ne p6, p0 = tmp4, r0 /* if ((dest | src | len) & 7 != 0) */ +(p6) br.cond.sptk .bytecopy ;; /* copy byte by byte backward */ + adds src = -8, src /* src points to the last word */ + adds dest = -8, dest /* dest points to the last word */ + mov ar.lc = loopcnt;; /* set the loop counter */ .l5: (p[0]) ld8 r[0] = [src], -8 (p[MEMLAT]) st8 [dest] = r[MEMLAT], -8 br.ctop.dptk .l5 br.cond.sptk .restore_and_exit .bytecopy: - adds src = -1, src // src points to the last byte - adds dest = -1, dest // dest points to the last byte - adds loopcnt = -1, len;; // loopcnt = len - 1 - mov ar.lc = loopcnt;; // set the loop counter + adds src = -1, src /* src points to the last byte */ + adds dest = -1, dest /* dest points to the last byte */ + adds loopcnt = -1, len;; /* loopcnt = len - 1 */ + mov ar.lc = loopcnt;; /* set the loop counter */ .l6: (p[0]) ld1 r[0] = [src], -1 (p[MEMLAT]) st1 [dest] = r[MEMLAT], -1 @@ -239,7 +238,7 @@ END(memmove) .rodata .align 8 .table: - data8 0 // dummy entry + data8 0 /* dummy entry */ data8 .loop56 - .loop8 data8 .loop56 - .loop16 data8 .loop56 - .loop24 diff --git a/libc/string/ia64/memset.S b/libc/string/ia64/memset.S index ed27f3f31..7bd15c88a 100644 --- a/libc/string/ia64/memset.S +++ b/libc/string/ia64/memset.S @@ -15,9 +15,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Return: dest @@ -33,7 +32,7 @@ Since a stf.spill f0 can store 16B in one go, we use this instruction to get peak speed when value = 0. 
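
A note on the memset below: mux1 value = value, @brcst replicates the fill byte into every byte of a 64-bit register so the bulk of the buffer can be written with whole-word stores (and with stf.spill when the value is zero). A portable sketch of that broadcast step; the helper names are illustrative only:

#include <stdint.h>
#include <stddef.h>

/* Replicate an 8-bit value across a 64-bit word, as mux1 @brcst does. */
static uint64_t broadcast_byte(unsigned char c)
{
	return (uint64_t) c * 0x0101010101010101ULL;
}

/* Fill an 8-byte-aligned region one word at a time with the broadcast value. */
static void fill_words(uint64_t *p, unsigned char c, size_t nwords)
{
	uint64_t v = broadcast_byte(c);

	while (nwords--)
		*p++ = v;
}

The assembly still has to handle the unaligned head and the sub-word tail separately, which is what the st8/st4/st2/st1 ladder around the main loops is for.
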
*/ -#include "sysdep.h" +#include <sysdep.h> #undef ret #define dest in0 @@ -46,15 +45,15 @@ #define ptr1 r28 #define ptr2 r27 #define ptr3 r26 -#define ptr9 r24 +#define ptr9 r24 #define loopcnt r23 #define linecnt r22 #define bytecnt r21 #define fvalue f6 -// This routine uses only scratch predicate registers (p6 - p15) -#define p_scr p6 // default register for same-cycle branches +/* This routine uses only scratch predicate registers (p6 - p15) */ +#define p_scr p6 /* default register for same-cycle branches */ #define p_nz p7 #define p_zr p8 #define p_unalgn p9 @@ -68,7 +67,7 @@ #define MIN1 15 #define MIN1P1HALF 8 #define LINE_SIZE 128 -#define LSIZE_SH 7 // shift amount +#define LSIZE_SH 7 /* shift amount */ #define PREF_AHEAD 8 #define USE_FLP @@ -90,97 +89,97 @@ ENTRY(memset) movi0 save_lc = ar.lc } { .mmi .body - mov ret0 = dest // return value - cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero + mov ret0 = dest /* return value */ + cmp.ne p_nz, p_zr = value, r0 /* use stf.spill if value is zero */ cmp.eq p_scr, p0 = cnt, r0 ;; } { .mmi - and ptr2 = -(MIN1+1), dest // aligned address - and tmp = MIN1, dest // prepare to check for alignment - tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U) + and ptr2 = -(MIN1+1), dest /* aligned address */ + and tmp = MIN1, dest /* prepare to check for alignment */ + tbit.nz p_y, p_n = dest, 0 /* Do we have an odd address? (M_B_U) */ } { .mib mov ptr1 = dest - mux1 value = value, @brcst // create 8 identical bytes in word -(p_scr) br.ret.dpnt.many rp // return immediately if count = 0 + mux1 value = value, @brcst /* create 8 identical bytes in word */ +(p_scr) br.ret.dpnt.many rp /* return immediately if count = 0 */ ;; } { .mib cmp.ne p_unalgn, p0 = tmp, r0 -} { .mib // NB: # of bytes to move is 1 higher - sub bytecnt = (MIN1+1), tmp // than loopcnt - cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task? -(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U) +} { .mib /* NB: # of bytes to move is 1 higher */ + sub bytecnt = (MIN1+1), tmp /* than loopcnt */ + cmp.gt p_scr, p0 = 16, cnt /* is it a minimalistic task? */ +(p_scr) br.cond.dptk.many .move_bytes_unaligned /* go move just a few (M_B_U) */ ;; } { .mmi -(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment -(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ? +(p_unalgn) add ptr1 = (MIN1+1), ptr2 /* after alignment */ +(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 /* after alignment */ +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 /* should we do a st8 ? */ ;; } { .mib (p_y) add cnt = -8, cnt -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ? +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 /* should we do a st4 ? */ } { .mib (p_y) st8 [ptr2] = value, -4 (p_n) add ptr2 = 4, ptr2 ;; } { .mib (p_yy) add cnt = -4, cnt -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ? +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 /* should we do a st2 ? */ } { .mib (p_yy) st4 [ptr2] = value, -2 (p_nn) add ptr2 = 2, ptr2 ;; } { .mmi - mov tmp = LINE_SIZE+1 // for compare + mov tmp = LINE_SIZE+1 /* for compare */ (p_y) add cnt = -2, cnt -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ? +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 /* should we do a st1 ? 
*/ } { .mmi - setf.sig fvalue=value // transfer value to FLP side + setf.sig fvalue=value /* transfer value to FLP side */ (p_y) st2 [ptr2] = value, -1 (p_n) add ptr2 = 1, ptr2 ;; } { .mmi (p_yy) st1 [ptr2] = value - cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task? + cmp.gt p_scr, p0 = tmp, cnt /* is it a minimalistic task? */ } { .mbb (p_yy) add cnt = -1, cnt -(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few +(p_scr) br.cond.dpnt.many .fraction_of_line /* go move just a few */ ;; } { .mib nop.m 0 shr.u linecnt = cnt, LSIZE_SH -(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill +(p_zr) br.cond.dptk.many .l1b /* Jump to use stf.spill */ ;; } #ifndef GAS_ALIGN_BREAKS_UNWIND_INFO - .align 32 // -------- // L1A: store ahead into cache lines; fill later + .align 32 /* -------- L1A: store ahead into cache lines; fill later */ #endif { .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder + and tmp = -(LINE_SIZE), cnt /* compute end of range */ + mov ptr9 = ptr1 /* used for prefetching */ + and cnt = (LINE_SIZE-1), cnt /* remainder */ } { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value + mov loopcnt = PREF_AHEAD-1 /* default prefetch loop */ + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt /* check against actual value */ ;; } { .mmi -(p_scr) add loopcnt = -1, linecnt // start of stores - add ptr2 = 8, ptr1 // (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total -;; } // range +(p_scr) add loopcnt = -1, linecnt /* start of stores */ + add ptr2 = 8, ptr1 /* (beyond prefetch stores) */ + add ptr1 = tmp, ptr1 /* first address beyond total */ +;; } /* range */ { .mmi - add tmp = -1, linecnt // next loop count + add tmp = -1, linecnt /* next loop count */ movi0 ar.lc = loopcnt ;; } .pref_l1a: { .mib - store [ptr9] = myval, 128 // Do stores one cache line apart + store [ptr9] = myval, 128 /* Do stores one cache line apart */ nop.i 0 br.cloop.dptk.few .pref_l1a ;; } { .mmi - add ptr0 = 16, ptr2 // Two stores in parallel + add ptr0 = 16, ptr2 /* Two stores in parallel */ movi0 ar.lc = tmp ;; } .l1ax: @@ -211,7 +210,7 @@ ENTRY(memset) { .mmi store [ptr2] = myval, 8 store [ptr0] = myval, 32 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + cmp.lt p_scr, p0 = ptr9, ptr1 /* do we need more prefetching? */ ;; } { .mmb store [ptr2] = myval, 24 @@ -219,9 +218,9 @@ ENTRY(memset) br.cloop.dptk.few .l1ax ;; } { .mbb - cmp.le p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2 - br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3 + cmp.le p_scr, p0 = 8, cnt /* just a few bytes left ? */ +(p_scr) br.cond.dpnt.many .fraction_of_line /* Branch no. 2 */ + br.cond.dpnt.many .move_bytes_from_alignment /* Branch no. 
3 */ ;; } #ifdef GAS_ALIGN_BREAKS_UNWIND_INFO @@ -229,32 +228,32 @@ ENTRY(memset) #else .align 32 #endif -.l1b: // ------------------ // L1B: store ahead into cache lines; fill later +.l1b: /* ------------------ L1B: store ahead into cache lines; fill later */ { .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder + and tmp = -(LINE_SIZE), cnt /* compute end of range */ + mov ptr9 = ptr1 /* used for prefetching */ + and cnt = (LINE_SIZE-1), cnt /* remainder */ } { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value + mov loopcnt = PREF_AHEAD-1 /* default prefetch loop */ + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt /* check against actual value */ ;; } { .mmi (p_scr) add loopcnt = -1, linecnt - add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total range + add ptr2 = 16, ptr1 /* start of stores (beyond prefetch stores) */ + add ptr1 = tmp, ptr1 /* first address beyond total range */ ;; } { .mmi - add tmp = -1, linecnt // next loop count + add tmp = -1, linecnt /* next loop count */ movi0 ar.lc = loopcnt ;; } .pref_l1b: { .mib - stf.spill [ptr9] = f0, 128 // Do stores one cache line apart + stf.spill [ptr9] = f0, 128 /* Do stores one cache line apart */ nop.i 0 br.cloop.dptk.few .pref_l1b ;; } { .mmi - add ptr0 = 16, ptr2 // Two stores in parallel + add ptr0 = 16, ptr2 /* Two stores in parallel */ movi0 ar.lc = tmp ;; } .l1bx: @@ -269,7 +268,7 @@ ENTRY(memset) { .mmi stf.spill [ptr2] = f0, 32 stf.spill [ptr0] = f0, 64 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + cmp.lt p_scr, p0 = ptr9, ptr1 /* do we need more prefetching? */ ;; } { .mmb stf.spill [ptr2] = f0, 32 @@ -277,14 +276,14 @@ ENTRY(memset) br.cloop.dptk.few .l1bx ;; } { .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? + cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? */ (p_scr) br.cond.dpnt.many .move_bytes_from_alignment ;; } .fraction_of_line: { .mib add ptr2 = 16, ptr1 - shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32 + shr.u loopcnt = cnt, 5 /* loopcnt = cnt / 32 */ ;; } { .mib cmp.eq p_scr, p0 = loopcnt, r0 @@ -292,13 +291,13 @@ ENTRY(memset) (p_scr) br.cond.dpnt.many store_words ;; } { .mib - and cnt = 0x1f, cnt // compute the remaining cnt + and cnt = 0x1f, cnt /* compute the remaining cnt */ movi0 ar.lc = loopcnt ;; } #ifndef GAS_ALIGN_BREAKS_UNWIND_INFO .align 32 #endif -.l2: // ---------------------------- // L2A: store 32B in 2 cycles +.l2: /* ---------------------------- L2A: store 32B in 2 cycles */ { .mmb store [ptr1] = myval, 8 store [ptr2] = myval, 8 @@ -309,34 +308,34 @@ ENTRY(memset) ;; } store_words: { .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch + cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? 
*/ +(p_scr) br.cond.dpnt.many .move_bytes_from_alignment /* Branch */ ;; } { .mmi - store [ptr1] = myval, 8 // store - cmp.le p_y, p_n = 16, cnt // - add cnt = -8, cnt // subtract + store [ptr1] = myval, 8 /* store */ + cmp.le p_y, p_n = 16, cnt /* */ + add cnt = -8, cnt /* subtract */ ;; } { .mmi -(p_y) store [ptr1] = myval, 8 // store -(p_y) cmp.le.unc p_yy, p_nn = 16, cnt // -(p_y) add cnt = -8, cnt // subtract +(p_y) store [ptr1] = myval, 8 /* store */ +(p_y) cmp.le.unc p_yy, p_nn = 16, cnt /* */ +(p_y) add cnt = -8, cnt /* subtract */ ;; } -{ .mmi // store -(p_yy) store [ptr1] = myval, 8 // -(p_yy) add cnt = -8, cnt // subtract +{ .mmi /* store */ +(p_yy) store [ptr1] = myval, 8 /* */ +(p_yy) add cnt = -8, cnt /* subtract */ ;; } .move_bytes_from_alignment: { .mib cmp.eq p_scr, p0 = cnt, r0 - tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ? + tbit.nz.unc p_y, p0 = cnt, 2 /* should we terminate with a st4 ? */ (p_scr) br.cond.dpnt.few .restore_and_exit ;; } { .mib (p_y) st4 [ptr1] = value, 4 - tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ? + tbit.nz.unc p_yy, p0 = cnt, 1 /* should we terminate with a st2 ? */ ;; } { .mib (p_yy) st2 [ptr1] = value, 2 @@ -362,38 +361,38 @@ store_words: (p_n) add ptr2 = 2, ptr1 } { .mmi (p_y) add ptr2 = 3, ptr1 -(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte -(p_y) add cnt = -1, cnt // [15, 14 (or less) left] +(p_y) st1 [ptr1] = value, 1 /* fill 1 (odd-aligned) byte */ +(p_y) add cnt = -1, cnt /* [15, 14 (or less) left] */ ;; } { .mmi (p_yy) cmp.le.unc p_y, p0 = 8, cnt - add ptr3 = ptr1, cnt // prepare last store + add ptr3 = ptr1, cnt /* prepare last store */ movi0 ar.lc = save_lc } { .mmi -(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes -(p_yy) add cnt = -4, cnt // [11, 10 (o less) left] +(p_yy) st2 [ptr1] = value, 4 /* fill 2 (aligned) bytes */ +(p_yy) st2 [ptr2] = value, 4 /* fill 2 (aligned) bytes */ +(p_yy) add cnt = -4, cnt /* [11, 10 (o less) left] */ ;; } { .mmi (p_y) cmp.le.unc p_yy, p0 = 8, cnt - add ptr3 = -1, ptr3 // last store - tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ? + add ptr3 = -1, ptr3 /* last store */ + tbit.nz p_scr, p0 = cnt, 1 /* will there be a st2 at the end ? */ } { .mmi -(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes -(p_y) add cnt = -4, cnt // [7, 6 (or less) left] +(p_y) st2 [ptr1] = value, 4 /* fill 2 (aligned) bytes */ +(p_y) st2 [ptr2] = value, 4 /* fill 2 (aligned) bytes */ +(p_y) add cnt = -4, cnt /* [7, 6 (or less) left] */ ;; } { .mmi -(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes - // [3, 2 (or less) left] - tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ? +(p_yy) st2 [ptr1] = value, 4 /* fill 2 (aligned) bytes */ +(p_yy) st2 [ptr2] = value, 4 /* fill 2 (aligned) bytes */ + /* [3, 2 (or less) left] */ + tbit.nz p_y, p0 = cnt, 0 /* will there be a st1 at the end ? 
*/ } { .mmi (p_yy) add cnt = -4, cnt ;; } { .mmb -(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes -(p_y) st1 [ptr3] = value // fill last byte (using ptr3) +(p_scr) st2 [ptr1] = value /* fill 2 (aligned) bytes */ +(p_y) st1 [ptr3] = value /* fill last byte (using ptr3) */ br.ret.sptk.many rp ;; } END(memset) diff --git a/libc/string/ia64/softpipe.h b/libc/string/ia64/softpipe.h index d71af735e..a9a9dc679 100644 --- a/libc/string/ia64/softpipe.h +++ b/libc/string/ia64/softpipe.h @@ -12,9 +12,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* The latency of a memory load assumed by the assembly implementation of the mem and str functions. Since we don't have any clue about diff --git a/libc/string/ia64/strchr.S b/libc/string/ia64/strchr.S index 401a07941..034fd3096 100644 --- a/libc/string/ia64/strchr.S +++ b/libc/string/ia64/strchr.S @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Return: the address of the first occurence of chr in str or NULL @@ -30,7 +29,7 @@ This implementation assumes little endian mode. For big endian mode, the instruction czx1.r should be replaced by czx1.l. */ -#include "sysdep.h" +#include <sysdep.h> #undef ret #define saved_lc r18 @@ -49,15 +48,15 @@ ENTRY(strchr) .prologue alloc r2 = ar.pfs, 2, 0, 0, 0 .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body mov ret0 = str - and tmp = 7, str // tmp = str % 8 + and tmp = 7, str /* tmp = str % 8 */ mux1 chrx8 = chr, @brcst - extr.u chr = chr, 0, 8 // retain only the last byte - cmp.ne p8, p0 = r0, r0 // clear p8 + extr.u chr = chr, 0, 8 /* retain only the last byte */ + cmp.ne p8, p0 = r0, r0 /* clear p8 */ ;; - sub loopcnt = 8, tmp // loopcnt = 8 - tmp + sub loopcnt = 8, tmp /* loopcnt = 8 - tmp */ cmp.eq p6, p0 = tmp, r0 (p6) br.cond.sptk .str_aligned;; adds loopcnt = -1, loopcnt;; @@ -75,10 +74,10 @@ ENTRY(strchr) nop.b 0 nop.b 0 .l2: - ld8.s val2 = [ret0], 8 // don't bomb out here + ld8.s val2 = [ret0], 8 /* don't bomb out here */ czx1.r pos0 = val1 - xor tmp = val1, chrx8 // if val1 contains chr, tmp will - ;; // contain a zero in its position + xor tmp = val1, chrx8 /* if val1 contains chr, tmp will */ + ;; /* contain a zero in its position */ czx1.r poschr = tmp cmp.ne p6, p0 = 8, pos0 ;; @@ -90,21 +89,21 @@ ENTRY(strchr) mov val1 = val2 br.cond.dptk .l2 .foundit: -(p6) cmp.lt p8, p0 = pos0, poschr // we found chr and null in the word -(p8) br.cond.spnt .notfound // null was found before chr +(p6) cmp.lt p8, p0 = pos0, poschr /* we found chr and null in the word */ +(p8) br.cond.spnt .notfound /* null was found before chr */ add ret0 = ret0, poschr ;; - adds ret0 = -15, ret0 ;; // should be -16, but we decrement -.restore_and_exit: // ret0 in the next instruction - adds ret0 = -1, ret0 // ret0 was pointing 1 char too far - mov ar.lc = saved_lc // restore the loop counter + adds ret0 = -15, ret0 ;; /* 
should be -16, but we decrement */ +.restore_and_exit: /* ret0 in the next instruction */ + adds ret0 = -1, ret0 /* ret0 was pointing 1 char too far */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 .notfound: - mov ret0 = r0 // return NULL if null was found + mov ret0 = r0 /* return NULL if null was found */ mov ar.lc = saved_lc br.ret.sptk.many b0 .recovery: adds ret0 = -8, ret0;; - ld8 val2 = [ret0], 8 // bomb out here + ld8 val2 = [ret0], 8 /* bomb out here */ br.cond.sptk .back END(strchr) libc_hidden_def (strchr) diff --git a/libc/string/ia64/strcmp.S b/libc/string/ia64/strcmp.S index d3b41e642..c45ab4801 100644 --- a/libc/string/ia64/strcmp.S +++ b/libc/string/ia64/strcmp.S @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Return: the result of the comparison @@ -27,7 +26,7 @@ Unlike memcmp(), this function is optimized for mismatches within the first few characters. */ -#include "sysdep.h" +#include <sysdep.h> #undef ret #define s1 in0 @@ -42,7 +41,7 @@ ENTRY(strcmp) .loop: ld1 val1 = [s1], 1 ld1 val2 = [s2], 1 - cmp.eq p6, p0 = r0, r0 // set p6 + cmp.eq p6, p0 = r0, r0 /* set p6 */ ;; cmp.ne.and p6, p0 = val1, r0 cmp.ne.and p6, p0 = val2, r0 diff --git a/libc/string/ia64/strcpy.S b/libc/string/ia64/strcpy.S index e4a9915ca..c9b3bc143 100644 --- a/libc/string/ia64/strcpy.S +++ b/libc/string/ia64/strcpy.S @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Return: dest @@ -27,11 +26,11 @@ In this form, it assumes little endian mode. 
For big endian mode, the the two shifts in .l2 must be inverted: - shl value = r[1], sh1 // value = w0 << sh1 - shr.u tmp = r[0], sh2 // tmp = w1 >> sh2 + shl value = r[1], sh1 // value = w0 << sh1 + shr.u tmp = r[0], sh2 // tmp = w1 >> sh2 */ -#include "sysdep.h" +#include <sysdep.h> #undef ret #define saved_lc r15 @@ -53,62 +52,62 @@ ENTRY(strcpy) .prologue - alloc r2 = ar.pfs, 2, 0, 30, 32 + alloc r2 = ar.pfs, 2, 0, 30, 32 #define MEMLAT 2 .rotr r[MEMLAT + 2] .rotp p[MEMLAT + 1] - mov ret0 = in0 // return value = dest + mov ret0 = in0 /* return value = dest */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers + mov saved_pr = pr /* save the predicate registers */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body - sub tmp = r0, in0 ;; // tmp = -dest - mov dest = in0 // dest - mov src = in1 // src - and loopcnt = 7, tmp ;; // loopcnt = -dest % 8 + sub tmp = r0, in0 ;; /* tmp = -dest */ + mov dest = in0 /* dest */ + mov src = in1 /* src */ + and loopcnt = 7, tmp ;; /* loopcnt = -dest % 8 */ cmp.eq p6, p0 = loopcnt, r0 - adds loopcnt = -1, loopcnt // --loopcnt + adds loopcnt = -1, loopcnt /* --loopcnt */ (p6) br.cond.sptk .dest_aligned ;; mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes - ld1 c = [src], 1 // c = *src++ +.l1: /* copy -dest % 8 bytes */ + ld1 c = [src], 1 /* c = *src++ */ ;; - st1 [dest] = c, 1 // *dest++ = c + st1 [dest] = c, 1 /* *dest++ = c */ cmp.eq p6, p0 = c, r0 (p6) br.cond.dpnt .restore_and_exit br.cloop.dptk .l1 ;; .dest_aligned: - and sh1 = 7, src // sh1 = src % 8 - mov ar.lc = -1 // "infinite" loop - and asrc = -8, src ;; // asrc = src & -OPSIZ -- align src + and sh1 = 7, src /* sh1 = src % 8 */ + mov ar.lc = -1 /* "infinite" loop */ + and asrc = -8, src ;; /* asrc = src & -OPSIZ -- align src */ sub thresh = 8, sh1 - mov pr.rot = 1 << 16 // set rotating predicates - cmp.ne p7, p0 = r0, r0 // clear p7 - shl sh1 = sh1, 3 ;; // sh1 = 8 * (src % 8) - sub sh2 = 64, sh1 // sh2 = 64 - sh1 - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + mov pr.rot = 1 << 16 /* set rotating predicates */ + cmp.ne p7, p0 = r0, r0 /* clear p7 */ + shl sh1 = sh1, 3 ;; /* sh1 = 8 * (src % 8) */ + sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned ;; ld8 r[1] = [asrc],8 ;; .align 32 .l2: ld8.s r[0] = [asrc], 8 - shr.u value = r[1], sh1 ;; // value = w0 >> sh1 - czx1.r pos = value ;; // do we have an "early" zero - cmp.lt p7, p0 = pos, thresh // in w0 >> sh1? + shr.u value = r[1], sh1 ;; /* value = w0 >> sh1 */ + czx1.r pos = value ;; /* do we have an "early" zero */ + cmp.lt p7, p0 = pos, thresh /* in w0 >> sh1? 
*/ (p7) br.cond.dpnt .found0 - chk.s r[0], .recovery2 // it is safe to do that only -.back2: // after the previous test - shl tmp = r[0], sh2 // tmp = w1 << sh2 + chk.s r[0], .recovery2 /* it is safe to do that only */ +.back2: /* after the previous test */ + shl tmp = r[0], sh2 /* tmp = w1 << sh2 */ ;; - or value = value, tmp ;; // value |= tmp + or value = value, tmp ;; /* value |= tmp */ czx1.r pos = value ;; cmp.ne p7, p0 = 8, pos (p7) br.cond.dpnt .found0 - st8 [dest] = value, 8 // store val to dest + st8 [dest] = value, 8 /* store val to dest */ br.ctop.dptk .l2 ;; .src_aligned: .l3: @@ -124,14 +123,14 @@ ENTRY(strcpy) .found0: mov ar.lc = pos .l4: - extr.u c = value, 0, 8 // c = value & 0xff + extr.u c = value, 0, 8 /* c = value & 0xff */ shr.u value = value, 8 ;; st1 [dest] = c, 1 br.cloop.dptk .l4 ;; .restore_and_exit: - mov ar.lc = saved_lc // restore the loop counter - mov pr = saved_pr, -1 // restore the predicate registers + mov ar.lc = saved_lc /* restore the loop counter */ + mov pr = saved_pr, -1 /* restore the predicate registers */ br.ret.sptk.many b0 .recovery2: add tmp = -8, asrc ;; diff --git a/libc/string/ia64/strlen.S b/libc/string/ia64/strlen.S index 9b27a2d1b..83244cd76 100644 --- a/libc/string/ia64/strlen.S +++ b/libc/string/ia64/strlen.S @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Return: the length of the input string @@ -33,7 +32,7 @@ This implementation assumes little endian mode. For big endian mode, the instruction czx1.r should be replaced by czx1.l. 
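
The strlen below leans on czx1.r, which yields the index of the first zero byte in a 64-bit word, or 8 when there is none, letting the loop examine eight characters per iteration. A rough C equivalent of that test, assuming little-endian byte order; __builtin_ctzll is a GCC-style builtin used only for illustration:

#include <stdint.h>

/* Sketch of what czx1.r computes: index of the first zero byte, or 8. */
static unsigned first_zero_byte(uint64_t w)
{
	uint64_t m = (w - 0x0101010101010101ULL) & ~w & 0x8080808080808080ULL;

	if (m == 0)
		return 8;				/* no zero byte in this word */
	return (unsigned) (__builtin_ctzll(m) >> 3);	/* lowest 0x80 bit -> byte index */
}
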
*/ -#include "sysdep.h" +#include <sysdep.h> #undef ret #define saved_lc r18 @@ -50,13 +49,13 @@ ENTRY(strlen) .prologue alloc r2 = ar.pfs, 1, 0, 0, 0 .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body mov str = in0 - mov len = r0 // len = 0 - and tmp = 7, in0 // tmp = str % 8 + mov len = r0 /* len = 0 */ + and tmp = 7, in0 /* tmp = str % 8 */ ;; - sub loopcnt = 8, tmp // loopcnt = 8 - tmp + sub loopcnt = 8, tmp /* loopcnt = 8 - tmp */ cmp.eq p6, p0 = tmp, r0 (p6) br.cond.sptk .str_aligned;; adds loopcnt = -1, loopcnt;; @@ -69,11 +68,11 @@ ENTRY(strlen) adds len = 1, len br.cloop.dptk .l1 .str_aligned: - mov origadd = str // origadd = orig + mov origadd = str /* origadd = orig */ ld8 val1 = [str], 8;; nop.b 0 nop.b 0 -.l2: ld8.s val2 = [str], 8 // don't bomb out here +.l2: ld8.s val2 = [str], 8 /* don't bomb out here */ czx1.r pos0 = val1 ;; cmp.ne p6, p0 = 8, pos0 @@ -83,16 +82,16 @@ ENTRY(strlen) mov val1 = val2 br.cond.dptk .l2 .foundit: - sub tmp = str, origadd // tmp = crt address - orig + sub tmp = str, origadd /* tmp = crt address - orig */ add len = len, pos0;; add len = len, tmp;; adds len = -16, len .restore_and_exit: - mov ar.lc = saved_lc // restore the loop counter + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 .recovery: adds str = -8, str;; - ld8 val2 = [str], 8 // bomb out here + ld8 val2 = [str], 8 /* bomb out here */ br.cond.sptk .back END(strlen) libc_hidden_def (strlen) diff --git a/libc/string/ia64/strncmp.S b/libc/string/ia64/strncmp.S index 8e0373c7f..d58a2007e 100644 --- a/libc/string/ia64/strncmp.S +++ b/libc/string/ia64/strncmp.S @@ -14,21 +14,20 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Return: the result of the comparison Inputs: in0: s1 in1: s2 - in2: n + in2: n Unlike memcmp(), this function is optimized for mismatches within the first few characters. */ -#include "sysdep.h" +#include <sysdep.h> #undef ret #define s1 in0 @@ -42,13 +41,13 @@ ENTRY(strncmp) alloc r2 = ar.pfs, 3, 0, 0, 0 mov ret0 = r0 - cmp.eq p6, p0 = r0, r0 // set p6 - cmp.eq p7, p0 = n, r0 // return immediately if n == 0 + cmp.eq p6, p0 = r0, r0 /* set p6 */ + cmp.eq p7, p0 = n, r0 /* return immediately if n == 0 */ (p7) br.cond.spnt .restore_and_exit ;; .loop: ld1 val1 = [s1], 1 ld1 val2 = [s2], 1 - adds n = -1, n // n-- + adds n = -1, n /* n-- */ ;; cmp.ne.and p6, p0 = val1, r0 cmp.ne.and p6, p0 = val2, r0 @@ -58,5 +57,5 @@ ENTRY(strncmp) sub ret0 = val1, val2 .restore_and_exit: br.ret.sptk.many b0 -END(strncmp) +END(strncmp) libc_hidden_weak (strncmp) diff --git a/libc/string/ia64/strncpy.S b/libc/string/ia64/strncpy.S index 4f1129350..b72e8a70c 100644 --- a/libc/string/ia64/strncpy.S +++ b/libc/string/ia64/strncpy.S @@ -15,9 +15,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Return: dest @@ -29,7 +28,7 @@ In this form, it assumes little endian mode. 
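
Before the optimized body of strncpy below, it may help to recall the contract it implements: copy at most n bytes from src, and once the terminator is reached, pad the rest of dest with zero bytes. A minimal reference version for comparison, not part of the patch:

#include <stddef.h>

static char *simple_strncpy(char *dest, const char *src, size_t n)
{
	size_t i = 0;

	while (i < n && src[i] != '\0') {	/* copy up to n bytes */
		dest[i] = src[i];
		++i;
	}
	while (i < n) {				/* zero-fill the remainder */
		dest[i] = '\0';
		++i;
	}
	return dest;
}
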
*/ -#include "sysdep.h" +#include <sysdep.h> #undef ret #define saved_lc r15 @@ -58,64 +57,64 @@ ENTRY(strncpy) .rotr r[MEMLAT + 2] .rotp p[MEMLAT + 1] - mov ret0 = in0 // return value = dest + mov ret0 = in0 /* return value = dest */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers + mov saved_pr = pr /* save the predicate registers */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter - mov ar.ec = 0 // ec is not guaranteed to - // be zero upon function entry + mov saved_lc = ar.lc /* save the loop counter */ + mov ar.ec = 0 /* ec is not guaranteed to */ + /* be zero upon function entry */ .body cmp.geu p6, p5 = 24, in2 (p6) br.cond.spnt .short_len - sub tmp = r0, in0 ;; // tmp = -dest - mov len = in2 // len - mov dest = in0 // dest - mov src = in1 // src - and tmp = 7, tmp ;; // loopcnt = -dest % 8 + sub tmp = r0, in0 ;; /* tmp = -dest */ + mov len = in2 /* len */ + mov dest = in0 /* dest */ + mov src = in1 /* src */ + and tmp = 7, tmp ;; /* loopcnt = -dest % 8 */ cmp.eq p6, p7 = tmp, r0 - adds loopcnt = -1, tmp // --loopcnt + adds loopcnt = -1, tmp /* --loopcnt */ (p6) br.cond.sptk .dest_aligned ;; - sub len = len, tmp // len -= -dest % 8 + sub len = len, tmp /* len -= -dest % 8 */ mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes -(p5) ld1 c = [src], 1 // c = *src++ +.l1: /* copy -dest % 8 bytes */ +(p5) ld1 c = [src], 1 /* c = *src++ */ ;; - st1 [dest] = c, 1 // *dest++ = c + st1 [dest] = c, 1 /* *dest++ = c */ cmp.ne p5, p7 = c, r0 br.cloop.dptk .l1 ;; (p7) br.cond.dpnt .found0_align -.dest_aligned: // p7 should be cleared here - shr.u c = len, 3 // c = len / 8 - and sh1 = 7, src // sh1 = src % 8 - and asrc = -8, src ;; // asrc = src & -OPSIZ -- align src - adds c = (MEMLAT-1), c // c = (len / 8) + MEMLAT - 1 +.dest_aligned: /* p7 should be cleared here */ + shr.u c = len, 3 /* c = len / 8 */ + and sh1 = 7, src /* sh1 = src % 8 */ + and asrc = -8, src ;; /* asrc = src & -OPSIZ -- align src */ + adds c = (MEMLAT-1), c /* c = (len / 8) + MEMLAT - 1 */ sub thresh = 8, sh1 - mov pr.rot = 1 << 16 // set rotating predicates - shl sh1 = sh1, 3 ;; // sh1 = 8 * (src % 8) - mov ar.lc = c // "infinite" loop - sub sh2 = 64, sh1 // sh2 = 64 - sh1 - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + mov pr.rot = 1 << 16 /* set rotating predicates */ + shl sh1 = sh1, 3 ;; /* sh1 = 8 * (src % 8) */ + mov ar.lc = c /* "infinite" loop */ + sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned - adds c = -(MEMLAT-1), c ;; // c = (len / 8) + adds c = -(MEMLAT-1), c ;; /* c = (len / 8) */ ld8 r[1] = [asrc],8 mov ar.lc = c ;; .align 32 .l2: -(p6) st8 [dest] = value, 8 // store val to dest +(p6) st8 [dest] = value, 8 /* store val to dest */ ld8.s r[0] = [asrc], 8 - shr.u value = r[1], sh1 ;; // value = w0 >> sh1 - czx1.r pos = value ;; // do we have an "early" zero - cmp.lt p7, p0 = pos, thresh // in w0 >> sh1? - adds len = -8, len // len -= 8 + shr.u value = r[1], sh1 ;; /* value = w0 >> sh1 */ + czx1.r pos = value ;; /* do we have an "early" zero */ + cmp.lt p7, p0 = pos, thresh /* in w0 >> sh1? 
*/ + adds len = -8, len /* len -= 8 */ (p7) br.cond.dpnt .nonalign_found0 - chk.s r[0], .recovery2 // it is safe to do that only -.back2: // after the previous test - shl tmp = r[0], sh2 // tmp = w1 << sh2 + chk.s r[0], .recovery2 /* it is safe to do that only */ +.back2: /* after the previous test */ + shl tmp = r[0], sh2 /* tmp = w1 << sh2 */ ;; - or value = value, tmp ;; // value |= tmp + or value = value, tmp ;; /* value |= tmp */ czx1.r pos = value ;; cmp.ne p7, p6 = 8, pos (p7) br.cond.dpnt .nonalign_found0 @@ -137,7 +136,7 @@ ENTRY(strncpy) (p[MEMLAT]) mov value = r[MEMLAT] (p[MEMLAT]) czx1.r pos = r[MEMLAT] ;; (p[MEMLAT]) cmp.ne p7, p0 = 8, pos -(p[MEMLAT]) adds len = -8, len // len -= 8 +(p[MEMLAT]) adds len = -8, len /* len -= 8 */ (p7) br.cond.dpnt .found0 (p[MEMLAT]) st8 [dest] = r[MEMLAT], 8 br.ctop.dptk .l3 ;; @@ -152,7 +151,7 @@ ENTRY(strncpy) (p5) br.cond.dptk .restore_and_exit ;; mov ar.lc = len .l4: -(p6) extr.u c = value, 0, 8 // c = value & 0xff +(p6) extr.u c = value, 0, 8 /* c = value & 0xff */ (p6) shr.u value = value, 8 ;; st1 [dest] = c, 1 cmp.ne p6, p0 = c, r0 @@ -165,7 +164,7 @@ ENTRY(strncpy) mov value = 0 ;; .found0: shl tmp = pos, 3 - shr.u loopcnt = len, 4 // loopcnt = len / 16 + shr.u loopcnt = len, 4 /* loopcnt = len / 16 */ mov c = -1 ;; cmp.eq p6, p0 = loopcnt, r0 adds loopcnt = -1, loopcnt @@ -192,24 +191,24 @@ ENTRY(strncpy) st1 [dest] = r0, 1 br.cloop.dptk .l7 ;; .restore_and_exit: - mov ar.lc = saved_lc // restore the loop counter - mov pr = saved_pr, -1 // restore the predicate registers + mov ar.lc = saved_lc /* restore the loop counter */ + mov pr = saved_pr, -1 /* restore the predicate registers */ br.ret.sptk.many b0 .short_len: cmp.eq p5, p0 = in2, r0 adds loopcnt = -1, in2 (p5) br.cond.spnt .restore_and_exit ;; - mov ar.lc = loopcnt // p6 should be set when we get here + mov ar.lc = loopcnt /* p6 should be set when we get here */ .l8: -(p6) ld1 c = [in1], 1 // c = *src++ +(p6) ld1 c = [in1], 1 /* c = *src++ */ ;; - st1 [in0] = c, 1 // *dest++ = c + st1 [in0] = c, 1 /* *dest++ = c */ (p6) cmp.ne p6, p0 = c, r0 br.cloop.dptk .l8 ;; - mov ar.lc = saved_lc // restore the loop counter - mov pr = saved_pr, -1 // restore the predicate registers + mov ar.lc = saved_lc /* restore the loop counter */ + mov pr = saved_pr, -1 /* restore the predicate registers */ br.ret.sptk.many b0 .recovery2: add c = 8, len diff --git a/libc/string/ia64/sysdep.h b/libc/string/ia64/sysdep.h deleted file mode 100644 index d10020ac1..000000000 --- a/libc/string/ia64/sysdep.h +++ /dev/null @@ -1,168 +0,0 @@ -/* Copyright (C) 1999, 2000, 2002, 2003, 2004 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Written by Jes Sorensen, <Jes.Sorensen@cern.ch>, April 1999. - Based on code originally written by David Mosberger-Tang - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. 
*/ - -#ifndef _LINUX_IA64_SYSDEP_H -#define _LINUX_IA64_SYSDEP_H 1 - -#include <features.h> -#include <asm/unistd.h> - -#ifdef __ASSEMBLER__ - -/* Macros to help writing .prologue directives in assembly code. */ -#define ASM_UNW_PRLG_RP 0x8 -#define ASM_UNW_PRLG_PFS 0x4 -#define ASM_UNW_PRLG_PSP 0x2 -#define ASM_UNW_PRLG_PR 0x1 -#define ASM_UNW_PRLG_GRSAVE(ninputs) (32+(ninputs)) - -#ifdef __STDC__ -#define C_LABEL(name) name : -#else -#define C_LABEL(name) name/**/: -#endif - -#define CALL_MCOUNT - -#define ENTRY(name) \ - .text; \ - .align 32; \ - .proc C_SYMBOL_NAME(name); \ - .global C_SYMBOL_NAME(name); \ - C_LABEL(name) \ - CALL_MCOUNT - -#define LEAF(name) \ - .text; \ - .align 32; \ - .proc C_SYMBOL_NAME(name); \ - .global name; \ - C_LABEL(name) - -/* Mark the end of function SYM. */ -#undef END -#define END(sym) .endp C_SYMBOL_NAME(sym) - -/* For Linux we can use the system call table in the header file - /usr/include/asm/unistd.h - of the kernel. But these symbols do not follow the SYS_* syntax - so we have to redefine the `SYS_ify' macro here. */ -#undef SYS_ify -#ifdef __STDC__ -# define SYS_ify(syscall_name) __NR_##syscall_name -#else -# define SYS_ify(syscall_name) __NR_/**/syscall_name -#endif - -/* Linux uses a negative return value to indicate syscall errors, unlike - most Unices, which use the condition codes' carry flag. - - Since version 2.1 the return value of a system call might be negative - even if the call succeeded. E.g., the `lseek' system call might return - a large offset. Therefore we must not anymore test for < 0, but test - for a real error by making sure the value in %d0 is a real error - number. Linus said he will make sure the no syscall returns a value - in -1 .. -4095 as a valid result so we can savely test with -4095. */ - -/* We don't want the label for the error handler to be visible in the symbol - table when we define it here. */ -#define SYSCALL_ERROR_LABEL __syscall_error - -#undef PSEUDO -#define PSEUDO(name, syscall_name, args) \ - ENTRY(name) \ - DO_CALL (SYS_ify(syscall_name)); \ - cmp.eq p6,p0=-1,r10; \ -(p6) br.cond.spnt.few __syscall_error; - -#define DO_CALL_VIA_BREAK(num) \ - mov r15=num; \ - break __BREAK_SYSCALL - -#ifdef IA64_USE_NEW_STUB -# ifdef SHARED -# define DO_CALL(num) \ - .prologue; \ - adds r2 = SYSINFO_OFFSET, r13;; \ - ld8 r2 = [r2]; \ - .save ar.pfs, r11; \ - mov r11 = ar.pfs;; \ - .body; \ - mov r15 = num; \ - mov b7 = r2; \ - br.call.sptk.many b6 = b7;; \ - .restore sp; \ - mov ar.pfs = r11; \ - .prologue; \ - .body -# else /* !SHARED */ -# define DO_CALL(num) \ - .prologue; \ - mov r15 = num; \ - movl r2 = _dl_sysinfo;; \ - ld8 r2 = [r2]; \ - .save ar.pfs, r11; \ - mov r11 = ar.pfs;; \ - .body; \ - mov b7 = r2; \ - br.call.sptk.many b6 = b7;; \ - .restore sp; \ - mov ar.pfs = r11; \ - .prologue; \ - .body -# endif -#else -# define DO_CALL(num) DO_CALL_VIA_BREAK(num) -#endif - -#undef PSEUDO_END -#define PSEUDO_END(name) .endp C_SYMBOL_NAME(name); - -#undef PSEUDO_NOERRNO -#define PSEUDO_NOERRNO(name, syscall_name, args) \ - ENTRY(name) \ - DO_CALL (SYS_ify(syscall_name)); - -#undef PSEUDO_END_NOERRNO -#define PSEUDO_END_NOERRNO(name) .endp C_SYMBOL_NAME(name); - -#undef PSEUDO_ERRVAL -#define PSEUDO_ERRVAL(name, syscall_name, args) \ - ENTRY(name) \ - DO_CALL (SYS_ify(syscall_name)); \ - cmp.eq p6,p0=-1,r10; \ -(p6) mov r10=r8; - - -#undef PSEUDO_END_ERRVAL -#define PSEUDO_END_ERRVAL(name) .endp C_SYMBOL_NAME(name); - -#undef END -#define END(name) \ - .size C_SYMBOL_NAME(name), . 
- C_SYMBOL_NAME(name) ; \ - .endp C_SYMBOL_NAME(name) - -#define ret br.ret.sptk.few b0 -#define ret_NOERRNO ret -#define ret_ERRVAL ret - -#endif /* not __ASSEMBLER__ */ - -#endif /* linux/ia64/sysdep.h */ diff --git a/libc/string/memchr.c b/libc/string/memchr.c index 413999722..99e13a2fc 100644 --- a/libc/string/memchr.c +++ b/libc/string/memchr.c @@ -10,31 +10,23 @@ #ifdef WANT_WIDE # define Wmemchr wmemchr #else +# undef memchr # define Wmemchr memchr #endif -libc_hidden_proto(Wmemchr) - Wvoid *Wmemchr(const Wvoid *s, Wint c, size_t n) { register const Wuchar *r = (const Wuchar *) s; -#ifdef __BCC__ - /* bcc can optimize the counter if it thinks it is a pointer... */ - register const char *np = (const char *) n; -#else -# define np n -#endif - while (np) { + while (n) { if (*r == ((Wuchar)c)) { return (Wvoid *) r; /* silence the warning */ } ++r; - --np; + --n; } return NULL; } -#undef np libc_hidden_def(Wmemchr) diff --git a/libc/string/memcmp.c b/libc/string/memcmp.c index 762fc23c1..6cb37f417 100644 --- a/libc/string/memcmp.c +++ b/libc/string/memcmp.c @@ -10,7 +10,6 @@ #ifdef WANT_WIDE # define Wmemcmp wmemcmp #else -/* Experimentally off - libc_hidden_proto(memcmp) */ # define Wmemcmp memcmp #endif diff --git a/libc/string/memcpy.c b/libc/string/memcpy.c index dc2986778..42436e0b6 100644 --- a/libc/string/memcpy.c +++ b/libc/string/memcpy.c @@ -10,26 +10,19 @@ #ifdef WANT_WIDE # define Wmemcpy wmemcpy #else +# undef memcpy # define Wmemcpy memcpy #endif -libc_hidden_proto(Wmemcpy) - Wvoid *Wmemcpy(Wvoid * __restrict s1, const Wvoid * __restrict s2, size_t n) { register Wchar *r1 = s1; register const Wchar *r2 = s2; -#ifdef __BCC__ - while (n--) { - *r1++ = *r2++; - } -#else while (n) { *r1++ = *r2++; --n; } -#endif return s1; } diff --git a/libc/string/memmem.c b/libc/string/memmem.c index 9dcd4c4c0..1b3a0bab6 100644 --- a/libc/string/memmem.c +++ b/libc/string/memmem.c @@ -8,7 +8,6 @@ #include "_string.h" #ifdef __USE_GNU -/* Experimentally off - libc_hidden_proto(memmem) */ void *memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen) { @@ -38,5 +37,4 @@ void *memmem(const void *haystack, size_t haystacklen, return NULL; } -libc_hidden_def(memmem) #endif diff --git a/libc/string/memmove.c b/libc/string/memmove.c index 0bea9b497..b768b6ea9 100644 --- a/libc/string/memmove.c +++ b/libc/string/memmove.c @@ -10,30 +10,11 @@ #ifdef WANT_WIDE # define Wmemmove wmemmove #else -/* Experimentally off - libc_hidden_proto(memmove) */ # define Wmemmove memmove #endif Wvoid *Wmemmove(Wvoid *s1, const Wvoid *s2, size_t n) { -#ifdef __BCC__ - register Wchar *s = (Wchar *) s1; - register const Wchar *p = (const Wchar *) s2; - - if (p >= s) { - while (n--) { - *s++ = *p++; - } - } else { - s += n; - p += n; - while (n--) { - *--s = *--p; - } - } - - return s1; -#else register Wchar *s = (Wchar *) s1; register const Wchar *p = (const Wchar *) s2; @@ -50,9 +31,8 @@ Wvoid *Wmemmove(Wvoid *s1, const Wvoid *s2, size_t n) } return s1; -#endif } #ifndef WANT_WIDE -libc_hidden_def(Wmemmove) +libc_hidden_def(memmove) #endif diff --git a/libc/string/mempcpy.c b/libc/string/mempcpy.c index 91896434b..d1d752b50 100644 --- a/libc/string/mempcpy.c +++ b/libc/string/mempcpy.c @@ -12,26 +12,19 @@ #ifdef WANT_WIDE # define Wmempcpy wmempcpy #else +# undef mempcpy # define Wmempcpy mempcpy #endif -libc_hidden_proto(Wmempcpy) - Wvoid *Wmempcpy(Wvoid * __restrict s1, const Wvoid * __restrict s2, size_t n) { register Wchar *r1 = s1; register const Wchar *r2 = s2; -#ifdef __BCC__ - 
while (n--) { - *r1++ = *r2++; - } -#else while (n) { *r1++ = *r2++; --n; } -#endif return r1; } diff --git a/libc/string/memrchr.c b/libc/string/memrchr.c index 48ec50a4e..60211f804 100644 --- a/libc/string/memrchr.c +++ b/libc/string/memrchr.c @@ -8,31 +8,21 @@ #include "_string.h" #ifdef __USE_GNU - -/* Experimentally off - libc_hidden_proto(memrchr) */ - void *memrchr(const void *s, int c, size_t n) { register const unsigned char *r; -#ifdef __BCC__ - /* bcc can optimize the counter if it thinks it is a pointer... */ - register const char *np = (const char *) n; -#else -#define np n -#endif - r = ((unsigned char *)s) + ((size_t) np); + r = ((unsigned char *)s) + ((size_t) n); - while (np) { + while (n) { if (*--r == ((unsigned char)c)) { return (void *) r; /* silence the warning */ } - --np; + --n; } return NULL; } -#undef np libc_hidden_def(memrchr) #endif diff --git a/libc/string/memset.c b/libc/string/memset.c index 6dd20d668..2a7c19dee 100644 --- a/libc/string/memset.c +++ b/libc/string/memset.c @@ -10,28 +10,21 @@ #ifdef WANT_WIDE # define Wmemset wmemset #else -/* Experimentally off - libc_hidden_proto(memset) */ +# undef memset # define Wmemset memset #endif Wvoid *Wmemset(Wvoid *s, Wint c, size_t n) { register Wuchar *p = (Wuchar *) s; -#ifdef __BCC__ - /* bcc can optimize the counter if it thinks it is a pointer... */ - register const char *np = (const char *) n; -#else -# define np n -#endif - while (np) { + while (n) { *p++ = (Wuchar) c; - --np; + --n; } return s; } -#undef np #ifndef WANT_WIDE libc_hidden_def(memset) diff --git a/libc/string/metag/Makefile b/libc/string/metag/Makefile new file mode 100644 index 000000000..523cf6842 --- /dev/null +++ b/libc/string/metag/Makefile @@ -0,0 +1,13 @@ +# Makefile for uClibc +# +# Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org> +# +# Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. +# + +top_srcdir:=../../../ +top_builddir:=../../../ +all: objs +include $(top_builddir)Rules.mak +include ../Makefile.in +include $(top_srcdir)Makerules diff --git a/libc/string/metag/memchr.S b/libc/string/metag/memchr.S new file mode 100644 index 000000000..8b48d863c --- /dev/null +++ b/libc/string/metag/memchr.S @@ -0,0 +1,156 @@ +! Copyright (C) 2013 Imagination Technologies Ltd. +! +! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + + .text + .global _memchr + .type _memchr,function +! D0Ar6 src +! D0Ar2 c +! D1Ar3 n +_memchr: + CMP D1Ar3, #0 + BEQ $Lexit_fail + !! convert c to unsigned char + AND D0Ar2,D0Ar2,#0xff + MOV D0Ar6, D1Ar1 + MOV D1Ar5, D0Ar6 + !! test alignment + AND D1Ar5, D1Ar5, #7 + CMP D1Ar5, #0 + BNZ $Lunaligned_loop + !! length must be greater than or equal to 8 for aligned loop + CMP D1Ar3, #8 + BGE $Laligned_setup +$Lunaligned_loop: + !! get 1 char from s + GETB D0Re0, [D0Ar6++] + !! increase alignment counter + ADD D1Ar5, D1Ar5, #1 + !! decrement n + SUB D1Ar3, D1Ar3, #1 + !! exit if we have a match + CMP D0Re0, D0Ar2 + BZ $Lexit_success1 + !! exit if we have hit the end of the string + CMP D1Ar3, #0 + BZ $Lexit_fail + !! fall through if the buffer is aligned now + CMP D1Ar5, #8 + BNE $Lunaligned_loop + !! fall through if there is more than 8 bytes left + CMP D1Ar3, #8 + BLT $Lunaligned_loop +$Laligned_setup: + !! fill the c into 4 bytes + MOV D0Ar4, D0Ar2 + LSL D0Ar4, D0Ar4, #8 + ADD D0Ar4, D0Ar4, D0Ar2 + LSL D0Ar4, D0Ar4, #8 + ADD D0Ar4, D0Ar4, D0Ar2 + LSL D0Ar4, D0Ar4, #8 + ADD D0Ar4, D0Ar4, D0Ar2 + !! 
divide n by 8 + MOV D1Ar5, D1Ar3 + LSR D1Ar5, D1Ar5, #3 +$Laligned_loop: + !! get 8 chars from s + GETL D0Re0, D1Re0, [D0Ar6++] + !! decrement loop counter + SUB D1Ar5, D1Ar5, #1 + !! test first 4 chars + XOR D0Re0, D0Re0, D0Ar4 + !! test second 4 chars + MOV D0Ar2, D1Re0 + XOR D1Re0, D0Ar2, D0Ar4 + !! check for matches in the first 4 chars + MOV D0Ar2, D0Re0 + ADDT D0Re0, D0Re0, #HI(0xfefefeff) + ADD D0Re0, D0Re0, #LO(0xfefefeff) + XOR D0Ar2, D0Ar2, #-1 + AND D0Re0, D0Re0, D0Ar2 + ANDMT D0Re0, D0Re0, #HI(0x80808080) + ANDMB D0Re0, D0Re0, #LO(0x80808080) + CMP D0Re0, #0 + BNZ $Lmatch_word1 + !! check for matches in the second 4 chars + MOV D1Ar1, D1Re0 + ADDT D1Re0, D1Re0, #HI(0xfefefeff) + ADD D1Re0, D1Re0, #LO(0xfefefeff) + XOR D1Ar1, D1Ar1, #-1 + AND D1Re0, D1Re0, D1Ar1 + ANDMT D1Re0, D1Re0, #HI(0x80808080) + ANDMB D1Re0, D1Re0, #LO(0x80808080) + CMP D1Re0, #0 + BNZ $Lmatch_word2 + !! check if we have reached the end of the buffer + CMP D1Ar5, #0 + BNE $Laligned_loop + !! exit if there are no chars left to check + AND D1Ar3, D1Ar3, #7 + CMP D1Ar3, #0 + BZ $Lexit_fail + !! recover c + AND D0Ar2, D0Ar4, #0xff +$Lbyte_loop: + !! get 1 char from s + GETB D0Re0, [D0Ar6++] + !! decrement n + SUB D1Ar3, D1Ar3, #1 + !! exit if we have a match + CMP D0Re0, D0Ar2 + BZ $Lexit_success1 + !! fall through if we have run out of chars + CMP D1Ar3, #0 + BNE $Lbyte_loop + +$Lexit_fail: + MOV D0Re0, #0 + B $Lend + +$Lmatch_word1: + !! move the match word into D1Re0 + MOV D1Re0, D0Re0 + !! roll back the buffer pointer by 4 chars + SUB D0Ar6, D0Ar6, #4 +$Lmatch_word2: + !! roll back the buffer pointer by 4 chars + SUB D0Ar6, D0Ar6, #4 + !! exit if lowest byte is 0 + MOV D1Ar1, D1Re0 + AND D1Ar1, D1Ar1, #0xff + CMP D1Ar1, #0 + BNE $Lexit_success2 + !! advance buffer pointer to the next char + ADD D0Ar6, D0Ar6, #1 + !! shift in the next lowest byte + LSR D1Re0, D1Re0, #8 + !! exit if lowest byte is 0 + MOV D1Ar1, D1Re0 + AND D1Ar1, D1Ar1, #0xff + CMP D1Ar1, #0 + BNE $Lexit_success2 + !! advance buffer pointer to the next char + ADD D0Ar6, D0Ar6, #1 + !! shift in the next lowest byte + LSR D1Re0, D1Re0, #8 + !! exit if lowest byte is 0 + MOV D1Ar1, D1Re0 + AND D1Ar1, D1Ar1, #0xff + CMP D1Ar1, #0 + BNE $Lexit_success2 + !! the match must be in the last byte, exit + ADD D0Ar6, D0Ar6, #1 + B $Lexit_success2 + +$Lexit_success1: + SUB D0Ar6, D0Ar6, #1 +$Lexit_success2: + !! return the buffer pointer + MOV D0Re0, D0Ar6 +$Lend: + MOV PC, D1RtP + + .size _memchr,.-_memchr + +libc_hidden_def(memchr) diff --git a/libc/string/metag/memcpy.S b/libc/string/metag/memcpy.S new file mode 100644 index 000000000..f96c9d131 --- /dev/null +++ b/libc/string/metag/memcpy.S @@ -0,0 +1,189 @@ +! Copyright (C) 2013 Imagination Technologies Ltd. + +! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + + .text + .global _memcpy + .type _memcpy,function +! D1Ar1 dst +! D0Ar2 src +! D1Ar3 cnt +! D0Re0 dst +_memcpy: + CMP D1Ar3, #16 + MOV A1.2, D0Ar2 ! source pointer + MOV A0.2, D1Ar1 ! destination pointer + MOV A0.3, D1Ar1 ! for return value +! If there are less than 16 bytes to copy use the byte copy loop + BGE $Llong_copy + +$Lbyte_copy: +! Simply copy a byte at a time + SUBS TXRPT, D1Ar3, #1 + BLT $Lend +$Lloop_byte: + GETB D1Re0, [A1.2++] + SETB [A0.2++], D1Re0 + BR $Lloop_byte + +$Lend: +! Finally set return value and return + MOV D0Re0, A0.3 + MOV PC, D1RtP + +$Llong_copy: + ANDS D1Ar5, D1Ar1, #7 ! test destination alignment + BZ $Laligned_dst + +! The destination address is not 8 byte aligned. 
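The aligned loop of the metag memchr above (and the strchr, strcmp and strcpy routines added later in this patch) finds a NUL or matching byte inside a whole register with the usual carry-propagation trick: XOR the word with the search byte replicated into every lane, then test (w + 0xfefefeff) & ~w & 0x80808080, which is non-zero exactly when some byte of w is zero. A minimal C sketch of that test, assuming 32-bit words and the same byte-at-a-time handling of unaligned heads and tails as the assembly; the function names are illustrative only:

#include <stddef.h>
#include <stdint.h>

/* Non-zero iff some byte of x is 0x00: subtracting 1 from every byte
 * borrows through a zero byte, and the mask keeps only real borrows. */
static uint32_t has_zero_byte(uint32_t x)
{
	return (x - 0x01010101u) & ~x & 0x80808080u;
}

void *memchr_sketch(const void *s, int c, size_t n)
{
	const unsigned char *p = s;
	uint32_t pat = 0x01010101u * (unsigned char)c;	/* c in every byte */

	while (((uintptr_t)p & 3) && n) {		/* align to a word */
		if (*p == (unsigned char)c)
			return (void *)p;
		++p; --n;
	}
	while (n >= 4) {				/* one word per iteration */
		uint32_t w = *(const uint32_t *)p ^ pat; /* matching bytes become 0x00 */
		if (has_zero_byte(w))
			break;				/* match is within this word */
		p += 4; n -= 4;
	}
	while (n) {					/* locate it (or finish) bytewise */
		if (*p == (unsigned char)c)
			return (void *)p;
		++p; --n;
	}
	return NULL;
}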
We will copy bytes from +! the source to the destination until the remaining data has an 8 byte +! destination address alignment (i.e we should never copy more than 7 +! bytes here). +$Lalign_dst: + GETB D0Re0, [A1.2++] + ADD D1Ar5, D1Ar5, #1 ! dest is aligned when D1Ar5 reaches #8 + SUB D1Ar3, D1Ar3, #1 ! decrement count of remaining bytes + SETB [A0.2++], D0Re0 + CMP D1Ar5, #8 + BNE $Lalign_dst + +! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte +! blocks, then jump to the unaligned copy loop or fall through to the aligned +! copy loop as appropriate. +$Laligned_dst: + MOV D0Ar4, A1.2 + LSR D1Ar5, D1Ar3, #3 ! D1Ar5 = number of 8 byte blocks + ANDS D0Ar4, D0Ar4, #7 ! test source alignment + BNZ $Lunaligned_copy ! if unaligned, use unaligned copy loop + +! Both source and destination are 8 byte aligned - the easy case. +$Laligned_copy: + LSRS D1Ar5, D1Ar3, #5 ! D1Ar5 = number of 32 byte blocks + BZ $Lbyte_copy + SUB TXRPT, D1Ar5, #1 + +$Laligned_32: + GETL D0Re0, D1Re0, [A1.2++] + GETL D0Ar6, D1Ar5, [A1.2++] + SETL [A0.2++], D0Re0, D1Re0 + SETL [A0.2++], D0Ar6, D1Ar5 + GETL D0Re0, D1Re0, [A1.2++] + GETL D0Ar6, D1Ar5, [A1.2++] + SETL [A0.2++], D0Re0, D1Re0 + SETL [A0.2++], D0Ar6, D1Ar5 + BR $Laligned_32 + +! If there are any remaining bytes use the byte copy loop, otherwise we are done + ANDS D1Ar3, D1Ar3, #0x1f + BNZ $Lbyte_copy + B $Lend + +! The destination is 8 byte aligned but the source is not, and there are 8 +! or more bytes to be copied. +$Lunaligned_copy: +! Adjust the source pointer (A1.2) to the 8 byte boundary before its +! current value + MOV D0Ar4, A1.2 + MOV D0Ar6, A1.2 + ANDMB D0Ar4, D0Ar4, #0xfff8 + MOV A1.2, D0Ar4 +! Save the number of bytes of mis-alignment in D0Ar4 for use later + SUBS D0Ar6, D0Ar6, D0Ar4 + MOV D0Ar4, D0Ar6 +! if there is no mis-alignment after all, use the aligned copy loop + BZ $Laligned_copy + +! prefetch 8 bytes + GETL D0Re0, D1Re0, [A1.2] + + SUB TXRPT, D1Ar5, #1 + +! There are 3 mis-alignment cases to be considered. Less than 4 bytes, exactly +! 4 bytes, and more than 4 bytes. + CMP D0Ar6, #4 + BLT $Lunaligned_1_2_3 ! use 1-3 byte mis-alignment loop + BZ $Lunaligned_4 ! use 4 byte mis-alignment loop + +! The mis-alignment is more than 4 bytes +$Lunaligned_5_6_7: + SUB D0Ar6, D0Ar6, #4 +! Calculate the bit offsets required for the shift operations necesssary +! to align the data. +! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset) + MULW D0Ar6, D0Ar6, #8 + MOV D1Ar5, #32 + SUB D1Ar5, D1Ar5, D0Ar6 +! Move data 4 bytes before we enter the main loop + MOV D0Re0, D1Re0 + +$Lloop_5_6_7: + GETL D0Ar2, D1Ar1, [++A1.2] +! form 64-bit data in D0Re0, D1Re0 + LSR D0Re0, D0Re0, D0Ar6 + MOV D1Re0, D0Ar2 + LSL D1Re0, D1Re0, D1Ar5 + ADD D0Re0, D0Re0, D1Re0 + + LSR D0Ar2, D0Ar2, D0Ar6 + LSL D1Re0, D1Ar1, D1Ar5 + ADD D1Re0, D1Re0, D0Ar2 + + SETL [A0.2++], D0Re0, D1Re0 + MOV D0Re0, D1Ar1 + BR $Lloop_5_6_7 + + B $Lunaligned_end + +$Lunaligned_1_2_3: +! Calculate the bit offsets required for the shift operations necesssary +! to align the data. +! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset) + MULW D0Ar6, D0Ar6, #8 + MOV D1Ar5, #32 + SUB D1Ar5, D1Ar5, D0Ar6 + +$Lloop_1_2_3: +! 
form 64-bit data in D0Re0,D1Re0 + LSR D0Re0, D0Re0, D0Ar6 + LSL D1Ar1, D1Re0, D1Ar5 + ADD D0Re0, D0Re0, D1Ar1 + MOV D0Ar2, D1Re0 + LSR D0FrT, D0Ar2, D0Ar6 + GETL D0Ar2, D1Ar1, [++A1.2] + + MOV D1Re0, D0Ar2 + LSL D1Re0, D1Re0, D1Ar5 + ADD D1Re0, D1Re0, D0FrT + + SETL [A0.2++], D0Re0, D1Re0 + MOV D0Re0, D0Ar2 + MOV D1Re0, D1Ar1 + BR $Lloop_1_2_3 + + B $Lunaligned_end + +! The 4 byte mis-alignment case - this does not require any shifting, just a +! shuffling of registers. +$Lunaligned_4: + MOV D0Re0, D1Re0 +$Lloop_4: + GETL D0Ar2, D1Ar1, [++A1.2] + MOV D1Re0, D0Ar2 + SETL [A0.2++], D0Re0, D1Re0 + MOV D0Re0, D1Ar1 + BR $Lloop_4 + +$Lunaligned_end: +! If there are no remaining bytes to copy, we are done. + ANDS D1Ar3, D1Ar3, #7 + BZ $Lend +! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte +! address of the remaining bytes, and fall through to the byte copy loop. + MOV D0Ar6, A1.2 + ADD D1Ar5, D0Ar4, D0Ar6 + MOV A1.2, D1Ar5 + B $Lbyte_copy + + .size _memcpy,.-_memcpy + +libc_hidden_def(memcpy) diff --git a/libc/string/metag/memmove.S b/libc/string/metag/memmove.S new file mode 100644 index 000000000..3416fd558 --- /dev/null +++ b/libc/string/metag/memmove.S @@ -0,0 +1,350 @@ +! Copyright (C) 2013 Imagination Technologies Ltd. + +! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + + + .text + .global _memmove + .type _memmove,function +! D1Ar1 dst +! D0Ar2 src +! D1Ar3 cnt +! D0Re0 dst +_memmove: + CMP D1Ar3, #0 + MOV D0Re0, D1Ar1 + BZ $LEND2 + MSETL [A0StP], D0.5, D0.6, D0.7 + MOV D1Ar5, D0Ar2 + CMP D1Ar1, D1Ar5 + BLT $Lforwards_copy + SUB D0Ar4, D1Ar1, D1Ar3 + ADD D0Ar4, D0Ar4, #1 + CMP D0Ar2, D0Ar4 + BLT $Lforwards_copy + ! should copy backwards + MOV D1Re0, D0Ar2 + ! adjust pointer to the end of mem + ADD D0Ar2, D1Re0, D1Ar3 + ADD D1Ar1, D1Ar1, D1Ar3 + + MOV A1.2, D0Ar2 + MOV A0.2, D1Ar1 + CMP D1Ar3, #8 + BLT $Lbbyte_loop + + MOV D0Ar4, D0Ar2 + MOV D1Ar5, D1Ar1 + + ! test 8 byte alignment + ANDS D1Ar5, D1Ar5, #7 + BNE $Lbdest_unaligned + + ANDS D0Ar4, D0Ar4, #7 + BNE $Lbsrc_unaligned + + LSR D1Ar5, D1Ar3, #3 + +$Lbaligned_loop: + GETL D0Re0, D1Re0, [--A1.2] + SETL [--A0.2], D0Re0, D1Re0 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lbaligned_loop + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lbbyte_loop_exit +$Lbbyte_loop: + GETB D1Re0, [--A1.2] + SETB [--A0.2], D1Re0 + SUBS D1Ar3, D1Ar3, #1 + BNE $Lbbyte_loop +$Lbbyte_loop_exit: + MOV D0Re0, A0.2 +$LEND: + SUB A0.2, A0StP, #24 + MGETL D0.5, D0.6, D0.7, [A0.2] + SUB A0StP, A0StP, #24 +$LEND2: + MOV PC, D1RtP + +$Lbdest_unaligned: + GETB D0Re0, [--A1.2] + SETB [--A0.2], D0Re0 + SUBS D1Ar5, D1Ar5, #1 + SUB D1Ar3, D1Ar3, #1 + BNE $Lbdest_unaligned + CMP D1Ar3, #8 + BLT $Lbbyte_loop +$Lbsrc_unaligned: + LSR D1Ar5, D1Ar3, #3 + ! adjust A1.2 + MOV D0Ar4, A1.2 + ! save original address + MOV D0Ar6, A1.2 + + ADD D0Ar4, D0Ar4, #7 + ANDMB D0Ar4, D0Ar4, #0xfff8 + ! new address is the 8-byte aligned one above the original + MOV A1.2, D0Ar4 + + ! A0.2 dst 64-bit is aligned + ! measure the gap size + SUB D0Ar6, D0Ar4, D0Ar6 + MOVS D0Ar4, D0Ar6 + ! keep this information for the later adjustment + ! both aligned + BZ $Lbaligned_loop + + ! prefetch + GETL D0Re0, D1Re0, [--A1.2] + + CMP D0Ar6, #4 + BLT $Lbunaligned_1_2_3 + ! 32-bit aligned + BZ $Lbaligned_4 + + SUB D0Ar6, D0Ar6, #4 + ! D1.6 stores the gap size in bits + MULW D1.6, D0Ar6, #8 + MOV D0.6, #32 + ! D0.6 stores the complement of the gap size + SUB D0.6, D0.6, D1.6 + +$Lbunaligned_5_6_7: + GETL D0.7, D1.7, [--A1.2] + ! form 64-bit data in D0Re0, D1Re0 + MOV D1Re0, D0Re0 + ! 
D1Re0 << gap-size + LSL D1Re0, D1Re0, D1.6 + MOV D0Re0, D1.7 + ! D0Re0 >> complement + LSR D0Re0, D0Re0, D0.6 + MOV D1.5, D0Re0 + ! combine the both + ADD D1Re0, D1Re0, D1.5 + + MOV D1.5, D1.7 + LSL D1.5, D1.5, D1.6 + MOV D0Re0, D0.7 + LSR D0Re0, D0Re0, D0.6 + MOV D0.5, D1.5 + ADD D0Re0, D0Re0, D0.5 + + SETL [--A0.2], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lbunaligned_5_6_7 + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lbbyte_loop_exit + ! Adjust A1.2 + ! A1.2 <- A1.2 +8 - gapsize + ADD A1.2, A1.2, #8 + SUB A1.2, A1.2, D0Ar4 + B $Lbbyte_loop + +$Lbunaligned_1_2_3: + MULW D1.6, D0Ar6, #8 + MOV D0.6, #32 + SUB D0.6, D0.6, D1.6 + +$Lbunaligned_1_2_3_loop: + GETL D0.7, D1.7, [--A1.2] + ! form 64-bit data in D0Re0, D1Re0 + LSL D1Re0, D1Re0, D1.6 + ! save D0Re0 for later use + MOV D0.5, D0Re0 + LSR D0Re0, D0Re0, D0.6 + MOV D1.5, D0Re0 + ADD D1Re0, D1Re0, D1.5 + + ! orignal data in D0Re0 + MOV D1.5, D0.5 + LSL D1.5, D1.5, D1.6 + MOV D0Re0, D1.7 + LSR D0Re0, D0Re0, D0.6 + MOV D0.5, D1.5 + ADD D0Re0, D0Re0, D0.5 + + SETL [--A0.2], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lbunaligned_1_2_3_loop + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lbbyte_loop_exit + ! Adjust A1.2 + ADD A1.2, A1.2, #8 + SUB A1.2, A1.2, D0Ar4 + B $Lbbyte_loop + +$Lbaligned_4: + GETL D0.7, D1.7, [--A1.2] + MOV D1Re0, D0Re0 + MOV D0Re0, D1.7 + SETL [--A0.2], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lbaligned_4 + ANDS D1Ar3, D1Ar3, #7 + BZ $Lbbyte_loop_exit + ! Adjust A1.2 + ADD A1.2, A1.2, #8 + SUB A1.2, A1.2, D0Ar4 + B $Lbbyte_loop + +$Lforwards_copy: + MOV A1.2, D0Ar2 + MOV A0.2, D1Ar1 + CMP D1Ar3, #8 + BLT $Lfbyte_loop + + MOV D0Ar4, D0Ar2 + MOV D1Ar5, D1Ar1 + + ANDS D1Ar5, D1Ar5, #7 + BNE $Lfdest_unaligned + + ANDS D0Ar4, D0Ar4, #7 + BNE $Lfsrc_unaligned + + LSR D1Ar5, D1Ar3, #3 + +$Lfaligned_loop: + GETL D0Re0, D1Re0, [A1.2++] + SUBS D1Ar5, D1Ar5, #1 + SETL [A0.2++], D0Re0, D1Re0 + BNE $Lfaligned_loop + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lfbyte_loop_exit +$Lfbyte_loop: + GETB D1Re0, [A1.2++] + SETB [A0.2++], D1Re0 + SUBS D1Ar3, D1Ar3, #1 + BNE $Lfbyte_loop +$Lfbyte_loop_exit: + MOV D0Re0, D1Ar1 + B $LEND + +$Lfdest_unaligned: + GETB D0Re0, [A1.2++] + ADD D1Ar5, D1Ar5, #1 + SUB D1Ar3, D1Ar3, #1 + SETB [A0.2++], D0Re0 + CMP D1Ar5, #8 + BNE $Lfdest_unaligned + CMP D1Ar3, #8 + BLT $Lfbyte_loop +$Lfsrc_unaligned: + ! adjust A1.2 + LSR D1Ar5, D1Ar3, #3 + + MOV D0Ar4, A1.2 + MOV D0Ar6, A1.2 + ANDMB D0Ar4, D0Ar4, #0xfff8 + MOV A1.2, D0Ar4 + + ! A0.2 dst 64-bit is aligned + SUB D0Ar6, D0Ar6, D0Ar4 + ! keep the information for the later adjustment + MOVS D0Ar4, D0Ar6 + + ! both aligned + BZ $Lfaligned_loop + + ! prefetch + GETL D0Re0, D1Re0, [A1.2] + + CMP D0Ar6, #4 + BLT $Lfunaligned_1_2_3 + BZ $Lfaligned_4 + + SUB D0Ar6, D0Ar6, #4 + MULW D0.6, D0Ar6, #8 + MOV D1.6, #32 + SUB D1.6, D1.6, D0.6 + +$Lfunaligned_5_6_7: + GETL D0.7, D1.7, [++A1.2] + ! form 64-bit data in D0Re0, D1Re0 + MOV D0Re0, D1Re0 + LSR D0Re0, D0Re0, D0.6 + MOV D1Re0, D0.7 + LSL D1Re0, D1Re0, D1.6 + MOV D0.5, D1Re0 + ADD D0Re0, D0Re0, D0.5 + + MOV D0.5, D0.7 + LSR D0.5, D0.5, D0.6 + MOV D1Re0, D1.7 + LSL D1Re0, D1Re0, D1.6 + MOV D1.5, D0.5 + ADD D1Re0, D1Re0, D1.5 + + SETL [A0.2++], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lfunaligned_5_6_7 + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lfbyte_loop_exit + ! 
Adjust A1.2 + ADD A1.2, A1.2, D0Ar4 + B $Lfbyte_loop + +$Lfunaligned_1_2_3: + MULW D0.6, D0Ar6, #8 + MOV D1.6, #32 + SUB D1.6, D1.6, D0.6 + +$Lfunaligned_1_2_3_loop: + GETL D0.7, D1.7, [++A1.2] + ! form 64-bit data in D0Re0, D1Re0 + LSR D0Re0, D0Re0, D0.6 + MOV D1.5, D1Re0 + LSL D1Re0, D1Re0, D1.6 + MOV D0.5, D1Re0 + ADD D0Re0, D0Re0, D0.5 + + MOV D0.5, D1.5 + LSR D0.5, D0.5, D0.6 + MOV D1Re0, D0.7 + LSL D1Re0, D1Re0, D1.6 + MOV D1.5, D0.5 + ADD D1Re0, D1Re0, D1.5 + + SETL [A0.2++], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lfunaligned_1_2_3_loop + + ANDS D1Ar3, D1Ar3, #7 + BZ $Lfbyte_loop_exit + ! Adjust A1.2 + ADD A1.2, A1.2, D0Ar4 + B $Lfbyte_loop + +$Lfaligned_4: + GETL D0.7, D1.7, [++A1.2] + MOV D0Re0, D1Re0 + MOV D1Re0, D0.7 + SETL [A0.2++], D0Re0, D1Re0 + MOV D0Re0, D0.7 + MOV D1Re0, D1.7 + SUBS D1Ar5, D1Ar5, #1 + BNE $Lfaligned_4 + ANDS D1Ar3, D1Ar3, #7 + BZ $Lfbyte_loop_exit + ! Adjust A1.2 + ADD A1.2, A1.2, D0Ar4 + B $Lfbyte_loop + + .size _memmove,.-_memmove + +libc_hidden_def(memmove) diff --git a/libc/string/metag/memset.S b/libc/string/metag/memset.S new file mode 100644 index 000000000..8d4e9a158 --- /dev/null +++ b/libc/string/metag/memset.S @@ -0,0 +1,90 @@ +! Copyright (C) 2013 Imagination Technologies Ltd. + +! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + + + .text + .global _memset + .type _memset,function +! D1Ar1 dst +! D0Ar2 c +! D1Ar3 cnt +! D0Re0 dst +_memset: + AND D0Ar2,D0Ar2,#0xFF ! Ensure a byte input value + MULW D0Ar2,D0Ar2,#0x0101 ! Duplicate byte value into 0-15 + ANDS D0Ar4,D1Ar1,#7 ! Extract bottom LSBs of dst + LSL D0Re0,D0Ar2,#16 ! Duplicate byte value into 16-31 + ADD A0.2,D0Ar2,D0Re0 ! Duplicate byte value into 4 (A0.2) + MOV D0Re0,D1Ar1 ! Return dst + BZ $LLongStub ! if start address is aligned + ! start address is not aligned on an 8 byte boundary, so we + ! need the number of bytes up to the next 8 byte address + ! boundary, or the length of the string if less than 8, in D1Ar5 + MOV D0Ar2,#8 ! Need 8 - N in D1Ar5 ... + SUB D1Ar5,D0Ar2,D0Ar4 ! ... subtract N + CMP D1Ar3,D1Ar5 + MOVMI D1Ar5,D1Ar3 + B $LByteStub ! dst is mis-aligned, do $LByteStub + +! +! Preamble to LongLoop which generates 4*8 bytes per interation (5 cycles) +! +$LLongStub: + LSRS D0Ar2,D1Ar3,#5 + AND D1Ar3,D1Ar3,#0x1F + MOV A1.2,A0.2 + BEQ $LLongishStub + SUB TXRPT,D0Ar2,#1 + CMP D1Ar3,#0 +$LLongLoop: + SETL [D1Ar1++],A0.2,A1.2 + SETL [D1Ar1++],A0.2,A1.2 + SETL [D1Ar1++],A0.2,A1.2 + SETL [D1Ar1++],A0.2,A1.2 + BR $LLongLoop + BZ $Lexit +! +! Preamble to LongishLoop which generates 1*8 bytes per interation (2 cycles) +! +$LLongishStub: + LSRS D0Ar2,D1Ar3,#3 + AND D1Ar3,D1Ar3,#0x7 + MOV D1Ar5,D1Ar3 + BEQ $LByteStub + SUB TXRPT,D0Ar2,#1 + CMP D1Ar3,#0 +$LLongishLoop: + SETL [D1Ar1++],A0.2,A1.2 + BR $LLongishLoop + BZ $Lexit +! +! This does a byte structured burst of up to 7 bytes +! +! D1Ar1 should point to the location required +! D1Ar3 should be the remaining total byte count +! D1Ar5 should be burst size (<= D1Ar3) +! +$LByteStub: + SUBS D1Ar3,D1Ar3,D1Ar5 ! Reduce count + ADD D1Ar1,D1Ar1,D1Ar5 ! Advance pointer to end of area + MULW D1Ar5,D1Ar5,#4 ! Scale to (1*4), (2*4), (3*4) + SUB D1Ar5,D1Ar5,#(8*4) ! Rebase to -(7*4), -(6*4), -(5*4), ... + MOV A1.2,D1Ar5 + SUB PC,CPC1,A1.2 ! Jump into table below + SETB [D1Ar1+#(-7)],A0.2 + SETB [D1Ar1+#(-6)],A0.2 + SETB [D1Ar1+#(-5)],A0.2 + SETB [D1Ar1+#(-4)],A0.2 + SETB [D1Ar1+#(-3)],A0.2 + SETB [D1Ar1+#(-2)],A0.2 + SETB [D1Ar1+#(-1)],A0.2 +! +! 
Return if all data has been output, otherwise do $LLongStub +! + BNZ $LLongStub +$Lexit: + MOV PC,D1RtP + .size _memset,.-_memset + +libc_hidden_def(memset) diff --git a/libc/string/metag/strchr.S b/libc/string/metag/strchr.S new file mode 100644 index 000000000..6b0f2ea43 --- /dev/null +++ b/libc/string/metag/strchr.S @@ -0,0 +1,167 @@ +! Copyright (C) 2013 Imagination Technologies Ltd. + +! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + + +#include <features.h> + + .text + .global _strchr + .type _strchr, function +! D1Ar1 src +! D0Ar2 c +_strchr: + AND D0Ar2,D0Ar2,#0xff ! Drop all but 8 bits of c + MOV D1Ar5, D1Ar1 ! Copy src to D1Ar5 + AND D1Ar5, D1Ar5, #7 ! Check 64 bit alignment + CMP D1Ar5, #0 + BZ $Laligned64bit ! Jump to 64 bit aligned strchr +$Lalign64bit: + GETB D0Re0, [D1Ar1++] ! Get the next character + ADD D1Ar5, D1Ar5, #1 ! Increment alignment counter + CMP D0Re0, D0Ar2 ! Is the char c + BZ $Lcharatprevious ! If so exit returning position + CMP D0Re0, #0 ! End of string? + BZ $Lnotfound ! If so exit + CMP D1Ar5, #8 ! Are we aligned 64bit yet? + BNZ $Lalign64bit ! If not keep aligning +$Laligned64bit: ! src is 64bit aligned + MOV D0Ar4, D0Ar2 ! put c into D0Ar4 + LSL D0Ar4, D0Ar4, #8 ! Shift it up + ADD D0Ar4, D0Ar4, D0Ar2 ! another c + LSL D0Ar4, D0Ar4, #8 ! shift + ADD D0Ar4, D0Ar4, D0Ar2 ! another c + LSL D0Ar4, D0Ar4, #8 ! shift + ADD D0Ar4, D0Ar4, D0Ar2 ! 4 copies of c +$Lcheck8bytes: + GETL D0Re0, D1Re0, [D1Ar1++] ! grab 16 bytes + MOV A0.3, D0Re0 ! save for later use + ! first word + ! check for \0 + MOV D0Ar2, D0Re0 ! D0Ar2 is a scratch now + ADDT D0Re0, D0Re0, #HI(0xfefefeff) ! Do 4 1-byte compares + ADD D0Re0, D0Re0, #LO(0xfefefeff) + XOR D0Ar2, D0Ar2, #-1 + AND D0Re0, D0Re0, D0Ar2 + ANDMT D0Re0, D0Re0, #HI(0x80808080) + ANDMB D0Re0, D0Re0, #LO(0x80808080) + CMP D0Re0, #0 + BNZ $Lnullinword1 ! found \0 (or c if c==\0) + + ! Check for c + MOV D0Re0, A0.3 ! restore the first word + XOR D0Re0, D0Re0, D0Ar4 + MOV D0Ar2, D0Re0 ! DO 4 1-byte compares + ADDT D0Re0, D0Re0, #HI(0xfefefeff) + ADD D0Re0, D0Re0, #LO(0xfefefeff) + XOR D0Ar2, D0Ar2, #-1 + AND D0Re0, D0Re0, D0Ar2 + ANDMT D0Re0, D0Re0, #HI(0x80808080) + ANDMB D0Re0, D0Re0, #LO(0x80808080) + CMP D0Re0, #0 + BNZ $Lcharinword1 ! found c + + ! second word + ! check for \0 + MOV A0.3, D1Re0 ! save for later use + MOV D1Ar3, D1Re0 + ADDT D1Re0, D1Re0, #HI(0xfefefeff) ! Do 4 1-byte compares + ADD D1Re0, D1Re0, #LO(0xfefefeff) + XOR D1Ar3, D1Ar3, #-1 + AND D1Re0, D1Re0, D1Ar3 + ANDMT D1Re0, D1Re0, #HI(0x80808080) + ANDMB D1Re0, D1Re0, #LO(0x80808080) + CMP D1Re0, #0 + BNZ $Lnullinword2 ! Found \0 (or c if c==\0) + + MOV D0.4, A0.3 ! restore the second word + XOR D1Re0, D0.4, D0Ar4 ! test c + + MOV D1Ar3, D1Re0 + ADDT D1Re0, D1Re0, #HI(0xfefefeff) ! Do 4 1-byte compares + ADD D1Re0, D1Re0, #LO(0xfefefeff) + XOR D1Ar3, D1Ar3, #-1 + AND D1Re0, D1Re0, D1Ar3 + ANDMT D1Re0, D1Re0, #HI(0x80808080) + ANDMB D1Re0, D1Re0, #LO(0x80808080) + CMP D1Re0, #0 + BNZ $Lcharinword2 ! found c + + B $Lcheck8bytes ! Keep checking + +$Lnullinword1: ! found \0 somewhere, check for c too + SUB D1Ar1, D1Ar1, #4 +$Lnullinword2: + SUB D1Ar1, D1Ar1, #4 + AND D0Ar2, D0Ar4, #0xff ! restore c + MOV D0Re0, A0.3 ! restore the word + MOV D0.4, D0Re0 ! for shifting later + AND D0Re0, D0Re0, #0xff ! take first byte of word + CMP D0Re0, D0Ar2 + BZ $Lcharatcurrent ! found c + CMP D0Re0, #0! + BZ $Lnotfound ! found \0 + + ADD D1Ar1, D1Ar1, #1 + LSR D0.4, D0.4, #8 + MOV D0Re0, D0.4 + AND D0Re0, D0Re0, #0xff ! 
take second byte of word + CMP D0Re0, D0Ar2 + BZ $Lcharatcurrent ! found c + CMP D0Re0, #0 + BZ $Lnotfound ! found \0 + + ADD D1Ar1, D1Ar1, #1 + LSR D0.4, D0.4, #8 + MOV D0Re0, D0.4 + AND D0Re0, D0Re0, #0xff ! take third byte of word + CMP D0Re0, D0Ar2 + BZ $Lcharatcurrent ! found c + CMP D0Re0, #0 + BZ $Lnotfound ! found \0 + + ADD D1Ar1, D1Ar1, #1 ! move to 4th byte + CMP D0Ar2, #0 ! If c was \0 + BZ $Lcharatcurrent ! c has been found! + +$Lnotfound: + MOV D0Re0, #0 ! End of string c not found + B $Lend + +$Lcharinword1: ! found c in first word + MOV D1Re0, D0Re0 + SUB D1Ar1, D1Ar1, #4 +$Lcharinword2: ! found c in second word + SUB D1Ar1, D1Ar1, #4 + + AND D0Re0, D1Re0, #0xff ! First byte + CMP D0Re0, #0 ! Test c (zero indicates c due + ! to the 4 1-byte compare code) + BNE $Lcharatcurrent + ADD D1Ar1, D1Ar1, #1 + + LSR D1Re0, D1Re0, #8 + AND D0Re0, D1Re0, #0xff ! Second byte + CMP D0Re0, #0 ! Test c (indicated by zero) + BNE $Lcharatcurrent + ADD D1Ar1, D1Ar1, #1 + + LSR D1Re0, D1Re0, #8 + AND D0Re0, D1Re0, #0xff ! Third byte + CMP D0Re0, #0 ! Test c (indicated by zero) + BNE $Lcharatcurrent + ADD D1Ar1, D1Ar1, #1 ! Must be the fourth byte + B $Lcharatcurrent + +$Lcharatprevious: + SUB D1Ar1, D1Ar1, #1 ! Fix-up pointer +$Lcharatcurrent: + MOV D0Re0, D1Ar1 ! Return the string pointer +$Lend: + MOV PC, D1RtP + .size _strchr,.-_strchr + +libc_hidden_def(strchr) +#ifdef __UCLIBC_SUSV3_LEGACY__ +strong_alias(strchr,index) +#endif diff --git a/libc/string/metag/strcmp.S b/libc/string/metag/strcmp.S new file mode 100644 index 000000000..3278ffaa5 --- /dev/null +++ b/libc/string/metag/strcmp.S @@ -0,0 +1,65 @@ +! Copyright (C) 2013 Imagination Technologies Ltd. + +! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + + +#include <features.h> + + .text + .global _strcmp + .type _strcmp,function +!D1Ar1 s1 +!D0Ar2 s2 +_strcmp: + TST D1Ar1,#3 + TSTZ D0Ar2,#3 + MOVT D1Re0,#0x0101 + ADD D1Re0,D1Re0,#0x0101 + BNZ $Lstrcmp_slow + GETD D1Ar3,[D1Ar1+#4++] ! Load 32-bits from s1 + GETD D1Ar5,[D0Ar2+#4++] ! Load 32-bits from s2 + LSL D0FrT,D1Re0,#7 ! D0FrT = 0x80808080 +$Lstrcmp4_loop: + SUB D0Re0,D1Ar3,D1Re0 ! D1Re0 = 0x01010101 + MOV D0Ar6,D1Ar3 + SUBS D0Ar4,D1Ar3,D1Ar5 ! Calculate difference + XOR D0Ar6,D0Ar6,#-1 + GETD D1Ar3,[D1Ar1+#4++] ! Load 32-bits from s1 + AND D0Re0,D0Re0,D0Ar6 + ANDSZ D0Ar6,D0Re0,D0FrT ! D0FrT = 0x80808080 + GETD D1Ar5,[D0Ar2+#4++] ! Load 32-bits from s2 + BZ $Lstrcmp4_loop + AND D0Ar6, D0Re0, D0FrT ! D0FrT = 0x80808080 +! +! Either they are different or they both contain a NULL + junk +! +$Lstrcmp4_end: + LSLS D0Re0,D0Ar4,#24 ! Was Byte[0] the same? + LSLSZ D0Ar2,D0Ar6,#24 ! Yes: AND they where not zero? + LSLSZ D0Re0,D0Ar4,#16 ! Yes: Was Byte[1] the same? + LSLSZ D0Ar2,D0Ar6,#16 ! Yes: AND they where not zero? + LSLSZ D0Re0,D0Ar4,#8 ! Tes: Was Byte[2] the same? + LSLSZ D0Ar2,D0Ar6,#8 ! Yes: AND they where not zero? + MOVZ D0Re0,D0Ar4 ! Yes: Must by Byte[3] thats the result + ASR D0Re0,D0Re0,#24 ! Sign extend result to integer + MOV PC,D1RtP +! +! Misaligned case, byte at a time +! +$Lstrcmp_slow: + GETB D1Ar3,[D1Ar1++] ! Load char from s1 + GETB D1Ar5,[D0Ar2++] ! Load char from s2 + CMP D1Ar3,#1 ! Null -> C and NZ, rest -> NC (\1->Z) + CMPNC D1Ar3,D1Ar5 ! NOT Null: Same -> Z, else -> NZ + BZ $Lstrcmp_slow ! NOT Null and Same: Loop + SUB D0Re0,D1Ar3,D1Ar5 ! 
Generate result + MOV PC,D1RtP + + .size _strcmp,.-_strcmp + + +libc_hidden_def(strcmp) +#ifndef __UCLIBC_HAS_LOCALE__ +strong_alias(strcmp,strcoll) +libc_hidden_def(strcoll) +#endif diff --git a/libc/string/metag/strcpy.S b/libc/string/metag/strcpy.S new file mode 100644 index 000000000..529ac9279 --- /dev/null +++ b/libc/string/metag/strcpy.S @@ -0,0 +1,94 @@ +! Copyright (C) 2013 Imagination Technologies Ltd. + +! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + + + .text + .global _strcpy + .type _strcpy,function +! D1Ar1 dst +! D0Ar2 src + +_strcpy: + MOV A1.2, D1Ar1 + + ! test 4 byte alignment of src + ANDS D0Ar4, D0Ar2, #3 + BNZ $Lbyteloop + + ! test 4 byte alignment of dest + ANDS D1Ar5, D1Ar1, #3 + BNZ $Lbyteloop + + ! load mask values for aligned loops + MOVT D1Ar3, #HI(0xfefefeff) + ADD D1Ar3, D1Ar3, #LO(0xfefefeff) + MOVT D0FrT, #HI(0x80808080) + ADD D0FrT, D0FrT, #LO(0x80808080) + + ! test 8 byte alignment of src + ANDS D0Ar4, D0Ar2, #7 + BNZ $Lwordloop + + ! test 8 byte alignment of dest + ANDS D1Ar5, D1Ar1, #7 + BNZ $Lwordloop + +$L8byteloop: + GETL D1Ar5, D0Ar6, [D0Ar2++] + MOV D1Re0, D1Ar5 + MOV D0Re0, D1Ar5 + ADD D1Re0, D1Re0, D1Ar3 + XOR D0Re0, D0Re0, #-1 + AND D1Re0, D1Re0, D0Re0 + ANDS D1Re0, D1Re0, D0FrT + BNZ $Lnullfound ! NULL in first word + + MOV D1Re0, D0Ar6 + MOV D0Re0, D0Ar6 + ADD D1Re0, D1Re0, D1Ar3 + XOR D0Re0, D0Re0, #-1 + AND D1Re0, D1Re0, D0Re0 + ANDS D1Re0, D1Re0, D0FrT + BNZ $Lnullfound2 ! NULL in the second word + + SETL [A1.2++], D1Ar5, D0Ar6 + B $L8byteloop + +$Lwordloop: + GETD D0Ar6, [D0Ar2++] + MOV D1Re0, D0Ar6 + MOV D0Re0, D0Ar6 + ADD D1Re0, D1Re0, D1Ar3 + XOR D0Re0, D0Re0, #-1 + AND D1Re0, D1Re0, D0Re0 + ANDS D1Re0, D1Re0, D0FrT + MOV D1Ar5, D0Ar6 + BNZ $Lnullfound + SETD [A1.2++], D0Ar6 + B $Lwordloop + +$Lnullfound2: + SETD [A1.2++], D1Ar5 + MOV D1Ar5, D0Ar6 + +$Lnullfound: + SETB [A1.2++], D1Ar5 + ANDS D0Ar6, D1Ar5, #0xff + LSR D1Ar5, D1Ar5, #8 + BNZ $Lnullfound + B $Lend + +$Lbyteloop: + GETB D0Ar6, [D0Ar2++] + SETB [A1.2++], D0Ar6 + CMP D0Ar6, #0 + BNZ $Lbyteloop + +$Lend: + MOV D0Re0, D1Ar1 + MOV PC, D1RtP + + .size _strcpy,.-_strcpy + +libc_hidden_def(strcpy) diff --git a/libc/string/sh64/Makefile b/libc/string/microblaze/Makefile index 0a95346fd..5bdfef2f7 100644 --- a/libc/string/sh64/Makefile +++ b/libc/string/microblaze/Makefile @@ -5,8 +5,8 @@ # Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. # -top_srcdir:=../../../ -top_builddir:=../../../ +top_srcdir := ../../../ +top_builddir := ../../../ all: objs include $(top_builddir)Rules.mak include ../Makefile.in diff --git a/libc/string/microblaze/memcpy.S b/libc/string/microblaze/memcpy.S new file mode 100644 index 000000000..5219e9919 --- /dev/null +++ b/libc/string/microblaze/memcpy.S @@ -0,0 +1,334 @@ +/* + * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu> + * Copyright (C) 2008-2009 PetaLogix + * Copyright (C) 2008 Jim Law - Iris LP All rights reserved. + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file COPYING in the main directory of this + * archive for more details. + * + * Written by Jim Law <jlaw@irispower.com> + * + * intended to replace: + * memcpy in memcpy.c and + * memmove in memmove.c + * ... 
in arch/microblaze/lib + * + * + * assly_fastcopy.S + * + * Attempt at quicker memcpy and memmove for MicroBlaze + * Input : Operand1 in Reg r5 - destination address + * Operand2 in Reg r6 - source address + * Operand3 in Reg r7 - number of bytes to transfer + * Output: Result in Reg r3 - starting destinaition address + * + * + * Explanation: + * Perform (possibly unaligned) copy of a block of memory + * between mem locations with size of xfer spec'd in bytes + */ + + .text + .globl memcpy + .type memcpy, @function + .ent memcpy + +#ifdef __MICROBLAZEEL__ +# define BSLLI bsrli +# define BSRLI bslli +#else +# define BSLLI bslli +# define BSRLI bsrli +#endif + +memcpy: +fast_memcpy_ascending: + /* move d to return register as value of function */ + addi r3, r5, 0 + + addi r4, r0, 4 /* n = 4 */ + cmpu r4, r4, r7 /* n = c - n (unsigned) */ + blti r4, a_xfer_end /* if n < 0, less than one word to transfer */ + + /* transfer first 0~3 bytes to get aligned dest address */ + andi r4, r5, 3 /* n = d & 3 */ + /* if zero, destination already aligned */ + beqi r4, a_dalign_done + /* n = 4 - n (yields 3, 2, 1 transfers for 1, 2, 3 addr offset) */ + rsubi r4, r4, 4 + rsub r7, r4, r7 /* c = c - n adjust c */ + +a_xfer_first_loop: + /* if no bytes left to transfer, transfer the bulk */ + beqi r4, a_dalign_done + lbui r11, r6, 0 /* h = *s */ + sbi r11, r5, 0 /* *d = h */ + addi r6, r6, 1 /* s++ */ + addi r5, r5, 1 /* d++ */ + brid a_xfer_first_loop /* loop */ + addi r4, r4, -1 /* n-- (IN DELAY SLOT) */ + +a_dalign_done: + addi r4, r0, 32 /* n = 32 */ + cmpu r4, r4, r7 /* n = c - n (unsigned) */ + /* if n < 0, less than one block to transfer */ + blti r4, a_block_done + +a_block_xfer: + andi r9, r6, 3 /* t1 = s & 3 */ + /* if temp == 0, everything is word-aligned */ + beqi r9, a_word_xfer + +a_block_unaligned: + andi r4, r7, 0xffffffe0 /* n = c & ~31 */ + rsub r7, r4, r7 /* c = c - n */ + andi r8, r6, 0xfffffffc /* as = s & ~3 */ + add r6, r6, r4 /* s = s + n */ + lwi r11, r8, 0 /* h = *(as + 0) */ + + addi r9, r9, -1 + beqi r9, a_block_u1 /* t1 was 1 => 1 byte offset */ + addi r9, r9, -1 + beqi r9, a_block_u2 /* t1 was 2 => 2 byte offset */ + +a_block_u3: + BSLLI r11, r11, 24 /* h = h << 24 */ +a_bu3_loop: + lwi r12, r8, 4 /* v = *(as + 4) */ + BSRLI r9, r12, 8 /* t1 = v >> 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 0 /* *(d + 0) = t1 */ + BSLLI r11, r12, 24 /* h = v << 24 */ + lwi r12, r8, 8 /* v = *(as + 8) */ + BSRLI r9, r12, 8 /* t1 = v >> 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 4 /* *(d + 4) = t1 */ + BSLLI r11, r12, 24 /* h = v << 24 */ + lwi r12, r8, 12 /* v = *(as + 12) */ + BSRLI r9, r12, 8 /* t1 = v >> 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 8 /* *(d + 8) = t1 */ + BSLLI r11, r12, 24 /* h = v << 24 */ + lwi r12, r8, 16 /* v = *(as + 16) */ + BSRLI r9, r12, 8 /* t1 = v >> 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 12 /* *(d + 12) = t1 */ + BSLLI r11, r12, 24 /* h = v << 24 */ + lwi r12, r8, 20 /* v = *(as + 20) */ + BSRLI r9, r12, 8 /* t1 = v >> 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 16 /* *(d + 16) = t1 */ + BSLLI r11, r12, 24 /* h = v << 24 */ + lwi r12, r8, 24 /* v = *(as + 24) */ + BSRLI r9, r12, 8 /* t1 = v >> 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 20 /* *(d + 20) = t1 */ + BSLLI r11, r12, 24 /* h = v << 24 */ + lwi r12, r8, 28 /* v = *(as + 28) */ + BSRLI r9, r12, 8 /* t1 = v >> 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 24 /* *(d + 24) = t1 */ + BSLLI r11, r12, 24 /* h = v << 24 */ + lwi r12, r8, 32 
/* v = *(as + 32) */ + BSRLI r9, r12, 8 /* t1 = v >> 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 28 /* *(d + 28) = t1 */ + BSLLI r11, r12, 24 /* h = v << 24 */ + addi r8, r8, 32 /* as = as + 32 */ + addi r4, r4, -32 /* n = n - 32 */ + bneid r4, a_bu3_loop /* while (n) loop */ + addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */ + bri a_block_done + +a_block_u1: + BSLLI r11, r11, 8 /* h = h << 8 */ +a_bu1_loop: + lwi r12, r8, 4 /* v = *(as + 4) */ + BSRLI r9, r12, 24 /* t1 = v >> 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 0 /* *(d + 0) = t1 */ + BSLLI r11, r12, 8 /* h = v << 8 */ + lwi r12, r8, 8 /* v = *(as + 8) */ + BSRLI r9, r12, 24 /* t1 = v >> 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 4 /* *(d + 4) = t1 */ + BSLLI r11, r12, 8 /* h = v << 8 */ + lwi r12, r8, 12 /* v = *(as + 12) */ + BSRLI r9, r12, 24 /* t1 = v >> 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 8 /* *(d + 8) = t1 */ + BSLLI r11, r12, 8 /* h = v << 8 */ + lwi r12, r8, 16 /* v = *(as + 16) */ + BSRLI r9, r12, 24 /* t1 = v >> 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 12 /* *(d + 12) = t1 */ + BSLLI r11, r12, 8 /* h = v << 8 */ + lwi r12, r8, 20 /* v = *(as + 20) */ + BSRLI r9, r12, 24 /* t1 = v >> 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 16 /* *(d + 16) = t1 */ + BSLLI r11, r12, 8 /* h = v << 8 */ + lwi r12, r8, 24 /* v = *(as + 24) */ + BSRLI r9, r12, 24 /* t1 = v >> 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 20 /* *(d + 20) = t1 */ + BSLLI r11, r12, 8 /* h = v << 8 */ + lwi r12, r8, 28 /* v = *(as + 28) */ + BSRLI r9, r12, 24 /* t1 = v >> 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 24 /* *(d + 24) = t1 */ + BSLLI r11, r12, 8 /* h = v << 8 */ + lwi r12, r8, 32 /* v = *(as + 32) */ + BSRLI r9, r12, 24 /* t1 = v >> 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 28 /* *(d + 28) = t1 */ + BSLLI r11, r12, 8 /* h = v << 8 */ + addi r8, r8, 32 /* as = as + 32 */ + addi r4, r4, -32 /* n = n - 32 */ + bneid r4, a_bu1_loop /* while (n) loop */ + addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */ + bri a_block_done + +a_block_u2: + BSLLI r11, r11, 16 /* h = h << 16 */ +a_bu2_loop: + lwi r12, r8, 4 /* v = *(as + 4) */ + BSRLI r9, r12, 16 /* t1 = v >> 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 0 /* *(d + 0) = t1 */ + BSLLI r11, r12, 16 /* h = v << 16 */ + lwi r12, r8, 8 /* v = *(as + 8) */ + BSRLI r9, r12, 16 /* t1 = v >> 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 4 /* *(d + 4) = t1 */ + BSLLI r11, r12, 16 /* h = v << 16 */ + lwi r12, r8, 12 /* v = *(as + 12) */ + BSRLI r9, r12, 16 /* t1 = v >> 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 8 /* *(d + 8) = t1 */ + BSLLI r11, r12, 16 /* h = v << 16 */ + lwi r12, r8, 16 /* v = *(as + 16) */ + BSRLI r9, r12, 16 /* t1 = v >> 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 12 /* *(d + 12) = t1 */ + BSLLI r11, r12, 16 /* h = v << 16 */ + lwi r12, r8, 20 /* v = *(as + 20) */ + BSRLI r9, r12, 16 /* t1 = v >> 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 16 /* *(d + 16) = t1 */ + BSLLI r11, r12, 16 /* h = v << 16 */ + lwi r12, r8, 24 /* v = *(as + 24) */ + BSRLI r9, r12, 16 /* t1 = v >> 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 20 /* *(d + 20) = t1 */ + BSLLI r11, r12, 16 /* h = v << 16 */ + lwi r12, r8, 28 /* v = *(as + 28) */ + BSRLI r9, r12, 16 /* t1 = v >> 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 24 /* *(d + 24) = t1 */ + BSLLI r11, r12, 16 /* h = v << 16 */ + lwi r12, r8, 32 /* v = *(as + 32) */ + BSRLI r9, r12, 16 /* t1 
= v >> 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 28 /* *(d + 28) = t1 */ + BSLLI r11, r12, 16 /* h = v << 16 */ + addi r8, r8, 32 /* as = as + 32 */ + addi r4, r4, -32 /* n = n - 32 */ + bneid r4, a_bu2_loop /* while (n) loop */ + addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */ + +a_block_done: + addi r4, r0, 4 /* n = 4 */ + cmpu r4, r4, r7 /* n = c - n (unsigned) */ + blti r4, a_xfer_end /* if n < 0, less than one word to transfer */ + +a_word_xfer: + andi r4, r7, 0xfffffffc /* n = c & ~3 */ + addi r10, r0, 0 /* offset = 0 */ + + andi r9, r6, 3 /* t1 = s & 3 */ + /* if temp != 0, unaligned transfers needed */ + bnei r9, a_word_unaligned + +a_word_aligned: + lw r9, r6, r10 /* t1 = *(s+offset) */ + sw r9, r5, r10 /* *(d+offset) = t1 */ + addi r4, r4,-4 /* n-- */ + bneid r4, a_word_aligned /* loop */ + addi r10, r10, 4 /* offset++ (IN DELAY SLOT) */ + + bri a_word_done + +a_word_unaligned: + andi r8, r6, 0xfffffffc /* as = s & ~3 */ + lwi r11, r8, 0 /* h = *(as + 0) */ + addi r8, r8, 4 /* as = as + 4 */ + + addi r9, r9, -1 + beqi r9, a_word_u1 /* t1 was 1 => 1 byte offset */ + addi r9, r9, -1 + beqi r9, a_word_u2 /* t1 was 2 => 2 byte offset */ + +a_word_u3: + BSLLI r11, r11, 24 /* h = h << 24 */ +a_wu3_loop: + lw r12, r8, r10 /* v = *(as + offset) */ + BSRLI r9, r12, 8 /* t1 = v >> 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + sw r9, r5, r10 /* *(d + offset) = t1 */ + BSLLI r11, r12, 24 /* h = v << 24 */ + addi r4, r4,-4 /* n = n - 4 */ + bneid r4, a_wu3_loop /* while (n) loop */ + addi r10, r10, 4 /* offset = ofset + 4 (IN DELAY SLOT) */ + + bri a_word_done + +a_word_u1: + BSLLI r11, r11, 8 /* h = h << 8 */ +a_wu1_loop: + lw r12, r8, r10 /* v = *(as + offset) */ + BSRLI r9, r12, 24 /* t1 = v >> 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + sw r9, r5, r10 /* *(d + offset) = t1 */ + BSLLI r11, r12, 8 /* h = v << 8 */ + addi r4, r4,-4 /* n = n - 4 */ + bneid r4, a_wu1_loop /* while (n) loop */ + addi r10, r10, 4 /* offset = ofset + 4 (IN DELAY SLOT) */ + + bri a_word_done + +a_word_u2: + BSLLI r11, r11, 16 /* h = h << 16 */ +a_wu2_loop: + lw r12, r8, r10 /* v = *(as + offset) */ + BSRLI r9, r12, 16 /* t1 = v >> 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + sw r9, r5, r10 /* *(d + offset) = t1 */ + BSLLI r11, r12, 16 /* h = v << 16 */ + addi r4, r4,-4 /* n = n - 4 */ + bneid r4, a_wu2_loop /* while (n) loop */ + addi r10, r10, 4 /* offset = ofset + 4 (IN DELAY SLOT) */ + +a_word_done: + add r5, r5, r10 /* d = d + offset */ + add r6, r6, r10 /* s = s + offset */ + rsub r7, r10, r7 /* c = c - offset */ + +a_xfer_end: +a_xfer_end_loop: + beqi r7, a_done /* while (c) */ + lbui r9, r6, 0 /* t1 = *s */ + addi r6, r6, 1 /* s++ */ + sbi r9, r5, 0 /* *d = t1 */ + addi r7, r7, -1 /* c-- */ + brid a_xfer_end_loop /* loop */ + addi r5, r5, 1 /* d++ (IN DELAY SLOT) */ + +a_done: + rtsd r15, 8 + nop + +.size memcpy, . - memcpy +.end memcpy +libc_hidden_def(memcpy) diff --git a/libc/string/microblaze/memmove.S b/libc/string/microblaze/memmove.S new file mode 100644 index 000000000..6bac01620 --- /dev/null +++ b/libc/string/microblaze/memmove.S @@ -0,0 +1,356 @@ +/* + * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu> + * Copyright (C) 2008-2009 PetaLogix + * Copyright (C) 2008 Jim Law - Iris LP All rights reserved. + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file COPYING in the main directory of this + * archive for more details. 
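The a_block_u1/u2/u3 and a_word_u* loops in the MicroBlaze memcpy above handle a word-aligned destination whose source sits 1-3 bytes into its word: they read only aligned source words and build each output word from two neighbours with a shift/OR pair. A rough C rendering of the big-endian case (the BSLLI/BSRLI macros swap the shift directions for the little-endian build); the helper name and 32-bit word size are illustrative, and, like the assembly, it reads the trailing aligned source word in full:

#include <stddef.h>
#include <stdint.h>

/* dst is 32-bit aligned, src is offset by 1..3 bytes inside its word. */
static void copy_words_misaligned_src(uint32_t *dst, const unsigned char *src,
				      size_t nwords)
{
	size_t off = (uintptr_t)src & 3;		/* 1, 2 or 3 */
	const uint32_t *as = (const uint32_t *)(src - off);
	unsigned lsh = 8 * off;				/* bits kept from the head word */
	unsigned rsh = 32 - lsh;

	uint32_t h = *as++;				/* prime with the first aligned word */
	while (nwords--) {
		uint32_t v = *as++;
		*dst++ = (h << lsh) | (v >> rsh);	/* stitch two aligned words together */
		h = v;
	}
}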
+ * + * Written by Jim Law <jlaw@irispower.com> + * + * intended to replace: + * memcpy in memcpy.c and + * memmove in memmove.c + * ... in arch/microblaze/lib + * + * + * assly_fastcopy.S + * + * Attempt at quicker memcpy and memmove for MicroBlaze + * Input : Operand1 in Reg r5 - destination address + * Operand2 in Reg r6 - source address + * Operand3 in Reg r7 - number of bytes to transfer + * Output: Result in Reg r3 - starting destinaition address + * + * + * Explanation: + * Perform (possibly unaligned) copy of a block of memory + * between mem locations with size of xfer spec'd in bytes + */ + + .globl memmove + .type memmove, @function + .ent memmove + +#ifdef __MICROBLAZEEL__ +# define BSLLI bsrli +# define BSRLI bslli +#else +# define BSLLI bslli +# define BSRLI bsrli +#endif + +memmove: + cmpu r4, r5, r6 /* n = s - d */ + bgei r4, HIDDEN_JUMPTARGET(memcpy) + +fast_memcpy_descending: + /* move d to return register as value of function */ + addi r3, r5, 0 + + add r5, r5, r7 /* d = d + c */ + add r6, r6, r7 /* s = s + c */ + + addi r4, r0, 4 /* n = 4 */ + cmpu r4, r4, r7 /* n = c - n (unsigned) */ + blti r4,d_xfer_end /* if n < 0, less than one word to transfer */ + + /* transfer first 0~3 bytes to get aligned dest address */ + andi r4, r5, 3 /* n = d & 3 */ + /* if zero, destination already aligned */ + beqi r4,d_dalign_done + rsub r7, r4, r7 /* c = c - n adjust c */ + +d_xfer_first_loop: + /* if no bytes left to transfer, transfer the bulk */ + beqi r4,d_dalign_done + addi r6, r6, -1 /* s-- */ + addi r5, r5, -1 /* d-- */ + lbui r11, r6, 0 /* h = *s */ + sbi r11, r5, 0 /* *d = h */ + brid d_xfer_first_loop /* loop */ + addi r4, r4, -1 /* n-- (IN DELAY SLOT) */ + +d_dalign_done: + addi r4, r0, 32 /* n = 32 */ + cmpu r4, r4, r7 /* n = c - n (unsigned) */ + /* if n < 0, less than one block to transfer */ + blti r4, d_block_done + +d_block_xfer: + andi r4, r7, 0xffffffe0 /* n = c & ~31 */ + rsub r7, r4, r7 /* c = c - n */ + + andi r9, r6, 3 /* t1 = s & 3 */ + /* if temp != 0, unaligned transfers needed */ + bnei r9, d_block_unaligned + +d_block_aligned: + addi r6, r6, -32 /* s = s - 32 */ + addi r5, r5, -32 /* d = d - 32 */ + lwi r9, r6, 28 /* t1 = *(s + 28) */ + lwi r10, r6, 24 /* t2 = *(s + 24) */ + lwi r11, r6, 20 /* t3 = *(s + 20) */ + lwi r12, r6, 16 /* t4 = *(s + 16) */ + swi r9, r5, 28 /* *(d + 28) = t1 */ + swi r10, r5, 24 /* *(d + 24) = t2 */ + swi r11, r5, 20 /* *(d + 20) = t3 */ + swi r12, r5, 16 /* *(d + 16) = t4 */ + lwi r9, r6, 12 /* t1 = *(s + 12) */ + lwi r10, r6, 8 /* t2 = *(s + 8) */ + lwi r11, r6, 4 /* t3 = *(s + 4) */ + lwi r12, r6, 0 /* t4 = *(s + 0) */ + swi r9, r5, 12 /* *(d + 12) = t1 */ + swi r10, r5, 8 /* *(d + 8) = t2 */ + swi r11, r5, 4 /* *(d + 4) = t3 */ + addi r4, r4, -32 /* n = n - 32 */ + bneid r4, d_block_aligned /* while (n) loop */ + swi r12, r5, 0 /* *(d + 0) = t4 (IN DELAY SLOT) */ + bri d_block_done + +d_block_unaligned: + andi r8, r6, 0xfffffffc /* as = s & ~3 */ + rsub r6, r4, r6 /* s = s - n */ + lwi r11, r8, 0 /* h = *(as + 0) */ + + addi r9, r9, -1 + beqi r9,d_block_u1 /* t1 was 1 => 1 byte offset */ + addi r9, r9, -1 + beqi r9,d_block_u2 /* t1 was 2 => 2 byte offset */ + +d_block_u3: + BSRLI r11, r11, 8 /* h = h >> 8 */ +d_bu3_loop: + addi r8, r8, -32 /* as = as - 32 */ + addi r5, r5, -32 /* d = d - 32 */ + lwi r12, r8, 28 /* v = *(as + 28) */ + BSLLI r9, r12, 24 /* t1 = v << 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 28 /* *(d + 28) = t1 */ + BSRLI r11, r12, 8 /* h = v >> 8 */ + lwi r12, r8, 24 /* v = *(as + 24) */ + BSLLI r9, 
r12, 24 /* t1 = v << 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 24 /* *(d + 24) = t1 */ + BSRLI r11, r12, 8 /* h = v >> 8 */ + lwi r12, r8, 20 /* v = *(as + 20) */ + BSLLI r9, r12, 24 /* t1 = v << 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 20 /* *(d + 20) = t1 */ + BSRLI r11, r12, 8 /* h = v >> 8 */ + lwi r12, r8, 16 /* v = *(as + 16) */ + BSLLI r9, r12, 24 /* t1 = v << 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 16 /* *(d + 16) = t1 */ + BSRLI r11, r12, 8 /* h = v >> 8 */ + lwi r12, r8, 12 /* v = *(as + 12) */ + BSLLI r9, r12, 24 /* t1 = v << 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 12 /* *(d + 112) = t1 */ + BSRLI r11, r12, 8 /* h = v >> 8 */ + lwi r12, r8, 8 /* v = *(as + 8) */ + BSLLI r9, r12, 24 /* t1 = v << 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 8 /* *(d + 8) = t1 */ + BSRLI r11, r12, 8 /* h = v >> 8 */ + lwi r12, r8, 4 /* v = *(as + 4) */ + BSLLI r9, r12, 24 /* t1 = v << 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 4 /* *(d + 4) = t1 */ + BSRLI r11, r12, 8 /* h = v >> 8 */ + lwi r12, r8, 0 /* v = *(as + 0) */ + BSLLI r9, r12, 24 /* t1 = v << 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 0 /* *(d + 0) = t1 */ + addi r4, r4, -32 /* n = n - 32 */ + bneid r4, d_bu3_loop /* while (n) loop */ + BSRLI r11, r12, 8 /* h = v >> 8 (IN DELAY SLOT) */ + bri d_block_done + +d_block_u1: + BSRLI r11, r11, 24 /* h = h >> 24 */ +d_bu1_loop: + addi r8, r8, -32 /* as = as - 32 */ + addi r5, r5, -32 /* d = d - 32 */ + lwi r12, r8, 28 /* v = *(as + 28) */ + BSLLI r9, r12, 8 /* t1 = v << 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 28 /* *(d + 28) = t1 */ + BSRLI r11, r12, 24 /* h = v >> 24 */ + lwi r12, r8, 24 /* v = *(as + 24) */ + BSLLI r9, r12, 8 /* t1 = v << 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 24 /* *(d + 24) = t1 */ + BSRLI r11, r12, 24 /* h = v >> 24 */ + lwi r12, r8, 20 /* v = *(as + 20) */ + BSLLI r9, r12, 8 /* t1 = v << 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 20 /* *(d + 20) = t1 */ + BSRLI r11, r12, 24 /* h = v >> 24 */ + lwi r12, r8, 16 /* v = *(as + 16) */ + BSLLI r9, r12, 8 /* t1 = v << 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 16 /* *(d + 16) = t1 */ + BSRLI r11, r12, 24 /* h = v >> 24 */ + lwi r12, r8, 12 /* v = *(as + 12) */ + BSLLI r9, r12, 8 /* t1 = v << 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 12 /* *(d + 112) = t1 */ + BSRLI r11, r12, 24 /* h = v >> 24 */ + lwi r12, r8, 8 /* v = *(as + 8) */ + BSLLI r9, r12, 8 /* t1 = v << 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 8 /* *(d + 8) = t1 */ + BSRLI r11, r12, 24 /* h = v >> 24 */ + lwi r12, r8, 4 /* v = *(as + 4) */ + BSLLI r9, r12, 8 /* t1 = v << 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 4 /* *(d + 4) = t1 */ + BSRLI r11, r12, 24 /* h = v >> 24 */ + lwi r12, r8, 0 /* v = *(as + 0) */ + BSLLI r9, r12, 8 /* t1 = v << 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 0 /* *(d + 0) = t1 */ + addi r4, r4, -32 /* n = n - 32 */ + bneid r4, d_bu1_loop /* while (n) loop */ + BSRLI r11, r12, 24 /* h = v >> 24 (IN DELAY SLOT) */ + bri d_block_done + +d_block_u2: + BSRLI r11, r11, 16 /* h = h >> 16 */ +d_bu2_loop: + addi r8, r8, -32 /* as = as - 32 */ + addi r5, r5, -32 /* d = d - 32 */ + lwi r12, r8, 28 /* v = *(as + 28) */ + BSLLI r9, r12, 16 /* t1 = v << 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 28 /* *(d + 28) = t1 */ + BSRLI r11, r12, 16 /* h = v >> 16 */ + lwi r12, r8, 24 /* v = *(as + 24) */ + BSLLI r9, r12, 16 /* t1 = v << 16 */ + or r9, r11, r9 /* t1 = h 
| t1 */ + swi r9, r5, 24 /* *(d + 24) = t1 */ + BSRLI r11, r12, 16 /* h = v >> 16 */ + lwi r12, r8, 20 /* v = *(as + 20) */ + BSLLI r9, r12, 16 /* t1 = v << 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 20 /* *(d + 20) = t1 */ + BSRLI r11, r12, 16 /* h = v >> 16 */ + lwi r12, r8, 16 /* v = *(as + 16) */ + BSLLI r9, r12, 16 /* t1 = v << 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 16 /* *(d + 16) = t1 */ + BSRLI r11, r12, 16 /* h = v >> 16 */ + lwi r12, r8, 12 /* v = *(as + 12) */ + BSLLI r9, r12, 16 /* t1 = v << 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 12 /* *(d + 112) = t1 */ + BSRLI r11, r12, 16 /* h = v >> 16 */ + lwi r12, r8, 8 /* v = *(as + 8) */ + BSLLI r9, r12, 16 /* t1 = v << 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 8 /* *(d + 8) = t1 */ + BSRLI r11, r12, 16 /* h = v >> 16 */ + lwi r12, r8, 4 /* v = *(as + 4) */ + BSLLI r9, r12, 16 /* t1 = v << 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 4 /* *(d + 4) = t1 */ + BSRLI r11, r12, 16 /* h = v >> 16 */ + lwi r12, r8, 0 /* v = *(as + 0) */ + BSLLI r9, r12, 16 /* t1 = v << 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + swi r9, r5, 0 /* *(d + 0) = t1 */ + addi r4, r4, -32 /* n = n - 32 */ + bneid r4, d_bu2_loop /* while (n) loop */ + BSRLI r11, r12, 16 /* h = v >> 16 (IN DELAY SLOT) */ + +d_block_done: + addi r4, r0, 4 /* n = 4 */ + cmpu r4, r4, r7 /* n = c - n (unsigned) */ + blti r4,d_xfer_end /* if n < 0, less than one word to transfer */ + +d_word_xfer: + andi r4, r7, 0xfffffffc /* n = c & ~3 */ + rsub r5, r4, r5 /* d = d - n */ + rsub r6, r4, r6 /* s = s - n */ + rsub r7, r4, r7 /* c = c - n */ + + andi r9, r6, 3 /* t1 = s & 3 */ + /* if temp != 0, unaligned transfers needed */ + bnei r9, d_word_unaligned + +d_word_aligned: + addi r4, r4,-4 /* n-- */ + lw r9, r6, r4 /* t1 = *(s+n) */ + bneid r4, d_word_aligned /* loop */ + sw r9, r5, r4 /* *(d+n) = t1 (IN DELAY SLOT) */ + + bri d_word_done + +d_word_unaligned: + andi r8, r6, 0xfffffffc /* as = s & ~3 */ + lw r11, r8, r4 /* h = *(as + n) */ + + addi r9, r9, -1 + beqi r9,d_word_u1 /* t1 was 1 => 1 byte offset */ + addi r9, r9, -1 + beqi r9,d_word_u2 /* t1 was 2 => 2 byte offset */ + +d_word_u3: + BSRLI r11, r11, 8 /* h = h >> 8 */ +d_wu3_loop: + addi r4, r4,-4 /* n = n - 4 */ + lw r12, r8, r4 /* v = *(as + n) */ + BSLLI r9, r12, 24 /* t1 = v << 24 */ + or r9, r11, r9 /* t1 = h | t1 */ + sw r9, r5, r4 /* *(d + n) = t1 */ + bneid r4, d_wu3_loop /* while (n) loop */ + BSRLI r11, r12, 8 /* h = v >> 8 (IN DELAY SLOT) */ + + bri d_word_done + +d_word_u1: + BSRLI r11, r11, 24 /* h = h >> 24 */ +d_wu1_loop: + addi r4, r4,-4 /* n = n - 4 */ + lw r12, r8, r4 /* v = *(as + n) */ + BSLLI r9, r12, 8 /* t1 = v << 8 */ + or r9, r11, r9 /* t1 = h | t1 */ + sw r9, r5, r4 /* *(d + n) = t1 */ + bneid r4, d_wu1_loop /* while (n) loop */ + BSRLI r11, r12, 24 /* h = v >> 24 (IN DELAY SLOT) */ + + bri d_word_done + +d_word_u2: + BSRLI r11, r11, 16 /* h = h >> 16 */ +d_wu2_loop: + addi r4, r4,-4 /* n = n - 4 */ + lw r12, r8, r4 /* v = *(as + n) */ + BSLLI r9, r12, 16 /* t1 = v << 16 */ + or r9, r11, r9 /* t1 = h | t1 */ + sw r9, r5, r4 /* *(d + n) = t1 */ + bneid r4, d_wu2_loop /* while (n) loop */ + BSRLI r11, r12, 16 /* h = v >> 16 (IN DELAY SLOT) */ + +d_word_done: + +d_xfer_end: +d_xfer_end_loop: + beqi r7, a_done /* while (c) */ + addi r6, r6, -1 /* s-- */ + lbui r9, r6, 0 /* t1 = *s */ + addi r5, r5, -1 /* d-- */ + sbi r9, r5, 0 /* *d = t1 */ + brid d_xfer_end_loop /* loop */ + addi r7, r7, -1 /* c-- (IN DELAY SLOT) */ + +a_done: +d_done: + rtsd r15, 
8 + nop + +.size memmove, . - memmove +.end memmove +libc_hidden_def(memmove) diff --git a/libc/string/mips/memcpy.S b/libc/string/mips/memcpy.S index 9b05ee6da..59f9f0a3a 100644 --- a/libc/string/mips/memcpy.S +++ b/libc/string/mips/memcpy.S @@ -1,6 +1,5 @@ -/* Copyright (C) 2002, 2003 Free Software Foundation, Inc. +/* Copyright (C) 2012-2015 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Hartvig Ekner <hartvige@mips.com>, 2002. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -13,245 +12,861 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ -#include <features.h> -/*#include <sysdep.h>*/ -#include <endian.h> -#include "sysdep.h" +#ifdef ANDROID_CHANGES +# include "machine/asm.h" +# include "machine/regdef.h" +# define USE_MEMMOVE_FOR_OVERLAP +# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED +# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE +#elif _LIBC +# include <sysdep.h> +# include <sys/regdef.h> +# include <sys/asm.h> +# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED +# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE +#elif defined _COMPILING_NEWLIB +# include "machine/asm.h" +# include "machine/regdef.h" +# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED +# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE +#else +# include <sys/regdef.h> +# include <sys/asm.h> +#endif + +#if (_MIPS_ISA == _MIPS_ISA_MIPS4) || (_MIPS_ISA == _MIPS_ISA_MIPS5) || \ + (_MIPS_ISA == _MIPS_ISA_MIPS32) || (_MIPS_ISA == _MIPS_ISA_MIPS64) +# ifndef DISABLE_PREFETCH +# define USE_PREFETCH +# endif +#endif + +#if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABI64) || (_MIPS_SIM == _ABIN32)) +# ifndef DISABLE_DOUBLE +# define USE_DOUBLE +# endif +#endif + +/* Some asm.h files do not have the L macro definition. */ +#ifndef L +# if _MIPS_SIM == _ABIO32 +# define L(label) $L ## label +# else +# define L(label) .L ## label +# endif +#endif + +/* Some asm.h files do not have the PTR_ADDIU macro definition. */ +#ifndef PTR_ADDIU +# ifdef USE_DOUBLE +# define PTR_ADDIU daddiu +# else +# define PTR_ADDIU addiu +# endif +#endif + +/* Some asm.h files do not have the PTR_SRA macro definition. */ +#ifndef PTR_SRA +# ifdef USE_DOUBLE +# define PTR_SRA dsra +# else +# define PTR_SRA sra +# endif +#endif + +/* New R6 instructions that may not be in asm.h. */ +#ifndef PTR_LSA +# if _MIPS_SIM == _ABI64 +# define PTR_LSA dlsa +# else +# define PTR_LSA lsa +# endif +#endif + +/* + * Using PREFETCH_HINT_LOAD_STREAMED instead of PREFETCH_LOAD on load + * prefetches appears to offer a slight preformance advantage. + * + * Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE + * or PREFETCH_STORE_STREAMED offers a large performance advantage + * but PREPAREFORSTORE has some special restrictions to consider. + * + * Prefetch with the 'prepare for store' hint does not copy a memory + * location into the cache, it just allocates a cache line and zeros + * it out. This means that if you do not write to the entire cache + * line before writing it out to memory some data will get zero'ed out + * when the cache line is written back to memory and data will be lost. 
+ * + * Also if you are using this memcpy to copy overlapping buffers it may + * not behave correctly when using the 'prepare for store' hint. If you + * use the 'prepare for store' prefetch on a memory area that is in the + * memcpy source (as well as the memcpy destination), then you will get + * some data zero'ed out before you have a chance to read it and data will + * be lost. + * + * If you are going to use this memcpy routine with the 'prepare for store' + * prefetch you may want to set USE_MEMMOVE_FOR_OVERLAP in order to avoid + * the problem of running memcpy on overlapping buffers. + * + * There are ifdef'ed sections of this memcpy to make sure that it does not + * do prefetches on cache lines that are not going to be completely written. + * This code is only needed and only used when PREFETCH_STORE_HINT is set to + * PREFETCH_HINT_PREPAREFORSTORE. This code assumes that cache lines are + * 32 bytes and if the cache line is larger it will not work correctly. + */ + +#ifdef USE_PREFETCH +# define PREFETCH_HINT_LOAD 0 +# define PREFETCH_HINT_STORE 1 +# define PREFETCH_HINT_LOAD_STREAMED 4 +# define PREFETCH_HINT_STORE_STREAMED 5 +# define PREFETCH_HINT_LOAD_RETAINED 6 +# define PREFETCH_HINT_STORE_RETAINED 7 +# define PREFETCH_HINT_WRITEBACK_INVAL 25 +# define PREFETCH_HINT_PREPAREFORSTORE 30 + +/* + * If we have not picked out what hints to use at this point use the + * standard load and store prefetch hints. + */ +# ifndef PREFETCH_STORE_HINT +# define PREFETCH_STORE_HINT PREFETCH_HINT_STORE +# endif +# ifndef PREFETCH_LOAD_HINT +# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD +# endif + +/* + * We double everything when USE_DOUBLE is true so we do 2 prefetches to + * get 64 bytes in that case. The assumption is that each individual + * prefetch brings in 32 bytes. + */ + +# ifdef USE_DOUBLE +# define PREFETCH_CHUNK 64 +# define PREFETCH_FOR_LOAD(chunk, reg) \ + pref PREFETCH_LOAD_HINT, (chunk)*64(reg); \ + pref PREFETCH_LOAD_HINT, ((chunk)*64)+32(reg) +# define PREFETCH_FOR_STORE(chunk, reg) \ + pref PREFETCH_STORE_HINT, (chunk)*64(reg); \ + pref PREFETCH_STORE_HINT, ((chunk)*64)+32(reg) +# else +# define PREFETCH_CHUNK 32 +# define PREFETCH_FOR_LOAD(chunk, reg) \ + pref PREFETCH_LOAD_HINT, (chunk)*32(reg) +# define PREFETCH_FOR_STORE(chunk, reg) \ + pref PREFETCH_STORE_HINT, (chunk)*32(reg) +# endif +/* MAX_PREFETCH_SIZE is the maximum size of a prefetch, it must not be less + * than PREFETCH_CHUNK, the assumed size of each prefetch. If the real size + * of a prefetch is greater than MAX_PREFETCH_SIZE and the PREPAREFORSTORE + * hint is used, the code will not work correctly. If PREPAREFORSTORE is not + * used then MAX_PREFETCH_SIZE does not matter. */ +# define MAX_PREFETCH_SIZE 128 +/* PREFETCH_LIMIT is set based on the fact that we never use an offset greater + * than 5 on a STORE prefetch and that a single prefetch can never be larger + * than MAX_PREFETCH_SIZE. We add the extra 32 when USE_DOUBLE is set because + * we actually do two prefetches in that case, one 32 bytes after the other. */ +# ifdef USE_DOUBLE +# define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + 32 + MAX_PREFETCH_SIZE +# else +# define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + MAX_PREFETCH_SIZE +# endif +# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) \ + && ((PREFETCH_CHUNK * 4) < MAX_PREFETCH_SIZE) +/* We cannot handle this because the initial prefetches may fetch bytes that + * are before the buffer being copied. 
We start copies with an offset + * of 4 so avoid this situation when using PREPAREFORSTORE. */ +#error "PREFETCH_CHUNK is too large and/or MAX_PREFETCH_SIZE is too small." +# endif +#else /* USE_PREFETCH not defined */ +# define PREFETCH_FOR_LOAD(offset, reg) +# define PREFETCH_FOR_STORE(offset, reg) +#endif + +#if __mips_isa_rev > 5 +# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) +# undef PREFETCH_STORE_HINT +# define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED +# endif +# define R6_CODE +#endif -/* void *memcpy(void *s1, const void *s2, size_t n); */ +/* Allow the routine to be named something else if desired. */ +#ifndef MEMCPY_NAME +# define MEMCPY_NAME memcpy +#endif + +/* We use these 32/64 bit registers as temporaries to do the copying. */ +#define REG0 t0 +#define REG1 t1 +#define REG2 t2 +#define REG3 t3 +#if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABIO32) || (_MIPS_SIM == _ABIO64)) +# define REG4 t4 +# define REG5 t5 +# define REG6 t6 +# define REG7 t7 +#else +# define REG4 ta0 +# define REG5 ta1 +# define REG6 ta2 +# define REG7 ta3 +#endif -#ifdef __mips64 +/* We load/store 64 bits at a time when USE_DOUBLE is true. + * The C_ prefix stands for CHUNK and is used to avoid macro name + * conflicts with system header files. */ -#include <sys/asm.h> +#ifdef USE_DOUBLE +# define C_ST sd +# define C_LD ld +# ifdef __MIPSEB +# define C_LDHI ldl /* high part is left in big-endian */ +# define C_STHI sdl /* high part is left in big-endian */ +# define C_LDLO ldr /* low part is right in big-endian */ +# define C_STLO sdr /* low part is right in big-endian */ +# else +# define C_LDHI ldr /* high part is right in little-endian */ +# define C_STHI sdr /* high part is right in little-endian */ +# define C_LDLO ldl /* low part is left in little-endian */ +# define C_STLO sdl /* low part is left in little-endian */ +# endif +# define C_ALIGN dalign /* r6 align instruction */ +#else +# define C_ST sw +# define C_LD lw +# ifdef __MIPSEB +# define C_LDHI lwl /* high part is left in big-endian */ +# define C_STHI swl /* high part is left in big-endian */ +# define C_LDLO lwr /* low part is right in big-endian */ +# define C_STLO swr /* low part is right in big-endian */ +# else +# define C_LDHI lwr /* high part is right in little-endian */ +# define C_STHI swr /* high part is right in little-endian */ +# define C_LDLO lwl /* low part is left in little-endian */ +# define C_STLO swl /* low part is left in little-endian */ +# endif +# define C_ALIGN align /* r6 align instruction */ +#endif -#if __BYTE_ORDER == __BIG_ENDIAN -# define LDHI ldl /* high part is left in big-endian */ -# define SDHI sdl /* high part is left in big-endian */ -# define LDLO ldr /* low part is right in big-endian */ -# define SDLO sdr /* low part is right in big-endian */ +/* Bookkeeping values for 32 vs. 64 bit mode. */ +#ifdef USE_DOUBLE +# define NSIZE 8 +# define NSIZEMASK 0x3f +# define NSIZEDMASK 0x7f #else -# define LDHI ldr /* high part is right in little-endian */ -# define SDHI sdr /* high part is right in little-endian */ -# define LDLO ldl /* low part is left in little-endian */ -# define SDLO sdl /* low part is left in little-endian */ +# define NSIZE 4 +# define NSIZEMASK 0x1f +# define NSIZEDMASK 0x3f #endif +#define UNIT(unit) ((unit)*NSIZE) +#define UNITM1(unit) (((unit)*NSIZE)-1) -ENTRY (memcpy) +#ifdef ANDROID_CHANGES +LEAF(MEMCPY_NAME, 0) +#else +LEAF(MEMCPY_NAME) +#endif + .set nomips16 .set noreorder +/* + * Below we handle the case where memcpy is called with overlapping src and dst. 
+ * Although memcpy is not required to handle this case, some parts of Android + * like Skia rely on such usage. We call memmove to handle such cases. + */ +#ifdef USE_MEMMOVE_FOR_OVERLAP + PTR_SUBU t0,a0,a1 + PTR_SRA t2,t0,31 + xor t1,t0,t2 + PTR_SUBU t0,t1,t2 + sltu t2,t0,a2 + beq t2,zero,L(memcpy) + la t9,memmove + jr t9 + nop +L(memcpy): +#endif +/* + * If the size is less than 2*NSIZE (8 or 16), go to L(lastb). Regardless of + * size, copy dst pointer to v0 for the return value. + */ + slti t2,a2,(2 * NSIZE) + bne t2,zero,L(lasts) +#if defined(RETURN_FIRST_PREFETCH) || defined(RETURN_LAST_PREFETCH) + move v0,zero +#else + move v0,a0 +#endif - slti t0, a2, 16 # Less than 16? - bne t0, zero, L(last16) - move v0, a0 # Setup exit value before too late - - xor t0, a1, a0 # Find a0/a1 displacement - andi t0, 0x7 - bne t0, zero, L(shift) # Go handle the unaligned case - PTR_SUBU t1, zero, a1 - andi t1, 0x7 # a0/a1 are aligned, but are we - beq t1, zero, L(chk8w) # starting in the middle of a word? - PTR_SUBU a2, t1 - LDHI t0, 0(a1) # Yes we are... take care of that - PTR_ADDU a1, t1 - SDHI t0, 0(a0) - PTR_ADDU a0, t1 - -L(chk8w): - andi t0, a2, 0x3f # 64 or more bytes left? - beq t0, a2, L(chk1w) - PTR_SUBU a3, a2, t0 # Yes - PTR_ADDU a3, a1 # a3 = end address of loop - move a2, t0 # a2 = what will be left after loop -L(lop8w): - ld t0, 0(a1) # Loop taking 8 words at a time - ld t1, 8(a1) - ld t2, 16(a1) - ld t3, 24(a1) - ld ta0, 32(a1) - ld ta1, 40(a1) - ld ta2, 48(a1) - ld ta3, 56(a1) - PTR_ADDIU a0, 64 - PTR_ADDIU a1, 64 - sd t0, -64(a0) - sd t1, -56(a0) - sd t2, -48(a0) - sd t3, -40(a0) - sd ta0, -32(a0) - sd ta1, -24(a0) - sd ta2, -16(a0) - bne a1, a3, L(lop8w) - sd ta3, -8(a0) +#ifndef R6_CODE -L(chk1w): - andi t0, a2, 0x7 # 8 or more bytes left? - beq t0, a2, L(last16) - PTR_SUBU a3, a2, t0 # Yes, handle them one dword at a time - PTR_ADDU a3, a1 # a3 again end address - move a2, t0 -L(lop1w): - ld t0, 0(a1) - PTR_ADDIU a0, 8 - PTR_ADDIU a1, 8 - bne a1, a3, L(lop1w) - sd t0, -8(a0) - -L(last16): - blez a2, L(lst16e) # Handle last 16 bytes, one at a time - PTR_ADDU a3, a2, a1 -L(lst16l): - lb t0, 0(a1) - PTR_ADDIU a0, 1 - PTR_ADDIU a1, 1 - bne a1, a3, L(lst16l) - sb t0, -1(a0) -L(lst16e): - jr ra # Bye, bye - nop +/* + * If src and dst have different alignments, go to L(unaligned), if they + * have the same alignment (but are not actually aligned) do a partial + * load/store to make them aligned. If they are both already aligned + * we can start copying at L(aligned). + */ + xor t8,a1,a0 + andi t8,t8,(NSIZE-1) /* t8 is a0/a1 word-displacement */ + bne t8,zero,L(unaligned) + PTR_SUBU a3, zero, a0 -L(shift): - PTR_SUBU a3, zero, a0 # Src and Dest unaligned - andi a3, 0x7 # (unoptimized case...) 
- beq a3, zero, L(shft1) - PTR_SUBU a2, a3 # a2 = bytes left - LDHI t0, 0(a1) # Take care of first odd part - LDLO t0, 7(a1) - PTR_ADDU a1, a3 - SDHI t0, 0(a0) - PTR_ADDU a0, a3 -L(shft1): - andi t0, a2, 0x7 - PTR_SUBU a3, a2, t0 - PTR_ADDU a3, a1 -L(shfth): - LDHI t1, 0(a1) # Limp through, dword by dword - LDLO t1, 7(a1) - PTR_ADDIU a0, 8 - PTR_ADDIU a1, 8 - bne a1, a3, L(shfth) - sd t1, -8(a0) - b L(last16) # Handle anything which may be left - move a2, t0 + andi a3,a3,(NSIZE-1) /* copy a3 bytes to align a0/a1 */ + beq a3,zero,L(aligned) /* if a3=0, it is already aligned */ + PTR_SUBU a2,a2,a3 /* a2 is the remining bytes count */ - .set reorder -END (memcpy) + C_LDHI t8,0(a1) + PTR_ADDU a1,a1,a3 + C_STHI t8,0(a0) + PTR_ADDU a0,a0,a3 + +#else /* R6_CODE */ + +/* + * Align the destination and hope that the source gets aligned too. If it + * doesn't we jump to L(r6_unaligned*) to do unaligned copies using the r6 + * align instruction. + */ + andi t8,a0,7 + lapc t9,L(atable) + PTR_LSA t9,t8,t9,2 + jrc t9 +L(atable): + bc L(lb0) + bc L(lb7) + bc L(lb6) + bc L(lb5) + bc L(lb4) + bc L(lb3) + bc L(lb2) + bc L(lb1) +L(lb7): + lb a3, 6(a1) + sb a3, 6(a0) +L(lb6): + lb a3, 5(a1) + sb a3, 5(a0) +L(lb5): + lb a3, 4(a1) + sb a3, 4(a0) +L(lb4): + lb a3, 3(a1) + sb a3, 3(a0) +L(lb3): + lb a3, 2(a1) + sb a3, 2(a0) +L(lb2): + lb a3, 1(a1) + sb a3, 1(a0) +L(lb1): + lb a3, 0(a1) + sb a3, 0(a0) + + li t9,8 + subu t8,t9,t8 + PTR_SUBU a2,a2,t8 + PTR_ADDU a0,a0,t8 + PTR_ADDU a1,a1,t8 +L(lb0): -#else /* !__mips64 */ + andi t8,a1,(NSIZE-1) + lapc t9,L(jtable) + PTR_LSA t9,t8,t9,2 + jrc t9 +L(jtable): + bc L(aligned) + bc L(r6_unaligned1) + bc L(r6_unaligned2) + bc L(r6_unaligned3) +# ifdef USE_DOUBLE + bc L(r6_unaligned4) + bc L(r6_unaligned5) + bc L(r6_unaligned6) + bc L(r6_unaligned7) +# endif +#endif /* R6_CODE */ -#if __BYTE_ORDER == __BIG_ENDIAN -# define LWHI lwl /* high part is left in big-endian */ -# define SWHI swl /* high part is left in big-endian */ -# define LWLO lwr /* low part is right in big-endian */ -# define SWLO swr /* low part is right in big-endian */ +L(aligned): + +/* + * Now dst/src are both aligned to (word or double word) aligned addresses + * Set a2 to count how many bytes we have to copy after all the 64/128 byte + * chunks are copied and a3 to the dst pointer after all the 64/128 byte + * chunks have been copied. We will loop, incrementing a0 and a1 until a0 + * equals a3. + */ + + andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */ + beq a2,t8,L(chkw) /* if a2==t8, no 64-byte/128-byte chunks */ + PTR_SUBU a3,a2,t8 /* subtract from a2 the reminder */ + PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */ + +/* When in the loop we may prefetch with the 'prepare to store' hint, + * in this case the a0+x should not be past the "t0-32" address. This + * means: for x=128 the last "safe" a0 address is "t0-160". Alternatively, + * for x=64 the last "safe" a0 address is "t0-96" In the current version we + * will use "prefetch hint,128(a0)", so "t0-160" is the limit. 
+ */ +#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) + PTR_ADDU t0,a0,a2 /* t0 is the "past the end" address */ + PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */ +#endif + PREFETCH_FOR_LOAD (0, a1) + PREFETCH_FOR_LOAD (1, a1) + PREFETCH_FOR_LOAD (2, a1) + PREFETCH_FOR_LOAD (3, a1) +#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE) + PREFETCH_FOR_STORE (1, a0) + PREFETCH_FOR_STORE (2, a0) + PREFETCH_FOR_STORE (3, a0) +#endif +#if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH) +# if PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE + sltu v1,t9,a0 + bgtz v1,L(skip_set) + nop + PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4) +L(skip_set): +# else + PTR_ADDIU v0,a0,(PREFETCH_CHUNK*1) +# endif +#endif +#if defined(RETURN_LAST_PREFETCH) && defined(USE_PREFETCH) \ + && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE) + PTR_ADDIU v0,a0,(PREFETCH_CHUNK*3) +# ifdef USE_DOUBLE + PTR_ADDIU v0,v0,32 +# endif +#endif +L(loop16w): + C_LD t0,UNIT(0)(a1) +#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) + sltu v1,t9,a0 /* If a0 > t9 don't use next prefetch */ + bgtz v1,L(skip_pref) +#endif + C_LD t1,UNIT(1)(a1) +#ifdef R6_CODE + PREFETCH_FOR_STORE (2, a0) #else -# define LWHI lwr /* high part is right in little-endian */ -# define SWHI swr /* high part is right in little-endian */ -# define LWLO lwl /* low part is left in little-endian */ -# define SWLO swl /* low part is left in little-endian */ + PREFETCH_FOR_STORE (4, a0) + PREFETCH_FOR_STORE (5, a0) +#endif +#if defined(RETURN_LAST_PREFETCH) && defined(USE_PREFETCH) + PTR_ADDIU v0,a0,(PREFETCH_CHUNK*5) +# ifdef USE_DOUBLE + PTR_ADDIU v0,v0,32 +# endif #endif +L(skip_pref): + C_LD REG2,UNIT(2)(a1) + C_LD REG3,UNIT(3)(a1) + C_LD REG4,UNIT(4)(a1) + C_LD REG5,UNIT(5)(a1) + C_LD REG6,UNIT(6)(a1) + C_LD REG7,UNIT(7)(a1) +#ifdef R6_CODE + PREFETCH_FOR_LOAD (3, a1) +#else + PREFETCH_FOR_LOAD (4, a1) +#endif + C_ST t0,UNIT(0)(a0) + C_ST t1,UNIT(1)(a0) + C_ST REG2,UNIT(2)(a0) + C_ST REG3,UNIT(3)(a0) + C_ST REG4,UNIT(4)(a0) + C_ST REG5,UNIT(5)(a0) + C_ST REG6,UNIT(6)(a0) + C_ST REG7,UNIT(7)(a0) -ENTRY (memcpy) - .set noreorder + C_LD t0,UNIT(8)(a1) + C_LD t1,UNIT(9)(a1) + C_LD REG2,UNIT(10)(a1) + C_LD REG3,UNIT(11)(a1) + C_LD REG4,UNIT(12)(a1) + C_LD REG5,UNIT(13)(a1) + C_LD REG6,UNIT(14)(a1) + C_LD REG7,UNIT(15)(a1) +#ifndef R6_CODE + PREFETCH_FOR_LOAD (5, a1) +#endif + C_ST t0,UNIT(8)(a0) + C_ST t1,UNIT(9)(a0) + C_ST REG2,UNIT(10)(a0) + C_ST REG3,UNIT(11)(a0) + C_ST REG4,UNIT(12)(a0) + C_ST REG5,UNIT(13)(a0) + C_ST REG6,UNIT(14)(a0) + C_ST REG7,UNIT(15)(a0) + PTR_ADDIU a0,a0,UNIT(16) /* adding 64/128 to dest */ + bne a0,a3,L(loop16w) + PTR_ADDIU a1,a1,UNIT(16) /* adding 64/128 to src */ + move a2,t8 + +/* Here we have src and dest word-aligned but less than 64-bytes or + * 128 bytes to go. Check for a 32(64) byte chunk and copy if if there + * is one. Otherwise jump down to L(chk1w) to handle the tail end of + * the copy. + */ + +L(chkw): + PREFETCH_FOR_LOAD (0, a1) + andi t8,a2,NSIZEMASK /* Is there a 32-byte/64-byte chunk. 
*/ + /* The t8 is the reminder count past 32-bytes */ + beq a2,t8,L(chk1w) /* When a2=t8, no 32-byte chunk */ + nop + C_LD t0,UNIT(0)(a1) + C_LD t1,UNIT(1)(a1) + C_LD REG2,UNIT(2)(a1) + C_LD REG3,UNIT(3)(a1) + C_LD REG4,UNIT(4)(a1) + C_LD REG5,UNIT(5)(a1) + C_LD REG6,UNIT(6)(a1) + C_LD REG7,UNIT(7)(a1) + PTR_ADDIU a1,a1,UNIT(8) + C_ST t0,UNIT(0)(a0) + C_ST t1,UNIT(1)(a0) + C_ST REG2,UNIT(2)(a0) + C_ST REG3,UNIT(3)(a0) + C_ST REG4,UNIT(4)(a0) + C_ST REG5,UNIT(5)(a0) + C_ST REG6,UNIT(6)(a0) + C_ST REG7,UNIT(7)(a0) + PTR_ADDIU a0,a0,UNIT(8) + +/* + * Here we have less than 32(64) bytes to copy. Set up for a loop to + * copy one word (or double word) at a time. Set a2 to count how many + * bytes we have to copy after all the word (or double word) chunks are + * copied and a3 to the dst pointer after all the (d)word chunks have + * been copied. We will loop, incrementing a0 and a1 until a0 equals a3. + */ +L(chk1w): + andi a2,t8,(NSIZE-1) /* a2 is the reminder past one (d)word chunks */ + beq a2,t8,L(lastw) + PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */ + PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */ - slti t0, a2, 8 # Less than 8? - bne t0, zero, L(last8) - move v0, a0 # Setup exit value before too late - - xor t0, a1, a0 # Find a0/a1 displacement - andi t0, 0x3 - bne t0, zero, L(shift) # Go handle the unaligned case - subu t1, zero, a1 - andi t1, 0x3 # a0/a1 are aligned, but are we - beq t1, zero, L(chk8w) # starting in the middle of a word? - subu a2, t1 - LWHI t0, 0(a1) # Yes we are... take care of that - addu a1, t1 - SWHI t0, 0(a0) - addu a0, t1 - -L(chk8w): - andi t0, a2, 0x1f # 32 or more bytes left? - beq t0, a2, L(chk1w) - subu a3, a2, t0 # Yes - addu a3, a1 # a3 = end address of loop - move a2, t0 # a2 = what will be left after loop -L(lop8w): - lw t0, 0(a1) # Loop taking 8 words at a time - lw t1, 4(a1) - lw t2, 8(a1) - lw t3, 12(a1) - lw t4, 16(a1) - lw t5, 20(a1) - lw t6, 24(a1) - lw t7, 28(a1) - addiu a0, 32 - addiu a1, 32 - sw t0, -32(a0) - sw t1, -28(a0) - sw t2, -24(a0) - sw t3, -20(a0) - sw t4, -16(a0) - sw t5, -12(a0) - sw t6, -8(a0) - bne a1, a3, L(lop8w) - sw t7, -4(a0) - -L(chk1w): - andi t0, a2, 0x3 # 4 or more bytes left? - beq t0, a2, L(last8) - subu a3, a2, t0 # Yes, handle them one word at a time - addu a3, a1 # a3 again end address - move a2, t0 -L(lop1w): - lw t0, 0(a1) - addiu a0, 4 - addiu a1, 4 - bne a1, a3, L(lop1w) - sw t0, -4(a0) - -L(last8): - blez a2, L(lst8e) # Handle last 8 bytes, one at a time - addu a3, a2, a1 -L(lst8l): - lb t0, 0(a1) - addiu a0, 1 - addiu a1, 1 - bne a1, a3, L(lst8l) - sb t0, -1(a0) -L(lst8e): - jr ra # Bye, bye +/* copying in words (4-byte or 8-byte chunks) */ +L(wordCopy_loop): + C_LD REG3,UNIT(0)(a1) + PTR_ADDIU a0,a0,UNIT(1) + PTR_ADDIU a1,a1,UNIT(1) + bne a0,a3,L(wordCopy_loop) + C_ST REG3,UNIT(-1)(a0) + +/* If we have been copying double words, see if we can copy a single word + before doing byte copies. We can have, at most, one word to copy. */ + +L(lastw): +#ifdef USE_DOUBLE + andi t8,a2,3 /* a2 is the remainder past 4 byte chunks. */ + beq t8,a2,L(lastb) + move a2,t8 + lw REG3,0(a1) + sw REG3,0(a0) + PTR_ADDIU a0,a0,4 + PTR_ADDIU a1,a1,4 +#endif + +/* Copy the last 8 (or 16) bytes */ +L(lastb): + blez a2,L(leave) + PTR_ADDU a3,a0,a2 /* a3 is the last dst address */ +L(lastbloop): + lb v1,0(a1) + PTR_ADDIU a0,a0,1 + PTR_ADDIU a1,a1,1 + bne a0,a3,L(lastbloop) + sb v1,-1(a0) +L(leave): + j ra nop -L(shift): - subu a3, zero, a0 # Src and Dest unaligned - andi a3, 0x3 # (unoptimized case...) 
- beq a3, zero, L(shft1) - subu a2, a3 # a2 = bytes left - LWHI t0, 0(a1) # Take care of first odd part - LWLO t0, 3(a1) - addu a1, a3 - SWHI t0, 0(a0) - addu a0, a3 -L(shft1): - andi t0, a2, 0x3 - subu a3, a2, t0 - addu a3, a1 -L(shfth): - LWHI t1, 0(a1) # Limp through, word by word - LWLO t1, 3(a1) - addiu a0, 4 - addiu a1, 4 - bne a1, a3, L(shfth) - sw t1, -4(a0) - b L(last8) # Handle anything which may be left - move a2, t0 +/* We jump here with a memcpy of less than 8 or 16 bytes, depending on + whether or not USE_DOUBLE is defined. Instead of just doing byte + copies, check the alignment and size and use lw/sw if possible. + Otherwise, do byte copies. */ - .set reorder -END (memcpy) +L(lasts): + andi t8,a2,3 + beq t8,a2,L(lastb) + + andi t9,a0,3 + bne t9,zero,L(lastb) + andi t9,a1,3 + bne t9,zero,L(lastb) + + PTR_SUBU a3,a2,t8 + PTR_ADDU a3,a0,a3 + +L(wcopy_loop): + lw REG3,0(a1) + PTR_ADDIU a0,a0,4 + PTR_ADDIU a1,a1,4 + bne a0,a3,L(wcopy_loop) + sw REG3,-4(a0) -#endif /* !__mips64 */ + b L(lastb) + move a2,t8 -libc_hidden_def(memcpy) +#ifndef R6_CODE +/* + * UNALIGNED case, got here with a3 = "negu a0" + * This code is nearly identical to the aligned code above + * but only the destination (not the source) gets aligned + * so we need to do partial loads of the source followed + * by normal stores to the destination (once we have aligned + * the destination). + */ + +L(unaligned): + andi a3,a3,(NSIZE-1) /* copy a3 bytes to align a0/a1 */ + beqz a3,L(ua_chk16w) /* if a3=0, it is already aligned */ + PTR_SUBU a2,a2,a3 /* a2 is the remining bytes count */ + + C_LDHI v1,UNIT(0)(a1) + C_LDLO v1,UNITM1(1)(a1) + PTR_ADDU a1,a1,a3 + C_STHI v1,UNIT(0)(a0) + PTR_ADDU a0,a0,a3 + +/* + * Now the destination (but not the source) is aligned + * Set a2 to count how many bytes we have to copy after all the 64/128 byte + * chunks are copied and a3 to the dst pointer after all the 64/128 byte + * chunks have been copied. We will loop, incrementing a0 and a1 until a0 + * equals a3. + */ + +L(ua_chk16w): + andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? 
*/ + beq a2,t8,L(ua_chkw) /* if a2==t8, no 64-byte/128-byte chunks */ + PTR_SUBU a3,a2,t8 /* subtract from a2 the reminder */ + PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */ + +# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) + PTR_ADDU t0,a0,a2 /* t0 is the "past the end" address */ + PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */ +# endif + PREFETCH_FOR_LOAD (0, a1) + PREFETCH_FOR_LOAD (1, a1) + PREFETCH_FOR_LOAD (2, a1) +# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE) + PREFETCH_FOR_STORE (1, a0) + PREFETCH_FOR_STORE (2, a0) + PREFETCH_FOR_STORE (3, a0) +# endif +# if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH) +# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) + sltu v1,t9,a0 + bgtz v1,L(ua_skip_set) + nop + PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4) +L(ua_skip_set): +# else + PTR_ADDIU v0,a0,(PREFETCH_CHUNK*1) +# endif +# endif +L(ua_loop16w): + PREFETCH_FOR_LOAD (3, a1) + C_LDHI t0,UNIT(0)(a1) + C_LDHI t1,UNIT(1)(a1) + C_LDHI REG2,UNIT(2)(a1) +# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) + sltu v1,t9,a0 + bgtz v1,L(ua_skip_pref) +# endif + C_LDHI REG3,UNIT(3)(a1) + PREFETCH_FOR_STORE (4, a0) + PREFETCH_FOR_STORE (5, a0) +L(ua_skip_pref): + C_LDHI REG4,UNIT(4)(a1) + C_LDHI REG5,UNIT(5)(a1) + C_LDHI REG6,UNIT(6)(a1) + C_LDHI REG7,UNIT(7)(a1) + C_LDLO t0,UNITM1(1)(a1) + C_LDLO t1,UNITM1(2)(a1) + C_LDLO REG2,UNITM1(3)(a1) + C_LDLO REG3,UNITM1(4)(a1) + C_LDLO REG4,UNITM1(5)(a1) + C_LDLO REG5,UNITM1(6)(a1) + C_LDLO REG6,UNITM1(7)(a1) + C_LDLO REG7,UNITM1(8)(a1) + PREFETCH_FOR_LOAD (4, a1) + C_ST t0,UNIT(0)(a0) + C_ST t1,UNIT(1)(a0) + C_ST REG2,UNIT(2)(a0) + C_ST REG3,UNIT(3)(a0) + C_ST REG4,UNIT(4)(a0) + C_ST REG5,UNIT(5)(a0) + C_ST REG6,UNIT(6)(a0) + C_ST REG7,UNIT(7)(a0) + C_LDHI t0,UNIT(8)(a1) + C_LDHI t1,UNIT(9)(a1) + C_LDHI REG2,UNIT(10)(a1) + C_LDHI REG3,UNIT(11)(a1) + C_LDHI REG4,UNIT(12)(a1) + C_LDHI REG5,UNIT(13)(a1) + C_LDHI REG6,UNIT(14)(a1) + C_LDHI REG7,UNIT(15)(a1) + C_LDLO t0,UNITM1(9)(a1) + C_LDLO t1,UNITM1(10)(a1) + C_LDLO REG2,UNITM1(11)(a1) + C_LDLO REG3,UNITM1(12)(a1) + C_LDLO REG4,UNITM1(13)(a1) + C_LDLO REG5,UNITM1(14)(a1) + C_LDLO REG6,UNITM1(15)(a1) + C_LDLO REG7,UNITM1(16)(a1) + PREFETCH_FOR_LOAD (5, a1) + C_ST t0,UNIT(8)(a0) + C_ST t1,UNIT(9)(a0) + C_ST REG2,UNIT(10)(a0) + C_ST REG3,UNIT(11)(a0) + C_ST REG4,UNIT(12)(a0) + C_ST REG5,UNIT(13)(a0) + C_ST REG6,UNIT(14)(a0) + C_ST REG7,UNIT(15)(a0) + PTR_ADDIU a0,a0,UNIT(16) /* adding 64/128 to dest */ + bne a0,a3,L(ua_loop16w) + PTR_ADDIU a1,a1,UNIT(16) /* adding 64/128 to src */ + move a2,t8 + +/* Here we have src and dest word-aligned but less than 64-bytes or + * 128 bytes to go. Check for a 32(64) byte chunk and copy if if there + * is one. Otherwise jump down to L(ua_chk1w) to handle the tail end of + * the copy. */ + +L(ua_chkw): + PREFETCH_FOR_LOAD (0, a1) + andi t8,a2,NSIZEMASK /* Is there a 32-byte/64-byte chunk. 
*/ + /* t8 is the reminder count past 32-bytes */ + beq a2,t8,L(ua_chk1w) /* When a2=t8, no 32-byte chunk */ + nop + C_LDHI t0,UNIT(0)(a1) + C_LDHI t1,UNIT(1)(a1) + C_LDHI REG2,UNIT(2)(a1) + C_LDHI REG3,UNIT(3)(a1) + C_LDHI REG4,UNIT(4)(a1) + C_LDHI REG5,UNIT(5)(a1) + C_LDHI REG6,UNIT(6)(a1) + C_LDHI REG7,UNIT(7)(a1) + C_LDLO t0,UNITM1(1)(a1) + C_LDLO t1,UNITM1(2)(a1) + C_LDLO REG2,UNITM1(3)(a1) + C_LDLO REG3,UNITM1(4)(a1) + C_LDLO REG4,UNITM1(5)(a1) + C_LDLO REG5,UNITM1(6)(a1) + C_LDLO REG6,UNITM1(7)(a1) + C_LDLO REG7,UNITM1(8)(a1) + PTR_ADDIU a1,a1,UNIT(8) + C_ST t0,UNIT(0)(a0) + C_ST t1,UNIT(1)(a0) + C_ST REG2,UNIT(2)(a0) + C_ST REG3,UNIT(3)(a0) + C_ST REG4,UNIT(4)(a0) + C_ST REG5,UNIT(5)(a0) + C_ST REG6,UNIT(6)(a0) + C_ST REG7,UNIT(7)(a0) + PTR_ADDIU a0,a0,UNIT(8) +/* + * Here we have less than 32(64) bytes to copy. Set up for a loop to + * copy one word (or double word) at a time. + */ +L(ua_chk1w): + andi a2,t8,(NSIZE-1) /* a2 is the reminder past one (d)word chunks */ + beq a2,t8,L(ua_smallCopy) + PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */ + PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */ + +/* copying in words (4-byte or 8-byte chunks) */ +L(ua_wordCopy_loop): + C_LDHI v1,UNIT(0)(a1) + C_LDLO v1,UNITM1(1)(a1) + PTR_ADDIU a0,a0,UNIT(1) + PTR_ADDIU a1,a1,UNIT(1) + bne a0,a3,L(ua_wordCopy_loop) + C_ST v1,UNIT(-1)(a0) + +/* Copy the last 8 (or 16) bytes */ +L(ua_smallCopy): + beqz a2,L(leave) + PTR_ADDU a3,a0,a2 /* a3 is the last dst address */ +L(ua_smallCopy_loop): + lb v1,0(a1) + PTR_ADDIU a0,a0,1 + PTR_ADDIU a1,a1,1 + bne a0,a3,L(ua_smallCopy_loop) + sb v1,-1(a0) + + j ra + nop + +#else /* R6_CODE */ + +# ifdef __MIPSEB +# define SWAP_REGS(X,Y) X, Y +# define ALIGN_OFFSET(N) (N) +# else +# define SWAP_REGS(X,Y) Y, X +# define ALIGN_OFFSET(N) (NSIZE-N) +# endif +# define R6_UNALIGNED_WORD_COPY(BYTEOFFSET) \ + andi REG7, a2, (NSIZE-1);/* REG7 is # of bytes to by bytes. */ \ + beq REG7, a2, L(lastb); /* Check for bytes to copy by word */ \ + PTR_SUBU a3, a2, REG7; /* a3 is number of bytes to be copied in */ \ + /* (d)word chunks. */ \ + move a2, REG7; /* a2 is # of bytes to copy byte by byte */ \ + /* after word loop is finished. */ \ + PTR_ADDU REG6, a0, a3; /* REG6 is the dst address after loop. */ \ + PTR_SUBU REG2, a1, t8; /* REG2 is the aligned src address. */ \ + PTR_ADDU a1, a1, a3; /* a1 is addr of source after word loop. */ \ + C_LD t0, UNIT(0)(REG2); /* Load first part of source. */ \ +L(r6_ua_wordcopy##BYTEOFFSET): \ + C_LD t1, UNIT(1)(REG2); /* Load second part of source. */ \ + C_ALIGN REG3, SWAP_REGS(t1,t0), ALIGN_OFFSET(BYTEOFFSET); \ + PTR_ADDIU a0, a0, UNIT(1); /* Increment destination pointer. */ \ + PTR_ADDIU REG2, REG2, UNIT(1); /* Increment aligned source pointer.*/ \ + move t0, t1; /* Move second part of source to first. */ \ + bne a0, REG6,L(r6_ua_wordcopy##BYTEOFFSET); \ + C_ST REG3, UNIT(-1)(a0); \ + j L(lastb); \ + nop + + /* We are generating R6 code, the destination is 4 byte aligned and + the source is not 4 byte aligned. t8 is 1, 2, or 3 depending on the + alignment of the source. 
*/ + +L(r6_unaligned1): + R6_UNALIGNED_WORD_COPY(1) +L(r6_unaligned2): + R6_UNALIGNED_WORD_COPY(2) +L(r6_unaligned3): + R6_UNALIGNED_WORD_COPY(3) +# ifdef USE_DOUBLE +L(r6_unaligned4): + R6_UNALIGNED_WORD_COPY(4) +L(r6_unaligned5): + R6_UNALIGNED_WORD_COPY(5) +L(r6_unaligned6): + R6_UNALIGNED_WORD_COPY(6) +L(r6_unaligned7): + R6_UNALIGNED_WORD_COPY(7) +# endif +#endif /* R6_CODE */ + + .set at + .set reorder +END(MEMCPY_NAME) +#ifndef ANDROID_CHANGES +# ifdef _LIBC +# ifdef __UCLIBC__ +libc_hidden_def(MEMCPY_NAME) +# else +libc_hidden_builtin_def (MEMCPY_NAME) +# endif +# endif +#endif diff --git a/libc/string/mips/memset.S b/libc/string/mips/memset.S index ff0554ff9..43034cebb 100644 --- a/libc/string/mips/memset.S +++ b/libc/string/mips/memset.S @@ -1,6 +1,5 @@ -/* Copyright (C) 2002, 2003 Free Software Foundation, Inc. +/* Copyright (C) 2013-2015 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Hartvig Ekner <hartvige@mips.com>, 2002. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -13,147 +12,420 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ -#include <features.h> -/*#include <sysdep.h>*/ -#include <endian.h> -#include "sysdep.h" +#ifdef ANDROID_CHANGES +# include "machine/asm.h" +# include "machine/regdef.h" +# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE +#elif _LIBC +# include <sysdep.h> +# include <sys/regdef.h> +# include <sys/asm.h> +# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE +#elif defined _COMPILING_NEWLIB +# include "machine/asm.h" +# include "machine/regdef.h" +# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE +#else +# include <sys/regdef.h> +# include <sys/asm.h> +#endif + +/* Check to see if the MIPS architecture we are compiling for supports + prefetching. */ + +#if (__mips == 4) || (__mips == 5) || (__mips == 32) || (__mips == 64) +# ifndef DISABLE_PREFETCH +# define USE_PREFETCH +# endif +#endif + +#if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABI64) || (_MIPS_SIM == _ABIN32)) +# ifndef DISABLE_DOUBLE +# define USE_DOUBLE +# endif +#endif + +#ifndef USE_DOUBLE +# ifndef DISABLE_DOUBLE_ALIGN +# define DOUBLE_ALIGN +# endif +#endif + + +/* Some asm.h files do not have the L macro definition. */ +#ifndef L +# if _MIPS_SIM == _ABIO32 +# define L(label) $L ## label +# else +# define L(label) .L ## label +# endif +#endif + +/* Some asm.h files do not have the PTR_ADDIU macro definition. */ +#ifndef PTR_ADDIU +# ifdef USE_DOUBLE +# define PTR_ADDIU daddiu +# else +# define PTR_ADDIU addiu +# endif +#endif -/* void *memset(void *s, int c, size_t n). */ +/* New R6 instructions that may not be in asm.h. */ +#ifndef PTR_LSA +# if _MIPS_SIM == _ABI64 +# define PTR_LSA dlsa +# else +# define PTR_LSA lsa +# endif +#endif + +/* Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE + or PREFETCH_STORE_STREAMED offers a large performance advantage + but PREPAREFORSTORE has some special restrictions to consider. + + Prefetch with the 'prepare for store' hint does not copy a memory + location into the cache, it just allocates a cache line and zeros + it out. 
This means that if you do not write to the entire cache + line before writing it out to memory some data will get zero'ed out + when the cache line is written back to memory and data will be lost. + + There are ifdef'ed sections of this memcpy to make sure that it does not + do prefetches on cache lines that are not going to be completely written. + This code is only needed and only used when PREFETCH_STORE_HINT is set to + PREFETCH_HINT_PREPAREFORSTORE. This code assumes that cache lines are + less than MAX_PREFETCH_SIZE bytes and if the cache line is larger it will + not work correctly. */ + +#ifdef USE_PREFETCH +# define PREFETCH_HINT_STORE 1 +# define PREFETCH_HINT_STORE_STREAMED 5 +# define PREFETCH_HINT_STORE_RETAINED 7 +# define PREFETCH_HINT_PREPAREFORSTORE 30 + +/* If we have not picked out what hints to use at this point use the + standard load and store prefetch hints. */ +# ifndef PREFETCH_STORE_HINT +# define PREFETCH_STORE_HINT PREFETCH_HINT_STORE +# endif + +/* We double everything when USE_DOUBLE is true so we do 2 prefetches to + get 64 bytes in that case. The assumption is that each individual + prefetch brings in 32 bytes. */ +# ifdef USE_DOUBLE +# define PREFETCH_CHUNK 64 +# define PREFETCH_FOR_STORE(chunk, reg) \ + pref PREFETCH_STORE_HINT, (chunk)*64(reg); \ + pref PREFETCH_STORE_HINT, ((chunk)*64)+32(reg) +# else +# define PREFETCH_CHUNK 32 +# define PREFETCH_FOR_STORE(chunk, reg) \ + pref PREFETCH_STORE_HINT, (chunk)*32(reg) +# endif -#ifdef __mips64 +/* MAX_PREFETCH_SIZE is the maximum size of a prefetch, it must not be less + than PREFETCH_CHUNK, the assumed size of each prefetch. If the real size + of a prefetch is greater than MAX_PREFETCH_SIZE and the PREPAREFORSTORE + hint is used, the code will not work correctly. If PREPAREFORSTORE is not + used than MAX_PREFETCH_SIZE does not matter. */ +# define MAX_PREFETCH_SIZE 128 +/* PREFETCH_LIMIT is set based on the fact that we never use an offset greater + than 5 on a STORE prefetch and that a single prefetch can never be larger + than MAX_PREFETCH_SIZE. We add the extra 32 when USE_DOUBLE is set because + we actually do two prefetches in that case, one 32 bytes after the other. */ +# ifdef USE_DOUBLE +# define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + 32 + MAX_PREFETCH_SIZE +# else +# define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + MAX_PREFETCH_SIZE +# endif -#include <sys/asm.h> +# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) \ + && ((PREFETCH_CHUNK * 4) < MAX_PREFETCH_SIZE) +/* We cannot handle this because the initial prefetches may fetch bytes that + are before the buffer being copied. We start copies with an offset + of 4 so avoid this situation when using PREPAREFORSTORE. */ +# error "PREFETCH_CHUNK is too large and/or MAX_PREFETCH_SIZE is too small." +# endif +#else /* USE_PREFETCH not defined */ +# define PREFETCH_FOR_STORE(offset, reg) +#endif + +#if __mips_isa_rev > 5 +# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) +# undef PREFETCH_STORE_HINT +# define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED +# endif +# define R6_CODE +#endif -#if __BYTE_ORDER == __BIG_ENDIAN -# define SDHI sdl /* high part is left in big-endian */ +/* Allow the routine to be named something else if desired. */ +#ifndef MEMSET_NAME +# define MEMSET_NAME memset +#endif + +/* We load/store 64 bits at a time when USE_DOUBLE is true. + The C_ prefix stands for CHUNK and is used to avoid macro name + conflicts with system header files. 
*/ + +#ifdef USE_DOUBLE +# define C_ST sd +# ifdef __MIPSEB +# define C_STHI sdl /* high part is left in big-endian */ +# else +# define C_STHI sdr /* high part is right in little-endian */ +# endif #else -# define SDHI sdr /* high part is right in little-endian */ +# define C_ST sw +# ifdef __MIPSEB +# define C_STHI swl /* high part is left in big-endian */ +# else +# define C_STHI swr /* high part is right in little-endian */ +# endif #endif -ENTRY (memset) - .set noreorder +/* Bookkeeping values for 32 vs. 64 bit mode. */ +#ifdef USE_DOUBLE +# define NSIZE 8 +# define NSIZEMASK 0x3f +# define NSIZEDMASK 0x7f +#else +# define NSIZE 4 +# define NSIZEMASK 0x1f +# define NSIZEDMASK 0x3f +#endif +#define UNIT(unit) ((unit)*NSIZE) +#define UNITM1(unit) (((unit)*NSIZE)-1) - slti ta1, a2, 16 # Less than 16? - bne ta1, zero, L(last16) - move v0, a0 # Setup exit value before too late - - beq a1, zero, L(ueven) # If zero pattern, no need to extend - andi a1, 0xff # Avoid problems with bogus arguments - dsll ta0, a1, 8 - or a1, ta0 - dsll ta0, a1, 16 - or a1, ta0 # a1 is now pattern in full word - dsll ta0, a1, 32 - or a1, ta0 # a1 is now pattern in double word - -L(ueven): - PTR_SUBU ta0, zero, a0 # Unaligned address? - andi ta0, 0x7 - beq ta0, zero, L(chkw) - PTR_SUBU a2, ta0 - SDHI a1, 0(a0) # Yes, handle first unaligned part - PTR_ADDU a0, ta0 # Now both a0 and a2 are updated +#ifdef ANDROID_CHANGES +LEAF(MEMSET_NAME,0) +#else +LEAF(MEMSET_NAME) +#endif -L(chkw): - andi ta0, a2, 0xf # Enough left for one loop iteration? - beq ta0, a2, L(chkl) - PTR_SUBU a3, a2, ta0 - PTR_ADDU a3, a0 # a3 is last loop address +1 - move a2, ta0 # a2 is now # of bytes left after loop -L(loopw): - PTR_ADDIU a0, 16 # Handle 2 dwords pr. iteration - sd a1, -16(a0) - bne a0, a3, L(loopw) - sd a1, -8(a0) - -L(chkl): - andi ta0, a2, 0x8 # Check if there is at least a double - beq ta0, zero, L(last16) # word remaining after the loop - PTR_SUBU a2, ta0 - sd a1, 0(a0) # Yes... - PTR_ADDIU a0, 8 - -L(last16): - blez a2, L(exit) # Handle last 16 bytes (if cnt>0) - PTR_ADDU a3, a2, a0 # a3 is last address +1 -L(lst16l): - PTR_ADDIU a0, 1 - bne a0, a3, L(lst16l) - sb a1, -1(a0) -L(exit): - j ra # Bye, bye + .set nomips16 + .set noreorder +/* If the size is less than 2*NSIZE (8 or 16), go to L(lastb). Regardless of + size, copy dst pointer to v0 for the return value. */ + slti t2,a2,(2 * NSIZE) + bne t2,zero,L(lastb) + move v0,a0 + +/* If memset value is not zero, we copy it to all the bytes in a 32 or 64 + bit word. */ + beq a1,zero,L(set0) /* If memset value is zero no smear */ + PTR_SUBU a3,zero,a0 nop - .set reorder -END (memset) + /* smear byte into 32 or 64 bit word */ +#if ((__mips == 64) || (__mips == 32)) && (__mips_isa_rev >= 2) +# ifdef USE_DOUBLE + dins a1, a1, 8, 8 /* Replicate fill byte into half-word. */ + dins a1, a1, 16, 16 /* Replicate fill byte into word. */ + dins a1, a1, 32, 32 /* Replicate fill byte into dbl word. */ +# else + ins a1, a1, 8, 8 /* Replicate fill byte into half-word. */ + ins a1, a1, 16, 16 /* Replicate fill byte into word. */ +# endif +#else +# ifdef USE_DOUBLE + and a1,0xff + dsll t2,a1,8 + or a1,t2 + dsll t2,a1,16 + or a1,t2 + dsll t2,a1,32 + or a1,t2 +# else + and a1,0xff + sll t2,a1,8 + or a1,t2 + sll t2,a1,16 + or a1,t2 +# endif +#endif + +/* If the destination address is not aligned do a partial store to get it + aligned. If it is already aligned just jump to L(aligned). */ +L(set0): +#ifndef R6_CODE + andi t2,a3,(NSIZE-1) /* word-unaligned address? 
*/ + beq t2,zero,L(aligned) /* t2 is the unalignment count */ + PTR_SUBU a2,a2,t2 + C_STHI a1,0(a0) + PTR_ADDU a0,a0,t2 +#else /* R6_CODE */ + andi t2,a0,(NSIZE-1) + lapc t9,L(atable) + PTR_LSA t9,t2,t9,2 + jrc t9 +L(atable): + bc L(aligned) +# ifdef USE_DOUBLE + bc L(lb7) + bc L(lb6) + bc L(lb5) + bc L(lb4) +# endif + bc L(lb3) + bc L(lb2) + bc L(lb1) +L(lb7): + sb a1,6(a0) +L(lb6): + sb a1,5(a0) +L(lb5): + sb a1,4(a0) +L(lb4): + sb a1,3(a0) +L(lb3): + sb a1,2(a0) +L(lb2): + sb a1,1(a0) +L(lb1): + sb a1,0(a0) + + li t9,NSIZE + subu t2,t9,t2 + PTR_SUBU a2,a2,t2 + PTR_ADDU a0,a0,t2 +#endif /* R6_CODE */ + +L(aligned): +/* If USE_DOUBLE is not set we may still want to align the data on a 16 + byte boundry instead of an 8 byte boundry to maximize the opportunity + of proAptiv chips to do memory bonding (combining two sequential 4 + byte stores into one 8 byte store). We know there are at least 4 bytes + left to store or we would have jumped to L(lastb) earlier in the code. */ +#ifdef DOUBLE_ALIGN + andi t2,a3,4 + beq t2,zero,L(double_aligned) + PTR_SUBU a2,a2,t2 + sw a1,0(a0) + PTR_ADDU a0,a0,t2 +L(double_aligned): +#endif -#else /* !__mips64 */ +/* Now the destination is aligned to (word or double word) aligned address + Set a2 to count how many bytes we have to copy after all the 64/128 byte + chunks are copied and a3 to the dest pointer after all the 64/128 byte + chunks have been copied. We will loop, incrementing a0 until it equals + a3. */ + andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */ + beq a2,t8,L(chkw) /* if a2==t8, no 64-byte/128-byte chunks */ + PTR_SUBU a3,a2,t8 /* subtract from a2 the reminder */ + PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */ -#if __BYTE_ORDER == __BIG_ENDIAN -# define SWHI swl /* high part is left in big-endian */ +/* When in the loop we may prefetch with the 'prepare to store' hint, + in this case the a0+x should not be past the "t0-32" address. This + means: for x=128 the last "safe" a0 address is "t0-160". Alternatively, + for x=64 the last "safe" a0 address is "t0-96" In the current version we + will use "prefetch hint,128(a0)", so "t0-160" is the limit. */ +#if defined(USE_PREFETCH) \ + && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) + PTR_ADDU t0,a0,a2 /* t0 is the "past the end" address */ + PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */ +#endif +#if defined(USE_PREFETCH) \ + && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE) + PREFETCH_FOR_STORE (1, a0) + PREFETCH_FOR_STORE (2, a0) + PREFETCH_FOR_STORE (3, a0) +#endif + +L(loop16w): +#if defined(USE_PREFETCH) \ + && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) + sltu v1,t9,a0 /* If a0 > t9 don't use next prefetch */ + bgtz v1,L(skip_pref) + nop +#endif +#ifdef R6_CODE + PREFETCH_FOR_STORE (2, a0) #else -# define SWHI swr /* high part is right in little-endian */ + PREFETCH_FOR_STORE (4, a0) + PREFETCH_FOR_STORE (5, a0) #endif +L(skip_pref): + C_ST a1,UNIT(0)(a0) + C_ST a1,UNIT(1)(a0) + C_ST a1,UNIT(2)(a0) + C_ST a1,UNIT(3)(a0) + C_ST a1,UNIT(4)(a0) + C_ST a1,UNIT(5)(a0) + C_ST a1,UNIT(6)(a0) + C_ST a1,UNIT(7)(a0) + C_ST a1,UNIT(8)(a0) + C_ST a1,UNIT(9)(a0) + C_ST a1,UNIT(10)(a0) + C_ST a1,UNIT(11)(a0) + C_ST a1,UNIT(12)(a0) + C_ST a1,UNIT(13)(a0) + C_ST a1,UNIT(14)(a0) + C_ST a1,UNIT(15)(a0) + PTR_ADDIU a0,a0,UNIT(16) /* adding 64/128 to dest */ + bne a0,a3,L(loop16w) + nop + move a2,t8 -ENTRY (memset) - .set noreorder +/* Here we have dest word-aligned but less than 64-bytes or 128 bytes to go. 
+ Check for a 32(64) byte chunk and copy if if there is one. Otherwise + jump down to L(chk1w) to handle the tail end of the copy. */ +L(chkw): + andi t8,a2,NSIZEMASK /* is there a 32-byte/64-byte chunk. */ + /* the t8 is the reminder count past 32-bytes */ + beq a2,t8,L(chk1w)/* when a2==t8, no 32-byte chunk */ + nop + C_ST a1,UNIT(0)(a0) + C_ST a1,UNIT(1)(a0) + C_ST a1,UNIT(2)(a0) + C_ST a1,UNIT(3)(a0) + C_ST a1,UNIT(4)(a0) + C_ST a1,UNIT(5)(a0) + C_ST a1,UNIT(6)(a0) + C_ST a1,UNIT(7)(a0) + PTR_ADDIU a0,a0,UNIT(8) + +/* Here we have less than 32(64) bytes to set. Set up for a loop to + copy one word (or double word) at a time. Set a2 to count how many + bytes we have to copy after all the word (or double word) chunks are + copied and a3 to the dest pointer after all the (d)word chunks have + been copied. We will loop, incrementing a0 until a0 equals a3. */ +L(chk1w): + andi a2,t8,(NSIZE-1) /* a2 is the reminder past one (d)word chunks */ + beq a2,t8,L(lastb) + PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */ + PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */ - slti t1, a2, 8 # Less than 8? - bne t1, zero, L(last8) - move v0, a0 # Setup exit value before too late - - beq a1, zero, L(ueven) # If zero pattern, no need to extend - andi a1, 0xff # Avoid problems with bogus arguments - sll t0, a1, 8 - or a1, t0 - sll t0, a1, 16 - or a1, t0 # a1 is now pattern in full word - -L(ueven): - subu t0, zero, a0 # Unaligned address? - andi t0, 0x3 - beq t0, zero, L(chkw) - subu a2, t0 - SWHI a1, 0(a0) # Yes, handle first unaligned part - addu a0, t0 # Now both a0 and a2 are updated - -L(chkw): - andi t0, a2, 0x7 # Enough left for one loop iteration? - beq t0, a2, L(chkl) - subu a3, a2, t0 - addu a3, a0 # a3 is last loop address +1 - move a2, t0 # a2 is now # of bytes left after loop -L(loopw): - addiu a0, 8 # Handle 2 words pr. iteration - sw a1, -8(a0) - bne a0, a3, L(loopw) - sw a1, -4(a0) - -L(chkl): - andi t0, a2, 0x4 # Check if there is at least a full - beq t0, zero, L(last8) # word remaining after the loop - subu a2, t0 - sw a1, 0(a0) # Yes... - addiu a0, 4 - -L(last8): - blez a2, L(exit) # Handle last 8 bytes (if cnt>0) - addu a3, a2, a0 # a3 is last address +1 -L(lst8l): - addiu a0, 1 - bne a0, a3, L(lst8l) - sb a1, -1(a0) -L(exit): - j ra # Bye, bye +/* copying in words (4-byte or 8 byte chunks) */ +L(wordCopy_loop): + PTR_ADDIU a0,a0,UNIT(1) + bne a0,a3,L(wordCopy_loop) + C_ST a1,UNIT(-1)(a0) + +/* Copy the last 8 (or 16) bytes */ +L(lastb): + blez a2,L(leave) + PTR_ADDU a3,a0,a2 /* a3 is the last dst address */ +L(lastbloop): + PTR_ADDIU a0,a0,1 + bne a0,a3,L(lastbloop) + sb a1,-1(a0) +L(leave): + j ra nop + .set at .set reorder -END (memset) - -#endif /* !__mips64 */ +END(MEMSET_NAME) +#ifndef ANDROID_CHANGES +# ifdef _LIBC +# ifdef __UCLIBC__ +libc_hidden_def(MEMSET_NAME) +# else +libc_hidden_builtin_def (MEMSET_NAME) +# endif +# endif +#endif -libc_hidden_def(memset) diff --git a/libc/string/mips/sysdep.h b/libc/string/mips/sysdep.h deleted file mode 100644 index 5dad8342e..000000000 --- a/libc/string/mips/sysdep.h +++ /dev/null @@ -1,45 +0,0 @@ -/* Adapted from glibc's sysdeps/unix/mips/sysdep.h */ - -/* Copyright (C) 1992, 1995, 1997, 1999, 2000, 2002, 2003 - Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Brendan Kehoe (brendan@zen.org). 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#ifdef __ASSEMBLER__ - -#include <sgidefs.h> -#include <sys/regdef.h> - -#define ENTRY(name) \ - .globl name; \ - .align 2; \ - .ent name,0; \ - name/* use a comment rather than ## to workaround bug in gcc-3.4.x */: - -#undef END -#define END(function) \ - .end function; \ - .size function,.-function - -#if _MIPS_SIM == _MIPS_SIM_ABI32 || _MIPS_SIM == _MIPS_SIM_ABIO64 -# define L(label) $L ## label -#else -# define L(label) .L ## label -#endif - -#endif diff --git a/libc/string/powerpc/memcpy.c b/libc/string/powerpc/memcpy.c index f3d800739..22794ec33 100644 --- a/libc/string/powerpc/memcpy.c +++ b/libc/string/powerpc/memcpy.c @@ -21,16 +21,15 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(memcpy) */ -void *memcpy(void *to, const void *from, size_t n) -/* PPC can do pre increment and load/store, but not post increment and load/store. - Therefore use *++ptr instead of *ptr++. */ +/* PPC can do pre increment and load/store, but not post increment and + load/store. Therefore use *++ptr instead of *ptr++. */ +void *memcpy(void *to, const void *from, size_t len) { unsigned long rem, chunks, tmp1, tmp2; unsigned char *tmp_to; unsigned char *tmp_from = (unsigned char *)from; - chunks = n / 8; + chunks = len / 8; tmp_from -= 4; tmp_to = to - 4; if (!chunks) @@ -49,30 +48,33 @@ void *memcpy(void *to, const void *from, size_t n) *(unsigned long *)tmp_to = tmp2; } while (--chunks); lessthan8: - n = n % 8; - if (n >= 4) { - *(unsigned long *)(tmp_to+4) = *(unsigned long *)(tmp_from+4); + len = len % 8; + if (len >= 4) { tmp_from += 4; tmp_to += 4; - n = n-4; + *(unsigned long *)(tmp_to) = *(unsigned long *)(tmp_from); + len -= 4; } - if (!n ) return to; + if (!len) + return to; tmp_from += 3; tmp_to += 3; do { *++tmp_to = *++tmp_from; - } while (--n); + } while (--len); return to; align: + /* ???: Do we really need to generate the carry flag here? 
If not, then: + rem -= 4; */ rem = 4 - rem; - n = n - rem; + len -= rem; do { *(tmp_to+4) = *(tmp_from+4); ++tmp_from; ++tmp_to; } while (--rem); - chunks = n / 8; + chunks = len / 8; if (chunks) goto copy_chunks; goto lessthan8; diff --git a/libc/string/powerpc/memmove.c b/libc/string/powerpc/memmove.c index 8badae37d..6bd79915d 100644 --- a/libc/string/powerpc/memmove.c +++ b/libc/string/powerpc/memmove.c @@ -21,9 +21,7 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(memcpy) */ -/* Experimentally off - libc_hidden_proto(memmove) */ void *memmove(void *to, const void *from, size_t n) { unsigned long rem, chunks, tmp1, tmp2; diff --git a/libc/string/powerpc/memset.c b/libc/string/powerpc/memset.c index 1cbfd04fc..a900b92cb 100644 --- a/libc/string/powerpc/memset.c +++ b/libc/string/powerpc/memset.c @@ -21,7 +21,6 @@ #include <string.h> -/* Experimentally off - libc_hidden_proto(memset) */ static __inline__ int expand_byte_word(int c){ /* this does: diff --git a/libc/string/psignal.c b/libc/string/psignal.c index 1ca8725db..3e1f68b94 100644 --- a/libc/string/psignal.c +++ b/libc/string/psignal.c @@ -10,8 +10,6 @@ #include <string.h> #include <signal.h> -libc_hidden_proto(fprintf) -/* Experimentally off - libc_hidden_proto(strsignal) */ /* TODO: make this threadsafe with a reentrant version of strsignal? */ diff --git a/libc/string/rawmemchr.c b/libc/string/rawmemchr.c index 3cddefa10..f0cb7ee47 100644 --- a/libc/string/rawmemchr.c +++ b/libc/string/rawmemchr.c @@ -8,7 +8,6 @@ #include "_string.h" #ifdef __USE_GNU -/* Experimentally off - libc_hidden_proto(rawmemchr) */ void *rawmemchr(const void *s, int c) { register const unsigned char *r = s; diff --git a/libc/string/sh/memchr.S b/libc/string/sh/memchr.S new file mode 100644 index 000000000..6b7142f69 --- /dev/null +++ b/libc/string/sh/memchr.S @@ -0,0 +1,30 @@ +/* $Id: memchr.S,v 1.1 2000/04/14 16:49:01 mjd Exp $ + * + * "memchr" implementation of SuperH + * + * Copyright (C) 1999 Niibe Yutaka + * + * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. + */ + +/* + * void *memchr(const void *s, int c, size_t n); + */ + +#include <sysdep.h> + +ENTRY(memchr) + tst r6,r6 + bt/s 2f + exts.b r5,r5 +1: mov.b @r4,r1 + cmp/eq r1,r5 + bt/s 3f + dt r6 + bf/s 1b + add #1,r4 +2: mov #0,r4 +3: rts + mov r4,r0 +END(memchr) +libc_hidden_def (memchr) diff --git a/libc/string/sh/sh4/memcpy.S b/libc/string/sh/sh4/memcpy.S index 2d918293e..6a229a06c 100644 --- a/libc/string/sh/sh4/memcpy.S +++ b/libc/string/sh/sh4/memcpy.S @@ -6,6 +6,9 @@ * Modified from memcpy.S and micro-optimised for SH4 * Stuart Menefy (stuart.menefy@st.com) * + * Copyright (c) 2009 STMicroelectronics Ltd + * Optimised using prefetching and 64bit data transfer via FPU + * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> */ /* @@ -15,8 +18,32 @@ * If there is an overlap, then the results are undefined. */ +#include <sysdep.h> #include <endian.h> +#if defined (__LITTLE_ENDIAN__) && defined (__SH_FPU_ANY__) +#define MEMCPY_USES_FPU +/* Use paired single precision load or store mode for 64-bit tranfering. + * FPSCR.SZ=1,FPSCR.SZ=0 is well defined on both SH4-200 and SH4-300. + * Currenlty it has been only implemented and tested for little endian mode. */ +.macro FPU_SET_PAIRED_PREC + sts fpscr, r7 + mov #0x10, r0 ! PR=0 SZ=1 + shll16 r0 + lds r0, fpscr +.endm +.macro RESTORE_FPSCR + lds r7, fpscr +.endm +.macro DALLOC + ! Cache allocate + store on dst-32. + add #-32, r1 + movca.l r0, @r1 + add #32, r1 +.endm + +#endif + ! ! 
GHIJ KLMN OPQR --> ...G HIJK LMNO PQR. ! @@ -127,10 +154,10 @@ mov.l r3,@-r0 ! 30 LS #else -3: mov r1,r3 ! OPQR +3: mov r7,r3 ! OPQR shlr8 r3 ! xOPQ - mov.l @(r0,r5),r1 ! KLMN - mov r1,r6 + mov.l @(r0,r5),r7 ! KLMN + mov r7,r6 shll16 r6 shll8 r6 ! Nxxx or r6,r3 ! NOPQ @@ -157,12 +184,7 @@ 9: rts nop -/* void * memcpy(void *dst, const void *src, size_t len) */ -.text -.align 4 -.type memcpy,@function -.globl memcpy; -memcpy: +ENTRY(memcpy) ! Calculate the invariants which will be used in the remainder ! of the code: @@ -189,9 +211,7 @@ memcpy: mov r4, r0 ! 5 MT (0 cycle latency) add r6, r0 ! 49 EX - mov #16, r1 ! 6 EX bt/s .Lcase00 ! 111 BR (aligned) - sub r4, r5 ! 75 EX ! Arguments are not nicely long word aligned or zero len. @@ -207,6 +227,7 @@ memcpy: ! However the penalty for getting it 'wrong' is much higher for long word ! aligned data (and this is more common), so use a value of 16. + mov #16, r1 ! 6 EX cmp/gt r6,r1 ! 56 MT add #-1,r5 ! 50 EX @@ -447,6 +468,183 @@ memcpy: mov.l r7, @-r0 ! 30 LS +#ifdef MEMCPY_USES_FPU + ! Copy the cache line aligned blocks by using the FPU registers. + ! If src and dst are well aligned adopt 64-bit data transfer. + ! We also need r0 as a temporary (for movca), so 'undo' the invariant: + ! r5: src (was r0+r5) + ! r1: dest (was r0) +1: + add r0, r5 + mov r0, r1 + + mov r1, r3 ! MT + sub r2, r3 ! EX (r3 - r2 -> r3) + mov #-5, r0 + shld r0, r3 ! number of the cache lines + + mov #8, r0 + cmp/ge r0, r3 ! Check if there are many cache lines to copy. + bf 45f ! Copy cache line aligned blocks without pref. + mov r5, r0 + add #-0x7c, r0 + tst #7, r0 ! src is 8byte aligned + bf 45f + + ! Many cache lines have to be copied and the buffers are well aligned. + ! Aggressive prefetching and FPU in single paired precision. + mov r0, r5 + mov r5, r6 + add #-0x80, r6 ! prefetch head + + ! store FPU (in single precision mode, do not check R15 align). + fmov fr12, @-r15 + fmov fr13, @-r15 + fmov fr14, @-r15 + fmov fr15, @-r15 + + FPU_SET_PAIRED_PREC + + mov #4, r0 +67: + add #-0x20, r6 + pref @r6 + add #-0x20, r6 + pref @r6 + + fmov @r5+, dr0 + fmov @r5+, dr2 + fmov @r5+, dr4 + fmov @r5+, dr6 + fmov @r5+, dr8 + fmov @r5+, dr10 + fmov @r5+, dr12 + fmov @r5+, dr14 + fmov @r5+, xd0 + fmov @r5+, xd2 + fmov @r5+, xd4 + fmov @r5+, xd6 + fmov @r5+, xd8 + fmov @r5+, xd10 + fmov @r5+, xd12 + fmov @r5+, xd14 + + DALLOC + fmov xd14, @-r1 + fmov xd12, @-r1 + fmov xd10, @-r1 + fmov xd8, @-r1 + DALLOC + fmov xd6, @-r1 + fmov xd4, @-r1 + fmov xd2, @-r1 + fmov xd0, @-r1 + DALLOC + fmov dr14, @-r1 + fmov dr12, @-r1 + fmov dr10, @-r1 + fmov dr8, @-r1 + DALLOC + fmov dr6, @-r1 + add #-0x80, r5 + fmov dr4, @-r1 + add #-0x80, r5 + fmov dr2, @-r1 + add #-0x20, r6 + fmov dr0, @-r1 + add #-4, r3 + pref @r6 + add #-0x20, r6 + cmp/ge r0, r3 + bt/s 67b + pref @r6 + + RESTORE_FPSCR + + ! Restore FPU callee save registers + fmov @r15+, fr15 + fmov @r15+, fr14 + fmov @r15+, fr13 + fmov @r15+, fr12 + + ! Other cache lines could be copied: so use the FPU in single paired + ! precision without prefetching. No check for alignment is necessary. + + mov #1, r0 + cmp/ge r0, r3 + bt/s 3f + add #0x60, r5 + + bra 5f + nop + + ! No prefetch and FPU in single precision. 
+45: + add #-0x1c, r5 + mov r5, r0 + tst #7, r0 + bt 3f + +2: fmov.s @r5+, fr0 + fmov.s @r5+, fr1 + fmov.s @r5+, fr2 + fmov.s @r5+, fr3 + fmov.s @r5+, fr4 + fmov.s @r5+, fr5 + fmov.s @r5+, fr6 + fmov.s @r5+, fr7 + + DALLOC + + fmov.s fr7, @-r1 + fmov.s fr6, @-r1 + fmov.s fr5, @-r1 + fmov.s fr4, @-r1 + fmov.s fr3, @-r1 + fmov.s fr2, @-r1 + fmov.s fr1, @-r1 + fmov.s fr0, @-r1 + + cmp/eq r2,r1 + + bf/s 2b + add #-0x40, r5 + + bra 5f + nop + + ! No prefetch and FPU in single paired precision. + +3: FPU_SET_PAIRED_PREC + +4: fmov @r5+, dr0 + fmov @r5+, dr2 + fmov @r5+, dr4 + fmov @r5+, dr6 + + DALLOC + + fmov dr6, @-r1 + fmov dr4, @-r1 + fmov dr2, @-r1 + fmov dr0, @-r1 + cmp/eq r2,r1 + + bf/s 4b + add #-0x40, r5 + + RESTORE_FPSCR + +5: mov r1, r0 + + cmp/eq r4, r0 ! 54 MT + bf/s 1f ! 109 BR + sub r1, r5 ! 75 EX + + rts + nop +1: +#else ! Copy the cache line aligned blocks ! ! In use: r0, r2, r4, r5 @@ -512,6 +710,7 @@ memcpy: rts 1: mov.l @r15+, r8 ! 15 LS +#endif sub r4, r1 ! 75 EX (len remaining) ! number of trailing bytes is non-zero @@ -733,30 +932,30 @@ memcpy: mov.l @(0x04,r5), r11 ! 18 LS (latency=2) xtrct r9, r8 ! 48 EX - mov.w @(0x02,r5), r12 ! 18 LS (latency=2) + mov.l @(0x00,r5), r12 ! 18 LS (latency=2) xtrct r10, r9 ! 48 EX movca.l r0,@r1 ! 40 LS (latency=3-7) add #-0x1c, r1 ! 50 EX - mov.l r3, @(0x1c,r1) ! 33 LS + mov.l r3, @(0x18,r1) ! 33 LS xtrct r11, r10 ! 48 EX - mov.l r6, @(0x18,r1) ! 33 LS + mov.l r6, @(0x14,r1) ! 33 LS xtrct r12, r11 ! 48 EX - mov.l r7, @(0x14,r1) ! 33 LS + mov.l r7, @(0x10,r1) ! 33 LS - mov.l r8, @(0x10,r1) ! 33 LS - add #-0x3e, r5 ! 50 EX + mov.l r8, @(0x0c,r1) ! 33 LS + add #-0x1e, r5 ! 50 EX - mov.l r9, @(0x0c,r1) ! 33 LS + mov.l r9, @(0x08,r1) ! 33 LS cmp/eq r2,r1 ! 54 MT - mov.l r10, @(0x08,r1) ! 33 LS + mov.l r10, @(0x04,r1) ! 33 LS bf/s 2b ! 109 BR - mov.l r11, @(0x04,r1) ! 33 LS + mov.l r11, @(0x00,r1) ! 33 LS #endif mov.l @r15+, r12 @@ -803,6 +1002,5 @@ memcpy: rts mov.b r1,@-r0 -.size memcpy,.-memcpy; - +END(memcpy) libc_hidden_def (memcpy) diff --git a/libc/string/sh/sh4/memmove.c b/libc/string/sh/sh4/memmove.c new file mode 100644 index 000000000..8059bd4cc --- /dev/null +++ b/libc/string/sh/sh4/memmove.c @@ -0,0 +1,121 @@ +/* memmove implementation for SH4 + * + * Copyright (C) 2009 STMicroelectronics Ltd. + * + * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> + * + * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. + */ + +#ifndef __SH_FPU_ANY__ +#include "../../generic/memmove.c" +#else + +#include <string.h> + +#define FPSCR_SR (1 << 20) +#define STORE_FPSCR(x) __asm__ __volatile__("sts fpscr, %0" : "=r"(x)) +#define LOAD_FPSCR(x) __asm__ __volatile__("lds %0, fpscr" : : "r"(x)) + +static void fpu_optimised_copy_fwd(void *dest, const void *src, size_t len) +{ + char *d = (char *)dest; + char *s = (char *)src; + + if (len >= 64) { + unsigned long fpscr; + int *s1; + int *d1; + + /* Align the dest to 4 byte boundary. 
*/ + while ((unsigned)d & 0x7) { + *d++ = *s++; + len--; + } + + s1 = (int *)s; + d1 = (int *)d; + + /* check if s is well aligned to use FPU */ + if (!((unsigned)s1 & 0x7)) { + + /* Align the dest to cache-line boundary */ + while ((unsigned)d1 & 0x1c) { + *d1++ = *s1++; + len -= 4; + } + + /* Use paired single precision load or store mode for + * 64-bit tranfering.*/ + STORE_FPSCR(fpscr); + LOAD_FPSCR(FPSCR_SR); + + while (len >= 32) { + __asm__ __volatile__ ("fmov @%0+,dr0":"+r" (s1)); + __asm__ __volatile__ ("fmov @%0+,dr2":"+r" (s1)); + __asm__ __volatile__ ("fmov @%0+,dr4":"+r" (s1)); + __asm__ __volatile__ ("fmov @%0+,dr6":"+r" (s1)); + __asm__ + __volatile__ ("fmov dr0,@%0"::"r" + (d1):"memory"); + d1 += 2; + __asm__ + __volatile__ ("fmov dr2,@%0"::"r" + (d1):"memory"); + d1 += 2; + __asm__ + __volatile__ ("fmov dr4,@%0"::"r" + (d1):"memory"); + d1 += 2; + __asm__ + __volatile__ ("fmov dr6,@%0"::"r" + (d1):"memory"); + d1 += 2; + len -= 32; + } + LOAD_FPSCR(fpscr); + } + s = (char *)s1; + d = (char *)d1; + /*TODO: other subcases could be covered here?!?*/ + } + /* Go to per-byte copy */ + while (len > 0) { + *d++ = *s++; + len--; + } + return; +} + +void *memmove(void *dest, const void *src, size_t len) +{ + unsigned long int d = (long int)dest; + unsigned long int s = (long int)src; + unsigned long int res; + + if (d >= s) + res = d - s; + else + res = s - d; + /* + * 1) dest and src are not overlap ==> memcpy (BWD/FDW) + * 2) dest and src are 100% overlap ==> memcpy (BWD/FDW) + * 3) left-to-right overlap ==> Copy from the beginning to the end + * 4) right-to-left overlap ==> Copy from the end to the beginning + */ + + if (res == 0) /* 100% overlap */ + memcpy(dest, src, len); /* No overlap */ + else if (res >= len) + memcpy(dest, src, len); + else { + if (d > s) /* right-to-left overlap */ + memcpy(dest, src, len); /* memcpy is BWD */ + else /* cannot use SH4 memcpy for this case */ + fpu_optimised_copy_fwd(dest, src, len); + } + return (dest); +} + +libc_hidden_def(memmove) +#endif /*__SH_FPU_ANY__ */ diff --git a/libc/string/sh/sh4/memset.S b/libc/string/sh/sh4/memset.S new file mode 100644 index 000000000..eb83355ce --- /dev/null +++ b/libc/string/sh/sh4/memset.S @@ -0,0 +1,152 @@ +/* $Id: memset.S,v 1.1 2000/04/14 16:49:01 mjd Exp $ + * + * "memset" implementation of SuperH + * + * Copyright (C) 1999 Niibe Yutaka + * + * Copyright (c) 2009 STMicroelectronics Ltd + * Optimised using 64bit data transfer (via FPU) and the movca.l inst. + * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> + * + * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. + */ + +/* + * void *memset(void *s, int c, size_t n); + */ + +#include <sysdep.h> + +#if defined (__LITTLE_ENDIAN__) && defined (__SH_FPU_ANY__) +#define MEMSET_USES_FPU +/* Use paired single precision load or store mode for 64-bit tranfering. + * FPSCR.SZ=1,FPSCR.SZ=0 is well defined on both SH4-200 and SH4-300. + * Currenlty it has been only implemented and tested for little endian mode. */ +.macro FPU_SET_PAIRED_PREC + sts fpscr, r3 + mov #0x10, r1 ! PR=0 SZ=1 + shll16 r1 + lds r1, fpscr +.endm +.macro RESTORE_FPSCR + lds r3, fpscr +.endm +#endif + +ENTRY(memset) + mov #12,r0 + add r6,r4 + cmp/gt r6,r0 + bt/s 40f ! if it's too small, set a byte at once + mov r4,r0 + and #3,r0 + cmp/eq #0,r0 + bt/s 2f ! It's aligned + sub r0,r6 +1: + dt r0 + bf/s 1b + mov.b r5,@-r4 +2: ! make VVVV + extu.b r5,r5 + swap.b r5,r0 ! V0 + or r0,r5 ! VV + swap.w r5,r0 ! VV00 + or r0,r5 ! VVVV + + ! 
Check if enough bytes need to be copied to be worth the big loop + mov #0x40, r0 ! (MT) + cmp/gt r6,r0 ! (MT) 64 > len => slow loop + + bt/s 22f + mov r6,r0 + + ! align the dst to the cache block size if necessary + mov r4, r3 + mov #~(0x1f), r1 + + and r3, r1 + cmp/eq r3, r1 + + bt/s 11f ! dst is already aligned + sub r1, r3 ! r3-r1 -> r3 + shlr2 r3 ! number of loops + +10: mov.l r5,@-r4 + dt r3 + bf/s 10b + add #-4, r6 + +11: ! dst is 32byte aligned + mov r6,r2 + mov #-5,r0 + shld r0,r2 ! number of loops + + add #-32, r4 + mov r5, r0 + +#ifdef MEMSET_USES_FPU + lds r5, fpul ! (CO) + fsts fpul, fr0 ! Dr0 will be 'VVVVVVVV' + fsts fpul, fr1 + + FPU_SET_PAIRED_PREC +12: + movca.l r0, @r4 + mov.l r5, @(4, r4) + add #32, r4 + fmov dr0, @-r4 + fmov dr0, @-r4 + add #-0x20, r6 + fmov dr0, @-r4 + dt r2 + bf/s 12b + add #-40, r4 + + RESTORE_FPSCR +#else +12: + movca.l r0,@r4 + mov.l r5,@(4, r4) + mov.l r5,@(8, r4) + mov.l r5,@(12,r4) + mov.l r5,@(16,r4) + mov.l r5,@(20,r4) + add #-0x20, r6 + mov.l r5,@(24,r4) + dt r2 + mov.l r5,@(28,r4) + bf/s 12b + add #-32, r4 + +#endif + add #32, r4 + mov #8, r0 + cmp/ge r0, r6 + bf 40f + + mov r6,r0 +22: + shlr2 r0 + shlr r0 ! r0 = r6 >> 3 +3: + dt r0 + mov.l r5,@-r4 ! set 8-byte at once + bf/s 3b + mov.l r5,@-r4 + ! + mov #7,r0 + and r0,r6 + + ! fill bytes (length may be zero) +40: tst r6,r6 + bt 5f +4: + dt r6 + bf/s 4b + mov.b r5,@-r4 +5: + rts + mov r4,r0 +END(memset) +libc_hidden_def (memset) diff --git a/libc/string/sh/sh4/strcpy.S b/libc/string/sh/sh4/strcpy.S new file mode 100644 index 000000000..0f8278017 --- /dev/null +++ b/libc/string/sh/sh4/strcpy.S @@ -0,0 +1,28 @@ +/* strcpy implementation for SUPERH + * + * Copyright (C) 2009 STMicroelectronics Ltd. + * + * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> + * + * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. + */ + +/* + char *strcpy(char *dest, const char *src); + */ + +#include <sysdep.h> + +ENTRY(strcpy) + mov r4,r2 +1: + mov.b @r5+,r1 + tst r1,r1 + mov.b r1,@r2 + bf/s 1b + add #1,r2 + + rts + mov r4,r0 +END(strcpy) +libc_hidden_def (strcpy) diff --git a/libc/string/sh/sh4/strncpy.S b/libc/string/sh/sh4/strncpy.S new file mode 100644 index 000000000..8a16f39d4 --- /dev/null +++ b/libc/string/sh/sh4/strncpy.S @@ -0,0 +1,43 @@ +/* strncpy implementation for SUPERH + * + * Copyright (C) 2009 STMicroelectronics Ltd. + * + * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> + * + * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. + */ + +/* + char *strncpy(char *dest, const char *src, size_t n); + */ + +#include <sysdep.h> + +ENTRY(strncpy) + mov #0,r0 + bra 2f + mov r4,r2 +1: + mov.b r1,@(r0,r2) + add #1,r0 +2: + cmp/hs r6,r0 + bt 5f + mov.b @(r0,r5),r1 + tst r1,r1 + bf/s 1b + cmp/hs r6,r0 + bra 4f + nop +3: + mov.b r1,@(r0,r2) + add #1,r0 + cmp/hs r6,r0 +4: + bf/s 3b + mov #0,r1 +5: + rts + mov r2,r0 +END(strncpy) +libc_hidden_def(strncpy) diff --git a/libc/string/sh/strlen.S b/libc/string/sh/strlen.S new file mode 100644 index 000000000..1ccecc17b --- /dev/null +++ b/libc/string/sh/strlen.S @@ -0,0 +1,75 @@ +/* $Id: strlen.S,v 1.2 2001/06/29 14:07:15 gniibe Exp $ + * + * "strlen" implementation of SuperH + * + * Copyright (C) 1999 Kaz Kojima + * + * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. 
+ */ + +/* size_t strlen (const char *s) */ + +#include <sysdep.h> +#include <endian.h> + +ENTRY(strlen) + mov r4,r0 + and #3,r0 + tst r0,r0 + bt/s 1f + mov #0,r2 + + add #-1,r0 + shll2 r0 + shll r0 + braf r0 + nop + + mov.b @r4+,r1 + tst r1,r1 + bt 8f + add #1,r2 + + mov.b @r4+,r1 + tst r1,r1 + bt 8f + add #1,r2 + + mov.b @r4+,r1 + tst r1,r1 + bt 8f + add #1,r2 + +1: + mov #0,r3 +2: + mov.l @r4+,r1 + cmp/str r3,r1 + bf/s 2b + add #4,r2 + + add #-4,r2 +#ifndef __LITTLE_ENDIAN__ + swap.b r1,r1 + swap.w r1,r1 + swap.b r1,r1 +#endif + extu.b r1,r0 + tst r0,r0 + bt/s 8f + shlr8 r1 + add #1,r2 + extu.b r1,r0 + tst r0,r0 + bt/s 8f + shlr8 r1 + add #1,r2 + extu.b r1,r0 + tst r0,r0 + bt 8f + add #1,r2 +8: + rts + mov r2,r0 +END(strlen) +libc_hidden_def (strlen) diff --git a/libc/string/sh64/memcpy.S b/libc/string/sh64/memcpy.S deleted file mode 100644 index 3c0ea0c0d..000000000 --- a/libc/string/sh64/memcpy.S +++ /dev/null @@ -1,205 +0,0 @@ -/* Cloned and hacked for uClibc by Paul Mundt, December 2003 */ -/* Modified by SuperH, Inc. September 2003 */ -! -! Fast SH memcpy -! -! by Toshiyasu Morita (tm@netcom.com) -! hacked by J"orn Rernnecke (joern.rennecke@superh.com) ("o for o-umlaut) -! SH5 code Copyright 2002 SuperH Ltd. -! -! Entry: ARG0: destination pointer -! ARG1: source pointer -! ARG2: byte count -! -! Exit: RESULT: destination pointer -! any other registers in the range r0-r7: trashed -! -! Notes: Usually one wants to do small reads and write a longword, but -! unfortunately it is difficult in some cases to concatanate bytes -! into a longword on the SH, so this does a longword read and small -! writes. -! -! This implementation makes two assumptions about how it is called: -! -! 1.: If the byte count is nonzero, the address of the last byte to be -! copied is unsigned greater than the address of the first byte to -! be copied. This could be easily swapped for a signed comparison, -! but the algorithm used needs some comparison. -! -! 2.: When there are two or three bytes in the last word of an 11-or-more -! bytes memory chunk to b copied, the rest of the word can be read -! without side effects. -! This could be easily changed by increasing the minumum size of -! a fast memcpy and the amount subtracted from r7 before L_2l_loop be 2, -! however, this would cost a few extra cyles on average. -! For SHmedia, the assumption is that any quadword can be read in its -! enirety if at least one byte is included in the copy. -! - -#include <features.h> - - .section .text..SHmedia32,"ax" - .globl memcpy - .type memcpy, @function - .align 5 - -memcpy: - -#define LDUAQ(P,O,D0,D1) ldlo.q P,O,D0; ldhi.q P,O+7,D1 -#define STUAQ(P,O,D0,D1) stlo.q P,O,D0; sthi.q P,O+7,D1 -#define LDUAL(P,O,D0,D1) ldlo.l P,O,D0; ldhi.l P,O+3,D1 -#define STUAL(P,O,D0,D1) stlo.l P,O,D0; sthi.l P,O+3,D1 - - ld.b r3,0,r63 - pta/l Large,tr0 - movi 25,r0 - bgeu/u r4,r0,tr0 - nsb r4,r0 - shlli r0,5,r0 - movi (L1-L0+63*32 + 1) & 0xffff,r1 - sub r1, r0, r0 -L0: ptrel r0,tr0 - add r2,r4,r5 - ptabs r18,tr1 - add r3,r4,r6 - blink tr0,r63 - -/* Rearranged to make cut2 safe */ - .balign 8 -L4_7: /* 4..7 byte memcpy cntd. */ - stlo.l r2, 0, r0 - or r6, r7, r6 - sthi.l r5, -1, r6 - stlo.l r5, -4, r6 - blink tr1,r63 - - .balign 8 -L1: /* 0 byte memcpy */ - nop - blink tr1,r63 - nop - nop - nop - nop - -L2_3: /* 2 or 3 byte memcpy cntd. */ - st.b r5,-1,r6 - blink tr1,r63 - - /* 1 byte memcpy */ - ld.b r3,0,r0 - st.b r2,0,r0 - blink tr1,r63 - -L8_15: /* 8..15 byte memcpy cntd. 
*/ - stlo.q r2, 0, r0 - or r6, r7, r6 - sthi.q r5, -1, r6 - stlo.q r5, -8, r6 - blink tr1,r63 - - /* 2 or 3 byte memcpy */ - ld.b r3,0,r0 - ld.b r2,0,r63 - ld.b r3,1,r1 - st.b r2,0,r0 - pta/l L2_3,tr0 - ld.b r6,-1,r6 - st.b r2,1,r1 - blink tr0, r63 - - /* 4 .. 7 byte memcpy */ - LDUAL (r3, 0, r0, r1) - pta L4_7, tr0 - ldlo.l r6, -4, r7 - or r0, r1, r0 - sthi.l r2, 3, r0 - ldhi.l r6, -1, r6 - blink tr0, r63 - - /* 8 .. 15 byte memcpy */ - LDUAQ (r3, 0, r0, r1) - pta L8_15, tr0 - ldlo.q r6, -8, r7 - or r0, r1, r0 - sthi.q r2, 7, r0 - ldhi.q r6, -1, r6 - blink tr0, r63 - - /* 16 .. 24 byte memcpy */ - LDUAQ (r3, 0, r0, r1) - LDUAQ (r3, 8, r8, r9) - or r0, r1, r0 - sthi.q r2, 7, r0 - or r8, r9, r8 - sthi.q r2, 15, r8 - ldlo.q r6, -8, r7 - ldhi.q r6, -1, r6 - stlo.q r2, 8, r8 - stlo.q r2, 0, r0 - or r6, r7, r6 - sthi.q r5, -1, r6 - stlo.q r5, -8, r6 - blink tr1,r63 - -Large: - ld.b r2, 0, r63 - pta/l Loop_ua, tr1 - ori r3, -8, r7 - sub r2, r7, r22 - sub r3, r2, r6 - add r2, r4, r5 - ldlo.q r3, 0, r0 - addi r5, -16, r5 - movi 64+8, r27 // could subtract r7 from that. - stlo.q r2, 0, r0 - sthi.q r2, 7, r0 - ldx.q r22, r6, r0 - bgtu/l r27, r4, tr1 - - addi r5, -48, r27 - pta/l Loop_line, tr0 - addi r6, 64, r36 - addi r6, -24, r19 - addi r6, -16, r20 - addi r6, -8, r21 - -Loop_line: - ldx.q r22, r36, r63 - alloco r22, 32 - addi r22, 32, r22 - ldx.q r22, r19, r23 - sthi.q r22, -25, r0 - ldx.q r22, r20, r24 - ldx.q r22, r21, r25 - stlo.q r22, -32, r0 - ldx.q r22, r6, r0 - sthi.q r22, -17, r23 - sthi.q r22, -9, r24 - sthi.q r22, -1, r25 - stlo.q r22, -24, r23 - stlo.q r22, -16, r24 - stlo.q r22, -8, r25 - bgeu r27, r22, tr0 - -Loop_ua: - addi r22, 8, r22 - sthi.q r22, -1, r0 - stlo.q r22, -8, r0 - ldx.q r22, r6, r0 - bgtu/l r5, r22, tr1 - - add r3, r4, r7 - ldlo.q r7, -8, r1 - sthi.q r22, 7, r0 - ldhi.q r7, -1, r7 - ptabs r18,tr1 - stlo.q r22, 0, r0 - or r1, r7, r1 - sthi.q r5, 15, r1 - stlo.q r5, 8, r1 - blink tr1, r63 - - .size memcpy,.-memcpy - -libc_hidden_def(memcpy) diff --git a/libc/string/sh64/memset.S b/libc/string/sh64/memset.S deleted file mode 100644 index f588323f0..000000000 --- a/libc/string/sh64/memset.S +++ /dev/null @@ -1,96 +0,0 @@ -/* Cloned and hacked for uClibc by Paul Mundt, December 2003 */ -/* Modified by SuperH, Inc. September 2003 */ -! -! Fast SH memset -! -! by Toshiyasu Morita (tm@netcom.com) -! -! SH5 code by J"orn Rennecke (joern.rennecke@superh.com) -! Copyright 2002 SuperH Ltd. -! 
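Note: the LDUAQ/LDUAL macros in the SHmedia memcpy being removed above synthesize an unaligned 64-/32-bit load from a pair of aligned accesses (ldlo.q/ldhi.q) whose results are then combined. A rough, portable C sketch of the same idea for little-endian byte order (illustrative only; the function name is invented, and the real ldlo/ldhi instructions deposit bytes directly into the right lanes instead of shifting):

	#include <stdint.h>

	/* Fetch 64 bits from a possibly misaligned address using two aligned
	 * loads.  As in the SHmedia code's stated assumption, the containing
	 * quadwords are assumed readable in their entirety; strict-aliasing
	 * caveats are ignored for brevity. */
	static uint64_t load64_unaligned(const unsigned char *p)
	{
		uintptr_t a = (uintptr_t)p;
		unsigned int shift = (unsigned int)(a & 7) * 8;
		const uint64_t *q = (const uint64_t *)(a & ~(uintptr_t)7);

		if (shift == 0)
			return q[0];            /* already aligned: one load suffices */
		return (q[0] >> shift) | (q[1] << (64 - shift));
	}
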
- -#include <features.h> -#include <endian.h> - -#if __BYTE_ORDER == __LITTLE_ENDIAN -#define SHHI shlld -#define SHLO shlrd -#else -#define SHHI shlrd -#define SHLO shlld -#endif - - .section .text..SHmedia32,"ax" - .globl memset - .type memset, @function - - .align 5 - -memset: - pta/l multiquad, tr0 - andi r2, 7, r22 - ptabs r18, tr2 - mshflo.b r3,r3,r3 - add r4, r22, r23 - mperm.w r3, r63, r3 // Fill pattern now in every byte of r3 - - movi 8, r9 - bgtu/u r23, r9, tr0 // multiquad - - beqi/u r4, 0, tr2 // Return with size 0 - ensures no mem accesses - ldlo.q r2, 0, r7 - shlli r4, 2, r4 - movi -1, r8 - SHHI r8, r4, r8 - SHHI r8, r4, r8 - mcmv r7, r8, r3 - stlo.q r2, 0, r3 - blink tr2, r63 - -multiquad: - pta/l lastquad, tr0 - stlo.q r2, 0, r3 - shlri r23, 3, r24 - add r2, r4, r5 - beqi/u r24, 1, tr0 // lastquad - pta/l loop, tr1 - sub r2, r22, r25 - andi r5, -8, r20 // calculate end address and - addi r20, -7*8, r8 // loop end address; This might overflow, so we need - // to use a different test before we start the loop - bge/u r24, r9, tr1 // loop - st.q r25, 8, r3 - st.q r20, -8, r3 - shlri r24, 1, r24 - beqi/u r24, 1, tr0 // lastquad - st.q r25, 16, r3 - st.q r20, -16, r3 - beqi/u r24, 2, tr0 // lastquad - st.q r25, 24, r3 - st.q r20, -24, r3 -lastquad: - sthi.q r5, -1, r3 - blink tr2,r63 - -loop: -!!! alloco r25, 32 // QQQ comment out for short-term fix to SHUK #3895. - // QQQ commenting out is locically correct, but sub-optimal - // QQQ Sean McGoogan - 4th April 2003. - st.q r25, 8, r3 - st.q r25, 16, r3 - st.q r25, 24, r3 - st.q r25, 32, r3 - addi r25, 32, r25 - bgeu/l r8, r25, tr1 // loop - - st.q r20, -40, r3 - st.q r20, -32, r3 - st.q r20, -24, r3 - st.q r20, -16, r3 - st.q r20, -8, r3 - sthi.q r5, -1, r3 - blink tr2,r63 - - .size memset,.-memset - -libc_hidden_def(memset) diff --git a/libc/string/sh64/strcpy.S b/libc/string/sh64/strcpy.S deleted file mode 100644 index da79d5143..000000000 --- a/libc/string/sh64/strcpy.S +++ /dev/null @@ -1,102 +0,0 @@ -/* Cloned and hacked for uClibc by Paul Mundt, December 2003 */ -/* Modified by SuperH, Inc. September 2003 */ -! Entry: arg0: destination -! arg1: source -! Exit: result: destination -! -! SH5 code Copyright 2002 SuperH Ltd. - -#include <features.h> -#include <endian.h> - -#if __BYTE_ORDER == __LITTLE_ENDIAN -#define SHHI shlld -#define SHLO shlrd -#else -#define SHHI shlrd -#define SHLO shlld -#endif - - .section .text..SHmedia32,"ax" - .globl strcpy - .type strcpy, @function - .align 5 - -strcpy: - - pta/l shortstring,tr1 - ldlo.q r3,0,r4 - ptabs r18,tr4 - shlli r3,3,r7 - addi r2, 8, r0 - mcmpeq.b r4,r63,r6 - SHHI r6,r7,r6 - bnei/u r6,0,tr1 // shortstring - pta/l no_lddst, tr2 - ori r3,-8,r23 - sub r2, r23, r0 - sub r3, r2, r21 - addi r21, 8, r20 - ldx.q r0, r21, r5 - pta/l loop, tr0 - ori r2,-8,r22 - mcmpeq.b r5, r63, r6 - bgt/u r22, r23, tr2 // no_lddst - - // r22 < r23 : Need to do a load from the destination. - // r22 == r23 : Doesn't actually need to load from destination, - // but still can be handled here. - ldlo.q r2, 0, r9 - movi -1, r8 - SHLO r8, r7, r8 - mcmv r4, r8, r9 - stlo.q r2, 0, r9 - beqi/l r6, 0, tr0 // loop - - add r5, r63, r4 - addi r0, 8, r0 - blink tr1, r63 // shortstring -no_lddst: - // r22 > r23: note that for r22 == r23 the sthi.q would clobber - // bytes before the destination region. 
- stlo.q r2, 0, r4 - SHHI r4, r7, r4 - sthi.q r0, -1, r4 - beqi/l r6, 0, tr0 // loop - - add r5, r63, r4 - addi r0, 8, r0 -shortstring: -#if __BYTE_ORDER != __LITTLE_ENDIAN - pta/l shortstring2,tr1 - byterev r4,r4 -#endif -shortstring2: - st.b r0,-8,r4 - andi r4,0xff,r5 - shlri r4,8,r4 - addi r0,1,r0 - bnei/l r5,0,tr1 - blink tr4,r63 // return - - .balign 8 -loop: - stlo.q r0, 0, r5 - ldx.q r0, r20, r4 - addi r0, 16, r0 - sthi.q r0, -9, r5 - mcmpeq.b r4, r63, r6 - bnei/u r6, 0, tr1 // shortstring - ldx.q r0, r21, r5 - stlo.q r0, -8, r4 - sthi.q r0, -1, r4 - mcmpeq.b r5, r63, r6 - beqi/l r6, 0, tr0 // loop - - add r5, r63, r4 - addi r0, 8, r0 - blink tr1, r63 // shortstring - - .size strcpy,.-strcpy - -libc_hidden_def(strcpy) diff --git a/libc/string/sh64/strlen.S b/libc/string/sh64/strlen.S deleted file mode 100644 index 18f4164ff..000000000 --- a/libc/string/sh64/strlen.S +++ /dev/null @@ -1,63 +0,0 @@ -/* vi: set sw=8 ts=8: */ -/* - * libc/string/sh64/strlen.S - * - * Simplistic strlen() implementation for SHmedia. - * - * Copyright (C) 2003 Paul Mundt <lethal@linux-sh.org> - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. The name of the above contributors may not be - * used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <features.h> - - .section .text..SHmedia32,"ax" - .globl strlen - .type strlen,@function - - .balign 16 -strlen: - ptabs r18, tr4 - - /* - * Note: We could easily deal with the NULL case here with a simple - * sanity check, though it seems that the behavior we want is to fault - * in the event that r2 == NULL, so we don't bother. - */ -/* beqi r2, 0, tr4 */ ! Sanity check - - movi -1, r0 - pta/l loop, tr0 -loop: - ld.b r2, 0, r1 - addi r2, 1, r2 - addi r0, 1, r0 - bnei/l r1, 0, tr0 - - or r0, r63, r2 - blink tr4, r63 - - .size strlen,.-strlen - -libc_hidden_def(strlen) diff --git a/libc/string/sparc/sparc32/memchr.S b/libc/string/sparc/sparc32/memchr.S index 4d57a553b..1949db2e5 100644 --- a/libc/string/sparc/sparc32/memchr.S +++ b/libc/string/sparc/sparc32/memchr.S @@ -24,9 +24,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ .text .align 4 @@ -139,6 +138,4 @@ ENTRY(memchr) END(memchr) libc_hidden_def(memchr) -#if !__BOUNDED_POINTERS__ weak_alias(memchr,__ubp_memchr) -#endif diff --git a/libc/string/sparc/sparc32/memcpy.S b/libc/string/sparc/sparc32/memcpy.S index 25a48844d..2fb87bb17 100644 --- a/libc/string/sparc/sparc32/memcpy.S +++ b/libc/string/sparc/sparc32/memcpy.S @@ -17,9 +17,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <features.h> diff --git a/libc/string/sparc/sparc32/memset.S b/libc/string/sparc/sparc32/memset.S index 6c6424cf8..6d02fc1a8 100644 --- a/libc/string/sparc/sparc32/memset.S +++ b/libc/string/sparc/sparc32/memset.S @@ -16,9 +16,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <features.h> diff --git a/libc/string/sparc/sparc32/stpcpy.S b/libc/string/sparc/sparc32/stpcpy.S index daf116eb1..2984ea156 100644 --- a/libc/string/sparc/sparc32/stpcpy.S +++ b/libc/string/sparc/sparc32/stpcpy.S @@ -15,9 +15,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test to find out if any byte in xword could be zero. This is fast, but diff --git a/libc/string/sparc/sparc32/strcat.S b/libc/string/sparc/sparc32/strcat.S index eda029a16..e968a18a3 100644 --- a/libc/string/sparc/sparc32/strcat.S +++ b/libc/string/sparc/sparc32/strcat.S @@ -15,9 +15,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test to find out if any byte in xword could be zero. This is fast, but diff --git a/libc/string/sparc/sparc32/strchr.S b/libc/string/sparc/sparc32/strchr.S index 16710d4e8..fabc3e7e5 100644 --- a/libc/string/sparc/sparc32/strchr.S +++ b/libc/string/sparc/sparc32/strchr.S @@ -16,9 +16,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test to find out if any byte in xword could be zero. 
This is fast, but diff --git a/libc/string/sparc/sparc32/strcmp.S b/libc/string/sparc/sparc32/strcmp.S index d43883de6..07284cd18 100644 --- a/libc/string/sparc/sparc32/strcmp.S +++ b/libc/string/sparc/sparc32/strcmp.S @@ -15,9 +15,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test to find out if any byte in xword could be zero. This is fast, but diff --git a/libc/string/sparc/sparc32/strcpy.S b/libc/string/sparc/sparc32/strcpy.S index 4d7742ebc..3287546f3 100644 --- a/libc/string/sparc/sparc32/strcpy.S +++ b/libc/string/sparc/sparc32/strcpy.S @@ -15,9 +15,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test to find out if any byte in xword could be zero. This is fast, but diff --git a/libc/string/sparc/sparc32/strlen.S b/libc/string/sparc/sparc32/strlen.S index 4edfe7e78..66c790cb6 100644 --- a/libc/string/sparc/sparc32/strlen.S +++ b/libc/string/sparc/sparc32/strlen.S @@ -15,9 +15,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test to find out if any byte in xword could be zero. This is fast, but diff --git a/libc/string/sparc/sparc64/memchr.S b/libc/string/sparc/sparc64/memchr.S deleted file mode 100644 index 6096cc218..000000000 --- a/libc/string/sparc/sparc64/memchr.S +++ /dev/null @@ -1,261 +0,0 @@ -/* memchr (str, ch, n) -- Return pointer to first occurrence of CH in STR less - than N. - For SPARC v9. - Copyright (C) 1998, 1999, 2000, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and - Jakub Jelinek <jj@ultra.linux.cz>. - This version is developed using the same algorithm as the fast C - version which carries the following introduction: - Based on strlen implementation by Torbjorn Granlund (tege@sics.se), - with help from Dan Sahlin (dan@sics.se) and - commentary by Jim Blandy (jimb@ai.mit.edu); - adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), - and implemented by Roland McGrath (roland@ai.mit.edu). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include <asm/asi.h> -#ifndef XCC -#define XCC xcc -#define USE_BPR - .register %g2, #scratch - .register %g3, #scratch -#endif - - /* Normally, this uses - ((xword - 0x0101010101010101) & 0x8080808080808080) test - to find out if any byte in xword could be zero. This is fast, but - also gives false alarm for any byte in range 0x81-0xff. It does - not matter for correctness, as if this test tells us there could - be some zero byte, we check it byte by byte, but if bytes with - high bits set are common in the strings, then this will give poor - performance. You can #define EIGHTBIT_NOT_RARE and the algorithm - will use one tick slower, but more precise test - ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), - which does not give any false alarms (but if some bits are set, - one cannot assume from it which bytes are zero and which are not). - It is yet to be measured, what is the correct default for glibc - in these days for an average user. - */ - - .text - .align 32 -ENTRY(memchr) - and %o1, 0xff, %o1 /* IEU0 Group */ -#ifdef USE_BPR - brz,pn %o2, 12f /* CTI+IEU1 */ -#else - tst %o2 /* IEU1 */ - be,pn %XCC, 12f /* CTI */ -#endif - sll %o1, 8, %g3 /* IEU0 Group */ - add %o0, %o2, %o2 /* IEU1 */ - - sethi %hi(0x01010101), %g1 /* IEU0 Group */ - or %g3, %o1, %g3 /* IEU1 */ - ldub [%o0], %o3 /* Load */ - sllx %g3, 16, %g5 /* IEU0 Group */ - - or %g1, %lo(0x01010101), %g1 /* IEU1 */ - sllx %g1, 32, %g2 /* IEU0 Group */ - or %g3, %g5, %g3 /* IEU1 */ - sllx %g3, 32, %g5 /* IEU0 Group */ - - cmp %o3, %o1 /* IEU1 */ - be,pn %xcc, 13f /* CTI */ - or %g1, %g2, %g1 /* IEU0 Group */ - andcc %o0, 7, %g0 /* IEU1 */ - - bne,a,pn %icc, 21f /* CTI */ - add %o0, 1, %o0 /* IEU0 Group */ - ldx [%o0], %o3 /* Load Group */ - sllx %g1, 7, %g2 /* IEU0 */ - - or %g3, %g5, %g3 /* IEU1 */ -1: add %o0, 8, %o0 /* IEU0 Group */ - xor %o3, %g3, %o4 /* IEU1 */ - /* %g1 = 0101010101010101 * - * %g2 = 8080088080808080 * - * %g3 = c c c c c c c c * - * %o3 = value * - * %o4 = value XOR c */ -2: cmp %o0, %o2 /* IEU1 Group */ - - bg,pn %XCC, 11f /* CTI */ - ldxa [%o0] ASI_PNF, %o3 /* Load */ - sub %o4, %g1, %o5 /* IEU0 Group */ - add %o0, 8, %o0 /* IEU1 */ -#ifdef EIGHTBIT_NOT_RARE - andn %o5, %o4, %o5 /* IEU0 Group */ -#endif - - andcc %o5, %g2, %g0 /* IEU1 Group */ - be,a,pt %xcc, 2b /* CTI */ - xor %o3, %g3, %o4 /* IEU0 */ - srlx %o4, 56, %g5 /* IEU0 */ - - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 3f /* CTI */ - srlx %o4, 48, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 4f /* CTI */ - srlx %o4, 40, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 5f /* CTI */ - - srlx %o4, 32, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 6f /* CTI */ - srlx %o4, 24, %g5 /* IEU0 */ - - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 7f /* CTI */ - srlx %o4, 16, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 8f /* CTI */ - srlx %o4, 8, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 9f /* CTI */ - - andcc %o4, 0xff, %g0 /* IEU1 Group */ - bne,pt %icc, 2b /* CTI */ - xor %o3, %g3, %o4 /* IEU0 */ - retl /* CTI+IEU1 Group */ - - add %o0, -9, %o0 /* IEU0 */ - - .align 16 -3: retl /* CTI+IEU1 Group */ - add %o0, -16, %o0 
/* IEU0 */ -4: retl /* CTI+IEU1 Group */ - add %o0, -15, %o0 /* IEU0 */ - -5: retl /* CTI+IEU1 Group */ - add %o0, -14, %o0 /* IEU0 */ -6: retl /* CTI+IEU1 Group */ - add %o0, -13, %o0 /* IEU0 */ - -7: retl /* CTI+IEU1 Group */ - add %o0, -12, %o0 /* IEU0 */ -8: retl /* CTI+IEU1 Group */ - add %o0, -11, %o0 /* IEU0 */ - -9: retl /* CTI+IEU1 Group */ - add %o0, -10, %o0 /* IEU0 */ -11: sub %o4, %g1, %o5 /* IEU0 Group */ - sub %o0, 8, %o0 /* IEU1 */ - - andcc %o5, %g2, %g0 /* IEU1 Group */ - be,pt %xcc, 12f /* CTI */ - sub %o2, %o0, %o2 /* IEU0 */ - tst %o2 /* IEU1 Group */ - - be,pn %XCC, 12f /* CTI */ - srlx %o4, 56, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 13f /* CTI */ - - cmp %o2, 1 /* IEU0 */ - be,pn %XCC, 12f /* CTI Group */ - srlx %o4, 48, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 14f /* CTI */ - cmp %o2, 2 /* IEU1 Group */ - be,pn %XCC, 12f /* CTI */ - srlx %o4, 40, %g5 /* IEU0 */ - - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 15f /* CTI */ - cmp %o2, 3 /* IEU1 Group */ - be,pn %XCC, 12f /* CTI */ - - srlx %o4, 32, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 16f /* CTI */ - cmp %o2, 4 /* IEU1 Group */ - - be,pn %XCC, 12f /* CTI */ - srlx %o4, 24, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 17f /* CTI */ - - cmp %o2, 5 /* IEU1 Group */ - be,pn %XCC, 12f /* CTI */ - srlx %o4, 16, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 18f /* CTI */ - cmp %o2, 6 /* IEU1 Group */ - be,pn %XCC, 12f /* CTI */ - srlx %o4, 8, %g5 /* IEU0 */ - - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 19f /* CTI */ - nop /* IEU0 */ -12: retl /* CTI+IEU1 Group */ - - clr %o0 /* IEU0 */ - nop /* Stub */ -13: retl /* CTI+IEU1 Group */ - nop /* IEU0 */ - -14: retl /* CTI+IEU1 Group */ - add %o0, 1, %o0 /* IEU0 */ -15: retl /* CTI+IEU1 Group */ - add %o0, 2, %o0 /* IEU0 */ - -16: retl /* CTI+IEU1 Group */ - add %o0, 3, %o0 /* IEU0 */ -17: retl /* CTI+IEU1 Group */ - add %o0, 4, %o0 /* IEU0 */ - -18: retl /* CTI+IEU1 Group */ - add %o0, 5, %o0 /* IEU0 */ -19: retl /* CTI+IEU1 Group */ - add %o0, 6, %o0 /* IEU0 */ - -21: cmp %o0, %o2 /* IEU1 */ - be,pn %XCC, 12b /* CTI */ - sllx %g1, 7, %g2 /* IEU0 Group */ - ldub [%o0], %o3 /* Load */ - - or %g3, %g5, %g3 /* IEU1 */ -22: andcc %o0, 7, %g0 /* IEU1 Group */ - be,a,pn %icc, 1b /* CTI */ - ldx [%o0], %o3 /* Load */ - - cmp %o3, %o1 /* IEU1 Group */ - be,pn %xcc, 23f /* CTI */ - add %o0, 1, %o0 /* IEU0 */ - cmp %o0, %o2 /* IEU1 Group */ - - bne,a,pt %XCC, 22b /* CTI */ - ldub [%o0], %o3 /* Load */ - retl /* CTI+IEU1 Group */ - clr %o0 /* IEU0 */ - -23: retl /* CTI+IEU1 Group */ - add %o0, -1, %o0 /* IEU0 */ -END(memchr) - -libc_hidden_def(memchr) -#if !__BOUNDED_POINTERS__ -weak_alias(memchr,__ubp_memchr) -#endif diff --git a/libc/string/sparc/sparc64/memcpy.S b/libc/string/sparc/sparc64/memcpy.S deleted file mode 100644 index db63d1da2..000000000 --- a/libc/string/sparc/sparc64/memcpy.S +++ /dev/null @@ -1,923 +0,0 @@ -/* Copy SIZE bytes from SRC to DEST. - For UltraSPARC. - Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by David S. Miller (davem@caip.rutgers.edu) and - Jakub Jelinek (jakub@redhat.com). 
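Note: the "Normally, this uses ((xword - 0x01010101) & 0x80808080) test" comment that recurs in the sparc32 string sources above, and in 64-bit form in the sparc64 files being removed here, refers to the classic word-at-a-time zero-byte test. A short C illustration of both the fast form and the exact EIGHTBIT_NOT_RARE form for 64-bit words, purely explanatory and not part of the patch:

	#include <stdint.h>

	#define ONES  0x0101010101010101ULL
	#define HIGHS 0x8080808080808080ULL

	/* Fast test: may give a false alarm when a byte is in 0x81..0xff,
	 * so callers re-check byte by byte after a hit. */
	static int may_have_zero_byte(uint64_t x)
	{
		return ((x - ONES) & HIGHS) != 0;
	}

	/* Exact test (the EIGHTBIT_NOT_RARE variant): no false alarms. */
	static int has_zero_byte(uint64_t x)
	{
		return ((x - ONES) & ~x & HIGHS) != 0;
	}
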
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include <features.h> -#include <asm/asi.h> -#ifndef XCC -#define USE_BPR - .register %g2, #scratch - .register %g3, #scratch - .register %g6, #scratch -#define XCC xcc -#endif -#define FPRS_FEF 4 - -#define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9) \ - faligndata %f1, %f2, %f48; \ - faligndata %f2, %f3, %f50; \ - faligndata %f3, %f4, %f52; \ - faligndata %f4, %f5, %f54; \ - faligndata %f5, %f6, %f56; \ - faligndata %f6, %f7, %f58; \ - faligndata %f7, %f8, %f60; \ - faligndata %f8, %f9, %f62; - -#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ - ldda [%src] %asi, %fdest; \ - add %src, 0x40, %src; \ - add %dest, 0x40, %dest; \ - subcc %len, 0x40, %len; \ - be,pn %xcc, jmptgt; \ - stda %fsrc, [%dest - 0x40] %asi; - -#define LOOP_CHUNK1(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) -#define LOOP_CHUNK2(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) -#define LOOP_CHUNK3(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) - -#define STORE_SYNC(dest, fsrc) \ - stda %fsrc, [%dest] %asi; \ - add %dest, 0x40, %dest; - -#define STORE_JUMP(dest, fsrc, target) \ - stda %fsrc, [%dest] %asi; \ - add %dest, 0x40, %dest; \ - ba,pt %xcc, target; - -#define VISLOOP_PAD nop; nop; nop; nop; \ - nop; nop; nop; nop; \ - nop; nop; nop; nop; \ - nop; nop; nop; - -#define FINISH_VISCHUNK(dest, f0, f1, left) \ - subcc %left, 8, %left; \ - bl,pn %xcc, 205f; \ - faligndata %f0, %f1, %f48; \ - std %f48, [%dest]; \ - add %dest, 8, %dest; - -#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ - subcc %left, 8, %left; \ - bl,pn %xcc, 205f; \ - fsrc1 %f0, %f1; \ - ba,a,pt %xcc, 204f; - - /* Macros for non-VIS memcpy code. 
*/ -#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - ldx [%src + offset + 0x10], %t2; \ - ldx [%src + offset + 0x18], %t3; \ - stw %t0, [%dst + offset + 0x04]; \ - srlx %t0, 32, %t0; \ - stw %t0, [%dst + offset + 0x00]; \ - stw %t1, [%dst + offset + 0x0c]; \ - srlx %t1, 32, %t1; \ - stw %t1, [%dst + offset + 0x08]; \ - stw %t2, [%dst + offset + 0x14]; \ - srlx %t2, 32, %t2; \ - stw %t2, [%dst + offset + 0x10]; \ - stw %t3, [%dst + offset + 0x1c]; \ - srlx %t3, 32, %t3; \ - stw %t3, [%dst + offset + 0x18]; - -#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - ldx [%src + offset + 0x10], %t2; \ - ldx [%src + offset + 0x18], %t3; \ - stx %t0, [%dst + offset + 0x00]; \ - stx %t1, [%dst + offset + 0x08]; \ - stx %t2, [%dst + offset + 0x10]; \ - stx %t3, [%dst + offset + 0x18]; \ - ldx [%src + offset + 0x20], %t0; \ - ldx [%src + offset + 0x28], %t1; \ - ldx [%src + offset + 0x30], %t2; \ - ldx [%src + offset + 0x38], %t3; \ - stx %t0, [%dst + offset + 0x20]; \ - stx %t1, [%dst + offset + 0x28]; \ - stx %t2, [%dst + offset + 0x30]; \ - stx %t3, [%dst + offset + 0x38]; - -#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x10], %t0; \ - ldx [%src - offset - 0x08], %t1; \ - stw %t0, [%dst - offset - 0x0c]; \ - srlx %t0, 32, %t2; \ - stw %t2, [%dst - offset - 0x10]; \ - stw %t1, [%dst - offset - 0x04]; \ - srlx %t1, 32, %t3; \ - stw %t3, [%dst - offset - 0x08]; - -#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ - ldx [%src - offset - 0x10], %t0; \ - ldx [%src - offset - 0x08], %t1; \ - stx %t0, [%dst - offset - 0x10]; \ - stx %t1, [%dst - offset - 0x08]; - - /* Macros for non-VIS memmove code. 
*/ -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stw %t0, [%dst - offset - 0x1c]; \ - srlx %t0, 32, %t0; \ - stw %t0, [%dst - offset - 0x20]; \ - stw %t1, [%dst - offset - 0x14]; \ - srlx %t1, 32, %t1; \ - stw %t1, [%dst - offset - 0x18]; \ - stw %t2, [%dst - offset - 0x0c]; \ - srlx %t2, 32, %t2; \ - stw %t2, [%dst - offset - 0x10]; \ - stw %t3, [%dst - offset - 0x04]; \ - srlx %t3, 32, %t3; \ - stw %t3, [%dst - offset - 0x08]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stx %t0, [%dst - offset - 0x20]; \ - stx %t1, [%dst - offset - 0x18]; \ - stx %t2, [%dst - offset - 0x10]; \ - stx %t3, [%dst - offset - 0x08]; \ - ldx [%src - offset - 0x40], %t0; \ - ldx [%src - offset - 0x38], %t1; \ - ldx [%src - offset - 0x30], %t2; \ - ldx [%src - offset - 0x28], %t3; \ - stx %t0, [%dst - offset - 0x40]; \ - stx %t1, [%dst - offset - 0x38]; \ - stx %t2, [%dst - offset - 0x30]; \ - stx %t3, [%dst - offset - 0x28]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stw %t0, [%dst + offset + 0x04]; \ - srlx %t0, 32, %t2; \ - stw %t2, [%dst + offset + 0x00]; \ - stw %t1, [%dst + offset + 0x0c]; \ - srlx %t1, 32, %t3; \ - stw %t3, [%dst + offset + 0x08]; - -#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stx %t0, [%dst + offset + 0x00]; \ - stx %t1, [%dst + offset + 0x08]; - - .text - .align 32 - -#ifdef __UCLIBC_SUSV3_LEGACY__ -ENTRY(bcopy) - sub %o1, %o0, %o4 /* IEU0 Group */ - mov %o0, %g3 /* IEU1 */ - cmp %o4, %o2 /* IEU1 Group */ - mov %o1, %o0 /* IEU0 */ - bgeu,pt %XCC, 210f /* CTI */ - mov %g3, %o1 /* IEU0 Group */ -#ifndef USE_BPR - srl %o2, 0, %o2 /* IEU1 */ -#endif - brnz,pn %o2, 220f /* CTI Group */ - add %o0, %o2, %o0 /* IEU0 */ - retl - nop -END(bcopy) -#endif - - .align 32 -200: be,pt %xcc, 201f /* CTI */ - andcc %o0, 0x38, %g5 /* IEU1 Group */ - mov 8, %g1 /* IEU0 */ - sub %g1, %g2, %g2 /* IEU0 Group */ - andcc %o0, 1, %g0 /* IEU1 */ - be,pt %icc, 2f /* CTI */ - sub %o2, %g2, %o2 /* IEU0 Group */ -1: ldub [%o1], %o5 /* Load Group */ - add %o1, 1, %o1 /* IEU0 */ - add %o0, 1, %o0 /* IEU1 */ - subcc %g2, 1, %g2 /* IEU1 Group */ - be,pn %xcc, 3f /* CTI */ - stb %o5, [%o0 - 1] /* Store */ -2: ldub [%o1], %o5 /* Load Group */ - add %o0, 2, %o0 /* IEU0 */ - ldub [%o1 + 1], %g3 /* Load Group */ - subcc %g2, 2, %g2 /* IEU1 Group */ - stb %o5, [%o0 - 2] /* Store */ - add %o1, 2, %o1 /* IEU0 */ - bne,pt %xcc, 2b /* CTI Group */ - stb %g3, [%o0 - 1] /* Store */ -3: andcc %o0, 0x38, %g5 /* IEU1 Group */ -201: be,pt %icc, 202f /* CTI */ - mov 64, %g1 /* IEU0 */ - fmovd %f0, %f2 /* FPU */ - sub %g1, %g5, %g5 /* IEU0 Group */ - alignaddr %o1, %g0, %g1 /* GRU Group */ - ldd [%g1], %f4 /* Load Group */ - sub %o2, %g5, %o2 /* IEU0 */ -1: ldd [%g1 + 0x8], %f6 /* Load Group */ - add %g1, 0x8, %g1 /* IEU0 Group */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f4, %f6, %f0 /* GRU Group */ - std %f0, [%o0] /* Store */ - add %o1, 8, %o1 /* IEU0 Group */ - be,pn %xcc, 202f /* CTI */ - add %o0, 8, %o0 /* IEU1 */ - ldd [%g1 + 0x8], %f4 /* Load Group */ - add %g1, 8, %g1 /* IEU0 */ - subcc %g5, 8, %g5 /* IEU1 */ - 
faligndata %f6, %f4, %f0 /* GRU Group */ - std %f0, [%o0] /* Store */ - add %o1, 8, %o1 /* IEU0 */ - bne,pt %xcc, 1b /* CTI Group */ - add %o0, 8, %o0 /* IEU0 */ -202: membar #LoadStore | #StoreStore | #StoreLoad /* LSU Group */ - wr %g0, ASI_BLK_P, %asi /* LSU Group */ - subcc %o2, 0x40, %g6 /* IEU1 Group */ - mov %o1, %g1 /* IEU0 */ - andncc %g6, (0x40 - 1), %g6 /* IEU1 Group */ - srl %g1, 3, %g2 /* IEU0 */ - sub %o2, %g6, %g3 /* IEU0 Group */ - andn %o1, (0x40 - 1), %o1 /* IEU1 */ - and %g2, 7, %g2 /* IEU0 Group */ - andncc %g3, 0x7, %g3 /* IEU1 */ - fmovd %f0, %f2 /* FPU */ - sub %g3, 0x10, %g3 /* IEU0 Group */ - sub %o2, %g6, %o2 /* IEU1 */ - alignaddr %g1, %g0, %g0 /* GRU Group */ - add %g1, %g6, %g1 /* IEU0 Group */ - subcc %o2, %g3, %o2 /* IEU1 */ - ldda [%o1 + 0x00] %asi, %f0 /* LSU Group */ - add %g1, %g3, %g1 /* IEU0 */ - ldda [%o1 + 0x40] %asi, %f16 /* LSU Group */ - sub %g6, 0x80, %g6 /* IEU0 */ - ldda [%o1 + 0x80] %asi, %f32 /* LSU Group */ - /* Clk1 Group 8-( */ - /* Clk2 Group 8-( */ - /* Clk3 Group 8-( */ - /* Clk4 Group 8-( */ -203: rd %pc, %g5 /* PDU Group 8-( */ - addcc %g5, %lo(300f - 203b), %g5 /* IEU1 Group */ - sll %g2, 9, %g2 /* IEU0 */ - jmpl %g5 + %g2, %g0 /* CTI Group brk forced*/ - addcc %o1, 0xc0, %o1 /* IEU1 Group */ - - .align 512 /* OK, here comes the fun part... */ -300: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) LOOP_CHUNK1(o1, o0, g6, 301f) - FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) LOOP_CHUNK2(o1, o0, g6, 302f) - FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) LOOP_CHUNK3(o1, o0, g6, 303f) - b,pt %xcc, 300b+4; faligndata %f0, %f2, %f48 -301: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) STORE_JUMP(o0, f48, 400f) membar #Sync -302: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) STORE_JUMP(o0, f48, 416f) membar #Sync -303: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) STORE_JUMP(o0, f48, 432f) membar #Sync - VISLOOP_PAD -310: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) LOOP_CHUNK1(o1, o0, g6, 311f) - FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) LOOP_CHUNK2(o1, o0, g6, 312f) - FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) LOOP_CHUNK3(o1, o0, g6, 313f) - b,pt %xcc, 310b+4; faligndata %f2, %f4, %f48 -311: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) STORE_JUMP(o0, f48, 402f) membar #Sync -312: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) STORE_JUMP(o0, f48, 418f) membar #Sync -313: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) STORE_JUMP(o0, f48, 434f) membar #Sync - VISLOOP_PAD -320: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) LOOP_CHUNK1(o1, o0, g6, 321f) - FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) LOOP_CHUNK2(o1, o0, g6, 322f) - FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) LOOP_CHUNK3(o1, o0, g6, 323f) - b,pt %xcc, 320b+4; faligndata %f4, %f6, %f48 -321: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) STORE_JUMP(o0, f48, 404f) membar #Sync -322: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f4, f6, f8, 
f10,f12,f14,f16,f18,f20) STORE_JUMP(o0, f48, 420f) membar #Sync -323: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) STORE_JUMP(o0, f48, 436f) membar #Sync - VISLOOP_PAD -330: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) LOOP_CHUNK1(o1, o0, g6, 331f) - FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) LOOP_CHUNK2(o1, o0, g6, 332f) - FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) LOOP_CHUNK3(o1, o0, g6, 333f) - b,pt %xcc, 330b+4; faligndata %f6, %f8, %f48 -331: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) STORE_JUMP(o0, f48, 406f) membar #Sync -332: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) STORE_JUMP(o0, f48, 422f) membar #Sync -333: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) STORE_JUMP(o0, f48, 438f) membar #Sync - VISLOOP_PAD -340: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) LOOP_CHUNK1(o1, o0, g6, 341f) - FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) LOOP_CHUNK2(o1, o0, g6, 342f) - FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) LOOP_CHUNK3(o1, o0, g6, 343f) - b,pt %xcc, 340b+4; faligndata %f8, %f10, %f48 -341: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) STORE_JUMP(o0, f48, 408f) membar #Sync -342: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) STORE_JUMP(o0, f48, 424f) membar #Sync -343: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) STORE_JUMP(o0, f48, 440f) membar #Sync - VISLOOP_PAD -350: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) LOOP_CHUNK1(o1, o0, g6, 351f) - FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) LOOP_CHUNK2(o1, o0, g6, 352f) - FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) LOOP_CHUNK3(o1, o0, g6, 353f) - b,pt %xcc, 350b+4; faligndata %f10, %f12, %f48 -351: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) STORE_JUMP(o0, f48, 410f) membar #Sync -352: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) STORE_JUMP(o0, f48, 426f) membar #Sync -353: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) STORE_JUMP(o0, f48, 442f) membar #Sync - VISLOOP_PAD -360: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) LOOP_CHUNK1(o1, o0, g6, 361f) - FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) LOOP_CHUNK2(o1, o0, g6, 362f) - FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) LOOP_CHUNK3(o1, o0, g6, 363f) - b,pt %xcc, 360b+4; faligndata %f12, %f14, %f48 -361: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) STORE_JUMP(o0, f48, 412f) membar #Sync -362: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) STORE_JUMP(o0, f48, 428f) membar #Sync -363: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) STORE_JUMP(o0, f48, 444f) membar #Sync - VISLOOP_PAD -370: 
FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) LOOP_CHUNK1(o1, o0, g6, 371f) - FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) LOOP_CHUNK2(o1, o0, g6, 372f) - FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) LOOP_CHUNK3(o1, o0, g6, 373f) - b,pt %xcc, 370b+4; faligndata %f14, %f16, %f48 -371: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) STORE_JUMP(o0, f48, 414f) membar #Sync -372: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) STORE_JUMP(o0, f48, 430f) membar #Sync -373: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) STORE_SYNC(o0, f48) membar #Sync - FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_JUMP(o0, f48, 446f) membar #Sync - VISLOOP_PAD -400: FINISH_VISCHUNK(o0, f0, f2, g3) -402: FINISH_VISCHUNK(o0, f2, f4, g3) -404: FINISH_VISCHUNK(o0, f4, f6, g3) -406: FINISH_VISCHUNK(o0, f6, f8, g3) -408: FINISH_VISCHUNK(o0, f8, f10, g3) -410: FINISH_VISCHUNK(o0, f10, f12, g3) -412: FINISH_VISCHUNK(o0, f12, f14, g3) -414: UNEVEN_VISCHUNK(o0, f14, f0, g3) -416: FINISH_VISCHUNK(o0, f16, f18, g3) -418: FINISH_VISCHUNK(o0, f18, f20, g3) -420: FINISH_VISCHUNK(o0, f20, f22, g3) -422: FINISH_VISCHUNK(o0, f22, f24, g3) -424: FINISH_VISCHUNK(o0, f24, f26, g3) -426: FINISH_VISCHUNK(o0, f26, f28, g3) -428: FINISH_VISCHUNK(o0, f28, f30, g3) -430: UNEVEN_VISCHUNK(o0, f30, f0, g3) -432: FINISH_VISCHUNK(o0, f32, f34, g3) -434: FINISH_VISCHUNK(o0, f34, f36, g3) -436: FINISH_VISCHUNK(o0, f36, f38, g3) -438: FINISH_VISCHUNK(o0, f38, f40, g3) -440: FINISH_VISCHUNK(o0, f40, f42, g3) -442: FINISH_VISCHUNK(o0, f42, f44, g3) -444: FINISH_VISCHUNK(o0, f44, f46, g3) -446: UNEVEN_VISCHUNK(o0, f46, f0, g3) -204: ldd [%o1], %f2 /* Load Group */ - add %o1, 8, %o1 /* IEU0 */ - subcc %g3, 8, %g3 /* IEU1 */ - faligndata %f0, %f2, %f8 /* GRU Group */ - std %f8, [%o0] /* Store */ - bl,pn %xcc, 205f /* CTI */ - add %o0, 8, %o0 /* IEU0 Group */ - ldd [%o1], %f0 /* Load Group */ - add %o1, 8, %o1 /* IEU0 */ - subcc %g3, 8, %g3 /* IEU1 */ - faligndata %f2, %f0, %f8 /* GRU Group */ - std %f8, [%o0] /* Store */ - bge,pt %xcc, 204b /* CTI */ - add %o0, 8, %o0 /* IEU0 Group */ -205: brz,pt %o2, 207f /* CTI Group */ - mov %g1, %o1 /* IEU0 */ -206: ldub [%o1], %g5 /* LOAD */ - add %o1, 1, %o1 /* IEU0 */ - add %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 */ - bne,pt %xcc, 206b /* CTI */ - stb %g5, [%o0 - 1] /* Store Group */ -207: membar #StoreLoad | #StoreStore /* LSU Group */ - wr %g0, FPRS_FEF, %fprs - retl - mov %g4, %o0 - -208: andcc %o2, 1, %g0 /* IEU1 Group */ - be,pt %icc, 2f+4 /* CTI */ -1: ldub [%o1], %g5 /* LOAD Group */ - add %o1, 1, %o1 /* IEU0 */ - add %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 Group */ - be,pn %xcc, 209f /* CTI */ - stb %g5, [%o0 - 1] /* Store */ -2: ldub [%o1], %g5 /* LOAD Group */ - add %o0, 2, %o0 /* IEU0 */ - ldub [%o1 + 1], %o5 /* LOAD Group */ - add %o1, 2, %o1 /* IEU0 */ - subcc %o2, 2, %o2 /* IEU1 Group */ - stb %g5, [%o0 - 2] /* Store */ - bne,pt %xcc, 2b /* CTI */ - stb %o5, [%o0 - 1] /* Store */ -209: retl - mov %g4, %o0 - -#ifdef USE_BPR - - /* void *__align_cpy_4(void *dest, void *src, size_t n) - * SPARC v9 SYSV ABI - * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 3)) - */ - - .align 32 -ENTRY(__align_cpy_4) - mov %o0, %g4 /* IEU0 Group */ - cmp %o2, 15 /* IEU1 */ - bleu,pn %xcc, 208b /* CTI */ - cmp %o2, (64 * 6) /* IEU1 Group */ - bgeu,pn %xcc, 200b /* CTI */ - andcc %o0, 7, %g2 /* IEU1 
Group */ - ba,pt %xcc, 216f /* CTI */ - andcc %o1, 4, %g0 /* IEU1 Group */ -END(__align_cpy_4) - - /* void *__align_cpy_8(void *dest, void *src, size_t n) - * SPARC v9 SYSV ABI - * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 7)) - */ - - .align 32 -ENTRY(__align_cpy_8) - mov %o0, %g4 /* IEU0 Group */ - cmp %o2, 15 /* IEU1 */ - bleu,pn %xcc, 208b /* CTI */ - cmp %o2, (64 * 6) /* IEU1 Group */ - bgeu,pn %xcc, 201b /* CTI */ - andcc %o0, 0x38, %g5 /* IEU1 Group */ - andcc %o2, -128, %g6 /* IEU1 Group */ - bne,a,pt %xcc, 82f + 4 /* CTI */ - ldx [%o1], %g1 /* Load */ - ba,pt %xcc, 41f /* CTI Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ -END(__align_cpy_8) - - /* void *__align_cpy_16(void *dest, void *src, size_t n) - * SPARC v9 SYSV ABI - * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 15)) - */ - - .align 32 -ENTRY(__align_cpy_16) - mov %o0, %g4 /* IEU0 Group */ - cmp %o2, (64 * 6) /* IEU1 */ - bgeu,pn %xcc, 201b /* CTI */ - andcc %o0, 0x38, %g5 /* IEU1 Group */ - andcc %o2, -128, %g6 /* IEU1 Group */ - bne,a,pt %xcc, 82f + 4 /* CTI */ - ldx [%o1], %g1 /* Load */ - ba,pt %xcc, 41f /* CTI Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ -END(__align_cpy_16) - -#endif - - .align 32 -ENTRY(memcpy) -210: -#ifndef USE_BPR - srl %o2, 0, %o2 /* IEU1 Group */ -#endif - brz,pn %o2, 209b /* CTI Group */ - mov %o0, %g4 /* IEU0 */ -218: cmp %o2, 15 /* IEU1 Group */ - bleu,pn %xcc, 208b /* CTI */ - cmp %o2, (64 * 6) /* IEU1 Group */ - bgeu,pn %xcc, 200b /* CTI */ - andcc %o0, 7, %g2 /* IEU1 Group */ - sub %o0, %o1, %g5 /* IEU0 */ - andcc %g5, 3, %o5 /* IEU1 Group */ - bne,pn %xcc, 212f /* CTI */ - andcc %o1, 3, %g0 /* IEU1 Group */ - be,a,pt %xcc, 216f /* CTI */ - andcc %o1, 4, %g0 /* IEU1 Group */ - andcc %o1, 1, %g0 /* IEU1 Group */ - be,pn %xcc, 4f /* CTI */ - andcc %o1, 2, %g0 /* IEU1 Group */ - ldub [%o1], %g2 /* Load Group */ - add %o1, 1, %o1 /* IEU0 */ - add %o0, 1, %o0 /* IEU1 */ - sub %o2, 1, %o2 /* IEU0 Group */ - bne,pn %xcc, 5f /* CTI Group */ - stb %g2, [%o0 - 1] /* Store */ -4: lduh [%o1], %g2 /* Load Group */ - add %o1, 2, %o1 /* IEU0 */ - add %o0, 2, %o0 /* IEU1 */ - sub %o2, 2, %o2 /* IEU0 */ - sth %g2, [%o0 - 2] /* Store Group + bubble */ -5: andcc %o1, 4, %g0 /* IEU1 */ -216: be,a,pn %xcc, 2f /* CTI */ - andcc %o2, -128, %g6 /* IEU1 Group */ - lduw [%o1], %g5 /* Load Group */ - add %o1, 4, %o1 /* IEU0 */ - add %o0, 4, %o0 /* IEU1 */ - sub %o2, 4, %o2 /* IEU0 Group */ - stw %g5, [%o0 - 4] /* Store */ - andcc %o2, -128, %g6 /* IEU1 Group */ -2: be,pn %xcc, 215f /* CTI */ - andcc %o0, 4, %g0 /* IEU1 Group */ - be,pn %xcc, 82f + 4 /* CTI Group */ -5: MOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - MOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) - MOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - MOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) -35: subcc %g6, 128, %g6 /* IEU1 Group */ - add %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 5b /* CTI */ - add %o0, 128, %o0 /* IEU0 Group */ -215: andcc %o2, 0x70, %g6 /* IEU1 Group */ -41: be,pn %xcc, 80f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -79: rd %pc, %o5 /* PDU Group */ - sll %g6, 1, %g5 /* IEU0 Group */ - add %o1, %g6, %o1 /* IEU1 */ - sub %o5, %g5, %o5 /* IEU0 Group */ - jmpl %o5 + %lo(80f - 79b), %g0 /* CTI Group brk forced*/ - add %o0, %g6, %o0 /* IEU0 Group */ -36: MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) - MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) - MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) - MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, 
o5) - MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) - MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) - MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) -80: be,pt %xcc, 81f /* CTI */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1], %g2 /* Load Group */ - add %o0, 8, %o0 /* IEU0 */ - stw %g2, [%o0 - 0x4] /* Store Group */ - add %o1, 8, %o1 /* IEU1 */ - srlx %g2, 32, %g2 /* IEU0 Group */ - stw %g2, [%o0 - 0x8] /* Store */ -81: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1], %g2 /* Load Group */ - add %o1, 4, %o1 /* IEU0 */ - stw %g2, [%o0] /* Store Group */ - add %o0, 4, %o0 /* IEU0 */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1], %g2 /* Load Group */ - add %o1, 2, %o1 /* IEU0 */ - sth %g2, [%o0] /* Store Group */ - add %o0, 2, %o0 /* IEU0 */ -1: be,pt %xcc, 211f /* CTI */ - nop /* IEU1 */ - ldub [%o1], %g2 /* Load Group */ - stb %g2, [%o0] /* Store Group + bubble */ -211: retl - mov %g4, %o0 - -82: MOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - MOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) -37: subcc %g6, 128, %g6 /* IEU1 Group */ - add %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 82b /* CTI */ - add %o0, 128, %o0 /* IEU0 Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ - be,pn %xcc, 84f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -83: rd %pc, %o5 /* PDU Group */ - add %o1, %g6, %o1 /* IEU0 Group */ - sub %o5, %g6, %o5 /* IEU1 */ - jmpl %o5 + %lo(84f - 83b), %g0 /* CTI Group brk forced*/ - add %o0, %g6, %o0 /* IEU0 Group */ -38: MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) - MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) - MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) - MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) - MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) - MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) - MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) -84: be,pt %xcc, 85f /* CTI Group */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1], %g2 /* Load Group */ - add %o0, 8, %o0 /* IEU0 */ - add %o1, 8, %o1 /* IEU0 Group */ - stx %g2, [%o0 - 0x8] /* Store */ -85: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1], %g2 /* Load Group */ - add %o0, 4, %o0 /* IEU0 */ - add %o1, 4, %o1 /* IEU0 Group */ - stw %g2, [%o0 - 0x4] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1], %g2 /* Load Group */ - add %o0, 2, %o0 /* IEU0 */ - add %o1, 2, %o1 /* IEU0 Group */ - sth %g2, [%o0 - 0x2] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - nop /* IEU0 Group */ - ldub [%o1], %g2 /* Load Group */ - stb %g2, [%o0] /* Store Group + bubble */ -1: retl - mov %g4, %o0 - -212: brz,pt %g2, 2f /* CTI Group */ - mov 8, %g1 /* IEU0 */ - sub %g1, %g2, %g2 /* IEU0 Group */ - sub %o2, %g2, %o2 /* IEU0 Group */ -1: ldub [%o1], %g5 /* Load Group */ - add %o1, 1, %o1 /* IEU0 */ - add %o0, 1, %o0 /* IEU1 */ - subcc %g2, 1, %g2 /* IEU1 Group */ - bne,pt %xcc, 1b /* CTI */ - stb %g5, [%o0 - 1] /* Store */ -2: andn %o2, 7, %g5 /* IEU0 Group */ - and %o2, 7, %o2 /* IEU1 */ - fmovd %f0, %f2 /* FPU */ - alignaddr %o1, %g0, %g1 /* GRU Group */ - ldd [%g1], %f4 /* Load Group */ -1: ldd [%g1 + 0x8], %f6 /* Load Group */ - add %g1, 0x8, %g1 /* IEU0 Group */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f4, %f6, %f0 /* GRU Group */ - std %f0, [%o0] /* Store */ - add %o1, 8, %o1 /* IEU0 Group */ - be,pn %xcc, 213f /* CTI */ - add %o0, 8, %o0 /* IEU1 */ - ldd [%g1 + 0x8], %f4 /* Load Group */ - add %g1, 8, %g1 /* IEU0 */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f6, %f4, %f0 /* GRU Group */ - 
std %f0, [%o0] /* Store */ - add %o1, 8, %o1 /* IEU0 */ - bne,pn %xcc, 1b /* CTI Group */ - add %o0, 8, %o0 /* IEU0 */ -213: brz,pn %o2, 214f /* CTI Group */ - nop /* IEU0 */ - ldub [%o1], %g5 /* LOAD */ - add %o1, 1, %o1 /* IEU0 */ - add %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 */ - bne,pt %xcc, 206b /* CTI */ - stb %g5, [%o0 - 1] /* Store Group */ -214: wr %g0, FPRS_FEF, %fprs - retl - mov %g4, %o0 -END(memcpy) -libc_hidden_def(memcpy) - - .align 32 -228: andcc %o2, 1, %g0 /* IEU1 Group */ - be,pt %icc, 2f+4 /* CTI */ -1: ldub [%o1 - 1], %o5 /* LOAD Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 Group */ - be,pn %xcc, 229f /* CTI */ - stb %o5, [%o0] /* Store */ -2: ldub [%o1 - 1], %o5 /* LOAD Group */ - sub %o0, 2, %o0 /* IEU0 */ - ldub [%o1 - 2], %g5 /* LOAD Group */ - sub %o1, 2, %o1 /* IEU0 */ - subcc %o2, 2, %o2 /* IEU1 Group */ - stb %o5, [%o0 + 1] /* Store */ - bne,pt %xcc, 2b /* CTI */ - stb %g5, [%o0] /* Store */ -229: retl - mov %g4, %o0 -219: retl - nop - - .align 32 -ENTRY(memmove) -#ifndef USE_BPR - srl %o2, 0, %o2 /* IEU1 Group */ -#endif - brz,pn %o2, 219b /* CTI Group */ - sub %o0, %o1, %o4 /* IEU0 */ - cmp %o4, %o2 /* IEU1 Group */ - bgeu,pt %XCC, 218b /* CTI */ - mov %o0, %g4 /* IEU0 */ - add %o0, %o2, %o0 /* IEU0 Group */ -220: add %o1, %o2, %o1 /* IEU1 */ - cmp %o2, 15 /* IEU1 Group */ - bleu,pn %xcc, 228b /* CTI */ - andcc %o0, 7, %g2 /* IEU1 Group */ - sub %o0, %o1, %g5 /* IEU0 */ - andcc %g5, 3, %o5 /* IEU1 Group */ - bne,pn %xcc, 232f /* CTI */ - andcc %o1, 3, %g0 /* IEU1 Group */ - be,a,pt %xcc, 236f /* CTI */ - andcc %o1, 4, %g0 /* IEU1 Group */ - andcc %o1, 1, %g0 /* IEU1 Group */ - be,pn %xcc, 4f /* CTI */ - andcc %o1, 2, %g0 /* IEU1 Group */ - ldub [%o1 - 1], %g2 /* Load Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - sub %o2, 1, %o2 /* IEU0 Group */ - be,pn %xcc, 5f /* CTI Group */ - stb %g2, [%o0] /* Store */ -4: lduh [%o1 - 2], %g2 /* Load Group */ - sub %o1, 2, %o1 /* IEU0 */ - sub %o0, 2, %o0 /* IEU1 */ - sub %o2, 2, %o2 /* IEU0 */ - sth %g2, [%o0] /* Store Group + bubble */ -5: andcc %o1, 4, %g0 /* IEU1 */ -236: be,a,pn %xcc, 2f /* CTI */ - andcc %o2, -128, %g6 /* IEU1 Group */ - lduw [%o1 - 4], %g5 /* Load Group */ - sub %o1, 4, %o1 /* IEU0 */ - sub %o0, 4, %o0 /* IEU1 */ - sub %o2, 4, %o2 /* IEU0 Group */ - stw %g5, [%o0] /* Store */ - andcc %o2, -128, %g6 /* IEU1 Group */ -2: be,pn %xcc, 235f /* CTI */ - andcc %o0, 4, %g0 /* IEU1 Group */ - be,pn %xcc, 282f + 4 /* CTI Group */ -5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) - subcc %g6, 128, %g6 /* IEU1 Group */ - sub %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 5b /* CTI */ - sub %o0, 128, %o0 /* IEU0 Group */ -235: andcc %o2, 0x70, %g6 /* IEU1 Group */ -41: be,pn %xcc, 280f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -279: rd %pc, %o5 /* PDU Group */ - sll %g6, 1, %g5 /* IEU0 Group */ - sub %o1, %g6, %o1 /* IEU1 */ - sub %o5, %g5, %o5 /* IEU0 Group */ - jmpl %o5 + %lo(280f - 279b), %g0 /* CTI Group brk forced*/ - sub %o0, %g6, %o0 /* IEU0 Group */ - RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, 
g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) -280: be,pt %xcc, 281f /* CTI */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1 - 8], %g2 /* Load Group */ - sub %o0, 8, %o0 /* IEU0 */ - stw %g2, [%o0 + 4] /* Store Group */ - sub %o1, 8, %o1 /* IEU1 */ - srlx %g2, 32, %g2 /* IEU0 Group */ - stw %g2, [%o0] /* Store */ -281: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1 - 4], %g2 /* Load Group */ - sub %o1, 4, %o1 /* IEU0 */ - stw %g2, [%o0 - 4] /* Store Group */ - sub %o0, 4, %o0 /* IEU0 */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1 - 2], %g2 /* Load Group */ - sub %o1, 2, %o1 /* IEU0 */ - sth %g2, [%o0 - 2] /* Store Group */ - sub %o0, 2, %o0 /* IEU0 */ -1: be,pt %xcc, 211f /* CTI */ - nop /* IEU1 */ - ldub [%o1 - 1], %g2 /* Load Group */ - stb %g2, [%o0 - 1] /* Store Group + bubble */ -211: retl - mov %g4, %o0 - -282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - subcc %g6, 128, %g6 /* IEU1 Group */ - sub %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 282b /* CTI */ - sub %o0, 128, %o0 /* IEU0 Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ - be,pn %xcc, 284f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -283: rd %pc, %o5 /* PDU Group */ - sub %o1, %g6, %o1 /* IEU0 Group */ - sub %o5, %g6, %o5 /* IEU1 */ - jmpl %o5 + %lo(284f - 283b), %g0 /* CTI Group brk forced*/ - sub %o0, %g6, %o0 /* IEU0 Group */ - RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) -284: be,pt %xcc, 285f /* CTI Group */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1 - 8], %g2 /* Load Group */ - sub %o0, 8, %o0 /* IEU0 */ - sub %o1, 8, %o1 /* IEU0 Group */ - stx %g2, [%o0] /* Store */ -285: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1 - 4], %g2 /* Load Group */ - sub %o0, 4, %o0 /* IEU0 */ - sub %o1, 4, %o1 /* IEU0 Group */ - stw %g2, [%o0] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1 - 2], %g2 /* Load Group */ - sub %o0, 2, %o0 /* IEU0 */ - sub %o1, 2, %o1 /* IEU0 Group */ - sth %g2, [%o0] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - nop /* IEU0 Group */ - ldub [%o1 - 1], %g2 /* Load Group */ - stb %g2, [%o0 - 1] /* Store Group + bubble */ -1: retl - mov %g4, %o0 - -232: brz,pt %g2, 2f /* CTI Group */ - sub %o2, %g2, %o2 /* IEU0 Group */ -1: ldub [%o1 - 1], %g5 /* Load Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %g2, 1, %g2 /* IEU1 Group */ - bne,pt %xcc, 1b /* CTI */ - stb %g5, [%o0] /* Store */ -2: andn %o2, 7, %g5 /* IEU0 Group */ - and %o2, 7, %o2 /* IEU1 */ - fmovd %f0, %f2 /* FPU */ - alignaddr %o1, %g0, %g1 /* GRU Group */ - ldd [%g1], %f4 /* Load Group */ -1: ldd [%g1 - 8], %f6 /* Load Group */ - sub %g1, 8, %g1 /* IEU0 Group */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f6, %f4, %f0 /* GRU Group */ - std %f0, [%o0 - 8] /* Store */ - sub %o1, 8, %o1 /* IEU0 Group */ - be,pn %xcc, 233f /* CTI */ - sub %o0, 8, %o0 /* IEU1 */ - ldd [%g1 - 8], %f4 /* Load Group */ - sub %g1, 8, %g1 /* IEU0 */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f4, %f6, %f0 /* GRU Group */ - std %f0, [%o0 - 8] /* Store */ - sub %o1, 8, %o1 /* IEU0 */ - bne,pn %xcc, 1b /* CTI Group */ - sub %o0, 8, %o0 
/* IEU0 */ -233: brz,pn %o2, 234f /* CTI Group */ - nop /* IEU0 */ -237: ldub [%o1 - 1], %g5 /* LOAD */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 */ - bne,pt %xcc, 237b /* CTI */ - stb %g5, [%o0] /* Store Group */ -234: wr %g0, FPRS_FEF, %fprs - retl - mov %g4, %o0 -END(memmove) -libc_hidden_def(memmove) - -#ifdef USE_BPR -weak_alias(memcpy,__align_cpy_1) -weak_alias(memcpy,__align_cpy_2) -#endif diff --git a/libc/string/sparc/sparc64/memset.S b/libc/string/sparc/sparc64/memset.S deleted file mode 100644 index 50e404bcc..000000000 --- a/libc/string/sparc/sparc64/memset.S +++ /dev/null @@ -1,317 +0,0 @@ -/* Set a block of memory to some byte value. - For UltraSPARC. - Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by David S. Miller (davem@caip.rutgers.edu) and - Jakub Jelinek (jj@ultra.linux.cz). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include <features.h> -#include <asm/asi.h> -#ifndef XCC -#define XCC xcc -#define USE_BPR -#endif -#define FPRS_FEF 4 - -#define SET_BLOCKS(base, offset, source) \ - stx source, [base - offset - 0x18]; \ - stx source, [base - offset - 0x10]; \ - stx source, [base - offset - 0x08]; \ - stx source, [base - offset - 0x00]; - - /* Well, memset is a lot easier to get right than bcopy... 
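As the memset body below shows, the fill byte is first replicated across all eight byte lanes of a 64-bit register (the sllx/or pairs at label 4) before the routine switches to word, doubleword and block stores. A minimal C sketch of that replication step, purely illustrative and not part of uClibc:

#include <stdint.h>

/* Splat one byte into every byte lane of a 64-bit word, mirroring
 * the sllx/or sequence near the top of the assembly memset. */
static uint64_t splat_byte(unsigned char c)
{
    uint64_t v = c;
    v |= v << 8;    /* ..cc       -> ..cccc         */
    v |= v << 16;   /* ..cccc     -> ..cccccccc     */
    v |= v << 32;   /* ..cccccccc -> cccccccccccccccc */
    return v;
}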
*/ - .text - .align 32 -ENTRY(memset) - andcc %o1, 0xff, %o1 - mov %o0, %o5 - be,a,pt %icc, 50f -#ifndef USE_BPR - srl %o2, 0, %o1 -#else - mov %o2, %o1 -#endif - cmp %o2, 7 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - bleu,pn %XCC, 17f - andcc %o0, 3, %g5 - be,pt %xcc, 4f - and %o1, 0xff, %o1 - cmp %g5, 3 - be,pn %xcc, 2f - stb %o1, [%o0 + 0x00] - cmp %g5, 2 - be,pt %xcc, 2f - stb %o1, [%o0 + 0x01] - stb %o1, [%o0 + 0x02] -2: sub %g5, 4, %g5 - sub %o0, %g5, %o0 - add %o2, %g5, %o2 -4: sllx %o1, 8, %g1 - andcc %o0, 4, %g0 - or %o1, %g1, %o1 - sllx %o1, 16, %g1 - or %o1, %g1, %o1 - be,pt %xcc, 2f - sllx %o1, 32, %g1 - stw %o1, [%o0] - sub %o2, 4, %o2 - add %o0, 4, %o0 -2: cmp %o2, 128 - or %o1, %g1, %o1 - blu,pn %xcc, 9f - andcc %o0, 0x38, %g5 - be,pn %icc, 6f - mov 64, %o4 - andcc %o0, 8, %g0 - be,pn %icc, 1f - sub %o4, %g5, %o4 - stx %o1, [%o0] - add %o0, 8, %o0 -1: andcc %o4, 16, %g0 - be,pn %icc, 1f - sub %o2, %o4, %o2 - stx %o1, [%o0] - stx %o1, [%o0 + 8] - add %o0, 16, %o0 -1: andcc %o4, 32, %g0 - be,pn %icc, 7f - andncc %o2, 0x3f, %o3 - stw %o1, [%o0] - stw %o1, [%o0 + 4] - stw %o1, [%o0 + 8] - stw %o1, [%o0 + 12] - stw %o1, [%o0 + 16] - stw %o1, [%o0 + 20] - stw %o1, [%o0 + 24] - stw %o1, [%o0 + 28] - add %o0, 32, %o0 -7: be,pn %xcc, 9f - nop - ldd [%o0 - 8], %f0 -18: wr %g0, ASI_BLK_P, %asi - membar #StoreStore | #LoadStore - andcc %o3, 0xc0, %g5 - and %o2, 0x3f, %o2 - fmovd %f0, %f2 - fmovd %f0, %f4 - andn %o3, 0xff, %o3 - fmovd %f0, %f6 - cmp %g5, 64 - fmovd %f0, %f8 - fmovd %f0, %f10 - fmovd %f0, %f12 - brz,pn %g5, 10f - fmovd %f0, %f14 - be,pn %icc, 2f - stda %f0, [%o0 + 0x00] %asi - cmp %g5, 128 - be,pn %icc, 2f - stda %f0, [%o0 + 0x40] %asi - stda %f0, [%o0 + 0x80] %asi -2: brz,pn %o3, 12f - add %o0, %g5, %o0 -10: stda %f0, [%o0 + 0x00] %asi - stda %f0, [%o0 + 0x40] %asi - stda %f0, [%o0 + 0x80] %asi - stda %f0, [%o0 + 0xc0] %asi -11: subcc %o3, 256, %o3 - bne,pt %xcc, 10b - add %o0, 256, %o0 -12: wr %g0, FPRS_FEF, %fprs - membar #StoreLoad | #StoreStore -9: andcc %o2, 0x78, %g5 - be,pn %xcc, 13f - andcc %o2, 7, %o2 -14: rd %pc, %o4 - srl %g5, 1, %o3 - sub %o4, %o3, %o4 - jmpl %o4 + (13f - 14b), %g0 - add %o0, %g5, %o0 -12: SET_BLOCKS (%o0, 0x68, %o1) - SET_BLOCKS (%o0, 0x48, %o1) - SET_BLOCKS (%o0, 0x28, %o1) - SET_BLOCKS (%o0, 0x08, %o1) -13: be,pn %xcc, 8f - andcc %o2, 4, %g0 - be,pn %xcc, 1f - andcc %o2, 2, %g0 - stw %o1, [%o0] - add %o0, 4, %o0 -1: be,pn %xcc, 1f - andcc %o2, 1, %g0 - sth %o1, [%o0] - add %o0, 2, %o0 -1: bne,a,pn %xcc, 8f - stb %o1, [%o0] -8: retl - mov %o5, %o0 -17: brz,pn %o2, 0f -8: add %o0, 1, %o0 - subcc %o2, 1, %o2 - bne,pt %xcc, 8b - stb %o1, [%o0 - 1] -0: retl - mov %o5, %o0 - -6: stx %o1, [%o0] - andncc %o2, 0x3f, %o3 - be,pn %xcc, 9b - nop - ba,pt %xcc, 18b - ldd [%o0], %f0 -END(memset) -libc_hidden_def(memset) - -#define ZERO_BLOCKS(base, offset, source) \ - stx source, [base - offset - 0x38]; \ - stx source, [base - offset - 0x30]; \ - stx source, [base - offset - 0x28]; \ - stx source, [base - offset - 0x20]; \ - stx source, [base - offset - 0x18]; \ - stx source, [base - offset - 0x10]; \ - stx source, [base - offset - 0x08]; \ - stx source, [base - offset - 0x00]; - - .text - .align 32 -#ifdef __UCLIBC_SUSV3_LEGACY__ -ENTRY(bzero) -#ifndef USE_BPR - srl %o1, 0, %o1 -#endif - mov %o0, %o5 -#endif -50: cmp %o1, 7 - bleu,pn %xcc, 17f - andcc %o0, 3, %o2 - be,a,pt %xcc, 4f - andcc %o0, 4, %g0 - cmp %o2, 3 - be,pn %xcc, 2f - stb %g0, [%o0 + 0x00] - cmp %o2, 2 - be,pt %xcc, 2f - stb %g0, [%o0 + 0x01] - stb %g0, [%o0 + 0x02] -2: sub %o2, 4, %o2 - 
sub %o0, %o2, %o0 - add %o1, %o2, %o1 - andcc %o0, 4, %g0 -4: be,pt %xcc, 2f - cmp %o1, 128 - stw %g0, [%o0] - sub %o1, 4, %o1 - add %o0, 4, %o0 -2: blu,pn %xcc, 9f - andcc %o0, 0x38, %o2 - be,pn %icc, 6f - mov 64, %o4 - andcc %o0, 8, %g0 - be,pn %icc, 1f - sub %o4, %o2, %o4 - stx %g0, [%o0] - add %o0, 8, %o0 -1: andcc %o4, 16, %g0 - be,pn %icc, 1f - sub %o1, %o4, %o1 - stx %g0, [%o0] - stx %g0, [%o0 + 8] - add %o0, 16, %o0 -1: andcc %o4, 32, %g0 - be,pn %icc, 7f - andncc %o1, 0x3f, %o3 - stx %g0, [%o0] - stx %g0, [%o0 + 8] - stx %g0, [%o0 + 16] - stx %g0, [%o0 + 24] - add %o0, 32, %o0 -6: andncc %o1, 0x3f, %o3 -7: be,pn %xcc, 9f - wr %g0, ASI_BLK_P, %asi - membar #StoreLoad | #StoreStore | #LoadStore - fzero %f0 - andcc %o3, 0xc0, %o2 - and %o1, 0x3f, %o1 - fzero %f2 - andn %o3, 0xff, %o3 - faddd %f0, %f2, %f4 - fmuld %f0, %f2, %f6 - cmp %o2, 64 - faddd %f0, %f2, %f8 - fmuld %f0, %f2, %f10 - faddd %f0, %f2, %f12 - brz,pn %o2, 10f - fmuld %f0, %f2, %f14 - be,pn %icc, 2f - stda %f0, [%o0 + 0x00] %asi - cmp %o2, 128 - be,pn %icc, 2f - stda %f0, [%o0 + 0x40] %asi - stda %f0, [%o0 + 0x80] %asi -2: brz,pn %o3, 12f - add %o0, %o2, %o0 -10: stda %f0, [%o0 + 0x00] %asi - stda %f0, [%o0 + 0x40] %asi - stda %f0, [%o0 + 0x80] %asi - stda %f0, [%o0 + 0xc0] %asi -11: subcc %o3, 256, %o3 - bne,pt %xcc, 10b - add %o0, 256, %o0 -12: wr %g0, FPRS_FEF, %fprs - membar #StoreLoad | #StoreStore -9: andcc %o1, 0xf8, %o2 - be,pn %xcc, 13f - andcc %o1, 7, %o1 -14: rd %pc, %o4 - srl %o2, 1, %o3 - sub %o4, %o3, %o4 - jmpl %o4 + (13f - 14b), %g0 - add %o0, %o2, %o0 -12: ZERO_BLOCKS (%o0, 0xc8, %g0) - ZERO_BLOCKS (%o0, 0x88, %g0) - ZERO_BLOCKS (%o0, 0x48, %g0) - ZERO_BLOCKS (%o0, 0x08, %g0) -13: be,pn %xcc, 8f - andcc %o1, 4, %g0 - be,pn %xcc, 1f - andcc %o1, 2, %g0 - stw %g0, [%o0] - add %o0, 4, %o0 -1: be,pn %xcc, 1f - andcc %o1, 1, %g0 - sth %g0, [%o0] - add %o0, 2, %o0 -1: bne,a,pn %xcc, 8f - stb %g0, [%o0] -8: retl - mov %o5, %o0 -17: be,pn %xcc, 13b - orcc %o1, 0, %g0 - be,pn %xcc, 0f -8: add %o0, 1, %o0 - subcc %o1, 1, %o1 - bne,pt %xcc, 8b - stb %g0, [%o0 - 1] -0: retl - mov %o5, %o0 -#ifdef __UCLIBC_SUSV3_LEGACY__ -END(bzero) -#endif diff --git a/libc/string/sparc/sparc64/sparcv9b/memcpy.S b/libc/string/sparc/sparc64/sparcv9b/memcpy.S deleted file mode 100644 index 64f6a92e0..000000000 --- a/libc/string/sparc/sparc64/sparcv9b/memcpy.S +++ /dev/null @@ -1,612 +0,0 @@ -/* Copy SIZE bytes from SRC to DEST. - For UltraSPARC-III. - Copyright (C) 2001, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by David S. Miller (davem@redhat.com) - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. 
*/ - -#include <features.h> - -#define ASI_BLK_P 0xf0 -#define FPRS_FEF 0x04 -#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs -#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs - -#ifndef XCC -#define USE_BPR -#define XCC xcc -#endif - - .register %g2,#scratch - .register %g3,#scratch - .register %g6,#scratch - - .text - .align 32 - -#ifdef __UCLIBC_SUSV3_LEGACY__ -ENTRY(bcopy) - sub %o1, %o0, %o4 - mov %o0, %g4 - cmp %o4, %o2 - mov %o1, %o0 - bgeu,pt %XCC, 100f - mov %g4, %o1 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - brnz,pn %o2, 220f - add %o0, %o2, %o0 - retl - nop -END(bcopy) -#endif - - /* Special/non-trivial issues of this code: - * - * 1) %o5 is preserved from VISEntryHalf to VISExitHalf - * 2) Only low 32 FPU registers are used so that only the - * lower half of the FPU register set is dirtied by this - * code. This is especially important in the kernel. - * 3) This code never prefetches cachelines past the end - * of the source buffer. - * - * The cheetah's flexible spine, oversized liver, enlarged heart, - * slender muscular body, and claws make it the swiftest hunter - * in Africa and the fastest animal on land. Can reach speeds - * of up to 2.4GB per second. - */ - .align 32 -ENTRY(memcpy) - -100: /* %o0=dst, %o1=src, %o2=len */ - mov %o0, %g5 - cmp %o2, 0 - be,pn %XCC, out -218: or %o0, %o1, %o3 - cmp %o2, 16 - bleu,a,pn %XCC, small_copy - or %o3, %o2, %o3 - - cmp %o2, 256 - blu,pt %XCC, medium_copy - andcc %o3, 0x7, %g0 - - ba,pt %xcc, enter - andcc %o0, 0x3f, %g2 - - /* Here len >= 256 and condition codes reflect execution - * of "andcc %o0, 0x7, %g2", done by caller. - */ - .align 64 -enter: - /* Is 'dst' already aligned on an 64-byte boundary? */ - be,pt %XCC, 2f - - /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number - * of bytes to copy to make 'dst' 64-byte aligned. We pre- - * subtract this from 'len'. - */ - sub %g2, 0x40, %g2 - sub %g0, %g2, %g2 - sub %o2, %g2, %o2 - - /* Copy %g2 bytes from src to dst, one byte at a time. 
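The enter path above computes abs((dst & 0x3f) - 0x40) -- the number of bytes needed to make dst 64-byte aligned -- pre-subtracts it from len, and then copies exactly that many head bytes one at a time. A small C sketch of the same computation (names are illustrative, not taken from the source):

#include <stddef.h>
#include <stdint.h>

/* Number of head bytes to copy so that dst lands on a 64-byte
 * boundary; 0 if it is already aligned, matching the asm's early
 * branch around the fix-up. */
static size_t head_bytes_to_align64(const void *dst)
{
    uintptr_t mis = (uintptr_t)dst & 0x3f;
    return mis ? 0x40 - mis : 0;
}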
*/ -1: ldub [%o1 + 0x00], %o3 - add %o1, 0x1, %o1 - add %o0, 0x1, %o0 - subcc %g2, 0x1, %g2 - - bg,pt %XCC, 1b - stb %o3, [%o0 + -1] - -2: VISEntryHalf - and %o1, 0x7, %g1 - ba,pt %xcc, begin - alignaddr %o1, %g0, %o1 - - .align 64 -begin: - prefetch [%o1 + 0x000], #one_read - prefetch [%o1 + 0x040], #one_read - andn %o2, (0x40 - 1), %o4 - prefetch [%o1 + 0x080], #one_read - prefetch [%o1 + 0x0c0], #one_read - ldd [%o1 + 0x000], %f0 - prefetch [%o1 + 0x100], #one_read - ldd [%o1 + 0x008], %f2 - prefetch [%o1 + 0x140], #one_read - ldd [%o1 + 0x010], %f4 - prefetch [%o1 + 0x180], #one_read - faligndata %f0, %f2, %f16 - ldd [%o1 + 0x018], %f6 - faligndata %f2, %f4, %f18 - ldd [%o1 + 0x020], %f8 - faligndata %f4, %f6, %f20 - ldd [%o1 + 0x028], %f10 - faligndata %f6, %f8, %f22 - - ldd [%o1 + 0x030], %f12 - faligndata %f8, %f10, %f24 - ldd [%o1 + 0x038], %f14 - faligndata %f10, %f12, %f26 - ldd [%o1 + 0x040], %f0 - - sub %o4, 0x80, %o4 - add %o1, 0x40, %o1 - ba,pt %xcc, loop - srl %o4, 6, %o3 - - .align 64 -loop: - ldd [%o1 + 0x008], %f2 - faligndata %f12, %f14, %f28 - ldd [%o1 + 0x010], %f4 - faligndata %f14, %f0, %f30 - stda %f16, [%o0] ASI_BLK_P - ldd [%o1 + 0x018], %f6 - faligndata %f0, %f2, %f16 - - ldd [%o1 + 0x020], %f8 - faligndata %f2, %f4, %f18 - ldd [%o1 + 0x028], %f10 - faligndata %f4, %f6, %f20 - ldd [%o1 + 0x030], %f12 - faligndata %f6, %f8, %f22 - ldd [%o1 + 0x038], %f14 - faligndata %f8, %f10, %f24 - - ldd [%o1 + 0x040], %f0 - prefetch [%o1 + 0x180], #one_read - faligndata %f10, %f12, %f26 - subcc %o3, 0x01, %o3 - add %o1, 0x40, %o1 - bg,pt %XCC, loop - add %o0, 0x40, %o0 - - /* Finally we copy the last full 64-byte block. */ -loopfini: - ldd [%o1 + 0x008], %f2 - faligndata %f12, %f14, %f28 - ldd [%o1 + 0x010], %f4 - faligndata %f14, %f0, %f30 - stda %f16, [%o0] ASI_BLK_P - ldd [%o1 + 0x018], %f6 - faligndata %f0, %f2, %f16 - ldd [%o1 + 0x020], %f8 - faligndata %f2, %f4, %f18 - ldd [%o1 + 0x028], %f10 - faligndata %f4, %f6, %f20 - ldd [%o1 + 0x030], %f12 - faligndata %f6, %f8, %f22 - ldd [%o1 + 0x038], %f14 - faligndata %f8, %f10, %f24 - cmp %g1, 0 - be,pt %XCC, 1f - add %o0, 0x40, %o0 - ldd [%o1 + 0x040], %f0 -1: faligndata %f10, %f12, %f26 - faligndata %f12, %f14, %f28 - faligndata %f14, %f0, %f30 - stda %f16, [%o0] ASI_BLK_P - add %o0, 0x40, %o0 - add %o1, 0x40, %o1 - membar #Sync - - /* Now we copy the (len modulo 64) bytes at the end. - * Note how we borrow the %f0 loaded above. - * - * Also notice how this code is careful not to perform a - * load past the end of the src buffer. - */ -loopend: - and %o2, 0x3f, %o2 - andcc %o2, 0x38, %g2 - be,pn %XCC, endcruft - subcc %g2, 0x8, %g2 - be,pn %XCC, endcruft - cmp %g1, 0 - - be,a,pt %XCC, 1f - ldd [%o1 + 0x00], %f0 - -1: ldd [%o1 + 0x08], %f2 - add %o1, 0x8, %o1 - sub %o2, 0x8, %o2 - subcc %g2, 0x8, %g2 - faligndata %f0, %f2, %f8 - std %f8, [%o0 + 0x00] - be,pn %XCC, endcruft - add %o0, 0x8, %o0 - ldd [%o1 + 0x08], %f0 - add %o1, 0x8, %o1 - sub %o2, 0x8, %o2 - subcc %g2, 0x8, %g2 - faligndata %f2, %f0, %f8 - std %f8, [%o0 + 0x00] - bne,pn %XCC, 1b - add %o0, 0x8, %o0 - - /* If anything is left, we copy it one byte at a time. - * Note that %g1 is (src & 0x3) saved above before the - * alignaddr was performed. 
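Once the block loop is done, the endcruft path below finishes the residue by testing the individual size bits of what is left and moving 8, 4, 2 and finally 1 byte at a time. Roughly, in C (a sketch of the control flow only, not of the actual register usage):

#include <stddef.h>
#include <string.h>

/* Copy a residue of fewer than 16 bytes by testing each power-of-two
 * bit of the remaining length exactly once, as the endcruft labels do. */
static void copy_tail(unsigned char *dst, const unsigned char *src, size_t len)
{
    if (len & 8) { memcpy(dst, src, 8); dst += 8; src += 8; }
    if (len & 4) { memcpy(dst, src, 4); dst += 4; src += 4; }
    if (len & 2) { memcpy(dst, src, 2); dst += 2; src += 2; }
    if (len & 1) { *dst = *src; }
}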
- */ -endcruft: - cmp %o2, 0 - add %o1, %g1, %o1 - VISExitHalf - be,pn %XCC, out - sub %o0, %o1, %o3 - - andcc %g1, 0x7, %g0 - bne,pn %icc, small_copy_unaligned - andcc %o2, 0x8, %g0 - be,pt %icc, 1f - nop - ldx [%o1], %o5 - stx %o5, [%o1 + %o3] - add %o1, 0x8, %o1 - -1: andcc %o2, 0x4, %g0 - be,pt %icc, 1f - nop - lduw [%o1], %o5 - stw %o5, [%o1 + %o3] - add %o1, 0x4, %o1 - -1: andcc %o2, 0x2, %g0 - be,pt %icc, 1f - nop - lduh [%o1], %o5 - sth %o5, [%o1 + %o3] - add %o1, 0x2, %o1 - -1: andcc %o2, 0x1, %g0 - be,pt %icc, out - nop - ldub [%o1], %o5 - ba,pt %xcc, out - stb %o5, [%o1 + %o3] - -medium_copy: /* 16 < len <= 64 */ - bne,pn %XCC, small_copy_unaligned - sub %o0, %o1, %o3 - -medium_copy_aligned: - andn %o2, 0x7, %o4 - and %o2, 0x7, %o2 -1: subcc %o4, 0x8, %o4 - ldx [%o1], %o5 - stx %o5, [%o1 + %o3] - bgu,pt %XCC, 1b - add %o1, 0x8, %o1 - andcc %o2, 0x4, %g0 - be,pt %XCC, 1f - nop - sub %o2, 0x4, %o2 - lduw [%o1], %o5 - stw %o5, [%o1 + %o3] - add %o1, 0x4, %o1 -1: cmp %o2, 0 - be,pt %XCC, out - nop - ba,pt %xcc, small_copy_unaligned - nop - -small_copy: /* 0 < len <= 16 */ - andcc %o3, 0x3, %g0 - bne,pn %XCC, small_copy_unaligned - sub %o0, %o1, %o3 - -small_copy_aligned: - subcc %o2, 4, %o2 - lduw [%o1], %g1 - stw %g1, [%o1 + %o3] - bgu,pt %XCC, small_copy_aligned - add %o1, 4, %o1 - -out: retl - mov %g5, %o0 - - .align 32 -small_copy_unaligned: - subcc %o2, 1, %o2 - ldub [%o1], %g1 - stb %g1, [%o1 + %o3] - bgu,pt %XCC, small_copy_unaligned - add %o1, 1, %o1 - retl - mov %g5, %o0 - -END(memcpy) -libc_hidden_def(memcpy) - -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stw %t0, [%dst - offset - 0x1c]; \ - srlx %t0, 32, %t0; \ - stw %t0, [%dst - offset - 0x20]; \ - stw %t1, [%dst - offset - 0x14]; \ - srlx %t1, 32, %t1; \ - stw %t1, [%dst - offset - 0x18]; \ - stw %t2, [%dst - offset - 0x0c]; \ - srlx %t2, 32, %t2; \ - stw %t2, [%dst - offset - 0x10]; \ - stw %t3, [%dst - offset - 0x04]; \ - srlx %t3, 32, %t3; \ - stw %t3, [%dst - offset - 0x08]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stx %t0, [%dst - offset - 0x20]; \ - stx %t1, [%dst - offset - 0x18]; \ - stx %t2, [%dst - offset - 0x10]; \ - stx %t3, [%dst - offset - 0x08]; \ - ldx [%src - offset - 0x40], %t0; \ - ldx [%src - offset - 0x38], %t1; \ - ldx [%src - offset - 0x30], %t2; \ - ldx [%src - offset - 0x28], %t3; \ - stx %t0, [%dst - offset - 0x40]; \ - stx %t1, [%dst - offset - 0x38]; \ - stx %t2, [%dst - offset - 0x30]; \ - stx %t3, [%dst - offset - 0x28]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stw %t0, [%dst + offset + 0x04]; \ - srlx %t0, 32, %t2; \ - stw %t2, [%dst + offset + 0x00]; \ - stw %t1, [%dst + offset + 0x0c]; \ - srlx %t1, 32, %t3; \ - stw %t3, [%dst + offset + 0x08]; - -#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stx %t0, [%dst + offset + 0x00]; \ - stx %t1, [%dst + offset + 0x08]; - - .align 32 -228: andcc %o2, 1, %g0 /* IEU1 Group */ - be,pt %icc, 2f+4 /* CTI */ -1: ldub [%o1 - 1], %o5 /* LOAD Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 
Group */ - be,pn %xcc, 229f /* CTI */ - stb %o5, [%o0] /* Store */ -2: ldub [%o1 - 1], %o5 /* LOAD Group */ - sub %o0, 2, %o0 /* IEU0 */ - ldub [%o1 - 2], %g5 /* LOAD Group */ - sub %o1, 2, %o1 /* IEU0 */ - subcc %o2, 2, %o2 /* IEU1 Group */ - stb %o5, [%o0 + 1] /* Store */ - bne,pt %xcc, 2b /* CTI */ - stb %g5, [%o0] /* Store */ -229: retl - mov %g4, %o0 - - .align 32 -ENTRY(memmove) - mov %o0, %g5 -#ifndef USE_BPR - srl %o2, 0, %o2 /* IEU1 Group */ -#endif - brz,pn %o2, out /* CTI Group */ - sub %o0, %o1, %o4 /* IEU0 */ - cmp %o4, %o2 /* IEU1 Group */ - bgeu,pt %XCC, 218b /* CTI */ - mov %o0, %g4 /* IEU0 */ - add %o0, %o2, %o0 /* IEU0 Group */ -220: add %o1, %o2, %o1 /* IEU1 */ - cmp %o2, 15 /* IEU1 Group */ - bleu,pn %xcc, 228b /* CTI */ - andcc %o0, 7, %g2 /* IEU1 Group */ - sub %o0, %o1, %g5 /* IEU0 */ - andcc %g5, 3, %o5 /* IEU1 Group */ - bne,pn %xcc, 232f /* CTI */ - andcc %o1, 3, %g0 /* IEU1 Group */ - be,a,pt %xcc, 236f /* CTI */ - andcc %o1, 4, %g0 /* IEU1 Group */ - andcc %o1, 1, %g0 /* IEU1 Group */ - be,pn %xcc, 4f /* CTI */ - andcc %o1, 2, %g0 /* IEU1 Group */ - ldub [%o1 - 1], %g2 /* Load Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - sub %o2, 1, %o2 /* IEU0 Group */ - be,pn %xcc, 5f /* CTI Group */ - stb %g2, [%o0] /* Store */ -4: lduh [%o1 - 2], %g2 /* Load Group */ - sub %o1, 2, %o1 /* IEU0 */ - sub %o0, 2, %o0 /* IEU1 */ - sub %o2, 2, %o2 /* IEU0 */ - sth %g2, [%o0] /* Store Group + bubble */ -5: andcc %o1, 4, %g0 /* IEU1 */ -236: be,a,pn %xcc, 2f /* CTI */ - andcc %o2, -128, %g6 /* IEU1 Group */ - lduw [%o1 - 4], %g5 /* Load Group */ - sub %o1, 4, %o1 /* IEU0 */ - sub %o0, 4, %o0 /* IEU1 */ - sub %o2, 4, %o2 /* IEU0 Group */ - stw %g5, [%o0] /* Store */ - andcc %o2, -128, %g6 /* IEU1 Group */ -2: be,pn %xcc, 235f /* CTI */ - andcc %o0, 4, %g0 /* IEU1 Group */ - be,pn %xcc, 282f + 4 /* CTI Group */ -5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) - subcc %g6, 128, %g6 /* IEU1 Group */ - sub %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 5b /* CTI */ - sub %o0, 128, %o0 /* IEU0 Group */ -235: andcc %o2, 0x70, %g6 /* IEU1 Group */ -41: be,pn %xcc, 280f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -279: rd %pc, %o5 /* PDU Group */ - sll %g6, 1, %g5 /* IEU0 Group */ - sub %o1, %g6, %o1 /* IEU1 */ - sub %o5, %g5, %o5 /* IEU0 Group */ - jmpl %o5 + %lo(280f - 279b), %g0 /* CTI Group brk forced*/ - sub %o0, %g6, %o0 /* IEU0 Group */ - RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) -280: be,pt %xcc, 281f /* CTI */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1 - 8], %g2 /* Load Group */ - sub %o0, 8, %o0 /* IEU0 */ - stw %g2, [%o0 + 4] /* Store Group */ - sub %o1, 8, %o1 /* IEU1 */ - srlx %g2, 32, %g2 /* IEU0 Group */ - stw %g2, [%o0] /* Store */ -281: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1 - 4], %g2 /* Load Group */ - sub %o1, 4, %o1 /* IEU0 */ - stw %g2, [%o0 - 4] /* Store Group */ - sub %o0, 4, %o0 /* IEU0 */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1 - 2], %g2 /* Load Group */ - sub %o1, 2, %o1 /* IEU0 */ - 
sth %g2, [%o0 - 2] /* Store Group */ - sub %o0, 2, %o0 /* IEU0 */ -1: be,pt %xcc, 211f /* CTI */ - nop /* IEU1 */ - ldub [%o1 - 1], %g2 /* Load Group */ - stb %g2, [%o0 - 1] /* Store Group + bubble */ -211: retl - mov %g4, %o0 - -282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - subcc %g6, 128, %g6 /* IEU1 Group */ - sub %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 282b /* CTI */ - sub %o0, 128, %o0 /* IEU0 Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ - be,pn %xcc, 284f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -283: rd %pc, %o5 /* PDU Group */ - sub %o1, %g6, %o1 /* IEU0 Group */ - sub %o5, %g6, %o5 /* IEU1 */ - jmpl %o5 + %lo(284f - 283b), %g0 /* CTI Group brk forced*/ - sub %o0, %g6, %o0 /* IEU0 Group */ - RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) -284: be,pt %xcc, 285f /* CTI Group */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1 - 8], %g2 /* Load Group */ - sub %o0, 8, %o0 /* IEU0 */ - sub %o1, 8, %o1 /* IEU0 Group */ - stx %g2, [%o0] /* Store */ -285: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1 - 4], %g2 /* Load Group */ - sub %o0, 4, %o0 /* IEU0 */ - sub %o1, 4, %o1 /* IEU0 Group */ - stw %g2, [%o0] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1 - 2], %g2 /* Load Group */ - sub %o0, 2, %o0 /* IEU0 */ - sub %o1, 2, %o1 /* IEU0 Group */ - sth %g2, [%o0] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - nop /* IEU0 Group */ - ldub [%o1 - 1], %g2 /* Load Group */ - stb %g2, [%o0 - 1] /* Store Group + bubble */ -1: retl - mov %g4, %o0 - -232: brz,pt %g2, 2f /* CTI Group */ - sub %o2, %g2, %o2 /* IEU0 Group */ -1: ldub [%o1 - 1], %g5 /* Load Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %g2, 1, %g2 /* IEU1 Group */ - bne,pt %xcc, 1b /* CTI */ - stb %g5, [%o0] /* Store */ -2: andn %o2, 7, %g5 /* IEU0 Group */ - and %o2, 7, %o2 /* IEU1 */ - fmovd %f0, %f2 /* FPU */ - alignaddr %o1, %g0, %g1 /* GRU Group */ - ldd [%g1], %f4 /* Load Group */ -1: ldd [%g1 - 8], %f6 /* Load Group */ - sub %g1, 8, %g1 /* IEU0 Group */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f6, %f4, %f0 /* GRU Group */ - std %f0, [%o0 - 8] /* Store */ - sub %o1, 8, %o1 /* IEU0 Group */ - be,pn %xcc, 233f /* CTI */ - sub %o0, 8, %o0 /* IEU1 */ - ldd [%g1 - 8], %f4 /* Load Group */ - sub %g1, 8, %g1 /* IEU0 */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f4, %f6, %f0 /* GRU Group */ - std %f0, [%o0 - 8] /* Store */ - sub %o1, 8, %o1 /* IEU0 */ - bne,pn %xcc, 1b /* CTI Group */ - sub %o0, 8, %o0 /* IEU0 */ -233: brz,pn %o2, 234f /* CTI Group */ - nop /* IEU0 */ -237: ldub [%o1 - 1], %g5 /* LOAD */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 */ - bne,pt %xcc, 237b /* CTI */ - stb %g5, [%o0] /* Store Group */ -234: wr %g0, FPRS_FEF, %fprs - retl - mov %g4, %o0 -END(memmove) -libc_hidden_def(memmove) - -#ifdef USE_BPR -weak_alias(memcpy,__align_cpy_1) -weak_alias(memcpy,__align_cpy_2) -weak_alias(memcpy,__align_cpy_4) -weak_alias(memcpy,__align_cpy_8) -weak_alias(memcpy,__align_cpy_16) -#endif diff --git a/libc/string/sparc/sparc64/stpcpy.S b/libc/string/sparc/sparc64/stpcpy.S deleted file mode 100644 index 
8c26c6bec..000000000 --- a/libc/string/sparc/sparc64/stpcpy.S +++ /dev/null @@ -1,271 +0,0 @@ -/* Copy SRC to DEST returning the address of the terminating '\0' in DEST. - For SPARC v9. - Copyright (C) 1998, 1999, 2002, 2003, 2004 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and - Jakub Jelinek <jj@ultra.linux.cz>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include <asm/asi.h> -#ifndef XCC - .register %g2, #scratch - .register %g3, #scratch - .register %g6, #scratch -#endif - - /* Normally, this uses - ((xword - 0x0101010101010101) & 0x8080808080808080) test - to find out if any byte in xword could be zero. This is fast, but - also gives false alarm for any byte in range 0x81-0xff. It does - not matter for correctness, as if this test tells us there could - be some zero byte, we check it byte by byte, but if bytes with - high bits set are common in the strings, then this will give poor - performance. You can #define EIGHTBIT_NOT_RARE and the algorithm - will use one tick slower, but more precise test - ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), - which does not give any false alarms (but if some bits are set, - one cannot assume from it which bytes are zero and which are not). - It is yet to be measured, what is the correct default for glibc - in these days for an average user. 
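The test described above is the classic zero-byte probe. A minimal C sketch of both forms mentioned in the comment -- the fast one, which may fire for bytes in the 0x81-0xff range, and the EIGHTBIT_NOT_RARE variant, which never reports a false positive -- purely illustrative and not part of the library:

#include <stdint.h>

#define ONES  0x0101010101010101ULL
#define HIGHS 0x8080808080808080ULL

/* Fast test: a nonzero result means some byte of x MAY be zero
 * (false positives are possible for bytes in 0x81..0xff). */
static uint64_t may_have_zero_byte(uint64_t x)
{
    return (x - ONES) & HIGHS;
}

/* Precise variant (EIGHTBIT_NOT_RARE): nonzero iff some byte of x
 * really is zero, at the cost of one extra operation. */
static uint64_t has_zero_byte(uint64_t x)
{
    return (x - ONES) & ~x & HIGHS;
}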
- */ - - .text - .align 32 -ENTRY(stpcpy) - sethi %hi(0x01010101), %g1 /* IEU0 Group */ - or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ - andcc %o0, 7, %g0 /* IEU1 */ - sllx %g1, 32, %g2 /* IEU0 Group */ - - bne,pn %icc, 12f /* CTI */ - andcc %o1, 7, %g3 /* IEU1 */ - or %g1, %g2, %g1 /* IEU0 Group */ - bne,pn %icc, 14f /* CTI */ - - sllx %g1, 7, %g2 /* IEU0 Group */ -1: ldx [%o1], %o3 /* Load */ - add %o1, 8, %o1 /* IEU1 */ -2: mov %o3, %g3 /* IEU0 Group */ - - sub %o3, %g1, %o2 /* IEU1 */ -3: ldxa [%o1] ASI_PNF, %o3 /* Load */ -#ifdef EIGHTBIT_NOT_RARE - andn %o2, %g3, %o2 /* IEU0 Group */ -#endif - add %o0, 8, %o0 /* IEU0 Group */ - andcc %o2, %g2, %g0 /* IEU1 */ - - add %o1, 8, %o1 /* IEU0 Group */ - be,a,pt %xcc, 2b /* CTI */ - stx %g3, [%o0 - 8] /* Store */ - srlx %g3, 56, %g5 /* IEU0 Group */ - - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 11f /* CTI */ - srlx %g3, 48, %g4 /* IEU0 */ - andcc %g4, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 10f /* CTI */ - srlx %g3, 40, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 9f /* CTI */ - - srlx %g3, 32, %g4 /* IEU0 */ - andcc %g4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 8f /* CTI */ - srlx %g3, 24, %g5 /* IEU0 */ - - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 7f /* CTI */ - srlx %g3, 16, %g4 /* IEU0 */ - andcc %g4, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 6f /* CTI */ - srlx %g3, 8, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 5f /* CTI */ - - sub %o3, %g1, %o2 /* IEU0 */ - stx %g3, [%o0 - 8] /* Store Group */ - andcc %g3, 0xff, %g0 /* IEU1 */ - bne,pt %icc, 3b /* CTI */ - - mov %o3, %g3 /* IEU0 Group */ -4: retl /* CTI+IEU1 Group */ - sub %o0, 1, %o0 /* IEU0 */ - - .align 16 -6: ba,pt %xcc, 23f /* CTI Group */ - sub %o0, 3, %g6 /* IEU0 */ -5: sub %o0, 2, %g6 /* IEU0 Group */ - stb %g5, [%o0 - 2] /* Store */ - - srlx %g3, 16, %g4 /* IEU0 Group */ -23: sth %g4, [%o0 - 4] /* Store */ - srlx %g3, 32, %g4 /* IEU0 Group */ - stw %g4, [%o0 - 8] /* Store */ - - retl /* CTI+IEU1 Group */ - mov %g6, %o0 /* IEU0 */ -8: ba,pt %xcc, 24f /* CTI Group */ - sub %o0, 5, %g6 /* IEU0 */ - -7: sub %o0, 4, %g6 /* IEU0 Group */ - stb %g5, [%o0 - 4] /* Store */ - srlx %g3, 32, %g4 /* IEU0 Group */ -24: stw %g4, [%o0 - 8] /* Store */ - - retl /* CTI+IEU1 Group */ - mov %g6, %o0 /* IEU0 */ -10: ba,pt %xcc, 25f /* CTI Group */ - sub %o0, 7, %g6 /* IEU0 */ - -9: sub %o0, 6, %g6 /* IEU0 Group */ - stb %g5, [%o0 - 6] /* Store */ - srlx %g3, 48, %g4 /* IEU0 */ -25: sth %g4, [%o0 - 8] /* Store Group */ - - retl /* CTI+IEU1 Group */ - mov %g6, %o0 /* IEU0 */ -11: stb %g5, [%o0 - 8] /* Store Group */ - retl /* CTI+IEU1 Group */ - - sub %o0, 8, %o0 /* IEU0 */ - - .align 16 -12: or %g1, %g2, %g1 /* IEU0 Group */ - ldub [%o1], %o3 /* Load */ - sllx %g1, 7, %g2 /* IEU0 Group */ - stb %o3, [%o0] /* Store Group */ - -13: add %o0, 1, %o0 /* IEU0 */ - add %o1, 1, %o1 /* IEU1 */ - andcc %o3, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4b /* CTI */ - - lduba [%o1] ASI_PNF, %o3 /* Load */ - andcc %o0, 7, %g0 /* IEU1 Group */ - bne,a,pt %icc, 13b /* CTI */ - stb %o3, [%o0] /* Store */ - - andcc %o1, 7, %g3 /* IEU1 Group */ - be,a,pt %icc, 1b /* CTI */ - ldx [%o1], %o3 /* Load */ -14: orcc %g0, 64, %g4 /* IEU1 Group */ - - sllx %g3, 3, %g5 /* IEU0 */ - sub %o1, %g3, %o1 /* IEU0 Group */ - sub %g4, %g5, %g4 /* IEU1 */ - /* %g1 = 0101010101010101 * - * %g2 = 8080808080808080 * - * %g3 = source alignment * - * %g5 = number of bits to shift left * - * %g4 = number of bits to shift right */ - ldxa [%o1] ASI_PNF, %o5 /* Load Group 
*/ - - addcc %o1, 8, %o1 /* IEU1 */ -15: sllx %o5, %g5, %o3 /* IEU0 Group */ - ldxa [%o1] ASI_PNF, %o5 /* Load */ - srlx %o5, %g4, %o4 /* IEU0 Group */ - - add %o0, 8, %o0 /* IEU1 */ - or %o3, %o4, %o3 /* IEU0 Group */ - add %o1, 8, %o1 /* IEU1 */ - sub %o3, %g1, %o4 /* IEU0 Group */ - -#ifdef EIGHTBIT_NOT_RARE - andn %o4, %o3, %o4 /* IEU0 Group */ -#endif - andcc %o4, %g2, %g0 /* IEU1 Group */ - be,a,pt %xcc, 15b /* CTI */ - stx %o3, [%o0 - 8] /* Store */ - srlx %o3, 56, %o4 /* IEU0 Group */ - - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 22f /* CTI */ - srlx %o3, 48, %o4 /* IEU0 */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 21f /* CTI */ - srlx %o3, 40, %o4 /* IEU0 */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 20f /* CTI */ - - srlx %o3, 32, %o4 /* IEU0 */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 19f /* CTI */ - srlx %o3, 24, %o4 /* IEU0 */ - - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 18f /* CTI */ - srlx %o3, 16, %o4 /* IEU0 */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 17f /* CTI */ - srlx %o3, 8, %o4 /* IEU0 */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 16f /* CTI */ - - andcc %o3, 0xff, %g0 /* IEU1 Group */ - bne,pn %icc, 15b /* CTI */ - stx %o3, [%o0 - 8] /* Store */ - retl /* CTI+IEU1 Group */ - - sub %o0, 1, %o0 /* IEU0 */ - - .align 16 -17: ba,pt %xcc, 26f /* CTI Group */ - subcc %o0, 3, %g6 /* IEU1 */ -18: ba,pt %xcc, 27f /* CTI Group */ - subcc %o0, 4, %g6 /* IEU1 */ - -19: ba,pt %xcc, 28f /* CTI Group */ - subcc %o0, 5, %g6 /* IEU1 */ -16: subcc %o0, 2, %g6 /* IEU1 Group */ - srlx %o3, 8, %o4 /* IEU0 */ - - stb %o4, [%o0 - 2] /* Store */ -26: srlx %o3, 16, %o4 /* IEU0 Group */ - stb %o4, [%o0 - 3] /* Store */ -27: srlx %o3, 24, %o4 /* IEU0 Group */ - - stb %o4, [%o0 - 4] /* Store */ -28: srlx %o3, 32, %o4 /* IEU0 Group */ - stw %o4, [%o0 - 8] /* Store */ - retl /* CTI+IEU1 Group */ - - mov %g6, %o0 /* IEU0 */ - - .align 16 -21: ba,pt %xcc, 29f /* CTI Group */ - subcc %o0, 7, %g6 /* IEU1 */ -22: ba,pt %xcc, 30f /* CTI Group */ - subcc %o0, 8, %g6 /* IEU1 */ - -20: subcc %o0, 6, %g6 /* IEU1 Group */ - srlx %o3, 40, %o4 /* IEU0 */ - stb %o4, [%o0 - 6] /* Store */ -29: srlx %o3, 48, %o4 /* IEU0 Group */ - - stb %o4, [%o0 - 7] /* Store */ -30: srlx %o3, 56, %o4 /* IEU0 Group */ - stb %o4, [%o0 - 8] /* Store */ - retl /* CTI+IEU1 Group */ - - mov %g6, %o0 /* IEU0 */ -END(stpcpy) -libc_hidden_def(stpcpy) diff --git a/libc/string/sparc/sparc64/strcat.S b/libc/string/sparc/sparc64/strcat.S deleted file mode 100644 index fcc4ba59c..000000000 --- a/libc/string/sparc/sparc64/strcat.S +++ /dev/null @@ -1,339 +0,0 @@ -/* strcat (dest, src) -- Append SRC on the end of DEST. - For SPARC v9. - Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Jakub Jelinek <jj@ultra.linux.cz> and - Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include <asm/asi.h> -#ifndef XCC -#define XCC xcc -#define USE_BPR - .register %g2, #scratch - .register %g3, #scratch - .register %g6, #scratch -#endif - - /* Normally, this uses - ((xword - 0x0101010101010101) & 0x8080808080808080) test - to find out if any byte in xword could be zero. This is fast, but - also gives false alarm for any byte in range 0x81-0xff. It does - not matter for correctness, as if this test tells us there could - be some zero byte, we check it byte by byte, but if bytes with - high bits set are common in the strings, then this will give poor - performance. You can #define EIGHTBIT_NOT_RARE and the algorithm - will use one tick slower, but more precise test - ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), - which does not give any false alarms (but if some bits are set, - one cannot assume from it which bytes are zero and which are not). - It is yet to be measured, what is the correct default for glibc - in these days for an average user. - */ - - .text - .align 32 -ENTRY(strcat) - sethi %hi(0x01010101), %g1 /* IEU0 Group */ - ldub [%o0], %o3 /* Load */ - or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ - mov %o0, %g6 /* IEU1 */ - - sllx %g1, 32, %g2 /* IEU0 Group */ - andcc %o0, 7, %g0 /* IEU1 */ - or %g1, %g2, %g1 /* IEU0 Group */ - bne,pn %icc, 32f /* CTI */ - - sllx %g1, 7, %g2 /* IEU0 Group */ - brz,pn %o3, 30f /* CTI+IEU1 */ - ldx [%o0], %o3 /* Load */ -48: add %o0, 8, %o0 /* IEU0 Group */ - -49: sub %o3, %g1, %o2 /* IEU0 Group */ -#ifdef EIGHTBIT_NOT_RARE - andn %o2, %o3, %g5 /* IEU0 Group */ - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %g5, %g2, %g0 /* IEU1 Group */ -#else - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o2, %g2, %g0 /* IEU1 Group */ -#endif - be,pt %xcc, 49b /* CTI */ - - add %o0, 8, %o0 /* IEU0 */ - addcc %o2, %g1, %g3 /* IEU1 Group */ - srlx %o2, 32, %o2 /* IEU0 */ -50: andcc %o2, %g2, %g0 /* IEU1 Group */ - - be,pn %xcc, 51f /* CTI */ - srlx %g3, 56, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 29f /* CTI */ - - srlx %g3, 48, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 28f /* CTI */ - srlx %g3, 40, %o2 /* IEU0 */ - - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 27f /* CTI */ - srlx %g3, 32, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 26f /* CTI */ -51: srlx %g3, 24, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 25f /* CTI */ - - srlx %g3, 16, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 24f /* CTI */ - srlx %g3, 8, %o2 /* IEU0 */ - - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 23f /* CTI */ - sub %o3, %g1, %o2 /* IEU0 */ - andcc %g3, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 52f /* CTI */ - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o2, %g2, %g0 /* IEU1 Group */ - be,pt %xcc, 49b /* CTI */ - - add %o0, 8, %o0 /* IEU0 */ - addcc %o2, %g1, %g3 /* IEU1 Group */ - ba,pt %xcc, 50b /* CTI */ - srlx %o2, 32, %o2 /* IEU0 */ - - .align 16 -52: ba,pt %xcc, 12f /* CTI Group */ - add %o0, -9, %o0 /* IEU0 */ -23: ba,pt %xcc, 12f /* CTI Group */ - add %o0, -10, %o0 /* IEU0 */ - -24: ba,pt %xcc, 12f /* CTI Group */ - add %o0, -11, %o0 /* IEU0 */ -25: ba,pt %xcc, 12f /* CTI Group */ - add %o0, -12, %o0 /* IEU0 */ - -26: ba,pt %xcc, 12f /* CTI Group */ - add %o0, -13, %o0 /* IEU0 */ 
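When the probe fires, the code still does not know which byte is the terminator, so it shifts the doubleword down by 56, 48, 40, ... bits and tests one byte at a time; on big-endian SPARC the most significant byte is the one at the lowest address, which is why the scan starts at bit 56. The surrounding adjustment labels (52, 23-29) then back the destination pointer up by the matching 9 to 16 bytes. A rough C equivalent of the scan, illustrative only and assuming big-endian byte order as on SPARC:

#include <stdint.h>

/* Index, counted from the lowest address, of the first zero byte in a
 * big-endian doubleword, or 8 if no byte is zero. */
static int first_zero_byte_be(uint64_t x)
{
    int i;
    for (i = 0; i < 8; i++) {
        if (((x >> (56 - 8 * i)) & 0xff) == 0)
            return i;
    }
    return 8;
}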
-27: ba,pt %xcc, 12f /* CTI Group */ - add %o0, -14, %o0 /* IEU0 */ - -28: ba,pt %xcc, 12f /* CTI Group */ - add %o0, -15, %o0 /* IEU0 */ -29: add %o0, -16, %o0 /* IEU0 Group */ -30: andcc %o1, 7, %g3 /* IEU1 */ - -31: bne,pn %icc, 14f /* CTI */ - orcc %g0, 64, %g4 /* IEU1 Group */ -1: ldx [%o1], %o3 /* Load */ - add %o1, 8, %o1 /* IEU1 */ - -2: mov %o3, %g3 /* IEU0 Group */ -3: sub %o3, %g1, %o2 /* IEU1 */ - ldxa [%o1] ASI_PNF, %o3 /* Load */ -#ifdef EIGHTBIT_NOT_RARE - andn %o2, %g3, %o2 /* IEU0 Group */ -#endif - add %o0, 8, %o0 /* IEU0 Group */ - - andcc %o2, %g2, %g0 /* IEU1 */ - add %o1, 8, %o1 /* IEU0 Group */ - be,a,pt %xcc, 2b /* CTI */ - stx %g3, [%o0 - 8] /* Store */ - - srlx %g3, 56, %g5 /* IEU0 Group */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 11f /* CTI */ - srlx %g3, 48, %g4 /* IEU0 */ - - andcc %g4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 10f /* CTI */ - srlx %g3, 40, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 9f /* CTI */ - srlx %g3, 32, %g4 /* IEU0 */ - andcc %g4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 8f /* CTI */ - - srlx %g3, 24, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 7f /* CTI */ - srlx %g3, 16, %g4 /* IEU0 */ - - andcc %g4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 6f /* CTI */ - srlx %g3, 8, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 5f /* CTI */ - sub %o3, %g1, %o2 /* IEU0 */ - stx %g3, [%o0 - 8] /* Store Group */ - andcc %g3, 0xff, %g0 /* IEU1 */ - - bne,pt %icc, 3b /* CTI */ - mov %o3, %g3 /* IEU0 Group */ -4: retl /* CTI+IEU1 Group */ - mov %g6, %o0 /* IEU0 */ - - .align 16 -5: stb %g5, [%o0 - 2] /* Store Group */ - srlx %g3, 16, %g4 /* IEU0 */ -6: sth %g4, [%o0 - 4] /* Store Group */ - srlx %g3, 32, %g4 /* IEU0 */ - - stw %g4, [%o0 - 8] /* Store Group */ - retl /* CTI+IEU1 Group */ - mov %g6, %o0 /* IEU0 */ -7: stb %g5, [%o0 - 4] /* Store Group */ - - srlx %g3, 32, %g4 /* IEU0 */ -8: stw %g4, [%o0 - 8] /* Store Group */ - retl /* CTI+IEU1 Group */ - mov %g6, %o0 /* IEU0 */ - -9: stb %g5, [%o0 - 6] /* Store Group */ - srlx %g3, 48, %g4 /* IEU0 */ -10: sth %g4, [%o0 - 8] /* Store Group */ - retl /* CTI+IEU1 Group */ - - mov %g6, %o0 /* IEU0 */ -11: stb %g5, [%o0 - 8] /* Store Group */ - retl /* CTI+IEU1 Group */ - mov %g6, %o0 /* IEU0 */ - - .align 16 -32: andcc %o0, 7, %g0 /* IEU1 Group */ - be,a,pn %icc, 48b /* CTI */ - ldx [%o0], %o3 /* Load */ - add %o0, 1, %o0 /* IEU0 Group */ - - brnz,a,pt %o3, 32b /* CTI+IEU1 */ - lduba [%o0] ASI_PNF, %o3 /* Load */ - add %o0, -1, %o0 /* IEU0 Group */ - andcc %o0, 7, %g0 /* IEU1 Group */ - - be,a,pn %icc, 31b /* CTI */ - andcc %o1, 7, %g3 /* IEU1 Group */ -12: ldub [%o1], %o3 /* Load */ - stb %o3, [%o0] /* Store Group */ - -13: add %o0, 1, %o0 /* IEU0 */ - add %o1, 1, %o1 /* IEU1 */ - andcc %o3, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4b /* CTI */ - - lduba [%o1] ASI_PNF, %o3 /* Load */ - andcc %o0, 7, %g0 /* IEU1 Group */ - bne,a,pt %icc, 13b /* CTI */ - stb %o3, [%o0] /* Store */ - - andcc %o1, 7, %g3 /* IEU1 Group */ - be,a,pt %icc, 1b /* CTI */ - ldx [%o1], %o3 /* Load */ - orcc %g0, 64, %g4 /* IEU1 Group */ - -14: sllx %g3, 3, %g5 /* IEU0 */ - sub %o1, %g3, %o1 /* IEU0 Group */ - sub %g4, %g5, %g4 /* IEU1 */ - /* %g1 = 0101010101010101 * - * %g2 = 8080808080808080 * - * %g3 = source alignment * - * %g5 = number of bits to shift left * - * %g4 = number of bits to shift right */ - ldxa [%o1] ASI_PNF, %o5 /* Load Group */ - - addcc %o1, 8, %o1 /* IEU1 */ -15: sllx %o5, %g5, %o3 /* IEU0 Group */ - ldxa [%o1] ASI_PNF, %o5 
/* Load */ - srlx %o5, %g4, %o4 /* IEU0 Group */ - - add %o0, 8, %o0 /* IEU1 */ - or %o3, %o4, %o3 /* IEU0 Group */ - add %o1, 8, %o1 /* IEU1 */ - sub %o3, %g1, %o4 /* IEU0 Group */ - -#ifdef EIGHTBIT_NOT_RARE - andn %o4, %o3, %o4 /* IEU0 Group */ -#endif - andcc %o4, %g2, %g0 /* IEU1 Group */ - be,a,pt %xcc, 15b /* CTI */ - stx %o3, [%o0 - 8] /* Store */ - srlx %o3, 56, %o4 /* IEU0 Group */ - - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 22f /* CTI */ - srlx %o3, 48, %o4 /* IEU0 */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 21f /* CTI */ - srlx %o3, 40, %o4 /* IEU0 */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 20f /* CTI */ - - srlx %o3, 32, %o4 /* IEU0 */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 19f /* CTI */ - srlx %o3, 24, %o4 /* IEU0 */ - - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 18f /* CTI */ - srlx %o3, 16, %o4 /* IEU0 */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 17f /* CTI */ - srlx %o3, 8, %o4 /* IEU0 */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 16f /* CTI */ - - andcc %o3, 0xff, %g0 /* IEU1 Group */ - bne,pn %icc, 15b /* CTI */ - stx %o3, [%o0 - 8] /* Store */ - retl /* CTI+IEU1 Group */ - - mov %g6, %o0 /* IEU0 */ - - .align 16 -16: srlx %o3, 8, %o4 /* IEU0 Group */ - stb %o4, [%o0 - 2] /* Store */ -17: srlx %o3, 16, %o4 /* IEU0 Group */ - stb %o4, [%o0 - 3] /* Store */ - -18: srlx %o3, 24, %o4 /* IEU0 Group */ - stb %o4, [%o0 - 4] /* Store */ -19: srlx %o3, 32, %o4 /* IEU0 Group */ - stw %o4, [%o0 - 8] /* Store */ - - retl /* CTI+IEU1 Group */ - mov %g6, %o0 /* IEU0 */ - nop - nop - -20: srlx %o3, 40, %o4 /* IEU0 Group */ - stb %o4, [%o0 - 6] /* Store */ -21: srlx %o3, 48, %o4 /* IEU0 Group */ - stb %o4, [%o0 - 7] /* Store */ - -22: srlx %o3, 56, %o4 /* IEU0 Group */ - stb %o4, [%o0 - 8] /* Store */ - retl /* CTI+IEU1 Group */ - mov %g6, %o0 /* IEU0 */ -END(strcat) -libc_hidden_def(strcat) diff --git a/libc/string/sparc/sparc64/strchr.S b/libc/string/sparc/sparc64/strchr.S deleted file mode 100644 index da26d1f9c..000000000 --- a/libc/string/sparc/sparc64/strchr.S +++ /dev/null @@ -1,486 +0,0 @@ -/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR. - For SPARC v9. - Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and - Jakub Jelinek <jj@ultra.linux.cz>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include <features.h> -#include <asm/asi.h> -#ifndef XCC -#define XCC xcc -#define USE_BPR - .register %g2, #scratch - .register %g3, #scratch - .register %g6, #scratch -#endif - - /* Normally, this uses - ((xword - 0x0101010101010101) & 0x8080808080808080) test - to find out if any byte in xword could be zero. 
This is fast, but - also gives false alarm for any byte in range 0x81-0xff. It does - not matter for correctness, as if this test tells us there could - be some zero byte, we check it byte by byte, but if bytes with - high bits set are common in the strings, then this will give poor - performance. You can #define EIGHTBIT_NOT_RARE and the algorithm - will use one tick slower, but more precise test - ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), - which does not give any false alarms (but if some bits are set, - one cannot assume from it which bytes are zero and which are not). - It is yet to be measured, what is the correct default for glibc - in these days for an average user. - */ - - .text - .align 32 -ENTRY(strchr) - andcc %o1, 0xff, %o1 /* IEU1 Group */ - be,pn %icc, 17f /* CTI */ - sllx %o1, 8, %g3 /* IEU0 Group */ - sethi %hi(0x01010101), %g1 /* IEU1 */ - - or %g3, %o1, %g3 /* IEU0 Group */ - ldub [%o0], %o3 /* Load */ - sllx %g3, 16, %g5 /* IEU0 Group */ - or %g1, %lo(0x01010101), %g1 /* IEU1 */ - - sllx %g1, 32, %g2 /* IEU0 Group */ - brz,pn %o3, 5f /* CTI+IEU1 */ - orcc %g3, %g5, %g3 /* IEU1 Group */ - sllx %g3, 32, %g5 /* IEU0 */ - - cmp %o3, %o1 /* IEU1 Group */ - be,pn %xcc, 14f /* CTI */ - or %g1, %g2, %g1 /* IEU0 */ - andcc %o0, 7, %g0 /* IEU1 Group */ - - bne,a,pn %icc, 15f /* CTI */ - add %o0, 1, %o0 /* IEU0 */ - ldx [%o0], %o3 /* Load Group */ -1: sllx %g1, 7, %g2 /* IEU0 */ - - or %g3, %g5, %g3 /* IEU1 */ - add %o0, 8, %o0 /* IEU0 Group */ - xor %o3, %g3, %o4 /* IEU1 */ - /* %g1 = 0101010101010101 * - * %g2 = 8080088080808080 * - * %g3 = c c c c c c c c * - * %o3 = value * - * %o4 = value XOR c */ -2: sub %o3, %g1, %o2 /* IEU0 Group */ - - sub %o4, %g1, %o5 /* IEU1 */ -#ifdef EIGHTBIT_NOT_RARE - andn %o2, %o3, %g6 /* IEU0 Group */ - andn %o5, %o4, %o5 /* IEU1 */ - ldxa [%o0] ASI_PNF, %o3 /* Load */ - or %o5, %g6, %o5 /* IEU0 Group */ -#else - ldxa [%o0] ASI_PNF, %o3 /* Load */ - or %o5, %o2, %o5 /* IEU0 Group */ -#endif - add %o0, 8, %o0 /* IEU1 */ - - andcc %o5, %g2, %g0 /* IEU1 Group */ - be,a,pt %xcc, 2b /* CTI */ - xor %o3, %g3, %o4 /* IEU0 */ - srlx %o5, 32, %g5 /* IEU0 Group */ - - add %o2, %g1, %o2 /* IEU1 */ -3: andcc %g5, %g2, %g0 /* IEU1 Group */ - be,pn %xcc, 4f /* CTI */ - srlx %o2, 56, %g5 /* IEU0 */ - - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 5f /* CTI */ - srlx %o4, 56, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 6f /* CTI */ - srlx %o2, 48, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 5f /* CTI */ - - srlx %o4, 48, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 7f /* CTI */ - srlx %o2, 40, %g5 /* IEU0 */ - - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 5f /* CTI */ - srlx %o4, 40, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 8f /* CTI */ - srlx %o2, 32, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 5f /* CTI */ - - srlx %o4, 32, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 9f /* CTI */ -4: srlx %o2, 24, %g5 /* IEU0 */ - - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 5f /* CTI */ - srlx %o4, 24, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 10f /* CTI */ - srlx %o2, 16, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 5f /* CTI */ - - srlx %o4, 16, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 11f /* CTI */ - srlx %o2, 8, %g5 /* IEU0 */ - - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 5f /* CTI 
*/ - srlx %o4, 8, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 12f /* CTI */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 5f /* CTI */ - sub %o3, %g1, %o2 /* IEU0 */ - - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 13f /* CTI */ - xor %o3, %g3, %o4 /* IEU0 */ - ldxa [%o0] ASI_PNF, %o3 /* Load Group */ - - sub %o4, %g1, %o5 /* IEU0 */ - or %o5, %o2, %o5 /* IEU1 */ - add %o0, 8, %o0 /* IEU0 Group */ - andcc %o5, %g2, %g0 /* IEU1 */ - - be,a,pt %xcc, 2b /* CTI */ - xor %o3, %g3, %o4 /* IEU0 Group */ - srlx %o5, 32, %g5 /* IEU0 Group */ - ba,pt %xcc, 3b /* CTI */ - - add %o2, %g1, %o2 /* IEU1 */ - - .align 16 -5: retl /* CTI+IEU1 Group */ - clr %o0 /* IEU0 */ -6: retl /* CTI+IEU1 Group */ - add %o0, -16, %o0 /* IEU0 */ - -7: retl /* CTI+IEU1 Group */ - add %o0, -15, %o0 /* IEU0 */ -8: retl /* CTI+IEU1 Group */ - add %o0, -14, %o0 /* IEU0 */ - -9: retl /* CTI+IEU1 Group */ - add %o0, -13, %o0 /* IEU0 */ -10: retl /* CTI+IEU1 Group */ - add %o0, -12, %o0 /* IEU0 */ - -11: retl /* CTI+IEU1 Group */ - add %o0, -11, %o0 /* IEU0 */ -12: retl /* CTI+IEU1 Group */ - add %o0, -10, %o0 /* IEU0 */ - -13: retl /* CTI+IEU1 Group */ - add %o0, -9, %o0 /* IEU0 */ -14: retl /* CTI+IEU1 Group */ - nop /* IEU0 */ - - .align 16 -15: ldub [%o0], %o3 /* Load Group */ -16: andcc %o0, 7, %g0 /* IEU1 */ - be,a,pn %icc, 1b /* CTI */ - ldx [%o0], %o3 /* Load Group */ - - andcc %o3, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 5b /* CTI */ - add %o0, 1, %o0 /* IEU0 */ - cmp %o3, %o1 /* IEU1 Group */ - - bne,a,pn %icc, 16b /* CTI */ - ldub [%o0], %o3 /* Load */ - retl /* CTI+IEU1 Group */ - add %o0, -1, %o0 /* IEU0 */ - - /* strchr (str, 0) */ - .align 32 - nop - .align 16 -17: sethi %hi(0x01010101), %g1 /* IEU0 Group */ - ldub [%o0], %o3 /* Load */ - or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ - sllx %g1, 32, %g2 /* IEU0 Group */ - - andcc %o0, 7, %g0 /* IEU1 */ - or %g1, %g2, %g1 /* IEU0 Group */ - bne,pn %icc, 32f /* CTI */ - sllx %g1, 7, %g2 /* IEU0 Group */ - - brz,pn %o3, 30f /* CTI+IEU1 */ - ldx [%o0], %o3 /* Load */ -18: add %o0, 8, %o0 /* IEU0 Group */ -19: sub %o3, %g1, %o2 /* IEU0 Group */ - -#ifdef EIGHTBIT_NOT_RARE - andn %o2, %o3, %g6 /* IEU0 Group */ - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %g6, %g2, %g0 /* IEU1 Group */ -#else - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o2, %g2, %g0 /* IEU1 Group */ -#endif - be,pt %xcc, 19b /* CTI */ - add %o0, 8, %o0 /* IEU0 */ - - addcc %o2, %g1, %g3 /* IEU1 Group */ - srlx %o2, 32, %o2 /* IEU0 */ -20: andcc %o2, %g2, %g0 /* IEU1 Group */ - be,pn %xcc, 21f /* CTI */ - - srlx %g3, 56, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 29f /* CTI */ - srlx %g3, 48, %o2 /* IEU0 */ - - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 28f /* CTI */ - srlx %g3, 40, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 27f /* CTI */ - srlx %g3, 32, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 26f /* CTI */ - -21: srlx %g3, 24, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 25f /* CTI */ - srlx %g3, 16, %o2 /* IEU0 */ - - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 24f /* CTI */ - srlx %g3, 8, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 23f /* CTI */ - sub %o3, %g1, %o2 /* IEU0 */ - andcc %g3, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 22f /* CTI */ - - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o2, %g2, %g0 /* IEU1 Group */ - be,pt %xcc, 19b /* CTI */ - add %o0, 8, %o0 /* IEU0 */ - - addcc %o2, %g1, %g3 /* IEU1 
Group */ - ba,pt %xcc, 20b /* CTI */ - srlx %o2, 32, %o2 /* IEU0 */ - - .align 16 -22: retl /* CTI+IEU1 Group */ - add %o0, -9, %o0 /* IEU0 */ -23: retl /* CTI+IEU1 Group */ - add %o0, -10, %o0 /* IEU0 */ - -24: retl /* CTI+IEU1 Group */ - add %o0, -11, %o0 /* IEU0 */ -25: retl /* CTI+IEU1 Group */ - add %o0, -12, %o0 /* IEU0 */ - -26: retl /* CTI+IEU1 Group */ - add %o0, -13, %o0 /* IEU0 */ -27: retl /* CTI+IEU1 Group */ - add %o0, -14, %o0 /* IEU0 */ - -28: retl /* CTI+IEU1 Group */ - add %o0, -15, %o0 /* IEU0 */ -29: retl /* CTI+IEU1 Group */ - add %o0, -16, %o0 /* IEU0 */ - -30: retl /* CTI+IEU1 Group */ - nop /* IEU0 */ - - .align 16 -32: andcc %o0, 7, %g0 /* IEU1 Group */ - be,a,pn %icc, 18b /* CTI */ - ldx [%o0], %o3 /* Load */ - add %o0, 1, %o0 /* IEU0 Group */ - - brnz,a,pt %o3, 32b /* CTI+IEU1 */ - lduba [%o0] ASI_PNF, %o3 /* Load */ - retl /* CTI+IEU1 Group */ - add %o0, -1, %o0 /* IEU0 */ -END(strchr) -libc_hidden_def(strchr) -#ifdef __UCLIBC_SUSV3_LEGACY__ -strong_alias(strchr,index) -#endif - - .align 32 -ENTRY(strrchr) - andcc %o1, 0xff, %o1 /* IEU1 Group */ - be,pn %icc, 17b /* CTI */ - clr %g4 /* IEU0 */ - andcc %o0, 7, %g0 /* IEU1 Group */ - - bne,pn %icc, 13f /* CTI */ - sllx %o1, 8, %g3 /* IEU0 */ - ldx [%o0], %o3 /* Load Group */ -1: sethi %hi(0x01010101), %g1 /* IEU0 */ - - or %g3, %o1, %g3 /* IEU1 */ - sllx %g3, 16, %g5 /* IEU0 Group */ - or %g1, %lo(0x01010101), %g1 /* IEU1 */ - sllx %g1, 32, %g2 /* IEU0 Group */ - - or %g3, %g5, %g3 /* IEU1 */ - sllx %g3, 32, %g5 /* IEU0 Group */ - or %g1, %g2, %g1 /* IEU1 */ - sllx %g1, 7, %g2 /* IEU0 Group */ - - or %g3, %g5, %g3 /* IEU1 */ - add %o0, 8, %o0 /* IEU0 Group */ - xor %o3, %g3, %o4 /* IEU1 */ - /* %g1 = 0101010101010101 * - * %g2 = 8080088080808080 * - * %g3 = c c c c c c c c * - * %o3 = value * - * %o4 = value XOR c */ -2: sub %o3, %g1, %o2 /* IEU0 Group */ - -3: sub %o4, %g1, %o5 /* IEU1 */ -#ifdef EIGHTBIT_NOT_RARE - andn %o2, %o3, %g6 /* IEU0 Group */ - andn %o5, %o4, %o5 /* IEU1 */ - ldxa [%o0] ASI_PNF, %o3 /* Load */ - - or %o5, %g6, %o5 /* IEU0 Group */ -#else - ldxa [%o0] ASI_PNF, %o3 /* Load */ - - or %o5, %o2, %o5 /* IEU0 Group */ -#endif - add %o0, 8, %o0 /* IEU1 */ - andcc %o5, %g2, %g0 /* IEU1 Group */ - be,a,pt %xcc, 2b /* CTI */ - - xor %o3, %g3, %o4 /* IEU0 */ - srlx %o5, 32, %g5 /* IEU0 Group */ - add %o2, %g1, %o2 /* IEU1 */ - andcc %g5, %g2, %g0 /* IEU1 Group */ - - be,pn %xcc, 7f /* CTI */ - srlx %o2, 56, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 12f /* CTI */ - - srlx %o4, 56, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - srlx %o2, 48, %g5 /* IEU0 */ - be,a,pn %icc, 4f /* CTI */ - - add %o0, -16, %g4 /* IEU0 Group */ -4: andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 12f /* CTI */ - srlx %o4, 48, %g5 /* IEU0 */ - - andcc %g5, 0xff, %g0 /* IEU1 Group */ - srlx %o2, 40, %g5 /* IEU0 */ - be,a,pn %icc, 5f /* CTI */ - add %o0, -15, %g4 /* IEU0 Group */ - -5: andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 12f /* CTI */ - srlx %o4, 40, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - - srlx %o2, 32, %g5 /* IEU0 */ - be,a,pn %icc, 6f /* CTI */ - add %o0, -14, %g4 /* IEU0 Group */ -6: andcc %g5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 12f /* CTI */ - srlx %o4, 32, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,a,pn %icc, 7f /* CTI */ - - add %o0, -13, %g4 /* IEU0 */ -7: srlx %o2, 24, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 12f /* CTI */ - - srlx %o4, 24, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group 
*/ - srlx %o2, 16, %g5 /* IEU0 */ - be,a,pn %icc, 8f /* CTI */ - - add %o0, -12, %g4 /* IEU0 Group */ -8: andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 12f /* CTI */ - srlx %o4, 16, %g5 /* IEU0 */ - - andcc %g5, 0xff, %g0 /* IEU1 Group */ - srlx %o2, 8, %g5 /* IEU0 */ - be,a,pn %icc, 9f /* CTI */ - add %o0, -11, %g4 /* IEU0 Group */ - -9: andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 12f /* CTI */ - srlx %o4, 8, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - - be,a,pn %icc, 10f /* CTI */ - add %o0, -10, %g4 /* IEU0 */ -10: andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 12f /* CTI */ - - sub %o3, %g1, %o2 /* IEU0 */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,a,pn %icc, 11f /* CTI */ - add %o0, -9, %g4 /* IEU0 */ - -11: ba,pt %xcc, 3b /* CTI Group */ - xor %o3, %g3, %o4 /* IEU0 Group */ -12: retl /* CTI+IEU1 Group */ - mov %g4, %o0 /* IEU0 */ - - .align 16 -13: ldub [%o0], %o3 /* Load Group */ - add %o0, 1, %o0 /* IEU0 */ -14: andcc %o3, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 12b /* CTI */ - - cmp %o3, %o1 /* IEU1 Group */ - ldub [%o0], %o3 /* Load */ - be,a,pn %icc, 15f /* CTI */ - add %o0, -1, %g4 /* IEU0 Group */ - -15: andcc %o0, 7, %g0 /* IEU1 Group */ - bne,a,pt %icc, 14b /* CTI */ - add %o0, 1, %o0 /* IEU0 */ - ba,pt %xcc, 1b /* CTI Group */ - - ldx [%o0], %o3 /* Load */ -END(strrchr) -libc_hidden_def(strrchr) -#ifdef __UCLIBC_SUSV3_LEGACY__ -strong_alias(strrchr,rindex) -#endif diff --git a/libc/string/sparc/sparc64/strcmp.S b/libc/string/sparc/sparc64/strcmp.S deleted file mode 100644 index df9e69179..000000000 --- a/libc/string/sparc/sparc64/strcmp.S +++ /dev/null @@ -1,279 +0,0 @@ -/* Compare two strings for differences. - For SPARC v9. - Copyright (C) 1997, 1999, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and - Jakub Jelinek <jj@ultra.linux.cz>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include <asm/asi.h> -#ifndef XCC - .register %g2, #scratch - .register %g3, #scratch - .register %g6, #scratch -#endif - - /* Normally, this uses - ((xword - 0x0101010101010101) & 0x8080808080808080) test - to find out if any byte in xword could be zero. This is fast, but - also gives false alarm for any byte in range 0x81-0xff. It does - not matter for correctness, as if this test tells us there could - be some zero byte, we check it byte by byte, but if bytes with - high bits set are common in the strings, then this will give poor - performance. You can #define EIGHTBIT_NOT_RARE and the algorithm - will use one tick slower, but more precise test - ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), - which does not give any false alarms (but if some bits are set, - one cannot assume from it which bytes are zero and which are not). 
- It is yet to be measured, what is the correct default for glibc - in these days for an average user. - */ - - .text - .align 32 -ENTRY(strcmp) - sethi %hi(0x01010101), %g1 /* IEU0 Group */ - andcc %o0, 7, %g0 /* IEU1 */ - bne,pn %icc, 7f /* CTI */ - or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ - - andcc %o1, 7, %g3 /* IEU1 */ - bne,pn %icc, 9f /* CTI */ - sllx %g1, 32, %g2 /* IEU0 Group */ - ldx [%o0], %o2 /* Load */ - - or %g1, %g2, %g1 /* IEU0 Group */ -1: ldx [%o1], %o3 /* Load */ - sub %o1, %o0, %o1 /* IEU1 */ - sllx %g1, 7, %g2 /* IEU0 Group */ - -2: add %o0, 8, %o0 /* IEU1 */ - sub %o2, %g1, %g3 /* IEU0 Group */ - subcc %o2, %o3, %g0 /* IEU1 */ - bne,pn %xcc, 13f /* CTI */ - -#ifdef EIGHTBIT_NOT_RARE - andn %g3, %o2, %g4 /* IEU0 Group */ - ldxa [%o0] ASI_PNF, %o2 /* Load */ - andcc %g4, %g2, %g0 /* IEU1 Group */ -#else - ldxa [%o0] ASI_PNF, %o2 /* Load Group */ - andcc %g3, %g2, %g0 /* IEU1 */ -#endif - be,a,pt %xcc, 2b /* CTI */ - ldxa [%o1 + %o0] ASI_PNF, %o3 /* Load Group */ - - addcc %g3, %g1, %o4 /* IEU1 */ - srlx %g3, 32, %g3 /* IEU0 */ - andcc %g3, %g2, %g0 /* IEU1 Group */ - be,pt %xcc, 3f /* CTI */ - - srlx %o4, 56, %o5 /* IEU0 */ - andcc %o5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4f /* CTI */ - srlx %o4, 48, %o5 /* IEU0 */ - - andcc %o5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4f /* CTI */ - srlx %o4, 40, %o5 /* IEU0 */ - andcc %o5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 4f /* CTI */ - srlx %o4, 32, %o5 /* IEU0 */ - andcc %o5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4f /* CTI */ - -3: srlx %o4, 24, %o5 /* IEU0 */ - andcc %o5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4f /* CTI */ - srlx %o4, 16, %o5 /* IEU0 */ - - andcc %o5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4f /* CTI */ - srlx %o4, 8, %o5 /* IEU0 */ - andcc %o5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 4f /* CTI */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - bne,a,pn %icc, 2b /* CTI */ - ldxa [%o1 + %o0] ASI_PNF, %o3 /* Load */ - -4: retl /* CTI+IEU1 Group */ - clr %o0 /* IEU0 */ - - .align 32 -13: mov 0xff, %g6 /* IEU0 Group */ -#ifdef EIGHTBIT_NOT_RARE - andcc %g4, %g2, %g0 /* IEU1 */ -#else - andcc %g3, %g2, %g0 /* IEU1 */ -#endif - be,pt %xcc, 25f /* CTI */ - addcc %g3, %g1, %o4 /* IEU1 Group */ - - srlx %g3, 32, %g3 /* IEU0 */ - andcc %g3, %g2, %g0 /* IEU1 Group */ - be,pt %xcc, 23f /* CTI */ - sllx %g6, 56, %o5 /* IEU0 */ - - andcc %o4, %o5, %g0 /* IEU1 Group */ - be,pn %xcc, 24f /* CTI */ - sllx %g6, 48, %o5 /* IEU0 */ - andcc %o4, %o5, %g0 /* IEU1 Group */ - - be,pn %xcc, 24f /* CTI */ - sllx %g6, 40, %o5 /* IEU0 */ - andcc %o4, %o5, %g0 /* IEU1 Group */ - be,pn %xcc, 24f /* CTI */ - - sllx %g6, 32, %o5 /* IEU0 */ - andcc %o4, %o5, %g0 /* IEU1 Group */ - be,pn %xcc, 24f /* CTI */ -23: sllx %g6, 24, %o5 /* IEU0 */ - - andcc %o4, %o5, %g0 /* IEU1 Group */ - be,pn %icc, 24f /* CTI */ - sllx %g6, 16, %o5 /* IEU0 */ - andcc %o4, %o5, %g0 /* IEU1 Group */ - - be,pn %icc, 24f /* CTI */ - sllx %g6, 8, %o5 /* IEU0 */ - andcc %o4, %o5, %g0 /* IEU1 Group */ - be,pn %icc, 24f /* CTI */ - - mov %g6, %o5 /* IEU0 */ -25: cmp %o4, %o3 /* IEU1 Group */ -5: mov -1, %o0 /* IEU0 */ - retl /* CTI+IEU1 Group */ - - movgu %xcc, 1, %o0 /* Single Group */ - - .align 16 -24: sub %o5, 1, %g6 /* IEU0 Group */ - clr %o0 /* IEU1 */ - or %o5, %g6, %o5 /* IEU0 Group */ - andn %o4, %o5, %o4 /* IEU0 Group */ - - andn %o3, %o5, %o3 /* IEU1 */ - cmp %o4, %o3 /* IEU1 Group */ - movgu %xcc, 1, %o0 /* Single Group */ - retl /* CTI+IEU1 Group */ - - movlu %xcc, -1, %o0 /* Single Group */ -6: retl /* CTI+IEU1 Group */ - mov %o4, %o0 /* 
IEU0 */ - - .align 16 -7: ldub [%o0], %o2 /* Load */ - add %o0, 1, %o0 /* IEU1 */ - ldub [%o1], %o3 /* Load Group */ - sllx %g1, 32, %g2 /* IEU0 */ - -8: add %o1, 1, %o1 /* IEU1 */ - subcc %o2, %o3, %o4 /* IEU1 Group */ - bne,pn %xcc, 6b /* CTI */ - lduba [%o0] ASI_PNF, %o2 /* Load */ - - brz,pn %o3, 4b /* CTI+IEU1 Group */ - lduba [%o1] ASI_PNF, %o3 /* Load */ - andcc %o0, 7, %g0 /* IEU1 Group */ - bne,a,pn %icc, 8b /* CTI */ - - add %o0, 1, %o0 /* IEU0 */ - or %g1, %g2, %g1 /* IEU0 Group */ - andcc %o1, 7, %g3 /* IEU1 */ - be,a,pn %icc, 1b /* CTI */ - - ldxa [%o0] ASI_PNF, %o2 /* Load Group */ -9: sllx %g3, 3, %g5 /* IEU0 */ - mov 64, %o5 /* IEU1 */ - sub %o1, %g3, %o1 /* IEU0 Group */ - - sub %o5, %g5, %o5 /* IEU1 */ - ldxa [%o1] ASI_PNF, %g6 /* Load Group */ - or %g1, %g2, %g1 /* IEU0 */ - sub %o1, %o0, %o1 /* IEU1 */ - - sllx %g1, 7, %g2 /* IEU0 Group */ - add %o1, 8, %o1 /* IEU1 */ - /* %g1 = 0101010101010101 - * %g2 = 8080808080800880 - * %g5 = number of bits to shift left - * %o5 = number of bits to shift right */ -10: sllx %g6, %g5, %o3 /* IEU0 Group */ - ldxa [%o1 + %o0] ASI_PNF, %g6 /* Load */ - -11: srlx %g6, %o5, %o4 /* IEU0 Group */ - ldxa [%o0] ASI_PNF, %o2 /* Load */ - or %o3, %o4, %o3 /* IEU1 */ - add %o0, 8, %o0 /* IEU0 Group */ - - subcc %o2, %o3, %g0 /* IEU1 */ -#ifdef EIGHTBIT_NOT_RARE - sub %o2, %g1, %g3 /* IEU0 Group */ - bne,pn %xcc, 13b /* CTI */ - andn %g3, %o2, %g4 /* IEU0 Group */ - - andcc %g4, %g2, %g0 /* IEU1 Group */ - be,pt %xcc, 10b /* CTI */ - srlx %g4, 32, %g4 /* IEU0 */ - andcc %g4, %g2, %g0 /* IEU1 Group */ -#else - bne,pn %xcc, 13b /* CTI */ - sub %o2, %g1, %g3 /* IEU0 Group */ - andcc %g3, %g2, %g0 /* IEU1 Group */ - - be,pt %xcc, 10b /* CTI */ - srlx %g3, 32, %g3 /* IEU0 */ - andcc %g3, %g2, %g0 /* IEU1 Group */ -#endif - be,pt %xcc, 12f /* CTI */ - - srlx %o2, 56, %g3 /* IEU0 */ - andcc %g3, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4b /* CTI */ - srlx %o2, 48, %g3 /* IEU0 */ - - andcc %g3, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4b /* CTI */ - srlx %o2, 40, %g3 /* IEU0 */ - andcc %g3, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 4b /* CTI */ - srlx %o2, 32, %g3 /* IEU0 */ - andcc %g3, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4b /* CTI */ - -12: srlx %o2, 24, %g3 /* IEU0 */ - andcc %g3, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4b /* CTI */ - srlx %o2, 16, %g3 /* IEU0 */ - - andcc %g3, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4b /* CTI */ - srlx %o2, 8, %g3 /* IEU0 */ - andcc %g3, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 4b /* CTI */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4b /* CTI */ - sllx %g6, %g5, %o3 /* IEU0 */ - - ba,pt %xcc, 11b /* CTI Group */ - ldxa [%o1 + %o0] ASI_PNF, %g6 /* Load */ -END(strcmp) -libc_hidden_def(strcmp) diff --git a/libc/string/sparc/sparc64/strcpy.S b/libc/string/sparc/sparc64/strcpy.S deleted file mode 100644 index 1317d5489..000000000 --- a/libc/string/sparc/sparc64/strcpy.S +++ /dev/null @@ -1,245 +0,0 @@ -/* Copy SRC to DEST returning DEST. - For SPARC v9. - Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and - Jakub Jelinek <jj@ultra.linux.cz>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include <asm/asi.h> -#ifndef XCC - .register %g2, #scratch - .register %g3, #scratch - .register %g6, #scratch -#endif - - /* Normally, this uses - ((xword - 0x0101010101010101) & 0x8080808080808080) test - to find out if any byte in xword could be zero. This is fast, but - also gives false alarm for any byte in range 0x81-0xff. It does - not matter for correctness, as if this test tells us there could - be some zero byte, we check it byte by byte, but if bytes with - high bits set are common in the strings, then this will give poor - performance. You can #define EIGHTBIT_NOT_RARE and the algorithm - will use one tick slower, but more precise test - ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), - which does not give any false alarms (but if some bits are set, - one cannot assume from it which bytes are zero and which are not). - It is yet to be measured, what is the correct default for glibc - in these days for an average user. - */ - - .text - .align 32 -ENTRY(strcpy) - sethi %hi(0x01010101), %g1 /* IEU0 Group */ - mov %o0, %g6 /* IEU1 */ - or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ - andcc %o0, 7, %g0 /* IEU1 */ - - sllx %g1, 32, %g2 /* IEU0 Group */ - bne,pn %icc, 12f /* CTI */ - andcc %o1, 7, %g3 /* IEU1 */ - or %g1, %g2, %g1 /* IEU0 Group */ - - bne,pn %icc, 14f /* CTI */ - sllx %g1, 7, %g2 /* IEU0 Group */ -1: ldx [%o1], %o3 /* Load */ - add %o1, 8, %o1 /* IEU1 */ - -2: mov %o3, %g3 /* IEU0 Group */ -3: sub %o3, %g1, %o2 /* IEU1 */ - ldxa [%o1] ASI_PNF, %o3 /* Load */ -#ifdef EIGHTBIT_NOT_RARE - andn %o2, %g3, %o2 /* IEU0 Group */ -#endif - add %o0, 8, %o0 /* IEU0 Group */ - - andcc %o2, %g2, %g0 /* IEU1 */ - add %o1, 8, %o1 /* IEU0 Group */ - be,a,pt %xcc, 2b /* CTI */ - stx %g3, [%o0 - 8] /* Store */ - - srlx %g3, 56, %g5 /* IEU0 Group */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 11f /* CTI */ - srlx %g3, 48, %g4 /* IEU0 */ - - andcc %g4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 10f /* CTI */ - srlx %g3, 40, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 9f /* CTI */ - srlx %g3, 32, %g4 /* IEU0 */ - andcc %g4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 8f /* CTI */ - - srlx %g3, 24, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 7f /* CTI */ - srlx %g3, 16, %g4 /* IEU0 */ - - andcc %g4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 6f /* CTI */ - srlx %g3, 8, %g5 /* IEU0 */ - andcc %g5, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 5f /* CTI */ - sub %o3, %g1, %o2 /* IEU0 */ - stx %g3, [%o0 - 8] /* Store Group */ - andcc %g3, 0xff, %g0 /* IEU1 */ - - bne,pt %icc, 3b /* CTI */ - mov %o3, %g3 /* IEU0 Group */ -4: retl /* CTI+IEU1 Group */ - mov %g6, %o0 /* IEU0 */ - - .align 16 -5: stb %g5, [%o0 - 2] /* Store Group */ - srlx %g3, 16, %g4 /* IEU0 */ -6: sth %g4, [%o0 - 4] /* Store Group */ - srlx %g3, 32, %g4 /* IEU0 */ - - stw %g4, [%o0 - 8] /* Store Group */ - retl /* CTI+IEU1 Group */ - mov %g6, %o0 /* IEU0 */ -7: stb %g5, [%o0 - 4] /* Store Group */ - - srlx %g3, 32, %g4 /* IEU0 */ -8: stw %g4, [%o0 - 8] /* Store Group */ - retl /* 
CTI+IEU1 Group */ - mov %g6, %o0 /* IEU0 */ - -9: stb %g5, [%o0 - 6] /* Store Group */ - srlx %g3, 48, %g4 /* IEU0 */ -10: sth %g4, [%o0 - 8] /* Store Group */ - retl /* CTI+IEU1 Group */ - - mov %g6, %o0 /* IEU0 */ -11: stb %g5, [%o0 - 8] /* Store Group */ - retl /* CTI+IEU1 Group */ - mov %g6, %o0 /* IEU0 */ - -12: or %g1, %g2, %g1 /* IEU0 Group */ - ldub [%o1], %o3 /* Load */ - sllx %g1, 7, %g2 /* IEU0 Group */ - stb %o3, [%o0] /* Store Group */ - -13: add %o0, 1, %o0 /* IEU0 */ - add %o1, 1, %o1 /* IEU1 */ - andcc %o3, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 4b /* CTI */ - - lduba [%o1] ASI_PNF, %o3 /* Load */ - andcc %o0, 7, %g0 /* IEU1 Group */ - bne,a,pt %icc, 13b /* CTI */ - stb %o3, [%o0] /* Store */ - - andcc %o1, 7, %g3 /* IEU1 Group */ - be,a,pt %icc, 1b /* CTI */ - ldx [%o1], %o3 /* Load */ -14: orcc %g0, 64, %g4 /* IEU1 Group */ - - sllx %g3, 3, %g5 /* IEU0 */ - sub %o1, %g3, %o1 /* IEU0 Group */ - sub %g4, %g5, %g4 /* IEU1 */ - /* %g1 = 0101010101010101 * - * %g2 = 8080808080808080 * - * %g3 = source alignment * - * %g5 = number of bits to shift left * - * %g4 = number of bits to shift right */ - ldxa [%o1] ASI_PNF, %o5 /* Load Group */ - - addcc %o1, 8, %o1 /* IEU1 */ -15: sllx %o5, %g5, %o3 /* IEU0 Group */ - ldxa [%o1] ASI_PNF, %o5 /* Load */ - srlx %o5, %g4, %o4 /* IEU0 Group */ - - add %o0, 8, %o0 /* IEU1 */ - or %o3, %o4, %o3 /* IEU0 Group */ - add %o1, 8, %o1 /* IEU1 */ - sub %o3, %g1, %o4 /* IEU0 Group */ - -#ifdef EIGHTBIT_NOT_RARE - andn %o4, %o3, %o4 /* IEU0 Group */ -#endif - andcc %o4, %g2, %g0 /* IEU1 Group */ - be,a,pt %xcc, 15b /* CTI */ - stx %o3, [%o0 - 8] /* Store */ - srlx %o3, 56, %o4 /* IEU0 Group */ - - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 22f /* CTI */ - srlx %o3, 48, %o4 /* IEU0 */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 21f /* CTI */ - srlx %o3, 40, %o4 /* IEU0 */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 20f /* CTI */ - - srlx %o3, 32, %o4 /* IEU0 */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 19f /* CTI */ - srlx %o3, 24, %o4 /* IEU0 */ - - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 18f /* CTI */ - srlx %o3, 16, %o4 /* IEU0 */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 17f /* CTI */ - srlx %o3, 8, %o4 /* IEU0 */ - andcc %o4, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 16f /* CTI */ - - andcc %o3, 0xff, %g0 /* IEU1 Group */ - bne,pn %icc, 15b /* CTI */ - stx %o3, [%o0 - 8] /* Store */ - retl /* CTI+IEU1 Group */ - - mov %g6, %o0 /* IEU0 */ - - .align 16 -16: srlx %o3, 8, %o4 /* IEU0 Group */ - stb %o4, [%o0 - 2] /* Store */ -17: srlx %o3, 16, %o4 /* IEU0 Group */ - stb %o4, [%o0 - 3] /* Store */ - -18: srlx %o3, 24, %o4 /* IEU0 Group */ - stb %o4, [%o0 - 4] /* Store */ -19: srlx %o3, 32, %o4 /* IEU0 Group */ - stw %o4, [%o0 - 8] /* Store */ - - retl /* CTI+IEU1 Group */ - mov %g6, %o0 /* IEU0 */ - nop - nop - -20: srlx %o3, 40, %o4 /* IEU0 Group */ - stb %o4, [%o0 - 6] /* Store */ -21: srlx %o3, 48, %o4 /* IEU0 Group */ - stb %o4, [%o0 - 7] /* Store */ - -22: srlx %o3, 56, %o4 /* IEU0 Group */ - stb %o4, [%o0 - 8] /* Store */ - retl /* CTI+IEU1 Group */ - mov %g6, %o0 /* IEU0 */ -END(strcpy) - -libc_hidden_def(strcpy) diff --git a/libc/string/sparc/sparc64/strlen.S b/libc/string/sparc/sparc64/strlen.S deleted file mode 100644 index 1fe854961..000000000 --- a/libc/string/sparc/sparc64/strlen.S +++ /dev/null @@ -1,173 +0,0 @@ -/* Determine the length of a string. For SPARC v9. - Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc. 
- This file is part of the GNU C Library. - Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and - Jakub Jelinek <jj@ultra.linux.cz>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include <asm/asi.h> - - /* Normally, this uses - ((xword - 0x0101010101010101) & 0x8080808080808080) test - to find out if any byte in xword could be zero. This is fast, but - also gives false alarm for any byte in range 0x81-0xff. It does - not matter for correctness, as if this test tells us there could - be some zero byte, we check it byte by byte, but if bytes with - high bits set are common in the strings, then this will give poor - performance. You can #define EIGHTBIT_NOT_RARE and the algorithm - will use one tick slower, but more precise test - ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), - which does not give any false alarms (but if some bits are set, - one cannot assume from it which bytes are zero and which are not). - It is yet to be measured, what is the correct default for glibc - in these days for an average user. - */ - - .text - .align 32 -ENTRY(strlen) - sethi %hi(0x01010101), %g1 /* IEU0 Group */ - ldub [%o0], %o3 /* Load */ - or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ - mov %o0, %o1 /* IEU1 */ - - sllx %g1, 32, %g4 /* IEU0 Group */ - andcc %o0, 7, %g0 /* IEU1 */ - or %g1, %g4, %g1 /* IEU0 Group */ - brz,pn %o3, 13f /* CTI+IEU1 */ - - sllx %g1, 7, %g4 /* IEU0 Group */ - bne,a,pn %icc, 15f /* CTI */ - add %o0, 1, %o0 /* IEU1 */ - /* %g1 = 0x0101010101010101 * - * %g4 = 0x8080808080808080 * - * %o0 = string pointer * - * %o1 = start of string */ -1: ldx [%o0], %o3 /* Load Group */ - - add %o0, 8, %o0 /* IEU1 */ -2: sub %o3, %g1, %o2 /* IEU0 Group */ -#ifdef EIGHTBIT_NOT_RARE - andn %o2, %o3, %o5 /* IEU0 Group */ - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o5, %g4, %g0 /* IEU1 Group */ -#else - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o2, %g4, %g0 /* IEU1 Group */ -#endif - - be,pt %xcc, 2b /* CTI */ - add %o0, 8, %o0 /* IEU0 */ - addcc %o2, %g1, %g5 /* IEU1 Group */ -#ifdef EIGHTBIT_NOT_RARE - srlx %o5, 32, %o5 /* IEU0 */ - -3: andcc %o5, %g4, %g0 /* IEU1 Group */ -#else - srlx %o2, 32, %o2 /* IEU0 */ - -3: andcc %o2, %g4, %g0 /* IEU1 Group */ -#endif - be,pn %xcc, 4f /* CTI */ - srlx %g5, 56, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 12f /* CTI */ - srlx %g5, 48, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 11f /* CTI */ - - srlx %g5, 40, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 10f /* CTI */ - srlx %g5, 32, %o2 /* IEU0 */ - - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 9f /* CTI */ -4: srlx %g5, 24, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 8f /* CTI */ - srlx %g5, 16, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 7f /* CTI */ - - 
srlx %g5, 8, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 6f /* CTI */ - sub %o3, %g1, %o2 /* IEU0 */ - - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 5f /* CTI */ - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o2, %g4, %g0 /* IEU1 Group */ - - be,pt %xcc, 2b /* CTI */ - add %o0, 8, %o0 /* IEU0 */ - addcc %o2, %g1, %g5 /* IEU1 Group */ - ba,pt %xcc, 3b /* CTI */ - - srlx %o2, 32, %o2 /* IEU0 */ -5: add %o0, -9, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ - -6: add %o0, -10, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -7: add %o0, -11, %o0 /* IEU0 Group */ - - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -8: add %o0, -12, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - - sub %o0, %o1, %o0 /* IEU0 */ -9: add %o0, -13, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ - -10: add %o0, -14, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -11: add %o0, -15, %o0 /* IEU0 Group */ - - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -12: add %o0, -16, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - - sub %o0, %o1, %o0 /* IEU0 */ -13: retl /* CTI+IEU1 Group */ - mov 0, %o0 /* IEU0 */ - nop - -15: ldub [%o0], %o3 /* Load Group */ -16: andcc %o0, 7, %g0 /* IEU1 */ - be,pn %icc, 1b /* CTI */ - nop /* IEU0 Group */ - - add %o0, 1, %o0 /* IEU1 */ - andcc %o3, 0xff, %g0 /* IEU1 Group */ - bne,a,pt %icc, 16b /* CTI */ - lduba [%o0] ASI_PNF, %o3 /* Load */ - - add %o0, -1, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -END(strlen) -libc_hidden_def(strlen) diff --git a/libc/string/stpcpy.c b/libc/string/stpcpy.c index 8a487584e..2fd2c0648 100644 --- a/libc/string/stpcpy.c +++ b/libc/string/stpcpy.c @@ -10,19 +10,13 @@ #ifdef WANT_WIDE # define Wstpcpy wcpcpy #else -/* Experimentally off - libc_hidden_proto(stpcpy) */ +# undef stpcpy # define Wstpcpy stpcpy #endif Wchar *Wstpcpy(register Wchar * __restrict s1, const Wchar * __restrict s2) { -#ifdef __BCC__ - do { - *s1 = *s2++; - } while (*s1++ != 0); -#else while ( (*s1++ = *s2++) != 0 ); -#endif return s1 - 1; } diff --git a/libc/string/stpncpy.c b/libc/string/stpncpy.c index dac8471fd..50d83a131 100644 --- a/libc/string/stpncpy.c +++ b/libc/string/stpncpy.c @@ -10,7 +10,6 @@ #ifdef WANT_WIDE # define Wstpncpy wcpncpy #else -/* Experimentally off - libc_hidden_proto(stpncpy) */ # define Wstpncpy stpncpy #endif @@ -21,22 +20,10 @@ Wchar *Wstpncpy(register Wchar * __restrict s1, Wchar *s = s1; const Wchar *p = s2; -#ifdef __BCC__ - while (n--) { - if ((*s = *s2) != 0) s2++; /* Need to fill tail with 0s. */ - ++s; - } - return s1 + (s2 - p); -#else while (n) { if ((*s = *s2) != 0) s2++; /* Need to fill tail with 0s. 
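stpcpy and stpncpy above differ from strcpy/strncpy only in their return value: a pointer to the copied string's terminating NUL (or, for stpncpy, to dst + n when the source did not fit) instead of the start of the destination. That is what makes chained copies cheap, since nothing already written is re-scanned. A small usage sketch, assuming the buffer is large enough for the strings used here:

#include <string.h>
#include <stdio.h>

int main(void)
{
    char path[64];          /* assumed big enough for this example */
    char *p = path;

    /* Each call resumes where the previous one ended, so the growing
       string is never re-scanned the way repeated strcat would. */
    p = stpcpy(p, "/usr");
    p = stpcpy(p, "/lib");
    p = stpcpy(p, "/uclibc");

    printf("%s (%zu bytes used)\n", path, (size_t)(p - path) + 1);
    return 0;
}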
*/ ++s; --n; } return s1 + (s2 - p); -#endif } - -#ifndef WANT_WIDE -libc_hidden_def(stpncpy) -#endif diff --git a/libc/string/strcasecmp.c b/libc/string/strcasecmp.c index f9852236b..f894e426e 100644 --- a/libc/string/strcasecmp.c +++ b/libc/string/strcasecmp.c @@ -12,28 +12,15 @@ #ifdef WANT_WIDE # define strcasecmp wcscasecmp # define strcasecmp_l wcscasecmp_l -libc_hidden_proto(wcscasecmp) -# if defined(__USE_GNU) && defined(__UCLIBC_HAS_XLOCALE__) -libc_hidden_proto(wcscasecmp_l) -# endif # ifdef __UCLIBC_DO_XLOCALE -libc_hidden_proto(towlower_l) # define TOLOWER(C) towlower_l((C), locale_arg) # else -libc_hidden_proto(towlower) # define TOLOWER(C) towlower((C)) # endif #else -/* Experimentally off - libc_hidden_proto(strcasecmp) */ -/* Experimentally off - libc_hidden_proto(strcasecmp_l) */ # ifdef __UCLIBC_DO_XLOCALE -libc_hidden_proto(tolower_l) # define TOLOWER(C) tolower_l((C), locale_arg) # else -#if !defined __UCLIBC_HAS_XLOCALE__ && defined __UCLIBC_HAS_CTYPE_TABLES__ -libc_hidden_proto(__ctype_tolower) -#endif -libc_hidden_proto(tolower) # define TOLOWER(C) tolower((C)) # endif #endif @@ -44,11 +31,12 @@ int strcasecmp(register const Wchar *s1, register const Wchar *s2) { return strcasecmp_l(s1, s2, __UCLIBC_CURLOCALE); } +#ifndef WANT_WIDE libc_hidden_def(strcasecmp) +#endif #else /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */ -/* Experimentally off - libc_hidden_proto(__XL_NPP(strcasecmp)) */ int __XL_NPP(strcasecmp)(register const Wchar *s1, register const Wchar *s2 __LOCALE_PARAM ) { @@ -73,6 +61,8 @@ int __XL_NPP(strcasecmp)(register const Wchar *s1, register const Wchar *s2 return r; #endif } +#if !defined WANT_WIDE || (defined WANT_WIDE && defined __UCLIBC_DO_XLOCALE) libc_hidden_def(__XL_NPP(strcasecmp)) +#endif #endif /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */ diff --git a/libc/string/strcasestr.c b/libc/string/strcasestr.c index 2671b4b98..3334086bf 100644 --- a/libc/string/strcasestr.c +++ b/libc/string/strcasestr.c @@ -8,13 +8,6 @@ #include "_string.h" #include <ctype.h> -#ifdef __UCLIBC_HAS_XLOCALE__ -libc_hidden_proto(__ctype_tolower_loc) -#elif defined __UCLIBC_HAS_CTYPE_TABLES__ -libc_hidden_proto(__ctype_tolower) -#endif -libc_hidden_proto(tolower) - char *strcasestr(const char *s1, const char *s2) { register const char *s = s1; diff --git a/libc/string/strcat.c b/libc/string/strcat.c index 40a9be111..63619bcc8 100644 --- a/libc/string/strcat.c +++ b/libc/string/strcat.c @@ -13,8 +13,6 @@ # define Wstrcat strcat #endif -libc_hidden_proto(Wstrcat) - Wchar *Wstrcat(Wchar * __restrict s1, register const Wchar * __restrict s2) { register Wchar *s = s1; diff --git a/libc/string/strchr.c b/libc/string/strchr.c index 329545e9f..7ea477362 100644 --- a/libc/string/strchr.c +++ b/libc/string/strchr.c @@ -13,8 +13,6 @@ # define Wstrchr strchr #endif -libc_hidden_proto(Wstrchr) - Wchar *Wstrchr(register const Wchar *s, Wint c) { do { @@ -25,8 +23,9 @@ Wchar *Wstrchr(register const Wchar *s, Wint c) return NULL; } -libc_hidden_def(Wstrchr) - -#if !defined WANT_WIDE && defined __UCLIBC_SUSV3_LEGACY__ +#ifndef WANT_WIDE +libc_hidden_def(strchr) +# ifdef __UCLIBC_SUSV3_LEGACY__ weak_alias(strchr,index) +# endif #endif diff --git a/libc/string/strchrnul.c b/libc/string/strchrnul.c index 6fe7f6c3d..9c10e1fc8 100644 --- a/libc/string/strchrnul.c +++ b/libc/string/strchrnul.c @@ -15,13 +15,13 @@ # define Wstrchrnul strchrnul #endif -libc_hidden_proto(Wstrchrnul) - Wchar *Wstrchrnul(register const Wchar *s, Wint c) { --s; while 
(*++s && (*s != ((Wchar)c))); return (Wchar *) s; } -libc_hidden_def(Wstrchrnul) +# ifndef WANT_WIDE +libc_hidden_def(strchrnul) +# endif #endif diff --git a/libc/string/strcmp.c b/libc/string/strcmp.c index 5477adf3a..abae61812 100644 --- a/libc/string/strcmp.c +++ b/libc/string/strcmp.c @@ -15,8 +15,6 @@ # define Wstrcoll strcoll #endif -libc_hidden_proto(Wstrcmp) - int Wstrcmp(register const Wchar *s1, register const Wchar *s2) { #ifdef WANT_WIDE @@ -40,7 +38,6 @@ int Wstrcmp(register const Wchar *s1, register const Wchar *s2) libc_hidden_def(Wstrcmp) #ifndef __UCLIBC_HAS_LOCALE__ -libc_hidden_proto(Wstrcoll) strong_alias(Wstrcmp,Wstrcoll) libc_hidden_def(Wstrcoll) #endif diff --git a/libc/string/strcpy.c b/libc/string/strcpy.c index cda4094ac..549360c22 100644 --- a/libc/string/strcpy.c +++ b/libc/string/strcpy.c @@ -13,20 +13,15 @@ # define Wstrcpy strcpy #endif -libc_hidden_proto(Wstrcpy) - Wchar *Wstrcpy(Wchar * __restrict s1, const Wchar * __restrict s2) { register Wchar *s = s1; -#ifdef __BCC__ - do { - *s = *s2++; - } while (*s++ != 0); -#else while ( (*s++ = *s2++) != 0 ); -#endif return s1; } -libc_hidden_def(Wstrcpy) + +#ifndef WANT_WIDE +libc_hidden_def(strcpy) +#endif diff --git a/libc/string/strcspn.c b/libc/string/strcspn.c index 1ec460a15..0466af99b 100644 --- a/libc/string/strcspn.c +++ b/libc/string/strcspn.c @@ -10,7 +10,6 @@ #ifdef WANT_WIDE # define Wstrcspn wcscspn #else -/* Experimentally off - libc_hidden_proto(strcspn) */ # define Wstrcspn strcspn #endif diff --git a/libc/string/strdup.c b/libc/string/strdup.c index 61fc186c8..049a23f63 100644 --- a/libc/string/strdup.c +++ b/libc/string/strdup.c @@ -9,16 +9,12 @@ #include <stdlib.h> #ifdef WANT_WIDE -libc_hidden_proto(wcslen) # define Wstrdup wcsdup # define Wstrlen wcslen #else -/* Experimentally off - libc_hidden_proto(strdup) */ -/* Experimentally off - libc_hidden_proto(strlen) */ # define Wstrdup strdup # define Wstrlen strlen #endif -/* Experimentally off - libc_hidden_proto(memcpy) */ Wchar *Wstrdup(register const Wchar *s1) { diff --git a/libc/string/strerror.c b/libc/string/strerror.c index 355c7bdda..7250da07d 100644 --- a/libc/string/strerror.c +++ b/libc/string/strerror.c @@ -9,8 +9,6 @@ #include <string.h> #include "_syserrmsg.h" -/* Experimentally off - libc_hidden_proto(strerror) */ -libc_hidden_proto(__xpg_strerror_r) char *strerror(int errnum) { diff --git a/libc/string/strlcpy.c b/libc/string/strlcpy.c index cdad4dc5d..83787049a 100644 --- a/libc/string/strlcpy.c +++ b/libc/string/strlcpy.c @@ -11,21 +11,14 @@ # define Wstrlcpy __wcslcpy # define Wstrxfrm wcsxfrm #else -/* Experimentally off - libc_hidden_proto(strlcpy) */ # define Wstrlcpy strlcpy # define Wstrxfrm strxfrm #endif - /* OpenBSD function: * Copy at most n-1 chars from src to dst and nul-terminate dst. * Returns strlen(src), so truncation occurred if the return value is >= n. 
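Since strlcpy always NUL-terminates within n bytes and returns strlen(src), as the comment above states, a caller can detect truncation with a single comparison against the buffer size. A minimal usage sketch (strlcpy is the BSD extension this file provides; the destination size here is arbitrary):

#include <string.h>
#include <stdio.h>

int main(void)
{
    char dst[8];
    const char *src = "a string much longer than eight bytes";

    size_t needed = strlcpy(dst, src, sizeof(dst));
    if (needed >= sizeof(dst))
        fprintf(stderr, "truncated: wanted %zu bytes, kept \"%s\"\n",
                needed + 1, dst);
    return 0;
}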
*/ -#ifdef WANT_WIDE -size_t Wstrlcpy(register Wchar *__restrict dst, - register const Wchar *__restrict src, - size_t n) attribute_hidden; -#endif size_t Wstrlcpy(register Wchar *__restrict dst, register const Wchar *__restrict src, size_t n) @@ -51,13 +44,8 @@ size_t Wstrlcpy(register Wchar *__restrict dst, } #ifndef WANT_WIDE libc_hidden_def(strlcpy) -#ifndef __UCLIBC_HAS_LOCALE__ -/* Experimentally off - libc_hidden_proto(strxfrm) */ -strong_alias(strlcpy,strxfrm) -libc_hidden_def(strxfrm) #endif -#else + #ifndef __UCLIBC_HAS_LOCALE__ -strong_alias(__wcslcpy,wcsxfrm) -#endif +strong_alias(Wstrlcpy,Wstrxfrm) #endif diff --git a/libc/string/strlen.c b/libc/string/strlen.c index 2edb6e4e8..021a8cabc 100644 --- a/libc/string/strlen.c +++ b/libc/string/strlen.c @@ -13,8 +13,6 @@ # define Wstrlen strlen #endif -libc_hidden_proto(Wstrlen) - size_t Wstrlen(const Wchar *s) { register const Wchar *p; diff --git a/libc/string/strncasecmp.c b/libc/string/strncasecmp.c index ed052fa21..2eac47dd4 100644 --- a/libc/string/strncasecmp.c +++ b/libc/string/strncasecmp.c @@ -12,28 +12,15 @@ #ifdef WANT_WIDE # define strncasecmp wcsncasecmp # define strncasecmp_l wcsncasecmp_l -libc_hidden_proto(wcsncasecmp) -# if defined(__USE_GNU) && defined(__UCLIBC_HAS_XLOCALE__) -libc_hidden_proto(wcsncasecmp_l) -# endif # ifdef __UCLIBC_DO_XLOCALE -libc_hidden_proto(towlower_l) # define TOLOWER(C) towlower_l((C), locale_arg) # else -libc_hidden_proto(towlower) # define TOLOWER(C) towlower((C)) # endif #else -/* Experimentally off - libc_hidden_proto(strncasecmp) */ -/* Experimentally off - libc_hidden_proto(strncasecmp_l) */ # ifdef __UCLIBC_DO_XLOCALE -libc_hidden_proto(tolower_l) # define TOLOWER(C) tolower_l((C), locale_arg) # else -#if !defined __UCLIBC_HAS_XLOCALE__ && defined __UCLIBC_HAS_CTYPE_TABLES__ -libc_hidden_proto(__ctype_tolower) -#endif -libc_hidden_proto(tolower) # define TOLOWER(C) tolower((C)) # endif #endif @@ -44,11 +31,12 @@ int strncasecmp(register const Wchar *s1, register const Wchar *s2, size_t n) { return strncasecmp_l(s1, s2, n, __UCLIBC_CURLOCALE); } +#ifndef WANT_WIDE libc_hidden_def(strncasecmp) +#endif #else /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */ -/* Experimentally off - libc_hidden_proto(__XL_NPP(strncasecmp)) */ int __XL_NPP(strncasecmp)(register const Wchar *s1, register const Wchar *s2, size_t n __LOCALE_PARAM ) { @@ -76,6 +64,8 @@ int __XL_NPP(strncasecmp)(register const Wchar *s1, register const Wchar *s2, return r; #endif } +#if !defined WANT_WIDE || (defined WANT_WIDE && defined __UCLIBC_DO_XLOCALE) libc_hidden_def(__XL_NPP(strncasecmp)) +#endif #endif /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */ diff --git a/libc/string/strncat.c b/libc/string/strncat.c index 0180d1328..0fa9b4ae1 100644 --- a/libc/string/strncat.c +++ b/libc/string/strncat.c @@ -10,7 +10,6 @@ #ifdef WANT_WIDE # define Wstrncat wcsncat #else -/* Experimentally off - libc_hidden_proto(strncat) */ # define Wstrncat strncat #endif @@ -21,14 +20,10 @@ Wchar *Wstrncat(Wchar * __restrict s1, register const Wchar * __restrict s2, while (*s++); --s; -#ifdef __BCC__ - while (n-- && ((*s = *s2++) != 0)) ++s; -#else while (n && ((*s = *s2++) != 0)) { --n; ++s; } -#endif *s = 0; return s1; diff --git a/libc/string/strncmp.c b/libc/string/strncmp.c index 59e4a2c22..2da61771c 100644 --- a/libc/string/strncmp.c +++ b/libc/string/strncmp.c @@ -10,7 +10,6 @@ #ifdef WANT_WIDE # define Wstrncmp wcsncmp #else -/* Experimentally off - libc_hidden_proto(strncmp) */ # define 
Wstrncmp strncmp #endif diff --git a/libc/string/strncpy.c b/libc/string/strncpy.c index d93561294..4a44e1f02 100644 --- a/libc/string/strncpy.c +++ b/libc/string/strncpy.c @@ -10,7 +10,6 @@ #ifdef WANT_WIDE # define Wstrncpy wcsncpy #else -/* Experimentally off - libc_hidden_proto(strncpy) */ # define Wstrncpy strncpy #endif @@ -19,18 +18,11 @@ Wchar *Wstrncpy(Wchar * __restrict s1, register const Wchar * __restrict s2, { register Wchar *s = s1; -#ifdef __BCC__ - while (n--) { - if ((*s = *s2) != 0) s2++; /* Need to fill tail with 0s. */ - ++s; - } -#else while (n) { if ((*s = *s2) != 0) s2++; /* Need to fill tail with 0s. */ ++s; --n; } -#endif return s1; } diff --git a/libc/string/strndup.c b/libc/string/strndup.c index 96a36d404..8e608669c 100644 --- a/libc/string/strndup.c +++ b/libc/string/strndup.c @@ -8,9 +8,6 @@ #include "_string.h" #include <stdlib.h> -/* Experimentally off - libc_hidden_proto(strndup) */ -/* Experimentally off - libc_hidden_proto(strnlen) */ -/* Experimentally off - libc_hidden_proto(memcpy) */ char *strndup(register const char *s1, size_t n) { diff --git a/libc/string/strnlen.c b/libc/string/strnlen.c index 8fbc25c11..08de0887d 100644 --- a/libc/string/strnlen.c +++ b/libc/string/strnlen.c @@ -15,26 +15,17 @@ # define Wstrnlen strnlen #endif -libc_hidden_proto(Wstrnlen) - size_t Wstrnlen(const Wchar *s, size_t max) { register const Wchar *p = s; -#ifdef __BCC__ - /* bcc can optimize the counter if it thinks it is a pointer... */ - register const char *maxp = (const char *) max; -#else -# define maxp max -#endif - while (maxp && *p) { + while (max && *p) { ++p; - --maxp; + --max; } return p - s; } -#undef maxp libc_hidden_def(Wstrnlen) #endif diff --git a/libc/string/strpbrk.c b/libc/string/strpbrk.c index abeb84380..ddfc75172 100644 --- a/libc/string/strpbrk.c +++ b/libc/string/strpbrk.c @@ -13,8 +13,6 @@ # define Wstrpbrk strpbrk #endif -libc_hidden_proto(Wstrpbrk) - Wchar *Wstrpbrk(const Wchar *s1, const Wchar *s2) { register const Wchar *s; diff --git a/libc/string/strrchr.c b/libc/string/strrchr.c index 253c4166d..db12bbc7c 100644 --- a/libc/string/strrchr.c +++ b/libc/string/strrchr.c @@ -10,7 +10,6 @@ #ifdef WANT_WIDE # define Wstrrchr wcsrchr #else -/* Experimentally off - libc_hidden_proto(strrchr) */ # define Wstrrchr strrchr #endif diff --git a/libc/string/strsep.c b/libc/string/strsep.c index 373b00a71..ce17dcf89 100644 --- a/libc/string/strsep.c +++ b/libc/string/strsep.c @@ -9,10 +9,7 @@ #ifdef __USE_BSD -/* Experimentally off - libc_hidden_proto(strpbrk) */ -/* Experimentally off - libc_hidden_proto(strcspn) */ -/* Experimentally off - libc_hidden_proto(strsep) */ char *strsep(char ** __restrict s1, const char * __restrict s2) { register char *s = *s1; diff --git a/libc/string/strsignal.c b/libc/string/strsignal.c index ee083d649..0fbbf8504 100644 --- a/libc/string/strsignal.c +++ b/libc/string/strsignal.c @@ -18,16 +18,13 @@ #include <bits/uClibc_uintmaxtostr.h> #include <signal.h> -/* Experimentally off - libc_hidden_proto(strsignal) */ -/* Experimentally off - libc_hidden_proto(memcpy) */ - #define _SYS_NSIG 32 #ifdef __UCLIBC_HAS_SIGNUM_MESSAGES__ # define _SYS_SIGMSG_MAXLEN 25 -#else /* __UCLIBC_HAS_SIGNUM_MESSAGES__ */ +#else # define _SYS_SIGMSG_MAXLEN 0 -#endif /* __UCLIBC_HAS_SIGNUM_MESSAGES__ */ +#endif #if _SYS_SIGMSG_MAXLEN < __UIM_BUFLEN_INT + 15 # define _STRSIGNAL_BUFSIZE (__UIM_BUFLEN_INT + 15) @@ -85,16 +82,16 @@ static const unsigned char sstridx[] = { char *strsignal(int signum) { - register char *s; - int i; - static char 
buf[_STRSIGNAL_BUFSIZE]; - static const char unknown[] = { + register char *s; + int i; + static char buf[_STRSIGNAL_BUFSIZE]; + static const char unknown[] = { 'U', 'n', 'k', 'n', 'o', 'w', 'n', ' ', 's', 'i', 'g', 'n', 'a', 'l', ' ' - }; + }; #if defined(__alpha__) || defined(__mips__) || defined(__hppa__) || defined(__sparc__) /* Need to translate signum to string index. */ - for (i = 0 ; i < sizeof(sstridx)/sizeof(sstridx[0]) ; i++) { + for (i = 0; i < sizeof(sstridx)/sizeof(sstridx[0]); i++) { if (sstridx[i] == signum) { goto GOT_SSTRIDX; } @@ -106,12 +103,12 @@ char *strsignal(int signum) i = signum; #endif - if (((unsigned int) signum) < _SYS_NSIG) { + if (((unsigned int) signum) < _SYS_NSIG) { /* Trade time for space. This function should rarely be called * so rather than keeping an array of pointers for the different * messages, just run through the buffer until we find the * correct string. */ - for (s = (char *) _string_syssigmsgs ; i ; ++s) { + for (s = (char *) _string_syssigmsgs; i; ++s) { if (!*s) { --i; } @@ -119,10 +116,10 @@ char *strsignal(int signum) if (*s) { /* Make sure we have an actual message. */ goto DONE; } - } + } - s = _int10tostr(buf+sizeof(buf)-1, signum) - sizeof(unknown); - memcpy(s, unknown, sizeof(unknown)); + s = _int10tostr(buf + sizeof(buf)-1, signum) - sizeof(unknown); + memcpy(s, unknown, sizeof(unknown)); DONE: return s; @@ -132,13 +129,12 @@ char *strsignal(int signum) char *strsignal(int signum) { - static char buf[_STRSIGNAL_BUFSIZE]; - static const char unknown[] = { + static char buf[_STRSIGNAL_BUFSIZE]; + static const char unknown[] = { 'U', 'n', 'k', 'n', 'o', 'w', 'n', ' ', 's', 'i', 'g', 'n', 'a', 'l', ' ' - }; + }; - return (char *) memcpy(_int10tostr(buf+sizeof(buf)-1, signum) - - sizeof(unknown), + return memcpy(_int10tostr(buf + sizeof(buf)-1, signum) - sizeof(unknown), unknown, sizeof(unknown)); } diff --git a/libc/string/strspn.c b/libc/string/strspn.c index ca83ef900..942b6f308 100644 --- a/libc/string/strspn.c +++ b/libc/string/strspn.c @@ -13,8 +13,6 @@ # define Wstrspn strspn #endif -libc_hidden_proto(Wstrspn) - size_t Wstrspn(const Wchar *s1, const Wchar *s2) { register const Wchar *s = s1; diff --git a/libc/string/strstr.c b/libc/string/strstr.c index 05712e62b..7e2a64e7d 100644 --- a/libc/string/strstr.c +++ b/libc/string/strstr.c @@ -10,7 +10,6 @@ #ifdef WANT_WIDE # define Wstrstr wcsstr #else -/* Experimentally off - libc_hidden_proto(strstr) */ # define Wstrstr strstr #endif @@ -39,6 +38,6 @@ Wchar *Wstrstr(const Wchar *s1, const Wchar *s2) } #ifndef WANT_WIDE libc_hidden_def(strstr) -#else +#elif defined __UCLIBC_SUSV3_LEGACY__ strong_alias(wcsstr,wcswcs) #endif diff --git a/libc/string/strtok.c b/libc/string/strtok.c index 159dd6b6a..c337d81a7 100644 --- a/libc/string/strtok.c +++ b/libc/string/strtok.c @@ -15,7 +15,6 @@ # define Wstrtok_r strtok_r #endif -/* Experimentally off - libc_hidden_proto(Wstrtok_r) */ Wchar *Wstrtok(Wchar * __restrict s1, const Wchar * __restrict s2) { diff --git a/libc/string/strtok_r.c b/libc/string/strtok_r.c index 2ad7746b1..2026888f8 100644 --- a/libc/string/strtok_r.c +++ b/libc/string/strtok_r.c @@ -8,15 +8,10 @@ #include "_string.h" #ifdef WANT_WIDE -libc_hidden_proto(wcsspn) -libc_hidden_proto(wcspbrk) # define Wstrtok_r wcstok # define Wstrspn wcsspn # define Wstrpbrk wcspbrk #else -/* Experimentally off - libc_hidden_proto(strtok_r) */ -/* Experimentally off - libc_hidden_proto(strspn) */ -/* Experimentally off - libc_hidden_proto(strpbrk) */ # define Wstrtok_r strtok_r # define 
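The strsignal loop above ("Trade time for space") finds message i by walking one flat buffer of NUL-separated strings and counting terminators, instead of keeping a table of pointers. A standalone C sketch of the same lookup; the table contents below are invented for illustration and are not the actual _string_syssigmsgs data:

#include <stdio.h>

/* Messages stored back to back, each ending in '\0'. */
static const char packed_msgs[] =
    "Hangup\0Interrupt\0Quit\0Illegal instruction";

static const char *packed_lookup(const char *table, int i)
{
    const char *s = table;
    while (i) {             /* skip i terminators */
        if (*s == '\0')
            --i;
        ++s;
    }
    return s;               /* start of message i */
}

int main(void)
{
    printf("%s\n", packed_lookup(packed_msgs, 2));   /* prints "Quit" */
    return 0;
}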
Wstrspn strspn # define Wstrpbrk strpbrk diff --git a/libc/string/strverscmp.c b/libc/string/strverscmp.c new file mode 100644 index 000000000..7818a9186 --- /dev/null +++ b/libc/string/strverscmp.c @@ -0,0 +1,106 @@ +/* Compare strings while treating digits characters numerically. + Copyright (C) 1997-2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jean-François Bignolles <bignolle@ecoledoc.ibp.fr>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdint.h> +#include <string.h> +#include <ctype.h> + +/* states: S_N: normal, S_I: comparing integral part, S_F: comparing + fractionnal parts, S_Z: idem but with leading Zeroes only */ +#define S_N 0x0 +#define S_I 0x3 +#define S_F 0x6 +#define S_Z 0x9 + +/* result_type: CMP: return diff; LEN: compare using len_diff/diff */ +#define CMP 2 +#define LEN 3 + + +/* Compare S1 and S2 as strings holding indices/version numbers, + returning less than, equal to or greater than zero if S1 is less than, + equal to or greater than S2 (for more info, see the texinfo doc). +*/ + +int strverscmp (const char *s1, const char *s2) +{ + const unsigned char *p1 = (const unsigned char *) s1; + const unsigned char *p2 = (const unsigned char *) s2; + + /* Symbol(s) 0 [1-9] others + Transition (10) 0 (01) d (00) x */ + static const uint8_t next_state[] = + { + /* state x d 0 */ + /* S_N */ S_N, S_I, S_Z, + /* S_I */ S_N, S_I, S_I, + /* S_F */ S_N, S_F, S_F, + /* S_Z */ S_N, S_F, S_Z + }; + + static const int8_t result_type[] = + { + /* state x/x x/d x/0 d/x d/d d/0 0/x 0/d 0/0 */ + + /* S_N */ CMP, CMP, CMP, CMP, LEN, CMP, CMP, CMP, CMP, + /* S_I */ CMP, -1, -1, +1, LEN, LEN, +1, LEN, LEN, + /* S_F */ CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, + /* S_Z */ CMP, +1, +1, -1, CMP, CMP, -1, CMP, CMP + }; + unsigned char c1, c2; + int state, diff; + + if (p1 == p2) + return 0; + + c1 = *p1++; + c2 = *p2++; + /* Hint: '0' is a digit too. */ + state = S_N + ((c1 == '0') + (isdigit (c1) != 0)); + + while ((diff = c1 - c2) == 0) + { + if (c1 == '\0') + return diff; + + state = next_state[state]; + c1 = *p1++; + c2 = *p2++; + state += (c1 == '0') + (isdigit (c1) != 0); + } + + state = result_type[state * 3 + (((c2 == '0') + (isdigit (c2) != 0)))]; + + switch (state) + { + case CMP: + return diff; + + case LEN: + while (isdigit (*p1++)) + if (!isdigit (*p2++)) + return 1; + + return isdigit (*p2) ? 
-1 : diff; + + default: + return state; + } +} +libc_hidden_def(strverscmp) diff --git a/libc/string/sys_errlist.c b/libc/string/sys_errlist.c index 17ed4d62c..682ff0e7e 100644 --- a/libc/string/sys_errlist.c +++ b/libc/string/sys_errlist.c @@ -12,8 +12,6 @@ extern const char _string_syserrmsgs[] attribute_hidden; #ifdef __UCLIBC_HAS_SYS_ERRLIST__ -link_warning(_sys_errlist, "sys_nerr and sys_errlist are obsolete and uClibc support for them (in at least some configurations) will probably be unavailable in the near future.") - const char *const sys_errlist[] = { [0] = _string_syserrmsgs + 0, [EPERM] = _string_syserrmsgs + 8, diff --git a/libc/string/x86_64/bzero.S b/libc/string/x86_64/bzero.S index 4d179ec4e..231d7cb41 100644 --- a/libc/string/x86_64/bzero.S +++ b/libc/string/x86_64/bzero.S @@ -1,5 +1,6 @@ #include <features.h> #ifdef __UCLIBC_SUSV3_LEGACY__ # define memset bzero +# define __memset_chk __bzero_chk # include "memset.S" #endif diff --git a/libc/string/x86_64/memcpy.S b/libc/string/x86_64/memcpy.S index 697b992d0..e164278df 100644 --- a/libc/string/x86_64/memcpy.S +++ b/libc/string/x86_64/memcpy.S @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include "_glibc_inc.h" @@ -26,7 +25,7 @@ #define MEMPCPY_P (defined memcpy) .text -#if defined PIC && !defined NOT_IN_libc +#if defined __PIC__ && !defined NOT_IN_libc && defined __UCLIBC_HAS_FORTIFY__ ENTRY (__memcpy_chk) cmpq %rdx, %rcx jb HIDDEN_JUMPTARGET (__chk_fail) diff --git a/libc/string/x86_64/mempcpy.S b/libc/string/x86_64/mempcpy.S index 3816d9f72..b0607aa57 100644 --- a/libc/string/x86_64/mempcpy.S +++ b/libc/string/x86_64/mempcpy.S @@ -1,3 +1,4 @@ #define memcpy mempcpy +#define __memcpy_chk __mempcpy_chk #include "memcpy.S" libc_hidden_def(mempcpy) diff --git a/libc/string/x86_64/memset.S b/libc/string/x86_64/memset.S index 46751006b..d6744129d 100644 --- a/libc/string/x86_64/memset.S +++ b/libc/string/x86_64/memset.S @@ -15,9 +15,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include "_glibc_inc.h" @@ -29,7 +28,7 @@ #define LARGE $120000 .text -#if !BZERO_P && defined PIC && !defined NOT_IN_libc +#if defined __PIC__ && !defined NOT_IN_libc && defined __UCLIBC_HAS_FORTIFY__ ENTRY (__memset_chk) cmpq %rdx, %rcx jb HIDDEN_JUMPTARGET (__chk_fail) @@ -142,6 +141,6 @@ END (memset) libc_hidden_def(memset) #endif -#if !BZERO_P && defined PIC && !defined NOT_IN_libc +#if !BZERO_P && defined __PIC__ && !defined NOT_IN_libc && defined __UCLIBC_HAS_FORTIFY__ strong_alias (__memset_chk, __memset_zero_constant_len_parameter) #endif diff --git a/libc/string/x86_64/strcat.S b/libc/string/x86_64/strcat.S index 23d068fea..55e09e5f1 100644 --- a/libc/string/x86_64/strcat.S +++ b/libc/string/x86_64/strcat.S @@ -15,9 +15,8 @@ Lesser General Public License for more details. 
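The strverscmp added above orders embedded digit runs numerically, which is what the S_N/S_I/S_F/S_Z state machine in its header comment implements; runs of leading zeroes are ordered as if they were a fractional part. A small usage sketch (strverscmp is a GNU extension, so _GNU_SOURCE is needed for its prototype):

#define _GNU_SOURCE
#include <string.h>
#include <stdio.h>

int main(void)
{
    /* Byte-wise strcmp puts "item#100" before "item#99" because '1' < '9';
       strverscmp compares the digit runs as numbers. */
    printf("%d\n", strcmp("item#99", "item#100") > 0);      /* 1 */
    printf("%d\n", strverscmp("item#99", "item#100") < 0);  /* 1 */

    /* Leading zeroes act like a fractional part, so "000" sorts
       before "09", which in turn sorts before "0". */
    printf("%d\n", strverscmp("000", "09") < 0);             /* 1 */
    return 0;
}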
You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include "_glibc_inc.h" diff --git a/libc/string/x86_64/strchr.S b/libc/string/x86_64/strchr.S index 9ef46b7f2..256b97911 100644 --- a/libc/string/x86_64/strchr.S +++ b/libc/string/x86_64/strchr.S @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include "_glibc_inc.h" diff --git a/libc/string/x86_64/strcmp.S b/libc/string/x86_64/strcmp.S index 437e145bf..05d6f39c1 100644 --- a/libc/string/x86_64/strcmp.S +++ b/libc/string/x86_64/strcmp.S @@ -15,9 +15,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include "_glibc_inc.h" diff --git a/libc/string/x86_64/strcpy.S b/libc/string/x86_64/strcpy.S index 612a30d1a..3ada70fbd 100644 --- a/libc/string/x86_64/strcpy.S +++ b/libc/string/x86_64/strcpy.S @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include "_glibc_inc.h" diff --git a/libc/string/x86_64/strcspn.S b/libc/string/x86_64/strcspn.S index fd9b09c48..7a06c8867 100644 --- a/libc/string/x86_64/strcspn.S +++ b/libc/string/x86_64/strcspn.S @@ -19,9 +19,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include "_glibc_inc.h" diff --git a/libc/string/x86_64/strlen.S b/libc/string/x86_64/strlen.S index 4213f0ab6..9e84326c2 100644 --- a/libc/string/x86_64/strlen.S +++ b/libc/string/x86_64/strlen.S @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include "_glibc_inc.h" diff --git a/libc/string/x86_64/strspn.S b/libc/string/x86_64/strspn.S index 41cff0490..366377649 100644 --- a/libc/string/x86_64/strspn.S +++ b/libc/string/x86_64/strspn.S @@ -19,9 +19,8 @@ Lesser General Public License for more details. 
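The __memcpy_chk and __memset_chk entry points now guarded by __UCLIBC_HAS_FORTIFY__ in the x86_64 hunks above implement the fortify contract: the fourth argument is the compiler-known size of the destination object, and the call is diverted to __chk_fail when the requested length exceeds it. Roughly, in C (a sketch of the behaviour, not the uClibc source; abort stands in for __chk_fail here):

#include <string.h>
#include <stdlib.h>

/* Sketch of what the two-instruction prologue (cmpq %rdx, %rcx;
   jb __chk_fail) does before falling into the ordinary memcpy. */
void *memcpy_chk_sketch(void *dest, const void *src,
                        size_t len, size_t destlen)
{
    if (destlen < len)
        abort();            /* real code jumps to __chk_fail */
    return memcpy(dest, src, len);
}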
You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include "_glibc_inc.h" diff --git a/libc/string/xtensa/memcpy.S b/libc/string/xtensa/memcpy.S index 19f3a6818..244205611 100644 --- a/libc/string/xtensa/memcpy.S +++ b/libc/string/xtensa/memcpy.S @@ -13,11 +13,10 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 51 Franklin Street - Fifth Floor, - Boston, MA 02110-1301, USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ -#include "../../sysdeps/linux/xtensa/sysdep.h" +#include <sysdep.h> #include <bits/xtensa-config.h> .macro src_b r, w0, w1 @@ -83,7 +82,7 @@ __memcpy_aux: loopnez a4, 2f #else beqz a4, 2f - add a7, a3, a4 // a7 = end address for source + add a7, a3, a4 /* a7 = end address for source */ #endif 1: l8ui a6, a3, 0 addi a3, a3, 1 @@ -92,13 +91,13 @@ __memcpy_aux: #if !XCHAL_HAVE_LOOPS blt a3, a7, 1b #endif -2: retw +2: abi_ret /* Destination is unaligned. */ .align 4 -.Ldst1mod2: // dst is only byte aligned +.Ldst1mod2: /* dst is only byte aligned */ /* Do short copies byte-by-byte. */ _bltui a4, 7, .Lbytecopy @@ -113,7 +112,7 @@ __memcpy_aux: /* Return to main algorithm if dst is now aligned. */ _bbci.l a5, 1, .Ldstaligned -.Ldst2mod4: // dst has 16-bit alignment +.Ldst2mod4: /* dst has 16-bit alignment */ /* Do short copies byte-by-byte. */ _bltui a4, 6, .Lbytecopy @@ -134,7 +133,7 @@ __memcpy_aux: ENTRY (memcpy) /* a2 = dst, a3 = src, a4 = len */ - mov a5, a2 // copy dst so that a2 is return value + mov a5, a2 /* copy dst so that a2 is return value */ _bbsi.l a2, 0, .Ldst1mod2 _bbsi.l a2, 1, .Ldst2mod4 .Ldstaligned: @@ -152,7 +151,7 @@ ENTRY (memcpy) #else beqz a7, 2f slli a8, a7, 4 - add a8, a8, a3 // a8 = end of last 16B source chunk + add a8, a8, a3 /* a8 = end of last 16B source chunk */ #endif 1: l32i a6, a3, 0 l32i a7, a3, 4 @@ -182,7 +181,7 @@ ENTRY (memcpy) 3: bbsi.l a4, 2, 4f bbsi.l a4, 1, 5f bbsi.l a4, 0, 6f - retw + abi_ret /* Copy 4 bytes. */ 4: l32i a6, a3, 0 @@ -191,7 +190,7 @@ ENTRY (memcpy) addi a5, a5, 4 bbsi.l a4, 1, 5f bbsi.l a4, 0, 6f - retw + abi_ret /* Copy 2 bytes. */ 5: l16ui a6, a3, 0 @@ -199,14 +198,14 @@ ENTRY (memcpy) s16i a6, a5, 0 addi a5, a5, 2 bbsi.l a4, 0, 6f - retw + abi_ret /* Copy 1 byte. */ 6: l8ui a6, a3, 0 s8i a6, a5, 0 .Ldone: - retw + abi_ret /* Destination is aligned; source is unaligned. */ @@ -218,18 +217,18 @@ ENTRY (memcpy) /* Copy 16 bytes per iteration for word-aligned dst and unaligned src. 
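The loop that follows in the xtensa memcpy (ssa8 to set the shift amount, then the src_b funnel shift) builds each aligned destination word out of two aligned source loads. A rough little-endian C equivalent of a single step, with the helper name invented for illustration and the end-of-buffer and endianness concerns the real code handles left out:

#include <stdint.h>
#include <stdio.h>

/* Reconstruct the 32-bit word at the unaligned address base+off
   (off in 1..3) from the two aligned words surrounding it. */
static uint32_t load_unaligned32(const uint32_t *base, unsigned off)
{
    uint32_t lo = base[0];
    uint32_t hi = base[1];
    return (lo >> (8 * off)) | (hi << (32 - 8 * off));
}

int main(void)
{
    /* Memory bytes (little-endian): 11 22 33 44 55 66 77 88. */
    uint32_t words[2] = { 0x44332211u, 0x88776655u };

    /* The word starting at byte offset 1 is 22 33 44 55. */
    printf("%08x\n", load_unaligned32(words, 1));   /* 55443322 */
    return 0;
}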
*/ - ssa8 a3 // set shift amount from byte offset + ssa8 a3 /* set shift amount from byte offset */ #if UNALIGNED_ADDRESSES_CHECKED - and a11, a3, a8 // save unalignment offset for below - sub a3, a3, a11 // align a3 + and a11, a3, a8 /* save unalignment offset for below */ + sub a3, a3, a11 /* align a3 */ #endif - l32i a6, a3, 0 // load first word + l32i a6, a3, 0 /* load first word */ #if XCHAL_HAVE_LOOPS loopnez a7, 2f #else beqz a7, 2f slli a10, a7, 4 - add a10, a10, a3 // a10 = end of last 16B source chunk + add a10, a10, a3 /* a10 = end of last 16B source chunk */ #endif 1: l32i a7, a3, 4 l32i a8, a3, 8 @@ -273,11 +272,11 @@ ENTRY (memcpy) mov a6, a7 4: #if UNALIGNED_ADDRESSES_CHECKED - add a3, a3, a11 // readjust a3 with correct misalignment + add a3, a3, a11 /* readjust a3 with correct misalignment */ #endif bbsi.l a4, 1, 5f bbsi.l a4, 0, 6f - retw + abi_ret /* Copy 2 bytes. */ 5: l8ui a6, a3, 0 @@ -287,11 +286,11 @@ ENTRY (memcpy) s8i a7, a5, 1 addi a5, a5, 2 bbsi.l a4, 0, 6f - retw + abi_ret /* Copy 1 byte. */ 6: l8ui a6, a3, 0 s8i a6, a5, 0 - retw + abi_ret libc_hidden_def (memcpy) diff --git a/libc/string/xtensa/memset.S b/libc/string/xtensa/memset.S index c0928825d..20bf14c75 100644 --- a/libc/string/xtensa/memset.S +++ b/libc/string/xtensa/memset.S @@ -13,11 +13,10 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 51 Franklin Street - Fifth Floor, - Boston, MA 02110-1301, USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ -#include "../../sysdeps/linux/xtensa/sysdep.h" +#include <sysdep.h> #include <bits/xtensa-config.h> /* Do not use .literal_position in the ENTRY macro. */ @@ -29,7 +28,7 @@ The algorithm is as follows: Create a word with c in all byte positions. - + If the destination is aligned, set 16B chunks with a loop, and then finish up with 8B, 4B, 2B, and 1B stores conditional on the length. @@ -57,21 +56,21 @@ __memset_aux: loopnez a4, 2f #else beqz a4, 2f - add a6, a5, a4 // a6 = ending address + add a6, a5, a4 /* a6 = ending address */ #endif 1: s8i a3, a5, 0 addi a5, a5, 1 #if !XCHAL_HAVE_LOOPS blt a5, a6, 1b #endif -2: retw +2: abi_ret /* Destination is unaligned. */ .align 4 -.Ldst1mod2: // dst is only byte aligned +.Ldst1mod2: /* dst is only byte aligned */ /* Do short sizes byte-by-byte. */ bltui a4, 8, .Lbyteset @@ -84,7 +83,7 @@ __memset_aux: /* Now retest if dst is aligned. */ _bbci.l a5, 1, .Ldstaligned -.Ldst2mod4: // dst has 16-bit alignment +.Ldst2mod4: /* dst has 16-bit alignment */ /* Do short sizes byte-by-byte. */ bltui a4, 8, .Lbyteset @@ -108,7 +107,7 @@ ENTRY (memset) slli a7, a3, 16 or a3, a3, a7 - mov a5, a2 // copy dst so that a2 is return value + mov a5, a2 /* copy dst so that a2 is return value */ /* Check if dst is unaligned. */ _bbsi.l a2, 0, .Ldst1mod2 @@ -124,7 +123,7 @@ ENTRY (memset) #else beqz a7, 2f slli a6, a7, 4 - add a6, a6, a5 // a6 = end of last 16B chunk + add a6, a6, a5 /* a6 = end of last 16B chunk */ #endif /* Set 16 bytes per iteration. */ 1: s32i a3, a5, 0 @@ -160,6 +159,6 @@ ENTRY (memset) /* Set 1 byte. */ s8i a3, a5, 0 -6: retw +6: abi_ret libc_hidden_def (memset) diff --git a/libc/string/xtensa/strcmp.S b/libc/string/xtensa/strcmp.S index 622bb27ed..2dce590db 100644 --- a/libc/string/xtensa/strcmp.S +++ b/libc/string/xtensa/strcmp.S @@ -13,11 +13,10 @@ Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 51 Franklin Street - Fifth Floor, - Boston, MA 02110-1301, USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ -#include "../../sysdeps/linux/xtensa/sysdep.h" +#include <sysdep.h> #include <bits/xtensa-config.h> #include <features.h> @@ -35,45 +34,46 @@ #define MASK4 0x40404040 + .text + .align 4 + .literal_position .literal .Lmask0, MASK0 .literal .Lmask1, MASK1 .literal .Lmask2, MASK2 .literal .Lmask3, MASK3 .literal .Lmask4, MASK4 - - .text ENTRY (strcmp) /* a2 = s1, a3 = s2 */ - l8ui a8, a2, 0 // byte 0 from s1 - l8ui a9, a3, 0 // byte 0 from s2 - movi a10, 3 // mask + l8ui a8, a2, 0 /* byte 0 from s1 */ + l8ui a9, a3, 0 /* byte 0 from s2 */ + movi a10, 3 /* mask */ bne a8, a9, .Lretdiff or a11, a2, a3 bnone a11, a10, .Laligned - xor a11, a2, a3 // compare low two bits of s1 and s2 - bany a11, a10, .Lunaligned // if they have different alignment + xor a11, a2, a3 /* compare low two bits of s1 and s2 */ + bany a11, a10, .Lunaligned /* if they have different alignment */ /* s1/s2 are not word-aligned. */ - addi a2, a2, 1 // advance s1 - beqz a8, .Leq // bytes equal, if zero, strings are equal - addi a3, a3, 1 // advance s2 - bnone a2, a10, .Laligned // if s1/s2 now aligned - l8ui a8, a2, 0 // byte 1 from s1 - l8ui a9, a3, 0 // byte 1 from s2 - addi a2, a2, 1 // advance s1 - bne a8, a9, .Lretdiff // if different, return difference - beqz a8, .Leq // bytes equal, if zero, strings are equal - addi a3, a3, 1 // advance s2 - bnone a2, a10, .Laligned // if s1/s2 now aligned - l8ui a8, a2, 0 // byte 2 from s1 - l8ui a9, a3, 0 // byte 2 from s2 - addi a2, a2, 1 // advance s1 - bne a8, a9, .Lretdiff // if different, return difference - beqz a8, .Leq // bytes equal, if zero, strings are equal - addi a3, a3, 1 // advance s2 + addi a2, a2, 1 /* advance s1 */ + beqz a8, .Leq /* bytes equal, if zero, strings are equal */ + addi a3, a3, 1 /* advance s2 */ + bnone a2, a10, .Laligned /* if s1/s2 now aligned */ + l8ui a8, a2, 0 /* byte 1 from s1 */ + l8ui a9, a3, 0 /* byte 1 from s2 */ + addi a2, a2, 1 /* advance s1 */ + bne a8, a9, .Lretdiff /* if different, return difference */ + beqz a8, .Leq /* bytes equal, if zero, strings are equal */ + addi a3, a3, 1 /* advance s2 */ + bnone a2, a10, .Laligned /* if s1/s2 now aligned */ + l8ui a8, a2, 0 /* byte 2 from s1 */ + l8ui a9, a3, 0 /* byte 2 from s2 */ + addi a2, a2, 1 /* advance s1 */ + bne a8, a9, .Lretdiff /* if different, return difference */ + beqz a8, .Leq /* bytes equal, if zero, strings are equal */ + addi a3, a3, 1 /* advance s2 */ j .Laligned /* s1 and s2 have different alignment. @@ -92,8 +92,8 @@ ENTRY (strcmp) /* (2 mod 4) alignment for loop instruction */ .Lunaligned: #if XCHAL_HAVE_LOOPS - _movi.n a8, 0 // set up for the maximum loop count - loop a8, .Lretdiff // loop forever (almost anyway) + _movi.n a8, 0 /* set up for the maximum loop count */ + loop a8, .Lretdiff /* loop forever (almost anyway) */ #endif .Lnextbyte: l8ui a8, a2, 0 @@ -108,7 +108,7 @@ ENTRY (strcmp) #endif .Lretdiff: sub a2, a8, a9 - retw + abi_ret /* s1 is word-aligned; s2 is word-aligned. @@ -131,32 +131,32 @@ ENTRY (strcmp) #if XCHAL_HAVE_LOOPS .Laligned: .begin no-transform - l32r a4, .Lmask0 // mask for byte 0 + l32r a4, .Lmask0 /* mask for byte 0 */ l32r a7, .Lmask4 /* Loop forever. 
(a4 is more than than the maximum number of iterations) */ loop a4, .Laligned_done /* First unrolled loop body. */ - l32i a8, a2, 0 // get word from s1 - l32i a9, a3, 0 // get word from s2 + l32i a8, a2, 0 /* get word from s1 */ + l32i a9, a3, 0 /* get word from s2 */ slli a5, a8, 1 bne a8, a9, .Lwne2 or a9, a8, a5 bnall a9, a7, .Lprobeq /* Second unrolled loop body. */ - l32i a8, a2, 4 // get word from s1+4 - l32i a9, a3, 4 // get word from s2+4 + l32i a8, a2, 4 /* get word from s1+4 */ + l32i a9, a3, 4 /* get word from s2+4 */ slli a5, a8, 1 bne a8, a9, .Lwne2 or a9, a8, a5 bnall a9, a7, .Lprobeq2 - addi a2, a2, 8 // advance s1 pointer - addi a3, a3, 8 // advance s2 pointer + addi a2, a2, 8 /* advance s1 pointer */ + addi a3, a3, 8 /* advance s2 pointer */ .Laligned_done: - or a1, a1, a1 // nop + or a1, a1, a1 /* nop */ .Lprobeq2: /* Adjust pointers to account for the loop unrolling. */ @@ -166,15 +166,15 @@ ENTRY (strcmp) #else /* !XCHAL_HAVE_LOOPS */ .Laligned: - movi a4, MASK0 // mask for byte 0 + movi a4, MASK0 /* mask for byte 0 */ movi a7, MASK4 j .Lfirstword .Lnextword: - addi a2, a2, 4 // advance s1 pointer - addi a3, a3, 4 // advance s2 pointer + addi a2, a2, 4 /* advance s1 pointer */ + addi a3, a3, 4 /* advance s2 pointer */ .Lfirstword: - l32i a8, a2, 0 // get word from s1 - l32i a9, a3, 0 // get word from s2 + l32i a8, a2, 0 /* get word from s1 */ + l32i a9, a3, 0 /* get word from s2 */ slli a5, a8, 1 bne a8, a9, .Lwne2 or a9, a8, a5 @@ -186,50 +186,50 @@ ENTRY (strcmp) /* Words are probably equal, but check for sure. If not, loop over the rest of string using normal algorithm. */ - bnone a8, a4, .Leq // if byte 0 is zero - l32r a5, .Lmask1 // mask for byte 1 - l32r a6, .Lmask2 // mask for byte 2 - bnone a8, a5, .Leq // if byte 1 is zero - l32r a7, .Lmask3 // mask for byte 3 - bnone a8, a6, .Leq // if byte 2 is zero - bnone a8, a7, .Leq // if byte 3 is zero - addi.n a2, a2, 4 // advance s1 pointer - addi.n a3, a3, 4 // advance s2 pointer + bnone a8, a4, .Leq /* if byte 0 is zero */ + l32r a5, .Lmask1 /* mask for byte 1 */ + l32r a6, .Lmask2 /* mask for byte 2 */ + bnone a8, a5, .Leq /* if byte 1 is zero */ + l32r a7, .Lmask3 /* mask for byte 3 */ + bnone a8, a6, .Leq /* if byte 2 is zero */ + bnone a8, a7, .Leq /* if byte 3 is zero */ + addi.n a2, a2, 4 /* advance s1 pointer */ + addi.n a3, a3, 4 /* advance s2 pointer */ #if XCHAL_HAVE_LOOPS /* align (1 mod 4) */ - loop a4, .Leq // loop forever (a4 is bigger than max iters) + loop a4, .Leq /* loop forever (a4 is bigger than max iters) */ .end no-transform - l32i a8, a2, 0 // get word from s1 - l32i a9, a3, 0 // get word from s2 - addi a2, a2, 4 // advance s1 pointer + l32i a8, a2, 0 /* get word from s1 */ + l32i a9, a3, 0 /* get word from s2 */ + addi a2, a2, 4 /* advance s1 pointer */ bne a8, a9, .Lwne - bnone a8, a4, .Leq // if byte 0 is zero - bnone a8, a5, .Leq // if byte 1 is zero - bnone a8, a6, .Leq // if byte 2 is zero - bnone a8, a7, .Leq // if byte 3 is zero - addi a3, a3, 4 // advance s2 pointer + bnone a8, a4, .Leq /* if byte 0 is zero */ + bnone a8, a5, .Leq /* if byte 1 is zero */ + bnone a8, a6, .Leq /* if byte 2 is zero */ + bnone a8, a7, .Leq /* if byte 3 is zero */ + addi a3, a3, 4 /* advance s2 pointer */ #else /* !XCHAL_HAVE_LOOPS */ j .Lfirstword2 .Lnextword2: - addi a3, a3, 4 // advance s2 pointer + addi a3, a3, 4 /* advance s2 pointer */ .Lfirstword2: - l32i a8, a2, 0 // get word from s1 - l32i a9, a3, 0 // get word from s2 - addi a2, a2, 4 // advance s1 pointer + l32i a8, a2, 0 /* get word from s1 */ 
+ l32i a9, a3, 0 /* get word from s2 */ + addi a2, a2, 4 /* advance s1 pointer */ bne a8, a9, .Lwne - bnone a8, a4, .Leq // if byte 0 is zero - bnone a8, a5, .Leq // if byte 1 is zero - bnone a8, a6, .Leq // if byte 2 is zero - bany a8, a7, .Lnextword2 // if byte 3 is zero + bnone a8, a4, .Leq /* if byte 0 is zero */ + bnone a8, a5, .Leq /* if byte 1 is zero */ + bnone a8, a6, .Leq /* if byte 2 is zero */ + bany a8, a7, .Lnextword2 /* if byte 3 is zero */ #endif /* !XCHAL_HAVE_LOOPS */ /* Words are equal; some byte is zero. */ -.Leq: movi a2, 0 // return equal - retw +.Leq: movi a2, 0 /* return equal */ + abi_ret .Lwne2: /* Words are not equal. On big-endian processors, if none of the bytes are zero, the return value can be determined by a simple @@ -239,22 +239,22 @@ ENTRY (strcmp) bnall a10, a7, .Lsomezero bgeu a8, a9, .Lposreturn movi a2, -1 - retw + abi_ret .Lposreturn: movi a2, 1 - retw -.Lsomezero: // There is probably some zero byte. + abi_ret +.Lsomezero: /* There is probably some zero byte. */ #endif /* __XTENSA_EB__ */ .Lwne: /* Words are not equal. */ - xor a2, a8, a9 // get word with nonzero in byte that differs - bany a2, a4, .Ldiff0 // if byte 0 differs - movi a5, MASK1 // mask for byte 1 - bnone a8, a4, .Leq // if byte 0 is zero - bany a2, a5, .Ldiff1 // if byte 1 differs - movi a6, MASK2 // mask for byte 2 - bnone a8, a5, .Leq // if byte 1 is zero - bany a2, a6, .Ldiff2 // if byte 2 differs - bnone a8, a6, .Leq // if byte 2 is zero + xor a2, a8, a9 /* get word with nonzero in byte that differs */ + bany a2, a4, .Ldiff0 /* if byte 0 differs */ + movi a5, MASK1 /* mask for byte 1 */ + bnone a8, a4, .Leq /* if byte 0 is zero */ + bany a2, a5, .Ldiff1 /* if byte 1 differs */ + movi a6, MASK2 /* mask for byte 2 */ + bnone a8, a5, .Leq /* if byte 1 is zero */ + bany a2, a6, .Ldiff2 /* if byte 2 differs */ + bnone a8, a6, .Leq /* if byte 2 is zero */ #ifdef __XTENSA_EB__ .Ldiff3: .Ldiff2: @@ -263,14 +263,14 @@ ENTRY (strcmp) byte. Just subtract words to get the return value. The high order equal bytes cancel, leaving room for the sign. */ sub a2, a8, a9 - retw + abi_ret .Ldiff0: /* Need to make room for the sign, so can't subtract whole words. */ extui a10, a8, 24, 8 extui a11, a9, 24, 8 sub a2, a10, a11 - retw + abi_ret #else /* !__XTENSA_EB__ */ /* Little-endian is a little more difficult because can't subtract @@ -281,28 +281,28 @@ ENTRY (strcmp) extui a10, a8, 24, 8 extui a11, a9, 24, 8 sub a2, a10, a11 - retw + abi_ret .Ldiff0: /* Byte 0 is different. */ extui a10, a8, 0, 8 extui a11, a9, 0, 8 sub a2, a10, a11 - retw + abi_ret .Ldiff1: /* Byte 0 is equal; byte 1 is different. */ extui a10, a8, 8, 8 extui a11, a9, 8, 8 sub a2, a10, a11 - retw + abi_ret .Ldiff2: /* Bytes 0-1 are equal; byte 2 is different. */ extui a10, a8, 16, 8 extui a11, a9, 16, 8 sub a2, a10, a11 - retw + abi_ret #endif /* !__XTENSA_EB */ diff --git a/libc/string/xtensa/strcpy.S b/libc/string/xtensa/strcpy.S index 108070384..9f42b34e6 100644 --- a/libc/string/xtensa/strcpy.S +++ b/libc/string/xtensa/strcpy.S @@ -13,11 +13,10 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 51 Franklin Street - Fifth Floor, - Boston, MA 02110-1301, USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ -#include "../../sysdeps/linux/xtensa/sysdep.h" +#include <sysdep.h> #include <bits/xtensa-config.h> #ifdef __XTENSA_EB__ @@ -36,7 +35,7 @@ ENTRY (strcpy) /* a2 = dst, a3 = src */ - mov a10, a2 // leave dst in return value register + mov a10, a2 /* leave dst in return value register */ movi a4, MASK0 movi a5, MASK1 movi a6, MASK2 @@ -51,25 +50,25 @@ ENTRY (strcpy) j .Ldstunaligned -.Lsrc1mod2: // src address is odd - l8ui a8, a3, 0 // get byte 0 - addi a3, a3, 1 // advance src pointer - s8i a8, a10, 0 // store byte 0 - beqz a8, 1f // if byte 0 is zero - addi a10, a10, 1 // advance dst pointer - bbci.l a3, 1, .Lsrcaligned // if src is now word-aligned +.Lsrc1mod2: /* src address is odd */ + l8ui a8, a3, 0 /* get byte 0 */ + addi a3, a3, 1 /* advance src pointer */ + s8i a8, a10, 0 /* store byte 0 */ + beqz a8, 1f /* if byte 0 is zero */ + addi a10, a10, 1 /* advance dst pointer */ + bbci.l a3, 1, .Lsrcaligned /* if src is now word-aligned */ -.Lsrc2mod4: // src address is 2 mod 4 - l8ui a8, a3, 0 // get byte 0 +.Lsrc2mod4: /* src address is 2 mod 4 */ + l8ui a8, a3, 0 /* get byte 0 */ /* 1-cycle interlock */ - s8i a8, a10, 0 // store byte 0 - beqz a8, 1f // if byte 0 is zero - l8ui a8, a3, 1 // get byte 0 - addi a3, a3, 2 // advance src pointer - s8i a8, a10, 1 // store byte 0 - addi a10, a10, 2 // advance dst pointer + s8i a8, a10, 0 /* store byte 0 */ + beqz a8, 1f /* if byte 0 is zero */ + l8ui a8, a3, 1 /* get byte 0 */ + addi a3, a3, 2 /* advance src pointer */ + s8i a8, a10, 1 /* store byte 0 */ + addi a10, a10, 2 /* advance dst pointer */ bnez a8, .Lsrcaligned -1: retw +1: abi_ret /* dst is word-aligned; src is word-aligned. */ @@ -78,46 +77,46 @@ ENTRY (strcpy) #if XCHAL_HAVE_LOOPS /* (2 mod 4) alignment for loop instruction */ .Laligned: - _movi.n a8, 0 // set up for the maximum loop count - loop a8, .Lz3 // loop forever (almost anyway) - l32i a8, a3, 0 // get word from src - addi a3, a3, 4 // advance src pointer - bnone a8, a4, .Lz0 // if byte 0 is zero - bnone a8, a5, .Lz1 // if byte 1 is zero - bnone a8, a6, .Lz2 // if byte 2 is zero - s32i a8, a10, 0 // store word to dst - bnone a8, a7, .Lz3 // if byte 3 is zero - addi a10, a10, 4 // advance dst pointer + _movi.n a8, 0 /* set up for the maximum loop count */ + loop a8, .Lz3 /* loop forever (almost anyway) */ + l32i a8, a3, 0 /* get word from src */ + addi a3, a3, 4 /* advance src pointer */ + bnone a8, a4, .Lz0 /* if byte 0 is zero */ + bnone a8, a5, .Lz1 /* if byte 1 is zero */ + bnone a8, a6, .Lz2 /* if byte 2 is zero */ + s32i a8, a10, 0 /* store word to dst */ + bnone a8, a7, .Lz3 /* if byte 3 is zero */ + addi a10, a10, 4 /* advance dst pointer */ #else /* !XCHAL_HAVE_LOOPS */ -1: addi a10, a10, 4 // advance dst pointer +1: addi a10, a10, 4 /* advance dst pointer */ .Laligned: - l32i a8, a3, 0 // get word from src - addi a3, a3, 4 // advance src pointer - bnone a8, a4, .Lz0 // if byte 0 is zero - bnone a8, a5, .Lz1 // if byte 1 is zero - bnone a8, a6, .Lz2 // if byte 2 is zero - s32i a8, a10, 0 // store word to dst - bany a8, a7, 1b // if byte 3 is zero + l32i a8, a3, 0 /* get word from src */ + addi a3, a3, 4 /* advance src pointer */ + bnone a8, a4, .Lz0 /* if byte 0 is zero */ + bnone a8, a5, .Lz1 /* if byte 1 is zero */ + bnone a8, a6, .Lz2 /* if byte 2 is zero */ + s32i a8, a10, 0 /* store word to dst */ + bany a8, a7, 1b /* if byte 3 is zero */ #endif /* !XCHAL_HAVE_LOOPS */ .Lz3: /* Byte 3 is zero. */ - retw + abi_ret .Lz0: /* Byte 0 is zero. 
*/ #ifdef __XTENSA_EB__ movi a8, 0 #endif s8i a8, a10, 0 - retw + abi_ret .Lz1: /* Byte 1 is zero. */ #ifdef __XTENSA_EB__ extui a8, a8, 16, 16 #endif s16i a8, a10, 0 - retw + abi_ret .Lz2: /* Byte 2 is zero. */ #ifdef __XTENSA_EB__ @@ -126,15 +125,15 @@ ENTRY (strcpy) s16i a8, a10, 0 movi a8, 0 s8i a8, a10, 2 - retw + abi_ret .align 4 /* (2 mod 4) alignment for loop instruction */ .Ldstunaligned: #if XCHAL_HAVE_LOOPS - _movi.n a8, 0 // set up for the maximum loop count - loop a8, 2f // loop forever (almost anyway) + _movi.n a8, 0 /* set up for the maximum loop count */ + loop a8, 2f /* loop forever (almost anyway) */ #endif 1: l8ui a8, a3, 0 addi a3, a3, 1 @@ -145,6 +144,6 @@ ENTRY (strcpy) #else bnez a8, 1b #endif -2: retw +2: abi_ret libc_hidden_def (strcpy) diff --git a/libc/string/xtensa/strlen.S b/libc/string/xtensa/strlen.S index dd72c16fa..e1c98c8f0 100644 --- a/libc/string/xtensa/strlen.S +++ b/libc/string/xtensa/strlen.S @@ -13,11 +13,10 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 51 Franklin Street - Fifth Floor, - Boston, MA 02110-1301, USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ -#include "../../sysdeps/linux/xtensa/sysdep.h" +#include <sysdep.h> #include <bits/xtensa-config.h> #ifdef __XTENSA_EB__ @@ -36,7 +35,7 @@ ENTRY (strlen) /* a2 = s */ - addi a3, a2, -4 // because we overincrement at the end + addi a3, a2, -4 /* because we overincrement at the end */ movi a4, MASK0 movi a5, MASK1 movi a6, MASK2 @@ -45,22 +44,22 @@ ENTRY (strlen) bbsi.l a2, 1, .L2mod4 j .Laligned -.L1mod2: // address is odd - l8ui a8, a3, 4 // get byte 0 - addi a3, a3, 1 // advance string pointer - beqz a8, .Lz3 // if byte 0 is zero - bbci.l a3, 1, .Laligned // if string pointer is now word-aligned +.L1mod2: /* address is odd */ + l8ui a8, a3, 4 /* get byte 0 */ + addi a3, a3, 1 /* advance string pointer */ + beqz a8, .Lz3 /* if byte 0 is zero */ + bbci.l a3, 1, .Laligned /* if string pointer is now word-aligned */ -.L2mod4: // address is 2 mod 4 - addi a3, a3, 2 // advance ptr for aligned access - l32i a8, a3, 0 // get word with first two bytes of string - bnone a8, a6, .Lz2 // if byte 2 (of word, not string) is zero - bany a8, a7, .Laligned // if byte 3 (of word, not string) is nonzero +.L2mod4: /* address is 2 mod 4 */ + addi a3, a3, 2 /* advance ptr for aligned access */ + l32i a8, a3, 0 /* get word with first two bytes of string */ + bnone a8, a6, .Lz2 /* if byte 2 (of word, not string) is zero */ + bany a8, a7, .Laligned /* if byte 3 (of word, not string) is nonzero */ /* Byte 3 is zero. */ - addi a3, a3, 3 // point to zero byte - sub a2, a3, a2 // subtract to get length - retw + addi a3, a3, 3 /* point to zero byte */ + sub a2, a3, a2 /* subtract to get length */ + abi_ret /* String is word-aligned. 
*/ @@ -69,36 +68,36 @@ ENTRY (strlen) /* (2 mod 4) alignment for loop instruction */ .Laligned: #if XCHAL_HAVE_LOOPS - _movi.n a8, 0 // set up for the maximum loop count - loop a8, .Lz3 // loop forever (almost anyway) + _movi.n a8, 0 /* set up for the maximum loop count */ + loop a8, .Lz3 /* loop forever (almost anyway) */ #endif -1: l32i a8, a3, 4 // get next word of string - addi a3, a3, 4 // advance string pointer - bnone a8, a4, .Lz0 // if byte 0 is zero - bnone a8, a5, .Lz1 // if byte 1 is zero - bnone a8, a6, .Lz2 // if byte 2 is zero +1: l32i a8, a3, 4 /* get next word of string */ + addi a3, a3, 4 /* advance string pointer */ + bnone a8, a4, .Lz0 /* if byte 0 is zero */ + bnone a8, a5, .Lz1 /* if byte 1 is zero */ + bnone a8, a6, .Lz2 /* if byte 2 is zero */ #if XCHAL_HAVE_LOOPS - bnone a8, a7, .Lz3 // if byte 3 is zero + bnone a8, a7, .Lz3 /* if byte 3 is zero */ #else - bany a8, a7, 1b // repeat if byte 3 is non-zero + bany a8, a7, 1b /* repeat if byte 3 is non-zero */ #endif .Lz3: /* Byte 3 is zero. */ - addi a3, a3, 3 // point to zero byte + addi a3, a3, 3 /* point to zero byte */ /* Fall through.... */ .Lz0: /* Byte 0 is zero. */ - sub a2, a3, a2 // subtract to get length - retw + sub a2, a3, a2 /* subtract to get length */ + abi_ret .Lz1: /* Byte 1 is zero. */ - addi a3, a3, 1 // point to zero byte - sub a2, a3, a2 // subtract to get length - retw + addi a3, a3, 1 /* point to zero byte */ + sub a2, a3, a2 /* subtract to get length */ + abi_ret .Lz2: /* Byte 2 is zero. */ - addi a3, a3, 2 // point to zero byte - sub a2, a3, a2 // subtract to get length - retw + addi a3, a3, 2 /* point to zero byte */ + sub a2, a3, a2 /* subtract to get length */ + abi_ret libc_hidden_def (strlen) diff --git a/libc/string/xtensa/strncpy.S b/libc/string/xtensa/strncpy.S index 7ba2ef77d..aa8db5da1 100644 --- a/libc/string/xtensa/strncpy.S +++ b/libc/string/xtensa/strncpy.S @@ -13,11 +13,10 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 51 Franklin Street - Fifth Floor, - Boston, MA 02110-1301, USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ -#include "../../sysdeps/linux/xtensa/sysdep.h" +#include <sysdep.h> #include <bits/xtensa-config.h> #ifdef __XTENSA_EB__ @@ -41,41 +40,41 @@ .literal_position __strncpy_aux: -.Lsrc1mod2: // src address is odd - l8ui a8, a3, 0 // get byte 0 - addi a3, a3, 1 // advance src pointer - s8i a8, a10, 0 // store byte 0 - addi a4, a4, -1 // decrement n - beqz a4, .Lret // if n is zero - addi a10, a10, 1 // advance dst pointer - beqz a8, .Lfill // if byte 0 is zero - bbci.l a3, 1, .Lsrcaligned // if src is now word-aligned - -.Lsrc2mod4: // src address is 2 mod 4 - l8ui a8, a3, 0 // get byte 0 - addi a4, a4, -1 // decrement n - s8i a8, a10, 0 // store byte 0 - beqz a4, .Lret // if n is zero - addi a10, a10, 1 // advance dst pointer - beqz a8, .Lfill // if byte 0 is zero - l8ui a8, a3, 1 // get byte 0 - addi a3, a3, 2 // advance src pointer - s8i a8, a10, 0 // store byte 0 - addi a4, a4, -1 // decrement n - beqz a4, .Lret // if n is zero - addi a10, a10, 1 // advance dst pointer +.Lsrc1mod2: /* src address is odd */ + l8ui a8, a3, 0 /* get byte 0 */ + addi a3, a3, 1 /* advance src pointer */ + s8i a8, a10, 0 /* store byte 0 */ + addi a4, a4, -1 /* decrement n */ + beqz a4, .Lret /* if n is zero */ + addi a10, a10, 1 /* advance dst pointer */ + beqz a8, .Lfill /* if byte 0 is zero */ + bbci.l a3, 1, .Lsrcaligned /* if src is now word-aligned */ + +.Lsrc2mod4: /* src address is 2 mod 4 */ + l8ui a8, a3, 0 /* get byte 0 */ + addi a4, a4, -1 /* decrement n */ + s8i a8, a10, 0 /* store byte 0 */ + beqz a4, .Lret /* if n is zero */ + addi a10, a10, 1 /* advance dst pointer */ + beqz a8, .Lfill /* if byte 0 is zero */ + l8ui a8, a3, 1 /* get byte 0 */ + addi a3, a3, 2 /* advance src pointer */ + s8i a8, a10, 0 /* store byte 0 */ + addi a4, a4, -1 /* decrement n */ + beqz a4, .Lret /* if n is zero */ + addi a10, a10, 1 /* advance dst pointer */ bnez a8, .Lsrcaligned j .Lfill .Lret: - retw + abi_ret ENTRY (strncpy) /* a2 = dst, a3 = src */ - mov a10, a2 // leave dst in return value register - beqz a4, .Lret // if n is zero + mov a10, a2 /* leave dst in return value register */ + beqz a4, .Lret /* if n is zero */ movi a11, MASK0 movi a5, MASK1 @@ -125,28 +124,28 @@ ENTRY (strncpy) .Lfillcleanup: /* Fill leftover (1 to 3) bytes with zero. 
*/ - s8i a9, a10, 0 // store byte 0 - addi a4, a4, -1 // decrement n + s8i a9, a10, 0 /* store byte 0 */ + addi a4, a4, -1 /* decrement n */ addi a10, a10, 1 - bnez a4, .Lfillcleanup - -2: retw - -.Lfill1mod2: // dst address is odd - s8i a9, a10, 0 // store byte 0 - addi a4, a4, -1 // decrement n - beqz a4, 2b // if n is zero - addi a10, a10, 1 // advance dst pointer - bbci.l a10, 1, .Lfillaligned // if dst is now word-aligned - -.Lfill2mod4: // dst address is 2 mod 4 - s8i a9, a10, 0 // store byte 0 - addi a4, a4, -1 // decrement n - beqz a4, 2b // if n is zero - s8i a9, a10, 1 // store byte 1 - addi a4, a4, -1 // decrement n - beqz a4, 2b // if n is zero - addi a10, a10, 2 // advance dst pointer + bnez a4, .Lfillcleanup + +2: abi_ret + +.Lfill1mod2: /* dst address is odd */ + s8i a9, a10, 0 /* store byte 0 */ + addi a4, a4, -1 /* decrement n */ + beqz a4, 2b /* if n is zero */ + addi a10, a10, 1 /* advance dst pointer */ + bbci.l a10, 1, .Lfillaligned /* if dst is now word-aligned */ + +.Lfill2mod4: /* dst address is 2 mod 4 */ + s8i a9, a10, 0 /* store byte 0 */ + addi a4, a4, -1 /* decrement n */ + beqz a4, 2b /* if n is zero */ + s8i a9, a10, 1 /* store byte 1 */ + addi a4, a4, -1 /* decrement n */ + beqz a4, 2b /* if n is zero */ + addi a10, a10, 2 /* advance dst pointer */ j .Lfillaligned @@ -156,32 +155,32 @@ ENTRY (strncpy) /* (2 mod 4) alignment for loop instruction */ .Laligned: #if XCHAL_HAVE_LOOPS - _movi.n a8, 0 // set up for the maximum loop count - loop a8, 1f // loop forever (almost anyway) - blti a4, 5, .Ldstunaligned // n is near limit; do one at a time - l32i a8, a3, 0 // get word from src - addi a3, a3, 4 // advance src pointer - bnone a8, a11, .Lz0 // if byte 0 is zero - bnone a8, a5, .Lz1 // if byte 1 is zero - bnone a8, a6, .Lz2 // if byte 2 is zero - s32i a8, a10, 0 // store word to dst - addi a4, a4, -4 // decrement n - addi a10, a10, 4 // advance dst pointer - bnone a8, a7, .Lfill // if byte 3 is zero -1: + _movi.n a8, 0 /* set up for the maximum loop count */ + loop a8, 1f /* loop forever (almost anyway) */ + blti a4, 5, .Ldstunaligned /* n is near limit; do one at a time */ + l32i a8, a3, 0 /* get word from src */ + addi a3, a3, 4 /* advance src pointer */ + bnone a8, a11, .Lz0 /* if byte 0 is zero */ + bnone a8, a5, .Lz1 /* if byte 1 is zero */ + bnone a8, a6, .Lz2 /* if byte 2 is zero */ + s32i a8, a10, 0 /* store word to dst */ + addi a4, a4, -4 /* decrement n */ + addi a10, a10, 4 /* advance dst pointer */ + bnone a8, a7, .Lfill /* if byte 3 is zero */ +1: #else /* !XCHAL_HAVE_LOOPS */ -1: blti a4, 5, .Ldstunaligned // n is near limit; do one at a time - l32i a8, a3, 0 // get word from src - addi a3, a3, 4 // advance src pointer - bnone a8, a11, .Lz0 // if byte 0 is zero - bnone a8, a5, .Lz1 // if byte 1 is zero - bnone a8, a6, .Lz2 // if byte 2 is zero - s32i a8, a10, 0 // store word to dst - addi a4, a4, -4 // decrement n - addi a10, a10, 4 // advance dst pointer - bany a8, a7, 1b // no zeroes +1: blti a4, 5, .Ldstunaligned /* n is near limit; do one at a time */ + l32i a8, a3, 0 /* get word from src */ + addi a3, a3, 4 /* advance src pointer */ + bnone a8, a11, .Lz0 /* if byte 0 is zero */ + bnone a8, a5, .Lz1 /* if byte 1 is zero */ + bnone a8, a6, .Lz2 /* if byte 2 is zero */ + s32i a8, a10, 0 /* store word to dst */ + addi a4, a4, -4 /* decrement n */ + addi a10, a10, 4 /* advance dst pointer */ + bany a8, a7, 1b /* no zeroes */ #endif /* !XCHAL_HAVE_LOOPS */ j .Lfill @@ -191,8 +190,8 @@ ENTRY (strncpy) movi a8, 0 #endif s8i a8, a10, 0 - addi a4, a4, 
-1 // decrement n - addi a10, a10, 1 // advance dst pointer + addi a4, a4, -1 /* decrement n */ + addi a10, a10, 1 /* advance dst pointer */ j .Lfill .Lz1: /* Byte 1 is zero. */ @@ -200,8 +199,8 @@ ENTRY (strncpy) extui a8, a8, 16, 16 #endif s16i a8, a10, 0 - addi a4, a4, -2 // decrement n - addi a10, a10, 2 // advance dst pointer + addi a4, a4, -2 /* decrement n */ + addi a10, a10, 2 /* advance dst pointer */ j .Lfill .Lz2: /* Byte 2 is zero. */ @@ -211,8 +210,8 @@ ENTRY (strncpy) s16i a8, a10, 0 movi a8, 0 s8i a8, a10, 2 - addi a4, a4, -3 // decrement n - addi a10, a10, 3 // advance dst pointer + addi a4, a4, -3 /* decrement n */ + addi a10, a10, 3 /* advance dst pointer */ j .Lfill .align 4 @@ -220,8 +219,8 @@ ENTRY (strncpy) .Ldstunaligned: #if XCHAL_HAVE_LOOPS - _movi.n a8, 0 // set up for the maximum loop count - loop a8, 2f // loop forever (almost anyway) + _movi.n a8, 0 /* set up for the maximum loop count */ + loop a8, 2f /* loop forever (almost anyway) */ #endif 1: l8ui a8, a3, 0 addi a3, a3, 1 @@ -236,6 +235,6 @@ ENTRY (strncpy) #endif 2: j .Lfill -3: retw +3: abi_ret libc_hidden_def (strncpy)
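A note on the recurring change in the Xtensa files above: besides rewriting the //-style comments as /* */ comments, the patch replaces every hard-coded windowed-ABI return instruction retw with an abi_ret macro and includes <sysdep.h> instead of the relative "../../sysdeps/linux/xtensa/sysdep.h" path, presumably so the same .S sources can be assembled for either Xtensa register ABI. As a minimal sketch only (the macro name abi_ret is taken from the diff, but the definition below is illustrative rather than copied from the uClibc header, and it assumes the compiler predefines __XTENSA_WINDOWED_ABI__ / __XTENSA_CALL0_ABI__ as GCC does for Xtensa targets), such a header could provide:

/* Illustrative sketch, not the actual uClibc <sysdep.h>: pick the
   return instruction that matches the configured ABI so the shared
   .S files need no per-ABI #ifdefs.  */
#if defined __XTENSA_WINDOWED_ABI__
# define abi_ret  retw   /* windowed ABI: return via register-window rotation */
#elif defined __XTENSA_CALL0_ABI__
# define abi_ret  ret    /* call0 ABI: plain subroutine return */
#else
# error "unknown Xtensa ABI"
#endif

With a definition along these lines, each abi_ret in memcpy, memset, strcmp, strcpy, strlen, and strncpy assembles to the return form appropriate for the ABI the library is built against, which is what replacing the literal retw instructions makes possible.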
