Diffstat (limited to 'libc/string')
-rw-r--r--  libc/string/Makefile.in | 15
-rw-r--r--  libc/string/__glibc_strerror_r.c | 9
-rw-r--r--  libc/string/__xpg_basename.c | 5
-rw-r--r--  libc/string/__xpg_strerror_r.c | 12
-rw-r--r--  libc/string/_collate.c | 36
-rwxr-xr-x  libc/string/arc/Makefile | 13
-rw-r--r--  libc/string/arc/arcv2/memcpy.S | 236
-rw-r--r--  libc/string/arc/arcv2/memset.S | 115
-rw-r--r--  libc/string/arc/arcv2/strcmp.S | 83
-rw-r--r--  libc/string/arc/memcmp.S | 157
-rw-r--r--  libc/string/arc/memcpy.S | 71
-rw-r--r--  libc/string/arc/memset.S | 51
-rw-r--r--  libc/string/arc/strchr.S | 138
-rw-r--r--  libc/string/arc/strcmp.S | 102
-rw-r--r--  libc/string/arc/strcpy.S | 71
-rw-r--r--  libc/string/arc/strlen.S | 84
-rw-r--r--  libc/string/arm/_memcpy.S | 24
-rw-r--r--  libc/string/arm/memcmp.S | 13
-rw-r--r--  libc/string/arm/memset.S | 19
-rw-r--r--  libc/string/arm/strcmp.S | 7
-rw-r--r--  libc/string/arm/strlen.S | 13
-rw-r--r--  libc/string/arm/strncmp.S | 101
-rw-r--r--  libc/string/avr32/Makefile | 3
-rw-r--r--  libc/string/basename.c | 4
-rw-r--r--  libc/string/bcopy.c | 24
-rw-r--r--  libc/string/bfin/memchr.S | 4
-rw-r--r--  libc/string/bfin/strcmp.S | 80
-rw-r--r--  libc/string/bzero.c | 16
-rw-r--r--  libc/string/cris/memcopy.h | 3
-rw-r--r--  libc/string/cris/memcpy.c | 380
-rw-r--r--  libc/string/cris/memmove.c | 6
-rw-r--r--  libc/string/cris/memset.c | 435
-rw-r--r--  libc/string/cris/strcpy.c | 1
-rw-r--r--  libc/string/cris/strncpy.c | 2
-rw-r--r--  libc/string/dirname.c | 3
-rw-r--r--  libc/string/ffs.c | 12
-rw-r--r--  libc/string/ffsll.c | 35
-rw-r--r--  libc/string/frv/memcpy.S | 4
-rw-r--r--  libc/string/frv/memset.S | 6
-rw-r--r--  libc/string/generic/bp-checks.h | 129
-rw-r--r--  libc/string/generic/memchr.c | 9
-rw-r--r--  libc/string/generic/memcmp.c | 23
-rw-r--r--  libc/string/generic/memcopy.h | 8
-rw-r--r--  libc/string/generic/memcpy.c | 7
-rw-r--r--  libc/string/generic/memmem.c | 8
-rw-r--r--  libc/string/generic/memmove.c | 7
-rw-r--r--  libc/string/generic/mempcpy.c | 4
-rw-r--r--  libc/string/generic/memrchr.c | 7
-rw-r--r--  libc/string/generic/memset.c | 6
-rw-r--r--  libc/string/generic/pagecopy.h | 9
-rw-r--r--  libc/string/generic/rawmemchr.c | 7
-rw-r--r--  libc/string/generic/strcat.c | 6
-rw-r--r--  libc/string/generic/strchr.c | 7
-rw-r--r--  libc/string/generic/strchrnul.c | 7
-rw-r--r--  libc/string/generic/strcmp.c | 7
-rw-r--r--  libc/string/generic/strcpy.c | 33
-rw-r--r--  libc/string/generic/strcspn.c | 7
-rw-r--r--  libc/string/generic/strlen.c | 7
-rw-r--r--  libc/string/generic/strncat.c | 6
-rw-r--r--  libc/string/generic/strncmp.c | 6
-rw-r--r--  libc/string/generic/strncpy.c | 6
-rw-r--r--  libc/string/generic/strnlen.c | 11
-rw-r--r--  libc/string/generic/strrchr.c | 7
-rw-r--r--  libc/string/generic/strsep.c | 8
-rw-r--r--  libc/string/generic/strspn.c | 6
-rw-r--r--  libc/string/generic/strstr.c | 6
-rw-r--r--  libc/string/generic/strtok_r.c | 24
-rw-r--r--  libc/string/i386/memchr.c | 52
-rw-r--r--  libc/string/i386/memcpy.c | 31
-rw-r--r--  libc/string/i386/memmove.c | 52
-rw-r--r--  libc/string/i386/memset.c | 66
-rw-r--r--  libc/string/i386/rawmemchr.c | 24
-rw-r--r--  libc/string/i386/strcat.c | 1
-rw-r--r--  libc/string/i386/strchr.c | 32
-rw-r--r--  libc/string/i386/strchrnul.c | 47
-rw-r--r--  libc/string/i386/strcmp.c | 2
-rw-r--r--  libc/string/i386/strcpy.c | 2
-rw-r--r--  libc/string/i386/string.h | 338
-rw-r--r--  libc/string/i386/strlen.c | 20
-rw-r--r--  libc/string/i386/strncat.c | 73
-rw-r--r--  libc/string/i386/strncmp.c | 41
-rw-r--r--  libc/string/i386/strncpy.c | 55
-rw-r--r--  libc/string/i386/strnlen.c | 51
-rw-r--r--  libc/string/i386/strrchr.c | 30
-rw-r--r--  libc/string/ia64/bcopy.S | 2
-rw-r--r--  libc/string/ia64/bzero.S | 143
-rw-r--r--  libc/string/ia64/memccpy.S | 109
-rw-r--r--  libc/string/ia64/memchr.S | 47
-rw-r--r--  libc/string/ia64/memcmp.S | 101
-rw-r--r--  libc/string/ia64/memcpy.S | 167
-rw-r--r--  libc/string/ia64/memmove.S | 177
-rw-r--r--  libc/string/ia64/memset.S | 185
-rw-r--r--  libc/string/ia64/softpipe.h | 5
-rw-r--r--  libc/string/ia64/strchr.S | 39
-rw-r--r--  libc/string/ia64/strcmp.S | 9
-rw-r--r--  libc/string/ia64/strcpy.S | 73
-rw-r--r--  libc/string/ia64/strlen.S | 25
-rw-r--r--  libc/string/ia64/strncmp.S | 17
-rw-r--r--  libc/string/ia64/strncpy.S | 97
-rw-r--r--  libc/string/ia64/sysdep.h | 168
-rw-r--r--  libc/string/memchr.c | 14
-rw-r--r--  libc/string/memcmp.c | 1
-rw-r--r--  libc/string/memcpy.c | 9
-rw-r--r--  libc/string/memmem.c | 2
-rw-r--r--  libc/string/memmove.c | 22
-rw-r--r--  libc/string/mempcpy.c | 9
-rw-r--r--  libc/string/memrchr.c | 16
-rw-r--r--  libc/string/memset.c | 13
-rw-r--r--  libc/string/metag/Makefile | 13
-rw-r--r--  libc/string/metag/memchr.S | 156
-rw-r--r--  libc/string/metag/memcpy.S | 189
-rw-r--r--  libc/string/metag/memmove.S | 350
-rw-r--r--  libc/string/metag/memset.S | 90
-rw-r--r--  libc/string/metag/strchr.S | 167
-rw-r--r--  libc/string/metag/strcmp.S | 65
-rw-r--r--  libc/string/metag/strcpy.S | 94
-rw-r--r--  libc/string/microblaze/Makefile (renamed from libc/string/sh64/Makefile) | 4
-rw-r--r--  libc/string/microblaze/memcpy.S | 334
-rw-r--r--  libc/string/microblaze/memmove.S | 356
-rw-r--r--  libc/string/mips/memcpy.S | 1055
-rw-r--r--  libc/string/mips/memset.S | 520
-rw-r--r--  libc/string/mips/sysdep.h | 45
-rw-r--r--  libc/string/powerpc/memcpy.c | 28
-rw-r--r--  libc/string/powerpc/memmove.c | 2
-rw-r--r--  libc/string/powerpc/memset.c | 1
-rw-r--r--  libc/string/psignal.c | 2
-rw-r--r--  libc/string/rawmemchr.c | 1
-rw-r--r--  libc/string/sh/memchr.S | 30
-rw-r--r--  libc/string/sh/sh4/memcpy.S | 242
-rw-r--r--  libc/string/sh/sh4/memmove.c | 121
-rw-r--r--  libc/string/sh/sh4/memset.S | 152
-rw-r--r--  libc/string/sh/sh4/strcpy.S | 28
-rw-r--r--  libc/string/sh/sh4/strncpy.S | 43
-rw-r--r--  libc/string/sh/strlen.S | 75
-rw-r--r--  libc/string/sh64/memcpy.S | 205
-rw-r--r--  libc/string/sh64/memset.S | 96
-rw-r--r--  libc/string/sh64/strcpy.S | 102
-rw-r--r--  libc/string/sh64/strlen.S | 63
-rw-r--r--  libc/string/sparc/sparc32/memchr.S | 7
-rw-r--r--  libc/string/sparc/sparc32/memcpy.S | 5
-rw-r--r--  libc/string/sparc/sparc32/memset.S | 5
-rw-r--r--  libc/string/sparc/sparc32/stpcpy.S | 5
-rw-r--r--  libc/string/sparc/sparc32/strcat.S | 5
-rw-r--r--  libc/string/sparc/sparc32/strchr.S | 5
-rw-r--r--  libc/string/sparc/sparc32/strcmp.S | 5
-rw-r--r--  libc/string/sparc/sparc32/strcpy.S | 5
-rw-r--r--  libc/string/sparc/sparc32/strlen.S | 5
-rw-r--r--  libc/string/sparc/sparc64/memchr.S | 261
-rw-r--r--  libc/string/sparc/sparc64/memcpy.S | 923
-rw-r--r--  libc/string/sparc/sparc64/memset.S | 317
-rw-r--r--  libc/string/sparc/sparc64/sparcv9b/memcpy.S | 612
-rw-r--r--  libc/string/sparc/sparc64/stpcpy.S | 271
-rw-r--r--  libc/string/sparc/sparc64/strcat.S | 339
-rw-r--r--  libc/string/sparc/sparc64/strchr.S | 486
-rw-r--r--  libc/string/sparc/sparc64/strcmp.S | 279
-rw-r--r--  libc/string/sparc/sparc64/strcpy.S | 245
-rw-r--r--  libc/string/sparc/sparc64/strlen.S | 173
-rw-r--r--  libc/string/stpcpy.c | 8
-rw-r--r--  libc/string/stpncpy.c | 13
-rw-r--r--  libc/string/strcasecmp.c | 18
-rw-r--r--  libc/string/strcasestr.c | 7
-rw-r--r--  libc/string/strcat.c | 2
-rw-r--r--  libc/string/strchr.c | 9
-rw-r--r--  libc/string/strchrnul.c | 6
-rw-r--r--  libc/string/strcmp.c | 3
-rw-r--r--  libc/string/strcpy.c | 13
-rw-r--r--  libc/string/strcspn.c | 1
-rw-r--r--  libc/string/strdup.c | 4
-rw-r--r--  libc/string/strerror.c | 2
-rw-r--r--  libc/string/strlcpy.c | 16
-rw-r--r--  libc/string/strlen.c | 2
-rw-r--r--  libc/string/strncasecmp.c | 18
-rw-r--r--  libc/string/strncat.c | 5
-rw-r--r--  libc/string/strncmp.c | 1
-rw-r--r--  libc/string/strncpy.c | 8
-rw-r--r--  libc/string/strndup.c | 3
-rw-r--r--  libc/string/strnlen.c | 13
-rw-r--r--  libc/string/strpbrk.c | 2
-rw-r--r--  libc/string/strrchr.c | 1
-rw-r--r--  libc/string/strsep.c | 3
-rw-r--r--  libc/string/strsignal.c | 38
-rw-r--r--  libc/string/strspn.c | 2
-rw-r--r--  libc/string/strstr.c | 3
-rw-r--r--  libc/string/strtok.c | 1
-rw-r--r--  libc/string/strtok_r.c | 5
-rw-r--r--  libc/string/strverscmp.c | 106
-rw-r--r--  libc/string/sys_errlist.c | 2
-rw-r--r--  libc/string/x86_64/bzero.S | 1
-rw-r--r--  libc/string/x86_64/memcpy.S | 7
-rw-r--r--  libc/string/x86_64/mempcpy.S | 1
-rw-r--r--  libc/string/x86_64/memset.S | 9
-rw-r--r--  libc/string/x86_64/strcat.S | 5
-rw-r--r--  libc/string/x86_64/strchr.S | 5
-rw-r--r--  libc/string/x86_64/strcmp.S | 5
-rw-r--r--  libc/string/x86_64/strcpy.S | 5
-rw-r--r--  libc/string/x86_64/strcspn.S | 5
-rw-r--r--  libc/string/x86_64/strlen.S | 5
-rw-r--r--  libc/string/x86_64/strspn.S | 5
-rw-r--r--  libc/string/xtensa/memcpy.S | 45
-rw-r--r--  libc/string/xtensa/memset.S | 23
-rw-r--r--  libc/string/xtensa/strcmp.S | 180
-rw-r--r--  libc/string/xtensa/strcpy.S | 91
-rw-r--r--  libc/string/xtensa/strlen.S | 71
-rw-r--r--  libc/string/xtensa/strncpy.S | 165
204 files changed, 7290 insertions, 7373 deletions
diff --git a/libc/string/Makefile.in b/libc/string/Makefile.in
index 2f14cc0e6..e7f2ccde1 100644
--- a/libc/string/Makefile.in
+++ b/libc/string/Makefile.in
@@ -1,10 +1,12 @@
# Makefile for uClibc
#
-# Copyright (C) 2000-2006 Erik Andersen <andersen@uclibc.org>
+# Copyright (C) 2000-2008 Erik Andersen <andersen@uclibc.org>
#
# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
#
+subdirs += libc/string/$(TARGET_ARCH) libc/string/generic
+
#
# Arch specific fun
#
@@ -16,7 +18,10 @@ STRING_SUBARCH_OUT := $(top_builddir)libc/string/$(TARGET_ARCH)/$(TARGET_SUBARCH
STRING_SUBARCH_SSRC := $(wildcard $(STRING_SUBARCH_OUT)/*.S)
STRING_SUBARCH_SOBJ := $(patsubst $(STRING_SUBARCH_DIR)/%.S,$(STRING_SUBARCH_OUT)/%.o,$(STRING_SUBARCH_SSRC))
-STRING_SUBARCH_OBJS := $(STRING_SUBARCH_SOBJ)
+STRING_SUBARCH_CSRC := $(wildcard $(STRING_SUBARCH_OUT)/*.c)
+STRING_SUBARCH_COBJ := $(patsubst $(STRING_SUBARCH_DIR)/%.c,$(STRING_SUBARCH_OUT)/%.o,$(STRING_SUBARCH_CSRC))
+
+STRING_SUBARCH_OBJS := $(STRING_SUBARCH_SOBJ) $(STRING_SUBARCH_COBJ)
endif
# Collect the arch specific implementation (asm, c files)
@@ -133,7 +138,7 @@ libc-y += $(STRING_COBJ)
libc-nomulti-$(UCLIBC_HAS_XLOCALE) += $(STRING_OUT)/wcsxfrm_l.o
libc-nomulti-y += $(STRING_OUT)/__xpg_strerror_r.o
-objclean-y += string_objclean
+objclean-y += CLEAN_libc/string
-string_objclean:
- $(RM) $(STRING_OUT)/{,*/}{,*/}*.{o,os,oS}
+CLEAN_libc/string:
+ $(do_rm) $(addprefix $(STRING_OUT)/,$(addprefix *., o os oS) $(addprefix */*., o os oS) $(addprefix */*/*., o os oS))
diff --git a/libc/string/__glibc_strerror_r.c b/libc/string/__glibc_strerror_r.c
index 0f9cd16a9..96b881700 100644
--- a/libc/string/__glibc_strerror_r.c
+++ b/libc/string/__glibc_strerror_r.c
@@ -5,11 +5,13 @@
* Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
*/
+/* get rid of REDIRECT */
+#define strerror_r __hide_strerror_r
+
#include <features.h>
#include <string.h>
-libc_hidden_proto(__glibc_strerror_r)
-libc_hidden_proto(__xpg_strerror_r)
+#undef strerror_r
char *__glibc_strerror_r(int errnum, char *strerrbuf, size_t buflen)
{
@@ -18,3 +20,6 @@ char *__glibc_strerror_r(int errnum, char *strerrbuf, size_t buflen)
return strerrbuf;
}
libc_hidden_def(__glibc_strerror_r)
+#if !defined __USE_XOPEN2K || defined __USE_GNU
+strong_alias(__glibc_strerror_r,strerror_r)
+#endif
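The two hunks above implement a hide-then-alias pattern: the macro hides any header redirect of strerror_r() while <string.h> is parsed, and the strong alias then hands the public name to exactly one implementation. A rough C sketch of the shape, not a drop-in file — the body is abbreviated from the patch context, and strong_alias is uClibc's internal macro rather than a public API:

    /* Sketch only: the hide-then-alias pattern from the hunks above. */
    #define strerror_r __hide_strerror_r   /* hide any header redirect */
    #include <string.h>
    #undef strerror_r

    char *__glibc_strerror_r(int errnum, char *strerrbuf, size_t buflen)
    {
        /* Fill the buffer; the GNU variant returns the string, not an int
           (the call below is how the surrounding file does it). */
        __xpg_strerror_r(errnum, strerrbuf, buflen);
        return strerrbuf;
    }

    /* Exactly one variant may own the public name strerror_r. */
    #if !defined __USE_XOPEN2K || defined __USE_GNU
    strong_alias(__glibc_strerror_r, strerror_r)
    #endif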
diff --git a/libc/string/__xpg_basename.c b/libc/string/__xpg_basename.c
index 2449d1d42..2e7ade913 100644
--- a/libc/string/__xpg_basename.c
+++ b/libc/string/__xpg_basename.c
@@ -5,7 +5,6 @@
* Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
*/
-#include "_string.h"
#include <libgen.h>
char *__xpg_basename(register char *path)
@@ -34,3 +33,7 @@ char *__xpg_basename(register char *path)
return first;
}
+#ifndef __USE_GNU
+# undef basename
+weak_alias(__xpg_basename,basename)
+#endif
diff --git a/libc/string/__xpg_strerror_r.c b/libc/string/__xpg_strerror_r.c
index ff41192e5..3e78da1be 100644
--- a/libc/string/__xpg_strerror_r.c
+++ b/libc/string/__xpg_strerror_r.c
@@ -5,8 +5,8 @@
* Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
*/
-/* Make sure we get proper strerror_r() prototype */
-#define strerror_r _hidestrerror_r
+/* get rid of REDIRECT */
+#define strerror_r __hide_strerror_r
#include <features.h>
#include <errno.h>
@@ -15,10 +15,6 @@
#undef strerror_r
-libc_hidden_proto(__xpg_strerror_r)
-/* Experimentally off - libc_hidden_proto(memcpy) */
-/* Experimentally off - libc_hidden_proto(strlen) */
-
#ifdef __UCLIBC_HAS_ERRNO_MESSAGES__
extern const char _string_syserrmsgs[] attribute_hidden;
@@ -276,4 +272,6 @@ int __xpg_strerror_r(int errnum, char *strerrbuf, size_t buflen)
#endif /* __UCLIBC_HAS_ERRNO_MESSAGES__ */
libc_hidden_def(__xpg_strerror_r)
-weak_alias(__xpg_strerror_r, strerror_r)
+#if defined __USE_XOPEN2K && !defined __USE_GNU
+strong_alias(__xpg_strerror_r,strerror_r)
+#endif
diff --git a/libc/string/_collate.c b/libc/string/_collate.c
index 64b5d9608..93501b85e 100644
--- a/libc/string/_collate.c
+++ b/libc/string/_collate.c
@@ -19,15 +19,6 @@
#include <errno.h>
#include <assert.h>
-/* Experimentally off - libc_hidden_proto(memset) */
-/* Experimentally off - libc_hidden_proto(memcpy) */
-/* Experimentally off - libc_hidden_proto(strlcpy) */
-/* Experimentally off - libc_hidden_proto(strcmp) */
-#ifdef WANT_WIDE
-libc_hidden_proto(wcsxfrm)
-libc_hidden_proto(wcscmp)
-#endif
-
#ifdef __UCLIBC_HAS_LOCALE__
#if defined(L_strxfrm) || defined(L_strxfrm_l) || defined(L_wcsxfrm) || defined(L_wcsxfrm_l)
@@ -59,29 +50,24 @@ libc_hidden_proto(wcscmp)
#if defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE)
-libc_hidden_proto(wcscoll_l)
-libc_hidden_proto(wcscoll)
int wcscoll (const Wchar *s0, const Wchar *s1)
{
return wcscoll_l(s0, s1, __UCLIBC_CURLOCALE );
}
libc_hidden_def(wcscoll)
-libc_hidden_proto(wcsxfrm_l)
-libc_hidden_proto(wcsxfrm)
size_t wcsxfrm(Wchar *__restrict ws1, const Wchar *__restrict ws2, size_t n)
{
return wcsxfrm_l(ws1, ws2, n, __UCLIBC_CURLOCALE );
}
-libc_hidden_def(wcsxfrm)
#else /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */
#if 0
-#define CUR_COLLATE (&__UCLIBC_CURLOCALE_DATA.collate)
+#define CUR_COLLATE (&__UCLIBC_CURLOCALE->collate)
#else
#define CUR_COLLATE (& __LOCALE_PTR->collate)
#endif
@@ -173,7 +159,7 @@ static void next_weight(col_state_t *cs, int pass __LOCALE_PARAM )
#define N (1)
#else /* WANT_WIDE */
wchar_t WC;
- size_t n0, nx;
+ size_t n0, nx = 0;
#define N n0
#endif /* WANT_WIDE */
@@ -381,7 +367,7 @@ static void next_weight(col_state_t *cs, int pass __LOCALE_PARAM )
if (cs->back_buf == cs->ibb) { /* was using internal buffer */
cs->bp = malloc(cs->bb_size + 128);
if (!cs->bp) {
- __set_errno(ENOMEM);
+ /* __set_errno(ENOMEM); */
#ifdef __UCLIBC_MJN3_ONLY__
#warning what to do here?
#endif
@@ -393,7 +379,7 @@ static void next_weight(col_state_t *cs, int pass __LOCALE_PARAM )
} else {
cs->bp = realloc(cs->back_buf, cs->bb_size + 128);
if (!cs->bp) {
- __set_errno(ENOMEM);
+ /* __set_errno(ENOMEM); */
#ifdef __UCLIBC_MJN3_ONLY__
#warning what to do here?
#endif
@@ -513,7 +499,6 @@ static void next_weight(col_state_t *cs, int pass __LOCALE_PARAM )
} while (1);
}
-libc_hidden_proto(__XL_NPP(wcscoll))
int __XL_NPP(wcscoll) (const Wchar *s0, const Wchar *s1 __LOCALE_PARAM )
{
col_state_t ws[2];
@@ -522,9 +507,9 @@ int __XL_NPP(wcscoll) (const Wchar *s0, const Wchar *s1 __LOCALE_PARAM )
if (!CUR_COLLATE->num_weights) { /* C locale */
#ifdef WANT_WIDE
return wcscmp(s0, s1);
-#else /* WANT_WIDE */
+#else
return strcmp(s0, s1);
-#endif /* WANT_WIDE */
+#endif
}
pass = 0;
@@ -551,10 +536,6 @@ libc_hidden_def(__XL_NPP(wcscoll))
#ifdef WANT_WIDE
-extern size_t __wcslcpy(wchar_t *__restrict dst,
- const wchar_t *__restrict src, size_t n);
-
-libc_hidden_proto(__XL_NPP(wcsxfrm))
size_t __XL_NPP(wcsxfrm)(wchar_t *__restrict ws1, const wchar_t *__restrict ws2,
size_t n __LOCALE_PARAM )
{
@@ -592,7 +573,9 @@ size_t __XL_NPP(wcsxfrm)(wchar_t *__restrict ws1, const wchar_t *__restrict ws2,
}
return count-1;
}
+#if defined L_strxfrm_l || defined L_wcsxfrm_l
libc_hidden_def(__XL_NPP(wcsxfrm))
+#endif
#else /* WANT_WIDE */
@@ -636,7 +619,6 @@ static size_t store(unsigned char *s, size_t count, size_t n, __uwchar_t weight)
return r;
}
-libc_hidden_proto(__XL_NPP(strxfrm))
size_t __XL_NPP(strxfrm)(char *__restrict ws1, const char *__restrict ws2, size_t n
__LOCALE_PARAM )
{
@@ -674,7 +656,9 @@ size_t __XL_NPP(strxfrm)(char *__restrict ws1, const char *__restrict ws2, size_
}
return count-1;
}
+#ifdef L_strxfrm_l
libc_hidden_def(__XL_NPP(strxfrm))
+#endif
#endif /* WANT_WIDE */
diff --git a/libc/string/arc/Makefile b/libc/string/arc/Makefile
new file mode 100755
index 000000000..523cf6842
--- /dev/null
+++ b/libc/string/arc/Makefile
@@ -0,0 +1,13 @@
+# Makefile for uClibc
+#
+# Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org>
+#
+# Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+#
+
+top_srcdir:=../../../
+top_builddir:=../../../
+all: objs
+include $(top_builddir)Rules.mak
+include ../Makefile.in
+include $(top_srcdir)Makerules
diff --git a/libc/string/arc/arcv2/memcpy.S b/libc/string/arc/arcv2/memcpy.S
new file mode 100644
index 000000000..7573daf51
--- /dev/null
+++ b/libc/string/arc/arcv2/memcpy.S
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+ */
+
+#include <features.h>
+#include <sysdep.h>
+
+#ifdef __LITTLE_ENDIAN__
+# define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; <<
+# define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >>
+# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM
+# define MERGE_2(RX,RY,IMM)
+# define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF
+# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM
+#else
+# define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >>
+# define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; <<
+# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; <<
+# define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; <<
+# define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM
+# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08
+#endif
+
+#ifdef __LL64__
+# define PREFETCH_READ(RX) prefetch [RX, 56]
+# define PREFETCH_WRITE(RX) prefetchw [RX, 64]
+# define LOADX(DST,RX) ldd.ab DST, [RX, 8]
+# define STOREX(SRC,RX) std.ab SRC, [RX, 8]
+# define ZOLSHFT 5
+# define ZOLAND 0x1F
+#else
+# define PREFETCH_READ(RX) prefetch [RX, 28]
+# define PREFETCH_WRITE(RX) prefetchw [RX, 32]
+# define LOADX(DST,RX) ld.ab DST, [RX, 4]
+# define STOREX(SRC,RX) st.ab SRC, [RX, 4]
+# define ZOLSHFT 4
+# define ZOLAND 0xF
+#endif
+
+ENTRY(memcpy)
+ prefetch [r1] ; Prefetch the read location
+ prefetchw [r0] ; Prefetch the write location
+ mov.f 0, r2
+;;; if size is zero
+ jz.d [blink]
+ mov r3, r0 ; don't clobber ret val
+
+;;; if size <= 8
+ cmp r2, 8
+ bls.d @.Lsmallchunk
+ mov.f lp_count, r2
+
+ and.f r4, r0, 0x03
+ rsub lp_count, r4, 4
+ lpnz @.Laligndestination
+ ;; LOOP BEGIN
+ ldb.ab r5, [r1,1]
+ sub r2, r2, 1
+ stb.ab r5, [r3,1]
+.Laligndestination:
+
+;;; Check the alignment of the source
+ and.f r4, r1, 0x03
+ bnz.d @.Lsourceunaligned
+
+;;; CASE 0: Both source and destination are 32bit aligned
+;;; Convert len to Dwords, unfold x4
+ lsr.f lp_count, r2, ZOLSHFT
+ lpnz @.Lcopy32_64bytes
+ ;; LOOP START
+ LOADX (r6, r1)
+ PREFETCH_READ (r1)
+ PREFETCH_WRITE (r3)
+ LOADX (r8, r1)
+ LOADX (r10, r1)
+ LOADX (r4, r1)
+ STOREX (r6, r3)
+ STOREX (r8, r3)
+ STOREX (r10, r3)
+ STOREX (r4, r3)
+.Lcopy32_64bytes:
+
+ and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes
+.Lsmallchunk:
+ lpnz @.Lcopyremainingbytes
+ ;; LOOP START
+ ldb.ab r5, [r1,1]
+ stb.ab r5, [r3,1]
+.Lcopyremainingbytes:
+
+ j [blink]
+;;; END CASE 0
+
+.Lsourceunaligned:
+ cmp r4, 2
+ beq.d @.LunalignedOffby2
+ sub r2, r2, 1
+
+ bhi.d @.LunalignedOffby3
+ ldb.ab r5, [r1, 1]
+
+;;; CASE 1: The source is unaligned, off by 1
+ ;; Hence I need to read 1 byte for a 16bit alignment
+	;; and 2 bytes to reach 32bit alignment
+ ldh.ab r6, [r1, 2]
+ sub r2, r2, 2
+ ;; Convert to words, unfold x2
+ lsr.f lp_count, r2, 3
+ MERGE_1 (r6, r6, 8)
+ MERGE_2 (r5, r5, 24)
+ or r5, r5, r6
+
+ ;; Both src and dst are aligned
+ lpnz @.Lcopy8bytes_1
+ ;; LOOP START
+ ld.ab r6, [r1, 4]
+ prefetch [r1, 28] ;Prefetch the next read location
+ ld.ab r8, [r1,4]
+ prefetchw [r3, 32] ;Prefetch the next write location
+
+ SHIFT_1 (r7, r6, 24)
+ or r7, r7, r5
+ SHIFT_2 (r5, r6, 8)
+
+ SHIFT_1 (r9, r8, 24)
+ or r9, r9, r5
+ SHIFT_2 (r5, r8, 8)
+
+ st.ab r7, [r3, 4]
+ st.ab r9, [r3, 4]
+.Lcopy8bytes_1:
+
+ ;; Write back the remaining 16bits
+ EXTRACT_1 (r6, r5, 16)
+ sth.ab r6, [r3, 2]
+ ;; Write back the remaining 8bits
+ EXTRACT_2 (r5, r5, 16)
+ stb.ab r5, [r3, 1]
+
+ and.f lp_count, r2, 0x07 ;Last 8bytes
+ lpnz @.Lcopybytewise_1
+ ;; LOOP START
+ ldb.ab r6, [r1,1]
+ stb.ab r6, [r3,1]
+.Lcopybytewise_1:
+ j [blink]
+
+.LunalignedOffby2:
+;;; CASE 2: The source is unaligned, off by 2
+ ldh.ab r5, [r1, 2]
+ sub r2, r2, 1
+
+ ;; Both src and dst are aligned
+ ;; Convert to words, unfold x2
+ lsr.f lp_count, r2, 3
+#ifdef __BIG_ENDIAN__
+ asl.nz r5, r5, 16
+#endif
+ lpnz @.Lcopy8bytes_2
+ ;; LOOP START
+ ld.ab r6, [r1, 4]
+ prefetch [r1, 28] ;Prefetch the next read location
+ ld.ab r8, [r1,4]
+ prefetchw [r3, 32] ;Prefetch the next write location
+
+ SHIFT_1 (r7, r6, 16)
+ or r7, r7, r5
+ SHIFT_2 (r5, r6, 16)
+
+ SHIFT_1 (r9, r8, 16)
+ or r9, r9, r5
+ SHIFT_2 (r5, r8, 16)
+
+ st.ab r7, [r3, 4]
+ st.ab r9, [r3, 4]
+.Lcopy8bytes_2:
+
+#ifdef __BIG_ENDIAN__
+ lsr.nz r5, r5, 16
+#endif
+ sth.ab r5, [r3, 2]
+
+ and.f lp_count, r2, 0x07 ;Last 8bytes
+ lpnz @.Lcopybytewise_2
+ ;; LOOP START
+ ldb.ab r6, [r1,1]
+ stb.ab r6, [r3,1]
+.Lcopybytewise_2:
+ j [blink]
+
+.LunalignedOffby3:
+;;; CASE 3: The source is unaligned, off by 3
+;;; Hence, I need to read 1 byte to achieve the 32bit alignment
+
+ ;; Both src and dst are aligned
+ ;; Convert to words, unfold x2
+ lsr.f lp_count, r2, 3
+#ifdef __BIG_ENDIAN__
+ asl.ne r5, r5, 24
+#endif
+ lpnz @.Lcopy8bytes_3
+ ;; LOOP START
+ ld.ab r6, [r1, 4]
+ prefetch [r1, 28] ;Prefetch the next read location
+ ld.ab r8, [r1,4]
+ prefetchw [r3, 32] ;Prefetch the next write location
+
+ SHIFT_1 (r7, r6, 8)
+ or r7, r7, r5
+ SHIFT_2 (r5, r6, 24)
+
+ SHIFT_1 (r9, r8, 8)
+ or r9, r9, r5
+ SHIFT_2 (r5, r8, 24)
+
+ st.ab r7, [r3, 4]
+ st.ab r9, [r3, 4]
+.Lcopy8bytes_3:
+
+#ifdef __BIG_ENDIAN__
+ lsr.nz r5, r5, 24
+#endif
+ stb.ab r5, [r3, 1]
+
+ and.f lp_count, r2, 0x07 ;Last 8bytes
+ lpnz @.Lcopybytewise_3
+ ;; LOOP START
+ ldb.ab r6, [r1,1]
+ stb.ab r6, [r3,1]
+.Lcopybytewise_3:
+ j [blink]
+
+END(memcpy)
+libc_hidden_def(memcpy)
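The three unaligned-source cases above are instances of one idea: read whole aligned words from the source and splice adjacent words together with a pair of shifts (SHIFT_1/SHIFT_2) plus an or. A little-endian C sketch of that inner loop — not part of the patch, with hypothetical names, and taking the already-fetched head bytes as a parameter the way the asm keeps them in r5:

    #include <stdint.h>
    #include <stddef.h>

    /* Sketch: dst is word-aligned, src_aligned points at the first fully
       aligned source word, m is the original source misalignment (1..3),
       and head holds the 4-m bytes fetched before the loop. */
    static void copy_spliced(uint32_t *dst, const uint32_t *src_aligned,
                             size_t words, unsigned m, uint32_t head)
    {
        unsigned lo = 8 * m, hi = 32 - lo;
        uint32_t carry = head;

        while (words--) {
            uint32_t w = *src_aligned++;   /* ld.ab r6,[r1,4]     */
            *dst++ = carry | (w << hi);    /* SHIFT_1 result or-ed */
            carry = w >> lo;               /* SHIFT_2: save rest   */
        }
        /* the asm then stores the leftover carry bytes and the tail */
    }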
diff --git a/libc/string/arc/arcv2/memset.S b/libc/string/arc/arcv2/memset.S
new file mode 100644
index 000000000..0918d3774
--- /dev/null
+++ b/libc/string/arc/arcv2/memset.S
@@ -0,0 +1,115 @@
+
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+ */
+
+#include <features.h>
+#include <sysdep.h>
+
+#ifdef DONT_USE_PREALLOC
+#define PREWRITE(A,B) prefetchw [(A),(B)]
+#else
+#define PREWRITE(A,B) prealloc [(A),(B)]
+#endif
+
+ENTRY(memset)
+ prefetchw [r0] ; Prefetch the write location
+ mov.f 0, r2
+;;; if size is zero
+ jz.d [blink]
+ mov r3, r0 ; don't clobber ret val
+
+;;; if length < 8
+ brls.d.nt r2, 8, .Lsmallchunk
+ mov.f lp_count,r2
+
+ and.f r4, r0, 0x03
+ rsub lp_count, r4, 4
+ lpnz @.Laligndestination
+ ;; LOOP BEGIN
+ stb.ab r1, [r3,1]
+ sub r2, r2, 1
+.Laligndestination:
+
+;;; Destination is aligned
+ and r1, r1, 0xFF
+ asl r4, r1, 8
+ or r4, r4, r1
+ asl r5, r4, 16
+ or r5, r5, r4
+ mov r4, r5
+
+ sub3 lp_count, r2, 8
+ cmp r2, 64
+ bmsk.hi r2, r2, 5
+ mov.ls lp_count, 0
+ add3.hi r2, r2, 8
+
+;;; Convert len to Dwords, unfold x8
+ lsr.f lp_count, lp_count, 6
+ lpnz @.Lset64bytes
+ ;; LOOP START
+ PREWRITE(r3, 64) ;Prefetch the next write location
+#ifdef __LL64__
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+#else
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+#endif
+.Lset64bytes:
+
+ lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes
+ lpnz .Lset32bytes
+ ;; LOOP START
+ prefetchw [r3, 32] ;Prefetch the next write location
+#ifdef __LL64__
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+ std.ab r4, [r3, 8]
+#else
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+ st.ab r4, [r3, 4]
+#endif
+.Lset32bytes:
+
+ and.f lp_count, r2, 0x1F ;Last remaining 31 bytes
+.Lsmallchunk:
+ lpnz .Lcopy3bytes
+ ;; LOOP START
+ stb.ab r1, [r3, 1]
+.Lcopy3bytes:
+
+ j [blink]
+
+END(memset)
+libc_hidden_def(memset)
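The asl/or pairs after .Laligndestination replicate the fill byte across a register so the loops can store whole words (or double words). The C equivalent, for reference:

    #include <stdint.h>

    /* Replicate the low byte of c into all four bytes of a word. */
    static inline uint32_t replicate_byte(int c)
    {
        uint32_t v = (uint32_t)c & 0xff;
        v |= v << 8;   /* asl r4,r1,8  ; or r4,r4,r1 */
        v |= v << 16;  /* asl r5,r4,16 ; or r5,r5,r4 */
        return v;      /* e.g. 0xAB -> 0xABABABAB */
    }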
diff --git a/libc/string/arc/arcv2/strcmp.S b/libc/string/arc/arcv2/strcmp.S
new file mode 100644
index 000000000..2e0e64a0c
--- /dev/null
+++ b/libc/string/arc/arcv2/strcmp.S
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ *
+ * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+ */
+
+#include <features.h>
+#include <sysdep.h>
+
+ENTRY(strcmp)
+ or r2, r0, r1
+ bmsk_s r2, r2, 1
+ brne r2, 0, @.Lcharloop
+
+;;; s1 and s2 are word aligned
+ ld.ab r2, [r0, 4]
+
+ mov_s r12, 0x01010101
+ ror r11, r12
+ .align 4
+.LwordLoop:
+ ld.ab r3, [r1, 4]
+ ;; Detect NULL char in str1
+ sub r4, r2, r12
+ ld.ab r5, [r0, 4]
+ bic r4, r4, r2
+ and r4, r4, r11
+ brne.d.nt r4, 0, .LfoundNULL
+ ;; Check if the read locations are the same
+ cmp r2, r3
+ beq.d .LwordLoop
+ mov.eq r2, r5
+
+ ;; A match is found, spot it out
+#ifdef __LITTLE_ENDIAN__
+ swape r3, r3
+ mov_s r0, 1
+ swape r2, r2
+#else
+ mov_s r0, 1
+#endif
+ cmp_s r2, r3
+ j_s.d [blink]
+ bset.lo r0, r0, 31
+
+ .align 4
+.LfoundNULL:
+#ifdef __BIG_ENDIAN__
+ swape r4, r4
+ swape r2, r2
+ swape r3, r3
+#endif
+ ;; Find null byte
+ ffs r0, r4
+ bmsk r2, r2, r0
+ bmsk r3, r3, r0
+ swape r2, r2
+ swape r3, r3
+ ;; make the return value
+ sub.f r0, r2, r3
+ mov.hi r0, 1
+ j_s.d [blink]
+ bset.lo r0, r0, 31
+
+ .align 4
+.Lcharloop:
+ ldb.ab r2, [r0, 1]
+ ldb.ab r3, [r1, 1]
+ nop
+ breq r2, 0, .Lcmpend
+ breq r2, r3, .Lcharloop
+
+ .align 4
+.Lcmpend:
+ j_s.d [blink]
+ sub r0, r2, r3
+END(strcmp)
+libc_hidden_def(strcmp)
+
+#ifndef __UCLIBC_HAS_LOCALE__
+strong_alias(strcmp,strcoll)
+libc_hidden_def(strcoll)
+#endif
diff --git a/libc/string/arc/memcmp.S b/libc/string/arc/memcmp.S
new file mode 100644
index 000000000..a60757e7a
--- /dev/null
+++ b/libc/string/arc/memcmp.S
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2007 ARC International (UK) LTD
+ *
+ * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+ */
+
+#include <sysdep.h>
+#include <features.h>
+
+#ifdef __LITTLE_ENDIAN__
+#define WORD2 r2
+#define SHIFT r3
+#else /* BIG ENDIAN */
+#define WORD2 r3
+#define SHIFT r2
+#endif
+
+ENTRY(memcmp)
+ or r12,r0,r1
+ asl_s r12,r12,30
+ sub r3,r2,1
+ brls r2,r12,.Lbytewise
+ ld r4,[r0,0]
+ ld r5,[r1,0]
+ lsr.f lp_count,r3,3
+#ifdef __HS__
+ /* In ARCv2 a branch can't be the last instruction in a zero overhead
+ * loop.
+ * So we move the branch to the start of the loop, duplicate it
+ * after the end, and set up r12 so that the branch isn't taken
+ * initially.
+ */
+ mov_s r12,WORD2
+ lpne .Loop_end
+ brne WORD2,r12,.Lodd
+ ld WORD2,[r0,4]
+#else
+ lpne .Loop_end
+ ld_s WORD2,[r0,4]
+#endif
+ ld_s r12,[r1,4]
+ brne r4,r5,.Leven
+ ld.a r4,[r0,8]
+ ld.a r5,[r1,8]
+#ifdef __HS__
+.Loop_end:
+ brne WORD2,r12,.Lodd
+#else
+ brne WORD2,r12,.Lodd
+.Loop_end:
+#endif
+ asl_s SHIFT,SHIFT,3
+ bhs_s .Last_cmp
+ brne r4,r5,.Leven
+ ld r4,[r0,4]
+ ld r5,[r1,4]
+#ifdef __LITTLE_ENDIAN__
+ nop_s
+ ; one more load latency cycle
+.Last_cmp:
+ xor r0,r4,r5
+ bset r0,r0,SHIFT
+ sub_s r1,r0,1
+ bic_s r1,r1,r0
+ norm r1,r1
+ b.d .Leven_cmp
+ and r1,r1,24
+.Leven:
+ xor r0,r4,r5
+ sub_s r1,r0,1
+ bic_s r1,r1,r0
+ norm r1,r1
+ ; slow track insn
+ and r1,r1,24
+.Leven_cmp:
+ asl r2,r4,r1
+ asl r12,r5,r1
+ lsr_s r2,r2,1
+ lsr_s r12,r12,1
+ j_s.d [blink]
+ sub r0,r2,r12
+ .balign 4
+.Lodd:
+ xor r0,WORD2,r12
+ sub_s r1,r0,1
+ bic_s r1,r1,r0
+ norm r1,r1
+ ; slow track insn
+ and r1,r1,24
+ asl_s r2,r2,r1
+ asl_s r12,r12,r1
+ lsr_s r2,r2,1
+ lsr_s r12,r12,1
+ j_s.d [blink]
+ sub r0,r2,r12
+#else /* BIG ENDIAN */
+.Last_cmp:
+ neg_s SHIFT,SHIFT
+ lsr r4,r4,SHIFT
+ lsr r5,r5,SHIFT
+ ; slow track insn
+.Leven:
+ sub.f r0,r4,r5
+ mov.ne r0,1
+ j_s.d [blink]
+ bset.cs r0,r0,31
+.Lodd:
+ cmp_s WORD2,r12
+ mov_s r0,1
+ j_s.d [blink]
+ bset.cs r0,r0,31
+#endif /* ENDIAN */
+ .balign 4
+.Lbytewise:
+ breq r2,0,.Lnil
+ ldb r4,[r0,0]
+ ldb r5,[r1,0]
+ lsr.f lp_count,r3
+#ifdef __HS__
+ mov r12,r3
+ lpne .Lbyte_end
+ brne r3,r12,.Lbyte_odd
+#else
+ lpne .Lbyte_end
+#endif
+ ldb_s r3,[r0,1]
+ ldb r12,[r1,1]
+ brne r4,r5,.Lbyte_even
+ ldb.a r4,[r0,2]
+ ldb.a r5,[r1,2]
+#ifdef __HS__
+.Lbyte_end:
+ brne r3,r12,.Lbyte_odd
+#else
+ brne r3,r12,.Lbyte_odd
+.Lbyte_end:
+#endif
+ bcc .Lbyte_even
+ brne r4,r5,.Lbyte_even
+ ldb_s r3,[r0,1]
+ ldb_s r12,[r1,1]
+.Lbyte_odd:
+ j_s.d [blink]
+ sub r0,r3,r12
+.Lbyte_even:
+ j_s.d [blink]
+ sub r0,r4,r5
+.Lnil:
+ j_s.d [blink]
+ mov r0,0
+END(memcmp)
+libc_hidden_def(memcmp)
+
+#ifdef __UCLIBC_SUSV3_LEGACY__
+strong_alias(memcmp,bcmp)
+#endif
diff --git a/libc/string/arc/memcpy.S b/libc/string/arc/memcpy.S
new file mode 100644
index 000000000..1c11951e4
--- /dev/null
+++ b/libc/string/arc/memcpy.S
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2007 ARC International (UK) LTD
+ *
+ * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+ */
+
+#include <sysdep.h>
+
+/* This memcpy implementation does not support objects of 1GB or larger -
+ the check for alignment does not work then. */
+/* We assume that most sources and destinations are aligned, and
+ that also lengths are mostly a multiple of four, although to a lesser
+ extent. */
+ENTRY(memcpy)
+ or r3,r0,r1
+ asl_s r3,r3,30
+ mov_s r5,r0
+ brls.d r2,r3,.Lcopy_bytewise
+ sub.f r3,r2,1
+ ld_s r12,[r1,0]
+ asr.f lp_count,r3,3
+ bbit0.d r3,2,.Lnox4
+ bmsk_s r2,r2,1
+ st.ab r12,[r5,4]
+ ld.a r12,[r1,4]
+.Lnox4:
+ lppnz .Lendloop
+ ld_s r3,[r1,4]
+ st.ab r12,[r5,4]
+ ld.a r12,[r1,8]
+ st.ab r3,[r5,4]
+.Lendloop:
+ breq r2,0,.Last_store
+ ld r3,[r5,0]
+#ifdef __LITTLE_ENDIAN__
+ add3 r2,-1,r2
+ ; uses long immediate
+ xor_s r12,r12,r3
+ bmsk r12,r12,r2
+ xor_s r12,r12,r3
+#else /* BIG ENDIAN */
+ sub3 r2,31,r2
+ ; uses long immediate
+ xor_s r3,r3,r12
+ bmsk r3,r3,r2
+ xor_s r12,r12,r3
+#endif /* ENDIAN */
+.Last_store:
+ j_s.d [blink]
+ st r12,[r5,0]
+
+ .balign 4
+.Lcopy_bytewise:
+ jcs [blink]
+ ldb_s r12,[r1,0]
+ lsr.f lp_count,r3
+ bhs_s .Lnox1
+ stb.ab r12,[r5,1]
+ ldb.a r12,[r1,1]
+.Lnox1:
+ lppnz .Lendbloop
+ ldb_s r3,[r1,1]
+ stb.ab r12,[r5,1]
+ ldb.a r12,[r1,2]
+ stb.ab r3,[r5,1]
+.Lendbloop:
+ j_s.d [blink]
+ stb r12,[r5,0]
+END(memcpy)
+libc_hidden_def(memcpy)
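The prologue's "or r3,r0,r1 ; asl_s r3,r3,30" merges the low two bits of both pointers so that a single compare decides whether the word loop is usable; because that same compare also pits the shifted alignment bits against the length, the header comment rules out objects of 1GB or larger. The alignment half of the test, written out in C:

    #include <stdint.h>

    /* One test for "both pointers 4-byte aligned": OR the addresses,
       then inspect only the low two bits. */
    static inline int both_word_aligned(const void *dst, const void *src)
    {
        return (((uintptr_t)dst | (uintptr_t)src) & 3) == 0;
    }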
diff --git a/libc/string/arc/memset.S b/libc/string/arc/memset.S
new file mode 100644
index 000000000..f4048455a
--- /dev/null
+++ b/libc/string/arc/memset.S
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2007 ARC International (UK) LTD
+ *
+ * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+ */
+
+#include <sysdep.h>
+
+#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */
+
+ENTRY(memset)
+
+ mov_s r4,r0
+ or r12,r0,r2
+ bmsk.f r12,r12,1
+ extb_s r1,r1
+ asl r3,r1,8
+ beq.d .Laligned
+ or_s r1,r1,r3
+ brls r2,SMALL,.Ltiny
+ add r3,r2,r0
+ stb r1,[r3,-1]
+ bclr_s r3,r3,0
+ stw r1,[r3,-2]
+ bmsk.f r12,r0,1
+ add_s r2,r2,r12
+ sub.ne r2,r2,4
+ stb.ab r1,[r4,1]
+ and r4,r4,-2
+ stw.ab r1,[r4,2]
+ and r4,r4,-4
+.Laligned: ; This code address should be aligned for speed.
+ asl r3,r1,16
+ lsr.f lp_count,r2,2
+ or_s r1,r1,r3
+ lpne .Loop_end
+ st.ab r1,[r4,4]
+.Loop_end:
+ j_s [blink]
+
+
+ .balign 4
+.Ltiny:
+ mov.f lp_count,r2
+ lpne .Ltiny_end
+ stb.ab r1,[r4,1]
+.Ltiny_end:
+ j_s [blink]
+END(memset)
+libc_hidden_def(memset)
diff --git a/libc/string/arc/strchr.S b/libc/string/arc/strchr.S
new file mode 100644
index 000000000..443993589
--- /dev/null
+++ b/libc/string/arc/strchr.S
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2007 ARC International (UK) LTD
+ *
+ * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+ */
+
+#include <sysdep.h>
+#include <features.h>
+
+/* ARC700 has a relatively long pipeline and branch prediction, so we want
+ to avoid branches that are hard to predict. On the other hand, the
+ presence of the norm instruction makes it easier to operate on whole
+ words branch-free. */
+
+ENTRY(strchr)
+ extb_s r1,r1
+ asl r5,r1,8
+ bmsk r2,r0,1
+ or r5,r5,r1
+ mov_s r3,0x01010101
+ breq.d r2,r0,.Laligned
+ asl r4,r5,16
+ sub_s r0,r0,r2
+ asl r7,r2,3
+ ld_s r2,[r0]
+#ifdef __LITTLE_ENDIAN__
+ asl r7,r3,r7
+#else
+ lsr r7,r3,r7
+#endif
+ or r5,r5,r4
+ ror r4,r3
+ sub r12,r2,r7
+ bic_s r12,r12,r2
+ and r12,r12,r4
+ brne.d r12,0,.Lfound0_ua
+ xor r6,r2,r5
+ ld.a r2,[r0,4]
+ sub r12,r6,r7
+ bic r12,r12,r6
+#ifdef __LITTLE_ENDIAN__
+ and r7,r12,r4
+ breq r7,0,.Loop ; For speed, we want this branch to be unaligned.
+ b .Lfound_char ; Likewise this one.
+#else
+ and r12,r12,r4
+ breq r12,0,.Loop ; For speed, we want this branch to be unaligned.
+ lsr_s r12,r12,7
+ bic r2,r7,r6
+ b.d .Lfound_char_b
+ and_s r2,r2,r12
+#endif
+; /* We require this code address to be unaligned for speed... */
+.Laligned:
+ ld_s r2,[r0]
+ or r5,r5,r4
+ ror r4,r3
+; /* ... so that this code address is aligned, for itself and ... */
+.Loop:
+ sub r12,r2,r3
+ bic_s r12,r12,r2
+ and r12,r12,r4
+ brne.d r12,0,.Lfound0
+ xor r6,r2,r5
+ ld.a r2,[r0,4]
+ sub r12,r6,r3
+ bic r12,r12,r6
+ and r7,r12,r4
+ breq r7,0,.Loop /* ... so that this branch is unaligned. */
+ ; Found searched-for character. r0 has already advanced to next word.
+#ifdef __LITTLE_ENDIAN__
+/* We only need the information about the first matching byte
+ (i.e. the least significant matching byte) to be exact,
+ hence there is no problem with carry effects. */
+.Lfound_char:
+ sub r3,r7,1
+ bic r3,r3,r7
+ norm r2,r3
+ sub_s r0,r0,1
+ asr_s r2,r2,3
+ j.d [blink]
+ sub_s r0,r0,r2
+
+ .balign 4
+.Lfound0_ua:
+ mov r3,r7
+.Lfound0:
+ sub r3,r6,r3
+ bic r3,r3,r6
+ and r2,r3,r4
+ or_s r12,r12,r2
+ sub_s r3,r12,1
+ bic_s r3,r3,r12
+ norm r3,r3
+ add_s r0,r0,3
+ asr_s r12,r3,3
+ asl.f 0,r2,r3
+ sub_s r0,r0,r12
+ j_s.d [blink]
+ mov.pl r0,0
+#else /* BIG ENDIAN */
+.Lfound_char:
+ lsr r7,r7,7
+
+ bic r2,r7,r6
+.Lfound_char_b:
+ norm r2,r2
+ sub_s r0,r0,4
+ asr_s r2,r2,3
+ j.d [blink]
+ add_s r0,r0,r2
+
+.Lfound0_ua:
+ mov_s r3,r7
+.Lfound0:
+ asl_s r2,r2,7
+ or r7,r6,r4
+ bic_s r12,r12,r2
+ sub r2,r7,r3
+ or r2,r2,r6
+ bic r12,r2,r12
+ bic.f r3,r4,r12
+ norm r3,r3
+
+ add.pl r3,r3,1
+ asr_s r12,r3,3
+ asl.f 0,r2,r3
+ add_s r0,r0,r12
+ j_s.d [blink]
+ mov.mi r0,0
+#endif /* ENDIAN */
+END(strchr)
+libc_hidden_def(strchr)
+
+#ifdef __UCLIBC_SUSV3_LEGACY__
+strong_alias(strchr,index)
+#endif
diff --git a/libc/string/arc/strcmp.S b/libc/string/arc/strcmp.S
new file mode 100644
index 000000000..5a0e56045
--- /dev/null
+++ b/libc/string/arc/strcmp.S
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2007 ARC International (UK) LTD
+ *
+ * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+ */
+
+#include <features.h>
+#include <sysdep.h>
+
+/* This is optimized primarily for the ARC700.
+ It would be possible to speed up the loops by one cycle / word
+ respective one cycle / byte by forcing double source 1 alignment, unrolling
+ by a factor of two, and speculatively loading the second word / byte of
+ source 1; however, that would increase the overhead for loop setup / finish,
+ and strcmp might often terminate early. */
+
+ENTRY(strcmp)
+ or r2,r0,r1
+ bmsk_s r2,r2,1
+ brne r2,0,.Lcharloop
+ mov_s r12,0x01010101
+ ror r5,r12
+.Lwordloop:
+ ld.ab r2,[r0,4]
+ ld.ab r3,[r1,4]
+ nop_s
+ sub r4,r2,r12
+ bic r4,r4,r2
+ and r4,r4,r5
+ brne r4,0,.Lfound0
+ breq r2,r3,.Lwordloop
+#ifdef __LITTLE_ENDIAN__
+ xor r0,r2,r3 ; mask for difference
+ sub_s r1,r0,1
+ bic_s r0,r0,r1 ; mask for least significant difference bit
+ sub r1,r5,r0
+ xor r0,r5,r1 ; mask for least significant difference byte
+ and_s r2,r2,r0
+ and_s r3,r3,r0
+#endif /* LITTLE ENDIAN */
+ cmp_s r2,r3
+ mov_s r0,1
+ j_s.d [blink]
+ bset.lo r0,r0,31
+
+ .balign 4
+#ifdef __LITTLE_ENDIAN__
+.Lfound0:
+ xor r0,r2,r3 ; mask for difference
+ or r0,r0,r4 ; or in zero indicator
+ sub_s r1,r0,1
+ bic_s r0,r0,r1 ; mask for least significant difference bit
+ sub r1,r5,r0
+ xor r0,r5,r1 ; mask for least significant difference byte
+ and_s r2,r2,r0
+ and_s r3,r3,r0
+ sub.f r0,r2,r3
+ mov.hi r0,1
+ j_s.d [blink]
+ bset.lo r0,r0,31
+#else /* BIG ENDIAN */
+ /* The zero-detection above can mis-detect 0x01 bytes as zeroes
+	   because of carry propagation from a less significant zero byte.
+ We can compensate for this by checking that bit0 is zero.
+ This compensation is not necessary in the step where we
+ get a low estimate for r2, because in any affected bytes
+ we already have 0x00 or 0x01, which will remain unchanged
+ when bit 7 is cleared. */
+ .balign 4
+.Lfound0:
+ lsr r0,r4,8
+ lsr_s r1,r2
+ bic_s r2,r2,r0 ; get low estimate for r2 and get ...
+ bic_s r0,r0,r1 ; <this is the adjusted mask for zeros>
+ or_s r3,r3,r0 ; ... high estimate r3 so that r2 > r3 will ...
+ cmp_s r3,r2 ; ... be independent of trailing garbage
+ or_s r2,r2,r0 ; likewise for r3 > r2
+ bic_s r3,r3,r0
+ rlc r0,0 ; r0 := r2 > r3 ? 1 : 0
+ cmp_s r2,r3
+ j_s.d [blink]
+ bset.lo r0,r0,31
+#endif /* ENDIAN */
+
+ .balign 4
+.Lcharloop:
+ ldb.ab r2,[r0,1]
+ ldb.ab r3,[r1,1]
+ nop_s
+ breq r2,0,.Lcmpend
+ breq r2,r3,.Lcharloop
+.Lcmpend:
+ j_s.d [blink]
+ sub r0,r2,r3
+END(strcmp)
+libc_hidden_def(strcmp)
+
+#ifndef __UCLIBC_HAS_LOCALE__
+strong_alias(strcmp,strcoll)
+libc_hidden_def(strcoll)
+#endif
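The word loop's NUL test (sub/bic/and with r12 = 0x01010101 and r5 = ror(r12) = 0x80808080) is the classic has-zero-byte idiom. Its existence test is exact, but as the big-endian comment above explains, indicator bits above the lowest real zero can be spurious because of borrow propagation, which the .Lfound0 paths compensate for. The C equivalent of the test itself:

    #include <stdint.h>

    /* Nonzero iff some byte of x is 0x00.  In the asm: r12 = 0x01010101,
       r5 = ror(r12) = 0x80808080, then sub / bic / and. */
    static inline uint32_t has_zero_byte(uint32_t x)
    {
        return (x - 0x01010101u) & ~x & 0x80808080u;
    }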
diff --git a/libc/string/arc/strcpy.S b/libc/string/arc/strcpy.S
new file mode 100644
index 000000000..241bf3ee6
--- /dev/null
+++ b/libc/string/arc/strcpy.S
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2007 ARC International (UK) LTD
+ *
+ * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+ */
+
+
+#include <sysdep.h>
+
+/* If dst and src are 4 byte aligned, copy 8 bytes at a time.
+ If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
+ it 8 byte aligned. Thus, we can do a little read-ahead, without
+ dereferencing a cache line that we should not touch.
+ Note that short and long instructions have been scheduled to avoid
+ branch stalls.
+ The beq_s to r3z could be made unaligned & long to avoid a stall
+   there, but it is not likely to be taken often, and it
+   would also be likely to cost an unaligned mispredict at the next call. */
+
+ENTRY(strcpy)
+ or r2,r0,r1
+ bmsk_s r2,r2,1
+ brne.d r2,0,charloop
+ mov_s r10,r0
+ ld_s r3,[r1,0]
+ mov r8,0x01010101
+ bbit0.d r1,2,loop_start
+ ror r12,r8
+ sub r2,r3,r8
+ bic_s r2,r2,r3
+ tst_s r2,r12
+ bne r3z
+ mov_s r4,r3
+ .balign 4
+loop:
+ ld.a r3,[r1,4]
+ st.ab r4,[r10,4]
+loop_start:
+ ld.a r4,[r1,4]
+ sub r2,r3,r8
+ bic_s r2,r2,r3
+ tst_s r2,r12
+ bne_s r3z
+ st.ab r3,[r10,4]
+ sub r2,r4,r8
+ bic r2,r2,r4
+ tst r2,r12
+ beq loop
+ mov_s r3,r4
+#ifdef __LITTLE_ENDIAN__
+r3z: bmsk.f r1,r3,7
+ lsr_s r3,r3,8
+#else
+r3z: lsr.f r1,r3,24
+ asl_s r3,r3,8
+#endif
+ bne.d r3z
+ stb.ab r1,[r10,1]
+ j_s [blink]
+
+ .balign 4
+charloop:
+ ldb.ab r3,[r1,1]
+
+
+ brne.d r3,0,charloop
+ stb.ab r3,[r10,1]
+ j [blink]
+END(strcpy)
+libc_hidden_def(strcpy)
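A rough C rendering of the aligned fast path above, ignoring the read-ahead scheduling the comment describes. This is a sketch under stated assumptions — both pointers already word-aligned, little-endian tail matching the r3z code — not the patch's exact algorithm:

    #include <stdint.h>

    static char *strcpy_aligned(char *dst, const char *src)
    {
        char *ret = dst;
        const uint32_t *s = (const uint32_t *)src;
        uint32_t *d = (uint32_t *)dst;
        uint32_t w = *s++;

        /* Copy whole words until one contains the terminating NUL. */
        while (((w - 0x01010101u) & ~w & 0x80808080u) == 0) {
            *d++ = w;
            w = *s++;
        }
        /* Store the last word byte by byte, NUL included (the r3z tail). */
        dst = (char *)d;
        do {
            *dst++ = (char)w;
            w >>= 8;
        } while (dst[-1] != '\0');
        return ret;
    }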
diff --git a/libc/string/arc/strlen.S b/libc/string/arc/strlen.S
new file mode 100644
index 000000000..0b9b93815
--- /dev/null
+++ b/libc/string/arc/strlen.S
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2007 ARC International (UK) LTD
+ *
+ * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+ */
+
+
+#include <sysdep.h>
+
+ENTRY(strlen)
+ or r3,r0,7
+ ld r2,[r3,-7]
+ ld.a r6,[r3,-3]
+ mov r4,0x01010101
+ ; uses long immediate
+#ifdef __LITTLE_ENDIAN__
+ asl_s r1,r0,3
+ btst_s r0,2
+ asl r7,r4,r1
+ ror r5,r4
+ sub r1,r2,r7
+ bic_s r1,r1,r2
+ mov.eq r7,r4
+ sub r12,r6,r7
+ bic r12,r12,r6
+ or.eq r12,r12,r1
+ and r12,r12,r5
+ brne r12,0,.Learly_end
+#else /* BIG ENDIAN */
+ ror r5,r4
+ btst_s r0,2
+ mov_s r1,31
+ sub3 r7,r1,r0
+ sub r1,r2,r4
+ bic_s r1,r1,r2
+ bmsk r1,r1,r7
+ sub r12,r6,r4
+ bic r12,r12,r6
+ bmsk.ne r12,r12,r7
+ or.eq r12,r12,r1
+ and r12,r12,r5
+ brne r12,0,.Learly_end
+#endif /* ENDIAN */
+
+.Loop:
+ ld_s r2,[r3,4]
+ ld.a r6,[r3,8]
+ ; stall for load result
+ sub r1,r2,r4
+ bic_s r1,r1,r2
+ sub r12,r6,r4
+ bic r12,r12,r6
+ or r12,r12,r1
+ and r12,r12,r5
+ breq r12,0,.Loop
+.Lend:
+ and.f r1,r1,r5
+ sub.ne r3,r3,4
+ mov.eq r1,r12
+#ifdef __LITTLE_ENDIAN__
+ sub_s r2,r1,1
+ bic_s r2,r2,r1
+ norm r1,r2
+ sub_s r0,r0,3
+ lsr_s r1,r1,3
+ sub r0,r3,r0
+ j_s.d [blink]
+ sub r0,r0,r1
+#else /* BIG ENDIAN */
+ lsr_s r1,r1,7
+ mov.eq r2,r6
+ bic_s r1,r1,r2
+ norm r1,r1
+ sub r0,r3,r0
+ lsr_s r1,r1,3
+ j_s.d [blink]
+ add r0,r0,r1
+#endif /* ENDIAN */
+.Learly_end:
+ b.d .Lend
+ sub_s.ne r1,r1,r1
+END(strlen)
+libc_hidden_def(strlen)
diff --git a/libc/string/arm/_memcpy.S b/libc/string/arm/_memcpy.S
index 103580a0c..2999e8ee6 100644
--- a/libc/string/arm/_memcpy.S
+++ b/libc/string/arm/_memcpy.S
@@ -40,6 +40,7 @@
#include <features.h>
#include <endian.h>
#include <bits/arm_asm.h>
+#include <bits/arm_bx.h>
#if !defined(THUMB1_ONLY)
/*
@@ -67,8 +68,9 @@
* a time where possible.
*
* Note: r12 (aka ip) can be trashed during the function along with
- * r0-r3 although r0-r2 have defined uses i.e. src, dest, len through out.
+ * r0-r3 although r0-r2 have defined uses i.e. dest, src, len throughout.
* Additional registers are preserved prior to use i.e. r4, r5 & lr
+ * The return value in r0 must be the destination address.
*
* Apologies for the state of the comments ;-)
*/
@@ -108,12 +110,8 @@ _memcpy:
cmp r1, r0
bcc .Lmemcpy_backwards
- IT(tt, eq) /* Quick abort for src=dst */
-#if defined(__USE_BX__)
- bxeq lr
-#else
- moveq pc, lr
-#endif
+ IT(t, eq) /* Quick abort for src=dst */
+ BXC(eq, lr)
stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
subs r2, r2, #4
blt .Lmemcpy_fl4 /* less than 4 bytes */
@@ -453,11 +451,7 @@ _memcpy:
/* less than 4 bytes to go */
adds r2, r2, #4
IT(t, eq)
-#if defined(__USE_BX__)
- bxeq lr
-#else
- moveq pc, lr /* done */
-#endif
+ BXC(eq, lr) /* done */
/* copy the crud byte at a time */
cmp r2, #2
ldrb r3, [r1, #-1]!
@@ -475,11 +469,7 @@ _memcpy:
ldrgtb r3, [r1, #-1]!
strgtb r3, [r0, #-1]!
#endif
-#if defined(__USE_BX__)
- bx lr
-#else
- mov pc, lr
-#endif
+ BX(lr)
/* erg - unaligned destination */
.Lmemcpy_bdestul:
cmp r12, #2
diff --git a/libc/string/arm/memcmp.S b/libc/string/arm/memcmp.S
index 65409f43a..5b9473cd0 100644
--- a/libc/string/arm/memcmp.S
+++ b/libc/string/arm/memcmp.S
@@ -31,6 +31,7 @@
#include <features.h>
#include <bits/arm_asm.h>
+#include <bits/arm_bx.h>
.text
.global memcmp
@@ -66,11 +67,7 @@ memcmp:
subs r2, r2, #1
IT(tt, mi)
movmi r0, #0
-#if defined(__USE_BX__)
- bxmi lr
-#else
- movmi pc, lr
-#endif
+ BXC(mi, lr)
/* ip == last src address to compare */
add ip, r0, r2
1:
@@ -81,11 +78,7 @@ memcmp:
cmpcs r2, r3
beq 1b
sub r0, r2, r3
-#if defined(__USE_BX__)
- bx lr
-#else
- mov pc, lr
-#endif
+ BX(lr)
#endif
.size memcmp,.-memcmp
diff --git a/libc/string/arm/memset.S b/libc/string/arm/memset.S
index 66aa6039c..2be4850e4 100644
--- a/libc/string/arm/memset.S
+++ b/libc/string/arm/memset.S
@@ -13,13 +13,12 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <features.h>
-#include <sys/syscall.h>
#include <bits/arm_asm.h>
+#include <bits/arm_bx.h>
.text
.global memset
@@ -109,11 +108,7 @@ memset:
2:
movs a3, a3 @ anything left?
IT(t, eq)
-#if defined(__USE_BX__)
- bxeq lr
-#else
- moveq pc, lr @ nope
-#endif
+ BXC(eq, lr) @ nope
#if defined (__thumb2__)
1:
strb a2, [a4], #1
@@ -131,11 +126,7 @@ memset:
strb a2, [a4], $1
strb a2, [a4], $1
strb a2, [a4], $1
-#if defined(__USE_BX__)
- bx lr
-#else
- mov pc, lr
-#endif
+ BX(lr)
#endif
#endif
diff --git a/libc/string/arm/strcmp.S b/libc/string/arm/strcmp.S
index 97363c1c2..81416a9a5 100644
--- a/libc/string/arm/strcmp.S
+++ b/libc/string/arm/strcmp.S
@@ -31,6 +31,7 @@
#include <features.h>
#include <bits/arm_asm.h>
+#include <bits/arm_bx.h>
.text
.global strcmp
@@ -62,11 +63,7 @@ strcmp:
cmpcs r2, r3
beq 1b
sub r0, r2, r3
-#if defined(__USE_BX__)
- bx lr
-#else
- mov pc, lr
-#endif
+ BX(lr)
#endif
.size strcmp,.-strcmp
diff --git a/libc/string/arm/strlen.S b/libc/string/arm/strlen.S
index 949e918f4..9995d768c 100644
--- a/libc/string/arm/strlen.S
+++ b/libc/string/arm/strlen.S
@@ -13,14 +13,13 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <features.h>
#include <endian.h>
-#include <sys/syscall.h>
#include <bits/arm_asm.h>
+#include <bits/arm_bx.h>
/* size_t strlen(const char *S)
* entry: r0 -> string
@@ -99,11 +98,7 @@ Llastword: @ drop through to here once we find a
IT(t, ne)
addne r0, r0, $1 @ must be zero)
#endif
-#if defined(__USE_BX__)
- bx lr
-#else
- mov pc,lr
-#endif
+ BX(lr)
#endif
.size strlen,.-strlen
diff --git a/libc/string/arm/strncmp.S b/libc/string/arm/strncmp.S
deleted file mode 100644
index 8487639c8..000000000
--- a/libc/string/arm/strncmp.S
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2002 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Adapted for uClibc from NetBSD strncmp.S, version 1.2 2003/04/05
- * by Erik Andersen <andersen@codepoet.org>
- */
-
-#include <features.h>
-#include <bits/arm_asm.h>
-
-.text
-.global strncmp
-.type strncmp,%function
-.align 4
-
-#if defined(THUMB1_ONLY)
-.thumb_func
-strncmp:
- /* if (len == 0) return 0 */
- cmp r2, #0
- bne 1f
- mov r0, #0
- bx lr
-1:
- push {r4}
-
- /* ip == last src address to compare */
- add r4, r0, r2
-2:
- cmp r4, r0
- beq 3f
- ldrb r2, [r0]
- add r0, r0, #1
- ldrb r3, [r1]
- add r1, r1, #1
- cmp r2, #0
- beq 3f
- cmp r2, r3
- beq 2b
-3:
- sub r0, r2, r3
- pop {r4}
- bx lr
-#else
-strncmp:
- /* if (len == 0) return 0 */
- cmp r2, #0
- IT(tt, eq)
- moveq r0, #0
-#if defined(__USE_BX__)
- bxeq lr
-#else
- moveq pc, lr
-#endif
- subs r2, r2, #1
-
- /* ip == last src address to compare */
- add ip, r0, r2
-1:
- ldrb r2, [r0], #1
- ldrb r3, [r1], #1
- cmp ip, r0
- IT(tt, cs)
- cmpcs r2, #1
- cmpcs r2, r3
- beq 1b
- sub r0, r2, r3
-#if defined(__USE_BX__)
- bx lr
-#else
- mov pc, lr
-#endif
-#endif
-
-.size strncmp,.-strncmp
-
-libc_hidden_weak(strncmp)
diff --git a/libc/string/avr32/Makefile b/libc/string/avr32/Makefile
index e19e9d9ec..385cf085e 100644
--- a/libc/string/avr32/Makefile
+++ b/libc/string/avr32/Makefile
@@ -13,8 +13,7 @@
# details.
#
# You should have received a copy of the GNU Library General Public License
-# along with this program; if not, write to the Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
top_srcdir := ../../../
top_builddir := ../../../
diff --git a/libc/string/basename.c b/libc/string/basename.c
index a076c20e9..abc9d89db 100644
--- a/libc/string/basename.c
+++ b/libc/string/basename.c
@@ -5,10 +5,9 @@
* Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
*/
-#include "_string.h"
+#include <string.h>
#ifdef __USE_GNU
-/* Experimentally off - libc_hidden_proto(basename) */
char *basename(const char *path)
{
@@ -25,5 +24,4 @@ char *basename(const char *path)
return (char *) p;
}
-libc_hidden_def(basename)
#endif
diff --git a/libc/string/bcopy.c b/libc/string/bcopy.c
index 3aa7eab1e..e16ba241d 100644
--- a/libc/string/bcopy.c
+++ b/libc/string/bcopy.c
@@ -5,35 +5,14 @@
* Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
*/
-#include "_string.h"
+#include <string.h>
#ifdef __UCLIBC_SUSV3_LEGACY__
-
-/* Experimentally off - libc_hidden_proto(memmove) */
-
void bcopy(const void *s2, void *s1, size_t n)
{
#if 1
memmove(s1, s2, n);
#else
-#ifdef __BCC__
- register char *s;
- register const char *p;
-
- s = s1;
- p = s2;
- if (p >= s) {
- while (n--) {
- *s++ = *p++;
- }
- } else {
- s += n;
- p += n;
- while (n--) {
- *--s = *--p;
- }
- }
-#else
register char *s;
register const char *p;
@@ -51,6 +30,5 @@ void bcopy(const void *s2, void *s1, size_t n)
}
}
#endif
-#endif
}
#endif
diff --git a/libc/string/bfin/memchr.S b/libc/string/bfin/memchr.S
index 88e46bef6..26d419f7c 100644
--- a/libc/string/bfin/memchr.S
+++ b/libc/string/bfin/memchr.S
@@ -25,8 +25,8 @@
.weak _memchr
ENTRY(_memchr)
- P0 = R0; // P0 = address
- P2 = R2; // P2 = count
+ P0 = R0; /* P0 = address */
+ P2 = R2; /* P2 = count */
R1 = R1.B(Z);
CC = R2 == 0;
IF CC JUMP .Lfailed;
diff --git a/libc/string/bfin/strcmp.S b/libc/string/bfin/strcmp.S
index 12e8c53c6..ef23aa9ab 100644
--- a/libc/string/bfin/strcmp.S
+++ b/libc/string/bfin/strcmp.S
@@ -29,66 +29,66 @@ ENTRY(_strcmp)
p1 = r0;
p2 = r1;
- p0 = -1; // (need for loop counter init)
+ p0 = -1; /* (need for loop counter init) */
- // check if byte aligned
- r0 = r0 | r1; // check both pointers at same time
- r0 <<= 30; // dump all but last 2 bits
- cc = az; // are they zero?
- if !cc jump .Lunaligned; // no; use unaligned code.
- // fall-thru for aligned case..
+ /* check if byte aligned */
+ r0 = r0 | r1; /* check both pointers at same time */
+ r0 <<= 30; /* dump all but last 2 bits */
+ cc = az; /* are they zero? */
+ if !cc jump .Lunaligned; /* no; use unaligned code. */
+ /* fall-thru for aligned case.. */
- // note that r0 is zero from the previous...
- // p0 set to -1
+ /* note that r0 is zero from the previous... */
+ /* p0 set to -1 */
LSETUP (.Lbeginloop, .Lendloop) lc0=p0;
- // pick up first words
+ /* pick up first words */
r1 = [p1++];
r2 = [p2++];
- // make up mask: 0FF0FF
+ /* make up mask: 0FF0FF */
r7 = 0xFF;
r7.h = 0xFF;
- // loop : 9 cycles to check 4 characters
+ /* loop : 9 cycles to check 4 characters */
cc = r1 == r2;
.Lbeginloop:
- if !cc jump .Lnotequal4; // compare failure, exit loop
+ if !cc jump .Lnotequal4; /* compare failure, exit loop */
- // starting with 44332211
- // see if char 3 or char 1 is 0
- r3 = r1 & r7; // form 00330011
- // add to zero, and (r2 is free, reload)
+ /* starting with 44332211 */
+ /* see if char 3 or char 1 is 0 */
+ r3 = r1 & r7; /* form 00330011 */
+ /* add to zero, and (r2 is free, reload) */
r6 = r3 +|+ r0 || r2 = [p2++] || nop;
- cc = az; // true if either is zero
- r3 = r1 ^ r3; // form 44002200 (4321^0301 => 4020)
- // (trick, saves having another mask)
- // add to zero, and (r1 is free, reload)
+ cc = az; /* true if either is zero */
+ r3 = r1 ^ r3; /* form 44002200 (4321^0301 => 4020) */
+ /* (trick, saves having another mask) */
+ /* add to zero, and (r1 is free, reload) */
r6 = r3 +|+ r0 || r1 = [p1++] || nop;
- cc |= az; // true if either is zero
- if cc jump .Lzero4; // leave if a zero somewhere
+ cc |= az; /* true if either is zero */
+ if cc jump .Lzero4; /* leave if a zero somewhere */
.Lendloop:
cc = r1 == r2;
- // loop exits
-.Lnotequal4: // compare failure on 4-char compare
- // address pointers are one word ahead;
- // faster to use zero4 exit code
+ /* loop exits */
+.Lnotequal4: /* compare failure on 4-char compare */
+ /* address pointers are one word ahead; */
+ /* faster to use zero4 exit code */
p1 += 4;
p2 += 4;
-.Lzero4: // one of the bytes in word 1 is zero
- // but we've already fetched the next word; so
- // backup two to look at failing word again
+.Lzero4: /* one of the bytes in word 1 is zero */
+ /* but we've already fetched the next word; so */
+ /* backup two to look at failing word again */
p1 += -8;
p2 += -8;
- // here when pointers are unaligned: checks one
- // character at a time. Also use at the end of
- // the word-check algorithm to figure out what happened
+ /* here when pointers are unaligned: checks one */
+ /* character at a time. Also use at the end of */
+ /* the word-check algorithm to figure out what happened */
.Lunaligned:
- // R0 is non-zero from before.
- // p0 set to -1
+ /* R0 is non-zero from before. */
+ /* p0 set to -1 */
r0 = 0 (Z);
r1 = B[p1++] (Z);
@@ -96,18 +96,18 @@ ENTRY(_strcmp)
LSETUP (.Lbeginloop1, .Lendloop1) lc0=p0;
.Lbeginloop1:
- cc = r1; // first char must be non-zero
- // chars must be the same
+ cc = r1; /* first char must be non-zero */
+ /* chars must be the same */
r3 = r2 - r1 (NS) || r1 = B[p1++] (Z) || nop;
cc &= az;
- r3 = r0 - r2; // second char must be non-zero
+ r3 = r0 - r2; /* second char must be non-zero */
cc &= an;
if !cc jump .Lexitloop1;
.Lendloop1:
r2 = B[p2++] (Z);
-.Lexitloop1: // here means we found a zero or a difference.
- // we have r2(N), p2(N), r1(N+1), p1(N+2)
+.Lexitloop1: /* here means we found a zero or a difference. */
+ /* we have r2(N), p2(N), r1(N+1), p1(N+2) */
r1=B[p1+ -2] (Z);
r0 = r1 - r2;
(r7:4) = [sp++];
diff --git a/libc/string/bzero.c b/libc/string/bzero.c
index 7498f795f..32dce416e 100644
--- a/libc/string/bzero.c
+++ b/libc/string/bzero.c
@@ -5,30 +5,20 @@
* Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
*/
-#include "_string.h"
+#include <string.h>
#ifdef __UCLIBC_SUSV3_LEGACY__
-
-/* Experimentally off - libc_hidden_proto(memset) */
-
void bzero(void *s, size_t n)
{
#if 1
(void)memset(s, 0, n);
#else
register unsigned char *p = s;
-#ifdef __BCC__
- /* bcc can optimize the counter if it thinks it is a pointer... */
- register const char *np = (const char *) n;
-#else
-#define np n
-#endif
- while (np) {
+ while (n) {
*p++ = 0;
- --np;
+ --n;
}
#endif
}
-#undef np
#endif
diff --git a/libc/string/cris/memcopy.h b/libc/string/cris/memcopy.h
index 449c75641..ccd447c83 100644
--- a/libc/string/cris/memcopy.h
+++ b/libc/string/cris/memcopy.h
@@ -16,8 +16,7 @@
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
+ see <http://www.gnu.org/licenses/>. */
#include "../generic/memcopy.h"
diff --git a/libc/string/cris/memcpy.c b/libc/string/cris/memcpy.c
index cc14188b8..94e576f4f 100644
--- a/libc/string/cris/memcpy.c
+++ b/libc/string/cris/memcpy.c
@@ -1,264 +1,242 @@
-/* Copyright (C) 2001, 2003 Free Software Foundation, Inc.
- Copyright (C) 1994, 1995, 2000 Axis Communications AB.
-
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-/*#************************************************************************#*/
-/*#-------------------------------------------------------------------------*/
-/*# */
-/*# FUNCTION NAME: memcpy() */
-/*# */
-/*# PARAMETERS: void* dst; Destination address. */
-/*# void* src; Source address. */
-/*# int len; Number of bytes to copy. */
-/*# */
-/*# RETURNS: dst. */
-/*# */
-/*# DESCRIPTION: Copies len bytes of memory from src to dst. No guarantees */
-/*# about copying of overlapping memory areas. This routine is */
-/*# very sensitive to compiler changes in register allocation. */
-/*# Should really be rewritten to avoid this problem. */
-/*# */
-/*#-------------------------------------------------------------------------*/
-/*# */
-/*# HISTORY */
-/*# */
-/*# DATE NAME CHANGES */
-/*# ---- ---- ------- */
-/*# 941007 Kenny R Creation */
-/*# 941011 Kenny R Lots of optimizations and inlining. */
-/*# 941129 Ulf A Adapted for use in libc. */
-/*# 950216 HP N==0 forgotten if non-aligned src/dst. */
-/*# Added some optimizations. */
-/*# 001025 HP Make src and dst char *. Align dst to */
-/*# dword, not just word-if-both-src-and-dst- */
-/*# are-misaligned. */
-/*# 070806 RW Modified for uClibc */
-/*# (__arch_v32 -> __CONFIG_CRISV32__, */
-/*# include features.h to reach it.) */
-/*# */
-/*#-------------------------------------------------------------------------*/
-
-#include <features.h>
-
-#ifdef __CONFIG_CRISV32__
+/* A memcpy for CRIS.
+ Copyright (C) 1994-2008 Axis Communications.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Neither the name of Axis Communications nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS
+ COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE. */
+
+/* FIXME: This file should really only be used for reference, as the
+ result is somewhat dependent on gcc generating what we expect rather
+ than what we describe. An assembly file should be used instead. */
+
+#include <string.h>
+
+#ifdef __arch_v32
/* For CRISv32, movem is very cheap. */
-#define MEMCPY_BLOCK_THRESHOLD (44)
+#define MEMCPY_BY_BLOCK_THRESHOLD (44)
#else
-/* Break even between movem and move16 is at 38.7*2, but modulo 44. */
-#define MEMCPY_BLOCK_THRESHOLD (44*2)
+/* The break-even point between movem and move16 is really at 38.7 * 2,
+ but we round up modulo 44, so up to the next multiple of 44 we use
+ ordinary code. */
+#define MEMCPY_BY_BLOCK_THRESHOLD (44 * 2)
#endif
-void *memcpy(void *, const void *, unsigned int);
+/* No name ambiguities in this file. */
+__asm__ (".syntax no_register_prefix");
-/* Experimentally off - libc_hidden_proto(memcpy) */
-void *memcpy(void *pdst,
- const void *psrc,
- unsigned int pn)
+void *
+memcpy(void *pdst, const void *psrc, size_t pn)
{
- /* Ok. Now we want the parameters put in special registers.
+ /* Now we want the parameters put in special registers.
Make sure the compiler is able to make something useful of this.
- As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
+ As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
If gcc was all right, it really would need no temporaries, and no
- stack space to save stuff on. */
+ stack space to save stuff on. */
-#ifndef MEMPCPY
register void *return_dst __asm__ ("r10") = pdst;
-#else
- /* FIXME: Use R10 for something. */
-# define return_dst dst
-#endif
-
- register char *dst __asm__ ("r13") = pdst;
- register char *src __asm__ ("r11") = (char *) psrc;
+ register unsigned char *dst __asm__ ("r13") = pdst;
+ register unsigned const char *src __asm__ ("r11") = psrc;
register int n __asm__ ("r12") = pn;
-
/* When src is aligned but not dst, this makes a few extra needless
cycles. I believe it would take as many to check that the
re-alignment was unnecessary. */
if (((unsigned long) dst & 3) != 0
/* Don't align if we wouldn't copy more than a few bytes; so we
- don't have to check further for overflows. */
+ don't have to check further for overflows. */
&& n >= 3)
{
if ((unsigned long) dst & 1)
- {
- n--;
- *(char*)dst = *(char*)src;
- src++;
- dst++;
- }
+ {
+ n--;
+ *dst = *src;
+ src++;
+ dst++;
+ }
if ((unsigned long) dst & 2)
- {
- n -= 2;
- *(short*)dst = *(short*)src;
- src += 2;
- dst += 2;
- }
+ {
+ n -= 2;
+ *(short *) dst = *(short *) src;
+ src += 2;
+ dst += 2;
+ }
}
- /* Decide which copying method to use. */
- if (n >= MEMCPY_BLOCK_THRESHOLD)
- {
- /* For large copies we use 'movem' */
-
- /* It is not optimal to tell the compiler about clobbering any
- registers; that will move the saving/restoring of those registers
- to the function prologue/epilogue, and make non-movem sizes
- suboptimal.
-
- This method is not foolproof; it assumes that the "register asm"
- declarations at the beginning of the function really are used
- here (beware: they may be moved to temporary registers).
- This way, we do not have to save/move the registers around into
- temporaries; we can safely use them straight away. */
- __asm__ __volatile__ ("\
- .syntax no_register_prefix \n\
- \n\
- ;; Check that the register asm declaration got right. \n\
- ;; The GCC manual explicitly says TRT will happen. \n\
- .ifnc %0-%1-%2,$r13-$r11-$r12 \n\
- .err \n\
- .endif \n\
- \n\
- ;; Save the registers we'll use in the movem process \n\
- ;; on the stack. \n\
- subq 11*4,sp \n\
- movem r10,[sp] \n\
- \n\
- ;; Now we've got this: \n\
- ;; r11 - src \n\
- ;; r13 - dst \n\
- ;; r12 - n \n\
- \n\
- ;; Update n for the first loop \n\
- subq 44,r12 \n\
-0: \n\
- movem [r11+],r10 \n\
- subq 44,r12 \n\
- bge 0b \n\
- movem r10,[r13+] \n\
- \n\
- addq 44,r12 ;; compensate for last loop underflowing n \n\
- \n\
- ;; Restore registers from stack \n\
- movem [sp+],r10"
-
- /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n)
- /* Inputs */ : "0" (dst), "1" (src), "2" (n));
- }
+ /* Decide which copying method to use. */
+ if (n >= MEMCPY_BY_BLOCK_THRESHOLD)
+ {
+ /* It is not optimal to tell the compiler about clobbering any
+ registers; that will move the saving/restoring of those registers
+ to the function prologue/epilogue, and make non-movem sizes
+ suboptimal. */
+ __asm__ __volatile__
+ ("\
+ ;; GCC does promise correct register allocations, but let's \n\
+ ;; make sure it keeps its promises. \n\
+ .ifnc %0-%1-%2,$r13-$r11-$r12 \n\
+ .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\" \n\
+ .endif \n\
+ \n\
+ ;; Save the registers we'll use in the movem process \n\
+ ;; on the stack. \n\
+ subq 11*4,sp \n\
+ movem r10,[sp] \n\
+ \n\
+ ;; Now we've got this: \n\
+ ;; r11 - src \n\
+ ;; r13 - dst \n\
+ ;; r12 - n \n\
+ \n\
+ ;; Update n for the first loop. \n\
+ subq 44,r12 \n\
+0: \n\
+"
+#ifdef __arch_common_v10_v32
+ /* Cater to branch offset difference between v32 and v10. We
+ assume the branch below has an 8-bit offset. */
+" setf\n"
+#endif
+" movem [r11+],r10 \n\
+ subq 44,r12 \n\
+ bge 0b \n\
+ movem r10,[r13+] \n\
+ \n\
+ ;; Compensate for last loop underflowing n. \n\
+ addq 44,r12 \n\
+ \n\
+ ;; Restore registers from stack. \n\
+ movem [sp+],r10"
+
+ /* Outputs. */
+ : "=r" (dst), "=r" (src), "=r" (n)
+
+ /* Inputs. */
+ : "0" (dst), "1" (src), "2" (n));
+ }
- /* Either we directly starts copying, using dword copying
- in a loop, or we copy as much as possible with 'movem'
- and then the last block (<44 bytes) is copied here.
- This will work since 'movem' will have updated src,dst,n. */
+ while (n >= 16)
+ {
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
- while ( n >= 16 )
- {
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- n -= 16;
- }
+ n -= 16;
+ }
- /* A switch() is definitely the fastest although it takes a LOT of code.
- * Particularly if you inline code this.
- */
switch (n)
- {
+ {
case 0:
break;
+
case 1:
- *((char*)dst)++ = *((char*)src)++;
+ *dst = *src;
break;
+
case 2:
- *((short*)dst)++ = *((short*)src)++;
+ *(short *) dst = *(short *) src;
break;
+
case 3:
- *((short*)dst)++ = *((short*)src)++;
- *((char*)dst)++ = *((char*)src)++;
+ *(short *) dst = *(short *) src; dst += 2; src += 2;
+ *dst = *src;
break;
+
case 4:
- *((long*)dst)++ = *((long*)src)++;
+ *(long *) dst = *(long *) src;
break;
+
case 5:
- *((long*)dst)++ = *((long*)src)++;
- *((char*)dst)++ = *((char*)src)++;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *dst = *src;
break;
+
case 6:
- *((long*)dst)++ = *((long*)src)++;
- *((short*)dst)++ = *((short*)src)++;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(short *) dst = *(short *) src;
break;
+
case 7:
- *((long*)dst)++ = *((long*)src)++;
- *((short*)dst)++ = *((short*)src)++;
- *((char*)dst)++ = *((char*)src)++;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(short *) dst = *(short *) src; dst += 2; src += 2;
+ *dst = *src;
break;
+
case 8:
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src;
break;
+
case 9:
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((char*)dst)++ = *((char*)src)++;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *dst = *src;
break;
+
case 10:
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((short*)dst)++ = *((short*)src)++;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(short *) dst = *(short *) src;
break;
+
case 11:
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((short*)dst)++ = *((short*)src)++;
- *((char*)dst)++ = *((char*)src)++;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(short *) dst = *(short *) src; dst += 2; src += 2;
+ *dst = *src;
break;
+
case 12:
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src;
break;
+
case 13:
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((char*)dst)++ = *((char*)src)++;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *dst = *src;
break;
+
case 14:
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((short*)dst)++ = *((short*)src)++;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(short *) dst = *(short *) src;
break;
+
case 15:
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((long*)dst)++ = *((long*)src)++;
- *((short*)dst)++ = *((short*)src)++;
- *((char*)dst)++ = *((char*)src)++;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(long *) dst = *(long *) src; dst += 4; src += 4;
+ *(short *) dst = *(short *) src; dst += 2; src += 2;
+ *dst = *src;
break;
- }
+ }
- return return_dst; /* destination pointer. */
-} /* memcpy() */
+ return return_dst;
+}
libc_hidden_def(memcpy)
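
Stripped of the CRIS-specific movem block copy, the rewritten memcpy has a fixed shape: align dst to a 4-byte boundary, bulk-copy 16 bytes (four dwords) per iteration, then finish the 0..15-byte tail via the switch. A portable sketch of that structure — the name is illustrative, and plain memcpy stands in for the typed stores and the tail switch:

    #include <stddef.h>
    #include <string.h>

    static void *memcpy_sketch(void *pdst, const void *psrc, size_t n)
    {
        unsigned char *dst = pdst;
        const unsigned char *src = psrc;
        void *ret = pdst;

        /* Align dst to 4 bytes; only worthwhile when n >= 3, which also
           guarantees the two steps below never overrun. */
        if (((size_t)dst & 3) != 0 && n >= 3) {
            if ((size_t)dst & 1) { *dst++ = *src++; n--; }
            if ((size_t)dst & 2) { memcpy(dst, src, 2); dst += 2; src += 2; n -= 2; }
        }

        /* Bulk loop: 16 bytes per iteration, as in the dword loop above.
           (The real code uses a movem block copy first when n is large.) */
        while (n >= 16) {
            memcpy(dst, src, 16);
            dst += 16; src += 16; n -= 16;
        }

        /* Tail: the original switches on n (0..15). */
        memcpy(dst, src, n);
        return ret;
    }
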
diff --git a/libc/string/cris/memmove.c b/libc/string/cris/memmove.c
index fa495eba4..906ef8e74 100644
--- a/libc/string/cris/memmove.c
+++ b/libc/string/cris/memmove.c
@@ -18,16 +18,14 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#include "memcopy.h"
#include "../generic/pagecopy.h"
-/* Experimentally off - libc_hidden_proto(memmove) */
void *memmove (void *dest, const void *src, size_t len)
{
unsigned long int dstp = (long int) dest;
diff --git a/libc/string/cris/memset.c b/libc/string/cris/memset.c
index b578aac5d..fab4e8b66 100644
--- a/libc/string/cris/memset.c
+++ b/libc/string/cris/memset.c
@@ -1,271 +1,262 @@
-/* Copyright (C) 2001, 2003 Free Software Foundation, Inc.
- Copyright (C) 1999, 2000 Axis Communications AB.
-
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-/*#************************************************************************#*/
-/*#-------------------------------------------------------------------------*/
-/*# */
-/*# FUNCTION NAME: memset() */
-/*# */
-/*# PARAMETERS: void* dst; Destination address. */
-/*# int c; Value of byte to write. */
-/*# int len; Number of bytes to write. */
-/*# */
-/*# RETURNS: dst. */
-/*# */
-/*# DESCRIPTION: Sets the memory dst of length len bytes to c, as standard. */
-/*# Framework taken from memcpy. This routine is */
-/*# very sensitive to compiler changes in register allocation. */
-/*# Should really be rewritten to avoid this problem. */
-/*# */
-/*#-------------------------------------------------------------------------*/
-/*# */
-/*# HISTORY */
-/*# */
-/*# DATE NAME CHANGES */
-/*# ---- ---- ------- */
-/*# 990713 HP Tired of watching this function (or */
-/*# really, the nonoptimized generic */
-/*# implementation) take up 90% of simulator */
-/*# output. Measurements needed. */
-/*# */
-/*#-------------------------------------------------------------------------*/
-
-/* No, there's no macro saying 12*4, since it is "hard" to get it into
- the asm in a good way. Thus better to expose the problem everywhere.
- */
-
-/* Assuming 1 cycle per dword written or read (ok, not really true), and
- one per instruction, then 43+3*(n/48-1) <= 24+24*(n/48-1)
- so n >= 45.7; n >= 0.9; we win on the first full 48-byte block to set. */
-
-#define ZERO_BLOCK_SIZE (1*12*4)
-
-void *memset(void *, int, unsigned long);
-
-/* Experimentally off - libc_hidden_proto(memset) */
-void *memset(void *pdst,
- int c,
- unsigned long plen)
+/* A memset for CRIS.
+ Copyright (C) 1999-2008 Axis Communications.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Neither the name of Axis Communications nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS
+ COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE. */
+
+/* FIXME: This file should really only be used for reference, as the
+ result is somewhat dependent on gcc generating what we expect rather
+ than what we describe. An assembly file should be used instead. */
+
+#include <string.h>
+
+/* Note the multiple occurrences of the expression "12*4", including in
+ the asm. It is hard to get it into the asm in a good way. Thus better to
+ expose the problem everywhere: no macro. */
+
+/* Assuming one cycle per dword written or read (ok, not really true; the
+ world is not ideal), and one cycle per instruction, then 43+3*(n/48-1)
+ <= 24+24*(n/48-1) so n >= 45.7; n >= 0.9; we win on the first full
+ 48-byte block to set. */
+
+#define MEMSET_BY_BLOCK_THRESHOLD (1 * 48)
+
+/* No name ambiguities in this file. */
+__asm__ (".syntax no_register_prefix");
+
+void *memset(void *pdst, int c, unsigned int plen)
{
- /* Ok. Now we want the parameters put in special registers.
- Make sure the compiler is able to make something useful of this. */
+ /* Now we want the parameters in special registers. Make sure the
+ compiler does something usable with this. */
register char *return_dst __asm__ ("r10") = pdst;
- register long n __asm__ ("r12") = plen;
+ register int n __asm__ ("r12") = plen;
register int lc __asm__ ("r11") = c;
- /* Most apps use memset sanely. Only those memsetting about 3..4
- bytes or less get penalized compared to the generic implementation
- - and that's not really sane use. */
+ /* Most apps use memset sanely. Memsetting about 3..4 bytes or less gets
+ penalized here compared to the generic implementation. */
- /* Ugh. This is fragile at best. Check with newer GCC releases, if
- they compile cascaded "x |= x << 8" sanely! */
- __asm__("movu.b %0,$r13 \n\
- lslq 8,$r13 \n\
- move.b %0,$r13 \n\
- move.d $r13,%0 \n\
- lslq 16,$r13 \n\
- or.d $r13,%0"
- : "=r" (lc) : "0" (lc) : "r13");
+ /* This is fragile performance-wise at best. Check with newer GCC
+ releases whether they compile cascaded "x |= x << 8" to sane code. */
+ __asm__("movu.b %0,r13 \n\
+ lslq 8,r13 \n\
+ move.b %0,r13 \n\
+ move.d r13,%0 \n\
+ lslq 16,r13 \n\
+ or.d r13,%0"
+ : "=r" (lc) /* Inputs. */
+ : "0" (lc) /* Outputs. */
+ : "r13"); /* Trash. */
{
register char *dst __asm__ ("r13") = pdst;
- if (((unsigned long) pdst & 3) != 0
- /* Oops! n=0 must be a legal call, regardless of alignment. */
- && n >= 3)
- {
- if ((unsigned long)dst & 1)
- {
- *dst = (char) lc;
- n--;
- dst++;
- }
-
- if ((unsigned long)dst & 2)
- {
- *(short *)dst = lc;
- n -= 2;
- dst += 2;
- }
- }
+ if (((unsigned long) pdst & 3) != 0
+ /* Oops! n = 0 must be a valid call, regardless of alignment. */
+ && n >= 3)
+ {
+ if ((unsigned long) dst & 1)
+ {
+ *dst = (char) lc;
+ n--;
+ dst++;
+ }
- /* Now the fun part. For the threshold value of this, check the equation
- above. */
- /* Decide which copying method to use. */
- if (n >= ZERO_BLOCK_SIZE)
- {
- /* For large copies we use 'movem' */
-
- /* It is not optimal to tell the compiler about clobbering any
- registers; that will move the saving/restoring of those registers
- to the function prologue/epilogue, and make non-movem sizes
- suboptimal.
-
- This method is not foolproof; it assumes that the "asm reg"
- declarations at the beginning of the function really are used
- here (beware: they may be moved to temporary registers).
- This way, we do not have to save/move the registers around into
- temporaries; we can safely use them straight away. */
- __asm__ __volatile__ (" \n\
- .syntax no_register_prefix \n\
- \n\
- ;; Check that the register asm declaration got right. \n\
- ;; The GCC manual explicitly says there's no warranty for that (too). \n\
- .ifnc %0-%1-%4,$r13-$r12-$r11 \n\
- .err \n\
- .endif \n\
- \n\
- ;; Save the registers we'll clobber in the movem process \n\
- ;; on the stack. Don't mention them to gcc, it will only be \n\
- ;; upset. \n\
- subq 11*4,sp \n\
- movem r10,[sp] \n\
- \n\
- move.d r11,r0 \n\
- move.d r11,r1 \n\
- move.d r11,r2 \n\
- move.d r11,r3 \n\
- move.d r11,r4 \n\
- move.d r11,r5 \n\
- move.d r11,r6 \n\
- move.d r11,r7 \n\
- move.d r11,r8 \n\
- move.d r11,r9 \n\
- move.d r11,r10 \n\
- \n\
- ;; Now we've got this: \n\
- ;; r13 - dst \n\
- ;; r12 - n \n\
- \n\
- ;; Update n for the first loop \n\
- subq 12*4,r12 \n\
-0: \n\
- subq 12*4,r12 \n\
- bge 0b \n\
- movem r11,[r13+] \n\
- \n\
- addq 12*4,r12 ;; compensate for last loop underflowing n \n\
- \n\
- ;; Restore registers from stack \n\
- movem [sp+],r10"
-
- /* Outputs */ : "=r" (dst), "=r" (n)
- /* Inputs */ : "0" (dst), "1" (n), "r" (lc));
+ if ((unsigned long) dst & 2)
+ {
+ *(short *) dst = lc;
+ n -= 2;
+ dst += 2;
+ }
+ }
- }
+ /* Decide which setting method to use. */
+ if (n >= MEMSET_BY_BLOCK_THRESHOLD)
+ {
+ /* It is not optimal to tell the compiler about clobbering any
+ registers; that will move the saving/restoring of those registers
+ to the function prologue/epilogue, and make non-block sizes
+ suboptimal. */
+ __asm__ __volatile__
+ ("\
+ ;; GCC does promise correct register allocations, but let's \n\
+ ;; make sure it keeps its promises. \n\
+ .ifnc %0-%1-%4,$r13-$r12-$r11 \n\
+ .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\" \n\
+ .endif \n\
+ \n\
+ ;; Save the registers we'll clobber in the movem process \n\
+ ;; on the stack. Don't mention them to gcc, it will only be \n\
+ ;; upset. \n\
+ subq 11*4,sp \n\
+ movem r10,[sp] \n\
+ \n\
+ move.d r11,r0 \n\
+ move.d r11,r1 \n\
+ move.d r11,r2 \n\
+ move.d r11,r3 \n\
+ move.d r11,r4 \n\
+ move.d r11,r5 \n\
+ move.d r11,r6 \n\
+ move.d r11,r7 \n\
+ move.d r11,r8 \n\
+ move.d r11,r9 \n\
+ move.d r11,r10 \n\
+ \n\
+ ;; Now we've got this: \n\
+ ;; r13 - dst \n\
+ ;; r12 - n \n\
+ \n\
+ ;; Update n for the first loop \n\
+ subq 12*4,r12 \n\
+0: \n\
+"
+#ifdef __arch_common_v10_v32
+ /* Cater to branch offset difference between v32 and v10. We
+ assume the branch below has an 8-bit offset. */
+" setf\n"
+#endif
+" subq 12*4,r12 \n\
+ bge 0b \n\
+ movem r11,[r13+] \n\
+ \n\
+ ;; Compensate for last loop underflowing n. \n\
+ addq 12*4,r12 \n\
+ \n\
+ ;; Restore registers from stack. \n\
+ movem [sp+],r10"
+
+ /* Outputs. */
+ : "=r" (dst), "=r" (n)
+
+ /* Inputs. */
+ : "0" (dst), "1" (n), "r" (lc));
+ }
+
+ /* An ad-hoc unroll, used for the remaining 16..(4*12-1) bytes. */
+ while (n >= 16)
+ {
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ n -= 16;
+ }
- /* Either we directly starts copying, using dword copying
- in a loop, or we copy as much as possible with 'movem'
- and then the last block (<44 bytes) is copied here.
- This will work since 'movem' will have updated src,dst,n. */
-
- while ( n >= 16 )
- {
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- n -= 16;
- }
-
- /* A switch() is definitely the fastest although it takes a LOT of code.
- * Particularly if you inline code this.
- */
switch (n)
- {
+ {
case 0:
break;
+
case 1:
- *(char*)dst = (char) lc;
+ *dst = (char) lc;
break;
+
case 2:
- *(short*)dst = (short) lc;
+ *(short *) dst = (short) lc;
break;
+
case 3:
- *((short*)dst)++ = (short) lc;
- *(char*)dst = (char) lc;
+ *(short *) dst = (short) lc; dst += 2;
+ *dst = (char) lc;
break;
+
case 4:
- *((long*)dst)++ = lc;
+ *(long *) dst = lc;
break;
+
case 5:
- *((long*)dst)++ = lc;
- *(char*)dst = (char) lc;
+ *(long *) dst = lc; dst += 4;
+ *dst = (char) lc;
break;
+
case 6:
- *((long*)dst)++ = lc;
- *(short*)dst = (short) lc;
+ *(long *) dst = lc; dst += 4;
+ *(short *) dst = (short) lc;
break;
+
case 7:
- *((long*)dst)++ = lc;
- *((short*)dst)++ = (short) lc;
- *(char*)dst = (char) lc;
+ *(long *) dst = lc; dst += 4;
+ *(short *) dst = (short) lc; dst += 2;
+ *dst = (char) lc;
break;
+
case 8:
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc;
break;
+
case 9:
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *(char*)dst = (char) lc;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *dst = (char) lc;
break;
+
case 10:
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *(short*)dst = (short) lc;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(short *) dst = (short) lc;
break;
+
case 11:
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *((short*)dst)++ = (short) lc;
- *(char*)dst = (char) lc;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(short *) dst = (short) lc; dst += 2;
+ *dst = (char) lc;
break;
+
case 12:
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc;
break;
+
case 13:
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *(char*)dst = (char) lc;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *dst = (char) lc;
break;
+
case 14:
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *(short*)dst = (short) lc;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(short *) dst = (short) lc;
break;
+
case 15:
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *((long*)dst)++ = lc;
- *((short*)dst)++ = (short) lc;
- *(char*)dst = (char) lc;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(long *) dst = lc; dst += 4;
+ *(short *) dst = (short) lc; dst += 2;
+ *dst = (char) lc;
break;
- }
+ }
}
- return return_dst; /* destination pointer. */
-} /* memset() */
+ return return_dst;
+}
libc_hidden_def(memset)
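
The movu.b/lslq/or asm at the top of memset builds a word with the fill byte replicated into all four byte lanes, so the bulk loops can store a dword at a time. A portable equivalent of that replication — a sketch only, with an illustrative name:

    #include <stdint.h>

    static uint32_t splat_byte(int c)
    {
        uint32_t lc = (uint8_t)c;
        lc |= lc << 8;    /* 0x000000AB -> 0x0000ABAB */
        lc |= lc << 16;   /* 0x0000ABAB -> 0xABABABAB */
        return lc;
    }
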
diff --git a/libc/string/cris/strcpy.c b/libc/string/cris/strcpy.c
index 955a990b7..40e6389b9 100644
--- a/libc/string/cris/strcpy.c
+++ b/libc/string/cris/strcpy.c
@@ -6,7 +6,6 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strcpy) */
char *strcpy(char *dest, const char *src)
{
char *ret = dest;
diff --git a/libc/string/cris/strncpy.c b/libc/string/cris/strncpy.c
index 3f2775bdd..8d074071a 100644
--- a/libc/string/cris/strncpy.c
+++ b/libc/string/cris/strncpy.c
@@ -6,9 +6,7 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(memset) */
-/* Experimentally off - libc_hidden_proto(strncpy) */
char *strncpy(char *dest, const char *src, size_t count)
{
char *ret = dest;
diff --git a/libc/string/dirname.c b/libc/string/dirname.c
index 6265e562e..c7f4dec1f 100644
--- a/libc/string/dirname.c
+++ b/libc/string/dirname.c
@@ -5,7 +5,8 @@
* Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
*/
-#include "_string.h"
+#define __need_NULL
+#include <stddef.h>
#include <libgen.h>
char *dirname(char *path)
diff --git a/libc/string/ffs.c b/libc/string/ffs.c
index 241b7456f..f39d304b7 100644
--- a/libc/string/ffs.c
+++ b/libc/string/ffs.c
@@ -5,12 +5,9 @@
* Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
*/
-/* ffsl,ffsll */
-
-#include "_string.h"
-
-/* Experimentally off - libc_hidden_proto(ffs) */
-
+#include <limits.h>
+#include <string.h>
+
int ffs(int i)
{
#if 1
@@ -53,3 +50,6 @@ int ffs(int i)
#endif
}
libc_hidden_def(ffs)
+#if ULONG_MAX == UINT_MAX
+strong_alias_untyped(ffs, ffsl)
+#endif

diff --git a/libc/string/ffsll.c b/libc/string/ffsll.c
new file mode 100644
index 000000000..967fc5168
--- /dev/null
+++ b/libc/string/ffsll.c
@@ -0,0 +1,35 @@
+/* Copyright (C) 1991, 1992, 1997, 1998 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Torbjorn Granlund (tege@sics.se).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <limits.h>
+#include <string.h>
+
+/* Find the first bit set in I. */
+int ffsll (long long int i)
+{
+ unsigned long long int x = i & -i;
+
+ if (x <= 0xffffffff)
+ return ffs (i);
+ else
+ return 32 + ffs (i >> 32);
+}
+
+#if ULONG_MAX != UINT_MAX
+strong_alias_untyped(ffsll, ffsl)
+#endif
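
Behavior of the new ffsll(), for reference: bit positions are 1-based, ffsll(0) is 0, and bits above 32 take the "32 + ffs (i >> 32)" branch. A few illustrative checks (ffsll is a GNU extension, hence _GNU_SOURCE):

    #define _GNU_SOURCE
    #include <string.h>
    #include <assert.h>

    int main(void)
    {
        assert(ffsll(0) == 0);            /* no bit set */
        assert(ffsll(1) == 1);            /* positions are 1-based */
        assert(ffsll(0xC0) == 7);         /* lowest set bit wins */
        assert(ffsll(1LL << 32) == 33);   /* high half: 32 + ffs(i >> 32) */
        return 0;
    }
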
diff --git a/libc/string/frv/memcpy.S b/libc/string/frv/memcpy.S
index ae843797d..47773726a 100644
--- a/libc/string/frv/memcpy.S
+++ b/libc/string/frv/memcpy.S
@@ -14,8 +14,8 @@
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * License along with this library; if not, see
+ * <http://www.gnu.org/licenses/>.
*/
#include <features.h>
diff --git a/libc/string/frv/memset.S b/libc/string/frv/memset.S
index 477597dcd..17013672e 100644
--- a/libc/string/frv/memset.S
+++ b/libc/string/frv/memset.S
@@ -14,8 +14,8 @@
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * License along with this library; if not, see
+ * <http://www.gnu.org/licenses/>.
*/
#include <features.h>
@@ -155,4 +155,4 @@ memset:
bralr
.size memset, .-memset
-/* Experimentally off - libc_hidden_proto(memset) */
+libc_hidden_def(memset)
diff --git a/libc/string/generic/bp-checks.h b/libc/string/generic/bp-checks.h
deleted file mode 100644
index 08c70aa5d..000000000
--- a/libc/string/generic/bp-checks.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/* Bounded-pointer checking macros for C.
- Copyright (C) 2000 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Greg McGary <greg@mcgary.org>
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifndef _bp_checks_h_
-#define _bp_checks_h_ 1
-
-#if __BOUNDED_POINTERS__
-
-# define BOUNDS_VIOLATED (__builtin_trap (), 0)
-
-/* Verify that pointer's value >= low. Return pointer value. */
-# define CHECK_BOUNDS_LOW(ARG) \
- (((__ptrvalue (ARG) < __ptrlow (ARG)) && BOUNDS_VIOLATED), \
- __ptrvalue (ARG))
-
-/* Verify that pointer's value < high. Return pointer value. */
-# define CHECK_BOUNDS_HIGH(ARG) \
- (((__ptrvalue (ARG) > __ptrhigh (ARG)) && BOUNDS_VIOLATED), \
- __ptrvalue (ARG))
-
-# define _CHECK_N(ARG, N, COND) \
- (((COND) \
- && (__ptrvalue (ARG) < __ptrlow (ARG) \
- || __ptrvalue (ARG) + (N) > __ptrhigh (ARG)) \
- && BOUNDS_VIOLATED), \
- __ptrvalue (ARG))
-
-extern void *__unbounded __ubp_memchr (const void *__unbounded, int, unsigned);
-
-# define _CHECK_STRING(ARG, COND) \
- (((COND) \
- && (__ptrvalue (ARG) < __ptrlow (ARG) \
- || !__ubp_memchr (__ptrvalue (ARG), '\0', \
- (__ptrhigh (ARG) - __ptrvalue (ARG)))) \
- && BOUNDS_VIOLATED), \
- __ptrvalue (ARG))
-
-/* Check bounds of a pointer seated to an array of N objects. */
-# define CHECK_N(ARG, N) _CHECK_N ((ARG), (N), 1)
-/* Same as CHECK_N, but tolerate ARG == NULL. */
-# define CHECK_N_NULL_OK(ARG, N) _CHECK_N ((ARG), (N), __ptrvalue (ARG))
-
-/* Check bounds of a pointer seated to a single object. */
-# define CHECK_1(ARG) CHECK_N ((ARG), 1)
-/* Same as CHECK_1, but tolerate ARG == NULL. */
-# define CHECK_1_NULL_OK(ARG) CHECK_N_NULL_OK ((ARG), 1)
-
-/* Check for NUL-terminator within string's bounds. */
-# define CHECK_STRING(ARG) _CHECK_STRING ((ARG), 1)
-/* Same as CHECK_STRING, but tolerate ARG == NULL. */
-# define CHECK_STRING_NULL_OK(ARG) _CHECK_STRING ((ARG), __ptrvalue (ARG))
-
-/* Check bounds of signal syscall args with type sigset_t. */
-# define CHECK_SIGSET(SET) CHECK_N ((SET), _NSIG / (8 * sizeof *(SET)))
-/* Same as CHECK_SIGSET, but tolerate SET == NULL. */
-# define CHECK_SIGSET_NULL_OK(SET) CHECK_N_NULL_OK ((SET), _NSIG / (8 * sizeof *(SET)))
-
-# if defined (_IOC_SIZESHIFT) && defined (_IOC_SIZEBITS)
-/* Extract the size of the ioctl data and check its bounds. */
-# define CHECK_IOCTL(ARG, CMD) \
- CHECK_N ((const char *) (ARG), \
- (((CMD) >> _IOC_SIZESHIFT) & ((1 << _IOC_SIZEBITS) - 1)))
-# else
-/* We don't know the size of the ioctl data, so the best we can do
- is check that the first byte is within bounds. */
-# define CHECK_IOCTL(ARG, CMD) CHECK_1 ((const char *) ARG)
-# endif
-
-/* Check bounds of `struct flock *' for the locking fcntl commands. */
-# define CHECK_FCNTL(ARG, CMD) \
- (((CMD) == F_GETLK || (CMD) == F_SETLK || (CMD) == F_SETLKW) \
- ? CHECK_1 ((struct flock *) ARG) : (unsigned long) (ARG))
-
-/* Check bounds of an array of mincore residency-status flags that
- cover a region of NBYTES. Such a vector occupies one byte per page
- of memory. */
-# define CHECK_N_PAGES(ARG, NBYTES) \
- ({ int _page_size_ = __sysconf (_SC_PAGE_SIZE); \
- CHECK_N ((const char *) (ARG), \
- ((NBYTES) + _page_size_ - 1) / _page_size_); })
-
-/* Return a bounded pointer with value PTR that satisfies CHECK_N (PTR, N). */
-# define BOUNDED_N(PTR, N) \
- ({ __typeof (PTR) __bounded _p_; \
- __ptrvalue _p_ = __ptrlow _p_ = __ptrvalue (PTR); \
- __ptrhigh _p_ = __ptrvalue _p_ + (N); \
- _p_; })
-
-#else /* !__BOUNDED_POINTERS__ */
-
-/* Do nothing if not compiling with -fbounded-pointers. */
-
-# define BOUNDS_VIOLATED
-# define CHECK_BOUNDS_LOW(ARG) (ARG)
-# define CHECK_BOUNDS_HIGH(ARG) (ARG)
-# define CHECK_1(ARG) (ARG)
-# define CHECK_1_NULL_OK(ARG) (ARG)
-# define CHECK_N(ARG, N) (ARG)
-# define CHECK_N_NULL_OK(ARG, N) (ARG)
-# define CHECK_STRING(ARG) (ARG)
-# define CHECK_SIGSET(SET) (SET)
-# define CHECK_SIGSET_NULL_OK(SET) (SET)
-# define CHECK_IOCTL(ARG, CMD) (ARG)
-# define CHECK_FCNTL(ARG, CMD) (ARG)
-# define CHECK_N_PAGES(ARG, NBYTES) (ARG)
-# define BOUNDED_N(PTR, N) (PTR)
-
-#endif /* !__BOUNDED_POINTERS__ */
-
-#define BOUNDED_1(PTR) BOUNDED_N (PTR, 1)
-
-#endif /* _bp_checks_h_ */
diff --git a/libc/string/generic/memchr.c b/libc/string/generic/memchr.c
index 3c7c997bc..967ae51ea 100644
--- a/libc/string/generic/memchr.c
+++ b/libc/string/generic/memchr.c
@@ -17,22 +17,19 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#include <stdlib.h>
#include <limits.h>
-/* Experimentally off - libc_hidden_proto(memchr) */
-libc_hidden_proto(abort)
-
#include "memcopy.h"
#define LONG_MAX_32_BITS 2147483647
/* Search no more than N bytes of S for C. */
+#undef memchr
void *memchr (const void * s, int c_in, size_t n)
{
const unsigned char *char_ptr;
diff --git a/libc/string/generic/memcmp.c b/libc/string/generic/memcmp.c
index fc63a2eae..170c50997 100644
--- a/libc/string/generic/memcmp.c
+++ b/libc/string/generic/memcmp.c
@@ -14,22 +14,16 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#include "memcopy.h"
-/* Experimentally off - libc_hidden_proto(memcmp) */
#include <endian.h>
#if __BYTE_ORDER == __BIG_ENDIAN
-# define WORDS_BIGENDIAN
-#endif
-
-#ifdef WORDS_BIGENDIAN
# define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1)
#else
# define CMP_LT_OR_GT(a, b) memcmp_bytes ((a), (b))
@@ -48,17 +42,12 @@
3. Compare the few remaining bytes. */
-#ifndef WORDS_BIGENDIAN
+#if __BYTE_ORDER != __BIG_ENDIAN
/* memcmp_bytes -- Compare A and B bytewise in the byte order of the machine.
A and B are known to be different.
This is needed only on little-endian machines. */
-static int memcmp_bytes __P((op_t, op_t));
-
-# ifdef __GNUC__
-__inline
-# endif
-static int
+static __inline__ int
memcmp_bytes (op_t a, op_t b)
{
long int srcp1 = (long int) &a;
@@ -77,8 +66,6 @@ memcmp_bytes (op_t a, op_t b)
}
#endif
-static int memcmp_common_alignment __P((long, long, size_t));
-
/* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t'
objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for
memory operations on `op_t's. */
@@ -161,8 +148,6 @@ memcmp_common_alignment (long int srcp1, long int srcp2, size_t len)
return 0;
}
-static int memcmp_not_common_alignment __P((long, long, size_t));
-
/* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN
`op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory
operations on `op_t', but SRCP1 *should be unaligned*. */
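
The CMP_LT_OR_GT split above exists because, once two words differ, memcmp's result must follow the first differing byte in memory order; on big-endian machines that coincides with numeric word order, on little-endian it does not. A sketch of what the little-endian memcmp_bytes fallback computes — name and word type are illustrative, and unlike the real helper this one tolerates equal inputs:

    #include <stddef.h>

    typedef unsigned long op_t;

    static int cmp_words_bytewise(op_t a, op_t b)
    {
        const unsigned char *pa = (const unsigned char *)&a;
        const unsigned char *pb = (const unsigned char *)&b;
        size_t i;

        for (i = 0; i < sizeof(op_t); i++)  /* memory order, not value order */
            if (pa[i] != pb[i])
                return pa[i] > pb[i] ? 1 : -1;
        return 0;
    }
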
diff --git a/libc/string/generic/memcopy.h b/libc/string/generic/memcopy.h
index fab4da764..031557ac8 100644
--- a/libc/string/generic/memcopy.h
+++ b/libc/string/generic/memcopy.h
@@ -14,9 +14,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* The strategy of the memory functions is:
@@ -107,7 +106,6 @@ typedef unsigned char byte;
} \
} while (0)
-#ifdef __ARCH_HAS_BWD_MEMCPY__
/* Copy *up to* NBYTES bytes from SRC_BP to DST_BP, with
the assumption that DST_BP is aligned on an OPSIZ multiple. If
not all bytes could be easily copied, store remaining number of bytes
@@ -126,8 +124,6 @@ typedef unsigned char byte;
(nbytes_left) = (nbytes) % OPSIZ; \
} while (0)
-#endif
-
/* Copy *up to* NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR,
beginning at the words (of type op_t) right before the pointers and
continuing towards smaller addresses. May take advantage of that
diff --git a/libc/string/generic/memcpy.c b/libc/string/generic/memcpy.c
index 4284f2fe5..ca2e7e0f9 100644
--- a/libc/string/generic/memcpy.c
+++ b/libc/string/generic/memcpy.c
@@ -15,15 +15,14 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#include "memcopy.h"
#include "pagecopy.h"
+#include "_memcpy_fwd.c"
-/* Experimentally off - libc_hidden_proto(memcpy) */
void *memcpy (void *dstpp, const void *srcpp, size_t len)
{
diff --git a/libc/string/generic/memmem.c b/libc/string/generic/memmem.c
index c75bb2426..753e43ae5 100644
--- a/libc/string/generic/memmem.c
+++ b/libc/string/generic/memmem.c
@@ -12,16 +12,13 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#include <stddef.h>
#ifdef __USE_GNU
-/* Experimentally off - libc_hidden_proto(memmem) */
-/* Experimentally off - libc_hidden_proto(memcmp) */
/* Return the first occurrence of NEEDLE in HAYSTACK. */
void *memmem (const void *haystack, size_t haystack_len,
@@ -50,5 +47,4 @@ void *memmem (const void *haystack, size_t haystack_len,
return NULL;
}
-libc_hidden_def(memmem)
#endif
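
For context, the generic memmem is a straightforward windowed scan over the haystack. A minimal sketch of that approach — not the actual library source, and the name is illustrative:

    #include <string.h>

    static void *memmem_sketch(const void *haystack, size_t h_len,
                               const void *needle, size_t n_len)
    {
        const char *h = haystack;
        size_t i;

        if (n_len == 0)
            return (void *)h;       /* empty needle matches at the start */
        if (h_len < n_len)
            return NULL;

        for (i = 0; i <= h_len - n_len; i++)
            if (memcmp(h + i, needle, n_len) == 0)
                return (void *)(h + i);
        return NULL;
    }
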
diff --git a/libc/string/generic/memmove.c b/libc/string/generic/memmove.c
index 7f945b150..bf78c4778 100644
--- a/libc/string/generic/memmove.c
+++ b/libc/string/generic/memmove.c
@@ -15,9 +15,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
@@ -29,8 +28,6 @@
#include "_memcpy_fwd.c"
#endif
-/* Experimentally off - libc_hidden_proto(memmove) */
-/* Experimentally off - libc_hidden_proto(memcpy) */
static void _wordcopy_bwd_aligned (long int dstp, long int srcp, size_t len)
{
diff --git a/libc/string/generic/mempcpy.c b/libc/string/generic/mempcpy.c
index 8d7356486..bb5563a6a 100644
--- a/libc/string/generic/mempcpy.c
+++ b/libc/string/generic/mempcpy.c
@@ -8,13 +8,13 @@
#include <string.h>
#ifdef __USE_GNU
-/* Experimentally off - libc_hidden_proto(mempcpy) */
-/* Experimentally off - libc_hidden_proto(memcpy) */
+# undef mempcpy
void *mempcpy (void *dstpp, const void *srcpp, size_t len)
{
memcpy(dstpp, srcpp, len);
return (void *)(((char *)dstpp) + len);
}
libc_hidden_weak(mempcpy)
+strong_alias(mempcpy,__mempcpy)
#endif
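
The added __mempcpy strong alias mirrors glibc, where both names exist. mempcpy's return value — one past the last byte written — makes chained copies cheap; illustrative use:

    #define _GNU_SOURCE   /* mempcpy() is a GNU extension */
    #include <string.h>
    #include <stdio.h>

    int main(void)
    {
        char buf[32];
        char *p = buf;

        p = mempcpy(p, "foo", 3);   /* p now points past "foo" */
        p = mempcpy(p, "bar", 3);
        *p = '\0';

        puts(buf);                  /* prints "foobar" */
        return 0;
    }
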
diff --git a/libc/string/generic/memrchr.c b/libc/string/generic/memrchr.c
index 9ab805cf7..b74cf5152 100644
--- a/libc/string/generic/memrchr.c
+++ b/libc/string/generic/memrchr.c
@@ -18,17 +18,14 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#ifdef __USE_GNU
-/* Experimentally off - libc_hidden_proto(memrchr) */
-libc_hidden_proto(abort)
#include "memcopy.h"
diff --git a/libc/string/generic/memset.c b/libc/string/generic/memset.c
index 62cc36fe3..5644e2522 100644
--- a/libc/string/generic/memset.c
+++ b/libc/string/generic/memset.c
@@ -12,14 +12,12 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#include "memcopy.h"
-/* Experimentally off - libc_hidden_proto(memset) */
void *memset (void *dstpp, int c, size_t len)
{
long int dstp = (long int) dstpp;
diff --git a/libc/string/generic/pagecopy.h b/libc/string/generic/pagecopy.h
index 5a0ada1fa..16aaacab6 100644
--- a/libc/string/generic/pagecopy.h
+++ b/libc/string/generic/pagecopy.h
@@ -13,9 +13,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* This file defines the macro:
@@ -40,7 +39,7 @@
*/
-#if PAGE_COPY_THRESHOLD
+#if defined PAGE_COPY_THRESHOLD && PAGE_COPY_THRESHOLD
#include <assert.h>
@@ -48,7 +47,7 @@
do \
{ \
if ((nbytes) >= PAGE_COPY_THRESHOLD && \
- PAGE_OFFSET ((dstp) - (srcp)) == 0) \
+ PAGE_OFFSET ((dstp) - (srcp)) == 0) \
{ \
/* The amount to copy is past the threshold for copying \
pages virtually with kernel VM operations, and the \
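
The guard change in pagecopy.h is the usual -Wundef-friendly idiom: "#if PAGE_COPY_THRESHOLD" silently evaluates an undefined macro as 0 but can warn under -Wundef, while testing definedness first is explicit. An illustration (HAVE_VM_COPY is a made-up name for the example):

    #if defined PAGE_COPY_THRESHOLD && PAGE_COPY_THRESHOLD
    #  define HAVE_VM_COPY 1   /* kernel VM page-copy path compiled in */
    #else
    #  define HAVE_VM_COPY 0   /* plain word copies only */
    #endif
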
diff --git a/libc/string/generic/rawmemchr.c b/libc/string/generic/rawmemchr.c
index f8b97a61d..816589649 100644
--- a/libc/string/generic/rawmemchr.c
+++ b/libc/string/generic/rawmemchr.c
@@ -17,17 +17,14 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#ifdef __USE_GNU
-/* Experimentally off - libc_hidden_proto(rawmemchr) */
-libc_hidden_proto(abort)
#include "memcopy.h"
diff --git a/libc/string/generic/strcat.c b/libc/string/generic/strcat.c
index e00494038..68fc2a289 100644
--- a/libc/string/generic/strcat.c
+++ b/libc/string/generic/strcat.c
@@ -12,14 +12,12 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#include "memcopy.h"
-/* Experimentally off - libc_hidden_proto(strcat) */
/* Append SRC on the end of DEST. */
char *strcat (char *dest, const char *src)
{
diff --git a/libc/string/generic/strchr.c b/libc/string/generic/strchr.c
index 66aed1e25..321d2b8c3 100644
--- a/libc/string/generic/strchr.c
+++ b/libc/string/generic/strchr.c
@@ -17,15 +17,12 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#include <stdlib.h>
-/* Experimentally off - libc_hidden_proto(strchr) */
-libc_hidden_proto(abort)
#include "memcopy.h"
diff --git a/libc/string/generic/strchrnul.c b/libc/string/generic/strchrnul.c
index 72cab2891..d11d9e00d 100644
--- a/libc/string/generic/strchrnul.c
+++ b/libc/string/generic/strchrnul.c
@@ -17,16 +17,13 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#include <stdlib.h>
#ifdef __USE_GNU
-/* Experimentally off - libc_hidden_proto(strchrnul) */
-libc_hidden_proto(abort)
#include "memcopy.h"
diff --git a/libc/string/generic/strcmp.c b/libc/string/generic/strcmp.c
index 50acd3548..24ad14382 100644
--- a/libc/string/generic/strcmp.c
+++ b/libc/string/generic/strcmp.c
@@ -12,15 +12,13 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#include "memcopy.h"
-/* Experimentally off - libc_hidden_proto(strcmp) */
/* Compare S1 and S2, returning less than, equal to or
greater than zero if S1 is lexicographically less than,
equal to or greater than S2. */
@@ -44,7 +42,6 @@ int strcmp (const char *p1, const char *p2)
libc_hidden_weak(strcmp)
#ifndef __UCLIBC_HAS_LOCALE__
-/* Experimentally off - libc_hidden_proto(strcoll) */
strong_alias(strcmp,strcoll)
libc_hidden_def(strcoll)
#endif
diff --git a/libc/string/generic/strcpy.c b/libc/string/generic/strcpy.c
index 99e077139..924615fca 100644
--- a/libc/string/generic/strcpy.c
+++ b/libc/string/generic/strcpy.c
@@ -12,36 +12,21 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
-#include <stddef.h>
-#include "memcopy.h"
-#include "bp-checks.h"
-
-/* Experimentally off - libc_hidden_proto(strcpy) */
/* Copy SRC to DEST. */
-char *strcpy (char *dest, const char *src)
+char *strcpy(char *dest, const char *src)
{
- reg_char c;
- char *__unbounded s = (char *__unbounded) CHECK_BOUNDS_LOW (src);
- const ptrdiff_t off = CHECK_BOUNDS_LOW (dest) - s - 1;
- size_t n;
-
- do
- {
- c = *s++;
- s[off] = c;
- }
- while (c != '\0');
+ char *dst = dest;
- n = s - src;
- (void) CHECK_BOUNDS_HIGH (src + n);
- (void) CHECK_BOUNDS_HIGH (dest + n);
+ while ((*dst = *src) != '\0') {
+ src++;
+ dst++;
+ }
- return dest;
+ return dest;
}
libc_hidden_def(strcpy)
diff --git a/libc/string/generic/strcspn.c b/libc/string/generic/strcspn.c
index b65b3b995..ca9506bdd 100644
--- a/libc/string/generic/strcspn.c
+++ b/libc/string/generic/strcspn.c
@@ -12,14 +12,11 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strcspn) */
-/* Experimentally off - libc_hidden_proto(strchr) */
/* Return the length of the maximum initial segment of S
which contains no characters from REJECT. */
diff --git a/libc/string/generic/strlen.c b/libc/string/generic/strlen.c
index 764dae18d..dc383398b 100644
--- a/libc/string/generic/strlen.c
+++ b/libc/string/generic/strlen.c
@@ -15,15 +15,12 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#include <stdlib.h>
-/* Experimentally off - libc_hidden_proto(strlen) */
-libc_hidden_proto(abort)
/* Return the length of the null-terminated string STR. Scan for
the null terminator quickly by testing four bytes at a time. */
diff --git a/libc/string/generic/strncat.c b/libc/string/generic/strncat.c
index 8e3423e49..f0cf8f995 100644
--- a/libc/string/generic/strncat.c
+++ b/libc/string/generic/strncat.c
@@ -12,15 +12,13 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#include "memcopy.h"
-/* Experimentally off - libc_hidden_proto(strncat) */
char *strncat (char *s1, const char *s2, size_t n)
{
reg_char c;
diff --git a/libc/string/generic/strncmp.c b/libc/string/generic/strncmp.c
index c49f36d8b..ca980415e 100644
--- a/libc/string/generic/strncmp.c
+++ b/libc/string/generic/strncmp.c
@@ -12,14 +12,12 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#include "memcopy.h"
-/* Experimentally off - libc_hidden_proto(strncmp) */
/* Compare no more than N characters of S1 and S2,
returning less than, equal to or greater than zero
if S1 is lexicographically less than, equal to or
diff --git a/libc/string/generic/strncpy.c b/libc/string/generic/strncpy.c
index d2d693f2b..0256bcc6b 100644
--- a/libc/string/generic/strncpy.c
+++ b/libc/string/generic/strncpy.c
@@ -12,14 +12,12 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#include "memcopy.h"
-/* Experimentally off - libc_hidden_proto(strncpy) */
char *strncpy (char *s1, const char *s2, size_t n)
{
reg_char c;
diff --git a/libc/string/generic/strnlen.c b/libc/string/generic/strnlen.c
index d9ba76129..4d4cde84f 100644
--- a/libc/string/generic/strnlen.c
+++ b/libc/string/generic/strnlen.c
@@ -17,16 +17,13 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
+ License along with the GNU C Library; see the file COPYING.LIB. If
+ not, see <http://www.gnu.org/licenses/>. */
#include <string.h>
#include <stdlib.h>
#ifdef __USE_GNU
-/* Experimentally off - libc_hidden_proto(strnlen) */
-libc_hidden_proto(abort)
/* Find the length of S, but scan at most MAXLEN characters. If no
'\0' terminator is found in that many characters, return MAXLEN. */
@@ -34,7 +31,7 @@ size_t strnlen (const char *str, size_t maxlen)
{
const char *char_ptr, *end_ptr = str + maxlen;
const unsigned long int *longword_ptr;
- unsigned long int longword, magic_bits, himagic, lomagic;
+ unsigned long int longword, himagic, lomagic;
if (maxlen == 0)
return 0;
@@ -68,14 +65,12 @@ size_t strnlen (const char *str, size_t maxlen)
The 1-bits make sure that carries propagate to the next 0-bit.
The 0-bits provide holes for carries to fall into. */
- magic_bits = 0x7efefeffL;
himagic = 0x80808080L;
lomagic = 0x01010101L;
if (sizeof (longword) > 4)
{
/* 64-bit version of the magic. */
/* Do the shift in two steps to avoid a warning if long has 32 bits. */
- magic_bits = ((0x7efefefeL << 16) << 16) | 0xfefefeffL;
himagic = ((himagic << 16) << 16) | himagic;
lomagic = ((lomagic << 16) << 16) | lomagic;
}
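
The himagic/lomagic constants kept above implement the classic carry-propagation test for a zero byte inside a word: (w - lomagic) & ~w & himagic is nonzero exactly when some byte of w is zero, which is how strnlen scans four (or eight) bytes per iteration. A minimal 32-bit sketch — the name is illustrative, and uint32_t keeps the constants honest on 64-bit hosts:

    #include <stdint.h>
    #include <assert.h>

    static int has_zero_byte(uint32_t w)
    {
        return ((w - 0x01010101u) & ~w & 0x80808080u) != 0;
    }

    int main(void)
    {
        assert(has_zero_byte(0x41420043u));    /* bytes 'A' 'B' '\0' 'C' */
        assert(!has_zero_byte(0x41424344u));   /* "ABCD", no NUL byte */
        return 0;
    }
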
diff --git a/libc/string/generic/strrchr.c b/libc/string/generic/strrchr.c
index c85707241..8ca404843 100644
--- a/libc/string/generic/strrchr.c
+++ b/libc/string/generic/strrchr.c
@@ -12,14 +12,11 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strrchr) */
-/* Experimentally off - libc_hidden_proto(strchr) */
/* Find the last occurrence of C in S. */
char *strrchr (const char *s, int c)
diff --git a/libc/string/generic/strsep.c b/libc/string/generic/strsep.c
index e02e57068..bbdaf8849 100644
--- a/libc/string/generic/strsep.c
+++ b/libc/string/generic/strsep.c
@@ -12,18 +12,14 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
#ifdef __USE_BSD
-/* Experimentally off - libc_hidden_proto(strchr) */
-/* Experimentally off - libc_hidden_proto(strpbrk) */
-/* Experimentally off - libc_hidden_proto(strsep) */
char *strsep (char **stringp, const char *delim)
{
char *begin, *end;
diff --git a/libc/string/generic/strspn.c b/libc/string/generic/strspn.c
index 010567744..86bcdcb70 100644
--- a/libc/string/generic/strspn.c
+++ b/libc/string/generic/strspn.c
@@ -12,13 +12,11 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strspn) */
/* Return the length of the maximum initial segment
of S which contains only characters in ACCEPT. */
size_t strspn (const char *s, const char *accept)
diff --git a/libc/string/generic/strstr.c b/libc/string/generic/strstr.c
index c12dceb33..dd101768b 100644
--- a/libc/string/generic/strstr.c
+++ b/libc/string/generic/strstr.c
@@ -13,9 +13,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/*
* My personal strstr() implementation that beats most other algorithms.
@@ -28,7 +27,6 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strstr) */
typedef unsigned chartype;
diff --git a/libc/string/generic/strtok_r.c b/libc/string/generic/strtok_r.c
index d082d226e..253964395 100644
--- a/libc/string/generic/strtok_r.c
+++ b/libc/string/generic/strtok_r.c
@@ -13,33 +13,27 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strtok_r) */
-/* Experimentally off - libc_hidden_proto(strspn) */
-/* Experimentally off - libc_hidden_proto(strpbrk) */
#ifdef __USE_GNU
# define __rawmemchr rawmemchr
-/* Experimentally off - libc_hidden_proto(rawmemchr) */
#else
# define __rawmemchr strchr
-/* Experimentally off - libc_hidden_proto(strchr) */
#endif
-
-/* Parse S into tokens separated by characters in DELIM.
+#if 0
+ Parse S into tokens separated by characters in DELIM.
If S is NULL, the saved pointer in SAVE_PTR is used as
the next starting point. For example:
char s[] = "-abc-=-def";
char *sp;
- x = strtok_r(s, "-", &sp); // x = "abc", sp = "=-def"
- x = strtok_r(NULL, "-=", &sp); // x = "def", sp = NULL
- x = strtok_r(NULL, "=", &sp); // x = NULL
- // s = "abc\0-def\0"
-*/
+ x = strtok_r(s, "-", &sp); /* x = "abc", sp = "=-def" */
+ x = strtok_r(NULL, "-=", &sp); /* x = "def", sp = NULL */
+ x = strtok_r(NULL, "=", &sp); /* x = NULL */
+ /* s = "abc\0-def\0" */
+#endif
char *strtok_r (char *s, const char *delim, char **save_ptr)
{
char *token;
diff --git a/libc/string/i386/memchr.c b/libc/string/i386/memchr.c
index fe4537914..1960f6ba4 100644
--- a/libc/string/i386/memchr.c
+++ b/libc/string/i386/memchr.c
@@ -32,20 +32,44 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(memchr) */
-void *memchr(const void *cs, int c, size_t count)
+#undef memchr
+/*#define memchr TESTING*/
+void *memchr(const void *s, int c, size_t count)
{
- int d0;
- register void * __res;
- if (!count)
- return NULL;
- __asm__ __volatile__(
- "repne\n\t"
- "scasb\n\t"
- "je 1f\n\t"
- "movl $1,%0\n"
- "1:\tdecl %0"
- :"=D" (__res), "=&c" (d0) : "a" (c),"0" (cs),"1" (count));
- return __res;
+ void *edi;
+ int ecx;
+ __asm__ __volatile__(
+ " jecxz 1f\n"
+ " repne; scasb\n"
+ " leal -1(%%edi), %%edi\n"
+ " je 2f\n"
+ "1:\n"
+ " xorl %%edi, %%edi\n" /* NULL */
+ "2:\n"
+ : "=&D" (edi), "=&c" (ecx)
+ : "a" (c), "0" (s), "1" (count)
+ /* : no clobbers */
+ );
+ return edi;
}
+#ifndef memchr
libc_hidden_def(memchr)
+#else
+/* Uncomment TESTING, gcc -D_GNU_SOURCE -m32 -Os memchr.c -o memchr
+ * and run ./memchr
+ */
+int main()
+{
+ static const char str[] = "abc.def";
+ printf((char*)memchr(str, '.',-2) - str == 3 ? "ok\n" : "BAD!\n");
+ printf((char*)memchr(str, '.',-1) - str == 3 ? "ok\n" : "BAD!\n");
+ printf((char*)memchr(str, '.', 0) == NULL ? "ok\n" : "BAD!\n");
+ printf((char*)memchr(str, '.', 1) == NULL ? "ok\n" : "BAD!\n");
+ printf((char*)memchr(str, '.', 2) == NULL ? "ok\n" : "BAD!\n");
+ printf((char*)memchr(str, '.', 3) == NULL ? "ok\n" : "BAD!\n");
+ printf((char*)memchr(str, '.', 4) - str == 3 ? "ok\n" : "BAD!\n");
+ printf((char*)memchr(str, '.', 5) - str == 3 ? "ok\n" : "BAD!\n");
+ printf((char*)memchr(str+3, '.', 0) == NULL ? "ok\n" : "BAD!\n");
+ printf((char*)memchr(str+3, '.', 5) - str == 3 ? "ok\n" : "BAD!\n");
+}
+#endif
diff --git a/libc/string/i386/memcpy.c b/libc/string/i386/memcpy.c
index 285583f3b..697d0bdc2 100644
--- a/libc/string/i386/memcpy.c
+++ b/libc/string/i386/memcpy.c
@@ -32,22 +32,23 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(memcpy) */
+#undef memcpy
void *memcpy(void * to, const void * from, size_t n)
{
- int d0, d1, d2;
- __asm__ __volatile__(
- "rep ; movsl\n\t"
- "testb $2,%b4\n\t"
- "je 1f\n\t"
- "movsw\n"
- "1:\ttestb $1,%b4\n\t"
- "je 2f\n\t"
- "movsb\n"
- "2:"
- : "=&c" (d0), "=&D" (d1), "=&S" (d2)
- :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
- : "memory");
- return (to);
+ int d0, d1, d2;
+ __asm__ __volatile__(
+ " rep; movsl\n"
+ " movl %4, %%ecx\n"
+ " andl $3, %%ecx\n"
+	/* jz is optional: it avoids "rep; movsb" with ecx == 0,
+	 * but the extra branch is currently (2008) the faster choice */
+ " jz 1f\n"
+ " rep; movsb\n"
+ "1:\n"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
+ : "memory"
+ );
+ return to;
}
libc_hidden_def(memcpy)
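
The new tail is the whole point of the rewrite: instead of testing bit 1
and bit 0 separately, it computes n & 3 and lets one "rep; movsb" move
the leftovers. The strategy in C, as a sketch rather than the generated
code:

#include <stddef.h>

static void *memcpy_sketch(void *to, const void *from, size_t n)
{
	unsigned char *d = to;
	const unsigned char *s = from;
	size_t words = n / 4;	/* ecx for "rep; movsl" */

	while (words--) {	/* 4 bytes per step */
		d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; d[3] = s[3];
		d += 4; s += 4;
	}
	n &= 3;			/* "andl $3, %ecx" */
	while (n--)		/* "rep; movsb" tail */
		*d++ = *s++;
	return to;
}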
diff --git a/libc/string/i386/memmove.c b/libc/string/i386/memmove.c
index a924efcbc..0ec8016a5 100644
--- a/libc/string/i386/memmove.c
+++ b/libc/string/i386/memmove.c
@@ -32,28 +32,40 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(memmove) */
+#undef memmove
+/*#define memmove TESTING*/
void *memmove(void *dest, const void *src, size_t n)
{
- int d0, d1, d2;
- if (dest<src)
+ int eax, ecx, esi, edi;
__asm__ __volatile__(
- "rep\n\t"
- "movsb"
- : "=&c" (d0), "=&S" (d1), "=&D" (d2)
- :"0" (n),"1" (src),"2" (dest)
- : "memory");
- else
- __asm__ __volatile__(
- "std\n\t"
- "rep\n\t"
- "movsb\n\t"
- "cld"
- : "=&c" (d0), "=&S" (d1), "=&D" (d2)
- :"0" (n),
- "1" (n-1+(const char *)src),
- "2" (n-1+(char *)dest)
- :"memory");
- return dest;
+ " movl %%eax, %%edi\n"
+ " cmpl %%esi, %%eax\n"
+ " je 2f\n" /* (optional) src == dest -> NOP */
+ " jb 1f\n" /* src > dest -> simple copy */
+ " leal -1(%%esi,%%ecx), %%esi\n"
+ " leal -1(%%eax,%%ecx), %%edi\n"
+ " std\n"
+ "1: rep; movsb\n"
+ " cld\n"
+ "2:\n"
+ : "=&c" (ecx), "=&S" (esi), "=&a" (eax), "=&D" (edi)
+ : "0" (n), "1" (src), "2" (dest)
+ : "memory"
+ );
+ return (void*)eax;
}
+#ifndef memmove
libc_hidden_def(memmove)
+#else
+/* Uncomment TESTING, gcc -D_GNU_SOURCE -m32 -Os memmove.c -o memmove
+ * and run ./memmove
+ */
+int main()
+{
+ static char str[] = "abcdef.123";
+ memmove(str + 1, str, 5);
+ printf(strcmp(str, "aabcde.123") == 0 ? "ok\n" : "BAD!\n");
+ memmove(str, str + 1, 5);
+ printf(strcmp(str, "abcdee.123") == 0 ? "ok\n" : "BAD!\n");
+}
+#endif
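
Folding the two asm blocks into one works because both directions share
the "rep; movsb": only the starting pointers and the direction flag
differ. The same decision in C (sketch only):

#include <stddef.h>

static void *memmove_sketch(void *dest, const void *src, size_t n)
{
	unsigned char *d = dest;
	const unsigned char *s = src;

	if (d == s)		/* "je 2f": nothing to do */
		return dest;
	if (d < s) {		/* "jb 1f": forward copy is safe */
		while (n--)
			*d++ = *s++;
	} else {		/* copy tail-first: "std" + "rep; movsb" */
		while (n--)
			d[n] = s[n];
	}
	return dest;
}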
diff --git a/libc/string/i386/memset.c b/libc/string/i386/memset.c
index bbaa45215..9f51f3c60 100644
--- a/libc/string/i386/memset.c
+++ b/libc/string/i386/memset.c
@@ -28,20 +28,68 @@
* More importantly, these should provide a good example for
* others to follow when adding arch specific optimizations.
* -Erik
+ *
+ * 2009-04: modified by Denys Vlasenko <vda.linux@googlemail.com>
+ * Filling byte-by-byte is a bit too slow. I prefer the 46-byte function,
+ * which fills about 4x faster than the 21-byte one.
*/
#include <string.h>
-/* Experimentally off - libc_hidden_proto(memset) */
+#undef memset
void *memset(void *s, int c, size_t count)
{
- int d0, d1;
- __asm__ __volatile__(
- "rep\n\t"
- "stosb"
- : "=&c" (d0), "=&D" (d1)
- :"a" (c),"1" (s),"0" (count)
- :"memory");
- return s;
+ int reg, edi;
+ __asm__ __volatile__(
+
+ /* Most of the time, count is divisible by 4 and nonzero */
+ /* It's better to make this case faster */
+ /* " jecxz 9f\n" - (optional) count == 0: goto ret */
+ " mov %%ecx, %1\n"
+ " shr $2, %%ecx\n"
+ " jz 1f\n" /* zero words: goto fill_bytes */
+ /* extend 8-bit fill to 32 bits */
+ " movzx %%al, %%eax\n" /* 3 bytes */
+ /* or: " and $0xff, %%eax\n" - 5 bytes */
+ " imul $0x01010101, %%eax\n" /* 6 bytes */
+ /* fill full words */
+ " rep; stosl\n"
+ /* fill 0-3 bytes */
+ "1: and $3, %1\n"
+ " jz 9f\n" /* (count & 3) == 0: goto end */
+ "2: stosb\n"
+ " dec %1\n"
+ " jnz 2b\n"
+ /* end */
+ "9:\n"
+
+ : "=&D" (edi), "=&r" (reg)
+ : "0" (s), "a" (c), "c" (count)
+ : "memory"
+ );
+ return s;
}
libc_hidden_def(memset)
+
+/*
+gcc 4.3.1
+=========
+57 push %edi
+8b 7c 24 08 mov 0x8(%esp),%edi
+8b 4c 24 10 mov 0x10(%esp),%ecx
+8b 44 24 0c mov 0xc(%esp),%eax
+89 ca mov %ecx,%edx
+c1 e9 02 shr $0x2,%ecx
+74 0b je 1f <__GI_memset+0x1f>
+0f b6 c0 movzbl %al,%eax
+69 c0 01 01 01 01 imul $0x1010101,%eax,%eax
+f3 ab rep stos %eax,%es:(%edi)
+83 e2 03 and $0x3,%edx
+74 04 je 28 <__GI_memset+0x28>
+aa stos %al,%es:(%edi)
+4a dec %edx
+75 fc jne 24 <__GI_memset+0x24>
+8b 44 24 08 mov 0x8(%esp),%eax
+5f pop %edi
+c3 ret
+*/
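
The quoted disassembly shows where the 46 bytes go: one multiply widens
the fill byte to a dword, "rep stos" does the bulk, and a short loop
mops up the tail. The same structure in C (sketch, not the library
code):

#include <string.h>

static void *memset_sketch(void *s, int c, size_t count)
{
	unsigned int fill = (c & 0xff) * 0x01010101u; /* the imul trick */
	unsigned char *p = s;
	size_t words = count >> 2;	/* "shr $2, %ecx" */

	while (words--) {		/* "rep; stosl": 4 bytes per store */
		memcpy(p, &fill, 4);
		p += 4;
	}
	count &= 3;
	while (count--)			/* byte tail, the stosb loop */
		*p++ = (unsigned char)c;
	return s;
}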
diff --git a/libc/string/i386/rawmemchr.c b/libc/string/i386/rawmemchr.c
new file mode 100644
index 000000000..be0b142c3
--- /dev/null
+++ b/libc/string/i386/rawmemchr.c
@@ -0,0 +1,24 @@
+/*
+ * Adapted from strlen.c code
+ *
+ * Copyright (C) 2008 Denys Vlasenko <vda.linux@googlemail.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+#include <string.h>
+
+#undef rawmemchr
+void *rawmemchr(const void *s, int c)
+{
+ void *eax;
+ int ecx, edi;
+ __asm__ __volatile__(
+ " repne; scasb\n"
+ " leal -1(%%edi), %%eax\n"
+ : "=&c" (ecx), "=&D" (edi), "=&a" (eax)
+ : "0" (0xffffffff), "1" (s), "2" (c)
+ );
+ return eax;
+}
+libc_hidden_def(rawmemchr)
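
Note the missing length bound: ecx is preloaded with 0xffffffff, so
rawmemchr() keeps scanning until it finds the byte. The caller must
guarantee a hit; searching for the NUL is the typical use. A usage
sketch:

#define _GNU_SOURCE	/* for the rawmemchr() prototype */
#include <string.h>
#include <stdio.h>

int main(void)
{
	const char *s = "key=value";
	char *eq = rawmemchr(s, '=');			/* points at '=' */
	size_t len = (char *)rawmemchr(s, 0) - s;	/* strlen-style */
	printf("%s %u\n", eq + 1, (unsigned)len);	/* "value 9" */
	return 0;
}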
diff --git a/libc/string/i386/strcat.c b/libc/string/i386/strcat.c
index 2cf0237a6..e71aad4f7 100644
--- a/libc/string/i386/strcat.c
+++ b/libc/string/i386/strcat.c
@@ -32,7 +32,6 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strcat) */
char *strcat(char * dest, const char * src)
{
int d0, d1, d2, d3;
diff --git a/libc/string/i386/strchr.c b/libc/string/i386/strchr.c
index 46b1dfb6e..93cc9583e 100644
--- a/libc/string/i386/strchr.c
+++ b/libc/string/i386/strchr.c
@@ -32,23 +32,25 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strchr) */
+#undef strchr
char *strchr(const char *s, int c)
{
- int d0;
- register char * __res;
- __asm__ __volatile__(
- "movb %%al,%%ah\n"
- "1:\tlodsb\n\t"
- "cmpb %%ah,%%al\n\t"
- "je 2f\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b\n\t"
- "movl $1,%1\n"
- "2:\tmovl %1,%0\n\t"
- "decl %0"
- :"=a" (__res), "=&S" (d0) : "1" (s),"0" (c));
- return __res;
+ int esi;
+ register char * eax;
+ __asm__ __volatile__(
+ " movb %%al, %%ah\n"
+ "1: lodsb\n"
+ " cmpb %%ah, %%al\n"
+ " je 2f\n"
+ " testb %%al, %%al\n"
+ " jnz 1b\n"
+ " movl $1, %%esi\n" /* can use shorter xor + inc */
+ "2: leal -1(%%esi), %%eax\n"
+ : "=a" (eax), "=&S" (esi)
+ : "0" (c), "1" (s)
+ /* no clobbers */
+ );
+ return eax;
}
libc_hidden_def(strchr)
#ifdef __UCLIBC_SUSV3_LEGACY__
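
Both exits now share one "leal -1(%%esi)": on a hit esi is one past the
match, and on a miss esi is forced to 1 so the lea produces NULL. The
loop's shape in C (sketch):

static char *strchr_sketch(const char *s, int c)
{
	for (;;) {
		char ch = *s++;		/* lodsb advances past the char */
		if (ch == (char)c)
			return (char *)s - 1;	/* leal -1(%esi) */
		if (ch == 0)
			return 0;	/* esi = 1, then leal -1 -> NULL */
	}
}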
diff --git a/libc/string/i386/strchrnul.c b/libc/string/i386/strchrnul.c
new file mode 100644
index 000000000..d48427214
--- /dev/null
+++ b/libc/string/i386/strchrnul.c
@@ -0,0 +1,47 @@
+/*
+ * Adapted from strchr.c code
+ *
+ * Copyright (C) 2008 Denys Vlasenko <vda.linux@googlemail.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+#include <string.h>
+
+#undef strchrnul
+/*#define strchrnul TESTING*/
+char *strchrnul(const char *s, int c)
+{
+ int esi;
+ char *eax;
+ __asm__ __volatile__(
+ " movb %%al, %%ah\n"
+ "1: lodsb\n"
+ " cmpb %%ah, %%al\n"
+ " je 2f\n"
+ " testb %%al, %%al\n"
+ " jnz 1b\n"
+ /* with this, we'd get strchr(): */
+ /* " movl $1, %%esi\n" */
+ "2: leal -1(%%esi), %%eax\n"
+ : "=a" (eax), "=&S" (esi)
+ : "0" (c), "1" (s)
+ /* no clobbers */
+ );
+ return eax;
+}
+#ifndef strchrnul
+libc_hidden_def(strchrnul)
+#else
+/* Uncomment TESTING, gcc -D_GNU_SOURCE -m32 -Os strchrnul.c -o strchrnul
+ * and run ./strchrnul
+ */
+int main()
+{
+ static const char str[] = "abc.def";
+ printf((char*)strchrnul(str, '.') - str == 3 ? "ok\n" : "BAD!\n");
+ printf((char*)strchrnul(str, '*') - str == 7 ? "ok\n" : "BAD!\n");
+ printf((char*)strchrnul(str, 0) - str == 7 ? "ok\n" : "BAD!\n");
+ printf((char*)strchrnul(str+3, '.') - str == 3 ? "ok\n" : "BAD!\n");
+}
+#endif
diff --git a/libc/string/i386/strcmp.c b/libc/string/i386/strcmp.c
index eff230c5c..9621f66f8 100644
--- a/libc/string/i386/strcmp.c
+++ b/libc/string/i386/strcmp.c
@@ -32,7 +32,6 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strcmp) */
int strcmp(const char *cs, const char *ct)
{
int d0, d1;
@@ -55,7 +54,6 @@ int strcmp(const char *cs, const char *ct)
libc_hidden_def(strcmp)
#ifndef __UCLIBC_HAS_LOCALE__
-/* Experimentally off - libc_hidden_proto(strcoll) */
strong_alias(strcmp,strcoll)
libc_hidden_def(strcoll)
#endif
diff --git a/libc/string/i386/strcpy.c b/libc/string/i386/strcpy.c
index 09065a9b7..fff1bd006 100644
--- a/libc/string/i386/strcpy.c
+++ b/libc/string/i386/strcpy.c
@@ -32,7 +32,7 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strcpy) */
+#undef strcpy
char *strcpy(char * dest, const char * src)
{
int d0, d1, d2;
diff --git a/libc/string/i386/string.h b/libc/string/i386/string.h
new file mode 100644
index 000000000..cf4333dec
--- /dev/null
+++ b/libc/string/i386/string.h
@@ -0,0 +1,338 @@
+/*
+ * Copyright (C) 2008 Denys Vlasenko <vda.linux@googlemail.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball
+ */
+
+#if !defined _STRING_H
+#error "Never use <libc-string_i386.h> directly; include <string.h> instead"
+#endif
+
+#ifndef _LIBC_STRING_i386_H
+#define _LIBC_STRING_i386_H 1
+
+static __always_inline
+void *inlined_memset_const_c_count4(void *s, unsigned eax, unsigned count)
+{
+ int ecx, edi;
+
+ if (count == 0)
+ return s;
+
+	/* Very small sizes (2 stores or less) are best done with direct
+ * mov <const>,<mem> instructions (they do not clobber registers) */
+ if (count == 1) {
+ *(char *)(s + 0) = eax;
+ return s;
+ }
+
+ /* You wonder why & 0xff is needed? Try memset(p, '\xff', size).
+ * If char is signed, '\xff' == -1! */
+ eax = (eax & 0xff) * 0x01010101; /* done at compile time */
+
+ if (count == 2) {
+ *(short *)(s + 0) = eax;
+ return s;
+ }
+ if (count == 3) {
+ *(short *)(s + 0) = eax;
+ *(char *) (s + 2) = eax;
+ return s;
+ }
+ if (count == 1*4 + 0) {
+ *(int *)(s + 0) = eax;
+ return s;
+ }
+ if (count == 1*4 + 1) {
+ *(int *) (s + 0) = eax;
+ *(char *)(s + 4) = eax;
+ return s;
+ }
+ if (count == 1*4 + 2) {
+ *(int *) (s + 0) = eax;
+ *(short *)(s + 4) = eax;
+ return s;
+ }
+
+ /* Small string stores: don't clobber ecx
+ * (clobbers only eax and edi) */
+#define small_store(arg) { \
+ __asm__ __volatile__( \
+ arg \
+ : "=&D" (edi) \
+ : "a" (eax), "0" (s) \
+ : "memory" \
+ ); \
+ return s; \
+}
+ if (count == 1*4 + 3) small_store("stosl; stosw; stosb");
+ if (count == 2*4 + 0) {
+ ((int *)s)[0] = eax;
+ ((int *)s)[1] = eax;
+ return s;
+ }
+ if (count == 2*4 + 1) small_store("stosl; stosl; stosb");
+ if (count == 2*4 + 2) small_store("stosl; stosl; stosw");
+ if (count == 2*4 + 3) small_store("stosl; stosl; stosw; stosb");
+ if (count == 3*4 + 0) small_store("stosl; stosl; stosl");
+ if (count == 3*4 + 1) small_store("stosl; stosl; stosl; stosb");
+ if (count == 3*4 + 2) small_store("stosl; stosl; stosl; stosw");
+ if (count == 3*4 + 3) small_store("stosl; stosl; stosl; stosw; stosb");
+ if (count == 4*4 + 0) small_store("stosl; stosl; stosl; stosl");
+ if (count == 4*4 + 1) small_store("stosl; stosl; stosl; stosl; stosb");
+ /* going over 7 bytes is suboptimal */
+ /* stosw is 2-byte insn, so this one takes 6 bytes: */
+ if (count == 4*4 + 2) small_store("stosl; stosl; stosl; stosl; stosw");
+ /* 7 bytes */
+ if (count == 4*4 + 3) small_store("stosl; stosl; stosl; stosl; stosw; stosb");
+ /* 5 bytes */
+ if (count == 5*4 + 0) small_store("stosl; stosl; stosl; stosl; stosl");
+ /* 6 bytes */
+ if (count == 5*4 + 1) small_store("stosl; stosl; stosl; stosl; stosl; stosb");
+ /* 7 bytes */
+ if (count == 5*4 + 2) small_store("stosl; stosl; stosl; stosl; stosl; stosw");
+ /* 8 bytes, but oh well... */
+ if (count == 5*4 + 3) small_store("stosl; stosl; stosl; stosl; stosl; stosw; stosb");
+ /* 6 bytes */
+ if (count == 6*4 + 0) small_store("stosl; stosl; stosl; stosl; stosl; stosl");
+ /* the rest would be 7+ bytes and is handled below instead */
+#undef small_store
+
+ /* Not small, but multiple-of-4 store.
+ * "mov <const>,%ecx; rep; stosl" sequence is 7 bytes */
+ __asm__ __volatile__(
+ " rep; stosl\n"
+ : "=&c" (ecx), "=&D" (edi)
+ : "a" (eax), "0" (count / 4), "1" (s)
+ : "memory"
+ );
+ return s;
+}
+#if 1 /* -51 bytes on shared i386 build with gcc 4.3.0 */
+#define memset(s, c, count) ( \
+ ( !(__builtin_constant_p(c) && __builtin_constant_p(count)) \
+ || ((count) > (6*4 + 0) && ((count) % 4) != 0) \
+ ) \
+ ? memset((s), (c), (count)) \
+ : inlined_memset_const_c_count4((s), (c), (count)) \
+ )
+#endif
+
+
+static __always_inline
+void *inlined_mempcpy_const_count4(void *d, const void *s, unsigned count)
+{
+ int ecx;
+ char *esi, *edi;
+
+ if (count == 0)
+ return d;
+
+ if (count == 1) {
+ *(char *)d = *(char *)s;
+ return d + 1;
+ }
+ if (count == 2) {
+ *(short *)d = *(short *)s;
+ return d + 2;
+ }
+ /* Small string moves: don't clobber ecx
+ * (clobbers only esi and edi) */
+#define small_move(arg) { \
+ __asm__ __volatile__( \
+ arg \
+ : "=&S" (esi), "=&D" (edi) \
+ : "0" (s), "1" (d) \
+ : "memory" \
+ ); \
+ return edi; \
+}
+ if (count == 3) small_move("movsw; movsb");
+ if (count == 1*4 + 0) {
+ *(int *)d = *(int *)s;
+ return d + 4;
+ }
+ if (count == 1*4 + 1) small_move("movsl; movsb");
+ if (count == 1*4 + 2) small_move("movsl; movsw");
+ if (count == 1*4 + 3) small_move("movsl; movsw; movsb");
+ if (count == 2*4 + 0) small_move("movsl; movsl");
+ if (count == 2*4 + 1) small_move("movsl; movsl; movsb");
+ if (count == 2*4 + 2) small_move("movsl; movsl; movsw");
+ if (count == 2*4 + 3) small_move("movsl; movsl; movsw; movsb");
+ if (count == 3*4 + 0) small_move("movsl; movsl; movsl");
+ if (count == 3*4 + 1) small_move("movsl; movsl; movsl; movsb");
+ if (count == 3*4 + 2) small_move("movsl; movsl; movsl; movsw");
+ if (count == 3*4 + 3) small_move("movsl; movsl; movsl; movsw; movsb");
+ if (count == 4*4 + 0) small_move("movsl; movsl; movsl; movsl");
+ if (count == 4*4 + 1) small_move("movsl; movsl; movsl; movsl; movsb");
+ /* going over 7 bytes is suboptimal */
+ /* movsw is 2-byte insn, so this one takes 6 bytes: */
+ if (count == 4*4 + 2) small_move("movsl; movsl; movsl; movsl; movsw");
+ /* 7 bytes */
+ if (count == 4*4 + 3) small_move("movsl; movsl; movsl; movsl; movsw; movsb");
+ /* 5 bytes */
+ if (count == 5*4 + 0) small_move("movsl; movsl; movsl; movsl; movsl");
+ /* 6 bytes */
+ if (count == 5*4 + 1) small_move("movsl; movsl; movsl; movsl; movsl; movsb");
+ /* 7 bytes */
+ if (count == 5*4 + 2) small_move("movsl; movsl; movsl; movsl; movsl; movsw");
+ /* 8 bytes, but oh well... */
+ if (count == 5*4 + 3) small_move("movsl; movsl; movsl; movsl; movsl; movsw; movsb");
+ /* 6 bytes */
+ if (count == 6*4 + 0) small_move("movsl; movsl; movsl; movsl; movsl; movsl");
+ /* the rest would be 7+ bytes and is handled below instead */
+#undef small_move
+
+ /* Not small, but multiple-of-4 move.
+ * "mov <const>,%ecx; rep; movsl" sequence is 7 bytes */
+ __asm__ __volatile__(
+ " rep; movsl\n"
+ : "=&c" (ecx), "=&S" (esi), "=&D" (edi)
+ : "0" (count / 4), "1" (s), "2" (d)
+ : "memory"
+ );
+ return edi;
+}
+static __always_inline
+void *inlined_memcpy_const_count4(void *d, const void *s, unsigned count)
+{
+ inlined_mempcpy_const_count4(d, s, count);
+ return d;
+}
+#if 1 /* +34 bytes on shared i386 build with gcc 4.3.0 */
+#define mempcpy(d, s, count) ( \
+ ( !(__builtin_constant_p(count)) \
+ || ((count) > (6*4 + 0) && ((count) % 4) != 0) \
+ ) \
+ ? mempcpy((d), (s), (count)) \
+ : inlined_mempcpy_const_count4((d), (s), (count)) \
+ )
+#define memcpy(d, s, count) ( \
+ ( !(__builtin_constant_p(count)) \
+ || ((count) > (6*4 + 0) && ((count) % 4) != 0) \
+ ) \
+ ? memcpy((d), (s), (count)) \
+ : inlined_memcpy_const_count4((d), (s), (count)) \
+ )
+#endif
+
+
+static __always_inline
+size_t inlined_strlen(const char *s)
+{
+ int edi;
+ int ecx;
+ __asm__ __volatile__(
+ " repne; scasb\n"
+ /* " notl %0\n" */
+ /* " decl %0\n" */
+ : "=c" (ecx), "=&D" (edi)
+ : "1" (s), "a" (0), "0" (0xffffffffu)
+ /* : no clobbers */
+ );
+ return -ecx - 1;
+}
+#if 0 /* +1108 bytes on shared i386 build with gcc 4.3.0 */
+#define strlen(s) inlined_strlen(s)
+#endif
+
+
+static __always_inline
+char *inlined_stpcpy(char *dest, const char *src)
+{
+ char *esi, *edi;
+ int eax;
+ __asm__ __volatile__(
+ "1: lodsb\n"
+ " stosb\n"
+ " testb %%al, %%al\n"
+ " jnz 1b\n"
+ : "=&S" (esi), "=&D" (edi), "=&a" (eax)
+ : "0" (src), "1" (dest)
+ : "memory"
+ );
+ return edi - 1;
+}
+static __always_inline
+char *inlined_strcpy(char *dest, const char *src)
+{
+ inlined_stpcpy(dest, src);
+ return dest;
+}
+#if 0 /* +562 bytes on shared i386 build with gcc 4.3.0 */
+#define stpcpy(dest, src) inlined_stpcpy(dest, src)
+#define strcpy(dest, src) inlined_strcpy(dest, src)
+#endif
+
+
+static __always_inline
+void *inlined_memchr(const void *s, int c, size_t count)
+{
+ void *edi;
+ int ecx;
+ /* Unfortunately, c gets loaded to %eax (wide insn), not %al */
+ __asm__ __volatile__(
+ " jecxz 1f\n"
+ " repne; scasb\n"
+ " leal -1(%%edi), %%edi\n"
+ " je 2f\n"
+ "1:\n"
+ " xorl %%edi, %%edi\n"
+ "2:\n"
+ : "=&D" (edi), "=&c" (ecx)
+ : "a" (c), "0" (s), "1" (count)
+ /* : no clobbers */
+ );
+ return edi;
+}
+static __always_inline
+void *inlined_memchr_const_c(const void *s, int c, size_t count)
+{
+#if defined __OPTIMIZE__
+ void *edi;
+ int ecx, eax;
+ __asm__ __volatile__(
+ " jecxz 1f\n"
+ " movb %4, %%al\n" /* const c to %%al */
+ " repne; scasb\n"
+ " leal -1(%%edi), %%edi\n"
+ " je 2f\n"
+ "1:\n"
+ " xorl %%edi, %%edi\n"
+ "2:\n"
+ : "=&D" (edi), "=&c" (ecx), "=&a" (eax)
+ : "0" (s), "i" (c), "1" (count)
+ /* : no clobbers */
+ );
+ return edi;
+#else
+ /* With -O0, gcc can't figure out how to encode CONST c
+	 * as an immediate operand, so we generate slightly bigger code
+ * (usually "movl CONST,%eax", 3 bytes bigger than needed):
+ */
+ void *edi;
+ int ecx, eax;
+ __asm__ __volatile__(
+ " jecxz 1f\n"
+ " repne; scasb\n"
+ " leal -1(%%edi), %%edi\n"
+ " je 2f\n"
+ "1:\n"
+ " xorl %%edi, %%edi\n"
+ "2:\n"
+ : "=&D" (edi), "=&c" (ecx), "=&a" (eax)
+ : "0" (s), "2" (c), "1" (count)
+ /* : no clobbers */
+ );
+ return edi;
+#endif
+}
+#if 1 /* +2 bytes on shared i386 build with gcc 4.3.0 */
+#define memchr(s, c, count) ( \
+ __builtin_constant_p(c) \
+ ? inlined_memchr_const_c(s, (c) & 0xff, count) \
+ : inlined_memchr(s, c, count) \
+ )
+#endif
+
+#endif /* _LIBC_STRING_i386_H */
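
The wrappers are safe to define because a function-like macro is not
re-expanded inside its own replacement: the memset((s), (c), (count))
branch calls the real function. How the dispatch falls out, with a
hypothetical runtime value n (illustration only):

char buf[16];
size_t n = get_n();	/* hypothetical, not constant-foldable */

memset(buf, 0, 16);	/* both args constant: inlined variant */
memset(buf, 0, n);	/* non-constant count: library memset */
memset(buf, 0, 25);	/* 25 > 24 and 25 % 4 != 0: library memset */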
diff --git a/libc/string/i386/strlen.c b/libc/string/i386/strlen.c
index 61a178393..ff2baeb38 100644
--- a/libc/string/i386/strlen.c
+++ b/libc/string/i386/strlen.c
@@ -32,17 +32,17 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strlen) */
+#undef strlen
size_t strlen(const char *s)
{
- int d0;
- register int __res;
- __asm__ __volatile__(
- "repne\n\t"
- "scasb\n\t"
- "notl %0\n\t"
- "decl %0"
- :"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffff));
- return __res;
+ int eax, ecx, edi;
+ __asm__ __volatile__(
+ " repne; scasb\n"
+ " notl %%ecx\n"
+ " leal -1(%%ecx), %%eax\n"
+ : "=&c" (ecx), "=&D" (edi), "=&a" (eax)
+ : "0" (0xffffffff), "1" (s), "2" (0)
+ );
+ return eax;
}
libc_hidden_def(strlen)
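
The count arithmetic is worth spelling out: scasb examines len + 1
bytes (including the NUL) starting from ecx = 0xffffffff, leaving
ecx = -(len + 2); "notl" turns that into len + 1 and the "leal -1"
drops the NUL. A quick check of the identity (sketch):

#include <assert.h>

int main(void)
{
	unsigned len = 5;			/* any length */
	unsigned ecx = 0xffffffffu - (len + 1);	/* what repne scasb leaves */
	assert(~ecx - 1 == len);		/* notl, then leal -1 */
	return 0;
}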
diff --git a/libc/string/i386/strncat.c b/libc/string/i386/strncat.c
index 3872679d5..12f0a302b 100644
--- a/libc/string/i386/strncat.c
+++ b/libc/string/i386/strncat.c
@@ -32,30 +32,55 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strncat) */
-char *strncat(char * dest,
- const char * src, size_t count)
+#undef strncat
+/*#define strncat TESTING*/
+char *strncat(char * dest, const char * src, size_t count)
{
- int d0, d1, d2, d3;
- __asm__ __volatile__(
- "repne\n\t"
- "scasb\n\t"
- "decl %1\n\t"
- "movl %8,%3\n"
- "incl %3\n"
- "1:\tdecl %3\n\t"
- "jz 2f\n"
- "lodsb\n\t"
- "stosb\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b\n"
- "jmp 3f\n"
- "2:\txorl %2,%2\n\t"
- "stosb\n"
- "3:"
- : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
- : "0" (src),"1" (dest),"2" (0),"3" (0xffffffff), "g" (count)
- : "memory");
- return dest;
+ int esi, edi, eax, ecx, edx;
+ __asm__ __volatile__(
+ " xorl %%eax, %%eax\n"
+ " incl %%edx\n"
+ " pushl %%edi\n" /* save dest */
+ " repne; scasb\n"
+ " decl %%edi\n" /* edi => NUL in dest */
+ /* count-- */
+ "1: decl %%edx\n"
+ /* if count reached 0, store NUL and bail out */
+ " movl %%edx, %%eax\n"
+ " jz 2f\n"
+ /* else copy a char */
+ " lodsb\n"
+ "2: stosb\n"
+ " testb %%al, %%al\n"
+ " jnz 1b\n"
+ /* end of loop */
+ " popl %%eax\n" /* restore dest into eax */
+ : "=&S" (esi), "=&D" (edi), "=&a" (eax), "=&c" (ecx), "=&d" (edx)
+ : "0" (src), "1" (dest), "3" (0xffffffff), "4" (count)
+ : "memory"
+ );
+ return (char *)eax;
}
+#ifndef strncat
libc_hidden_def(strncat)
+#else
+/* Uncomment TESTING, gcc -m32 -Os strncat.c -o strncat
+ * and run ./strncat
+ */
+int main()
+{
+ char buf[99];
+
+ strcpy(buf, "abc"); buf[4] = '*'; strncat(buf, "def", 0);
+ printf(strcmp(buf, "abc") == 0 && buf[4] == '*' ? "ok\n" : "BAD!\n");
+
+ strcpy(buf, "abc"); buf[6] = 1; buf[7] = '*'; strncat(buf, "def", 50);
+ printf(strcmp(buf, "abcdef") == 0 && buf[7] == '*' ? "ok\n" : "BAD!\n");
+
+ strcpy(buf, "abc"); buf[6] = 1; buf[7] = '*'; strncat(buf, "def", -1);
+ printf(strcmp(buf, "abcdef") == 0 && buf[7] == '*' ? "ok\n" : "BAD!\n");
+
+ strcpy(buf, "abc"); buf[6] = 1; buf[7] = '*'; strncat(buf, "def123", 3);
+ printf(strcmp(buf, "abcdef") == 0 && buf[7] == '*' ? "ok\n" : "BAD!\n");
+}
+#endif
diff --git a/libc/string/i386/strncmp.c b/libc/string/i386/strncmp.c
index a14bb503b..bfb20c307 100644
--- a/libc/string/i386/strncmp.c
+++ b/libc/string/i386/strncmp.c
@@ -32,27 +32,28 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strncmp) */
+#undef strncmp
int strncmp(const char *cs, const char *ct, size_t count)
{
- register int __res;
- int d0, d1, d2;
- __asm__ __volatile__(
- "incl %3\n"
- "1:\tdecl %3\n\t"
- "jz 2f\n"
- "lodsb\n\t"
- "scasb\n\t"
- "jne 3f\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b\n"
- "2:\txorl %%eax,%%eax\n\t"
- "jmp 4f\n"
- "3:\tsbbl %%eax,%%eax\n\t"
- "orb $1,%%al\n"
- "4:"
- :"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
- :"1" (cs),"2" (ct),"3" (count));
- return __res;
+ int eax;
+ int esi, edi, ecx;
+ __asm__ __volatile__(
+ " incl %%ecx\n"
+ "1: decl %%ecx\n"
+ " jz 2f\n"
+ " lodsb\n"
+ " scasb\n"
+ " jne 3f\n"
+ " testb %%al, %%al\n"
+ " jnz 1b\n"
+ "2: xorl %%eax, %%eax\n"
+ " jmp 4f\n"
+ "3: sbbl %%eax, %%eax\n"
+ " orb $1, %%al\n"
+ "4:\n"
+ : "=a" (eax), "=&S" (esi), "=&D" (edi), "=&c" (ecx)
+ : "1" (cs), "2" (ct), "3" (count)
+ );
+ return eax;
}
libc_hidden_weak(strncmp)
diff --git a/libc/string/i386/strncpy.c b/libc/string/i386/strncpy.c
index 76aa6ae1b..99d104b0d 100644
--- a/libc/string/i386/strncpy.c
+++ b/libc/string/i386/strncpy.c
@@ -32,25 +32,44 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strncpy) */
+#undef strncpy
+/*#define strncpy TESTING*/
char *strncpy(char * dest, const char * src, size_t count)
{
- int d0, d1, d2, d3;
- __asm__ __volatile__(
- "incl %2\n"
- "1:\n"
- "decl %2\n"
- "jz 2f\n"
- "lodsb\n\t"
- "stosb\n\t"
- "testb %%al,%%al\n\t"
- "jne 1b\n\t"
- "decl %2\n"
- "rep\n\t"
- "stosb\n"
- "2:"
- : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
- :"0" (src),"1" (dest),"2" (count) : "memory");
- return dest;
+ int esi, edi, ecx, eax;
+ __asm__ __volatile__(
+	"1:	subl	$1, %%ecx\n" /* not dec! it doesn't set CF */
+ " jc 2f\n"
+ " lodsb\n"
+ " stosb\n"
+ " testb %%al, %%al\n"
+ " jnz 1b\n"
+ " rep; stosb\n"
+ "2:\n"
+ : "=&S" (esi), "=&D" (edi), "=&c" (ecx), "=&a" (eax)
+ : "0" (src), "1" (dest), "2" (count)
+ : "memory"
+ );
+ return dest;
}
+#ifndef strncpy
libc_hidden_def(strncpy)
+#else
+/* Uncomment TESTING, gcc -D_GNU_SOURCE -m32 -Os strncpy.c -o strncpy
+ * and run ./strncpy
+ */
+int main()
+{
+ static char str[99];
+
+ str[3] = '*'; str[4] = 0; strncpy(str, "abc", 3);
+ printf(strcmp(str, "abc*") == 0 ? "ok\n" : "BAD!\n");
+
+ str[4] = '*'; str[5] = '+'; strncpy(str, "abc", 5);
+ printf(strcmp(str, "abc") == 0 && str[4] == 0 && str[5] == '+' ?
+ "ok\n" : "BAD!\n");
+ strncpy(str, "def", 0); /* should do nothing */
+ printf(strcmp(str, "abc") == 0 && str[4] == 0 && str[5] == '+' ?
+ "ok\n" : "BAD!\n");
+}
+#endif
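
The "subl $1; jc" form lets count == 0 fall straight through, and the
trailing "rep; stosb" provides the NUL padding strncpy requires. The
contract in C (sketch):

#include <stddef.h>

static char *strncpy_sketch(char *dest, const char *src, size_t count)
{
	size_t i = 0;

	for (; i < count && src[i] != 0; i++)	/* copy loop */
		dest[i] = src[i];
	for (; i < count; i++)			/* "rep; stosb" padding */
		dest[i] = 0;
	return dest;
}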
diff --git a/libc/string/i386/strnlen.c b/libc/string/i386/strnlen.c
index 02c72f530..f58f698d1 100644
--- a/libc/string/i386/strnlen.c
+++ b/libc/string/i386/strnlen.c
@@ -33,24 +33,43 @@
#include <string.h>
#ifdef __USE_GNU
-/* Experimentally off - libc_hidden_proto(strnlen) */
+
+#undef strnlen
+/*#define strnlen TESTING*/
size_t strnlen(const char *s, size_t count)
{
- int d0;
- register int __res;
- __asm__ __volatile__(
- "movl %2,%0\n\t"
- "incl %1\n"
- "jmp 2f\n"
- "1:\tcmpb $0,(%0)\n\t"
- "je 3f\n\t"
- "incl %0\n"
- "2:\tdecl %1\n\t"
- "jne 1b\n"
- "3:\tsubl %2,%0"
- :"=a" (__res), "=&d" (d0)
- :"c" (s),"1" (count));
- return __res;
+ int edx;
+ int eax;
+ __asm__ __volatile__(
+ " leal -1(%%ecx), %%eax\n"
+ "1: incl %%eax\n"
+ " decl %%edx\n"
+ " jz 3f\n"
+ " cmpb $0, (%%eax)\n"
+ " jnz 1b\n"
+ "3: subl %%ecx, %%eax"
+ : "=a" (eax), "=&d" (edx)
+ : "c" (s), "1" (count + 1)
+ );
+ return eax;
}
+#ifndef strnlen
libc_hidden_def(strnlen)
+#else
+/* Uncomment TESTING, gcc -D_GNU_SOURCE -m32 -Os strnlen.c -o strnlen
+ * and run ./strnlen
+ */
+int main()
+{
+ printf(strnlen("abc\0def", -2) == 3 ? "ok\n" : "BAD!\n");
+ printf(strnlen("abc\0def", -1) == 3 ? "ok\n" : "BAD!\n");
+ printf(strnlen("abc\0def", 0) == 0 ? "ok\n" : "BAD!\n");
+ printf(strnlen("abc\0def", 1) == 1 ? "ok\n" : "BAD!\n");
+ printf(strnlen("abc\0def", 2) == 2 ? "ok\n" : "BAD!\n");
+ printf(strnlen("abc\0def", 3) == 3 ? "ok\n" : "BAD!\n");
+ printf(strnlen("abc\0def", 4) == 3 ? "ok\n" : "BAD!\n");
+ printf(strnlen("abc\0def", 5) == 3 ? "ok\n" : "BAD!\n");
+}
+#endif
+
#endif
diff --git a/libc/string/i386/strrchr.c b/libc/string/i386/strrchr.c
index ef378685b..5c349f683 100644
--- a/libc/string/i386/strrchr.c
+++ b/libc/string/i386/strrchr.c
@@ -32,21 +32,25 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strrchr) */
char *strrchr(const char *s, int c)
{
- int d0, d1;
- register char * __res;
- __asm__ __volatile__(
- "movb %%al,%%ah\n"
- "1:\tlodsb\n\t"
- "cmpb %%ah,%%al\n\t"
- "jne 2f\n\t"
- "leal -1(%%esi),%0\n"
- "2:\ttestb %%al,%%al\n\t"
- "jne 1b"
- :"=g" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c));
- return __res;
+ char *eax;
+
+ __asm__ __volatile__(
+ " movb %%cl, %%ch\n"
+ "1: movb (%1), %%cl\n" /* load char */
+ " cmpb %%cl, %%ch\n" /* char == c? */
+ " jne 2f\n"
+ " movl %1, %%eax\n"
+ "2: incl %1\n"
+ " testb %%cl, %%cl\n" /* char == NUL? */
+ " jnz 1b\n"
+ /* "=c": use ecx, not ebx (-fpic uses it). */
+ : "=a" (eax), "=r" (s), "=c" (c)
+ : "0" (0), "1" (s), "2" (c)
+ /* : no clobbers */
+ );
+ return eax;
}
libc_hidden_def(strrchr)
#ifdef __UCLIBC_SUSV3_LEGACY__
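
The rewrite scans forward once and keeps overwriting eax with the
latest match, so the final value is the rightmost occurrence; the NUL
itself can match when c == 0. The same logic in C (sketch):

static char *strrchr_sketch(const char *s, int c)
{
	char *last = 0;

	do {
		if (*s == (char)c)
			last = (char *)s;	/* remember the latest hit */
	} while (*s++);
	return last;
}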
diff --git a/libc/string/ia64/bcopy.S b/libc/string/ia64/bcopy.S
index c5637c369..62da68d74 100644
--- a/libc/string/ia64/bcopy.S
+++ b/libc/string/ia64/bcopy.S
@@ -1,4 +1,4 @@
-#include "sysdep.h"
+#include <sysdep.h>
#ifdef __UCLIBC_SUSV3_LEGACY__
diff --git a/libc/string/ia64/bzero.S b/libc/string/ia64/bzero.S
index d390838a6..79419579a 100644
--- a/libc/string/ia64/bzero.S
+++ b/libc/string/ia64/bzero.S
@@ -15,9 +15,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Return: dest
@@ -32,7 +31,7 @@
Since a stf.spill f0 can store 16B in one go, we use this instruction
to get peak speed. */
-#include "sysdep.h"
+#include <sysdep.h>
#ifdef __UCLIBC_SUSV3_LEGACY__
@@ -47,13 +46,13 @@
#define ptr1 r28
#define ptr2 r27
#define ptr3 r26
-#define ptr9 r24
+#define ptr9 r24
#define loopcnt r23
#define linecnt r22
#define bytecnt r21
-// This routine uses only scratch predicate registers (p6 - p15)
-#define p_scr p6 // default register for same-cycle branches
+/* This routine uses only scratch predicate registers (p6 - p15) */
+#define p_scr p6 /* default register for same-cycle branches */
#define p_unalgn p9
#define p_y p11
#define p_n p12
@@ -65,7 +64,7 @@
#define MIN1 15
#define MIN1P1HALF 8
#define LINE_SIZE 128
-#define LSIZE_SH 7 // shift amount
+#define LSIZE_SH 7 /* shift amount */
#define PREF_AHEAD 8
#define USE_FLP
@@ -87,49 +86,49 @@ ENTRY(bzero)
movi0 save_lc = ar.lc
} { .mmi
.body
- mov ret0 = dest // return value
+ mov ret0 = dest /* return value */
nop.m 0
cmp.eq p_scr, p0 = cnt, r0
;; }
{ .mmi
- and ptr2 = -(MIN1+1), dest // aligned address
- and tmp = MIN1, dest // prepare to check for alignment
- tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U)
+ and ptr2 = -(MIN1+1), dest /* aligned address */
+ and tmp = MIN1, dest /* prepare to check for alignment */
+ tbit.nz p_y, p_n = dest, 0 /* Do we have an odd address? (M_B_U) */
} { .mib
mov ptr1 = dest
nop.i 0
-(p_scr) br.ret.dpnt.many rp // return immediately if count = 0
+(p_scr) br.ret.dpnt.many rp /* return immediately if count = 0 */
;; }
{ .mib
cmp.ne p_unalgn, p0 = tmp, r0
-} { .mib // NB: # of bytes to move is 1
- sub bytecnt = (MIN1+1), tmp // higher than loopcnt
- cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task?
-(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U)
+} { .mib /* NB: # of bytes to move is 1 */
+ sub bytecnt = (MIN1+1), tmp /* higher than loopcnt */
+ cmp.gt p_scr, p0 = 16, cnt /* is it a minimalistic task? */
+(p_scr) br.cond.dptk.many .move_bytes_unaligned /* go move just a few (M_B_U) */
;; }
{ .mmi
-(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment
-(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment
-(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ?
+(p_unalgn) add ptr1 = (MIN1+1), ptr2 /* after alignment */
+(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 /* after alignment */
+(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 /* should we do a st8 ? */
;; }
{ .mib
(p_y) add cnt = -8, cnt
-(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ?
+(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 /* should we do a st4 ? */
} { .mib
(p_y) st8 [ptr2] = r0,-4
(p_n) add ptr2 = 4, ptr2
;; }
{ .mib
(p_yy) add cnt = -4, cnt
-(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ?
+(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 /* should we do a st2 ? */
} { .mib
(p_yy) st4 [ptr2] = r0,-2
(p_nn) add ptr2 = 2, ptr2
;; }
{ .mmi
- mov tmp = LINE_SIZE+1 // for compare
+ mov tmp = LINE_SIZE+1 /* for compare */
(p_y) add cnt = -2, cnt
-(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ?
+(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 /* should we do a st1 ? */
} { .mmi
nop.m 0
(p_y) st2 [ptr2] = r0,-1
@@ -138,44 +137,44 @@ ENTRY(bzero)
{ .mmi
(p_yy) st1 [ptr2] = r0
- cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task?
+ cmp.gt p_scr, p0 = tmp, cnt /* is it a minimalistic task? */
} { .mbb
(p_yy) add cnt = -1, cnt
-(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few
+(p_scr) br.cond.dpnt.many .fraction_of_line /* go move just a few */
;; }
{ .mib
- nop.m 0
+ nop.m 0
shr.u linecnt = cnt, LSIZE_SH
nop.b 0
;; }
.align 32
-.l1b: // ------------------// L1B: store ahead into cache lines; fill later
+.l1b: /* ------------------ L1B: store ahead into cache lines; fill later */
{ .mmi
- and tmp = -(LINE_SIZE), cnt // compute end of range
- mov ptr9 = ptr1 // used for prefetching
- and cnt = (LINE_SIZE-1), cnt // remainder
+ and tmp = -(LINE_SIZE), cnt /* compute end of range */
+ mov ptr9 = ptr1 /* used for prefetching */
+ and cnt = (LINE_SIZE-1), cnt /* remainder */
} { .mmi
- mov loopcnt = PREF_AHEAD-1 // default prefetch loop
- cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value
+ mov loopcnt = PREF_AHEAD-1 /* default prefetch loop */
+ cmp.gt p_scr, p0 = PREF_AHEAD, linecnt /* check against actual value */
;; }
{ .mmi
(p_scr) add loopcnt = -1, linecnt
- add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores)
- add ptr1 = tmp, ptr1 // first address beyond total range
+ add ptr2 = 16, ptr1 /* start of stores (beyond prefetch stores) */
+ add ptr1 = tmp, ptr1 /* first address beyond total range */
;; }
{ .mmi
- add tmp = -1, linecnt // next loop count
+ add tmp = -1, linecnt /* next loop count */
movi0 ar.lc = loopcnt
;; }
.pref_l1b:
{ .mib
- stf.spill [ptr9] = f0, 128 // Do stores one cache line apart
+ stf.spill [ptr9] = f0, 128 /* Do stores one cache line apart */
nop.i 0
br.cloop.dptk.few .pref_l1b
;; }
{ .mmi
- add ptr0 = 16, ptr2 // Two stores in parallel
+ add ptr0 = 16, ptr2 /* Two stores in parallel */
movi0 ar.lc = tmp
;; }
.l1bx:
@@ -190,7 +189,7 @@ ENTRY(bzero)
{ .mmi
stf.spill [ptr2] = f0, 32
stf.spill [ptr0] = f0, 64
- cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching?
+ cmp.lt p_scr, p0 = ptr9, ptr1 /* do we need more prefetching? */
;; }
{ .mmb
stf.spill [ptr2] = f0, 32
@@ -198,14 +197,14 @@ ENTRY(bzero)
br.cloop.dptk.few .l1bx
;; }
{ .mib
- cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ?
+ cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? */
(p_scr) br.cond.dpnt.many .move_bytes_from_alignment
;; }
.fraction_of_line:
{ .mib
add ptr2 = 16, ptr1
- shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32
+ shr.u loopcnt = cnt, 5 /* loopcnt = cnt / 32 */
;; }
{ .mib
cmp.eq p_scr, p0 = loopcnt, r0
@@ -213,11 +212,11 @@ ENTRY(bzero)
(p_scr) br.cond.dpnt.many .store_words
;; }
{ .mib
- and cnt = 0x1f, cnt // compute the remaining cnt
+ and cnt = 0x1f, cnt /* compute the remaining cnt */
movi0 ar.lc = loopcnt
;; }
.align 32
-.l2: // -----------------------------// L2A: store 32B in 2 cycles
+.l2: /* ----------------------------- L2A: store 32B in 2 cycles */
{ .mmb
store [ptr1] = myval, 8
store [ptr2] = myval, 8
@@ -228,38 +227,38 @@ ENTRY(bzero)
;; }
.store_words:
{ .mib
- cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ?
-(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch
+ cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? */
+(p_scr) br.cond.dpnt.many .move_bytes_from_alignment /* Branch */
;; }
{ .mmi
- store [ptr1] = myval, 8 // store
- cmp.le p_y, p_n = 16, cnt //
- add cnt = -8, cnt // subtract
+ store [ptr1] = myval, 8 /* store */
+ cmp.le p_y, p_n = 16, cnt /* */
+ add cnt = -8, cnt /* subtract */
;; }
{ .mmi
-(p_y) store [ptr1] = myval, 8 // store
+(p_y) store [ptr1] = myval, 8 /* store */
(p_y) cmp.le.unc p_yy, p_nn = 16, cnt
-(p_y) add cnt = -8, cnt // subtract
+(p_y) add cnt = -8, cnt /* subtract */
;; }
-{ .mmi // store
+{ .mmi /* store */
(p_yy) store [ptr1] = myval, 8
-(p_yy) add cnt = -8, cnt // subtract
+(p_yy) add cnt = -8, cnt /* subtract */
;; }
.move_bytes_from_alignment:
{ .mib
cmp.eq p_scr, p0 = cnt, r0
- tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ?
+ tbit.nz.unc p_y, p0 = cnt, 2 /* should we terminate with a st4 ? */
(p_scr) br.cond.dpnt.few .restore_and_exit
;; }
{ .mib
(p_y) st4 [ptr1] = r0,4
- tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ?
+ tbit.nz.unc p_yy, p0 = cnt, 1 /* should we terminate with a st2 ? */
;; }
{ .mib
(p_yy) st2 [ptr1] = r0,2
- tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ?
+ tbit.nz.unc p_y, p0 = cnt, 0 /* should we terminate with a st1 ? */
;; }
{ .mib
@@ -281,38 +280,38 @@ ENTRY(bzero)
(p_n) add ptr2 = 2, ptr1
} { .mmi
(p_y) add ptr2 = 3, ptr1
-(p_y) st1 [ptr1] = r0, 1 // fill 1 (odd-aligned) byte
-(p_y) add cnt = -1, cnt // [15, 14 (or less) left]
+(p_y) st1 [ptr1] = r0, 1 /* fill 1 (odd-aligned) byte */
+(p_y) add cnt = -1, cnt /* [15, 14 (or less) left] */
;; }
{ .mmi
(p_yy) cmp.le.unc p_y, p0 = 8, cnt
- add ptr3 = ptr1, cnt // prepare last store
+ add ptr3 = ptr1, cnt /* prepare last store */
movi0 ar.lc = save_lc
} { .mmi
-(p_yy) st2 [ptr1] = r0, 4 // fill 2 (aligned) bytes
-(p_yy) st2 [ptr2] = r0, 4 // fill 2 (aligned) bytes
-(p_yy) add cnt = -4, cnt // [11, 10 (o less) left]
+(p_yy) st2 [ptr1] = r0, 4 /* fill 2 (aligned) bytes */
+(p_yy) st2 [ptr2] = r0, 4 /* fill 2 (aligned) bytes */
+(p_yy)	add	cnt = -4, cnt		/* [11, 10 (or less) left] */
;; }
{ .mmi
(p_y) cmp.le.unc p_yy, p0 = 8, cnt
- add ptr3 = -1, ptr3 // last store
- tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ?
+ add ptr3 = -1, ptr3 /* last store */
+ tbit.nz p_scr, p0 = cnt, 1 /* will there be a st2 at the end ? */
} { .mmi
-(p_y) st2 [ptr1] = r0, 4 // fill 2 (aligned) bytes
-(p_y) st2 [ptr2] = r0, 4 // fill 2 (aligned) bytes
-(p_y) add cnt = -4, cnt // [7, 6 (or less) left]
+(p_y) st2 [ptr1] = r0, 4 /* fill 2 (aligned) bytes */
+(p_y) st2 [ptr2] = r0, 4 /* fill 2 (aligned) bytes */
+(p_y) add cnt = -4, cnt /* [7, 6 (or less) left] */
;; }
{ .mmi
-(p_yy) st2 [ptr1] = r0, 4 // fill 2 (aligned) bytes
-(p_yy) st2 [ptr2] = r0, 4 // fill 2 (aligned) bytes
- // [3, 2 (or less) left]
- tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ?
+(p_yy) st2 [ptr1] = r0, 4 /* fill 2 (aligned) bytes */
+(p_yy) st2 [ptr2] = r0, 4 /* fill 2 (aligned) bytes */
+ /* [3, 2 (or less) left] */
+ tbit.nz p_y, p0 = cnt, 0 /* will there be a st1 at the end ? */
} { .mmi
(p_yy) add cnt = -4, cnt
;; }
{ .mmb
-(p_scr) st2 [ptr1] = r0 // fill 2 (aligned) bytes
-(p_y) st1 [ptr3] = r0 // fill last byte (using ptr3)
+(p_scr) st2 [ptr1] = r0 /* fill 2 (aligned) bytes */
+(p_y) st1 [ptr3] = r0 /* fill last byte (using ptr3) */
br.ret.sptk.many rp
;; }
END(bzero)
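
The store-ahead structure of .l1b is easier to see stripped of the
bundling: touch a line PREF_AHEAD cache lines ahead, then bulk-fill the
current one. A conceptual C sketch only, not the pipelined code:

#include <stddef.h>
#include <string.h>

#define LINE	128	/* LINE_SIZE above */
#define AHEAD	8	/* PREF_AHEAD above */

static void bzero_lines_sketch(char *p, size_t lines)
{
	size_t i;

	for (i = 0; i < AHEAD && i < lines; i++)
		p[i * LINE] = 0;	/* the .pref_l1b warm-up loop */
	for (i = 0; i < lines; i++) {
		if (i + AHEAD < lines)
			p[(i + AHEAD) * LINE] = 0; /* stay one stride ahead */
		memset(p + i * LINE, 0, LINE);	/* the .l1bx bulk stores */
	}
}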
diff --git a/libc/string/ia64/memccpy.S b/libc/string/ia64/memccpy.S
index 1afba3637..5c4d7e3c2 100644
--- a/libc/string/ia64/memccpy.S
+++ b/libc/string/ia64/memccpy.S
@@ -14,16 +14,15 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Return: a pointer to the next byte after char in dest or NULL
Inputs:
in0: dest
in1: src
- in2: char
+ in2: char
in3: byte count
This implementation assumes little endian mode (UM.be = 0).
@@ -31,7 +30,7 @@
This implementation assumes that it is safe to do read ahead
in the src block, without getting beyond its limit. */
-#include "sysdep.h"
+#include <sysdep.h>
#undef ret
#define OP_T_THRES 16
@@ -69,75 +68,75 @@ ENTRY(memccpy)
.rotr r[MEMLAT + 7], tmp1[4], tmp2[4], val[4], tmp3[2], pos0[2]
.rotp p[MEMLAT + 6 + 1]
- mov ret0 = r0 // return NULL if no match
+ mov ret0 = r0 /* return NULL if no match */
.save pr, saved_pr
- mov saved_pr = pr // save the predicate registers
- mov dest = in0 // dest
+ mov saved_pr = pr /* save the predicate registers */
+ mov dest = in0 /* dest */
.save ar.lc, saved_lc
- mov saved_lc = ar.lc // save the loop counter
- mov saved_ec = ar.ec // save the loop counter
+ mov saved_lc = ar.lc /* save the loop counter */
+ mov saved_ec = ar.ec /* save the loop counter */
.body
- mov src = in1 // src
- extr.u char = in2, 0, 8 // char
- mov len = in3 // len
- sub tmp = r0, in0 // tmp = -dest
- cmp.ne p7, p0 = r0, r0 // clear p7
+ mov src = in1 /* src */
+ extr.u char = in2, 0, 8 /* char */
+ mov len = in3 /* len */
+ sub tmp = r0, in0 /* tmp = -dest */
+ cmp.ne p7, p0 = r0, r0 /* clear p7 */
;;
- and loopcnt = 7, tmp // loopcnt = -dest % 8
- cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES
- mov ar.ec = 0 // ec not guaranteed zero on entry
-(p6) br.cond.spnt .cpyfew // copy byte by byte
+ and loopcnt = 7, tmp /* loopcnt = -dest % 8 */
+ cmp.ge p6, p0 = OP_T_THRES, len /* is len <= OP_T_THRES */
+ mov ar.ec = 0 /* ec not guaranteed zero on entry */
+(p6) br.cond.spnt .cpyfew /* copy byte by byte */
;;
cmp.eq p6, p0 = loopcnt, r0
mux1 charx8 = char, @brcst
(p6) br.cond.sptk .dest_aligned
- sub len = len, loopcnt // len -= -dest % 8
- adds loopcnt = -1, loopcnt // --loopcnt
+ sub len = len, loopcnt /* len -= -dest % 8 */
+ adds loopcnt = -1, loopcnt /* --loopcnt */
;;
mov ar.lc = loopcnt
-.l1: // copy -dest % 8 bytes
- ld1 value = [src], 1 // value = *src++
+.l1: /* copy -dest % 8 bytes */
+ ld1 value = [src], 1 /* value = *src++ */
;;
- st1 [dest] = value, 1 // *dest++ = value
+ st1 [dest] = value, 1 /* *dest++ = value */
cmp.eq p6, p0 = value, char
(p6) br.cond.spnt .foundit
br.cloop.dptk .l1
.dest_aligned:
- and sh1 = 7, src // sh1 = src % 8
- and tmp = -8, len // tmp = len & -OPSIZ
- and asrc = -8, src // asrc = src & -OPSIZ -- align src
- shr.u loopcnt = len, 3 // loopcnt = len / 8
- and len = 7, len ;; // len = len % 8
- shl sh1 = sh1, 3 // sh1 = 8 * (src % 8)
- adds loopcnt = -1, loopcnt // --loopcnt
- mov pr.rot = 1 << 16 ;; // set rotating predicates
- sub sh2 = 64, sh1 // sh2 = 64 - sh1
- mov ar.lc = loopcnt // set LC
- cmp.eq p6, p0 = sh1, r0 // is the src aligned?
+ and sh1 = 7, src /* sh1 = src % 8 */
+ and tmp = -8, len /* tmp = len & -OPSIZ */
+ and asrc = -8, src /* asrc = src & -OPSIZ -- align src */
+ shr.u loopcnt = len, 3 /* loopcnt = len / 8 */
+ and len = 7, len ;; /* len = len % 8 */
+ shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */
+ adds loopcnt = -1, loopcnt /* --loopcnt */
+ mov pr.rot = 1 << 16 ;; /* set rotating predicates */
+ sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */
+ mov ar.lc = loopcnt /* set LC */
+ cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */
(p6) br.cond.sptk .src_aligned ;;
- add src = src, tmp // src += len & -OPSIZ
- mov ar.ec = MEMLAT + 6 + 1 // six more passes needed
- ld8 r[1] = [asrc], 8 // r[1] = w0
- cmp.ne p6, p0 = r0, r0 ;; // clear p6
+ add src = src, tmp /* src += len & -OPSIZ */
+ mov ar.ec = MEMLAT + 6 + 1 /* six more passes needed */
+ ld8 r[1] = [asrc], 8 /* r[1] = w0 */
+ cmp.ne p6, p0 = r0, r0 ;; /* clear p6 */
ALIGN(32)
.l2:
-(p[0]) ld8.s r[0] = [asrc], 8 // r[0] = w1
-(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 >> sh1
-(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 << sh2
+(p[0]) ld8.s r[0] = [asrc], 8 /* r[0] = w1 */
+(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 /* tmp1 = w0 >> sh1 */
+(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 /* tmp2 = w1 << sh2 */
(p[MEMLAT+4]) xor tmp3[0] = val[1], charx8
(p[MEMLAT+5]) czx1.r pos0[0] = tmp3[1]
-(p[MEMLAT+6]) chk.s r[6 + MEMLAT], .recovery1 // our data isn't
- // valid - rollback!
+(p[MEMLAT+6]) chk.s r[6 + MEMLAT], .recovery1 /* our data isn't */
+ /* valid - rollback! */
(p[MEMLAT+6]) cmp.ne p6, p0 = 8, pos0[1]
(p6) br.cond.spnt .gotit
-(p[MEMLAT+6]) st8 [dest] = val[3], 8 // store val to dest
-(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] // val = tmp1 | tmp2
+(p[MEMLAT+6]) st8 [dest] = val[3], 8 /* store val to dest */
+(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] /* val = tmp1 | tmp2 */
br.ctop.sptk .l2
br.cond.sptk .cpyfew
.src_aligned:
- cmp.ne p6, p0 = r0, r0 // clear p6
- mov ar.ec = MEMLAT + 2 + 1 ;; // set EC
+ cmp.ne p6, p0 = r0, r0 /* clear p6 */
+ mov ar.ec = MEMLAT + 2 + 1 ;; /* set EC */
.l3:
(p[0]) ld8.s r[0] = [src], 8
(p[MEMLAT]) xor tmp3[0] = r[MEMLAT], charx8
@@ -149,8 +148,8 @@ ENTRY(memccpy)
(p[MEMLAT+2]) st8 [dest] = r[MEMLAT+2], 8
br.ctop.dptk .l3
.cpyfew:
- cmp.eq p6, p0 = len, r0 // is len == 0 ?
- adds len = -1, len // --len;
+ cmp.eq p6, p0 = len, r0 /* is len == 0 ? */
+ adds len = -1, len /* --len; */
(p6) br.cond.spnt .restore_and_exit ;;
mov ar.lc = len
.l4:
@@ -163,14 +162,14 @@ ENTRY(memccpy)
.foundit:
(p6) mov ret0 = dest
.restore_and_exit:
- mov pr = saved_pr, -1 // restore the predicate registers
- mov ar.lc = saved_lc // restore the loop counter
- mov ar.ec = saved_ec ;; // restore the epilog counter
+ mov pr = saved_pr, -1 /* restore the predicate registers */
+ mov ar.lc = saved_lc /* restore the loop counter */
+ mov ar.ec = saved_ec ;; /* restore the epilog counter */
br.ret.sptk.many b0
.gotit:
.pred.rel "mutex" p6, p7
-(p6) mov value = val[3] // if coming from l2
-(p7) mov value = r[MEMLAT+2] // if coming from l3
+(p6) mov value = val[3] /* if coming from l2 */
+(p7) mov value = r[MEMLAT+2] /* if coming from l3 */
mov ar.lc = pos0[1] ;;
.l5:
extr.u tmp = value, 0, 8 ;;
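
For reference, the contract all of the paths above implement, written
byte-at-a-time in C (sketch, not the ia64 code):

#include <stddef.h>

static void *memccpy_sketch(void *dest, const void *src, int c, size_t n)
{
	unsigned char *d = dest;
	const unsigned char *s = src;

	while (n--) {
		if ((*d++ = *s++) == (unsigned char)c)
			return d;	/* one past the copied c */
	}
	return NULL;		/* c not seen in the first n bytes */
}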
diff --git a/libc/string/ia64/memchr.S b/libc/string/ia64/memchr.S
index 2bf078fe6..fcd9f9305 100644
--- a/libc/string/ia64/memchr.S
+++ b/libc/string/ia64/memchr.S
@@ -14,9 +14,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Return: the address of the first occurrence of chr in str or NULL
@@ -40,7 +39,7 @@
All the loops in this function could have had the internal branch removed
if br.ctop and br.cloop could be predicated :-(. */
-#include "sysdep.h"
+#include <sysdep.h>
#undef ret
#define saved_pr r15
@@ -62,18 +61,18 @@ ENTRY(__memchr)
.rotr value[MEMLAT+1], addr[MEMLAT+3], aux[2], poschr[2]
.rotp p[MEMLAT+3]
.save ar.lc, saved_lc
- mov saved_lc = ar.lc // save the loop counter
+ mov saved_lc = ar.lc /* save the loop counter */
.save pr, saved_pr
- mov saved_pr = pr // save the predicates
+ mov saved_pr = pr /* save the predicates */
.body
mov ret0 = str
- and tmp = 7, str // tmp = str % 8
- cmp.ne p7, p0 = r0, r0 // clear p7
- extr.u chr = in1, 0, 8 // chr = (unsigned char) in1
+ and tmp = 7, str /* tmp = str % 8 */
+ cmp.ne p7, p0 = r0, r0 /* clear p7 */
+ extr.u chr = in1, 0, 8 /* chr = (unsigned char) in1 */
mov len = in2
- cmp.gtu p6, p0 = 16, in2 // use a simple loop for short
-(p6) br.cond.spnt .srchfew ;; // searches
- sub loopcnt = 8, tmp // loopcnt = 8 - tmp
+ cmp.gtu p6, p0 = 16, in2 /* use a simple loop for short */
+(p6) br.cond.spnt .srchfew ;; /* searches */
+ sub loopcnt = 8, tmp /* loopcnt = 8 - tmp */
cmp.eq p6, p0 = tmp, r0
(p6) br.cond.sptk .str_aligned;;
sub len = len, loopcnt
@@ -86,12 +85,12 @@ ENTRY(__memchr)
(p6) br.cond.spnt .foundit
br.cloop.sptk .l1 ;;
.str_aligned:
- cmp.ne p6, p0 = r0, r0 // clear p6
- shr.u loopcnt = len, 3 // loopcnt = len / 8
- and len = 7, len ;; // remaining len = len & 7
+ cmp.ne p6, p0 = r0, r0 /* clear p6 */
+ shr.u loopcnt = len, 3 /* loopcnt = len / 8 */
+ and len = 7, len ;; /* remaining len = len & 7 */
adds loopcnt = -1, loopcnt
mov ar.ec = MEMLAT + 3
- mux1 chrx8 = chr, @brcst ;; // get a word full of chr
+ mux1 chrx8 = chr, @brcst ;; /* get a word full of chr */
mov ar.lc = loopcnt
mov pr.rot = 1 << 16 ;;
.l2:
@@ -114,20 +113,18 @@ ENTRY(__memchr)
(p6) br.cond.dpnt .foundit
br.cloop.sptk .l3 ;;
.notfound:
- cmp.ne p6, p0 = r0, r0 // clear p6 (p7 was already 0 when we got here)
- mov ret0 = r0 ;; // return NULL
+ cmp.ne p6, p0 = r0, r0 /* clear p6 (p7 was already 0 when we got here) */
+ mov ret0 = r0 ;; /* return NULL */
.foundit:
.pred.rel "mutex" p6, p7
-(p6) adds ret0 = -1, ret0 // if we got here from l1 or l3
-(p7) add ret0 = addr[MEMLAT+2], poschr[1] // if we got here from l2
+(p6) adds ret0 = -1, ret0 /* if we got here from l1 or l3 */
+(p7) add ret0 = addr[MEMLAT+2], poschr[1] /* if we got here from l2 */
mov pr = saved_pr, -1
mov ar.lc = saved_lc
br.ret.sptk.many b0
END(__memchr)
-weak_alias (__memchr, memchr)
-#if !__BOUNDED_POINTERS__
-weak_alias (__memchr, __ubp_memchr)
-#endif
-libc_hidden_def (memchr)
+weak_alias(__memchr, memchr)
+weak_alias(__memchr, __ubp_memchr)
+libc_hidden_def(memchr)
diff --git a/libc/string/ia64/memcmp.S b/libc/string/ia64/memcmp.S
index 8b0c096ce..0cf54e7db 100644
--- a/libc/string/ia64/memcmp.S
+++ b/libc/string/ia64/memcmp.S
@@ -14,9 +14,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Return: the result of the comparison
@@ -28,16 +27,16 @@
   In this form, it assumes little endian mode. For big endian mode,
the two shifts in .l2 must be inverted:
- shl tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 << sh1
+ shl tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 << sh1
shr.u tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 >> sh2
and all the mux1 instructions should be replaced by plain mov's. */
-#include "sysdep.h"
+#include <sysdep.h>
#undef ret
-#define OP_T_THRES 16
-#define OPSIZ 8
+#define OP_T_THRES 16
+#define OPSIZ 8
#define MEMLAT 2
#define start r15
@@ -56,85 +55,85 @@
ENTRY(memcmp)
.prologue
- alloc r2 = ar.pfs, 3, 37, 0, 40
+ alloc r2 = ar.pfs, 3, 37, 0, 40
.rotr r[MEMLAT + 2], q[MEMLAT + 5], tmp1[4], tmp2[4], val[2]
.rotp p[MEMLAT + 4 + 1]
- mov ret0 = r0 // by default return value = 0
+ mov ret0 = r0 /* by default return value = 0 */
.save pr, saved_pr
- mov saved_pr = pr // save the predicate registers
+ mov saved_pr = pr /* save the predicate registers */
.save ar.lc, saved_lc
- mov saved_lc = ar.lc // save the loop counter
+ mov saved_lc = ar.lc /* save the loop counter */
.body
- mov dest = in0 // dest
- mov src = in1 // src
- mov len = in2 // len
- sub tmp = r0, in0 // tmp = -dest
+ mov dest = in0 /* dest */
+ mov src = in1 /* src */
+ mov len = in2 /* len */
+ sub tmp = r0, in0 /* tmp = -dest */
;;
- and loopcnt = 7, tmp // loopcnt = -dest % 8
- cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES
-(p6) br.cond.spnt .cmpfew // compare byte by byte
+ and loopcnt = 7, tmp /* loopcnt = -dest % 8 */
+ cmp.ge p6, p0 = OP_T_THRES, len /* is len <= OP_T_THRES */
+(p6) br.cond.spnt .cmpfew /* compare byte by byte */
;;
cmp.eq p6, p0 = loopcnt, r0
(p6) br.cond.sptk .dest_aligned
- sub len = len, loopcnt // len -= -dest % 8
- adds loopcnt = -1, loopcnt // --loopcnt
+ sub len = len, loopcnt /* len -= -dest % 8 */
+ adds loopcnt = -1, loopcnt /* --loopcnt */
;;
mov ar.lc = loopcnt
-.l1: // copy -dest % 8 bytes
- ld1 value1 = [src], 1 // value = *src++
+.l1: /* copy -dest % 8 bytes */
+ ld1 value1 = [src], 1 /* value = *src++ */
ld1 value2 = [dest], 1
;;
cmp.ne p6, p0 = value1, value2
(p6) br.cond.spnt .done
br.cloop.dptk .l1
.dest_aligned:
- and sh1 = 7, src // sh1 = src % 8
- and tmp = -8, len // tmp = len & -OPSIZ
- and asrc = -8, src // asrc = src & -OPSIZ -- align src
- shr.u loopcnt = len, 3 // loopcnt = len / 8
- and len = 7, len ;; // len = len % 8
- shl sh1 = sh1, 3 // sh1 = 8 * (src % 8)
- adds loopcnt = -1, loopcnt // --loopcnt
- mov pr.rot = 1 << 16 ;; // set rotating predicates
- sub sh2 = 64, sh1 // sh2 = 64 - sh1
- mov ar.lc = loopcnt // set LC
- cmp.eq p6, p0 = sh1, r0 // is the src aligned?
+ and sh1 = 7, src /* sh1 = src % 8 */
+ and tmp = -8, len /* tmp = len & -OPSIZ */
+ and asrc = -8, src /* asrc = src & -OPSIZ -- align src */
+ shr.u loopcnt = len, 3 /* loopcnt = len / 8 */
+ and len = 7, len ;; /* len = len % 8 */
+ shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */
+ adds loopcnt = -1, loopcnt /* --loopcnt */
+ mov pr.rot = 1 << 16 ;; /* set rotating predicates */
+ sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */
+ mov ar.lc = loopcnt /* set LC */
+ cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */
(p6) br.cond.sptk .src_aligned
- add src = src, tmp // src += len & -OPSIZ
- mov ar.ec = MEMLAT + 4 + 1 // four more passes needed
- ld8 r[1] = [asrc], 8 ;; // r[1] = w0
+ add src = src, tmp /* src += len & -OPSIZ */
+ mov ar.ec = MEMLAT + 4 + 1 /* four more passes needed */
+ ld8 r[1] = [asrc], 8 ;; /* r[1] = w0 */
.align 32
-// We enter this loop with p6 cleared by the above comparison
+/* We enter this loop with p6 cleared by the above comparison */
.l2:
-(p[0]) ld8 r[0] = [asrc], 8 // r[0] = w1
+(p[0]) ld8 r[0] = [asrc], 8 /* r[0] = w1 */
(p[0]) ld8 q[0] = [dest], 8
-(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 >> sh1
-(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 << sh2
+(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 /* tmp1 = w0 >> sh1 */
+(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 /* tmp2 = w1 << sh2 */
(p[MEMLAT+4]) cmp.ne p6, p0 = q[MEMLAT + 4], val[1]
-(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] // val = tmp1 | tmp2
+(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] /* val = tmp1 | tmp2 */
(p6) br.cond.spnt .l2exit
br.ctop.sptk .l2
br.cond.sptk .cmpfew
.l3exit:
mux1 value1 = r[MEMLAT], @rev
mux1 value2 = q[MEMLAT], @rev
- cmp.ne p6, p0 = r0, r0 ;; // clear p6
+ cmp.ne p6, p0 = r0, r0 ;; /* clear p6 */
.l2exit:
(p6) mux1 value1 = val[1], @rev
(p6) mux1 value2 = q[MEMLAT + 4], @rev ;;
cmp.ltu p6, p7 = value2, value1 ;;
(p6) mov ret0 = -1
(p7) mov ret0 = 1
- mov pr = saved_pr, -1 // restore the predicate registers
- mov ar.lc = saved_lc // restore the loop counter
+ mov pr = saved_pr, -1 /* restore the predicate registers */
+ mov ar.lc = saved_lc /* restore the loop counter */
br.ret.sptk.many b0
.src_aligned:
- cmp.ne p6, p0 = r0, r0 // clear p6
- mov ar.ec = MEMLAT + 1 ;; // set EC
+ cmp.ne p6, p0 = r0, r0 /* clear p6 */
+ mov ar.ec = MEMLAT + 1 ;; /* set EC */
.l3:
(p[0]) ld8 r[0] = [src], 8
(p[0]) ld8 q[0] = [dest], 8
@@ -142,8 +141,8 @@ ENTRY(memcmp)
(p6) br.cond.spnt .l3exit
br.ctop.dptk .l3 ;;
.cmpfew:
- cmp.eq p6, p0 = len, r0 // is len == 0 ?
- adds len = -1, len // --len;
+ cmp.eq p6, p0 = len, r0 /* is len == 0 ? */
+ adds len = -1, len /* --len; */
(p6) br.cond.spnt .restore_and_exit ;;
mov ar.lc = len
.l4:
@@ -154,10 +153,10 @@ ENTRY(memcmp)
(p6) br.cond.spnt .done
br.cloop.dptk .l4 ;;
.done:
-(p6) sub ret0 = value2, value1 // don't execute it if falling thru
+(p6) sub ret0 = value2, value1 /* don't execute it if falling thru */
.restore_and_exit:
- mov pr = saved_pr, -1 // restore the predicate registers
- mov ar.lc = saved_lc // restore the loop counter
+ mov pr = saved_pr, -1 /* restore the predicate registers */
+ mov ar.lc = saved_lc /* restore the loop counter */
br.ret.sptk.many b0
END(memcmp)
libc_hidden_def (memcmp)
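
The shr.u/shl pair called out in the header comment is the standard two-word merge for reading an unaligned source with aligned ld8s. As a C sketch (names ours; little endian, and 0 < sh1 < 64 is guaranteed because the sh1 == 0 case branches to .src_aligned first, so the undefined 64-bit shift never happens):

    #include <stdint.h>

    /* combine aligned words w0 and w1 into the unaligned word that
       starts sh1/8 bytes into w0 */
    static uint64_t merge(uint64_t w0, uint64_t w1, unsigned sh1)
    {
        unsigned sh2 = 64 - sh1;          /* sub sh2 = 64, sh1 */
        return (w0 >> sh1) | (w1 << sh2); /* shr.u tmp1 ...; shl tmp2 ...; or */
    }

For big endian the two shift directions swap, exactly as the comment says.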
diff --git a/libc/string/ia64/memcpy.S b/libc/string/ia64/memcpy.S
index 810eb0c0e..5f2e79414 100644
--- a/libc/string/ia64/memcpy.S
+++ b/libc/string/ia64/memcpy.S
@@ -15,9 +15,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Return: dest
@@ -37,13 +36,13 @@
#define USE_LFETCH
#define USE_FLP
-#include "sysdep.h"
+#include <sysdep.h>
#undef ret
#define LFETCH_DIST 500
-#define ALIGN_UNROLL_no 4 // no. of elements
-#define ALIGN_UNROLL_sh 2 // (shift amount)
+#define ALIGN_UNROLL_no 4 /* no. of elements */
+#define ALIGN_UNROLL_sh 2 /* (shift amount) */
#define MEMLAT 8
#define Nrot ((4*(MEMLAT+2) + 7) & ~7)
@@ -168,76 +167,76 @@ ENTRY(memcpy)
.rotr r[MEMLAT+1], s[MEMLAT+2], q[MEMLAT+1], t[MEMLAT+1]
.rotp p[MEMLAT+2]
.rotf fr[MEMLAT+1], fq[MEMLAT+1], fs[MEMLAT+1], ft[MEMLAT+1]
- mov ret0 = in0 // return tmp2 = dest
+ mov ret0 = in0 /* return tmp2 = dest */
.save pr, saved_pr
- movi0 saved_pr = pr // save the predicate registers
+ movi0 saved_pr = pr /* save the predicate registers */
} { .mmi
- and tmp4 = 7, in0 // check if destination is aligned
- mov dest = in0 // dest
- mov src = in1 // src
+ and tmp4 = 7, in0 /* check if destination is aligned */
+ mov dest = in0 /* dest */
+ mov src = in1 /* src */
;; }
{ .mii
- cmp.eq p_scr, p0 = in2, r0 // if (len == 0)
+ cmp.eq p_scr, p0 = in2, r0 /* if (len == 0) */
.save ar.lc, saved_lc
- movi0 saved_lc = ar.lc // save the loop counter
+ movi0 saved_lc = ar.lc /* save the loop counter */
.body
- cmp.ge p_few, p0 = OP_T_THRES, in2 // is len <= OP_T_THRESH
+ cmp.ge p_few, p0 = OP_T_THRES, in2 /* is len <= OP_T_THRES */
} { .mbb
- mov len = in2 // len
-(p_scr) br.cond.dpnt.few .restore_and_exit // Branch no. 1: return dest
-(p_few) br.cond.dpnt.many .copy_bytes // Branch no. 2: copy byte by byte
+ mov len = in2 /* len */
+(p_scr) br.cond.dpnt.few .restore_and_exit /* Branch no. 1: return dest */
+(p_few) br.cond.dpnt.many .copy_bytes /* Branch no. 2: copy byte by byte */
;; }
{ .mmi
#if defined(USE_LFETCH)
- lfetch.nt1 [dest] //
- lfetch.nt1 [src] //
+ lfetch.nt1 [dest] /* */
+ lfetch.nt1 [src] /* */
#endif
- shr.u elemcnt = len, 3 // elemcnt = len / 8
+ shr.u elemcnt = len, 3 /* elemcnt = len / 8 */
} { .mib
- cmp.eq p_scr, p0 = tmp4, r0 // is destination aligned?
- sub loopcnt = 7, tmp4 //
+ cmp.eq p_scr, p0 = tmp4, r0 /* is destination aligned? */
+ sub loopcnt = 7, tmp4 /* */
(p_scr) br.cond.dptk.many .dest_aligned
;; }
{ .mmi
- ld1 tmp2 = [src], 1 //
- sub len = len, loopcnt, 1 // reduce len
- movi0 ar.lc = loopcnt //
+ ld1 tmp2 = [src], 1 /* */
+ sub len = len, loopcnt, 1 /* reduce len */
+ movi0 ar.lc = loopcnt /* */
} { .mib
- cmp.ne p_scr, p0 = 0, loopcnt // avoid loading beyond end-point
+ cmp.ne p_scr, p0 = 0, loopcnt /* avoid loading beyond end-point */
;; }
-.l0: // ---------------------------- // L0: Align src on 8-byte boundary
+.l0: /* ---------------------------- L0: Align src on 8-byte boundary */
{ .mmi
- st1 [dest] = tmp2, 1 //
-(p_scr) ld1 tmp2 = [src], 1 //
+ st1 [dest] = tmp2, 1 /* */
+(p_scr) ld1 tmp2 = [src], 1 /* */
} { .mib
- cmp.lt p_scr, p0 = 1, loopcnt // avoid load beyond end-point
+ cmp.lt p_scr, p0 = 1, loopcnt /* avoid load beyond end-point */
add loopcnt = -1, loopcnt
- br.cloop.dptk.few .l0 //
+ br.cloop.dptk.few .l0 /* */
;; }
.dest_aligned:
{ .mmi
- and tmp4 = 7, src // ready for alignment check
- shr.u elemcnt = len, 3 // elemcnt = len / 8
+ and tmp4 = 7, src /* ready for alignment check */
+ shr.u elemcnt = len, 3 /* elemcnt = len / 8 */
;; }
{ .mib
- cmp.ne p_scr, p0 = tmp4, r0 // is source also aligned
- tbit.nz p_xtr, p_nxtr = src, 3 // prepare a separate move if src
-} { .mib // is not 16B aligned
- add ptr2 = LFETCH_DIST, dest // prefetch address
+ cmp.ne p_scr, p0 = tmp4, r0 /* is source also aligned */
+ tbit.nz p_xtr, p_nxtr = src, 3 /* prepare a separate move if src */
+} { .mib /* is not 16B aligned */
+ add ptr2 = LFETCH_DIST, dest /* prefetch address */
add ptr1 = LFETCH_DIST, src
(p_scr) br.cond.dptk.many .src_not_aligned
;; }
-// The optimal case, when dest, and src are aligned
+/* The optimal case, when dest and src are aligned */
.both_aligned:
{ .mmi
.pred.rel "mutex",p_xtr,p_nxtr
-(p_xtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no+1, elemcnt // Need N + 1 to qualify
-(p_nxtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no, elemcnt // Need only N to qualify
- movi0 pr.rot = 1 << 16 // set rotating predicates
+(p_xtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no+1, elemcnt /* Need N + 1 to qualify */
+(p_nxtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no, elemcnt /* Need only N to qualify */
+ movi0 pr.rot = 1 << 16 /* set rotating predicates */
} { .mib
(p_scr) br.cond.dpnt.many .copy_full_words
;; }
@@ -245,21 +244,21 @@ ENTRY(memcpy)
{ .mmi
(p_xtr) load tempreg = [src], 8
(p_xtr) add elemcnt = -1, elemcnt
- movi0 ar.ec = MEMLAT + 1 // set the epilog counter
+ movi0 ar.ec = MEMLAT + 1 /* set the epilog counter */
;; }
{ .mmi
-(p_xtr) add len = -8, len //
- add asrc = 16, src // one bank apart (for USE_INT)
- shr.u loopcnt = elemcnt, ALIGN_UNROLL_sh // cater for unrolling
+(p_xtr) add len = -8, len /* */
+ add asrc = 16, src /* one bank apart (for USE_INT) */
+ shr.u loopcnt = elemcnt, ALIGN_UNROLL_sh /* cater for unrolling */
;;}
{ .mmi
add loopcnt = -1, loopcnt
-(p_xtr) store [dest] = tempreg, 8 // copy the "extra" word
+(p_xtr) store [dest] = tempreg, 8 /* copy the "extra" word */
nop.i 0
;; }
{ .mib
add adest = 16, dest
- movi0 ar.lc = loopcnt // set the loop counter
+ movi0 ar.lc = loopcnt /* set the loop counter */
;; }
#ifdef GAS_ALIGN_BREAKS_UNWIND_INFO
@@ -268,7 +267,7 @@ ENTRY(memcpy)
.align 32
#endif
#if defined(USE_FLP)
-.l1: // ------------------------------- // L1: Everything a multiple of 8
+.l1: /* ------------------------------- L1: Everything a multiple of 8 */
{ .mmi
#if defined(USE_LFETCH)
(p[0]) lfetch.nt1 [ptr2],32
@@ -290,7 +289,7 @@ ENTRY(memcpy)
br.ctop.dptk.many .l1
;; }
#elif defined(USE_INT)
-.l1: // ------------------------------- // L1: Everything a multiple of 8
+.l1: /* ------------------------------- L1: Everything a multiple of 8 */
{ .mmi
(p[0]) load the_r[0] = [src], 8
(p[0]) load the_q[0] = [asrc], 8
@@ -317,58 +316,58 @@ ENTRY(memcpy)
.copy_full_words:
{ .mib
- cmp.gt p_scr, p0 = 8, len //
- shr.u elemcnt = len, 3 //
+ cmp.gt p_scr, p0 = 8, len /* */
+ shr.u elemcnt = len, 3 /* */
(p_scr) br.cond.dpnt.many .copy_bytes
;; }
{ .mii
load tempreg = [src], 8
- add loopcnt = -1, elemcnt //
+ add loopcnt = -1, elemcnt /* */
;; }
{ .mii
- cmp.ne p_scr, p0 = 0, loopcnt //
- mov ar.lc = loopcnt //
+ cmp.ne p_scr, p0 = 0, loopcnt /* */
+ mov ar.lc = loopcnt /* */
;; }
-.l2: // ------------------------------- // L2: Max 4 words copied separately
+.l2: /* ------------------------------- L2: Max 4 words copied separately */
{ .mmi
store [dest] = tempreg, 8
-(p_scr) load tempreg = [src], 8 //
+(p_scr) load tempreg = [src], 8 /* */
add len = -8, len
} { .mib
- cmp.lt p_scr, p0 = 1, loopcnt // avoid load beyond end-point
+ cmp.lt p_scr, p0 = 1, loopcnt /* avoid load beyond end-point */
add loopcnt = -1, loopcnt
br.cloop.dptk.few .l2
;; }
.copy_bytes:
{ .mib
- cmp.eq p_scr, p0 = len, r0 // is len == 0 ?
- add loopcnt = -1, len // len--;
+ cmp.eq p_scr, p0 = len, r0 /* is len == 0 ? */
+ add loopcnt = -1, len /* len--; */
(p_scr) br.cond.spnt .restore_and_exit
;; }
{ .mii
ld1 tmp2 = [src], 1
movi0 ar.lc = loopcnt
- cmp.ne p_scr, p0 = 0, loopcnt // avoid load beyond end-point
+ cmp.ne p_scr, p0 = 0, loopcnt /* avoid load beyond end-point */
;; }
-.l3: // ------------------------------- // L3: Final byte move
+.l3: /* ------------------------------- L3: Final byte move */
{ .mmi
st1 [dest] = tmp2, 1
(p_scr) ld1 tmp2 = [src], 1
} { .mib
- cmp.lt p_scr, p0 = 1, loopcnt // avoid load beyond end-point
+ cmp.lt p_scr, p0 = 1, loopcnt /* avoid load beyond end-point */
add loopcnt = -1, loopcnt
br.cloop.dptk.few .l3
;; }
.restore_and_exit:
{ .mmi
- movi0 pr = saved_pr, -1 // restore the predicate registers
+ movi0 pr = saved_pr, -1 /* restore the predicate registers */
;; }
{ .mib
- movi0 ar.lc = saved_lc // restore the loop counter
+ movi0 ar.lc = saved_lc /* restore the loop counter */
br.ret.sptk.many b0
;; }
@@ -376,41 +375,41 @@ ENTRY(memcpy)
.src_not_aligned:
{ .mmi
cmp.gt p_scr, p0 = 16, len
- and sh1 = 7, src // sh1 = src % 8
- shr.u loopcnt = len, 4 // element-cnt = len / 16
+ and sh1 = 7, src /* sh1 = src % 8 */
+ shr.u loopcnt = len, 4 /* element-cnt = len / 16 */
} { .mib
add tmp4 = @ltoff(.table), gp
add tmp3 = @ltoff(.loop56), gp
-(p_scr) br.cond.dpnt.many .copy_bytes // do byte by byte if too few
+(p_scr) br.cond.dpnt.many .copy_bytes /* do byte by byte if too few */
;; }
{ .mmi
- and asrc = -8, src // asrc = (-8) -- align src for loop
- add loopcnt = -1, loopcnt // loopcnt--
- shl sh1 = sh1, 3 // sh1 = 8 * (src % 8)
+ and asrc = -8, src /* asrc = src & -8 -- align src for loop */
+ add loopcnt = -1, loopcnt /* loopcnt-- */
+ shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */
} { .mmi
- ld8 ptable = [tmp4] // ptable = &table
- ld8 ploop56 = [tmp3] // ploop56 = &loop56
- and tmp2 = -16, len // tmp2 = len & -OPSIZ
+ ld8 ptable = [tmp4] /* ptable = &table */
+ ld8 ploop56 = [tmp3] /* ploop56 = &loop56 */
+ and tmp2 = -16, len /* tmp2 = len & -OPSIZ */
;; }
{ .mmi
- add tmp3 = ptable, sh1 // tmp3 = &table + sh1
- add src = src, tmp2 // src += len & (-16)
- movi0 ar.lc = loopcnt // set LC
+ add tmp3 = ptable, sh1 /* tmp3 = &table + sh1 */
+ add src = src, tmp2 /* src += len & (-16) */
+ movi0 ar.lc = loopcnt /* set LC */
;; }
{ .mmi
- ld8 tmp4 = [tmp3] // tmp4 = loop offset
- sub len = len, tmp2 // len -= len & (-16)
- movi0 ar.ec = MEMLAT + 2 // one more pass needed
+ ld8 tmp4 = [tmp3] /* tmp4 = loop offset */
+ sub len = len, tmp2 /* len -= len & (-16) */
+ movi0 ar.ec = MEMLAT + 2 /* one more pass needed */
;; }
{ .mmi
- ld8 s[1] = [asrc], 8 // preload
- sub loopaddr = ploop56,tmp4 // loopadd = &loop56 - loop offset
- movi0 pr.rot = 1 << 16 // set rotating predicates
+ ld8 s[1] = [asrc], 8 /* preload */
+ sub loopaddr = ploop56,tmp4 /* loopaddr = &loop56 - loop offset */
+ movi0 pr.rot = 1 << 16 /* set rotating predicates */
;; }
{ .mib
nop.m 0
movi0 b6 = loopaddr
- br b6 // jump to the appropriate loop
+ br b6 /* jump to the appropriate loop */
;; }
LOOP(8)
@@ -426,7 +425,7 @@ libc_hidden_def (memcpy)
.rodata
.align 8
.table:
- data8 0 // dummy entry
+ data8 0 /* dummy entry */
data8 .loop56 - .loop8
data8 .loop56 - .loop16
data8 .loop56 - .loop24
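
Stripped of the software pipelining, the lfetch prefetch stream and the .table jump into the LOOP(8)..LOOP(56) bodies, the annotated control flow reduces to a familiar shape. A rough C model (ours; the misaligned-source merge path is collapsed into the byte loop):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    void *memcpy_model(void *dst, const void *src, size_t len)
    {
        unsigned char *d = dst;
        const unsigned char *s = src;

        while (len && ((uintptr_t)d & 7)) {   /* .l0: align dest */
            *d++ = *s++;
            len--;
        }
        if (((uintptr_t)s & 7) == 0)          /* .both_aligned fast path */
            for (; len >= 8; len -= 8, d += 8, s += 8)
                memcpy(d, s, 8);              /* stands in for ld8/st8 */
        while (len--)                         /* .l3: trailing bytes */
            *d++ = *s++;
        return dst;
    }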
diff --git a/libc/string/ia64/memmove.S b/libc/string/ia64/memmove.S
index 00342d8e0..7d830f912 100644
--- a/libc/string/ia64/memmove.S
+++ b/libc/string/ia64/memmove.S
@@ -14,9 +14,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Return: dest
@@ -33,7 +32,7 @@
sh1 must be computed using an extra instruction: sub sh1 = 64, sh1
or the UM.be bit should be cleared at the beginning and set at the end. */
-#include "sysdep.h"
+#include <sysdep.h>
#undef ret
#define OP_T_THRES 16
@@ -81,48 +80,48 @@ ENTRY(memmove)
alloc r2 = ar.pfs, 3, Nrot - 3, 0, Nrot
.rotr r[MEMLAT + 2], q[MEMLAT + 1]
.rotp p[MEMLAT + 2]
- mov ret0 = in0 // return value = dest
+ mov ret0 = in0 /* return value = dest */
.save pr, saved_pr
- mov saved_pr = pr // save the predicate registers
+ mov saved_pr = pr /* save the predicate registers */
.save ar.lc, saved_lc
- mov saved_lc = ar.lc // save the loop counter
+ mov saved_lc = ar.lc /* save the loop counter */
.body
- or tmp3 = in0, in1 ;; // tmp3 = dest | src
- or tmp3 = tmp3, in2 // tmp3 = dest | src | len
- mov dest = in0 // dest
- mov src = in1 // src
- mov len = in2 // len
- sub tmp2 = r0, in0 // tmp2 = -dest
- cmp.eq p6, p0 = in2, r0 // if (len == 0)
-(p6) br.cond.spnt .restore_and_exit;;// return dest;
- and tmp4 = 7, tmp3 // tmp4 = (dest | src | len) & 7
- cmp.le p6, p0 = dest, src // if dest <= src it's always safe
-(p6) br.cond.spnt .forward // to copy forward
+ or tmp3 = in0, in1 ;; /* tmp3 = dest | src */
+ or tmp3 = tmp3, in2 /* tmp3 = dest | src | len */
+ mov dest = in0 /* dest */
+ mov src = in1 /* src */
+ mov len = in2 /* len */
+ sub tmp2 = r0, in0 /* tmp2 = -dest */
+ cmp.eq p6, p0 = in2, r0 /* if (len == 0) */
+(p6) br.cond.spnt .restore_and_exit;;/* return dest; */
+ and tmp4 = 7, tmp3 /* tmp4 = (dest | src | len) & 7 */
+ cmp.le p6, p0 = dest, src /* if dest <= src it's always safe */
+(p6) br.cond.spnt .forward /* to copy forward */
add tmp3 = src, len;;
- cmp.lt p6, p0 = dest, tmp3 // if dest > src && dest < src + len
-(p6) br.cond.spnt .backward // we have to copy backward
+ cmp.lt p6, p0 = dest, tmp3 /* if dest > src && dest < src + len */
+(p6) br.cond.spnt .backward /* we have to copy backward */
.forward:
- shr.u loopcnt = len, 4 ;; // loopcnt = len / 16
- cmp.ne p6, p0 = tmp4, r0 // if ((dest | src | len) & 7 != 0)
-(p6) br.cond.sptk .next // goto next;
+ shr.u loopcnt = len, 4 ;; /* loopcnt = len / 16 */
+ cmp.ne p6, p0 = tmp4, r0 /* if ((dest | src | len) & 7 != 0) */
+(p6) br.cond.sptk .next /* goto next; */
-// The optimal case, when dest, src and len are all multiples of 8
+/* The optimal case, when dest, src and len are all multiples of 8 */
and tmp3 = 0xf, len
- mov pr.rot = 1 << 16 // set rotating predicates
- mov ar.ec = MEMLAT + 1 ;; // set the epilog counter
- cmp.ne p6, p0 = tmp3, r0 // do we have to copy an extra word?
- adds loopcnt = -1, loopcnt;; // --loopcnt
+ mov pr.rot = 1 << 16 /* set rotating predicates */
+ mov ar.ec = MEMLAT + 1 ;; /* set the epilog counter */
+ cmp.ne p6, p0 = tmp3, r0 /* do we have to copy an extra word? */
+ adds loopcnt = -1, loopcnt;; /* --loopcnt */
(p6) ld8 value = [src], 8;;
-(p6) st8 [dest] = value, 8 // copy the "odd" word
- mov ar.lc = loopcnt // set the loop counter
+(p6) st8 [dest] = value, 8 /* copy the "odd" word */
+ mov ar.lc = loopcnt /* set the loop counter */
cmp.eq p6, p0 = 8, len
-(p6) br.cond.spnt .restore_and_exit;;// the one-word special case
- adds adest = 8, dest // set adest one word ahead of dest
- adds asrc = 8, src ;; // set asrc one word ahead of src
- nop.b 0 // get the "golden" alignment for
- nop.b 0 // the next loop
+(p6) br.cond.spnt .restore_and_exit;;/* the one-word special case */
+ adds adest = 8, dest /* set adest one word ahead of dest */
+ adds asrc = 8, src ;; /* set asrc one word ahead of src */
+ nop.b 0 /* get the "golden" alignment for */
+ nop.b 0 /* the next loop */
.l0:
(p[0]) ld8 r[0] = [src], 16
(p[0]) ld8 q[0] = [asrc], 16
@@ -130,50 +129,50 @@ ENTRY(memmove)
(p[MEMLAT]) st8 [adest] = q[MEMLAT], 16
br.ctop.dptk .l0 ;;
- mov pr = saved_pr, -1 // restore the predicate registers
- mov ar.lc = saved_lc // restore the loop counter
+ mov pr = saved_pr, -1 /* restore the predicate registers */
+ mov ar.lc = saved_lc /* restore the loop counter */
br.ret.sptk.many b0
.next:
- cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES
- and loopcnt = 7, tmp2 // loopcnt = -dest % 8
-(p6) br.cond.spnt .cpyfew // copy byte by byte
+ cmp.ge p6, p0 = OP_T_THRES, len /* is len <= OP_T_THRES */
+ and loopcnt = 7, tmp2 /* loopcnt = -dest % 8 */
+(p6) br.cond.spnt .cpyfew /* copy byte by byte */
;;
cmp.eq p6, p0 = loopcnt, r0
(p6) br.cond.sptk .dest_aligned
- sub len = len, loopcnt // len -= -dest % 8
- adds loopcnt = -1, loopcnt // --loopcnt
+ sub len = len, loopcnt /* len -= -dest % 8 */
+ adds loopcnt = -1, loopcnt /* --loopcnt */
;;
mov ar.lc = loopcnt
-.l1: // copy -dest % 8 bytes
- ld1 value = [src], 1 // value = *src++
+.l1: /* copy -dest % 8 bytes */
+ ld1 value = [src], 1 /* value = *src++ */
;;
- st1 [dest] = value, 1 // *dest++ = value
+ st1 [dest] = value, 1 /* *dest++ = value */
br.cloop.dptk .l1
.dest_aligned:
- and sh1 = 7, src // sh1 = src % 8
- and tmp2 = -8, len // tmp2 = len & -OPSIZ
- and asrc = -8, src // asrc = src & -OPSIZ -- align src
- shr.u loopcnt = len, 3 // loopcnt = len / 8
- and len = 7, len;; // len = len % 8
- adds loopcnt = -1, loopcnt // --loopcnt
+ and sh1 = 7, src /* sh1 = src % 8 */
+ and tmp2 = -8, len /* tmp2 = len & -OPSIZ */
+ and asrc = -8, src /* asrc = src & -OPSIZ -- align src */
+ shr.u loopcnt = len, 3 /* loopcnt = len / 8 */
+ and len = 7, len;; /* len = len % 8 */
+ adds loopcnt = -1, loopcnt /* --loopcnt */
addl tmp4 = @ltoff(.table), gp
addl tmp3 = @ltoff(.loop56), gp
- mov ar.ec = MEMLAT + 1 // set EC
- mov pr.rot = 1 << 16;; // set rotating predicates
- mov ar.lc = loopcnt // set LC
- cmp.eq p6, p0 = sh1, r0 // is the src aligned?
+ mov ar.ec = MEMLAT + 1 /* set EC */
+ mov pr.rot = 1 << 16;; /* set rotating predicates */
+ mov ar.lc = loopcnt /* set LC */
+ cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */
(p6) br.cond.sptk .src_aligned
- add src = src, tmp2 // src += len & -OPSIZ
- shl sh1 = sh1, 3 // sh1 = 8 * (src % 8)
- ld8 ploop56 = [tmp3] // ploop56 = &loop56
- ld8 ptable = [tmp4];; // ptable = &table
- add tmp3 = ptable, sh1;; // tmp3 = &table + sh1
- mov ar.ec = MEMLAT + 1 + 1 // one more pass needed
- ld8 tmp4 = [tmp3];; // tmp4 = loop offset
- sub loopaddr = ploop56,tmp4 // loopadd = &loop56 - loop offset
- ld8 r[1] = [asrc], 8;; // w0
+ add src = src, tmp2 /* src += len & -OPSIZ */
+ shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */
+ ld8 ploop56 = [tmp3] /* ploop56 = &loop56 */
+ ld8 ptable = [tmp4];; /* ptable = &table */
+ add tmp3 = ptable, sh1;; /* tmp3 = &table + sh1 */
+ mov ar.ec = MEMLAT + 1 + 1 /* one more pass needed */
+ ld8 tmp4 = [tmp3];; /* tmp4 = loop offset */
+ sub loopaddr = ploop56,tmp4 /* loopaddr = &loop56 - loop offset */
+ ld8 r[1] = [asrc], 8;; /* w0 */
mov b6 = loopaddr;;
- br b6 // jump to the appropriate loop
+ br b6 /* jump to the appropriate loop */
LOOP(8)
LOOP(16)
@@ -189,8 +188,8 @@ ENTRY(memmove)
(p[MEMLAT]) st8 [dest] = r[MEMLAT], 8
br.ctop.dptk .l3
.cpyfew:
- cmp.eq p6, p0 = len, r0 // is len == 0 ?
- adds len = -1, len // --len;
+ cmp.eq p6, p0 = len, r0 /* is len == 0 ? */
+ adds len = -1, len /* --len; */
(p6) br.cond.spnt .restore_and_exit ;;
mov ar.lc = len
.l4:
@@ -199,36 +198,36 @@ ENTRY(memmove)
st1 [dest] = value, 1
br.cloop.dptk .l4 ;;
.restore_and_exit:
- mov pr = saved_pr, -1 // restore the predicate registers
- mov ar.lc = saved_lc // restore the loop counter
+ mov pr = saved_pr, -1 /* restore the predicate registers */
+ mov ar.lc = saved_lc /* restore the loop counter */
br.ret.sptk.many b0
-// In the case of a backward copy, optimise only the case when everything
-// is a multiple of 8, otherwise copy byte by byte. The backward copy is
-// used only when the blocks are overlapping and dest > src.
-
+/* In the case of a backward copy, optimise only the case when everything
+ is a multiple of 8, otherwise copy byte by byte. The backward copy is
+ used only when the blocks are overlapping and dest > src.
+*/
.backward:
- shr.u loopcnt = len, 3 // loopcnt = len / 8
- add src = src, len // src points one byte past the end
- add dest = dest, len ;; // dest points one byte past the end
- mov ar.ec = MEMLAT + 1 // set the epilog counter
- mov pr.rot = 1 << 16 // set rotating predicates
- adds loopcnt = -1, loopcnt // --loopcnt
- cmp.ne p6, p0 = tmp4, r0 // if ((dest | src | len) & 7 != 0)
-(p6) br.cond.sptk .bytecopy ;; // copy byte by byte backward
- adds src = -8, src // src points to the last word
- adds dest = -8, dest // dest points to the last word
- mov ar.lc = loopcnt;; // set the loop counter
+ shr.u loopcnt = len, 3 /* loopcnt = len / 8 */
+ add src = src, len /* src points one byte past the end */
+ add dest = dest, len ;; /* dest points one byte past the end */
+ mov ar.ec = MEMLAT + 1 /* set the epilog counter */
+ mov pr.rot = 1 << 16 /* set rotating predicates */
+ adds loopcnt = -1, loopcnt /* --loopcnt */
+ cmp.ne p6, p0 = tmp4, r0 /* if ((dest | src | len) & 7 != 0) */
+(p6) br.cond.sptk .bytecopy ;; /* copy byte by byte backward */
+ adds src = -8, src /* src points to the last word */
+ adds dest = -8, dest /* dest points to the last word */
+ mov ar.lc = loopcnt;; /* set the loop counter */
.l5:
(p[0]) ld8 r[0] = [src], -8
(p[MEMLAT]) st8 [dest] = r[MEMLAT], -8
br.ctop.dptk .l5
br.cond.sptk .restore_and_exit
.bytecopy:
- adds src = -1, src // src points to the last byte
- adds dest = -1, dest // dest points to the last byte
- adds loopcnt = -1, len;; // loopcnt = len - 1
- mov ar.lc = loopcnt;; // set the loop counter
+ adds src = -1, src /* src points to the last byte */
+ adds dest = -1, dest /* dest points to the last byte */
+ adds loopcnt = -1, len;; /* loopcnt = len - 1 */
+ mov ar.lc = loopcnt;; /* set the loop counter */
.l6:
(p[0]) ld1 r[0] = [src], -1
(p[MEMLAT]) st1 [dest] = r[MEMLAT], -1
@@ -239,7 +238,7 @@ END(memmove)
.rodata
.align 8
.table:
- data8 0 // dummy entry
+ data8 0 /* dummy entry */
data8 .loop56 - .loop8
data8 .loop56 - .loop16
data8 .loop56 - .loop24
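
The forward/backward decision spelled out at the entry is the usual overlap test: only when the destination starts strictly inside [src, src + len) must the copy run backward. A byte-wise C sketch of the same logic (ours; the real code word-copies whenever dest, src and len are all multiples of 8):

    #include <stddef.h>

    void *memmove_model(void *dst, const void *src, size_t len)
    {
        char *d = dst;
        const char *s = src;

        if (d <= s || d >= s + len) {   /* forward copy is safe */
            while (len--)
                *d++ = *s++;
        } else {                        /* dest inside src range: backward */
            d += len;
            s += len;
            while (len--)
                *--d = *--s;
        }
        return dst;
    }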
diff --git a/libc/string/ia64/memset.S b/libc/string/ia64/memset.S
index ed27f3f31..7bd15c88a 100644
--- a/libc/string/ia64/memset.S
+++ b/libc/string/ia64/memset.S
@@ -15,9 +15,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Return: dest
@@ -33,7 +32,7 @@
Since a stf.spill f0 can store 16B in one go, we use this instruction
to get peak speed when value = 0. */
-#include "sysdep.h"
+#include <sysdep.h>
#undef ret
#define dest in0
@@ -46,15 +45,15 @@
#define ptr1 r28
#define ptr2 r27
#define ptr3 r26
-#define ptr9 r24
+#define ptr9 r24
#define loopcnt r23
#define linecnt r22
#define bytecnt r21
#define fvalue f6
-// This routine uses only scratch predicate registers (p6 - p15)
-#define p_scr p6 // default register for same-cycle branches
+/* This routine uses only scratch predicate registers (p6 - p15) */
+#define p_scr p6 /* default register for same-cycle branches */
#define p_nz p7
#define p_zr p8
#define p_unalgn p9
@@ -68,7 +67,7 @@
#define MIN1 15
#define MIN1P1HALF 8
#define LINE_SIZE 128
-#define LSIZE_SH 7 // shift amount
+#define LSIZE_SH 7 /* shift amount */
#define PREF_AHEAD 8
#define USE_FLP
@@ -90,97 +89,97 @@ ENTRY(memset)
movi0 save_lc = ar.lc
} { .mmi
.body
- mov ret0 = dest // return value
- cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero
+ mov ret0 = dest /* return value */
+ cmp.ne p_nz, p_zr = value, r0 /* use stf.spill if value is zero */
cmp.eq p_scr, p0 = cnt, r0
;; }
{ .mmi
- and ptr2 = -(MIN1+1), dest // aligned address
- and tmp = MIN1, dest // prepare to check for alignment
- tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U)
+ and ptr2 = -(MIN1+1), dest /* aligned address */
+ and tmp = MIN1, dest /* prepare to check for alignment */
+ tbit.nz p_y, p_n = dest, 0 /* Do we have an odd address? (M_B_U) */
} { .mib
mov ptr1 = dest
- mux1 value = value, @brcst // create 8 identical bytes in word
-(p_scr) br.ret.dpnt.many rp // return immediately if count = 0
+ mux1 value = value, @brcst /* create 8 identical bytes in word */
+(p_scr) br.ret.dpnt.many rp /* return immediately if count = 0 */
;; }
{ .mib
cmp.ne p_unalgn, p0 = tmp, r0
-} { .mib // NB: # of bytes to move is 1 higher
- sub bytecnt = (MIN1+1), tmp // than loopcnt
- cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task?
-(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U)
+} { .mib /* NB: # of bytes to move is 1 higher */
+ sub bytecnt = (MIN1+1), tmp /* than loopcnt */
+ cmp.gt p_scr, p0 = 16, cnt /* is it a minimalistic task? */
+(p_scr) br.cond.dptk.many .move_bytes_unaligned /* go move just a few (M_B_U) */
;; }
{ .mmi
-(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment
-(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment
-(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ?
+(p_unalgn) add ptr1 = (MIN1+1), ptr2 /* after alignment */
+(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 /* after alignment */
+(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 /* should we do a st8 ? */
;; }
{ .mib
(p_y) add cnt = -8, cnt
-(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ?
+(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 /* should we do a st4 ? */
} { .mib
(p_y) st8 [ptr2] = value, -4
(p_n) add ptr2 = 4, ptr2
;; }
{ .mib
(p_yy) add cnt = -4, cnt
-(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ?
+(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 /* should we do a st2 ? */
} { .mib
(p_yy) st4 [ptr2] = value, -2
(p_nn) add ptr2 = 2, ptr2
;; }
{ .mmi
- mov tmp = LINE_SIZE+1 // for compare
+ mov tmp = LINE_SIZE+1 /* for compare */
(p_y) add cnt = -2, cnt
-(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ?
+(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 /* should we do a st1 ? */
} { .mmi
- setf.sig fvalue=value // transfer value to FLP side
+ setf.sig fvalue=value /* transfer value to FLP side */
(p_y) st2 [ptr2] = value, -1
(p_n) add ptr2 = 1, ptr2
;; }
{ .mmi
(p_yy) st1 [ptr2] = value
- cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task?
+ cmp.gt p_scr, p0 = tmp, cnt /* is it a minimalistic task? */
} { .mbb
(p_yy) add cnt = -1, cnt
-(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few
+(p_scr) br.cond.dpnt.many .fraction_of_line /* go move just a few */
;; }
{ .mib
nop.m 0
shr.u linecnt = cnt, LSIZE_SH
-(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill
+(p_zr) br.cond.dptk.many .l1b /* Jump to use stf.spill */
;; }
#ifndef GAS_ALIGN_BREAKS_UNWIND_INFO
- .align 32 // -------- // L1A: store ahead into cache lines; fill later
+ .align 32 /* -------- L1A: store ahead into cache lines; fill later */
#endif
{ .mmi
- and tmp = -(LINE_SIZE), cnt // compute end of range
- mov ptr9 = ptr1 // used for prefetching
- and cnt = (LINE_SIZE-1), cnt // remainder
+ and tmp = -(LINE_SIZE), cnt /* compute end of range */
+ mov ptr9 = ptr1 /* used for prefetching */
+ and cnt = (LINE_SIZE-1), cnt /* remainder */
} { .mmi
- mov loopcnt = PREF_AHEAD-1 // default prefetch loop
- cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value
+ mov loopcnt = PREF_AHEAD-1 /* default prefetch loop */
+ cmp.gt p_scr, p0 = PREF_AHEAD, linecnt /* check against actual value */
;; }
{ .mmi
-(p_scr) add loopcnt = -1, linecnt // start of stores
- add ptr2 = 8, ptr1 // (beyond prefetch stores)
- add ptr1 = tmp, ptr1 // first address beyond total
-;; } // range
+(p_scr) add loopcnt = -1, linecnt /* start of stores */
+ add ptr2 = 8, ptr1 /* (beyond prefetch stores) */
+ add ptr1 = tmp, ptr1 /* first address beyond total */
+;; } /* range */
{ .mmi
- add tmp = -1, linecnt // next loop count
+ add tmp = -1, linecnt /* next loop count */
movi0 ar.lc = loopcnt
;; }
.pref_l1a:
{ .mib
- store [ptr9] = myval, 128 // Do stores one cache line apart
+ store [ptr9] = myval, 128 /* Do stores one cache line apart */
nop.i 0
br.cloop.dptk.few .pref_l1a
;; }
{ .mmi
- add ptr0 = 16, ptr2 // Two stores in parallel
+ add ptr0 = 16, ptr2 /* Two stores in parallel */
movi0 ar.lc = tmp
;; }
.l1ax:
@@ -211,7 +210,7 @@ ENTRY(memset)
{ .mmi
store [ptr2] = myval, 8
store [ptr0] = myval, 32
- cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching?
+ cmp.lt p_scr, p0 = ptr9, ptr1 /* do we need more prefetching? */
;; }
{ .mmb
store [ptr2] = myval, 24
@@ -219,9 +218,9 @@ ENTRY(memset)
br.cloop.dptk.few .l1ax
;; }
{ .mbb
- cmp.le p_scr, p0 = 8, cnt // just a few bytes left ?
-(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2
- br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3
+ cmp.le p_scr, p0 = 8, cnt /* just a few bytes left ? */
+(p_scr) br.cond.dpnt.many .fraction_of_line /* Branch no. 2 */
+ br.cond.dpnt.many .move_bytes_from_alignment /* Branch no. 3 */
;; }
#ifdef GAS_ALIGN_BREAKS_UNWIND_INFO
@@ -229,32 +228,32 @@ ENTRY(memset)
#else
.align 32
#endif
-.l1b: // ------------------ // L1B: store ahead into cache lines; fill later
+.l1b: /* ------------------ L1B: store ahead into cache lines; fill later */
{ .mmi
- and tmp = -(LINE_SIZE), cnt // compute end of range
- mov ptr9 = ptr1 // used for prefetching
- and cnt = (LINE_SIZE-1), cnt // remainder
+ and tmp = -(LINE_SIZE), cnt /* compute end of range */
+ mov ptr9 = ptr1 /* used for prefetching */
+ and cnt = (LINE_SIZE-1), cnt /* remainder */
} { .mmi
- mov loopcnt = PREF_AHEAD-1 // default prefetch loop
- cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value
+ mov loopcnt = PREF_AHEAD-1 /* default prefetch loop */
+ cmp.gt p_scr, p0 = PREF_AHEAD, linecnt /* check against actual value */
;; }
{ .mmi
(p_scr) add loopcnt = -1, linecnt
- add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores)
- add ptr1 = tmp, ptr1 // first address beyond total range
+ add ptr2 = 16, ptr1 /* start of stores (beyond prefetch stores) */
+ add ptr1 = tmp, ptr1 /* first address beyond total range */
;; }
{ .mmi
- add tmp = -1, linecnt // next loop count
+ add tmp = -1, linecnt /* next loop count */
movi0 ar.lc = loopcnt
;; }
.pref_l1b:
{ .mib
- stf.spill [ptr9] = f0, 128 // Do stores one cache line apart
+ stf.spill [ptr9] = f0, 128 /* Do stores one cache line apart */
nop.i 0
br.cloop.dptk.few .pref_l1b
;; }
{ .mmi
- add ptr0 = 16, ptr2 // Two stores in parallel
+ add ptr0 = 16, ptr2 /* Two stores in parallel */
movi0 ar.lc = tmp
;; }
.l1bx:
@@ -269,7 +268,7 @@ ENTRY(memset)
{ .mmi
stf.spill [ptr2] = f0, 32
stf.spill [ptr0] = f0, 64
- cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching?
+ cmp.lt p_scr, p0 = ptr9, ptr1 /* do we need more prefetching? */
;; }
{ .mmb
stf.spill [ptr2] = f0, 32
@@ -277,14 +276,14 @@ ENTRY(memset)
br.cloop.dptk.few .l1bx
;; }
{ .mib
- cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ?
+ cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? */
(p_scr) br.cond.dpnt.many .move_bytes_from_alignment
;; }
.fraction_of_line:
{ .mib
add ptr2 = 16, ptr1
- shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32
+ shr.u loopcnt = cnt, 5 /* loopcnt = cnt / 32 */
;; }
{ .mib
cmp.eq p_scr, p0 = loopcnt, r0
@@ -292,13 +291,13 @@ ENTRY(memset)
(p_scr) br.cond.dpnt.many store_words
;; }
{ .mib
- and cnt = 0x1f, cnt // compute the remaining cnt
+ and cnt = 0x1f, cnt /* compute the remaining cnt */
movi0 ar.lc = loopcnt
;; }
#ifndef GAS_ALIGN_BREAKS_UNWIND_INFO
.align 32
#endif
-.l2: // ---------------------------- // L2A: store 32B in 2 cycles
+.l2: /* ---------------------------- L2A: store 32B in 2 cycles */
{ .mmb
store [ptr1] = myval, 8
store [ptr2] = myval, 8
@@ -309,34 +308,34 @@ ENTRY(memset)
;; }
store_words:
{ .mib
- cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ?
-(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch
+ cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? */
+(p_scr) br.cond.dpnt.many .move_bytes_from_alignment /* Branch */
;; }
{ .mmi
- store [ptr1] = myval, 8 // store
- cmp.le p_y, p_n = 16, cnt //
- add cnt = -8, cnt // subtract
+ store [ptr1] = myval, 8 /* store */
+ cmp.le p_y, p_n = 16, cnt /* */
+ add cnt = -8, cnt /* subtract */
;; }
{ .mmi
-(p_y) store [ptr1] = myval, 8 // store
-(p_y) cmp.le.unc p_yy, p_nn = 16, cnt //
-(p_y) add cnt = -8, cnt // subtract
+(p_y) store [ptr1] = myval, 8 /* store */
+(p_y) cmp.le.unc p_yy, p_nn = 16, cnt /* */
+(p_y) add cnt = -8, cnt /* subtract */
;; }
-{ .mmi // store
-(p_yy) store [ptr1] = myval, 8 //
-(p_yy) add cnt = -8, cnt // subtract
+{ .mmi /* store */
+(p_yy) store [ptr1] = myval, 8 /* */
+(p_yy) add cnt = -8, cnt /* subtract */
;; }
.move_bytes_from_alignment:
{ .mib
cmp.eq p_scr, p0 = cnt, r0
- tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ?
+ tbit.nz.unc p_y, p0 = cnt, 2 /* should we terminate with a st4 ? */
(p_scr) br.cond.dpnt.few .restore_and_exit
;; }
{ .mib
(p_y) st4 [ptr1] = value, 4
- tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ?
+ tbit.nz.unc p_yy, p0 = cnt, 1 /* should we terminate with a st2 ? */
;; }
{ .mib
(p_yy) st2 [ptr1] = value, 2
@@ -362,38 +361,38 @@ store_words:
(p_n) add ptr2 = 2, ptr1
} { .mmi
(p_y) add ptr2 = 3, ptr1
-(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte
-(p_y) add cnt = -1, cnt // [15, 14 (or less) left]
+(p_y) st1 [ptr1] = value, 1 /* fill 1 (odd-aligned) byte */
+(p_y) add cnt = -1, cnt /* [15, 14 (or less) left] */
;; }
{ .mmi
(p_yy) cmp.le.unc p_y, p0 = 8, cnt
- add ptr3 = ptr1, cnt // prepare last store
+ add ptr3 = ptr1, cnt /* prepare last store */
movi0 ar.lc = save_lc
} { .mmi
-(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
-(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes
-(p_yy) add cnt = -4, cnt // [11, 10 (o less) left]
+(p_yy) st2 [ptr1] = value, 4 /* fill 2 (aligned) bytes */
+(p_yy) st2 [ptr2] = value, 4 /* fill 2 (aligned) bytes */
+(p_yy) add cnt = -4, cnt /* [11, 10 (or less) left] */
;; }
{ .mmi
(p_y) cmp.le.unc p_yy, p0 = 8, cnt
- add ptr3 = -1, ptr3 // last store
- tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ?
+ add ptr3 = -1, ptr3 /* last store */
+ tbit.nz p_scr, p0 = cnt, 1 /* will there be a st2 at the end ? */
} { .mmi
-(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
-(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes
-(p_y) add cnt = -4, cnt // [7, 6 (or less) left]
+(p_y) st2 [ptr1] = value, 4 /* fill 2 (aligned) bytes */
+(p_y) st2 [ptr2] = value, 4 /* fill 2 (aligned) bytes */
+(p_y) add cnt = -4, cnt /* [7, 6 (or less) left] */
;; }
{ .mmi
-(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
-(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes
- // [3, 2 (or less) left]
- tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ?
+(p_yy) st2 [ptr1] = value, 4 /* fill 2 (aligned) bytes */
+(p_yy) st2 [ptr2] = value, 4 /* fill 2 (aligned) bytes */
+ /* [3, 2 (or less) left] */
+ tbit.nz p_y, p0 = cnt, 0 /* will there be a st1 at the end ? */
} { .mmi
(p_yy) add cnt = -4, cnt
;; }
{ .mmb
-(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes
-(p_y) st1 [ptr3] = value // fill last byte (using ptr3)
+(p_scr) st2 [ptr1] = value /* fill 2 (aligned) bytes */
+(p_y) st1 [ptr3] = value /* fill last byte (using ptr3) */
br.ret.sptk.many rp
;; }
END(memset)
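
The mux1 value = value, @brcst step replicates the fill byte into all eight byte lanes so each st8 writes eight copies at once (and stf.spill of f0 covers 16 zero bytes per store, which is why value == 0 gets its own .l1b path). The broadcast in portable C, for reference (a sketch; the multiply is the conventional idiom):

    #include <stdint.h>

    static uint64_t broadcast_byte(unsigned char c)
    {
        return (uint64_t)c * 0x0101010101010101ULL;  /* mux1 ..., @brcst */
    }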
diff --git a/libc/string/ia64/softpipe.h b/libc/string/ia64/softpipe.h
index d71af735e..a9a9dc679 100644
--- a/libc/string/ia64/softpipe.h
+++ b/libc/string/ia64/softpipe.h
@@ -12,9 +12,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* The latency of a memory load assumed by the assembly implementation
of the mem and str functions. Since we don't have any clue about
diff --git a/libc/string/ia64/strchr.S b/libc/string/ia64/strchr.S
index 401a07941..034fd3096 100644
--- a/libc/string/ia64/strchr.S
+++ b/libc/string/ia64/strchr.S
@@ -14,9 +14,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Return: the address of the first occurrence of chr in str or NULL
@@ -30,7 +29,7 @@
This implementation assumes little endian mode. For big endian mode,
the instruction czx1.r should be replaced by czx1.l. */
-#include "sysdep.h"
+#include <sysdep.h>
#undef ret
#define saved_lc r18
@@ -49,15 +48,15 @@ ENTRY(strchr)
.prologue
alloc r2 = ar.pfs, 2, 0, 0, 0
.save ar.lc, saved_lc
- mov saved_lc = ar.lc // save the loop counter
+ mov saved_lc = ar.lc /* save the loop counter */
.body
mov ret0 = str
- and tmp = 7, str // tmp = str % 8
+ and tmp = 7, str /* tmp = str % 8 */
mux1 chrx8 = chr, @brcst
- extr.u chr = chr, 0, 8 // retain only the last byte
- cmp.ne p8, p0 = r0, r0 // clear p8
+ extr.u chr = chr, 0, 8 /* retain only the last byte */
+ cmp.ne p8, p0 = r0, r0 /* clear p8 */
;;
- sub loopcnt = 8, tmp // loopcnt = 8 - tmp
+ sub loopcnt = 8, tmp /* loopcnt = 8 - tmp */
cmp.eq p6, p0 = tmp, r0
(p6) br.cond.sptk .str_aligned;;
adds loopcnt = -1, loopcnt;;
@@ -75,10 +74,10 @@ ENTRY(strchr)
nop.b 0
nop.b 0
.l2:
- ld8.s val2 = [ret0], 8 // don't bomb out here
+ ld8.s val2 = [ret0], 8 /* don't bomb out here */
czx1.r pos0 = val1
- xor tmp = val1, chrx8 // if val1 contains chr, tmp will
- ;; // contain a zero in its position
+ xor tmp = val1, chrx8 /* if val1 contains chr, tmp will */
+ ;; /* contain a zero in its position */
czx1.r poschr = tmp
cmp.ne p6, p0 = 8, pos0
;;
@@ -90,21 +89,21 @@ ENTRY(strchr)
mov val1 = val2
br.cond.dptk .l2
.foundit:
-(p6) cmp.lt p8, p0 = pos0, poschr // we found chr and null in the word
-(p8) br.cond.spnt .notfound // null was found before chr
+(p6) cmp.lt p8, p0 = pos0, poschr /* we found chr and null in the word */
+(p8) br.cond.spnt .notfound /* null was found before chr */
add ret0 = ret0, poschr ;;
- adds ret0 = -15, ret0 ;; // should be -16, but we decrement
-.restore_and_exit: // ret0 in the next instruction
- adds ret0 = -1, ret0 // ret0 was pointing 1 char too far
- mov ar.lc = saved_lc // restore the loop counter
+ adds ret0 = -15, ret0 ;; /* should be -16, but we decrement */
+.restore_and_exit: /* ret0 in the next instruction */
+ adds ret0 = -1, ret0 /* ret0 was pointing 1 char too far */
+ mov ar.lc = saved_lc /* restore the loop counter */
br.ret.sptk.many b0
.notfound:
- mov ret0 = r0 // return NULL if null was found
+ mov ret0 = r0 /* return NULL if null was found */
mov ar.lc = saved_lc
br.ret.sptk.many b0
.recovery:
adds ret0 = -8, ret0;;
- ld8 val2 = [ret0], 8 // bomb out here
+ ld8 val2 = [ret0], 8 /* bomb out here */
br.cond.sptk .back
END(strchr)
libc_hidden_def (strchr)
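
The xor/czx1.r pairing in .l2 is the heart of the routine: xor-ing the loaded word with chr replicated eight times turns any byte equal to chr into zero, and czx1.r reports the index of the first zero byte (8 if none). So pos0 locates the NUL and poschr the candidate match, and .foundit returns NULL when the NUL came first. A byte-loop C model of czx1.r (ours):

    #include <stdint.h>

    /* index of the first zero byte in a little-endian word, or 8 if none */
    static unsigned first_zero_byte(uint64_t w)
    {
        for (unsigned i = 0; i < 8; i++, w >>= 8)
            if ((w & 0xff) == 0)
                return i;
        return 8;
    }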
diff --git a/libc/string/ia64/strcmp.S b/libc/string/ia64/strcmp.S
index d3b41e642..c45ab4801 100644
--- a/libc/string/ia64/strcmp.S
+++ b/libc/string/ia64/strcmp.S
@@ -14,9 +14,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Return: the result of the comparison
@@ -27,7 +26,7 @@
Unlike memcmp(), this function is optimized for mismatches within the
first few characters. */
-#include "sysdep.h"
+#include <sysdep.h>
#undef ret
#define s1 in0
@@ -42,7 +41,7 @@ ENTRY(strcmp)
.loop:
ld1 val1 = [s1], 1
ld1 val2 = [s2], 1
- cmp.eq p6, p0 = r0, r0 // set p6
+ cmp.eq p6, p0 = r0, r0 /* set p6 */
;;
cmp.ne.and p6, p0 = val1, r0
cmp.ne.and p6, p0 = val2, r0
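
The byte loop keeps the dependency chain short, which is why the header note says this is tuned for mismatches in the first few characters rather than for bulk throughput. The same loop in C (sketch):

    /* reference byte-at-a-time strcmp */
    int strcmp_model(const char *p1, const char *p2)
    {
        const unsigned char *s1 = (const unsigned char *)p1;
        const unsigned char *s2 = (const unsigned char *)p2;
        unsigned char c1, c2;

        do {
            c1 = *s1++;
            c2 = *s2++;
        } while (c1 == c2 && c1 != 0);  /* stop on mismatch or NUL */
        return c1 - c2;
    }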
diff --git a/libc/string/ia64/strcpy.S b/libc/string/ia64/strcpy.S
index e4a9915ca..c9b3bc143 100644
--- a/libc/string/ia64/strcpy.S
+++ b/libc/string/ia64/strcpy.S
@@ -14,9 +14,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Return: dest
@@ -27,11 +26,11 @@
In this form, it assumes little endian mode. For big endian mode, the
two shifts in .l2 must be inverted:
- shl value = r[1], sh1 // value = w0 << sh1
- shr.u tmp = r[0], sh2 // tmp = w1 >> sh2
+ shl value = r[1], sh1 // value = w0 << sh1
+ shr.u tmp = r[0], sh2 // tmp = w1 >> sh2
*/
-#include "sysdep.h"
+#include <sysdep.h>
#undef ret
#define saved_lc r15
@@ -53,62 +52,62 @@
ENTRY(strcpy)
.prologue
- alloc r2 = ar.pfs, 2, 0, 30, 32
+ alloc r2 = ar.pfs, 2, 0, 30, 32
#define MEMLAT 2
.rotr r[MEMLAT + 2]
.rotp p[MEMLAT + 1]
- mov ret0 = in0 // return value = dest
+ mov ret0 = in0 /* return value = dest */
.save pr, saved_pr
- mov saved_pr = pr // save the predicate registers
+ mov saved_pr = pr /* save the predicate registers */
.save ar.lc, saved_lc
- mov saved_lc = ar.lc // save the loop counter
+ mov saved_lc = ar.lc /* save the loop counter */
.body
- sub tmp = r0, in0 ;; // tmp = -dest
- mov dest = in0 // dest
- mov src = in1 // src
- and loopcnt = 7, tmp ;; // loopcnt = -dest % 8
+ sub tmp = r0, in0 ;; /* tmp = -dest */
+ mov dest = in0 /* dest */
+ mov src = in1 /* src */
+ and loopcnt = 7, tmp ;; /* loopcnt = -dest % 8 */
cmp.eq p6, p0 = loopcnt, r0
- adds loopcnt = -1, loopcnt // --loopcnt
+ adds loopcnt = -1, loopcnt /* --loopcnt */
(p6) br.cond.sptk .dest_aligned ;;
mov ar.lc = loopcnt
-.l1: // copy -dest % 8 bytes
- ld1 c = [src], 1 // c = *src++
+.l1: /* copy -dest % 8 bytes */
+ ld1 c = [src], 1 /* c = *src++ */
;;
- st1 [dest] = c, 1 // *dest++ = c
+ st1 [dest] = c, 1 /* *dest++ = c */
cmp.eq p6, p0 = c, r0
(p6) br.cond.dpnt .restore_and_exit
br.cloop.dptk .l1 ;;
.dest_aligned:
- and sh1 = 7, src // sh1 = src % 8
- mov ar.lc = -1 // "infinite" loop
- and asrc = -8, src ;; // asrc = src & -OPSIZ -- align src
+ and sh1 = 7, src /* sh1 = src % 8 */
+ mov ar.lc = -1 /* "infinite" loop */
+ and asrc = -8, src ;; /* asrc = src & -OPSIZ -- align src */
sub thresh = 8, sh1
- mov pr.rot = 1 << 16 // set rotating predicates
- cmp.ne p7, p0 = r0, r0 // clear p7
- shl sh1 = sh1, 3 ;; // sh1 = 8 * (src % 8)
- sub sh2 = 64, sh1 // sh2 = 64 - sh1
- cmp.eq p6, p0 = sh1, r0 // is the src aligned?
+ mov pr.rot = 1 << 16 /* set rotating predicates */
+ cmp.ne p7, p0 = r0, r0 /* clear p7 */
+ shl sh1 = sh1, 3 ;; /* sh1 = 8 * (src % 8) */
+ sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */
+ cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */
(p6) br.cond.sptk .src_aligned ;;
ld8 r[1] = [asrc],8 ;;
.align 32
.l2:
ld8.s r[0] = [asrc], 8
- shr.u value = r[1], sh1 ;; // value = w0 >> sh1
- czx1.r pos = value ;; // do we have an "early" zero
- cmp.lt p7, p0 = pos, thresh // in w0 >> sh1?
+ shr.u value = r[1], sh1 ;; /* value = w0 >> sh1 */
+ czx1.r pos = value ;; /* do we have an "early" zero */
+ cmp.lt p7, p0 = pos, thresh /* in w0 >> sh1? */
(p7) br.cond.dpnt .found0
- chk.s r[0], .recovery2 // it is safe to do that only
-.back2: // after the previous test
- shl tmp = r[0], sh2 // tmp = w1 << sh2
+ chk.s r[0], .recovery2 /* it is safe to do that only */
+.back2: /* after the previous test */
+ shl tmp = r[0], sh2 /* tmp = w1 << sh2 */
;;
- or value = value, tmp ;; // value |= tmp
+ or value = value, tmp ;; /* value |= tmp */
czx1.r pos = value ;;
cmp.ne p7, p0 = 8, pos
(p7) br.cond.dpnt .found0
- st8 [dest] = value, 8 // store val to dest
+ st8 [dest] = value, 8 /* store val to dest */
br.ctop.dptk .l2 ;;
.src_aligned:
.l3:
@@ -124,14 +123,14 @@ ENTRY(strcpy)
.found0:
mov ar.lc = pos
.l4:
- extr.u c = value, 0, 8 // c = value & 0xff
+ extr.u c = value, 0, 8 /* c = value & 0xff */
shr.u value = value, 8
;;
st1 [dest] = c, 1
br.cloop.dptk .l4 ;;
.restore_and_exit:
- mov ar.lc = saved_lc // restore the loop counter
- mov pr = saved_pr, -1 // restore the predicate registers
+ mov ar.lc = saved_lc /* restore the loop counter */
+ mov pr = saved_pr, -1 /* restore the predicate registers */
br.ret.sptk.many b0
.recovery2:
add tmp = -8, asrc ;;
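
Once .found0 knows the byte index pos of the NUL inside the final merged word, loop .l4 unpacks that word one byte at a time; setting ar.lc = pos makes the counted loop run pos + 1 times, so the terminator itself gets stored. The tail in C (a sketch of just that step):

    #include <stdint.h>

    /* store the low pos + 1 bytes of value, NUL included (loop .l4) */
    static void store_tail(unsigned char *dest, uint64_t value, unsigned pos)
    {
        for (unsigned i = 0; i <= pos; i++, value >>= 8)
            *dest++ = (unsigned char)(value & 0xff);
    }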
diff --git a/libc/string/ia64/strlen.S b/libc/string/ia64/strlen.S
index 9b27a2d1b..83244cd76 100644
--- a/libc/string/ia64/strlen.S
+++ b/libc/string/ia64/strlen.S
@@ -14,9 +14,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Return: the length of the input string
@@ -33,7 +32,7 @@
This implementation assumes little endian mode. For big endian mode,
the instruction czx1.r should be replaced by czx1.l. */
-#include "sysdep.h"
+#include <sysdep.h>
#undef ret
#define saved_lc r18
@@ -50,13 +49,13 @@ ENTRY(strlen)
.prologue
alloc r2 = ar.pfs, 1, 0, 0, 0
.save ar.lc, saved_lc
- mov saved_lc = ar.lc // save the loop counter
+ mov saved_lc = ar.lc /* save the loop counter */
.body
mov str = in0
- mov len = r0 // len = 0
- and tmp = 7, in0 // tmp = str % 8
+ mov len = r0 /* len = 0 */
+ and tmp = 7, in0 /* tmp = str % 8 */
;;
- sub loopcnt = 8, tmp // loopcnt = 8 - tmp
+ sub loopcnt = 8, tmp /* loopcnt = 8 - tmp */
cmp.eq p6, p0 = tmp, r0
(p6) br.cond.sptk .str_aligned;;
adds loopcnt = -1, loopcnt;;
@@ -69,11 +68,11 @@ ENTRY(strlen)
adds len = 1, len
br.cloop.dptk .l1
.str_aligned:
- mov origadd = str // origadd = orig
+ mov origadd = str /* origadd = orig */
ld8 val1 = [str], 8;;
nop.b 0
nop.b 0
-.l2: ld8.s val2 = [str], 8 // don't bomb out here
+.l2: ld8.s val2 = [str], 8 /* don't bomb out here */
czx1.r pos0 = val1
;;
cmp.ne p6, p0 = 8, pos0
@@ -83,16 +82,16 @@ ENTRY(strlen)
mov val1 = val2
br.cond.dptk .l2
.foundit:
- sub tmp = str, origadd // tmp = crt address - orig
+ sub tmp = str, origadd /* tmp = current address - orig */
add len = len, pos0;;
add len = len, tmp;;
adds len = -16, len
.restore_and_exit:
- mov ar.lc = saved_lc // restore the loop counter
+ mov ar.lc = saved_lc /* restore the loop counter */
br.ret.sptk.many b0
.recovery:
adds str = -8, str;;
- ld8 val2 = [str], 8 // bomb out here
+ ld8 val2 = [str], 8 /* bomb out here */
br.cond.sptk .back
END(strlen)
libc_hidden_def (strlen)
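
Modulo the speculative ld8.s/chk.s recovery and the -16 fixup for the two words still in flight, the annotated loop is the classic word-at-a-time strlen. A C model under those simplifications (ours; note that reading the whole aligned word can touch bytes past the terminator, which the aligned asm load may do legitimately but strict C cannot):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    size_t strlen_model(const char *s)
    {
        const char *p = s;

        while ((uintptr_t)p & 7) {      /* .l1: head bytes to alignment */
            if (*p == '\0')
                return (size_t)(p - s);
            p++;
        }
        for (;; p += 8) {               /* .l2: one ld8 + czx1.r per word */
            uint64_t w;
            memcpy(&w, p, 8);
            for (unsigned i = 0; i < 8; i++, w >>= 8)
                if ((w & 0xff) == 0)
                    return (size_t)(p - s) + i;
        }
    }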
diff --git a/libc/string/ia64/strncmp.S b/libc/string/ia64/strncmp.S
index 8e0373c7f..d58a2007e 100644
--- a/libc/string/ia64/strncmp.S
+++ b/libc/string/ia64/strncmp.S
@@ -14,21 +14,20 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Return: the result of the comparison
Inputs:
in0: s1
in1: s2
- in2: n
+ in2: n
Unlike memcmp(), this function is optimized for mismatches within the
first few characters. */
-#include "sysdep.h"
+#include <sysdep.h>
#undef ret
#define s1 in0
@@ -42,13 +41,13 @@
ENTRY(strncmp)
alloc r2 = ar.pfs, 3, 0, 0, 0
mov ret0 = r0
- cmp.eq p6, p0 = r0, r0 // set p6
- cmp.eq p7, p0 = n, r0 // return immediately if n == 0
+ cmp.eq p6, p0 = r0, r0 /* set p6 */
+ cmp.eq p7, p0 = n, r0 /* return immediately if n == 0 */
(p7) br.cond.spnt .restore_and_exit ;;
.loop:
ld1 val1 = [s1], 1
ld1 val2 = [s2], 1
- adds n = -1, n // n--
+ adds n = -1, n /* n-- */
;;
cmp.ne.and p6, p0 = val1, r0
cmp.ne.and p6, p0 = val2, r0
@@ -58,5 +57,5 @@ ENTRY(strncmp)
sub ret0 = val1, val2
.restore_and_exit:
br.ret.sptk.many b0
-END(strncmp)
+END(strncmp)
libc_hidden_weak (strncmp)
diff --git a/libc/string/ia64/strncpy.S b/libc/string/ia64/strncpy.S
index 4f1129350..b72e8a70c 100644
--- a/libc/string/ia64/strncpy.S
+++ b/libc/string/ia64/strncpy.S
@@ -15,9 +15,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Return: dest
@@ -29,7 +28,7 @@
In this form, it assumes little endian mode.
*/
-#include "sysdep.h"
+#include <sysdep.h>
#undef ret
#define saved_lc r15
@@ -58,64 +57,64 @@ ENTRY(strncpy)
.rotr r[MEMLAT + 2]
.rotp p[MEMLAT + 1]
- mov ret0 = in0 // return value = dest
+ mov ret0 = in0 /* return value = dest */
.save pr, saved_pr
- mov saved_pr = pr // save the predicate registers
+ mov saved_pr = pr /* save the predicate registers */
.save ar.lc, saved_lc
- mov saved_lc = ar.lc // save the loop counter
- mov ar.ec = 0 // ec is not guaranteed to
- // be zero upon function entry
+ mov saved_lc = ar.lc /* save the loop counter */
+ mov ar.ec = 0 /* ec is not guaranteed to */
+ /* be zero upon function entry */
.body
cmp.geu p6, p5 = 24, in2
(p6) br.cond.spnt .short_len
- sub tmp = r0, in0 ;; // tmp = -dest
- mov len = in2 // len
- mov dest = in0 // dest
- mov src = in1 // src
- and tmp = 7, tmp ;; // loopcnt = -dest % 8
+ sub tmp = r0, in0 ;; /* tmp = -dest */
+ mov len = in2 /* len */
+ mov dest = in0 /* dest */
+ mov src = in1 /* src */
+ and tmp = 7, tmp ;; /* loopcnt = -dest % 8 */
cmp.eq p6, p7 = tmp, r0
- adds loopcnt = -1, tmp // --loopcnt
+ adds loopcnt = -1, tmp /* --loopcnt */
(p6) br.cond.sptk .dest_aligned ;;
- sub len = len, tmp // len -= -dest % 8
+ sub len = len, tmp /* len -= -dest % 8 */
mov ar.lc = loopcnt
-.l1: // copy -dest % 8 bytes
-(p5) ld1 c = [src], 1 // c = *src++
+.l1: /* copy -dest % 8 bytes */
+(p5) ld1 c = [src], 1 /* c = *src++ */
;;
- st1 [dest] = c, 1 // *dest++ = c
+ st1 [dest] = c, 1 /* *dest++ = c */
cmp.ne p5, p7 = c, r0
br.cloop.dptk .l1 ;;
(p7) br.cond.dpnt .found0_align
-.dest_aligned: // p7 should be cleared here
- shr.u c = len, 3 // c = len / 8
- and sh1 = 7, src // sh1 = src % 8
- and asrc = -8, src ;; // asrc = src & -OPSIZ -- align src
- adds c = (MEMLAT-1), c // c = (len / 8) + MEMLAT - 1
+.dest_aligned: /* p7 should be cleared here */
+ shr.u c = len, 3 /* c = len / 8 */
+ and sh1 = 7, src /* sh1 = src % 8 */
+ and asrc = -8, src ;; /* asrc = src & -OPSIZ -- align src */
+ adds c = (MEMLAT-1), c /* c = (len / 8) + MEMLAT - 1 */
sub thresh = 8, sh1
- mov pr.rot = 1 << 16 // set rotating predicates
- shl sh1 = sh1, 3 ;; // sh1 = 8 * (src % 8)
- mov ar.lc = c // "infinite" loop
- sub sh2 = 64, sh1 // sh2 = 64 - sh1
- cmp.eq p6, p0 = sh1, r0 // is the src aligned?
+ mov pr.rot = 1 << 16 /* set rotating predicates */
+ shl sh1 = sh1, 3 ;; /* sh1 = 8 * (src % 8) */
+ mov ar.lc = c /* "infinite" loop */
+ sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */
+ cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */
(p6) br.cond.sptk .src_aligned
- adds c = -(MEMLAT-1), c ;; // c = (len / 8)
+ adds c = -(MEMLAT-1), c ;; /* c = (len / 8) */
ld8 r[1] = [asrc],8
mov ar.lc = c ;;
.align 32
.l2:
-(p6) st8 [dest] = value, 8 // store val to dest
+(p6) st8 [dest] = value, 8 /* store val to dest */
ld8.s r[0] = [asrc], 8
- shr.u value = r[1], sh1 ;; // value = w0 >> sh1
- czx1.r pos = value ;; // do we have an "early" zero
- cmp.lt p7, p0 = pos, thresh // in w0 >> sh1?
- adds len = -8, len // len -= 8
+ shr.u value = r[1], sh1 ;; /* value = w0 >> sh1 */
+ czx1.r pos = value ;; /* do we have an "early" zero */
+ cmp.lt p7, p0 = pos, thresh /* in w0 >> sh1? */
+ adds len = -8, len /* len -= 8 */
(p7) br.cond.dpnt .nonalign_found0
- chk.s r[0], .recovery2 // it is safe to do that only
-.back2: // after the previous test
- shl tmp = r[0], sh2 // tmp = w1 << sh2
+ chk.s r[0], .recovery2 /* it is safe to do that only */
+.back2: /* after the previous test */
+ shl tmp = r[0], sh2 /* tmp = w1 << sh2 */
;;
- or value = value, tmp ;; // value |= tmp
+ or value = value, tmp ;; /* value |= tmp */
czx1.r pos = value ;;
cmp.ne p7, p6 = 8, pos
(p7) br.cond.dpnt .nonalign_found0
@@ -137,7 +136,7 @@ ENTRY(strncpy)
(p[MEMLAT]) mov value = r[MEMLAT]
(p[MEMLAT]) czx1.r pos = r[MEMLAT] ;;
(p[MEMLAT]) cmp.ne p7, p0 = 8, pos
-(p[MEMLAT]) adds len = -8, len // len -= 8
+(p[MEMLAT]) adds len = -8, len /* len -= 8 */
(p7) br.cond.dpnt .found0
(p[MEMLAT]) st8 [dest] = r[MEMLAT], 8
br.ctop.dptk .l3 ;;
@@ -152,7 +151,7 @@ ENTRY(strncpy)
(p5) br.cond.dptk .restore_and_exit ;;
mov ar.lc = len
.l4:
-(p6) extr.u c = value, 0, 8 // c = value & 0xff
+(p6) extr.u c = value, 0, 8 /* c = value & 0xff */
(p6) shr.u value = value, 8 ;;
st1 [dest] = c, 1
cmp.ne p6, p0 = c, r0
@@ -165,7 +164,7 @@ ENTRY(strncpy)
mov value = 0 ;;
.found0:
shl tmp = pos, 3
- shr.u loopcnt = len, 4 // loopcnt = len / 16
+ shr.u loopcnt = len, 4 /* loopcnt = len / 16 */
mov c = -1 ;;
cmp.eq p6, p0 = loopcnt, r0
adds loopcnt = -1, loopcnt
@@ -192,24 +191,24 @@ ENTRY(strncpy)
st1 [dest] = r0, 1
br.cloop.dptk .l7 ;;
.restore_and_exit:
- mov ar.lc = saved_lc // restore the loop counter
- mov pr = saved_pr, -1 // restore the predicate registers
+ mov ar.lc = saved_lc /* restore the loop counter */
+ mov pr = saved_pr, -1 /* restore the predicate registers */
br.ret.sptk.many b0
.short_len:
cmp.eq p5, p0 = in2, r0
adds loopcnt = -1, in2
(p5) br.cond.spnt .restore_and_exit ;;
- mov ar.lc = loopcnt // p6 should be set when we get here
+ mov ar.lc = loopcnt /* p6 should be set when we get here */
.l8:
-(p6) ld1 c = [in1], 1 // c = *src++
+(p6) ld1 c = [in1], 1 /* c = *src++ */
;;
- st1 [in0] = c, 1 // *dest++ = c
+ st1 [in0] = c, 1 /* *dest++ = c */
(p6) cmp.ne p6, p0 = c, r0
br.cloop.dptk .l8
;;
- mov ar.lc = saved_lc // restore the loop counter
- mov pr = saved_pr, -1 // restore the predicate registers
+ mov ar.lc = saved_lc /* restore the loop counter */
+ mov pr = saved_pr, -1 /* restore the predicate registers */
br.ret.sptk.many b0
.recovery2:
add c = 8, len
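
For readers without the full file: the .l4 tail loop in the hunk above drains the last partially filled word one byte at a time, stopping at the terminator. A rough C sketch of that step (the helper name and types are illustrative, not part of the source; zero-padding is handled by a later loop):

    /* sketch of the .l4 tail: emit the low byte of `value` each pass */
    static void store_tail(unsigned char *dest, unsigned long value,
                           int nbytes)
    {
        while (nbytes-- > 0) {
            unsigned char c = value & 0xff;    /* extr.u c = value, 0, 8 */
            *dest++ = c;                       /* st1 [dest] = c, 1      */
            if (c == '\0')
                break;
            value >>= 8;                       /* shr.u value = value, 8 */
        }
    }
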
diff --git a/libc/string/ia64/sysdep.h b/libc/string/ia64/sysdep.h
deleted file mode 100644
index d10020ac1..000000000
--- a/libc/string/ia64/sysdep.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/* Copyright (C) 1999, 2000, 2002, 2003, 2004 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Jes Sorensen, <Jes.Sorensen@cern.ch>, April 1999.
- Based on code originally written by David Mosberger-Tang
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifndef _LINUX_IA64_SYSDEP_H
-#define _LINUX_IA64_SYSDEP_H 1
-
-#include <features.h>
-#include <asm/unistd.h>
-
-#ifdef __ASSEMBLER__
-
-/* Macros to help writing .prologue directives in assembly code. */
-#define ASM_UNW_PRLG_RP 0x8
-#define ASM_UNW_PRLG_PFS 0x4
-#define ASM_UNW_PRLG_PSP 0x2
-#define ASM_UNW_PRLG_PR 0x1
-#define ASM_UNW_PRLG_GRSAVE(ninputs) (32+(ninputs))
-
-#ifdef __STDC__
-#define C_LABEL(name) name :
-#else
-#define C_LABEL(name) name/**/:
-#endif
-
-#define CALL_MCOUNT
-
-#define ENTRY(name) \
- .text; \
- .align 32; \
- .proc C_SYMBOL_NAME(name); \
- .global C_SYMBOL_NAME(name); \
- C_LABEL(name) \
- CALL_MCOUNT
-
-#define LEAF(name) \
- .text; \
- .align 32; \
- .proc C_SYMBOL_NAME(name); \
- .global name; \
- C_LABEL(name)
-
-/* Mark the end of function SYM. */
-#undef END
-#define END(sym) .endp C_SYMBOL_NAME(sym)
-
-/* For Linux we can use the system call table in the header file
- /usr/include/asm/unistd.h
- of the kernel. But these symbols do not follow the SYS_* syntax
- so we have to redefine the `SYS_ify' macro here. */
-#undef SYS_ify
-#ifdef __STDC__
-# define SYS_ify(syscall_name) __NR_##syscall_name
-#else
-# define SYS_ify(syscall_name) __NR_/**/syscall_name
-#endif
-
-/* Linux uses a negative return value to indicate syscall errors, unlike
- most Unices, which use the condition codes' carry flag.
-
- Since version 2.1 the return value of a system call might be negative
- even if the call succeeded. E.g., the `lseek' system call might return
- a large offset. Therefore we must not anymore test for < 0, but test
- for a real error by making sure the value in %d0 is a real error
- number. Linus said he will make sure the no syscall returns a value
- in -1 .. -4095 as a valid result so we can savely test with -4095. */
-
-/* We don't want the label for the error handler to be visible in the symbol
- table when we define it here. */
-#define SYSCALL_ERROR_LABEL __syscall_error
-
-#undef PSEUDO
-#define PSEUDO(name, syscall_name, args) \
- ENTRY(name) \
- DO_CALL (SYS_ify(syscall_name)); \
- cmp.eq p6,p0=-1,r10; \
-(p6) br.cond.spnt.few __syscall_error;
-
-#define DO_CALL_VIA_BREAK(num) \
- mov r15=num; \
- break __BREAK_SYSCALL
-
-#ifdef IA64_USE_NEW_STUB
-# ifdef SHARED
-# define DO_CALL(num) \
- .prologue; \
- adds r2 = SYSINFO_OFFSET, r13;; \
- ld8 r2 = [r2]; \
- .save ar.pfs, r11; \
- mov r11 = ar.pfs;; \
- .body; \
- mov r15 = num; \
- mov b7 = r2; \
- br.call.sptk.many b6 = b7;; \
- .restore sp; \
- mov ar.pfs = r11; \
- .prologue; \
- .body
-# else /* !SHARED */
-# define DO_CALL(num) \
- .prologue; \
- mov r15 = num; \
- movl r2 = _dl_sysinfo;; \
- ld8 r2 = [r2]; \
- .save ar.pfs, r11; \
- mov r11 = ar.pfs;; \
- .body; \
- mov b7 = r2; \
- br.call.sptk.many b6 = b7;; \
- .restore sp; \
- mov ar.pfs = r11; \
- .prologue; \
- .body
-# endif
-#else
-# define DO_CALL(num) DO_CALL_VIA_BREAK(num)
-#endif
-
-#undef PSEUDO_END
-#define PSEUDO_END(name) .endp C_SYMBOL_NAME(name);
-
-#undef PSEUDO_NOERRNO
-#define PSEUDO_NOERRNO(name, syscall_name, args) \
- ENTRY(name) \
- DO_CALL (SYS_ify(syscall_name));
-
-#undef PSEUDO_END_NOERRNO
-#define PSEUDO_END_NOERRNO(name) .endp C_SYMBOL_NAME(name);
-
-#undef PSEUDO_ERRVAL
-#define PSEUDO_ERRVAL(name, syscall_name, args) \
- ENTRY(name) \
- DO_CALL (SYS_ify(syscall_name)); \
- cmp.eq p6,p0=-1,r10; \
-(p6) mov r10=r8;
-
-
-#undef PSEUDO_END_ERRVAL
-#define PSEUDO_END_ERRVAL(name) .endp C_SYMBOL_NAME(name);
-
-#undef END
-#define END(name) \
- .size C_SYMBOL_NAME(name), . - C_SYMBOL_NAME(name) ; \
- .endp C_SYMBOL_NAME(name)
-
-#define ret br.ret.sptk.few b0
-#define ret_NOERRNO ret
-#define ret_ERRVAL ret
-
-#endif /* not __ASSEMBLER__ */
-
-#endif /* linux/ia64/sysdep.h */
diff --git a/libc/string/memchr.c b/libc/string/memchr.c
index 413999722..99e13a2fc 100644
--- a/libc/string/memchr.c
+++ b/libc/string/memchr.c
@@ -10,31 +10,23 @@
#ifdef WANT_WIDE
# define Wmemchr wmemchr
#else
+# undef memchr
# define Wmemchr memchr
#endif
-libc_hidden_proto(Wmemchr)
-
Wvoid *Wmemchr(const Wvoid *s, Wint c, size_t n)
{
register const Wuchar *r = (const Wuchar *) s;
-#ifdef __BCC__
- /* bcc can optimize the counter if it thinks it is a pointer... */
- register const char *np = (const char *) n;
-#else
-# define np n
-#endif
- while (np) {
+ while (n) {
if (*r == ((Wuchar)c)) {
return (Wvoid *) r; /* silence the warning */
}
++r;
- --np;
+ --n;
}
return NULL;
}
-#undef np
libc_hidden_def(Wmemchr)
diff --git a/libc/string/memcmp.c b/libc/string/memcmp.c
index 762fc23c1..6cb37f417 100644
--- a/libc/string/memcmp.c
+++ b/libc/string/memcmp.c
@@ -10,7 +10,6 @@
#ifdef WANT_WIDE
# define Wmemcmp wmemcmp
#else
-/* Experimentally off - libc_hidden_proto(memcmp) */
# define Wmemcmp memcmp
#endif
diff --git a/libc/string/memcpy.c b/libc/string/memcpy.c
index dc2986778..42436e0b6 100644
--- a/libc/string/memcpy.c
+++ b/libc/string/memcpy.c
@@ -10,26 +10,19 @@
#ifdef WANT_WIDE
# define Wmemcpy wmemcpy
#else
+# undef memcpy
# define Wmemcpy memcpy
#endif
-libc_hidden_proto(Wmemcpy)
-
Wvoid *Wmemcpy(Wvoid * __restrict s1, const Wvoid * __restrict s2, size_t n)
{
register Wchar *r1 = s1;
register const Wchar *r2 = s2;
-#ifdef __BCC__
- while (n--) {
- *r1++ = *r2++;
- }
-#else
while (n) {
*r1++ = *r2++;
--n;
}
-#endif
return s1;
}
diff --git a/libc/string/memmem.c b/libc/string/memmem.c
index 9dcd4c4c0..1b3a0bab6 100644
--- a/libc/string/memmem.c
+++ b/libc/string/memmem.c
@@ -8,7 +8,6 @@
#include "_string.h"
#ifdef __USE_GNU
-/* Experimentally off - libc_hidden_proto(memmem) */
void *memmem(const void *haystack, size_t haystacklen,
const void *needle, size_t needlelen)
{
@@ -38,5 +37,4 @@ void *memmem(const void *haystack, size_t haystacklen,
return NULL;
}
-libc_hidden_def(memmem)
#endif
diff --git a/libc/string/memmove.c b/libc/string/memmove.c
index 0bea9b497..b768b6ea9 100644
--- a/libc/string/memmove.c
+++ b/libc/string/memmove.c
@@ -10,30 +10,11 @@
#ifdef WANT_WIDE
# define Wmemmove wmemmove
#else
-/* Experimentally off - libc_hidden_proto(memmove) */
# define Wmemmove memmove
#endif
Wvoid *Wmemmove(Wvoid *s1, const Wvoid *s2, size_t n)
{
-#ifdef __BCC__
- register Wchar *s = (Wchar *) s1;
- register const Wchar *p = (const Wchar *) s2;
-
- if (p >= s) {
- while (n--) {
- *s++ = *p++;
- }
- } else {
- s += n;
- p += n;
- while (n--) {
- *--s = *--p;
- }
- }
-
- return s1;
-#else
register Wchar *s = (Wchar *) s1;
register const Wchar *p = (const Wchar *) s2;
@@ -50,9 +31,8 @@ Wvoid *Wmemmove(Wvoid *s1, const Wvoid *s2, size_t n)
}
return s1;
-#endif
}
#ifndef WANT_WIDE
-libc_hidden_def(Wmemmove)
+libc_hidden_def(memmove)
#endif
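
The generic path that survives this hunk relies on the same overlap rule the deleted __BCC__ branch spelled out: copy forward when the source does not start below the destination, backward otherwise. As a standalone C sketch, equivalent to the removed branch above (names are illustrative):

    #include <stddef.h>

    void *sketch_memmove(void *s1, const void *s2, size_t n)
    {
        char *s = (char *) s1;
        const char *p = (const char *) s2;

        if (p >= s) {            /* forward copy cannot clobber unread src */
            while (n--)
                *s++ = *p++;
        } else {                 /* overlap: copy from the end instead */
            s += n;
            p += n;
            while (n--)
                *--s = *--p;
        }
        return s1;
    }
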
diff --git a/libc/string/mempcpy.c b/libc/string/mempcpy.c
index 91896434b..d1d752b50 100644
--- a/libc/string/mempcpy.c
+++ b/libc/string/mempcpy.c
@@ -12,26 +12,19 @@
#ifdef WANT_WIDE
# define Wmempcpy wmempcpy
#else
+# undef mempcpy
# define Wmempcpy mempcpy
#endif
-libc_hidden_proto(Wmempcpy)
-
Wvoid *Wmempcpy(Wvoid * __restrict s1, const Wvoid * __restrict s2, size_t n)
{
register Wchar *r1 = s1;
register const Wchar *r2 = s2;
-#ifdef __BCC__
- while (n--) {
- *r1++ = *r2++;
- }
-#else
while (n) {
*r1++ = *r2++;
--n;
}
-#endif
return r1;
}
diff --git a/libc/string/memrchr.c b/libc/string/memrchr.c
index 48ec50a4e..60211f804 100644
--- a/libc/string/memrchr.c
+++ b/libc/string/memrchr.c
@@ -8,31 +8,21 @@
#include "_string.h"
#ifdef __USE_GNU
-
-/* Experimentally off - libc_hidden_proto(memrchr) */
-
void *memrchr(const void *s, int c, size_t n)
{
register const unsigned char *r;
-#ifdef __BCC__
- /* bcc can optimize the counter if it thinks it is a pointer... */
- register const char *np = (const char *) n;
-#else
-#define np n
-#endif
- r = ((unsigned char *)s) + ((size_t) np);
+ r = ((unsigned char *)s) + ((size_t) n);
- while (np) {
+ while (n) {
if (*--r == ((unsigned char)c)) {
return (void *) r; /* silence the warning */
}
- --np;
+ --n;
}
return NULL;
}
-#undef np
libc_hidden_def(memrchr)
#endif
diff --git a/libc/string/memset.c b/libc/string/memset.c
index 6dd20d668..2a7c19dee 100644
--- a/libc/string/memset.c
+++ b/libc/string/memset.c
@@ -10,28 +10,21 @@
#ifdef WANT_WIDE
# define Wmemset wmemset
#else
-/* Experimentally off - libc_hidden_proto(memset) */
+# undef memset
# define Wmemset memset
#endif
Wvoid *Wmemset(Wvoid *s, Wint c, size_t n)
{
register Wuchar *p = (Wuchar *) s;
-#ifdef __BCC__
- /* bcc can optimize the counter if it thinks it is a pointer... */
- register const char *np = (const char *) n;
-#else
-# define np n
-#endif
- while (np) {
+ while (n) {
*p++ = (Wuchar) c;
- --np;
+ --n;
}
return s;
}
-#undef np
#ifndef WANT_WIDE
libc_hidden_def(memset)
diff --git a/libc/string/metag/Makefile b/libc/string/metag/Makefile
new file mode 100644
index 000000000..523cf6842
--- /dev/null
+++ b/libc/string/metag/Makefile
@@ -0,0 +1,13 @@
+# Makefile for uClibc
+#
+# Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org>
+#
+# Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+#
+
+top_srcdir:=../../../
+top_builddir:=../../../
+all: objs
+include $(top_builddir)Rules.mak
+include ../Makefile.in
+include $(top_srcdir)Makerules
diff --git a/libc/string/metag/memchr.S b/libc/string/metag/memchr.S
new file mode 100644
index 000000000..8b48d863c
--- /dev/null
+++ b/libc/string/metag/memchr.S
@@ -0,0 +1,156 @@
+! Copyright (C) 2013 Imagination Technologies Ltd.
+!
+! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+
+ .text
+ .global _memchr
+ .type _memchr,function
+! D0Ar6 src
+! D0Ar2 c
+! D1Ar3 n
+_memchr:
+ CMP D1Ar3, #0
+ BEQ $Lexit_fail
+ !! convert c to unsigned char
+ AND D0Ar2,D0Ar2,#0xff
+ MOV D0Ar6, D1Ar1
+ MOV D1Ar5, D0Ar6
+ !! test alignment
+ AND D1Ar5, D1Ar5, #7
+ CMP D1Ar5, #0
+ BNZ $Lunaligned_loop
+ !! length must be greater than or equal to 8 for aligned loop
+ CMP D1Ar3, #8
+ BGE $Laligned_setup
+$Lunaligned_loop:
+ !! get 1 char from s
+ GETB D0Re0, [D0Ar6++]
+ !! increase alignment counter
+ ADD D1Ar5, D1Ar5, #1
+ !! decrement n
+ SUB D1Ar3, D1Ar3, #1
+ !! exit if we have a match
+ CMP D0Re0, D0Ar2
+ BZ $Lexit_success1
+ !! exit if we have hit the end of the string
+ CMP D1Ar3, #0
+ BZ $Lexit_fail
+ !! fall through if the buffer is aligned now
+ CMP D1Ar5, #8
+ BNE $Lunaligned_loop
+	!! fall through if there are at least 8 bytes left
+ CMP D1Ar3, #8
+ BLT $Lunaligned_loop
+$Laligned_setup:
+ !! fill the c into 4 bytes
+ MOV D0Ar4, D0Ar2
+ LSL D0Ar4, D0Ar4, #8
+ ADD D0Ar4, D0Ar4, D0Ar2
+ LSL D0Ar4, D0Ar4, #8
+ ADD D0Ar4, D0Ar4, D0Ar2
+ LSL D0Ar4, D0Ar4, #8
+ ADD D0Ar4, D0Ar4, D0Ar2
+ !! divide n by 8
+ MOV D1Ar5, D1Ar3
+ LSR D1Ar5, D1Ar5, #3
+$Laligned_loop:
+ !! get 8 chars from s
+ GETL D0Re0, D1Re0, [D0Ar6++]
+ !! decrement loop counter
+ SUB D1Ar5, D1Ar5, #1
+ !! test first 4 chars
+ XOR D0Re0, D0Re0, D0Ar4
+ !! test second 4 chars
+ MOV D0Ar2, D1Re0
+ XOR D1Re0, D0Ar2, D0Ar4
+ !! check for matches in the first 4 chars
+ MOV D0Ar2, D0Re0
+ ADDT D0Re0, D0Re0, #HI(0xfefefeff)
+ ADD D0Re0, D0Re0, #LO(0xfefefeff)
+ XOR D0Ar2, D0Ar2, #-1
+ AND D0Re0, D0Re0, D0Ar2
+ ANDMT D0Re0, D0Re0, #HI(0x80808080)
+ ANDMB D0Re0, D0Re0, #LO(0x80808080)
+ CMP D0Re0, #0
+ BNZ $Lmatch_word1
+ !! check for matches in the second 4 chars
+ MOV D1Ar1, D1Re0
+ ADDT D1Re0, D1Re0, #HI(0xfefefeff)
+ ADD D1Re0, D1Re0, #LO(0xfefefeff)
+ XOR D1Ar1, D1Ar1, #-1
+ AND D1Re0, D1Re0, D1Ar1
+ ANDMT D1Re0, D1Re0, #HI(0x80808080)
+ ANDMB D1Re0, D1Re0, #LO(0x80808080)
+ CMP D1Re0, #0
+ BNZ $Lmatch_word2
+ !! check if we have reached the end of the buffer
+ CMP D1Ar5, #0
+ BNE $Laligned_loop
+ !! exit if there are no chars left to check
+ AND D1Ar3, D1Ar3, #7
+ CMP D1Ar3, #0
+ BZ $Lexit_fail
+ !! recover c
+ AND D0Ar2, D0Ar4, #0xff
+$Lbyte_loop:
+ !! get 1 char from s
+ GETB D0Re0, [D0Ar6++]
+ !! decrement n
+ SUB D1Ar3, D1Ar3, #1
+ !! exit if we have a match
+ CMP D0Re0, D0Ar2
+ BZ $Lexit_success1
+ !! fall through if we have run out of chars
+ CMP D1Ar3, #0
+ BNE $Lbyte_loop
+
+$Lexit_fail:
+ MOV D0Re0, #0
+ B $Lend
+
+$Lmatch_word1:
+ !! move the match word into D1Re0
+ MOV D1Re0, D0Re0
+ !! roll back the buffer pointer by 4 chars
+ SUB D0Ar6, D0Ar6, #4
+$Lmatch_word2:
+ !! roll back the buffer pointer by 4 chars
+ SUB D0Ar6, D0Ar6, #4
+ !! exit if lowest byte is 0
+ MOV D1Ar1, D1Re0
+ AND D1Ar1, D1Ar1, #0xff
+ CMP D1Ar1, #0
+ BNE $Lexit_success2
+ !! advance buffer pointer to the next char
+ ADD D0Ar6, D0Ar6, #1
+ !! shift in the next lowest byte
+ LSR D1Re0, D1Re0, #8
+ !! exit if lowest byte is 0
+ MOV D1Ar1, D1Re0
+ AND D1Ar1, D1Ar1, #0xff
+ CMP D1Ar1, #0
+ BNE $Lexit_success2
+ !! advance buffer pointer to the next char
+ ADD D0Ar6, D0Ar6, #1
+ !! shift in the next lowest byte
+ LSR D1Re0, D1Re0, #8
+ !! exit if lowest byte is 0
+ MOV D1Ar1, D1Re0
+ AND D1Ar1, D1Ar1, #0xff
+ CMP D1Ar1, #0
+ BNE $Lexit_success2
+ !! the match must be in the last byte, exit
+ ADD D0Ar6, D0Ar6, #1
+ B $Lexit_success2
+
+$Lexit_success1:
+ SUB D0Ar6, D0Ar6, #1
+$Lexit_success2:
+ !! return the buffer pointer
+ MOV D0Re0, D0Ar6
+$Lend:
+ MOV PC, D1RtP
+
+ .size _memchr,.-_memchr
+
+libc_hidden_def(memchr)
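
The ADDT/ADD #0xfefefeff, AND with the complement, ANDMT/ANDMB #0x80808080 sequence in the aligned loop is the classic carry-based zero-byte test, applied after XORing in four copies of c so that "byte == c" becomes "byte == 0". A minimal C sketch of the same idea (helper names are hypothetical):

    #include <stdint.h>

    /* non-zero exactly when some byte of w is 0x00; adding 0xfefefeff
       is the same as subtracting 0x01010101 modulo 2^32 */
    static int has_zero_byte(uint32_t w)
    {
        return ((w + 0xfefefeffu) & ~w & 0x80808080u) != 0;
    }

    static int word_has_char(uint32_t w, unsigned char c)
    {
        return has_zero_byte(w ^ (c * 0x01010101u));  /* replicate c */
    }
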
diff --git a/libc/string/metag/memcpy.S b/libc/string/metag/memcpy.S
new file mode 100644
index 000000000..f96c9d131
--- /dev/null
+++ b/libc/string/metag/memcpy.S
@@ -0,0 +1,189 @@
+! Copyright (C) 2013 Imagination Technologies Ltd.
+
+! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+
+ .text
+ .global _memcpy
+ .type _memcpy,function
+! D1Ar1 dst
+! D0Ar2 src
+! D1Ar3 cnt
+! D0Re0 dst
+_memcpy:
+ CMP D1Ar3, #16
+ MOV A1.2, D0Ar2 ! source pointer
+ MOV A0.2, D1Ar1 ! destination pointer
+ MOV A0.3, D1Ar1 ! for return value
+! If there are less than 16 bytes to copy use the byte copy loop
+ BGE $Llong_copy
+
+$Lbyte_copy:
+! Simply copy a byte at a time
+ SUBS TXRPT, D1Ar3, #1
+ BLT $Lend
+$Lloop_byte:
+ GETB D1Re0, [A1.2++]
+ SETB [A0.2++], D1Re0
+ BR $Lloop_byte
+
+$Lend:
+! Finally set return value and return
+ MOV D0Re0, A0.3
+ MOV PC, D1RtP
+
+$Llong_copy:
+ ANDS D1Ar5, D1Ar1, #7 ! test destination alignment
+ BZ $Laligned_dst
+
+! The destination address is not 8 byte aligned. We will copy bytes from
+! the source to the destination until the remaining data has an 8 byte
+! destination address alignment (i.e. we should never copy more than 7
+! bytes here).
+$Lalign_dst:
+ GETB D0Re0, [A1.2++]
+ ADD D1Ar5, D1Ar5, #1 ! dest is aligned when D1Ar5 reaches #8
+ SUB D1Ar3, D1Ar3, #1 ! decrement count of remaining bytes
+ SETB [A0.2++], D0Re0
+ CMP D1Ar5, #8
+ BNE $Lalign_dst
+
+! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte
+! blocks, then jump to the unaligned copy loop or fall through to the aligned
+! copy loop as appropriate.
+$Laligned_dst:
+ MOV D0Ar4, A1.2
+ LSR D1Ar5, D1Ar3, #3 ! D1Ar5 = number of 8 byte blocks
+ ANDS D0Ar4, D0Ar4, #7 ! test source alignment
+ BNZ $Lunaligned_copy ! if unaligned, use unaligned copy loop
+
+! Both source and destination are 8 byte aligned - the easy case.
+$Laligned_copy:
+ LSRS D1Ar5, D1Ar3, #5 ! D1Ar5 = number of 32 byte blocks
+ BZ $Lbyte_copy
+ SUB TXRPT, D1Ar5, #1
+
+$Laligned_32:
+ GETL D0Re0, D1Re0, [A1.2++]
+ GETL D0Ar6, D1Ar5, [A1.2++]
+ SETL [A0.2++], D0Re0, D1Re0
+ SETL [A0.2++], D0Ar6, D1Ar5
+ GETL D0Re0, D1Re0, [A1.2++]
+ GETL D0Ar6, D1Ar5, [A1.2++]
+ SETL [A0.2++], D0Re0, D1Re0
+ SETL [A0.2++], D0Ar6, D1Ar5
+ BR $Laligned_32
+
+! If there are any remaining bytes use the byte copy loop, otherwise we are done
+ ANDS D1Ar3, D1Ar3, #0x1f
+ BNZ $Lbyte_copy
+ B $Lend
+
+! The destination is 8 byte aligned but the source is not, and there are 8
+! or more bytes to be copied.
+$Lunaligned_copy:
+! Adjust the source pointer (A1.2) to the 8 byte boundary before its
+! current value
+ MOV D0Ar4, A1.2
+ MOV D0Ar6, A1.2
+ ANDMB D0Ar4, D0Ar4, #0xfff8
+ MOV A1.2, D0Ar4
+! Save the number of bytes of mis-alignment in D0Ar4 for use later
+ SUBS D0Ar6, D0Ar6, D0Ar4
+ MOV D0Ar4, D0Ar6
+! if there is no mis-alignment after all, use the aligned copy loop
+ BZ $Laligned_copy
+
+! prefetch 8 bytes
+ GETL D0Re0, D1Re0, [A1.2]
+
+ SUB TXRPT, D1Ar5, #1
+
+! There are 3 mis-alignment cases to be considered. Less than 4 bytes, exactly
+! 4 bytes, and more than 4 bytes.
+ CMP D0Ar6, #4
+ BLT $Lunaligned_1_2_3 ! use 1-3 byte mis-alignment loop
+ BZ $Lunaligned_4 ! use 4 byte mis-alignment loop
+
+! The mis-alignment is more than 4 bytes
+$Lunaligned_5_6_7:
+ SUB D0Ar6, D0Ar6, #4
+! Calculate the bit offsets required for the shift operations necessary
+! to align the data.
+! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
+ MULW D0Ar6, D0Ar6, #8
+ MOV D1Ar5, #32
+ SUB D1Ar5, D1Ar5, D0Ar6
+! Move data 4 bytes before we enter the main loop
+ MOV D0Re0, D1Re0
+
+$Lloop_5_6_7:
+ GETL D0Ar2, D1Ar1, [++A1.2]
+! form 64-bit data in D0Re0, D1Re0
+ LSR D0Re0, D0Re0, D0Ar6
+ MOV D1Re0, D0Ar2
+ LSL D1Re0, D1Re0, D1Ar5
+ ADD D0Re0, D0Re0, D1Re0
+
+ LSR D0Ar2, D0Ar2, D0Ar6
+ LSL D1Re0, D1Ar1, D1Ar5
+ ADD D1Re0, D1Re0, D0Ar2
+
+ SETL [A0.2++], D0Re0, D1Re0
+ MOV D0Re0, D1Ar1
+ BR $Lloop_5_6_7
+
+ B $Lunaligned_end
+
+$Lunaligned_1_2_3:
+! Calculate the bit offsets required for the shift operations necessary
+! to align the data.
+! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
+ MULW D0Ar6, D0Ar6, #8
+ MOV D1Ar5, #32
+ SUB D1Ar5, D1Ar5, D0Ar6
+
+$Lloop_1_2_3:
+! form 64-bit data in D0Re0,D1Re0
+ LSR D0Re0, D0Re0, D0Ar6
+ LSL D1Ar1, D1Re0, D1Ar5
+ ADD D0Re0, D0Re0, D1Ar1
+ MOV D0Ar2, D1Re0
+ LSR D0FrT, D0Ar2, D0Ar6
+ GETL D0Ar2, D1Ar1, [++A1.2]
+
+ MOV D1Re0, D0Ar2
+ LSL D1Re0, D1Re0, D1Ar5
+ ADD D1Re0, D1Re0, D0FrT
+
+ SETL [A0.2++], D0Re0, D1Re0
+ MOV D0Re0, D0Ar2
+ MOV D1Re0, D1Ar1
+ BR $Lloop_1_2_3
+
+ B $Lunaligned_end
+
+! The 4 byte mis-alignment case - this does not require any shifting, just a
+! shuffling of registers.
+$Lunaligned_4:
+ MOV D0Re0, D1Re0
+$Lloop_4:
+ GETL D0Ar2, D1Ar1, [++A1.2]
+ MOV D1Re0, D0Ar2
+ SETL [A0.2++], D0Re0, D1Re0
+ MOV D0Re0, D1Ar1
+ BR $Lloop_4
+
+$Lunaligned_end:
+! If there are no remaining bytes to copy, we are done.
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lend
+! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte
+! address of the remaining bytes, and fall through to the byte copy loop.
+ MOV D0Ar6, A1.2
+ ADD D1Ar5, D0Ar4, D0Ar6
+ MOV A1.2, D1Ar5
+ B $Lbyte_copy
+
+ .size _memcpy,.-_memcpy
+
+libc_hidden_def(memcpy)
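
The three mis-alignment loops above all use one shift-and-merge scheme: read aligned words, keep the leftover bytes of the previous word in a hold register, and stitch each output word from the hold and the next load. A C sketch of one step, shown with 32-bit words and a little-endian layout for brevity (the assembly works on 64-bit GETL/SETL pairs; the helper is illustrative):

    #include <stdint.h>
    #include <stddef.h>

    /* off = source mis-alignment in bytes, 1..3 */
    static void copy_words_unaligned(uint32_t *dst, const uint32_t *asrc,
                                     size_t nwords, unsigned off)
    {
        unsigned sh1 = 8 * off;        /* bit offset          */
        unsigned sh2 = 32 - sh1;       /* its complement      */
        uint32_t hold = *asrc++;       /* prefetch, as above  */

        while (nwords--) {
            uint32_t next = *asrc++;
            *dst++ = (hold >> sh1) | (next << sh2);
            hold = next;
        }
    }
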
diff --git a/libc/string/metag/memmove.S b/libc/string/metag/memmove.S
new file mode 100644
index 000000000..3416fd558
--- /dev/null
+++ b/libc/string/metag/memmove.S
@@ -0,0 +1,350 @@
+! Copyright (C) 2013 Imagination Technologies Ltd.
+
+! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+
+
+ .text
+ .global _memmove
+ .type _memmove,function
+! D1Ar1 dst
+! D0Ar2 src
+! D1Ar3 cnt
+! D0Re0 dst
+_memmove:
+ CMP D1Ar3, #0
+ MOV D0Re0, D1Ar1
+ BZ $LEND2
+ MSETL [A0StP], D0.5, D0.6, D0.7
+ MOV D1Ar5, D0Ar2
+ CMP D1Ar1, D1Ar5
+ BLT $Lforwards_copy
+ SUB D0Ar4, D1Ar1, D1Ar3
+ ADD D0Ar4, D0Ar4, #1
+ CMP D0Ar2, D0Ar4
+ BLT $Lforwards_copy
+ ! should copy backwards
+ MOV D1Re0, D0Ar2
+ ! adjust pointer to the end of mem
+ ADD D0Ar2, D1Re0, D1Ar3
+ ADD D1Ar1, D1Ar1, D1Ar3
+
+ MOV A1.2, D0Ar2
+ MOV A0.2, D1Ar1
+ CMP D1Ar3, #8
+ BLT $Lbbyte_loop
+
+ MOV D0Ar4, D0Ar2
+ MOV D1Ar5, D1Ar1
+
+ ! test 8 byte alignment
+ ANDS D1Ar5, D1Ar5, #7
+ BNE $Lbdest_unaligned
+
+ ANDS D0Ar4, D0Ar4, #7
+ BNE $Lbsrc_unaligned
+
+ LSR D1Ar5, D1Ar3, #3
+
+$Lbaligned_loop:
+ GETL D0Re0, D1Re0, [--A1.2]
+ SETL [--A0.2], D0Re0, D1Re0
+ SUBS D1Ar5, D1Ar5, #1
+ BNE $Lbaligned_loop
+
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lbbyte_loop_exit
+$Lbbyte_loop:
+ GETB D1Re0, [--A1.2]
+ SETB [--A0.2], D1Re0
+ SUBS D1Ar3, D1Ar3, #1
+ BNE $Lbbyte_loop
+$Lbbyte_loop_exit:
+ MOV D0Re0, A0.2
+$LEND:
+ SUB A0.2, A0StP, #24
+ MGETL D0.5, D0.6, D0.7, [A0.2]
+ SUB A0StP, A0StP, #24
+$LEND2:
+ MOV PC, D1RtP
+
+$Lbdest_unaligned:
+ GETB D0Re0, [--A1.2]
+ SETB [--A0.2], D0Re0
+ SUBS D1Ar5, D1Ar5, #1
+ SUB D1Ar3, D1Ar3, #1
+ BNE $Lbdest_unaligned
+ CMP D1Ar3, #8
+ BLT $Lbbyte_loop
+$Lbsrc_unaligned:
+ LSR D1Ar5, D1Ar3, #3
+ ! adjust A1.2
+ MOV D0Ar4, A1.2
+ ! save original address
+ MOV D0Ar6, A1.2
+
+ ADD D0Ar4, D0Ar4, #7
+ ANDMB D0Ar4, D0Ar4, #0xfff8
+ ! new address is the 8-byte aligned one above the original
+ MOV A1.2, D0Ar4
+
+ ! A0.2 dst 64-bit is aligned
+ ! measure the gap size
+ SUB D0Ar6, D0Ar4, D0Ar6
+ MOVS D0Ar4, D0Ar6
+ ! keep this information for the later adjustment
+ ! both aligned
+ BZ $Lbaligned_loop
+
+ ! prefetch
+ GETL D0Re0, D1Re0, [--A1.2]
+
+ CMP D0Ar6, #4
+ BLT $Lbunaligned_1_2_3
+ ! 32-bit aligned
+ BZ $Lbaligned_4
+
+ SUB D0Ar6, D0Ar6, #4
+ ! D1.6 stores the gap size in bits
+ MULW D1.6, D0Ar6, #8
+ MOV D0.6, #32
+ ! D0.6 stores the complement of the gap size
+ SUB D0.6, D0.6, D1.6
+
+$Lbunaligned_5_6_7:
+ GETL D0.7, D1.7, [--A1.2]
+ ! form 64-bit data in D0Re0, D1Re0
+ MOV D1Re0, D0Re0
+ ! D1Re0 << gap-size
+ LSL D1Re0, D1Re0, D1.6
+ MOV D0Re0, D1.7
+ ! D0Re0 >> complement
+ LSR D0Re0, D0Re0, D0.6
+ MOV D1.5, D0Re0
+ ! combine the both
+ ADD D1Re0, D1Re0, D1.5
+
+ MOV D1.5, D1.7
+ LSL D1.5, D1.5, D1.6
+ MOV D0Re0, D0.7
+ LSR D0Re0, D0Re0, D0.6
+ MOV D0.5, D1.5
+ ADD D0Re0, D0Re0, D0.5
+
+ SETL [--A0.2], D0Re0, D1Re0
+ MOV D0Re0, D0.7
+ MOV D1Re0, D1.7
+ SUBS D1Ar5, D1Ar5, #1
+ BNE $Lbunaligned_5_6_7
+
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lbbyte_loop_exit
+ ! Adjust A1.2
+ ! A1.2 <- A1.2 +8 - gapsize
+ ADD A1.2, A1.2, #8
+ SUB A1.2, A1.2, D0Ar4
+ B $Lbbyte_loop
+
+$Lbunaligned_1_2_3:
+ MULW D1.6, D0Ar6, #8
+ MOV D0.6, #32
+ SUB D0.6, D0.6, D1.6
+
+$Lbunaligned_1_2_3_loop:
+ GETL D0.7, D1.7, [--A1.2]
+ ! form 64-bit data in D0Re0, D1Re0
+ LSL D1Re0, D1Re0, D1.6
+ ! save D0Re0 for later use
+ MOV D0.5, D0Re0
+ LSR D0Re0, D0Re0, D0.6
+ MOV D1.5, D0Re0
+ ADD D1Re0, D1Re0, D1.5
+
+	! original data in D0Re0
+ MOV D1.5, D0.5
+ LSL D1.5, D1.5, D1.6
+ MOV D0Re0, D1.7
+ LSR D0Re0, D0Re0, D0.6
+ MOV D0.5, D1.5
+ ADD D0Re0, D0Re0, D0.5
+
+ SETL [--A0.2], D0Re0, D1Re0
+ MOV D0Re0, D0.7
+ MOV D1Re0, D1.7
+ SUBS D1Ar5, D1Ar5, #1
+ BNE $Lbunaligned_1_2_3_loop
+
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lbbyte_loop_exit
+ ! Adjust A1.2
+ ADD A1.2, A1.2, #8
+ SUB A1.2, A1.2, D0Ar4
+ B $Lbbyte_loop
+
+$Lbaligned_4:
+ GETL D0.7, D1.7, [--A1.2]
+ MOV D1Re0, D0Re0
+ MOV D0Re0, D1.7
+ SETL [--A0.2], D0Re0, D1Re0
+ MOV D0Re0, D0.7
+ MOV D1Re0, D1.7
+ SUBS D1Ar5, D1Ar5, #1
+ BNE $Lbaligned_4
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lbbyte_loop_exit
+ ! Adjust A1.2
+ ADD A1.2, A1.2, #8
+ SUB A1.2, A1.2, D0Ar4
+ B $Lbbyte_loop
+
+$Lforwards_copy:
+ MOV A1.2, D0Ar2
+ MOV A0.2, D1Ar1
+ CMP D1Ar3, #8
+ BLT $Lfbyte_loop
+
+ MOV D0Ar4, D0Ar2
+ MOV D1Ar5, D1Ar1
+
+ ANDS D1Ar5, D1Ar5, #7
+ BNE $Lfdest_unaligned
+
+ ANDS D0Ar4, D0Ar4, #7
+ BNE $Lfsrc_unaligned
+
+ LSR D1Ar5, D1Ar3, #3
+
+$Lfaligned_loop:
+ GETL D0Re0, D1Re0, [A1.2++]
+ SUBS D1Ar5, D1Ar5, #1
+ SETL [A0.2++], D0Re0, D1Re0
+ BNE $Lfaligned_loop
+
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lfbyte_loop_exit
+$Lfbyte_loop:
+ GETB D1Re0, [A1.2++]
+ SETB [A0.2++], D1Re0
+ SUBS D1Ar3, D1Ar3, #1
+ BNE $Lfbyte_loop
+$Lfbyte_loop_exit:
+ MOV D0Re0, D1Ar1
+ B $LEND
+
+$Lfdest_unaligned:
+ GETB D0Re0, [A1.2++]
+ ADD D1Ar5, D1Ar5, #1
+ SUB D1Ar3, D1Ar3, #1
+ SETB [A0.2++], D0Re0
+ CMP D1Ar5, #8
+ BNE $Lfdest_unaligned
+ CMP D1Ar3, #8
+ BLT $Lfbyte_loop
+$Lfsrc_unaligned:
+ ! adjust A1.2
+ LSR D1Ar5, D1Ar3, #3
+
+ MOV D0Ar4, A1.2
+ MOV D0Ar6, A1.2
+ ANDMB D0Ar4, D0Ar4, #0xfff8
+ MOV A1.2, D0Ar4
+
+ ! A0.2 dst 64-bit is aligned
+ SUB D0Ar6, D0Ar6, D0Ar4
+ ! keep the information for the later adjustment
+ MOVS D0Ar4, D0Ar6
+
+ ! both aligned
+ BZ $Lfaligned_loop
+
+ ! prefetch
+ GETL D0Re0, D1Re0, [A1.2]
+
+ CMP D0Ar6, #4
+ BLT $Lfunaligned_1_2_3
+ BZ $Lfaligned_4
+
+ SUB D0Ar6, D0Ar6, #4
+ MULW D0.6, D0Ar6, #8
+ MOV D1.6, #32
+ SUB D1.6, D1.6, D0.6
+
+$Lfunaligned_5_6_7:
+ GETL D0.7, D1.7, [++A1.2]
+ ! form 64-bit data in D0Re0, D1Re0
+ MOV D0Re0, D1Re0
+ LSR D0Re0, D0Re0, D0.6
+ MOV D1Re0, D0.7
+ LSL D1Re0, D1Re0, D1.6
+ MOV D0.5, D1Re0
+ ADD D0Re0, D0Re0, D0.5
+
+ MOV D0.5, D0.7
+ LSR D0.5, D0.5, D0.6
+ MOV D1Re0, D1.7
+ LSL D1Re0, D1Re0, D1.6
+ MOV D1.5, D0.5
+ ADD D1Re0, D1Re0, D1.5
+
+ SETL [A0.2++], D0Re0, D1Re0
+ MOV D0Re0, D0.7
+ MOV D1Re0, D1.7
+ SUBS D1Ar5, D1Ar5, #1
+ BNE $Lfunaligned_5_6_7
+
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lfbyte_loop_exit
+ ! Adjust A1.2
+ ADD A1.2, A1.2, D0Ar4
+ B $Lfbyte_loop
+
+$Lfunaligned_1_2_3:
+ MULW D0.6, D0Ar6, #8
+ MOV D1.6, #32
+ SUB D1.6, D1.6, D0.6
+
+$Lfunaligned_1_2_3_loop:
+ GETL D0.7, D1.7, [++A1.2]
+ ! form 64-bit data in D0Re0, D1Re0
+ LSR D0Re0, D0Re0, D0.6
+ MOV D1.5, D1Re0
+ LSL D1Re0, D1Re0, D1.6
+ MOV D0.5, D1Re0
+ ADD D0Re0, D0Re0, D0.5
+
+ MOV D0.5, D1.5
+ LSR D0.5, D0.5, D0.6
+ MOV D1Re0, D0.7
+ LSL D1Re0, D1Re0, D1.6
+ MOV D1.5, D0.5
+ ADD D1Re0, D1Re0, D1.5
+
+ SETL [A0.2++], D0Re0, D1Re0
+ MOV D0Re0, D0.7
+ MOV D1Re0, D1.7
+ SUBS D1Ar5, D1Ar5, #1
+ BNE $Lfunaligned_1_2_3_loop
+
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lfbyte_loop_exit
+ ! Adjust A1.2
+ ADD A1.2, A1.2, D0Ar4
+ B $Lfbyte_loop
+
+$Lfaligned_4:
+ GETL D0.7, D1.7, [++A1.2]
+ MOV D0Re0, D1Re0
+ MOV D1Re0, D0.7
+ SETL [A0.2++], D0Re0, D1Re0
+ MOV D0Re0, D0.7
+ MOV D1Re0, D1.7
+ SUBS D1Ar5, D1Ar5, #1
+ BNE $Lfaligned_4
+ ANDS D1Ar3, D1Ar3, #7
+ BZ $Lfbyte_loop_exit
+ ! Adjust A1.2
+ ADD A1.2, A1.2, D0Ar4
+ B $Lfbyte_loop
+
+ .size _memmove,.-_memmove
+
+libc_hidden_def(memmove)
diff --git a/libc/string/metag/memset.S b/libc/string/metag/memset.S
new file mode 100644
index 000000000..8d4e9a158
--- /dev/null
+++ b/libc/string/metag/memset.S
@@ -0,0 +1,90 @@
+! Copyright (C) 2013 Imagination Technologies Ltd.
+
+! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+
+
+ .text
+ .global _memset
+ .type _memset,function
+! D1Ar1 dst
+! D0Ar2 c
+! D1Ar3 cnt
+! D0Re0 dst
+_memset:
+ AND D0Ar2,D0Ar2,#0xFF ! Ensure a byte input value
+	MULW	D0Ar2,D0Ar2,#0x0101	! Duplicate byte value into bits 0-15
+	ANDS	D0Ar4,D1Ar1,#7		! Extract bottom 3 bits of dst
+	LSL	D0Re0,D0Ar2,#16		! Duplicate byte value into bits 16-31
+	ADD	A0.2,D0Ar2,D0Re0	! Duplicate byte value into all 4 bytes (A0.2)
+ MOV D0Re0,D1Ar1 ! Return dst
+ BZ $LLongStub ! if start address is aligned
+ ! start address is not aligned on an 8 byte boundary, so we
+ ! need the number of bytes up to the next 8 byte address
+ ! boundary, or the length of the string if less than 8, in D1Ar5
+ MOV D0Ar2,#8 ! Need 8 - N in D1Ar5 ...
+ SUB D1Ar5,D0Ar2,D0Ar4 ! ... subtract N
+ CMP D1Ar3,D1Ar5
+ MOVMI D1Ar5,D1Ar3
+ B $LByteStub ! dst is mis-aligned, do $LByteStub
+
+!
+! Preamble to LongLoop which generates 4*8 bytes per iteration (5 cycles)
+!
+$LLongStub:
+ LSRS D0Ar2,D1Ar3,#5
+ AND D1Ar3,D1Ar3,#0x1F
+ MOV A1.2,A0.2
+ BEQ $LLongishStub
+ SUB TXRPT,D0Ar2,#1
+ CMP D1Ar3,#0
+$LLongLoop:
+ SETL [D1Ar1++],A0.2,A1.2
+ SETL [D1Ar1++],A0.2,A1.2
+ SETL [D1Ar1++],A0.2,A1.2
+ SETL [D1Ar1++],A0.2,A1.2
+ BR $LLongLoop
+ BZ $Lexit
+!
+! Preamble to LongishLoop which generates 1*8 bytes per iteration (2 cycles)
+!
+$LLongishStub:
+ LSRS D0Ar2,D1Ar3,#3
+ AND D1Ar3,D1Ar3,#0x7
+ MOV D1Ar5,D1Ar3
+ BEQ $LByteStub
+ SUB TXRPT,D0Ar2,#1
+ CMP D1Ar3,#0
+$LLongishLoop:
+ SETL [D1Ar1++],A0.2,A1.2
+ BR $LLongishLoop
+ BZ $Lexit
+!
+! This does a byte structured burst of up to 7 bytes
+!
+! D1Ar1 should point to the location required
+! D1Ar3 should be the remaining total byte count
+! D1Ar5 should be burst size (<= D1Ar3)
+!
+$LByteStub:
+ SUBS D1Ar3,D1Ar3,D1Ar5 ! Reduce count
+ ADD D1Ar1,D1Ar1,D1Ar5 ! Advance pointer to end of area
+ MULW D1Ar5,D1Ar5,#4 ! Scale to (1*4), (2*4), (3*4)
+ SUB D1Ar5,D1Ar5,#(8*4) ! Rebase to -(7*4), -(6*4), -(5*4), ...
+ MOV A1.2,D1Ar5
+ SUB PC,CPC1,A1.2 ! Jump into table below
+ SETB [D1Ar1+#(-7)],A0.2
+ SETB [D1Ar1+#(-6)],A0.2
+ SETB [D1Ar1+#(-5)],A0.2
+ SETB [D1Ar1+#(-4)],A0.2
+ SETB [D1Ar1+#(-3)],A0.2
+ SETB [D1Ar1+#(-2)],A0.2
+ SETB [D1Ar1+#(-1)],A0.2
+!
+! Return if all data has been output, otherwise do $LLongStub
+!
+ BNZ $LLongStub
+$Lexit:
+ MOV PC,D1RtP
+ .size _memset,.-_memset
+
+libc_hidden_def(memset)
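
The entry sequence builds the fill pattern by replicating the byte: MULW by #0x0101 puts it in bits 0-15, and the LSL/ADD pair widens that to all four bytes; the SETL stores then write the A0.2/A1.2 pair for 8 bytes per store. The C equivalent of the replication step (a sketch, not the library's code):

    #include <stdint.h>

    static uint32_t replicate_byte(unsigned char c)
    {
        uint32_t half = (uint32_t)c * 0x0101u;  /* MULW: bits 0-15    */
        return half + (half << 16);             /* LSL/ADD: all bytes */
    }
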
diff --git a/libc/string/metag/strchr.S b/libc/string/metag/strchr.S
new file mode 100644
index 000000000..6b0f2ea43
--- /dev/null
+++ b/libc/string/metag/strchr.S
@@ -0,0 +1,167 @@
+! Copyright (C) 2013 Imagination Technologies Ltd.
+
+! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+
+
+#include <features.h>
+
+ .text
+ .global _strchr
+ .type _strchr, function
+! D1Ar1 src
+! D0Ar2 c
+_strchr:
+ AND D0Ar2,D0Ar2,#0xff ! Drop all but 8 bits of c
+ MOV D1Ar5, D1Ar1 ! Copy src to D1Ar5
+ AND D1Ar5, D1Ar5, #7 ! Check 64 bit alignment
+ CMP D1Ar5, #0
+ BZ $Laligned64bit ! Jump to 64 bit aligned strchr
+$Lalign64bit:
+ GETB D0Re0, [D1Ar1++] ! Get the next character
+ ADD D1Ar5, D1Ar5, #1 ! Increment alignment counter
+ CMP D0Re0, D0Ar2 ! Is the char c
+ BZ $Lcharatprevious ! If so exit returning position
+ CMP D0Re0, #0 ! End of string?
+ BZ $Lnotfound ! If so exit
+ CMP D1Ar5, #8 ! Are we aligned 64bit yet?
+ BNZ $Lalign64bit ! If not keep aligning
+$Laligned64bit: ! src is 64bit aligned
+ MOV D0Ar4, D0Ar2 ! put c into D0Ar4
+ LSL D0Ar4, D0Ar4, #8 ! Shift it up
+ ADD D0Ar4, D0Ar4, D0Ar2 ! another c
+ LSL D0Ar4, D0Ar4, #8 ! shift
+ ADD D0Ar4, D0Ar4, D0Ar2 ! another c
+ LSL D0Ar4, D0Ar4, #8 ! shift
+ ADD D0Ar4, D0Ar4, D0Ar2 ! 4 copies of c
+$Lcheck8bytes:
+	GETL D0Re0, D1Re0, [D1Ar1++] ! grab 8 bytes
+ MOV A0.3, D0Re0 ! save for later use
+ ! first word
+ ! check for \0
+ MOV D0Ar2, D0Re0 ! D0Ar2 is a scratch now
+ ADDT D0Re0, D0Re0, #HI(0xfefefeff) ! Do 4 1-byte compares
+ ADD D0Re0, D0Re0, #LO(0xfefefeff)
+ XOR D0Ar2, D0Ar2, #-1
+ AND D0Re0, D0Re0, D0Ar2
+ ANDMT D0Re0, D0Re0, #HI(0x80808080)
+ ANDMB D0Re0, D0Re0, #LO(0x80808080)
+ CMP D0Re0, #0
+ BNZ $Lnullinword1 ! found \0 (or c if c==\0)
+
+ ! Check for c
+ MOV D0Re0, A0.3 ! restore the first word
+ XOR D0Re0, D0Re0, D0Ar4
+ MOV D0Ar2, D0Re0 ! DO 4 1-byte compares
+ ADDT D0Re0, D0Re0, #HI(0xfefefeff)
+ ADD D0Re0, D0Re0, #LO(0xfefefeff)
+ XOR D0Ar2, D0Ar2, #-1
+ AND D0Re0, D0Re0, D0Ar2
+ ANDMT D0Re0, D0Re0, #HI(0x80808080)
+ ANDMB D0Re0, D0Re0, #LO(0x80808080)
+ CMP D0Re0, #0
+ BNZ $Lcharinword1 ! found c
+
+ ! second word
+ ! check for \0
+ MOV A0.3, D1Re0 ! save for later use
+ MOV D1Ar3, D1Re0
+ ADDT D1Re0, D1Re0, #HI(0xfefefeff) ! Do 4 1-byte compares
+ ADD D1Re0, D1Re0, #LO(0xfefefeff)
+ XOR D1Ar3, D1Ar3, #-1
+ AND D1Re0, D1Re0, D1Ar3
+ ANDMT D1Re0, D1Re0, #HI(0x80808080)
+ ANDMB D1Re0, D1Re0, #LO(0x80808080)
+ CMP D1Re0, #0
+ BNZ $Lnullinword2 ! Found \0 (or c if c==\0)
+
+ MOV D0.4, A0.3 ! restore the second word
+ XOR D1Re0, D0.4, D0Ar4 ! test c
+
+ MOV D1Ar3, D1Re0
+ ADDT D1Re0, D1Re0, #HI(0xfefefeff) ! Do 4 1-byte compares
+ ADD D1Re0, D1Re0, #LO(0xfefefeff)
+ XOR D1Ar3, D1Ar3, #-1
+ AND D1Re0, D1Re0, D1Ar3
+ ANDMT D1Re0, D1Re0, #HI(0x80808080)
+ ANDMB D1Re0, D1Re0, #LO(0x80808080)
+ CMP D1Re0, #0
+ BNZ $Lcharinword2 ! found c
+
+ B $Lcheck8bytes ! Keep checking
+
+$Lnullinword1: ! found \0 somewhere, check for c too
+ SUB D1Ar1, D1Ar1, #4
+$Lnullinword2:
+ SUB D1Ar1, D1Ar1, #4
+ AND D0Ar2, D0Ar4, #0xff ! restore c
+ MOV D0Re0, A0.3 ! restore the word
+ MOV D0.4, D0Re0 ! for shifting later
+ AND D0Re0, D0Re0, #0xff ! take first byte of word
+ CMP D0Re0, D0Ar2
+ BZ $Lcharatcurrent ! found c
+	CMP D0Re0, #0
+ BZ $Lnotfound ! found \0
+
+ ADD D1Ar1, D1Ar1, #1
+ LSR D0.4, D0.4, #8
+ MOV D0Re0, D0.4
+ AND D0Re0, D0Re0, #0xff ! take second byte of word
+ CMP D0Re0, D0Ar2
+ BZ $Lcharatcurrent ! found c
+ CMP D0Re0, #0
+ BZ $Lnotfound ! found \0
+
+ ADD D1Ar1, D1Ar1, #1
+ LSR D0.4, D0.4, #8
+ MOV D0Re0, D0.4
+ AND D0Re0, D0Re0, #0xff ! take third byte of word
+ CMP D0Re0, D0Ar2
+ BZ $Lcharatcurrent ! found c
+ CMP D0Re0, #0
+ BZ $Lnotfound ! found \0
+
+ ADD D1Ar1, D1Ar1, #1 ! move to 4th byte
+ CMP D0Ar2, #0 ! If c was \0
+ BZ $Lcharatcurrent ! c has been found!
+
+$Lnotfound:
+ MOV D0Re0, #0 ! End of string c not found
+ B $Lend
+
+$Lcharinword1: ! found c in first word
+ MOV D1Re0, D0Re0
+ SUB D1Ar1, D1Ar1, #4
+$Lcharinword2: ! found c in second word
+ SUB D1Ar1, D1Ar1, #4
+
+ AND D0Re0, D1Re0, #0xff ! First byte
+ CMP D0Re0, #0 ! Test c (zero indicates c due
+ ! to the 4 1-byte compare code)
+ BNE $Lcharatcurrent
+ ADD D1Ar1, D1Ar1, #1
+
+ LSR D1Re0, D1Re0, #8
+ AND D0Re0, D1Re0, #0xff ! Second byte
+ CMP D0Re0, #0 ! Test c (indicated by zero)
+ BNE $Lcharatcurrent
+ ADD D1Ar1, D1Ar1, #1
+
+ LSR D1Re0, D1Re0, #8
+ AND D0Re0, D1Re0, #0xff ! Third byte
+ CMP D0Re0, #0 ! Test c (indicated by zero)
+ BNE $Lcharatcurrent
+ ADD D1Ar1, D1Ar1, #1 ! Must be the fourth byte
+ B $Lcharatcurrent
+
+$Lcharatprevious:
+ SUB D1Ar1, D1Ar1, #1 ! Fix-up pointer
+$Lcharatcurrent:
+ MOV D0Re0, D1Ar1 ! Return the string pointer
+$Lend:
+ MOV PC, D1RtP
+ .size _strchr,.-_strchr
+
+libc_hidden_def(strchr)
+#ifdef __UCLIBC_SUSV3_LEGACY__
+strong_alias(strchr,index)
+#endif
diff --git a/libc/string/metag/strcmp.S b/libc/string/metag/strcmp.S
new file mode 100644
index 000000000..3278ffaa5
--- /dev/null
+++ b/libc/string/metag/strcmp.S
@@ -0,0 +1,65 @@
+! Copyright (C) 2013 Imagination Technologies Ltd.
+
+! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+
+
+#include <features.h>
+
+ .text
+ .global _strcmp
+ .type _strcmp,function
+!D1Ar1 s1
+!D0Ar2 s2
+_strcmp:
+ TST D1Ar1,#3
+ TSTZ D0Ar2,#3
+ MOVT D1Re0,#0x0101
+ ADD D1Re0,D1Re0,#0x0101
+ BNZ $Lstrcmp_slow
+ GETD D1Ar3,[D1Ar1+#4++] ! Load 32-bits from s1
+ GETD D1Ar5,[D0Ar2+#4++] ! Load 32-bits from s2
+ LSL D0FrT,D1Re0,#7 ! D0FrT = 0x80808080
+$Lstrcmp4_loop:
+ SUB D0Re0,D1Ar3,D1Re0 ! D1Re0 = 0x01010101
+ MOV D0Ar6,D1Ar3
+ SUBS D0Ar4,D1Ar3,D1Ar5 ! Calculate difference
+ XOR D0Ar6,D0Ar6,#-1
+ GETD D1Ar3,[D1Ar1+#4++] ! Load 32-bits from s1
+ AND D0Re0,D0Re0,D0Ar6
+ ANDSZ D0Ar6,D0Re0,D0FrT ! D0FrT = 0x80808080
+ GETD D1Ar5,[D0Ar2+#4++] ! Load 32-bits from s2
+ BZ $Lstrcmp4_loop
+ AND D0Ar6, D0Re0, D0FrT ! D0FrT = 0x80808080
+!
+! Either they are different or they both contain a NULL + junk
+!
+$Lstrcmp4_end:
+ LSLS D0Re0,D0Ar4,#24 ! Was Byte[0] the same?
+	LSLSZ	D0Ar2,D0Ar6,#24	! Yes: AND they were not zero?
+	LSLSZ	D0Re0,D0Ar4,#16	! Yes: Was Byte[1] the same?
+	LSLSZ	D0Ar2,D0Ar6,#16	! Yes: AND they were not zero?
+	LSLSZ	D0Re0,D0Ar4,#8	! Yes: Was Byte[2] the same?
+	LSLSZ	D0Ar2,D0Ar6,#8	! Yes: AND they were not zero?
+	MOVZ	D0Re0,D0Ar4	! Yes: Must be Byte[3], that's the result
+ ASR D0Re0,D0Re0,#24 ! Sign extend result to integer
+ MOV PC,D1RtP
+!
+! Misaligned case, byte at a time
+!
+$Lstrcmp_slow:
+ GETB D1Ar3,[D1Ar1++] ! Load char from s1
+ GETB D1Ar5,[D0Ar2++] ! Load char from s2
+ CMP D1Ar3,#1 ! Null -> C and NZ, rest -> NC (\1->Z)
+ CMPNC D1Ar3,D1Ar5 ! NOT Null: Same -> Z, else -> NZ
+ BZ $Lstrcmp_slow ! NOT Null and Same: Loop
+ SUB D0Re0,D1Ar3,D1Ar5 ! Generate result
+ MOV PC,D1RtP
+
+ .size _strcmp,.-_strcmp
+
+
+libc_hidden_def(strcmp)
+#ifndef __UCLIBC_HAS_LOCALE__
+strong_alias(strcmp,strcoll)
+libc_hidden_def(strcoll)
+#endif
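
Once the word loop exits, the LSLS/LSLSZ ladder walks the difference and the zero-byte mask together to find the first byte position that either differs or terminated the string. A byte-order-explicit C sketch of that end-game, assuming the little-endian byte numbering the shifts imply (names are illustrative):

    #include <stdint.h>

    /* w1, w2: the last words compared; the loop guarantees they differ
       or one of them contains a NUL byte */
    static int strcmp_tail(uint32_t w1, uint32_t w2)
    {
        for (int i = 0; i < 4; i++) {
            unsigned char c1 = (unsigned char)(w1 >> (8 * i));
            unsigned char c2 = (unsigned char)(w2 >> (8 * i));
            if (c1 != c2 || c1 == '\0')
                return (int)c1 - (int)c2;
        }
        return 0;
    }
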
diff --git a/libc/string/metag/strcpy.S b/libc/string/metag/strcpy.S
new file mode 100644
index 000000000..529ac9279
--- /dev/null
+++ b/libc/string/metag/strcpy.S
@@ -0,0 +1,94 @@
+! Copyright (C) 2013 Imagination Technologies Ltd.
+
+! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+
+
+ .text
+ .global _strcpy
+ .type _strcpy,function
+! D1Ar1 dst
+! D0Ar2 src
+
+_strcpy:
+ MOV A1.2, D1Ar1
+
+ ! test 4 byte alignment of src
+ ANDS D0Ar4, D0Ar2, #3
+ BNZ $Lbyteloop
+
+ ! test 4 byte alignment of dest
+ ANDS D1Ar5, D1Ar1, #3
+ BNZ $Lbyteloop
+
+ ! load mask values for aligned loops
+ MOVT D1Ar3, #HI(0xfefefeff)
+ ADD D1Ar3, D1Ar3, #LO(0xfefefeff)
+ MOVT D0FrT, #HI(0x80808080)
+ ADD D0FrT, D0FrT, #LO(0x80808080)
+
+ ! test 8 byte alignment of src
+ ANDS D0Ar4, D0Ar2, #7
+ BNZ $Lwordloop
+
+ ! test 8 byte alignment of dest
+ ANDS D1Ar5, D1Ar1, #7
+ BNZ $Lwordloop
+
+$L8byteloop:
+ GETL D1Ar5, D0Ar6, [D0Ar2++]
+ MOV D1Re0, D1Ar5
+ MOV D0Re0, D1Ar5
+ ADD D1Re0, D1Re0, D1Ar3
+ XOR D0Re0, D0Re0, #-1
+ AND D1Re0, D1Re0, D0Re0
+ ANDS D1Re0, D1Re0, D0FrT
+ BNZ $Lnullfound ! NULL in first word
+
+ MOV D1Re0, D0Ar6
+ MOV D0Re0, D0Ar6
+ ADD D1Re0, D1Re0, D1Ar3
+ XOR D0Re0, D0Re0, #-1
+ AND D1Re0, D1Re0, D0Re0
+ ANDS D1Re0, D1Re0, D0FrT
+ BNZ $Lnullfound2 ! NULL in the second word
+
+ SETL [A1.2++], D1Ar5, D0Ar6
+ B $L8byteloop
+
+$Lwordloop:
+ GETD D0Ar6, [D0Ar2++]
+ MOV D1Re0, D0Ar6
+ MOV D0Re0, D0Ar6
+ ADD D1Re0, D1Re0, D1Ar3
+ XOR D0Re0, D0Re0, #-1
+ AND D1Re0, D1Re0, D0Re0
+ ANDS D1Re0, D1Re0, D0FrT
+ MOV D1Ar5, D0Ar6
+ BNZ $Lnullfound
+ SETD [A1.2++], D0Ar6
+ B $Lwordloop
+
+$Lnullfound2:
+ SETD [A1.2++], D1Ar5
+ MOV D1Ar5, D0Ar6
+
+$Lnullfound:
+ SETB [A1.2++], D1Ar5
+ ANDS D0Ar6, D1Ar5, #0xff
+ LSR D1Ar5, D1Ar5, #8
+ BNZ $Lnullfound
+ B $Lend
+
+$Lbyteloop:
+ GETB D0Ar6, [D0Ar2++]
+ SETB [A1.2++], D0Ar6
+ CMP D0Ar6, #0
+ BNZ $Lbyteloop
+
+$Lend:
+ MOV D0Re0, D1Ar1
+ MOV PC, D1RtP
+
+ .size _strcpy,.-_strcpy
+
+libc_hidden_def(strcpy)
diff --git a/libc/string/sh64/Makefile b/libc/string/microblaze/Makefile
index 0a95346fd..5bdfef2f7 100644
--- a/libc/string/sh64/Makefile
+++ b/libc/string/microblaze/Makefile
@@ -5,8 +5,8 @@
# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
#
-top_srcdir:=../../../
-top_builddir:=../../../
+top_srcdir := ../../../
+top_builddir := ../../../
all: objs
include $(top_builddir)Rules.mak
include ../Makefile.in
diff --git a/libc/string/microblaze/memcpy.S b/libc/string/microblaze/memcpy.S
new file mode 100644
index 000000000..5219e9919
--- /dev/null
+++ b/libc/string/microblaze/memcpy.S
@@ -0,0 +1,334 @@
+/*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
+ * Copyright (C) 2008 Jim Law - Iris LP All rights reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file COPYING in the main directory of this
+ * archive for more details.
+ *
+ * Written by Jim Law <jlaw@irispower.com>
+ *
+ * intended to replace:
+ * memcpy in memcpy.c and
+ * memmove in memmove.c
+ * ... in arch/microblaze/lib
+ *
+ *
+ * assly_fastcopy.S
+ *
+ * Attempt at quicker memcpy and memmove for MicroBlaze
+ * Input : Operand1 in Reg r5 - destination address
+ * Operand2 in Reg r6 - source address
+ * Operand3 in Reg r7 - number of bytes to transfer
+ *	Output:	Result in Reg r3 - starting destination address
+ *
+ *
+ * Explanation:
+ * Perform (possibly unaligned) copy of a block of memory
+ * between mem locations with size of xfer spec'd in bytes
+ */
+
+ .text
+ .globl memcpy
+ .type memcpy, @function
+ .ent memcpy
+
+#ifdef __MICROBLAZEEL__
+# define BSLLI bsrli
+# define BSRLI bslli
+#else
+# define BSLLI bslli
+# define BSRLI bsrli
+#endif
+
+memcpy:
+fast_memcpy_ascending:
+ /* move d to return register as value of function */
+ addi r3, r5, 0
+
+ addi r4, r0, 4 /* n = 4 */
+ cmpu r4, r4, r7 /* n = c - n (unsigned) */
+ blti r4, a_xfer_end /* if n < 0, less than one word to transfer */
+
+ /* transfer first 0~3 bytes to get aligned dest address */
+ andi r4, r5, 3 /* n = d & 3 */
+ /* if zero, destination already aligned */
+ beqi r4, a_dalign_done
+ /* n = 4 - n (yields 3, 2, 1 transfers for 1, 2, 3 addr offset) */
+ rsubi r4, r4, 4
+ rsub r7, r4, r7 /* c = c - n adjust c */
+
+a_xfer_first_loop:
+ /* if no bytes left to transfer, transfer the bulk */
+ beqi r4, a_dalign_done
+ lbui r11, r6, 0 /* h = *s */
+ sbi r11, r5, 0 /* *d = h */
+ addi r6, r6, 1 /* s++ */
+ addi r5, r5, 1 /* d++ */
+ brid a_xfer_first_loop /* loop */
+ addi r4, r4, -1 /* n-- (IN DELAY SLOT) */
+
+a_dalign_done:
+ addi r4, r0, 32 /* n = 32 */
+ cmpu r4, r4, r7 /* n = c - n (unsigned) */
+ /* if n < 0, less than one block to transfer */
+ blti r4, a_block_done
+
+a_block_xfer:
+ andi r9, r6, 3 /* t1 = s & 3 */
+ /* if temp == 0, everything is word-aligned */
+ beqi r9, a_word_xfer
+
+a_block_unaligned:
+ andi r4, r7, 0xffffffe0 /* n = c & ~31 */
+ rsub r7, r4, r7 /* c = c - n */
+ andi r8, r6, 0xfffffffc /* as = s & ~3 */
+ add r6, r6, r4 /* s = s + n */
+ lwi r11, r8, 0 /* h = *(as + 0) */
+
+ addi r9, r9, -1
+ beqi r9, a_block_u1 /* t1 was 1 => 1 byte offset */
+ addi r9, r9, -1
+ beqi r9, a_block_u2 /* t1 was 2 => 2 byte offset */
+
+a_block_u3:
+ BSLLI r11, r11, 24 /* h = h << 24 */
+a_bu3_loop:
+ lwi r12, r8, 4 /* v = *(as + 4) */
+ BSRLI r9, r12, 8 /* t1 = v >> 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 0 /* *(d + 0) = t1 */
+ BSLLI r11, r12, 24 /* h = v << 24 */
+ lwi r12, r8, 8 /* v = *(as + 8) */
+ BSRLI r9, r12, 8 /* t1 = v >> 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 4 /* *(d + 4) = t1 */
+ BSLLI r11, r12, 24 /* h = v << 24 */
+ lwi r12, r8, 12 /* v = *(as + 12) */
+ BSRLI r9, r12, 8 /* t1 = v >> 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 8 /* *(d + 8) = t1 */
+ BSLLI r11, r12, 24 /* h = v << 24 */
+ lwi r12, r8, 16 /* v = *(as + 16) */
+ BSRLI r9, r12, 8 /* t1 = v >> 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 12 /* *(d + 12) = t1 */
+ BSLLI r11, r12, 24 /* h = v << 24 */
+ lwi r12, r8, 20 /* v = *(as + 20) */
+ BSRLI r9, r12, 8 /* t1 = v >> 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 16 /* *(d + 16) = t1 */
+ BSLLI r11, r12, 24 /* h = v << 24 */
+ lwi r12, r8, 24 /* v = *(as + 24) */
+ BSRLI r9, r12, 8 /* t1 = v >> 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 20 /* *(d + 20) = t1 */
+ BSLLI r11, r12, 24 /* h = v << 24 */
+ lwi r12, r8, 28 /* v = *(as + 28) */
+ BSRLI r9, r12, 8 /* t1 = v >> 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 24 /* *(d + 24) = t1 */
+ BSLLI r11, r12, 24 /* h = v << 24 */
+ lwi r12, r8, 32 /* v = *(as + 32) */
+ BSRLI r9, r12, 8 /* t1 = v >> 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 28 /* *(d + 28) = t1 */
+ BSLLI r11, r12, 24 /* h = v << 24 */
+ addi r8, r8, 32 /* as = as + 32 */
+ addi r4, r4, -32 /* n = n - 32 */
+ bneid r4, a_bu3_loop /* while (n) loop */
+ addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */
+ bri a_block_done
+
+a_block_u1:
+ BSLLI r11, r11, 8 /* h = h << 8 */
+a_bu1_loop:
+ lwi r12, r8, 4 /* v = *(as + 4) */
+ BSRLI r9, r12, 24 /* t1 = v >> 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 0 /* *(d + 0) = t1 */
+ BSLLI r11, r12, 8 /* h = v << 8 */
+ lwi r12, r8, 8 /* v = *(as + 8) */
+ BSRLI r9, r12, 24 /* t1 = v >> 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 4 /* *(d + 4) = t1 */
+ BSLLI r11, r12, 8 /* h = v << 8 */
+ lwi r12, r8, 12 /* v = *(as + 12) */
+ BSRLI r9, r12, 24 /* t1 = v >> 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 8 /* *(d + 8) = t1 */
+ BSLLI r11, r12, 8 /* h = v << 8 */
+ lwi r12, r8, 16 /* v = *(as + 16) */
+ BSRLI r9, r12, 24 /* t1 = v >> 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 12 /* *(d + 12) = t1 */
+ BSLLI r11, r12, 8 /* h = v << 8 */
+ lwi r12, r8, 20 /* v = *(as + 20) */
+ BSRLI r9, r12, 24 /* t1 = v >> 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 16 /* *(d + 16) = t1 */
+ BSLLI r11, r12, 8 /* h = v << 8 */
+ lwi r12, r8, 24 /* v = *(as + 24) */
+ BSRLI r9, r12, 24 /* t1 = v >> 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 20 /* *(d + 20) = t1 */
+ BSLLI r11, r12, 8 /* h = v << 8 */
+ lwi r12, r8, 28 /* v = *(as + 28) */
+ BSRLI r9, r12, 24 /* t1 = v >> 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 24 /* *(d + 24) = t1 */
+ BSLLI r11, r12, 8 /* h = v << 8 */
+ lwi r12, r8, 32 /* v = *(as + 32) */
+ BSRLI r9, r12, 24 /* t1 = v >> 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 28 /* *(d + 28) = t1 */
+ BSLLI r11, r12, 8 /* h = v << 8 */
+ addi r8, r8, 32 /* as = as + 32 */
+ addi r4, r4, -32 /* n = n - 32 */
+ bneid r4, a_bu1_loop /* while (n) loop */
+ addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */
+ bri a_block_done
+
+a_block_u2:
+ BSLLI r11, r11, 16 /* h = h << 16 */
+a_bu2_loop:
+ lwi r12, r8, 4 /* v = *(as + 4) */
+ BSRLI r9, r12, 16 /* t1 = v >> 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 0 /* *(d + 0) = t1 */
+ BSLLI r11, r12, 16 /* h = v << 16 */
+ lwi r12, r8, 8 /* v = *(as + 8) */
+ BSRLI r9, r12, 16 /* t1 = v >> 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 4 /* *(d + 4) = t1 */
+ BSLLI r11, r12, 16 /* h = v << 16 */
+ lwi r12, r8, 12 /* v = *(as + 12) */
+ BSRLI r9, r12, 16 /* t1 = v >> 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 8 /* *(d + 8) = t1 */
+ BSLLI r11, r12, 16 /* h = v << 16 */
+ lwi r12, r8, 16 /* v = *(as + 16) */
+ BSRLI r9, r12, 16 /* t1 = v >> 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 12 /* *(d + 12) = t1 */
+ BSLLI r11, r12, 16 /* h = v << 16 */
+ lwi r12, r8, 20 /* v = *(as + 20) */
+ BSRLI r9, r12, 16 /* t1 = v >> 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 16 /* *(d + 16) = t1 */
+ BSLLI r11, r12, 16 /* h = v << 16 */
+ lwi r12, r8, 24 /* v = *(as + 24) */
+ BSRLI r9, r12, 16 /* t1 = v >> 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 20 /* *(d + 20) = t1 */
+ BSLLI r11, r12, 16 /* h = v << 16 */
+ lwi r12, r8, 28 /* v = *(as + 28) */
+ BSRLI r9, r12, 16 /* t1 = v >> 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 24 /* *(d + 24) = t1 */
+ BSLLI r11, r12, 16 /* h = v << 16 */
+ lwi r12, r8, 32 /* v = *(as + 32) */
+ BSRLI r9, r12, 16 /* t1 = v >> 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 28 /* *(d + 28) = t1 */
+ BSLLI r11, r12, 16 /* h = v << 16 */
+ addi r8, r8, 32 /* as = as + 32 */
+ addi r4, r4, -32 /* n = n - 32 */
+ bneid r4, a_bu2_loop /* while (n) loop */
+ addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */
+
+a_block_done:
+ addi r4, r0, 4 /* n = 4 */
+ cmpu r4, r4, r7 /* n = c - n (unsigned) */
+ blti r4, a_xfer_end /* if n < 0, less than one word to transfer */
+
+a_word_xfer:
+ andi r4, r7, 0xfffffffc /* n = c & ~3 */
+ addi r10, r0, 0 /* offset = 0 */
+
+ andi r9, r6, 3 /* t1 = s & 3 */
+ /* if temp != 0, unaligned transfers needed */
+ bnei r9, a_word_unaligned
+
+a_word_aligned:
+ lw r9, r6, r10 /* t1 = *(s+offset) */
+ sw r9, r5, r10 /* *(d+offset) = t1 */
+	addi	r4, r4,-4	/* n = n - 4 */
+ bneid r4, a_word_aligned /* loop */
+	addi	r10, r10, 4	/* offset = offset + 4 (IN DELAY SLOT) */
+
+ bri a_word_done
+
+a_word_unaligned:
+ andi r8, r6, 0xfffffffc /* as = s & ~3 */
+ lwi r11, r8, 0 /* h = *(as + 0) */
+ addi r8, r8, 4 /* as = as + 4 */
+
+ addi r9, r9, -1
+ beqi r9, a_word_u1 /* t1 was 1 => 1 byte offset */
+ addi r9, r9, -1
+ beqi r9, a_word_u2 /* t1 was 2 => 2 byte offset */
+
+a_word_u3:
+ BSLLI r11, r11, 24 /* h = h << 24 */
+a_wu3_loop:
+ lw r12, r8, r10 /* v = *(as + offset) */
+ BSRLI r9, r12, 8 /* t1 = v >> 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ sw r9, r5, r10 /* *(d + offset) = t1 */
+ BSLLI r11, r12, 24 /* h = v << 24 */
+ addi r4, r4,-4 /* n = n - 4 */
+ bneid r4, a_wu3_loop /* while (n) loop */
+	addi	r10, r10, 4	/* offset = offset + 4 (IN DELAY SLOT) */
+
+ bri a_word_done
+
+a_word_u1:
+ BSLLI r11, r11, 8 /* h = h << 8 */
+a_wu1_loop:
+ lw r12, r8, r10 /* v = *(as + offset) */
+ BSRLI r9, r12, 24 /* t1 = v >> 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ sw r9, r5, r10 /* *(d + offset) = t1 */
+ BSLLI r11, r12, 8 /* h = v << 8 */
+ addi r4, r4,-4 /* n = n - 4 */
+ bneid r4, a_wu1_loop /* while (n) loop */
+	addi	r10, r10, 4	/* offset = offset + 4 (IN DELAY SLOT) */
+
+ bri a_word_done
+
+a_word_u2:
+ BSLLI r11, r11, 16 /* h = h << 16 */
+a_wu2_loop:
+ lw r12, r8, r10 /* v = *(as + offset) */
+ BSRLI r9, r12, 16 /* t1 = v >> 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ sw r9, r5, r10 /* *(d + offset) = t1 */
+ BSLLI r11, r12, 16 /* h = v << 16 */
+ addi r4, r4,-4 /* n = n - 4 */
+ bneid r4, a_wu2_loop /* while (n) loop */
+	addi	r10, r10, 4	/* offset = offset + 4 (IN DELAY SLOT) */
+
+a_word_done:
+ add r5, r5, r10 /* d = d + offset */
+ add r6, r6, r10 /* s = s + offset */
+ rsub r7, r10, r7 /* c = c - offset */
+
+a_xfer_end:
+a_xfer_end_loop:
+ beqi r7, a_done /* while (c) */
+ lbui r9, r6, 0 /* t1 = *s */
+ addi r6, r6, 1 /* s++ */
+ sbi r9, r5, 0 /* *d = t1 */
+ addi r7, r7, -1 /* c-- */
+ brid a_xfer_end_loop /* loop */
+ addi r5, r5, 1 /* d++ (IN DELAY SLOT) */
+
+a_done:
+ rtsd r15, 8
+ nop
+
+.size memcpy, . - memcpy
+.end memcpy
+libc_hidden_def(memcpy)
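
The BSLLI/BSRLI macros at the top swap shift directions under __MICROBLAZEEL__ because the hold/next merge in the unaligned loops must shift toward whichever end of the word holds the earlier memory address. A C sketch of one merge step under that assumption (the helper is illustrative):

    #include <stdint.h>

    /* off = source mis-alignment in bytes, 1..3 */
    static uint32_t merge(uint32_t hold, uint32_t next, unsigned off,
                          int big_endian)
    {
        unsigned sh = 8 * off;
        if (big_endian)                             /* default MicroBlaze */
            return (hold << sh) | (next >> (32 - sh));
        return (hold >> sh) | (next << (32 - sh));  /* __MICROBLAZEEL__   */
    }
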
diff --git a/libc/string/microblaze/memmove.S b/libc/string/microblaze/memmove.S
new file mode 100644
index 000000000..6bac01620
--- /dev/null
+++ b/libc/string/microblaze/memmove.S
@@ -0,0 +1,356 @@
+/*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
+ * Copyright (C) 2008 Jim Law - Iris LP All rights reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file COPYING in the main directory of this
+ * archive for more details.
+ *
+ * Written by Jim Law <jlaw@irispower.com>
+ *
+ * intended to replace:
+ * memcpy in memcpy.c and
+ * memmove in memmove.c
+ * ... in arch/microblaze/lib
+ *
+ *
+ * assly_fastcopy.S
+ *
+ * Attempt at quicker memcpy and memmove for MicroBlaze
+ * Input : Operand1 in Reg r5 - destination address
+ * Operand2 in Reg r6 - source address
+ * Operand3 in Reg r7 - number of bytes to transfer
+ *	Output:	Result in Reg r3 - starting destination address
+ *
+ *
+ * Explanation:
+ * Perform (possibly unaligned) copy of a block of memory
+ * between mem locations with size of xfer spec'd in bytes
+ */
+
+ .globl memmove
+ .type memmove, @function
+ .ent memmove
+
+#ifdef __MICROBLAZEEL__
+# define BSLLI bsrli
+# define BSRLI bslli
+#else
+# define BSLLI bslli
+# define BSRLI bsrli
+#endif
+
+memmove:
+ cmpu r4, r5, r6 /* n = s - d */
+ bgei r4, HIDDEN_JUMPTARGET(memcpy)
+
+fast_memcpy_descending:
+ /* move d to return register as value of function */
+ addi r3, r5, 0
+
+ add r5, r5, r7 /* d = d + c */
+ add r6, r6, r7 /* s = s + c */
+
+ addi r4, r0, 4 /* n = 4 */
+ cmpu r4, r4, r7 /* n = c - n (unsigned) */
+ blti r4,d_xfer_end /* if n < 0, less than one word to transfer */
+
+ /* transfer first 0~3 bytes to get aligned dest address */
+ andi r4, r5, 3 /* n = d & 3 */
+ /* if zero, destination already aligned */
+ beqi r4,d_dalign_done
+ rsub r7, r4, r7 /* c = c - n adjust c */
+
+d_xfer_first_loop:
+ /* if no bytes left to transfer, transfer the bulk */
+ beqi r4,d_dalign_done
+ addi r6, r6, -1 /* s-- */
+ addi r5, r5, -1 /* d-- */
+ lbui r11, r6, 0 /* h = *s */
+ sbi r11, r5, 0 /* *d = h */
+ brid d_xfer_first_loop /* loop */
+ addi r4, r4, -1 /* n-- (IN DELAY SLOT) */
+
+d_dalign_done:
+ addi r4, r0, 32 /* n = 32 */
+ cmpu r4, r4, r7 /* n = c - n (unsigned) */
+ /* if n < 0, less than one block to transfer */
+ blti r4, d_block_done
+
+d_block_xfer:
+ andi r4, r7, 0xffffffe0 /* n = c & ~31 */
+ rsub r7, r4, r7 /* c = c - n */
+
+ andi r9, r6, 3 /* t1 = s & 3 */
+ /* if temp != 0, unaligned transfers needed */
+ bnei r9, d_block_unaligned
+
+d_block_aligned:
+ addi r6, r6, -32 /* s = s - 32 */
+ addi r5, r5, -32 /* d = d - 32 */
+ lwi r9, r6, 28 /* t1 = *(s + 28) */
+ lwi r10, r6, 24 /* t2 = *(s + 24) */
+ lwi r11, r6, 20 /* t3 = *(s + 20) */
+ lwi r12, r6, 16 /* t4 = *(s + 16) */
+ swi r9, r5, 28 /* *(d + 28) = t1 */
+ swi r10, r5, 24 /* *(d + 24) = t2 */
+ swi r11, r5, 20 /* *(d + 20) = t3 */
+ swi r12, r5, 16 /* *(d + 16) = t4 */
+ lwi r9, r6, 12 /* t1 = *(s + 12) */
+ lwi r10, r6, 8 /* t2 = *(s + 8) */
+ lwi r11, r6, 4 /* t3 = *(s + 4) */
+ lwi r12, r6, 0 /* t4 = *(s + 0) */
+ swi r9, r5, 12 /* *(d + 12) = t1 */
+ swi r10, r5, 8 /* *(d + 8) = t2 */
+ swi r11, r5, 4 /* *(d + 4) = t3 */
+ addi r4, r4, -32 /* n = n - 32 */
+ bneid r4, d_block_aligned /* while (n) loop */
+ swi r12, r5, 0 /* *(d + 0) = t4 (IN DELAY SLOT) */
+ bri d_block_done
+
+d_block_unaligned:
+ andi r8, r6, 0xfffffffc /* as = s & ~3 */
+ rsub r6, r4, r6 /* s = s - n */
+ lwi r11, r8, 0 /* h = *(as + 0) */
+
+ addi r9, r9, -1
+ beqi r9,d_block_u1 /* t1 was 1 => 1 byte offset */
+ addi r9, r9, -1
+ beqi r9,d_block_u2 /* t1 was 2 => 2 byte offset */
+
+d_block_u3:
+ BSRLI r11, r11, 8 /* h = h >> 8 */
+d_bu3_loop:
+ addi r8, r8, -32 /* as = as - 32 */
+ addi r5, r5, -32 /* d = d - 32 */
+ lwi r12, r8, 28 /* v = *(as + 28) */
+ BSLLI r9, r12, 24 /* t1 = v << 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 28 /* *(d + 28) = t1 */
+ BSRLI r11, r12, 8 /* h = v >> 8 */
+ lwi r12, r8, 24 /* v = *(as + 24) */
+ BSLLI r9, r12, 24 /* t1 = v << 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 24 /* *(d + 24) = t1 */
+ BSRLI r11, r12, 8 /* h = v >> 8 */
+ lwi r12, r8, 20 /* v = *(as + 20) */
+ BSLLI r9, r12, 24 /* t1 = v << 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 20 /* *(d + 20) = t1 */
+ BSRLI r11, r12, 8 /* h = v >> 8 */
+ lwi r12, r8, 16 /* v = *(as + 16) */
+ BSLLI r9, r12, 24 /* t1 = v << 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 16 /* *(d + 16) = t1 */
+ BSRLI r11, r12, 8 /* h = v >> 8 */
+ lwi r12, r8, 12 /* v = *(as + 12) */
+ BSLLI r9, r12, 24 /* t1 = v << 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+	swi	r9, r5, 12	/* *(d + 12) = t1 */
+ BSRLI r11, r12, 8 /* h = v >> 8 */
+ lwi r12, r8, 8 /* v = *(as + 8) */
+ BSLLI r9, r12, 24 /* t1 = v << 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 8 /* *(d + 8) = t1 */
+ BSRLI r11, r12, 8 /* h = v >> 8 */
+ lwi r12, r8, 4 /* v = *(as + 4) */
+ BSLLI r9, r12, 24 /* t1 = v << 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 4 /* *(d + 4) = t1 */
+ BSRLI r11, r12, 8 /* h = v >> 8 */
+ lwi r12, r8, 0 /* v = *(as + 0) */
+ BSLLI r9, r12, 24 /* t1 = v << 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 0 /* *(d + 0) = t1 */
+ addi r4, r4, -32 /* n = n - 32 */
+ bneid r4, d_bu3_loop /* while (n) loop */
+ BSRLI r11, r12, 8 /* h = v >> 8 (IN DELAY SLOT) */
+ bri d_block_done
+
+d_block_u1:
+ BSRLI r11, r11, 24 /* h = h >> 24 */
+d_bu1_loop:
+ addi r8, r8, -32 /* as = as - 32 */
+ addi r5, r5, -32 /* d = d - 32 */
+ lwi r12, r8, 28 /* v = *(as + 28) */
+ BSLLI r9, r12, 8 /* t1 = v << 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 28 /* *(d + 28) = t1 */
+ BSRLI r11, r12, 24 /* h = v >> 24 */
+ lwi r12, r8, 24 /* v = *(as + 24) */
+ BSLLI r9, r12, 8 /* t1 = v << 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 24 /* *(d + 24) = t1 */
+ BSRLI r11, r12, 24 /* h = v >> 24 */
+ lwi r12, r8, 20 /* v = *(as + 20) */
+ BSLLI r9, r12, 8 /* t1 = v << 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 20 /* *(d + 20) = t1 */
+ BSRLI r11, r12, 24 /* h = v >> 24 */
+ lwi r12, r8, 16 /* v = *(as + 16) */
+ BSLLI r9, r12, 8 /* t1 = v << 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 16 /* *(d + 16) = t1 */
+ BSRLI r11, r12, 24 /* h = v >> 24 */
+ lwi r12, r8, 12 /* v = *(as + 12) */
+ BSLLI r9, r12, 8 /* t1 = v << 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+	swi	r9, r5, 12	/* *(d + 12) = t1 */
+ BSRLI r11, r12, 24 /* h = v >> 24 */
+ lwi r12, r8, 8 /* v = *(as + 8) */
+ BSLLI r9, r12, 8 /* t1 = v << 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 8 /* *(d + 8) = t1 */
+ BSRLI r11, r12, 24 /* h = v >> 24 */
+ lwi r12, r8, 4 /* v = *(as + 4) */
+ BSLLI r9, r12, 8 /* t1 = v << 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 4 /* *(d + 4) = t1 */
+ BSRLI r11, r12, 24 /* h = v >> 24 */
+ lwi r12, r8, 0 /* v = *(as + 0) */
+ BSLLI r9, r12, 8 /* t1 = v << 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 0 /* *(d + 0) = t1 */
+ addi r4, r4, -32 /* n = n - 32 */
+ bneid r4, d_bu1_loop /* while (n) loop */
+ BSRLI r11, r12, 24 /* h = v >> 24 (IN DELAY SLOT) */
+ bri d_block_done
+
+d_block_u2:
+ BSRLI r11, r11, 16 /* h = h >> 16 */
+d_bu2_loop:
+ addi r8, r8, -32 /* as = as - 32 */
+ addi r5, r5, -32 /* d = d - 32 */
+ lwi r12, r8, 28 /* v = *(as + 28) */
+ BSLLI r9, r12, 16 /* t1 = v << 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 28 /* *(d + 28) = t1 */
+ BSRLI r11, r12, 16 /* h = v >> 16 */
+ lwi r12, r8, 24 /* v = *(as + 24) */
+ BSLLI r9, r12, 16 /* t1 = v << 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 24 /* *(d + 24) = t1 */
+ BSRLI r11, r12, 16 /* h = v >> 16 */
+ lwi r12, r8, 20 /* v = *(as + 20) */
+ BSLLI r9, r12, 16 /* t1 = v << 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 20 /* *(d + 20) = t1 */
+ BSRLI r11, r12, 16 /* h = v >> 16 */
+ lwi r12, r8, 16 /* v = *(as + 16) */
+ BSLLI r9, r12, 16 /* t1 = v << 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 16 /* *(d + 16) = t1 */
+ BSRLI r11, r12, 16 /* h = v >> 16 */
+ lwi r12, r8, 12 /* v = *(as + 12) */
+ BSLLI r9, r12, 16 /* t1 = v << 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+	swi	r9, r5, 12	/* *(d + 12) = t1 */
+ BSRLI r11, r12, 16 /* h = v >> 16 */
+ lwi r12, r8, 8 /* v = *(as + 8) */
+ BSLLI r9, r12, 16 /* t1 = v << 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 8 /* *(d + 8) = t1 */
+ BSRLI r11, r12, 16 /* h = v >> 16 */
+ lwi r12, r8, 4 /* v = *(as + 4) */
+ BSLLI r9, r12, 16 /* t1 = v << 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 4 /* *(d + 4) = t1 */
+ BSRLI r11, r12, 16 /* h = v >> 16 */
+ lwi r12, r8, 0 /* v = *(as + 0) */
+ BSLLI r9, r12, 16 /* t1 = v << 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ swi r9, r5, 0 /* *(d + 0) = t1 */
+ addi r4, r4, -32 /* n = n - 32 */
+ bneid r4, d_bu2_loop /* while (n) loop */
+ BSRLI r11, r12, 16 /* h = v >> 16 (IN DELAY SLOT) */
+
+d_block_done:
+ addi r4, r0, 4 /* n = 4 */
+ cmpu r4, r4, r7 /* n = c - n (unsigned) */
+ blti r4,d_xfer_end /* if n < 0, less than one word to transfer */
+
+d_word_xfer:
+ andi r4, r7, 0xfffffffc /* n = c & ~3 */
+ rsub r5, r4, r5 /* d = d - n */
+ rsub r6, r4, r6 /* s = s - n */
+ rsub r7, r4, r7 /* c = c - n */
+
+ andi r9, r6, 3 /* t1 = s & 3 */
+	/* if t1 != 0, unaligned transfers needed */
+ bnei r9, d_word_unaligned
+
+d_word_aligned:
+ addi r4, r4,-4 /* n-- */
+ lw r9, r6, r4 /* t1 = *(s+n) */
+ bneid r4, d_word_aligned /* loop */
+ sw r9, r5, r4 /* *(d+n) = t1 (IN DELAY SLOT) */
+
+ bri d_word_done
+
+d_word_unaligned:
+ andi r8, r6, 0xfffffffc /* as = s & ~3 */
+ lw r11, r8, r4 /* h = *(as + n) */
+
+ addi r9, r9, -1
+ beqi r9,d_word_u1 /* t1 was 1 => 1 byte offset */
+ addi r9, r9, -1
+ beqi r9,d_word_u2 /* t1 was 2 => 2 byte offset */
+
+d_word_u3:
+ BSRLI r11, r11, 8 /* h = h >> 8 */
+d_wu3_loop:
+ addi r4, r4,-4 /* n = n - 4 */
+ lw r12, r8, r4 /* v = *(as + n) */
+ BSLLI r9, r12, 24 /* t1 = v << 24 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ sw r9, r5, r4 /* *(d + n) = t1 */
+ bneid r4, d_wu3_loop /* while (n) loop */
+ BSRLI r11, r12, 8 /* h = v >> 8 (IN DELAY SLOT) */
+
+ bri d_word_done
+
+d_word_u1:
+ BSRLI r11, r11, 24 /* h = h >> 24 */
+d_wu1_loop:
+ addi r4, r4,-4 /* n = n - 4 */
+ lw r12, r8, r4 /* v = *(as + n) */
+ BSLLI r9, r12, 8 /* t1 = v << 8 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ sw r9, r5, r4 /* *(d + n) = t1 */
+ bneid r4, d_wu1_loop /* while (n) loop */
+ BSRLI r11, r12, 24 /* h = v >> 24 (IN DELAY SLOT) */
+
+ bri d_word_done
+
+d_word_u2:
+ BSRLI r11, r11, 16 /* h = h >> 16 */
+d_wu2_loop:
+ addi r4, r4,-4 /* n = n - 4 */
+ lw r12, r8, r4 /* v = *(as + n) */
+ BSLLI r9, r12, 16 /* t1 = v << 16 */
+ or r9, r11, r9 /* t1 = h | t1 */
+ sw r9, r5, r4 /* *(d + n) = t1 */
+ bneid r4, d_wu2_loop /* while (n) loop */
+ BSRLI r11, r12, 16 /* h = v >> 16 (IN DELAY SLOT) */
+
+d_word_done:
+
+d_xfer_end:
+d_xfer_end_loop:
+ beqi r7, a_done /* while (c) */
+ addi r6, r6, -1 /* s-- */
+ lbui r9, r6, 0 /* t1 = *s */
+ addi r5, r5, -1 /* d-- */
+ sbi r9, r5, 0 /* *d = t1 */
+ brid d_xfer_end_loop /* loop */
+ addi r7, r7, -1 /* c-- (IN DELAY SLOT) */
+
+a_done:
+d_done:
+ rtsd r15, 8
+ nop
+
+.size memmove, . - memmove
+.end memmove
+libc_hidden_def(memmove)
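
The eight-way unrolled d_bu* loops and the compact d_wu* loops above all use the same carry-and-merge pattern. A minimal C sketch of the one-byte-offset backward word loop (d_wu1), assuming big-endian word layout and illustrative names:

	#include <stdint.h>
	#include <stddef.h>

	/* d, as: word pointers; n: whole words to copy backward.
	   Each stored word merges the current source word, shifted
	   left one byte, with the byte carried from the word above. */
	static void wu1_backward(uint32_t *d, const uint32_t *as, size_t n)
	{
		uint32_t h = as[n] >> 24;          /* h = *(as + n) >> 24 */
		while (n--) {
			uint32_t v = as[n];        /* v = *(as + n)       */
			d[n] = (v << 8) | h;       /* t1 = (v << 8) | h   */
			h = v >> 24;               /* carry for next word */
		}
	}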
diff --git a/libc/string/mips/memcpy.S b/libc/string/mips/memcpy.S
index 9b05ee6da..59f9f0a3a 100644
--- a/libc/string/mips/memcpy.S
+++ b/libc/string/mips/memcpy.S
@@ -1,6 +1,5 @@
-/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Hartvig Ekner <hartvige@mips.com>, 2002.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -13,245 +12,861 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
-#include <features.h>
-/*#include <sysdep.h>*/
-#include <endian.h>
-#include "sysdep.h"
+#ifdef ANDROID_CHANGES
+# include "machine/asm.h"
+# include "machine/regdef.h"
+# define USE_MEMMOVE_FOR_OVERLAP
+# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
+# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
+#elif _LIBC
+# include <sysdep.h>
+# include <sys/regdef.h>
+# include <sys/asm.h>
+# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
+# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
+#elif defined _COMPILING_NEWLIB
+# include "machine/asm.h"
+# include "machine/regdef.h"
+# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD_STREAMED
+# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
+#else
+# include <sys/regdef.h>
+# include <sys/asm.h>
+#endif
+
+#if (_MIPS_ISA == _MIPS_ISA_MIPS4) || (_MIPS_ISA == _MIPS_ISA_MIPS5) || \
+ (_MIPS_ISA == _MIPS_ISA_MIPS32) || (_MIPS_ISA == _MIPS_ISA_MIPS64)
+# ifndef DISABLE_PREFETCH
+# define USE_PREFETCH
+# endif
+#endif
+
+#if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABI64) || (_MIPS_SIM == _ABIN32))
+# ifndef DISABLE_DOUBLE
+# define USE_DOUBLE
+# endif
+#endif
+
+/* Some asm.h files do not have the L macro definition. */
+#ifndef L
+# if _MIPS_SIM == _ABIO32
+# define L(label) $L ## label
+# else
+# define L(label) .L ## label
+# endif
+#endif
+
+/* Some asm.h files do not have the PTR_ADDIU macro definition. */
+#ifndef PTR_ADDIU
+# ifdef USE_DOUBLE
+# define PTR_ADDIU daddiu
+# else
+# define PTR_ADDIU addiu
+# endif
+#endif
+
+/* Some asm.h files do not have the PTR_SRA macro definition. */
+#ifndef PTR_SRA
+# ifdef USE_DOUBLE
+# define PTR_SRA dsra
+# else
+# define PTR_SRA sra
+# endif
+#endif
+
+/* New R6 instructions that may not be in asm.h. */
+#ifndef PTR_LSA
+# if _MIPS_SIM == _ABI64
+# define PTR_LSA dlsa
+# else
+# define PTR_LSA lsa
+# endif
+#endif
+
+/*
+ * Using PREFETCH_HINT_LOAD_STREAMED instead of PREFETCH_LOAD on load
+ * prefetches appears to offer a slight performance advantage.
+ *
+ * Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE
+ * or PREFETCH_STORE_STREAMED offers a large performance advantage
+ * but PREPAREFORSTORE has some special restrictions to consider.
+ *
+ * Prefetch with the 'prepare for store' hint does not copy a memory
+ * location into the cache, it just allocates a cache line and zeros
+ * it out. This means that if you do not write to the entire cache
+ * line before writing it out to memory, some data will get zeroed out
+ * when the cache line is written back to memory and data will be lost.
+ *
+ * Also if you are using this memcpy to copy overlapping buffers it may
+ * not behave correctly when using the 'prepare for store' hint. If you
+ * use the 'prepare for store' prefetch on a memory area that is in the
+ * memcpy source (as well as the memcpy destination), then you will get
+ * some data zeroed out before you have a chance to read it and data will
+ * be lost.
+ *
+ * If you are going to use this memcpy routine with the 'prepare for store'
+ * prefetch you may want to set USE_MEMMOVE_FOR_OVERLAP in order to avoid
+ * the problem of running memcpy on overlapping buffers.
+ *
+ * There are ifdef'ed sections of this memcpy to make sure that it does not
+ * do prefetches on cache lines that are not going to be completely written.
+ * This code is only needed and only used when PREFETCH_STORE_HINT is set to
+ * PREFETCH_HINT_PREPAREFORSTORE. This code assumes that cache lines are
+ * 32 bytes and if the cache line is larger it will not work correctly.
+ */
+
+#ifdef USE_PREFETCH
+# define PREFETCH_HINT_LOAD 0
+# define PREFETCH_HINT_STORE 1
+# define PREFETCH_HINT_LOAD_STREAMED 4
+# define PREFETCH_HINT_STORE_STREAMED 5
+# define PREFETCH_HINT_LOAD_RETAINED 6
+# define PREFETCH_HINT_STORE_RETAINED 7
+# define PREFETCH_HINT_WRITEBACK_INVAL 25
+# define PREFETCH_HINT_PREPAREFORSTORE 30
+
+/*
+ * If we have not picked out what hints to use at this point use the
+ * standard load and store prefetch hints.
+ */
+# ifndef PREFETCH_STORE_HINT
+# define PREFETCH_STORE_HINT PREFETCH_HINT_STORE
+# endif
+# ifndef PREFETCH_LOAD_HINT
+# define PREFETCH_LOAD_HINT PREFETCH_HINT_LOAD
+# endif
+
+/*
+ * We double everything when USE_DOUBLE is true so we do 2 prefetches to
+ * get 64 bytes in that case. The assumption is that each individual
+ * prefetch brings in 32 bytes.
+ */
+
+# ifdef USE_DOUBLE
+# define PREFETCH_CHUNK 64
+# define PREFETCH_FOR_LOAD(chunk, reg) \
+ pref PREFETCH_LOAD_HINT, (chunk)*64(reg); \
+ pref PREFETCH_LOAD_HINT, ((chunk)*64)+32(reg)
+# define PREFETCH_FOR_STORE(chunk, reg) \
+ pref PREFETCH_STORE_HINT, (chunk)*64(reg); \
+ pref PREFETCH_STORE_HINT, ((chunk)*64)+32(reg)
+# else
+# define PREFETCH_CHUNK 32
+# define PREFETCH_FOR_LOAD(chunk, reg) \
+ pref PREFETCH_LOAD_HINT, (chunk)*32(reg)
+# define PREFETCH_FOR_STORE(chunk, reg) \
+ pref PREFETCH_STORE_HINT, (chunk)*32(reg)
+# endif
+/* MAX_PREFETCH_SIZE is the maximum size of a prefetch; it must not be less
+ * than PREFETCH_CHUNK, the assumed size of each prefetch. If the real size
+ * of a prefetch is greater than MAX_PREFETCH_SIZE and the PREPAREFORSTORE
+ * hint is used, the code will not work correctly. If PREPAREFORSTORE is not
+ * used then MAX_PREFETCH_SIZE does not matter. */
+# define MAX_PREFETCH_SIZE 128
+/* PREFETCH_LIMIT is set based on the fact that we never use an offset greater
+ * than 5 on a STORE prefetch and that a single prefetch can never be larger
+ * than MAX_PREFETCH_SIZE. We add the extra 32 when USE_DOUBLE is set because
+ * we actually do two prefetches in that case, one 32 bytes after the other. */
+# ifdef USE_DOUBLE
+# define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + 32 + MAX_PREFETCH_SIZE
+# else
+# define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + MAX_PREFETCH_SIZE
+# endif
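
Plugging the values above into PREFETCH_LIMIT, the cut-off works out to:

	USE_DOUBLE:  5*64 + 32 + 128 = 480 bytes
	32-bit:      5*32 + 128      = 288 bytes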
+# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) \
+ && ((PREFETCH_CHUNK * 4) < MAX_PREFETCH_SIZE)
+/* We cannot handle this because the initial prefetches may fetch bytes that
+ * are before the buffer being copied. We start copies with an offset
+ * of 4, so avoid this situation when using PREPAREFORSTORE. */
+#error "PREFETCH_CHUNK is too large and/or MAX_PREFETCH_SIZE is too small."
+# endif
+#else /* USE_PREFETCH not defined */
+# define PREFETCH_FOR_LOAD(offset, reg)
+# define PREFETCH_FOR_STORE(offset, reg)
+#endif
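
As a concrete expansion (assuming the PREPAREFORSTORE hint, value 30, and USE_DOUBLE), a chunk-4 store prefetch becomes two pref instructions one cache line apart:

	PREFETCH_FOR_STORE (4, a0)
		pref	30, 256(a0)	/* (4)*64      */
		pref	30, 288(a0)	/* (4)*64 + 32 */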
+
+#if __mips_isa_rev > 5
+# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+# undef PREFETCH_STORE_HINT
+# define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED
+# endif
+# define R6_CODE
+#endif
-/* void *memcpy(void *s1, const void *s2, size_t n); */
+/* Allow the routine to be named something else if desired. */
+#ifndef MEMCPY_NAME
+# define MEMCPY_NAME memcpy
+#endif
+
+/* We use these 32/64 bit registers as temporaries to do the copying. */
+#define REG0 t0
+#define REG1 t1
+#define REG2 t2
+#define REG3 t3
+#if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABIO32) || (_MIPS_SIM == _ABIO64))
+# define REG4 t4
+# define REG5 t5
+# define REG6 t6
+# define REG7 t7
+#else
+# define REG4 ta0
+# define REG5 ta1
+# define REG6 ta2
+# define REG7 ta3
+#endif
-#ifdef __mips64
+/* We load/store 64 bits at a time when USE_DOUBLE is true.
+ * The C_ prefix stands for CHUNK and is used to avoid macro name
+ * conflicts with system header files. */
-#include <sys/asm.h>
+#ifdef USE_DOUBLE
+# define C_ST sd
+# define C_LD ld
+# ifdef __MIPSEB
+# define C_LDHI ldl /* high part is left in big-endian */
+# define C_STHI sdl /* high part is left in big-endian */
+# define C_LDLO ldr /* low part is right in big-endian */
+# define C_STLO sdr /* low part is right in big-endian */
+# else
+# define C_LDHI ldr /* high part is right in little-endian */
+# define C_STHI sdr /* high part is right in little-endian */
+# define C_LDLO ldl /* low part is left in little-endian */
+# define C_STLO sdl /* low part is left in little-endian */
+# endif
+# define C_ALIGN dalign /* r6 align instruction */
+#else
+# define C_ST sw
+# define C_LD lw
+# ifdef __MIPSEB
+# define C_LDHI lwl /* high part is left in big-endian */
+# define C_STHI swl /* high part is left in big-endian */
+# define C_LDLO lwr /* low part is right in big-endian */
+# define C_STLO swr /* low part is right in big-endian */
+# else
+# define C_LDHI lwr /* high part is right in little-endian */
+# define C_STHI swr /* high part is right in little-endian */
+# define C_LDLO lwl /* low part is left in little-endian */
+# define C_STLO swl /* low part is left in little-endian */
+# endif
+# define C_ALIGN align /* r6 align instruction */
+#endif
-#if __BYTE_ORDER == __BIG_ENDIAN
-# define LDHI ldl /* high part is left in big-endian */
-# define SDHI sdl /* high part is left in big-endian */
-# define LDLO ldr /* low part is right in big-endian */
-# define SDLO sdr /* low part is right in big-endian */
+/* Bookkeeping values for 32 vs. 64 bit mode. */
+#ifdef USE_DOUBLE
+# define NSIZE 8
+# define NSIZEMASK 0x3f
+# define NSIZEDMASK 0x7f
#else
-# define LDHI ldr /* high part is right in little-endian */
-# define SDHI sdr /* high part is right in little-endian */
-# define LDLO ldl /* low part is left in little-endian */
-# define SDLO sdl /* low part is left in little-endian */
+# define NSIZE 4
+# define NSIZEMASK 0x1f
+# define NSIZEDMASK 0x3f
#endif
+#define UNIT(unit) ((unit)*NSIZE)
+#define UNITM1(unit) (((unit)*NSIZE)-1)
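
For example, with USE_DOUBLE (NSIZE 8) UNIT(2) is 16 and UNITM1(1) is 7; in 32-bit mode (NSIZE 4) they are 8 and 3. The pair C_LDHI t0,UNIT(0)(a1) / C_LDLO t0,UNITM1(1)(a1) used later therefore covers exactly one unaligned chunk.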
-ENTRY (memcpy)
+#ifdef ANDROID_CHANGES
+LEAF(MEMCPY_NAME, 0)
+#else
+LEAF(MEMCPY_NAME)
+#endif
+ .set nomips16
.set noreorder
+/*
+ * Below we handle the case where memcpy is called with overlapping src and dst.
+ * Although memcpy is not required to handle this case, some parts of Android
+ * like Skia rely on such usage. We call memmove to handle such cases.
+ */
+#ifdef USE_MEMMOVE_FOR_OVERLAP
+ PTR_SUBU t0,a0,a1
+ PTR_SRA t2,t0,31
+ xor t1,t0,t2
+ PTR_SUBU t0,t1,t2
+ sltu t2,t0,a2
+ beq t2,zero,L(memcpy)
+ la t9,memmove
+ jr t9
+ nop
+L(memcpy):
+#endif
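
The sequence above computes |a0 - a1| branchlessly from the sign mask. A C model of the guard, with a hypothetical wrapper name and assuming arithmetic right shift (as on MIPS):

	#include <stddef.h>
	#include <string.h>

	void *memcpy_guarded(void *dst, const void *src, size_t n)
	{
		ptrdiff_t t0 = (char *)dst - (char *)src;
		ptrdiff_t t2 = t0 >> (sizeof t0 * 8 - 1); /* 0 or -1 */
		size_t dist = (size_t)((t0 ^ t2) - t2);   /* |dst - src| */
		if (dist < n)                   /* regions overlap */
			return memmove(dst, src, n);
		return memcpy(dst, src, n);
	}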
+/*
+ * If the size is less than 2*NSIZE (8 or 16), go to L(lasts). Regardless of
+ * size, copy dst pointer to v0 for the return value.
+ */
+ slti t2,a2,(2 * NSIZE)
+ bne t2,zero,L(lasts)
+#if defined(RETURN_FIRST_PREFETCH) || defined(RETURN_LAST_PREFETCH)
+ move v0,zero
+#else
+ move v0,a0
+#endif
- slti t0, a2, 16 # Less than 16?
- bne t0, zero, L(last16)
- move v0, a0 # Setup exit value before too late
-
- xor t0, a1, a0 # Find a0/a1 displacement
- andi t0, 0x7
- bne t0, zero, L(shift) # Go handle the unaligned case
- PTR_SUBU t1, zero, a1
- andi t1, 0x7 # a0/a1 are aligned, but are we
- beq t1, zero, L(chk8w) # starting in the middle of a word?
- PTR_SUBU a2, t1
- LDHI t0, 0(a1) # Yes we are... take care of that
- PTR_ADDU a1, t1
- SDHI t0, 0(a0)
- PTR_ADDU a0, t1
-
-L(chk8w):
- andi t0, a2, 0x3f # 64 or more bytes left?
- beq t0, a2, L(chk1w)
- PTR_SUBU a3, a2, t0 # Yes
- PTR_ADDU a3, a1 # a3 = end address of loop
- move a2, t0 # a2 = what will be left after loop
-L(lop8w):
- ld t0, 0(a1) # Loop taking 8 words at a time
- ld t1, 8(a1)
- ld t2, 16(a1)
- ld t3, 24(a1)
- ld ta0, 32(a1)
- ld ta1, 40(a1)
- ld ta2, 48(a1)
- ld ta3, 56(a1)
- PTR_ADDIU a0, 64
- PTR_ADDIU a1, 64
- sd t0, -64(a0)
- sd t1, -56(a0)
- sd t2, -48(a0)
- sd t3, -40(a0)
- sd ta0, -32(a0)
- sd ta1, -24(a0)
- sd ta2, -16(a0)
- bne a1, a3, L(lop8w)
- sd ta3, -8(a0)
+#ifndef R6_CODE
-L(chk1w):
- andi t0, a2, 0x7 # 8 or more bytes left?
- beq t0, a2, L(last16)
- PTR_SUBU a3, a2, t0 # Yes, handle them one dword at a time
- PTR_ADDU a3, a1 # a3 again end address
- move a2, t0
-L(lop1w):
- ld t0, 0(a1)
- PTR_ADDIU a0, 8
- PTR_ADDIU a1, 8
- bne a1, a3, L(lop1w)
- sd t0, -8(a0)
-
-L(last16):
- blez a2, L(lst16e) # Handle last 16 bytes, one at a time
- PTR_ADDU a3, a2, a1
-L(lst16l):
- lb t0, 0(a1)
- PTR_ADDIU a0, 1
- PTR_ADDIU a1, 1
- bne a1, a3, L(lst16l)
- sb t0, -1(a0)
-L(lst16e):
- jr ra # Bye, bye
- nop
+/*
+ * If src and dst have different alignments, go to L(unaligned), if they
+ * have the same alignment (but are not actually aligned) do a partial
+ * load/store to make them aligned. If they are both already aligned
+ * we can start copying at L(aligned).
+ */
+ xor t8,a1,a0
+ andi t8,t8,(NSIZE-1) /* t8 is a0/a1 word-displacement */
+ bne t8,zero,L(unaligned)
+ PTR_SUBU a3, zero, a0
-L(shift):
- PTR_SUBU a3, zero, a0 # Src and Dest unaligned
- andi a3, 0x7 # (unoptimized case...)
- beq a3, zero, L(shft1)
- PTR_SUBU a2, a3 # a2 = bytes left
- LDHI t0, 0(a1) # Take care of first odd part
- LDLO t0, 7(a1)
- PTR_ADDU a1, a3
- SDHI t0, 0(a0)
- PTR_ADDU a0, a3
-L(shft1):
- andi t0, a2, 0x7
- PTR_SUBU a3, a2, t0
- PTR_ADDU a3, a1
-L(shfth):
- LDHI t1, 0(a1) # Limp through, dword by dword
- LDLO t1, 7(a1)
- PTR_ADDIU a0, 8
- PTR_ADDIU a1, 8
- bne a1, a3, L(shfth)
- sd t1, -8(a0)
- b L(last16) # Handle anything which may be left
- move a2, t0
+ andi a3,a3,(NSIZE-1) /* copy a3 bytes to align a0/a1 */
+ beq a3,zero,L(aligned) /* if a3=0, it is already aligned */
+	PTR_SUBU a2,a2,a3	/* a2 is the remaining bytes count */
- .set reorder
-END (memcpy)
+ C_LDHI t8,0(a1)
+ PTR_ADDU a1,a1,a3
+ C_STHI t8,0(a0)
+ PTR_ADDU a0,a0,a3
+
+#else /* R6_CODE */
+
+/*
+ * Align the destination and hope that the source gets aligned too. If it
+ * doesn't, we jump to L(r6_unaligned*) to do unaligned copies using the r6
+ * align instruction.
+ */
+ andi t8,a0,7
+ lapc t9,L(atable)
+ PTR_LSA t9,t8,t9,2
+ jrc t9
+L(atable):
+ bc L(lb0)
+ bc L(lb7)
+ bc L(lb6)
+ bc L(lb5)
+ bc L(lb4)
+ bc L(lb3)
+ bc L(lb2)
+ bc L(lb1)
+L(lb7):
+ lb a3, 6(a1)
+ sb a3, 6(a0)
+L(lb6):
+ lb a3, 5(a1)
+ sb a3, 5(a0)
+L(lb5):
+ lb a3, 4(a1)
+ sb a3, 4(a0)
+L(lb4):
+ lb a3, 3(a1)
+ sb a3, 3(a0)
+L(lb3):
+ lb a3, 2(a1)
+ sb a3, 2(a0)
+L(lb2):
+ lb a3, 1(a1)
+ sb a3, 1(a0)
+L(lb1):
+ lb a3, 0(a1)
+ sb a3, 0(a0)
+
+ li t9,8
+ subu t8,t9,t8
+ PTR_SUBU a2,a2,t8
+ PTR_ADDU a0,a0,t8
+ PTR_ADDU a1,a1,t8
+L(lb0):
-#else /* !__mips64 */
+ andi t8,a1,(NSIZE-1)
+ lapc t9,L(jtable)
+ PTR_LSA t9,t8,t9,2
+ jrc t9
+L(jtable):
+ bc L(aligned)
+ bc L(r6_unaligned1)
+ bc L(r6_unaligned2)
+ bc L(r6_unaligned3)
+# ifdef USE_DOUBLE
+ bc L(r6_unaligned4)
+ bc L(r6_unaligned5)
+ bc L(r6_unaligned6)
+ bc L(r6_unaligned7)
+# endif
+#endif /* R6_CODE */
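
The L(atable)/L(lb*) ladder above is a computed-goto fall-through that copies the 8-m bytes needed to align the destination (m = a0 & 7). A C sketch with illustrative names:

	#include <stdint.h>
	#include <stddef.h>

	static void align_head(unsigned char **dp, const unsigned char **sp,
			       size_t *np)
	{
		unsigned char *d = *dp;
		const unsigned char *s = *sp;
		unsigned m = (uintptr_t)d & 7;
		if (m == 0)
			return;                 /* L(lb0): already aligned */
		switch (8 - m) {                /* bytes needed to align */
		case 7: d[6] = s[6];            /* fall through */
		case 6: d[5] = s[5];            /* fall through */
		case 5: d[4] = s[4];            /* fall through */
		case 4: d[3] = s[3];            /* fall through */
		case 3: d[2] = s[2];            /* fall through */
		case 2: d[1] = s[1];            /* fall through */
		case 1: d[0] = s[0];
		}
		*dp = d + (8 - m);
		*sp = s + (8 - m);
		*np -= 8 - m;
	}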
-#if __BYTE_ORDER == __BIG_ENDIAN
-# define LWHI lwl /* high part is left in big-endian */
-# define SWHI swl /* high part is left in big-endian */
-# define LWLO lwr /* low part is right in big-endian */
-# define SWLO swr /* low part is right in big-endian */
+L(aligned):
+
+/*
+ * Now dst/src are both aligned to (word or double word) boundaries.
+ * Set a2 to count how many bytes we have to copy after all the 64/128 byte
+ * chunks are copied and a3 to the dst pointer after all the 64/128 byte
+ * chunks have been copied. We will loop, incrementing a0 and a1 until a0
+ * equals a3.
+ */
+
+ andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
+ beq a2,t8,L(chkw) /* if a2==t8, no 64-byte/128-byte chunks */
+	PTR_SUBU a3,a2,t8	/* subtract from a2 the remainder */
+ PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */
+
+/* When in the loop we may prefetch with the 'prepare to store' hint;
+ * in this case a0+x should not be past the "t0-32" address. This
+ * means: for x=128 the last "safe" a0 address is "t0-160". Alternatively,
+ * for x=64 the last "safe" a0 address is "t0-96". In the current version we
+ * will use "prefetch hint,128(a0)", so "t0-160" is the limit.
+ */
+#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+ PTR_ADDU t0,a0,a2 /* t0 is the "past the end" address */
+ PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */
+#endif
+ PREFETCH_FOR_LOAD (0, a1)
+ PREFETCH_FOR_LOAD (1, a1)
+ PREFETCH_FOR_LOAD (2, a1)
+ PREFETCH_FOR_LOAD (3, a1)
+#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
+ PREFETCH_FOR_STORE (1, a0)
+ PREFETCH_FOR_STORE (2, a0)
+ PREFETCH_FOR_STORE (3, a0)
+#endif
+#if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH)
+# if PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE
+ sltu v1,t9,a0
+ bgtz v1,L(skip_set)
+ nop
+ PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4)
+L(skip_set):
+# else
+ PTR_ADDIU v0,a0,(PREFETCH_CHUNK*1)
+# endif
+#endif
+#if defined(RETURN_LAST_PREFETCH) && defined(USE_PREFETCH) \
+ && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
+ PTR_ADDIU v0,a0,(PREFETCH_CHUNK*3)
+# ifdef USE_DOUBLE
+ PTR_ADDIU v0,v0,32
+# endif
+#endif
+L(loop16w):
+ C_LD t0,UNIT(0)(a1)
+#if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+ sltu v1,t9,a0 /* If a0 > t9 don't use next prefetch */
+ bgtz v1,L(skip_pref)
+#endif
+ C_LD t1,UNIT(1)(a1)
+#ifdef R6_CODE
+ PREFETCH_FOR_STORE (2, a0)
#else
-# define LWHI lwr /* high part is right in little-endian */
-# define SWHI swr /* high part is right in little-endian */
-# define LWLO lwl /* low part is left in little-endian */
-# define SWLO swl /* low part is left in little-endian */
+ PREFETCH_FOR_STORE (4, a0)
+ PREFETCH_FOR_STORE (5, a0)
+#endif
+#if defined(RETURN_LAST_PREFETCH) && defined(USE_PREFETCH)
+ PTR_ADDIU v0,a0,(PREFETCH_CHUNK*5)
+# ifdef USE_DOUBLE
+ PTR_ADDIU v0,v0,32
+# endif
#endif
+L(skip_pref):
+ C_LD REG2,UNIT(2)(a1)
+ C_LD REG3,UNIT(3)(a1)
+ C_LD REG4,UNIT(4)(a1)
+ C_LD REG5,UNIT(5)(a1)
+ C_LD REG6,UNIT(6)(a1)
+ C_LD REG7,UNIT(7)(a1)
+#ifdef R6_CODE
+ PREFETCH_FOR_LOAD (3, a1)
+#else
+ PREFETCH_FOR_LOAD (4, a1)
+#endif
+ C_ST t0,UNIT(0)(a0)
+ C_ST t1,UNIT(1)(a0)
+ C_ST REG2,UNIT(2)(a0)
+ C_ST REG3,UNIT(3)(a0)
+ C_ST REG4,UNIT(4)(a0)
+ C_ST REG5,UNIT(5)(a0)
+ C_ST REG6,UNIT(6)(a0)
+ C_ST REG7,UNIT(7)(a0)
-ENTRY (memcpy)
- .set noreorder
+ C_LD t0,UNIT(8)(a1)
+ C_LD t1,UNIT(9)(a1)
+ C_LD REG2,UNIT(10)(a1)
+ C_LD REG3,UNIT(11)(a1)
+ C_LD REG4,UNIT(12)(a1)
+ C_LD REG5,UNIT(13)(a1)
+ C_LD REG6,UNIT(14)(a1)
+ C_LD REG7,UNIT(15)(a1)
+#ifndef R6_CODE
+ PREFETCH_FOR_LOAD (5, a1)
+#endif
+ C_ST t0,UNIT(8)(a0)
+ C_ST t1,UNIT(9)(a0)
+ C_ST REG2,UNIT(10)(a0)
+ C_ST REG3,UNIT(11)(a0)
+ C_ST REG4,UNIT(12)(a0)
+ C_ST REG5,UNIT(13)(a0)
+ C_ST REG6,UNIT(14)(a0)
+ C_ST REG7,UNIT(15)(a0)
+ PTR_ADDIU a0,a0,UNIT(16) /* adding 64/128 to dest */
+ bne a0,a3,L(loop16w)
+ PTR_ADDIU a1,a1,UNIT(16) /* adding 64/128 to src */
+ move a2,t8
+
+/* Here we have src and dest word-aligned but less than 64 or
+ * 128 bytes to go. Check for a 32(64) byte chunk and copy it if there
+ * is one. Otherwise jump down to L(chk1w) to handle the tail end of
+ * the copy.
+ */
+
+L(chkw):
+ PREFETCH_FOR_LOAD (0, a1)
+	andi	t8,a2,NSIZEMASK /* Is there a 32-byte/64-byte chunk? */
+	/* t8 is the remainder count past 32 bytes */
+ beq a2,t8,L(chk1w) /* When a2=t8, no 32-byte chunk */
+ nop
+ C_LD t0,UNIT(0)(a1)
+ C_LD t1,UNIT(1)(a1)
+ C_LD REG2,UNIT(2)(a1)
+ C_LD REG3,UNIT(3)(a1)
+ C_LD REG4,UNIT(4)(a1)
+ C_LD REG5,UNIT(5)(a1)
+ C_LD REG6,UNIT(6)(a1)
+ C_LD REG7,UNIT(7)(a1)
+ PTR_ADDIU a1,a1,UNIT(8)
+ C_ST t0,UNIT(0)(a0)
+ C_ST t1,UNIT(1)(a0)
+ C_ST REG2,UNIT(2)(a0)
+ C_ST REG3,UNIT(3)(a0)
+ C_ST REG4,UNIT(4)(a0)
+ C_ST REG5,UNIT(5)(a0)
+ C_ST REG6,UNIT(6)(a0)
+ C_ST REG7,UNIT(7)(a0)
+ PTR_ADDIU a0,a0,UNIT(8)
+
+/*
+ * Here we have less than 32(64) bytes to copy. Set up for a loop to
+ * copy one word (or double word) at a time. Set a2 to count how many
+ * bytes we have to copy after all the word (or double word) chunks are
+ * copied and a3 to the dst pointer after all the (d)word chunks have
+ * been copied. We will loop, incrementing a0 and a1 until a0 equals a3.
+ */
+L(chk1w):
+	andi	a2,t8,(NSIZE-1) /* a2 is the remainder past one (d)word chunks */
+ beq a2,t8,L(lastw)
+ PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */
+ PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */
- slti t0, a2, 8 # Less than 8?
- bne t0, zero, L(last8)
- move v0, a0 # Setup exit value before too late
-
- xor t0, a1, a0 # Find a0/a1 displacement
- andi t0, 0x3
- bne t0, zero, L(shift) # Go handle the unaligned case
- subu t1, zero, a1
- andi t1, 0x3 # a0/a1 are aligned, but are we
- beq t1, zero, L(chk8w) # starting in the middle of a word?
- subu a2, t1
- LWHI t0, 0(a1) # Yes we are... take care of that
- addu a1, t1
- SWHI t0, 0(a0)
- addu a0, t1
-
-L(chk8w):
- andi t0, a2, 0x1f # 32 or more bytes left?
- beq t0, a2, L(chk1w)
- subu a3, a2, t0 # Yes
- addu a3, a1 # a3 = end address of loop
- move a2, t0 # a2 = what will be left after loop
-L(lop8w):
- lw t0, 0(a1) # Loop taking 8 words at a time
- lw t1, 4(a1)
- lw t2, 8(a1)
- lw t3, 12(a1)
- lw t4, 16(a1)
- lw t5, 20(a1)
- lw t6, 24(a1)
- lw t7, 28(a1)
- addiu a0, 32
- addiu a1, 32
- sw t0, -32(a0)
- sw t1, -28(a0)
- sw t2, -24(a0)
- sw t3, -20(a0)
- sw t4, -16(a0)
- sw t5, -12(a0)
- sw t6, -8(a0)
- bne a1, a3, L(lop8w)
- sw t7, -4(a0)
-
-L(chk1w):
- andi t0, a2, 0x3 # 4 or more bytes left?
- beq t0, a2, L(last8)
- subu a3, a2, t0 # Yes, handle them one word at a time
- addu a3, a1 # a3 again end address
- move a2, t0
-L(lop1w):
- lw t0, 0(a1)
- addiu a0, 4
- addiu a1, 4
- bne a1, a3, L(lop1w)
- sw t0, -4(a0)
-
-L(last8):
- blez a2, L(lst8e) # Handle last 8 bytes, one at a time
- addu a3, a2, a1
-L(lst8l):
- lb t0, 0(a1)
- addiu a0, 1
- addiu a1, 1
- bne a1, a3, L(lst8l)
- sb t0, -1(a0)
-L(lst8e):
- jr ra # Bye, bye
+/* copying in words (4-byte or 8-byte chunks) */
+L(wordCopy_loop):
+ C_LD REG3,UNIT(0)(a1)
+ PTR_ADDIU a0,a0,UNIT(1)
+ PTR_ADDIU a1,a1,UNIT(1)
+ bne a0,a3,L(wordCopy_loop)
+ C_ST REG3,UNIT(-1)(a0)
+
+/* If we have been copying double words, see if we can copy a single word
+ before doing byte copies. We can have, at most, one word to copy. */
+
+L(lastw):
+#ifdef USE_DOUBLE
+ andi t8,a2,3 /* a2 is the remainder past 4 byte chunks. */
+ beq t8,a2,L(lastb)
+ move a2,t8
+ lw REG3,0(a1)
+ sw REG3,0(a0)
+ PTR_ADDIU a0,a0,4
+ PTR_ADDIU a1,a1,4
+#endif
+
+/* Copy the last 8 (or 16) bytes */
+L(lastb):
+ blez a2,L(leave)
+ PTR_ADDU a3,a0,a2 /* a3 is the last dst address */
+L(lastbloop):
+ lb v1,0(a1)
+ PTR_ADDIU a0,a0,1
+ PTR_ADDIU a1,a1,1
+ bne a0,a3,L(lastbloop)
+ sb v1,-1(a0)
+L(leave):
+ j ra
nop
-L(shift):
- subu a3, zero, a0 # Src and Dest unaligned
- andi a3, 0x3 # (unoptimized case...)
- beq a3, zero, L(shft1)
- subu a2, a3 # a2 = bytes left
- LWHI t0, 0(a1) # Take care of first odd part
- LWLO t0, 3(a1)
- addu a1, a3
- SWHI t0, 0(a0)
- addu a0, a3
-L(shft1):
- andi t0, a2, 0x3
- subu a3, a2, t0
- addu a3, a1
-L(shfth):
- LWHI t1, 0(a1) # Limp through, word by word
- LWLO t1, 3(a1)
- addiu a0, 4
- addiu a1, 4
- bne a1, a3, L(shfth)
- sw t1, -4(a0)
- b L(last8) # Handle anything which may be left
- move a2, t0
+/* We jump here with a memcpy of less than 8 or 16 bytes, depending on
+ whether or not USE_DOUBLE is defined. Instead of just doing byte
+ copies, check the alignment and size and use lw/sw if possible.
+ Otherwise, do byte copies. */
- .set reorder
-END (memcpy)
+L(lasts):
+ andi t8,a2,3
+ beq t8,a2,L(lastb)
+
+ andi t9,a0,3
+ bne t9,zero,L(lastb)
+ andi t9,a1,3
+ bne t9,zero,L(lastb)
+
+ PTR_SUBU a3,a2,t8
+ PTR_ADDU a3,a0,a3
+
+L(wcopy_loop):
+ lw REG3,0(a1)
+ PTR_ADDIU a0,a0,4
+ PTR_ADDIU a1,a1,4
+ bne a0,a3,L(wcopy_loop)
+ sw REG3,-4(a0)
-#endif /* !__mips64 */
+ b L(lastb)
+ move a2,t8
-libc_hidden_def(memcpy)
+#ifndef R6_CODE
+/*
+ * UNALIGNED case, got here with a3 = "negu a0"
+ * This code is nearly identical to the aligned code above
+ * but only the destination (not the source) gets aligned
+ * so we need to do partial loads of the source followed
+ * by normal stores to the destination (once we have aligned
+ * the destination).
+ */
+
+L(unaligned):
+ andi a3,a3,(NSIZE-1) /* copy a3 bytes to align a0/a1 */
+ beqz a3,L(ua_chk16w) /* if a3=0, it is already aligned */
+	PTR_SUBU a2,a2,a3	/* a2 is the remaining bytes count */
+
+ C_LDHI v1,UNIT(0)(a1)
+ C_LDLO v1,UNITM1(1)(a1)
+ PTR_ADDU a1,a1,a3
+ C_STHI v1,UNIT(0)(a0)
+ PTR_ADDU a0,a0,a3
+
+/*
+ * Now the destination (but not the source) is aligned
+ * Set a2 to count how many bytes we have to copy after all the 64/128 byte
+ * chunks are copied and a3 to the dst pointer after all the 64/128 byte
+ * chunks have been copied. We will loop, incrementing a0 and a1 until a0
+ * equals a3.
+ */
+
+L(ua_chk16w):
+ andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
+ beq a2,t8,L(ua_chkw) /* if a2==t8, no 64-byte/128-byte chunks */
+	PTR_SUBU a3,a2,t8	/* subtract from a2 the remainder */
+ PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */
+
+# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+ PTR_ADDU t0,a0,a2 /* t0 is the "past the end" address */
+ PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */
+# endif
+ PREFETCH_FOR_LOAD (0, a1)
+ PREFETCH_FOR_LOAD (1, a1)
+ PREFETCH_FOR_LOAD (2, a1)
+# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
+ PREFETCH_FOR_STORE (1, a0)
+ PREFETCH_FOR_STORE (2, a0)
+ PREFETCH_FOR_STORE (3, a0)
+# endif
+# if defined(RETURN_FIRST_PREFETCH) && defined(USE_PREFETCH)
+# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+ sltu v1,t9,a0
+ bgtz v1,L(ua_skip_set)
+ nop
+ PTR_ADDIU v0,a0,(PREFETCH_CHUNK*4)
+L(ua_skip_set):
+# else
+ PTR_ADDIU v0,a0,(PREFETCH_CHUNK*1)
+# endif
+# endif
+L(ua_loop16w):
+ PREFETCH_FOR_LOAD (3, a1)
+ C_LDHI t0,UNIT(0)(a1)
+ C_LDHI t1,UNIT(1)(a1)
+ C_LDHI REG2,UNIT(2)(a1)
+# if defined(USE_PREFETCH) && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+ sltu v1,t9,a0
+ bgtz v1,L(ua_skip_pref)
+# endif
+ C_LDHI REG3,UNIT(3)(a1)
+ PREFETCH_FOR_STORE (4, a0)
+ PREFETCH_FOR_STORE (5, a0)
+L(ua_skip_pref):
+ C_LDHI REG4,UNIT(4)(a1)
+ C_LDHI REG5,UNIT(5)(a1)
+ C_LDHI REG6,UNIT(6)(a1)
+ C_LDHI REG7,UNIT(7)(a1)
+ C_LDLO t0,UNITM1(1)(a1)
+ C_LDLO t1,UNITM1(2)(a1)
+ C_LDLO REG2,UNITM1(3)(a1)
+ C_LDLO REG3,UNITM1(4)(a1)
+ C_LDLO REG4,UNITM1(5)(a1)
+ C_LDLO REG5,UNITM1(6)(a1)
+ C_LDLO REG6,UNITM1(7)(a1)
+ C_LDLO REG7,UNITM1(8)(a1)
+ PREFETCH_FOR_LOAD (4, a1)
+ C_ST t0,UNIT(0)(a0)
+ C_ST t1,UNIT(1)(a0)
+ C_ST REG2,UNIT(2)(a0)
+ C_ST REG3,UNIT(3)(a0)
+ C_ST REG4,UNIT(4)(a0)
+ C_ST REG5,UNIT(5)(a0)
+ C_ST REG6,UNIT(6)(a0)
+ C_ST REG7,UNIT(7)(a0)
+ C_LDHI t0,UNIT(8)(a1)
+ C_LDHI t1,UNIT(9)(a1)
+ C_LDHI REG2,UNIT(10)(a1)
+ C_LDHI REG3,UNIT(11)(a1)
+ C_LDHI REG4,UNIT(12)(a1)
+ C_LDHI REG5,UNIT(13)(a1)
+ C_LDHI REG6,UNIT(14)(a1)
+ C_LDHI REG7,UNIT(15)(a1)
+ C_LDLO t0,UNITM1(9)(a1)
+ C_LDLO t1,UNITM1(10)(a1)
+ C_LDLO REG2,UNITM1(11)(a1)
+ C_LDLO REG3,UNITM1(12)(a1)
+ C_LDLO REG4,UNITM1(13)(a1)
+ C_LDLO REG5,UNITM1(14)(a1)
+ C_LDLO REG6,UNITM1(15)(a1)
+ C_LDLO REG7,UNITM1(16)(a1)
+ PREFETCH_FOR_LOAD (5, a1)
+ C_ST t0,UNIT(8)(a0)
+ C_ST t1,UNIT(9)(a0)
+ C_ST REG2,UNIT(10)(a0)
+ C_ST REG3,UNIT(11)(a0)
+ C_ST REG4,UNIT(12)(a0)
+ C_ST REG5,UNIT(13)(a0)
+ C_ST REG6,UNIT(14)(a0)
+ C_ST REG7,UNIT(15)(a0)
+ PTR_ADDIU a0,a0,UNIT(16) /* adding 64/128 to dest */
+ bne a0,a3,L(ua_loop16w)
+ PTR_ADDIU a1,a1,UNIT(16) /* adding 64/128 to src */
+ move a2,t8
+
+/* Here we have src and dest word-aligned but less than 64 or
+ * 128 bytes to go. Check for a 32(64) byte chunk and copy it if there
+ * is one. Otherwise jump down to L(ua_chk1w) to handle the tail end of
+ * the copy. */
+
+L(ua_chkw):
+ PREFETCH_FOR_LOAD (0, a1)
+	andi	t8,a2,NSIZEMASK /* Is there a 32-byte/64-byte chunk? */
+	/* t8 is the remainder count past 32 bytes */
+ beq a2,t8,L(ua_chk1w) /* When a2=t8, no 32-byte chunk */
+ nop
+ C_LDHI t0,UNIT(0)(a1)
+ C_LDHI t1,UNIT(1)(a1)
+ C_LDHI REG2,UNIT(2)(a1)
+ C_LDHI REG3,UNIT(3)(a1)
+ C_LDHI REG4,UNIT(4)(a1)
+ C_LDHI REG5,UNIT(5)(a1)
+ C_LDHI REG6,UNIT(6)(a1)
+ C_LDHI REG7,UNIT(7)(a1)
+ C_LDLO t0,UNITM1(1)(a1)
+ C_LDLO t1,UNITM1(2)(a1)
+ C_LDLO REG2,UNITM1(3)(a1)
+ C_LDLO REG3,UNITM1(4)(a1)
+ C_LDLO REG4,UNITM1(5)(a1)
+ C_LDLO REG5,UNITM1(6)(a1)
+ C_LDLO REG6,UNITM1(7)(a1)
+ C_LDLO REG7,UNITM1(8)(a1)
+ PTR_ADDIU a1,a1,UNIT(8)
+ C_ST t0,UNIT(0)(a0)
+ C_ST t1,UNIT(1)(a0)
+ C_ST REG2,UNIT(2)(a0)
+ C_ST REG3,UNIT(3)(a0)
+ C_ST REG4,UNIT(4)(a0)
+ C_ST REG5,UNIT(5)(a0)
+ C_ST REG6,UNIT(6)(a0)
+ C_ST REG7,UNIT(7)(a0)
+ PTR_ADDIU a0,a0,UNIT(8)
+/*
+ * Here we have less than 32(64) bytes to copy. Set up for a loop to
+ * copy one word (or double word) at a time.
+ */
+L(ua_chk1w):
+	andi	a2,t8,(NSIZE-1) /* a2 is the remainder past one (d)word chunks */
+ beq a2,t8,L(ua_smallCopy)
+ PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */
+ PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */
+
+/* copying in words (4-byte or 8-byte chunks) */
+L(ua_wordCopy_loop):
+ C_LDHI v1,UNIT(0)(a1)
+ C_LDLO v1,UNITM1(1)(a1)
+ PTR_ADDIU a0,a0,UNIT(1)
+ PTR_ADDIU a1,a1,UNIT(1)
+ bne a0,a3,L(ua_wordCopy_loop)
+ C_ST v1,UNIT(-1)(a0)
+
+/* Copy the last 8 (or 16) bytes */
+L(ua_smallCopy):
+ beqz a2,L(leave)
+ PTR_ADDU a3,a0,a2 /* a3 is the last dst address */
+L(ua_smallCopy_loop):
+ lb v1,0(a1)
+ PTR_ADDIU a0,a0,1
+ PTR_ADDIU a1,a1,1
+ bne a0,a3,L(ua_smallCopy_loop)
+ sb v1,-1(a0)
+
+ j ra
+ nop
+
+#else /* R6_CODE */
+
+# ifdef __MIPSEB
+# define SWAP_REGS(X,Y) X, Y
+# define ALIGN_OFFSET(N) (N)
+# else
+# define SWAP_REGS(X,Y) Y, X
+# define ALIGN_OFFSET(N) (NSIZE-N)
+# endif
+# define R6_UNALIGNED_WORD_COPY(BYTEOFFSET) \
+	andi	REG7, a2, (NSIZE-1);/* REG7 is # of bytes to copy byte by byte. */ \
+	beq	REG7, a2, L(lastb); /* If nothing to copy by word, do bytes. */ \
+ PTR_SUBU a3, a2, REG7; /* a3 is number of bytes to be copied in */ \
+ /* (d)word chunks. */ \
+ move a2, REG7; /* a2 is # of bytes to copy byte by byte */ \
+ /* after word loop is finished. */ \
+ PTR_ADDU REG6, a0, a3; /* REG6 is the dst address after loop. */ \
+ PTR_SUBU REG2, a1, t8; /* REG2 is the aligned src address. */ \
+ PTR_ADDU a1, a1, a3; /* a1 is addr of source after word loop. */ \
+ C_LD t0, UNIT(0)(REG2); /* Load first part of source. */ \
+L(r6_ua_wordcopy##BYTEOFFSET): \
+ C_LD t1, UNIT(1)(REG2); /* Load second part of source. */ \
+ C_ALIGN REG3, SWAP_REGS(t1,t0), ALIGN_OFFSET(BYTEOFFSET); \
+ PTR_ADDIU a0, a0, UNIT(1); /* Increment destination pointer. */ \
+ PTR_ADDIU REG2, REG2, UNIT(1); /* Increment aligned source pointer.*/ \
+ move t0, t1; /* Move second part of source to first. */ \
+ bne a0, REG6,L(r6_ua_wordcopy##BYTEOFFSET); \
+ C_ST REG3, UNIT(-1)(a0); \
+ j L(lastb); \
+ nop
+
+ /* We are generating R6 code, the destination is 4 byte aligned and
+ the source is not 4 byte aligned. t8 is 1, 2, or 3 depending on the
+ alignment of the source. */
+
+L(r6_unaligned1):
+ R6_UNALIGNED_WORD_COPY(1)
+L(r6_unaligned2):
+ R6_UNALIGNED_WORD_COPY(2)
+L(r6_unaligned3):
+ R6_UNALIGNED_WORD_COPY(3)
+# ifdef USE_DOUBLE
+L(r6_unaligned4):
+ R6_UNALIGNED_WORD_COPY(4)
+L(r6_unaligned5):
+ R6_UNALIGNED_WORD_COPY(5)
+L(r6_unaligned6):
+ R6_UNALIGNED_WORD_COPY(6)
+L(r6_unaligned7):
+ R6_UNALIGNED_WORD_COPY(7)
+# endif
+#endif /* R6_CODE */
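
Each iteration of R6_UNALIGNED_WORD_COPY merges two aligned loads with one align instruction. A C model of the merge for the 32-bit little-endian case (off is the source byte offset, 1..3; ALIGN_OFFSET(off) = 4-off):

	#include <stdint.h>

	static uint32_t merge_le(uint32_t w0, uint32_t w1, unsigned off)
	{
		/* top bytes of the current word, low bytes of the next */
		return (w0 >> (8 * off)) | (w1 << (8 * (4 - off)));
	}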
+
+ .set at
+ .set reorder
+END(MEMCPY_NAME)
+#ifndef ANDROID_CHANGES
+# ifdef _LIBC
+# ifdef __UCLIBC__
+libc_hidden_def(MEMCPY_NAME)
+# else
+libc_hidden_builtin_def (MEMCPY_NAME)
+# endif
+# endif
+#endif
diff --git a/libc/string/mips/memset.S b/libc/string/mips/memset.S
index ff0554ff9..43034cebb 100644
--- a/libc/string/mips/memset.S
+++ b/libc/string/mips/memset.S
@@ -1,6 +1,5 @@
-/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Hartvig Ekner <hartvige@mips.com>, 2002.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -13,147 +12,420 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
-#include <features.h>
-/*#include <sysdep.h>*/
-#include <endian.h>
-#include "sysdep.h"
+#ifdef ANDROID_CHANGES
+# include "machine/asm.h"
+# include "machine/regdef.h"
+# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
+#elif _LIBC
+# include <sysdep.h>
+# include <sys/regdef.h>
+# include <sys/asm.h>
+# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
+#elif defined _COMPILING_NEWLIB
+# include "machine/asm.h"
+# include "machine/regdef.h"
+# define PREFETCH_STORE_HINT PREFETCH_HINT_PREPAREFORSTORE
+#else
+# include <sys/regdef.h>
+# include <sys/asm.h>
+#endif
+
+/* Check to see if the MIPS architecture we are compiling for supports
+ prefetching. */
+
+#if (__mips == 4) || (__mips == 5) || (__mips == 32) || (__mips == 64)
+# ifndef DISABLE_PREFETCH
+# define USE_PREFETCH
+# endif
+#endif
+
+#if defined(_MIPS_SIM) && ((_MIPS_SIM == _ABI64) || (_MIPS_SIM == _ABIN32))
+# ifndef DISABLE_DOUBLE
+# define USE_DOUBLE
+# endif
+#endif
+
+#ifndef USE_DOUBLE
+# ifndef DISABLE_DOUBLE_ALIGN
+# define DOUBLE_ALIGN
+# endif
+#endif
+
+
+/* Some asm.h files do not have the L macro definition. */
+#ifndef L
+# if _MIPS_SIM == _ABIO32
+# define L(label) $L ## label
+# else
+# define L(label) .L ## label
+# endif
+#endif
+
+/* Some asm.h files do not have the PTR_ADDIU macro definition. */
+#ifndef PTR_ADDIU
+# ifdef USE_DOUBLE
+# define PTR_ADDIU daddiu
+# else
+# define PTR_ADDIU addiu
+# endif
+#endif
-/* void *memset(void *s, int c, size_t n). */
+/* New R6 instructions that may not be in asm.h. */
+#ifndef PTR_LSA
+# if _MIPS_SIM == _ABI64
+# define PTR_LSA dlsa
+# else
+# define PTR_LSA lsa
+# endif
+#endif
+
+/* Using PREFETCH_HINT_PREPAREFORSTORE instead of PREFETCH_STORE
+ or PREFETCH_STORE_STREAMED offers a large performance advantage
+ but PREPAREFORSTORE has some special restrictions to consider.
+
+ Prefetch with the 'prepare for store' hint does not copy a memory
+ location into the cache, it just allocates a cache line and zeros
+ it out. This means that if you do not write to the entire cache
+ line before writing it out to memory, some data will get zeroed out
+ when the cache line is written back to memory and data will be lost.
+
+ There are ifdef'ed sections of this memset to make sure that it does not
+ do prefetches on cache lines that are not going to be completely written.
+ This code is only needed and only used when PREFETCH_STORE_HINT is set to
+ PREFETCH_HINT_PREPAREFORSTORE. This code assumes that cache lines are
+ less than MAX_PREFETCH_SIZE bytes and if the cache line is larger it will
+ not work correctly. */
+
+#ifdef USE_PREFETCH
+# define PREFETCH_HINT_STORE 1
+# define PREFETCH_HINT_STORE_STREAMED 5
+# define PREFETCH_HINT_STORE_RETAINED 7
+# define PREFETCH_HINT_PREPAREFORSTORE 30
+
+/* If we have not picked out what hints to use at this point use the
+ standard load and store prefetch hints. */
+# ifndef PREFETCH_STORE_HINT
+# define PREFETCH_STORE_HINT PREFETCH_HINT_STORE
+# endif
+
+/* We double everything when USE_DOUBLE is true so we do 2 prefetches to
+ get 64 bytes in that case. The assumption is that each individual
+ prefetch brings in 32 bytes. */
+# ifdef USE_DOUBLE
+# define PREFETCH_CHUNK 64
+# define PREFETCH_FOR_STORE(chunk, reg) \
+ pref PREFETCH_STORE_HINT, (chunk)*64(reg); \
+ pref PREFETCH_STORE_HINT, ((chunk)*64)+32(reg)
+# else
+# define PREFETCH_CHUNK 32
+# define PREFETCH_FOR_STORE(chunk, reg) \
+ pref PREFETCH_STORE_HINT, (chunk)*32(reg)
+# endif
-#ifdef __mips64
+/* MAX_PREFETCH_SIZE is the maximum size of a prefetch; it must not be less
+   than PREFETCH_CHUNK, the assumed size of each prefetch.  If the real size
+   of a prefetch is greater than MAX_PREFETCH_SIZE and the PREPAREFORSTORE
+   hint is used, the code will not work correctly.  If PREPAREFORSTORE is not
+   used then MAX_PREFETCH_SIZE does not matter.  */
+# define MAX_PREFETCH_SIZE 128
+/* PREFETCH_LIMIT is set based on the fact that we never use an offset greater
+ than 5 on a STORE prefetch and that a single prefetch can never be larger
+ than MAX_PREFETCH_SIZE. We add the extra 32 when USE_DOUBLE is set because
+ we actually do two prefetches in that case, one 32 bytes after the other. */
+# ifdef USE_DOUBLE
+# define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + 32 + MAX_PREFETCH_SIZE
+# else
+# define PREFETCH_LIMIT (5 * PREFETCH_CHUNK) + MAX_PREFETCH_SIZE
+# endif
-#include <sys/asm.h>
+# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE) \
+ && ((PREFETCH_CHUNK * 4) < MAX_PREFETCH_SIZE)
+/* We cannot handle this because the initial prefetches may fetch bytes that
+ are before the buffer being copied. We start copies with an offset
+   of 4, so avoid this situation when using PREPAREFORSTORE.  */
+# error "PREFETCH_CHUNK is too large and/or MAX_PREFETCH_SIZE is too small."
+# endif
+#else /* USE_PREFETCH not defined */
+# define PREFETCH_FOR_STORE(offset, reg)
+#endif
+
+#if __mips_isa_rev > 5
+# if (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+# undef PREFETCH_STORE_HINT
+# define PREFETCH_STORE_HINT PREFETCH_HINT_STORE_STREAMED
+# endif
+# define R6_CODE
+#endif
-#if __BYTE_ORDER == __BIG_ENDIAN
-# define SDHI sdl /* high part is left in big-endian */
+/* Allow the routine to be named something else if desired. */
+#ifndef MEMSET_NAME
+# define MEMSET_NAME memset
+#endif
+
+/* We load/store 64 bits at a time when USE_DOUBLE is true.
+ The C_ prefix stands for CHUNK and is used to avoid macro name
+ conflicts with system header files. */
+
+#ifdef USE_DOUBLE
+# define C_ST sd
+# ifdef __MIPSEB
+# define C_STHI sdl /* high part is left in big-endian */
+# else
+# define C_STHI sdr /* high part is right in little-endian */
+# endif
#else
-# define SDHI sdr /* high part is right in little-endian */
+# define C_ST sw
+# ifdef __MIPSEB
+# define C_STHI swl /* high part is left in big-endian */
+# else
+# define C_STHI swr /* high part is right in little-endian */
+# endif
#endif
-ENTRY (memset)
- .set noreorder
+/* Bookkeeping values for 32 vs. 64 bit mode. */
+#ifdef USE_DOUBLE
+# define NSIZE 8
+# define NSIZEMASK 0x3f
+# define NSIZEDMASK 0x7f
+#else
+# define NSIZE 4
+# define NSIZEMASK 0x1f
+# define NSIZEDMASK 0x3f
+#endif
+#define UNIT(unit) ((unit)*NSIZE)
+#define UNITM1(unit) (((unit)*NSIZE)-1)
- slti ta1, a2, 16 # Less than 16?
- bne ta1, zero, L(last16)
- move v0, a0 # Setup exit value before too late
-
- beq a1, zero, L(ueven) # If zero pattern, no need to extend
- andi a1, 0xff # Avoid problems with bogus arguments
- dsll ta0, a1, 8
- or a1, ta0
- dsll ta0, a1, 16
- or a1, ta0 # a1 is now pattern in full word
- dsll ta0, a1, 32
- or a1, ta0 # a1 is now pattern in double word
-
-L(ueven):
- PTR_SUBU ta0, zero, a0 # Unaligned address?
- andi ta0, 0x7
- beq ta0, zero, L(chkw)
- PTR_SUBU a2, ta0
- SDHI a1, 0(a0) # Yes, handle first unaligned part
- PTR_ADDU a0, ta0 # Now both a0 and a2 are updated
+#ifdef ANDROID_CHANGES
+LEAF(MEMSET_NAME,0)
+#else
+LEAF(MEMSET_NAME)
+#endif
-L(chkw):
- andi ta0, a2, 0xf # Enough left for one loop iteration?
- beq ta0, a2, L(chkl)
- PTR_SUBU a3, a2, ta0
- PTR_ADDU a3, a0 # a3 is last loop address +1
- move a2, ta0 # a2 is now # of bytes left after loop
-L(loopw):
- PTR_ADDIU a0, 16 # Handle 2 dwords pr. iteration
- sd a1, -16(a0)
- bne a0, a3, L(loopw)
- sd a1, -8(a0)
-
-L(chkl):
- andi ta0, a2, 0x8 # Check if there is at least a double
- beq ta0, zero, L(last16) # word remaining after the loop
- PTR_SUBU a2, ta0
- sd a1, 0(a0) # Yes...
- PTR_ADDIU a0, 8
-
-L(last16):
- blez a2, L(exit) # Handle last 16 bytes (if cnt>0)
- PTR_ADDU a3, a2, a0 # a3 is last address +1
-L(lst16l):
- PTR_ADDIU a0, 1
- bne a0, a3, L(lst16l)
- sb a1, -1(a0)
-L(exit):
- j ra # Bye, bye
+ .set nomips16
+ .set noreorder
+/* If the size is less than 2*NSIZE (8 or 16), go to L(lastb). Regardless of
+ size, copy dst pointer to v0 for the return value. */
+ slti t2,a2,(2 * NSIZE)
+ bne t2,zero,L(lastb)
+ move v0,a0
+
+/* If memset value is not zero, we copy it to all the bytes in a 32 or 64
+ bit word. */
+ beq a1,zero,L(set0) /* If memset value is zero no smear */
+ PTR_SUBU a3,zero,a0
nop
- .set reorder
-END (memset)
+ /* smear byte into 32 or 64 bit word */
+#if ((__mips == 64) || (__mips == 32)) && (__mips_isa_rev >= 2)
+# ifdef USE_DOUBLE
+ dins a1, a1, 8, 8 /* Replicate fill byte into half-word. */
+ dins a1, a1, 16, 16 /* Replicate fill byte into word. */
+ dins a1, a1, 32, 32 /* Replicate fill byte into dbl word. */
+# else
+ ins a1, a1, 8, 8 /* Replicate fill byte into half-word. */
+ ins a1, a1, 16, 16 /* Replicate fill byte into word. */
+# endif
+#else
+# ifdef USE_DOUBLE
+ and a1,0xff
+ dsll t2,a1,8
+ or a1,t2
+ dsll t2,a1,16
+ or a1,t2
+ dsll t2,a1,32
+ or a1,t2
+# else
+ and a1,0xff
+ sll t2,a1,8
+ or a1,t2
+ sll t2,a1,16
+ or a1,t2
+# endif
+#endif
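
The fallback shift/or sequence above is the classic byte smear; in C, for the 32-bit case:

	#include <stdint.h>

	static uint32_t smear32(uint8_t c)
	{
		uint32_t v = c;
		v |= v << 8;    /* 0x000000AB -> 0x0000ABAB */
		v |= v << 16;   /* 0x0000ABAB -> 0xABABABAB */
		return v;
	}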
+
+/* If the destination address is not aligned do a partial store to get it
+ aligned. If it is already aligned just jump to L(aligned). */
+L(set0):
+#ifndef R6_CODE
+ andi t2,a3,(NSIZE-1) /* word-unaligned address? */
+ beq t2,zero,L(aligned) /* t2 is the unalignment count */
+ PTR_SUBU a2,a2,t2
+ C_STHI a1,0(a0)
+ PTR_ADDU a0,a0,t2
+#else /* R6_CODE */
+ andi t2,a0,(NSIZE-1)
+ lapc t9,L(atable)
+ PTR_LSA t9,t2,t9,2
+ jrc t9
+L(atable):
+ bc L(aligned)
+# ifdef USE_DOUBLE
+ bc L(lb7)
+ bc L(lb6)
+ bc L(lb5)
+ bc L(lb4)
+# endif
+ bc L(lb3)
+ bc L(lb2)
+ bc L(lb1)
+L(lb7):
+ sb a1,6(a0)
+L(lb6):
+ sb a1,5(a0)
+L(lb5):
+ sb a1,4(a0)
+L(lb4):
+ sb a1,3(a0)
+L(lb3):
+ sb a1,2(a0)
+L(lb2):
+ sb a1,1(a0)
+L(lb1):
+ sb a1,0(a0)
+
+ li t9,NSIZE
+ subu t2,t9,t2
+ PTR_SUBU a2,a2,t2
+ PTR_ADDU a0,a0,t2
+#endif /* R6_CODE */
+
+L(aligned):
+/* If USE_DOUBLE is not set we may still want to align the data on a 16
+   byte boundary instead of an 8 byte boundary to maximize the opportunity
+   for proAptiv chips to do memory bonding (combining two sequential 4
+ byte stores into one 8 byte store). We know there are at least 4 bytes
+ left to store or we would have jumped to L(lastb) earlier in the code. */
+#ifdef DOUBLE_ALIGN
+ andi t2,a3,4
+ beq t2,zero,L(double_aligned)
+ PTR_SUBU a2,a2,t2
+ sw a1,0(a0)
+ PTR_ADDU a0,a0,t2
+L(double_aligned):
+#endif
-#else /* !__mips64 */
+/* Now the destination is aligned to a (word or double word) boundary.
+ Set a2 to count how many bytes we have to copy after all the 64/128 byte
+ chunks are copied and a3 to the dest pointer after all the 64/128 byte
+ chunks have been copied. We will loop, incrementing a0 until it equals
+ a3. */
+ andi t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
+ beq a2,t8,L(chkw) /* if a2==t8, no 64-byte/128-byte chunks */
+	PTR_SUBU a3,a2,t8	/* subtract from a2 the remainder */
+ PTR_ADDU a3,a0,a3 /* Now a3 is the final dst after loop */
-#if __BYTE_ORDER == __BIG_ENDIAN
-# define SWHI swl /* high part is left in big-endian */
+/* When in the loop we may prefetch with the 'prepare to store' hint;
+   in this case a0+x should not be past the "t0-32" address.  This
+   means: for x=128 the last "safe" a0 address is "t0-160".  Alternatively,
+   for x=64 the last "safe" a0 address is "t0-96".  In the current version we
+   will use "prefetch hint,128(a0)", so "t0-160" is the limit.  */
+#if defined(USE_PREFETCH) \
+ && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+ PTR_ADDU t0,a0,a2 /* t0 is the "past the end" address */
+ PTR_SUBU t9,t0,PREFETCH_LIMIT /* t9 is the "last safe pref" address */
+#endif
+#if defined(USE_PREFETCH) \
+ && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
+ PREFETCH_FOR_STORE (1, a0)
+ PREFETCH_FOR_STORE (2, a0)
+ PREFETCH_FOR_STORE (3, a0)
+#endif
+
+L(loop16w):
+#if defined(USE_PREFETCH) \
+ && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
+ sltu v1,t9,a0 /* If a0 > t9 don't use next prefetch */
+ bgtz v1,L(skip_pref)
+ nop
+#endif
+#ifdef R6_CODE
+ PREFETCH_FOR_STORE (2, a0)
#else
-# define SWHI swr /* high part is right in little-endian */
+ PREFETCH_FOR_STORE (4, a0)
+ PREFETCH_FOR_STORE (5, a0)
#endif
+L(skip_pref):
+ C_ST a1,UNIT(0)(a0)
+ C_ST a1,UNIT(1)(a0)
+ C_ST a1,UNIT(2)(a0)
+ C_ST a1,UNIT(3)(a0)
+ C_ST a1,UNIT(4)(a0)
+ C_ST a1,UNIT(5)(a0)
+ C_ST a1,UNIT(6)(a0)
+ C_ST a1,UNIT(7)(a0)
+ C_ST a1,UNIT(8)(a0)
+ C_ST a1,UNIT(9)(a0)
+ C_ST a1,UNIT(10)(a0)
+ C_ST a1,UNIT(11)(a0)
+ C_ST a1,UNIT(12)(a0)
+ C_ST a1,UNIT(13)(a0)
+ C_ST a1,UNIT(14)(a0)
+ C_ST a1,UNIT(15)(a0)
+ PTR_ADDIU a0,a0,UNIT(16) /* adding 64/128 to dest */
+ bne a0,a3,L(loop16w)
+ nop
+ move a2,t8
-ENTRY (memset)
- .set noreorder
+/* Here we have dest word-aligned but less than 64 or 128 bytes to go.
+   Check for a 32(64) byte chunk and copy it if there is one.  Otherwise
+ jump down to L(chk1w) to handle the tail end of the copy. */
+L(chkw):
+	andi	t8,a2,NSIZEMASK /* is there a 32-byte/64-byte chunk? */
+	/* t8 is the remainder count past 32 bytes */
+ beq a2,t8,L(chk1w)/* when a2==t8, no 32-byte chunk */
+ nop
+ C_ST a1,UNIT(0)(a0)
+ C_ST a1,UNIT(1)(a0)
+ C_ST a1,UNIT(2)(a0)
+ C_ST a1,UNIT(3)(a0)
+ C_ST a1,UNIT(4)(a0)
+ C_ST a1,UNIT(5)(a0)
+ C_ST a1,UNIT(6)(a0)
+ C_ST a1,UNIT(7)(a0)
+ PTR_ADDIU a0,a0,UNIT(8)
+
+/* Here we have less than 32(64) bytes to set. Set up for a loop to
+ copy one word (or double word) at a time. Set a2 to count how many
+ bytes we have to copy after all the word (or double word) chunks are
+ copied and a3 to the dest pointer after all the (d)word chunks have
+ been copied. We will loop, incrementing a0 until a0 equals a3. */
+L(chk1w):
+	andi	a2,t8,(NSIZE-1) /* a2 is the remainder past one (d)word chunks */
+ beq a2,t8,L(lastb)
+ PTR_SUBU a3,t8,a2 /* a3 is count of bytes in one (d)word chunks */
+ PTR_ADDU a3,a0,a3 /* a3 is the dst address after loop */
- slti t1, a2, 8 # Less than 8?
- bne t1, zero, L(last8)
- move v0, a0 # Setup exit value before too late
-
- beq a1, zero, L(ueven) # If zero pattern, no need to extend
- andi a1, 0xff # Avoid problems with bogus arguments
- sll t0, a1, 8
- or a1, t0
- sll t0, a1, 16
- or a1, t0 # a1 is now pattern in full word
-
-L(ueven):
- subu t0, zero, a0 # Unaligned address?
- andi t0, 0x3
- beq t0, zero, L(chkw)
- subu a2, t0
- SWHI a1, 0(a0) # Yes, handle first unaligned part
- addu a0, t0 # Now both a0 and a2 are updated
-
-L(chkw):
- andi t0, a2, 0x7 # Enough left for one loop iteration?
- beq t0, a2, L(chkl)
- subu a3, a2, t0
- addu a3, a0 # a3 is last loop address +1
- move a2, t0 # a2 is now # of bytes left after loop
-L(loopw):
- addiu a0, 8 # Handle 2 words pr. iteration
- sw a1, -8(a0)
- bne a0, a3, L(loopw)
- sw a1, -4(a0)
-
-L(chkl):
- andi t0, a2, 0x4 # Check if there is at least a full
- beq t0, zero, L(last8) # word remaining after the loop
- subu a2, t0
- sw a1, 0(a0) # Yes...
- addiu a0, 4
-
-L(last8):
- blez a2, L(exit) # Handle last 8 bytes (if cnt>0)
- addu a3, a2, a0 # a3 is last address +1
-L(lst8l):
- addiu a0, 1
- bne a0, a3, L(lst8l)
- sb a1, -1(a0)
-L(exit):
- j ra # Bye, bye
+/* copying in words (4-byte or 8 byte chunks) */
+L(wordCopy_loop):
+ PTR_ADDIU a0,a0,UNIT(1)
+ bne a0,a3,L(wordCopy_loop)
+ C_ST a1,UNIT(-1)(a0)
+
+/* Copy the last 8 (or 16) bytes */
+L(lastb):
+ blez a2,L(leave)
+ PTR_ADDU a3,a0,a2 /* a3 is the last dst address */
+L(lastbloop):
+ PTR_ADDIU a0,a0,1
+ bne a0,a3,L(lastbloop)
+ sb a1,-1(a0)
+L(leave):
+ j ra
nop
+ .set at
.set reorder
-END (memset)
-
-#endif /* !__mips64 */
+END(MEMSET_NAME)
+#ifndef ANDROID_CHANGES
+# ifdef _LIBC
+# ifdef __UCLIBC__
+libc_hidden_def(MEMSET_NAME)
+# else
+libc_hidden_builtin_def (MEMSET_NAME)
+# endif
+# endif
+#endif
-libc_hidden_def(memset)
diff --git a/libc/string/mips/sysdep.h b/libc/string/mips/sysdep.h
deleted file mode 100644
index 5dad8342e..000000000
--- a/libc/string/mips/sysdep.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* Adapted from glibc's sysdeps/unix/mips/sysdep.h */
-
-/* Copyright (C) 1992, 1995, 1997, 1999, 2000, 2002, 2003
- Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Brendan Kehoe (brendan@zen.org).
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifdef __ASSEMBLER__
-
-#include <sgidefs.h>
-#include <sys/regdef.h>
-
-#define ENTRY(name) \
- .globl name; \
- .align 2; \
- .ent name,0; \
- name/* use a comment rather than ## to workaround bug in gcc-3.4.x */:
-
-#undef END
-#define END(function) \
- .end function; \
- .size function,.-function
-
-#if _MIPS_SIM == _MIPS_SIM_ABI32 || _MIPS_SIM == _MIPS_SIM_ABIO64
-# define L(label) $L ## label
-#else
-# define L(label) .L ## label
-#endif
-
-#endif
diff --git a/libc/string/powerpc/memcpy.c b/libc/string/powerpc/memcpy.c
index f3d800739..22794ec33 100644
--- a/libc/string/powerpc/memcpy.c
+++ b/libc/string/powerpc/memcpy.c
@@ -21,16 +21,15 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(memcpy) */
-void *memcpy(void *to, const void *from, size_t n)
-/* PPC can do pre increment and load/store, but not post increment and load/store.
- Therefore use *++ptr instead of *ptr++. */
+/* PPC can do pre increment and load/store, but not post increment and
+ load/store. Therefore use *++ptr instead of *ptr++. */
+void *memcpy(void *to, const void *from, size_t len)
{
unsigned long rem, chunks, tmp1, tmp2;
unsigned char *tmp_to;
unsigned char *tmp_from = (unsigned char *)from;
- chunks = n / 8;
+ chunks = len / 8;
tmp_from -= 4;
tmp_to = to - 4;
if (!chunks)
@@ -49,30 +48,33 @@ void *memcpy(void *to, const void *from, size_t n)
*(unsigned long *)tmp_to = tmp2;
} while (--chunks);
lessthan8:
- n = n % 8;
- if (n >= 4) {
- *(unsigned long *)(tmp_to+4) = *(unsigned long *)(tmp_from+4);
+ len = len % 8;
+ if (len >= 4) {
tmp_from += 4;
tmp_to += 4;
- n = n-4;
+ *(unsigned long *)(tmp_to) = *(unsigned long *)(tmp_from);
+ len -= 4;
}
- if (!n ) return to;
+ if (!len)
+ return to;
tmp_from += 3;
tmp_to += 3;
do {
*++tmp_to = *++tmp_from;
- } while (--n);
+ } while (--len);
return to;
align:
+ /* ???: Do we really need to generate the carry flag here? If not, then:
+ rem -= 4; */
rem = 4 - rem;
- n = n - rem;
+ len -= rem;
do {
*(tmp_to+4) = *(tmp_from+4);
++tmp_from;
++tmp_to;
} while (--rem);
- chunks = n / 8;
+ chunks = len / 8;
if (chunks)
goto copy_chunks;
goto lessthan8;
diff --git a/libc/string/powerpc/memmove.c b/libc/string/powerpc/memmove.c
index 8badae37d..6bd79915d 100644
--- a/libc/string/powerpc/memmove.c
+++ b/libc/string/powerpc/memmove.c
@@ -21,9 +21,7 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(memcpy) */
-/* Experimentally off - libc_hidden_proto(memmove) */
void *memmove(void *to, const void *from, size_t n)
{
unsigned long rem, chunks, tmp1, tmp2;
diff --git a/libc/string/powerpc/memset.c b/libc/string/powerpc/memset.c
index 1cbfd04fc..a900b92cb 100644
--- a/libc/string/powerpc/memset.c
+++ b/libc/string/powerpc/memset.c
@@ -21,7 +21,6 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(memset) */
static __inline__ int expand_byte_word(int c){
/* this does:
diff --git a/libc/string/psignal.c b/libc/string/psignal.c
index 1ca8725db..3e1f68b94 100644
--- a/libc/string/psignal.c
+++ b/libc/string/psignal.c
@@ -10,8 +10,6 @@
#include <string.h>
#include <signal.h>
-libc_hidden_proto(fprintf)
-/* Experimentally off - libc_hidden_proto(strsignal) */
/* TODO: make this threadsafe with a reentrant version of strsignal? */
diff --git a/libc/string/rawmemchr.c b/libc/string/rawmemchr.c
index 3cddefa10..f0cb7ee47 100644
--- a/libc/string/rawmemchr.c
+++ b/libc/string/rawmemchr.c
@@ -8,7 +8,6 @@
#include "_string.h"
#ifdef __USE_GNU
-/* Experimentally off - libc_hidden_proto(rawmemchr) */
void *rawmemchr(const void *s, int c)
{
register const unsigned char *r = s;
diff --git a/libc/string/sh/memchr.S b/libc/string/sh/memchr.S
new file mode 100644
index 000000000..6b7142f69
--- /dev/null
+++ b/libc/string/sh/memchr.S
@@ -0,0 +1,30 @@
+/* $Id: memchr.S,v 1.1 2000/04/14 16:49:01 mjd Exp $
+ *
+ * "memchr" implementation of SuperH
+ *
+ * Copyright (C) 1999 Niibe Yutaka
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+/*
+ * void *memchr(const void *s, int c, size_t n);
+ */
+
+#include <sysdep.h>
+
+ENTRY(memchr)
+ tst r6,r6
+ bt/s 2f
+ exts.b r5,r5
+1: mov.b @r4,r1
+ cmp/eq r1,r5
+ bt/s 3f
+ dt r6
+ bf/s 1b
+ add #1,r4
+2: mov #0,r4
+3: rts
+ mov r4,r0
+END(memchr)
+libc_hidden_def (memchr)
diff --git a/libc/string/sh/sh4/memcpy.S b/libc/string/sh/sh4/memcpy.S
index 2d918293e..6a229a06c 100644
--- a/libc/string/sh/sh4/memcpy.S
+++ b/libc/string/sh/sh4/memcpy.S
@@ -6,6 +6,9 @@
* Modified from memcpy.S and micro-optimised for SH4
* Stuart Menefy (stuart.menefy@st.com)
*
+ * Copyright (c) 2009 STMicroelectronics Ltd
+ * Optimised using prefetching and 64bit data transfer via FPU
+ * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
*/
/*
@@ -15,8 +18,32 @@
* If there is an overlap, then the results are undefined.
*/
+#include <sysdep.h>
#include <endian.h>
+#if defined (__LITTLE_ENDIAN__) && defined (__SH_FPU_ANY__)
+#define MEMCPY_USES_FPU
+/* Use paired single precision load or store mode for 64-bit transfers.
+ * FPSCR.SZ=1, FPSCR.PR=0 is well defined on both SH4-200 and SH4-300.
+ * Currently it has only been implemented and tested for little-endian mode. */
+.macro FPU_SET_PAIRED_PREC
+ sts fpscr, r7
+ mov #0x10, r0 ! PR=0 SZ=1
+ shll16 r0
+ lds r0, fpscr
+.endm
+.macro RESTORE_FPSCR
+ lds r7, fpscr
+.endm
+.macro DALLOC
+ ! Cache allocate + store on dst-32.
+ add #-32, r1
+ movca.l r0, @r1
+ add #32, r1
+.endm
+
+#endif
+
!
! GHIJ KLMN OPQR --> ...G HIJK LMNO PQR.
!
@@ -127,10 +154,10 @@
mov.l r3,@-r0 ! 30 LS
#else
-3: mov r1,r3 ! OPQR
+3: mov r7,r3 ! OPQR
shlr8 r3 ! xOPQ
- mov.l @(r0,r5),r1 ! KLMN
- mov r1,r6
+ mov.l @(r0,r5),r7 ! KLMN
+ mov r7,r6
shll16 r6
shll8 r6 ! Nxxx
or r6,r3 ! NOPQ
@@ -157,12 +184,7 @@
9: rts
nop
-/* void * memcpy(void *dst, const void *src, size_t len) */
-.text
-.align 4
-.type memcpy,@function
-.globl memcpy;
-memcpy:
+ENTRY(memcpy)
! Calculate the invariants which will be used in the remainder
! of the code:
@@ -189,9 +211,7 @@ memcpy:
mov r4, r0 ! 5 MT (0 cycle latency)
add r6, r0 ! 49 EX
- mov #16, r1 ! 6 EX
bt/s .Lcase00 ! 111 BR (aligned)
-
sub r4, r5 ! 75 EX
! Arguments are not nicely long word aligned or zero len.
@@ -207,6 +227,7 @@ memcpy:
! However the penalty for getting it 'wrong' is much higher for long word
! aligned data (and this is more common), so use a value of 16.
+ mov #16, r1 ! 6 EX
cmp/gt r6,r1 ! 56 MT
add #-1,r5 ! 50 EX
@@ -447,6 +468,183 @@ memcpy:
mov.l r7, @-r0 ! 30 LS
+#ifdef MEMCPY_USES_FPU
+ ! Copy the cache line aligned blocks by using the FPU registers.
+ ! If src and dst are well aligned adopt 64-bit data transfer.
+ ! We also need r0 as a temporary (for movca), so 'undo' the invariant:
+ ! r5: src (was r0+r5)
+ ! r1: dest (was r0)
+1:
+ add r0, r5
+ mov r0, r1
+
+ mov r1, r3 ! MT
+ sub r2, r3 ! EX (r3 - r2 -> r3)
+ mov #-5, r0
+ shld r0, r3 ! number of the cache lines
+
+ mov #8, r0
+ cmp/ge r0, r3 ! Check if there are many cache lines to copy.
+ bf 45f ! Copy cache line aligned blocks without pref.
+ mov r5, r0
+ add #-0x7c, r0
+ tst #7, r0 ! src is 8byte aligned
+ bf 45f
+
+ ! Many cache lines have to be copied and the buffers are well aligned.
+ ! Aggressive prefetching and FPU in single paired precision.
+ mov r0, r5
+ mov r5, r6
+ add #-0x80, r6 ! prefetch head
+
+ ! store FPU (in single precision mode, do not check R15 align).
+ fmov fr12, @-r15
+ fmov fr13, @-r15
+ fmov fr14, @-r15
+ fmov fr15, @-r15
+
+ FPU_SET_PAIRED_PREC
+
+ mov #4, r0
+67:
+ add #-0x20, r6
+ pref @r6
+ add #-0x20, r6
+ pref @r6
+
+ fmov @r5+, dr0
+ fmov @r5+, dr2
+ fmov @r5+, dr4
+ fmov @r5+, dr6
+ fmov @r5+, dr8
+ fmov @r5+, dr10
+ fmov @r5+, dr12
+ fmov @r5+, dr14
+ fmov @r5+, xd0
+ fmov @r5+, xd2
+ fmov @r5+, xd4
+ fmov @r5+, xd6
+ fmov @r5+, xd8
+ fmov @r5+, xd10
+ fmov @r5+, xd12
+ fmov @r5+, xd14
+
+ DALLOC
+ fmov xd14, @-r1
+ fmov xd12, @-r1
+ fmov xd10, @-r1
+ fmov xd8, @-r1
+ DALLOC
+ fmov xd6, @-r1
+ fmov xd4, @-r1
+ fmov xd2, @-r1
+ fmov xd0, @-r1
+ DALLOC
+ fmov dr14, @-r1
+ fmov dr12, @-r1
+ fmov dr10, @-r1
+ fmov dr8, @-r1
+ DALLOC
+ fmov dr6, @-r1
+ add #-0x80, r5
+ fmov dr4, @-r1
+ add #-0x80, r5
+ fmov dr2, @-r1
+ add #-0x20, r6
+ fmov dr0, @-r1
+ add #-4, r3
+ pref @r6
+ add #-0x20, r6
+ cmp/ge r0, r3
+ bt/s 67b
+ pref @r6
+
+ RESTORE_FPSCR
+
+ ! Restore FPU callee save registers
+ fmov @r15+, fr15
+ fmov @r15+, fr14
+ fmov @r15+, fr13
+ fmov @r15+, fr12
+
+ ! Other cache lines could be copied: so use the FPU in single paired
+ ! precision without prefetching. No check for alignment is necessary.
+
+ mov #1, r0
+ cmp/ge r0, r3
+ bt/s 3f
+ add #0x60, r5
+
+ bra 5f
+ nop
+
+ ! No prefetch and FPU in single precision.
+45:
+ add #-0x1c, r5
+ mov r5, r0
+ tst #7, r0
+ bt 3f
+
+2: fmov.s @r5+, fr0
+ fmov.s @r5+, fr1
+ fmov.s @r5+, fr2
+ fmov.s @r5+, fr3
+ fmov.s @r5+, fr4
+ fmov.s @r5+, fr5
+ fmov.s @r5+, fr6
+ fmov.s @r5+, fr7
+
+ DALLOC
+
+ fmov.s fr7, @-r1
+ fmov.s fr6, @-r1
+ fmov.s fr5, @-r1
+ fmov.s fr4, @-r1
+ fmov.s fr3, @-r1
+ fmov.s fr2, @-r1
+ fmov.s fr1, @-r1
+ fmov.s fr0, @-r1
+
+ cmp/eq r2,r1
+
+ bf/s 2b
+ add #-0x40, r5
+
+ bra 5f
+ nop
+
+ ! No prefetch and FPU in single paired precision.
+
+3: FPU_SET_PAIRED_PREC
+
+4: fmov @r5+, dr0
+ fmov @r5+, dr2
+ fmov @r5+, dr4
+ fmov @r5+, dr6
+
+ DALLOC
+
+ fmov dr6, @-r1
+ fmov dr4, @-r1
+ fmov dr2, @-r1
+ fmov dr0, @-r1
+ cmp/eq r2,r1
+
+ bf/s 4b
+ add #-0x40, r5
+
+ RESTORE_FPSCR
+
+5: mov r1, r0
+
+ cmp/eq r4, r0 ! 54 MT
+ bf/s 1f ! 109 BR
+ sub r1, r5 ! 75 EX
+
+ rts
+ nop
+1:
+#else
! Copy the cache line aligned blocks
!
! In use: r0, r2, r4, r5
@@ -512,6 +710,7 @@ memcpy:
rts
1: mov.l @r15+, r8 ! 15 LS
+#endif
sub r4, r1 ! 75 EX (len remaining)
! number of trailing bytes is non-zero
@@ -733,30 +932,30 @@ memcpy:
mov.l @(0x04,r5), r11 ! 18 LS (latency=2)
xtrct r9, r8 ! 48 EX
- mov.w @(0x02,r5), r12 ! 18 LS (latency=2)
+ mov.l @(0x00,r5), r12 ! 18 LS (latency=2)
xtrct r10, r9 ! 48 EX
movca.l r0,@r1 ! 40 LS (latency=3-7)
add #-0x1c, r1 ! 50 EX
- mov.l r3, @(0x1c,r1) ! 33 LS
+ mov.l r3, @(0x18,r1) ! 33 LS
xtrct r11, r10 ! 48 EX
- mov.l r6, @(0x18,r1) ! 33 LS
+ mov.l r6, @(0x14,r1) ! 33 LS
xtrct r12, r11 ! 48 EX
- mov.l r7, @(0x14,r1) ! 33 LS
+ mov.l r7, @(0x10,r1) ! 33 LS
- mov.l r8, @(0x10,r1) ! 33 LS
- add #-0x3e, r5 ! 50 EX
+ mov.l r8, @(0x0c,r1) ! 33 LS
+ add #-0x1e, r5 ! 50 EX
- mov.l r9, @(0x0c,r1) ! 33 LS
+ mov.l r9, @(0x08,r1) ! 33 LS
cmp/eq r2,r1 ! 54 MT
- mov.l r10, @(0x08,r1) ! 33 LS
+ mov.l r10, @(0x04,r1) ! 33 LS
bf/s 2b ! 109 BR
- mov.l r11, @(0x04,r1) ! 33 LS
+ mov.l r11, @(0x00,r1) ! 33 LS
#endif
mov.l @r15+, r12
@@ -803,6 +1002,5 @@ memcpy:
rts
mov.b r1,@-r0
-.size memcpy,.-memcpy;
-
+END(memcpy)
libc_hidden_def (memcpy)
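
The FPU path above interleaves prefetches for cache lines well ahead of the current read position with the loads and stores of the current line. A hedged, generic C sketch of that prefetch-ahead structure follows; it copies forward for clarity (the SH4 loop runs backward), and LINE and PREF_AHEAD are assumed tuning values, not constants from the patch:

    #include <string.h>
    #include <stddef.h>

    #define LINE       32   /* SH4 cache line size */
    #define PREF_AHEAD  4   /* lines to run ahead of the copy; assumed */

    static void copy_lines(char *dst, const char *src, size_t nlines)
    {
        for (size_t i = 0; i < nlines; i++) {
            /* Hint a line several iterations ahead into cache while
             * the current one is being moved. */
            __builtin_prefetch(src + (i + PREF_AHEAD) * LINE, 0, 0);
            memcpy(dst + i * LINE, src + i * LINE, LINE);
        }
    }
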
diff --git a/libc/string/sh/sh4/memmove.c b/libc/string/sh/sh4/memmove.c
new file mode 100644
index 000000000..8059bd4cc
--- /dev/null
+++ b/libc/string/sh/sh4/memmove.c
@@ -0,0 +1,121 @@
+/* memmove implementation for SH4
+ *
+ * Copyright (C) 2009 STMicroelectronics Ltd.
+ *
+ * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+#ifndef __SH_FPU_ANY__
+#include "../../generic/memmove.c"
+#else
+
+#include <string.h>
+
+#define FPSCR_SR (1 << 20)
+#define STORE_FPSCR(x) __asm__ __volatile__("sts fpscr, %0" : "=r"(x))
+#define LOAD_FPSCR(x) __asm__ __volatile__("lds %0, fpscr" : : "r"(x))
+
+static void fpu_optimised_copy_fwd(void *dest, const void *src, size_t len)
+{
+ char *d = (char *)dest;
+ char *s = (char *)src;
+
+ if (len >= 64) {
+ unsigned long fpscr;
+ int *s1;
+ int *d1;
+
+		/* Align the dest to an 8 byte boundary. */
+ while ((unsigned)d & 0x7) {
+ *d++ = *s++;
+ len--;
+ }
+
+ s1 = (int *)s;
+ d1 = (int *)d;
+
+ /* check if s is well aligned to use FPU */
+ if (!((unsigned)s1 & 0x7)) {
+
+ /* Align the dest to cache-line boundary */
+ while ((unsigned)d1 & 0x1c) {
+ *d1++ = *s1++;
+ len -= 4;
+ }
+
+			/* Use paired single precision load or store mode
+			 * for 64-bit transfers. */
+ STORE_FPSCR(fpscr);
+ LOAD_FPSCR(FPSCR_SR);
+
+ while (len >= 32) {
+ __asm__ __volatile__ ("fmov @%0+,dr0":"+r" (s1));
+ __asm__ __volatile__ ("fmov @%0+,dr2":"+r" (s1));
+ __asm__ __volatile__ ("fmov @%0+,dr4":"+r" (s1));
+ __asm__ __volatile__ ("fmov @%0+,dr6":"+r" (s1));
+ __asm__
+ __volatile__ ("fmov dr0,@%0"::"r"
+ (d1):"memory");
+ d1 += 2;
+ __asm__
+ __volatile__ ("fmov dr2,@%0"::"r"
+ (d1):"memory");
+ d1 += 2;
+ __asm__
+ __volatile__ ("fmov dr4,@%0"::"r"
+ (d1):"memory");
+ d1 += 2;
+ __asm__
+ __volatile__ ("fmov dr6,@%0"::"r"
+ (d1):"memory");
+ d1 += 2;
+ len -= 32;
+ }
+ LOAD_FPSCR(fpscr);
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+		/* TODO: other subcases could be covered here. */
+ }
+ /* Go to per-byte copy */
+ while (len > 0) {
+ *d++ = *s++;
+ len--;
+ }
+ return;
+}
+
+void *memmove(void *dest, const void *src, size_t len)
+{
+ unsigned long int d = (long int)dest;
+ unsigned long int s = (long int)src;
+ unsigned long int res;
+
+ if (d >= s)
+ res = d - s;
+ else
+ res = s - d;
+	/*
+	 * 1) dest and src do not overlap      ==> memcpy (BWD/FWD)
+	 * 2) dest and src overlap completely  ==> memcpy (BWD/FWD)
+	 * 3) left-to-right overlap  ==> copy from the beginning to the end
+	 * 4) right-to-left overlap  ==> copy from the end to the beginning
+	 */
+
+	if (res == 0)		/* 100% overlap: either direction works */
+		memcpy(dest, src, len);
+	else if (res >= len)	/* no overlap */
+		memcpy(dest, src, len);
+ else {
+ if (d > s) /* right-to-left overlap */
+ memcpy(dest, src, len); /* memcpy is BWD */
+ else /* cannot use SH4 memcpy for this case */
+ fpu_optimised_copy_fwd(dest, src, len);
+ }
+ return (dest);
+}
+
+libc_hidden_def(memmove)
+#endif /*__SH_FPU_ANY__ */
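
A short usage sketch of the dispatch above, showing which path each overlap case takes; the buffer and sizes are illustrative only:

    char buf[16] = "abcdefghijklmno";
    memmove(buf,     buf,     8);  /* res == 0: full overlap, memcpy is safe  */
    memmove(buf + 8, buf,     8);  /* res >= len: no overlap, plain memcpy    */
    memmove(buf + 2, buf,     8);  /* d > s: backward-copying memcpy suffices */
    memmove(buf,     buf + 2, 8);  /* d < s: forward FPU copy is required     */
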
diff --git a/libc/string/sh/sh4/memset.S b/libc/string/sh/sh4/memset.S
new file mode 100644
index 000000000..eb83355ce
--- /dev/null
+++ b/libc/string/sh/sh4/memset.S
@@ -0,0 +1,152 @@
+/* $Id: memset.S,v 1.1 2000/04/14 16:49:01 mjd Exp $
+ *
+ * "memset" implementation of SuperH
+ *
+ * Copyright (C) 1999 Niibe Yutaka
+ *
+ * Copyright (c) 2009 STMicroelectronics Ltd
+ * Optimised using 64bit data transfer (via FPU) and the movca.l inst.
+ * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+/*
+ * void *memset(void *s, int c, size_t n);
+ */
+
+#include <sysdep.h>
+
+#if defined (__LITTLE_ENDIAN__) && defined (__SH_FPU_ANY__)
+#define MEMSET_USES_FPU
+/* Use paired single precision load or store mode for 64-bit transfers.
+ * FPSCR.SZ=1, FPSCR.PR=0 is well defined on both SH4-200 and SH4-300.
+ * Currently it has only been implemented and tested for little-endian mode. */
+.macro FPU_SET_PAIRED_PREC
+ sts fpscr, r3
+ mov #0x10, r1 ! PR=0 SZ=1
+ shll16 r1
+ lds r1, fpscr
+.endm
+.macro RESTORE_FPSCR
+ lds r3, fpscr
+.endm
+#endif
+
+ENTRY(memset)
+ mov #12,r0
+ add r6,r4
+ cmp/gt r6,r0
+ bt/s 40f ! if it's too small, set a byte at once
+ mov r4,r0
+ and #3,r0
+ cmp/eq #0,r0
+ bt/s 2f ! It's aligned
+ sub r0,r6
+1:
+ dt r0
+ bf/s 1b
+ mov.b r5,@-r4
+2: ! make VVVV
+ extu.b r5,r5
+ swap.b r5,r0 ! V0
+ or r0,r5 ! VV
+ swap.w r5,r0 ! VV00
+ or r0,r5 ! VVVV
+
+	! Check if enough bytes need to be set to be worth the big loop
+ mov #0x40, r0 ! (MT)
+ cmp/gt r6,r0 ! (MT) 64 > len => slow loop
+
+ bt/s 22f
+ mov r6,r0
+
+ ! align the dst to the cache block size if necessary
+ mov r4, r3
+ mov #~(0x1f), r1
+
+ and r3, r1
+ cmp/eq r3, r1
+
+ bt/s 11f ! dst is already aligned
+ sub r1, r3 ! r3-r1 -> r3
+ shlr2 r3 ! number of loops
+
+10: mov.l r5,@-r4
+ dt r3
+ bf/s 10b
+ add #-4, r6
+
+11: ! dst is 32byte aligned
+ mov r6,r2
+ mov #-5,r0
+ shld r0,r2 ! number of loops
+
+ add #-32, r4
+ mov r5, r0
+
+#ifdef MEMSET_USES_FPU
+ lds r5, fpul ! (CO)
+ fsts fpul, fr0 ! Dr0 will be 'VVVVVVVV'
+ fsts fpul, fr1
+
+ FPU_SET_PAIRED_PREC
+12:
+ movca.l r0, @r4
+ mov.l r5, @(4, r4)
+ add #32, r4
+ fmov dr0, @-r4
+ fmov dr0, @-r4
+ add #-0x20, r6
+ fmov dr0, @-r4
+ dt r2
+ bf/s 12b
+ add #-40, r4
+
+ RESTORE_FPSCR
+#else
+12:
+ movca.l r0,@r4
+ mov.l r5,@(4, r4)
+ mov.l r5,@(8, r4)
+ mov.l r5,@(12,r4)
+ mov.l r5,@(16,r4)
+ mov.l r5,@(20,r4)
+ add #-0x20, r6
+ mov.l r5,@(24,r4)
+ dt r2
+ mov.l r5,@(28,r4)
+ bf/s 12b
+ add #-32, r4
+
+#endif
+ add #32, r4
+ mov #8, r0
+ cmp/ge r0, r6
+ bf 40f
+
+ mov r6,r0
+22:
+ shlr2 r0
+ shlr r0 ! r0 = r6 >> 3
+3:
+ dt r0
+ mov.l r5,@-r4 ! set 8-byte at once
+ bf/s 3b
+ mov.l r5,@-r4
+ !
+ mov #7,r0
+ and r0,r6
+
+ ! fill bytes (length may be zero)
+40: tst r6,r6
+ bt 5f
+4:
+ dt r6
+ bf/s 4b
+ mov.b r5,@-r4
+5:
+ rts
+ mov r4,r0
+END(memset)
+libc_hidden_def (memset)
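
Structurally the routine is the usual three-phase memset, except that it fills from the end of the buffer downwards (note the @-r4 stores). A plain C sketch of the equivalent phases, written forward for clarity and under that reading of the assembly:

    #include <stdint.h>
    #include <stddef.h>

    void *memset_sketch(void *s, int c, size_t n)
    {
        unsigned char *p = s;
        uint32_t v = (uint8_t)c;
        v |= v << 8;
        v |= v << 16;                       /* the "make VVVV" step */
        while (n && ((uintptr_t)p & 3)) {   /* byte-align the head  */
            *p++ = (uint8_t)c;
            n--;
        }
        for (; n >= 4; n -= 4, p += 4)      /* aligned word stores  */
            *(uint32_t *)p = v;
        while (n--)                         /* byte tail            */
            *p++ = (uint8_t)c;
        return s;
    }
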
diff --git a/libc/string/sh/sh4/strcpy.S b/libc/string/sh/sh4/strcpy.S
new file mode 100644
index 000000000..0f8278017
--- /dev/null
+++ b/libc/string/sh/sh4/strcpy.S
@@ -0,0 +1,28 @@
+/* strcpy implementation for SUPERH
+ *
+ * Copyright (C) 2009 STMicroelectronics Ltd.
+ *
+ * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+/*
+ char *strcpy(char *dest, const char *src);
+ */
+
+#include <sysdep.h>
+
+ENTRY(strcpy)
+ mov r4,r2
+1:
+ mov.b @r5+,r1
+ tst r1,r1
+ mov.b r1,@r2
+ bf/s 1b
+ add #1,r2
+
+ rts
+ mov r4,r0
+END(strcpy)
+libc_hidden_def (strcpy)
diff --git a/libc/string/sh/sh4/strncpy.S b/libc/string/sh/sh4/strncpy.S
new file mode 100644
index 000000000..8a16f39d4
--- /dev/null
+++ b/libc/string/sh/sh4/strncpy.S
@@ -0,0 +1,43 @@
+/* strncpy implementation for SUPERH
+ *
+ * Copyright (C) 2009 STMicroelectronics Ltd.
+ *
+ * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+/*
+ char *strncpy(char *dest, const char *src, size_t n);
+ */
+
+#include <sysdep.h>
+
+ENTRY(strncpy)
+ mov #0,r0
+ bra 2f
+ mov r4,r2
+1:
+ mov.b r1,@(r0,r2)
+ add #1,r0
+2:
+ cmp/hs r6,r0
+ bt 5f
+ mov.b @(r0,r5),r1
+ tst r1,r1
+ bf/s 1b
+ cmp/hs r6,r0
+ bra 4f
+ nop
+3:
+ mov.b r1,@(r0,r2)
+ add #1,r0
+ cmp/hs r6,r0
+4:
+ bf/s 3b
+ mov #0,r1
+5:
+ rts
+ mov r2,r0
+END(strncpy)
+libc_hidden_def(strncpy)
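
For reference, the ISO C semantics this routine implements: copy at most n bytes, then NUL-pad the remainder, which is why the second loop above keeps storing r1 = 0. A plain C sketch:

    #include <stddef.h>

    char *strncpy_sketch(char *dest, const char *src, size_t n)
    {
        size_t i = 0;
        for (; i < n && src[i] != '\0'; i++)
            dest[i] = src[i];
        for (; i < n; i++)      /* pad with NULs up to n */
            dest[i] = '\0';
        return dest;
    }
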
diff --git a/libc/string/sh/strlen.S b/libc/string/sh/strlen.S
new file mode 100644
index 000000000..1ccecc17b
--- /dev/null
+++ b/libc/string/sh/strlen.S
@@ -0,0 +1,75 @@
+/* $Id: strlen.S,v 1.2 2001/06/29 14:07:15 gniibe Exp $
+ *
+ * "strlen" implementation of SuperH
+ *
+ * Copyright (C) 1999 Kaz Kojima
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+/* size_t strlen (const char *s) */
+
+#include <sysdep.h>
+#include <endian.h>
+
+ENTRY(strlen)
+ mov r4,r0
+ and #3,r0
+ tst r0,r0
+ bt/s 1f
+ mov #0,r2
+
+ add #-1,r0
+ shll2 r0
+ shll r0
+ braf r0
+ nop
+
+ mov.b @r4+,r1
+ tst r1,r1
+ bt 8f
+ add #1,r2
+
+ mov.b @r4+,r1
+ tst r1,r1
+ bt 8f
+ add #1,r2
+
+ mov.b @r4+,r1
+ tst r1,r1
+ bt 8f
+ add #1,r2
+
+1:
+ mov #0,r3
+2:
+ mov.l @r4+,r1
+ cmp/str r3,r1
+ bf/s 2b
+ add #4,r2
+
+ add #-4,r2
+#ifndef __LITTLE_ENDIAN__
+ swap.b r1,r1
+ swap.w r1,r1
+ swap.b r1,r1
+#endif
+ extu.b r1,r0
+ tst r0,r0
+ bt/s 8f
+ shlr8 r1
+ add #1,r2
+ extu.b r1,r0
+ tst r0,r0
+ bt/s 8f
+ shlr8 r1
+ add #1,r2
+ extu.b r1,r0
+ tst r0,r0
+ bt 8f
+ add #1,r2
+8:
+ rts
+ mov r2,r0
+END(strlen)
+libc_hidden_def (strlen)
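
The cmp/str instruction in the main loop tests all four bytes of a word for a zero byte at once; the byte-wise prologue and epilogue handle alignment and locate the exact terminator. A portable C analogue of the same structure, using the classic zero-in-word test in place of cmp/str (sketch only, not the assembly):

    #include <stdint.h>
    #include <stddef.h>

    size_t strlen_sketch(const char *s)
    {
        const char *p = s;
        while ((uintptr_t)p & 3) {          /* byte-align the head */
            if (*p == '\0')
                return (size_t)(p - s);
            p++;
        }
        const uint32_t *w = (const uint32_t *)(const void *)p;
        uint32_t v;
        while (!(((v = *w) - 0x01010101u) & ~v & 0x80808080u))
            w++;                            /* no zero byte in this word */
        p = (const char *)w;
        while (*p)                          /* pinpoint the NUL */
            p++;
        return (size_t)(p - s);
    }
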
diff --git a/libc/string/sh64/memcpy.S b/libc/string/sh64/memcpy.S
deleted file mode 100644
index 3c0ea0c0d..000000000
--- a/libc/string/sh64/memcpy.S
+++ /dev/null
@@ -1,205 +0,0 @@
-/* Cloned and hacked for uClibc by Paul Mundt, December 2003 */
-/* Modified by SuperH, Inc. September 2003 */
-!
-! Fast SH memcpy
-!
-! by Toshiyasu Morita (tm@netcom.com)
-! hacked by J"orn Rennecke (joern.rennecke@superh.com) ("o for o-umlaut)
-! SH5 code Copyright 2002 SuperH Ltd.
-!
-! Entry: ARG0: destination pointer
-! ARG1: source pointer
-! ARG2: byte count
-!
-! Exit: RESULT: destination pointer
-! any other registers in the range r0-r7: trashed
-!
-! Notes: Usually one wants to do small reads and write a longword, but
-! unfortunately it is difficult in some cases to concatenate bytes
-! into a longword on the SH, so this does a longword read and small
-! writes.
-!
-! This implementation makes two assumptions about how it is called:
-!
-! 1.: If the byte count is nonzero, the address of the last byte to be
-! copied is unsigned greater than the address of the first byte to
-! be copied. This could be easily swapped for a signed comparison,
-! but the algorithm used needs some comparison.
-!
-! 2.: When there are two or three bytes in the last word of an 11-or-more
-! bytes memory chunk to be copied, the rest of the word can be read
-! without side effects.
-! This could be easily changed by increasing the minimum size of
-! a fast memcpy and the amount subtracted from r7 before L_2l_loop to 2;
-! however, this would cost a few extra cycles on average.
-! For SHmedia, the assumption is that any quadword can be read in its
-! entirety if at least one byte is included in the copy.
-!
-
-#include <features.h>
-
- .section .text..SHmedia32,"ax"
- .globl memcpy
- .type memcpy, @function
- .align 5
-
-memcpy:
-
-#define LDUAQ(P,O,D0,D1) ldlo.q P,O,D0; ldhi.q P,O+7,D1
-#define STUAQ(P,O,D0,D1) stlo.q P,O,D0; sthi.q P,O+7,D1
-#define LDUAL(P,O,D0,D1) ldlo.l P,O,D0; ldhi.l P,O+3,D1
-#define STUAL(P,O,D0,D1) stlo.l P,O,D0; sthi.l P,O+3,D1
-
- ld.b r3,0,r63
- pta/l Large,tr0
- movi 25,r0
- bgeu/u r4,r0,tr0
- nsb r4,r0
- shlli r0,5,r0
- movi (L1-L0+63*32 + 1) & 0xffff,r1
- sub r1, r0, r0
-L0: ptrel r0,tr0
- add r2,r4,r5
- ptabs r18,tr1
- add r3,r4,r6
- blink tr0,r63
-
-/* Rearranged to make cut2 safe */
- .balign 8
-L4_7: /* 4..7 byte memcpy cntd. */
- stlo.l r2, 0, r0
- or r6, r7, r6
- sthi.l r5, -1, r6
- stlo.l r5, -4, r6
- blink tr1,r63
-
- .balign 8
-L1: /* 0 byte memcpy */
- nop
- blink tr1,r63
- nop
- nop
- nop
- nop
-
-L2_3: /* 2 or 3 byte memcpy cntd. */
- st.b r5,-1,r6
- blink tr1,r63
-
- /* 1 byte memcpy */
- ld.b r3,0,r0
- st.b r2,0,r0
- blink tr1,r63
-
-L8_15: /* 8..15 byte memcpy cntd. */
- stlo.q r2, 0, r0
- or r6, r7, r6
- sthi.q r5, -1, r6
- stlo.q r5, -8, r6
- blink tr1,r63
-
- /* 2 or 3 byte memcpy */
- ld.b r3,0,r0
- ld.b r2,0,r63
- ld.b r3,1,r1
- st.b r2,0,r0
- pta/l L2_3,tr0
- ld.b r6,-1,r6
- st.b r2,1,r1
- blink tr0, r63
-
- /* 4 .. 7 byte memcpy */
- LDUAL (r3, 0, r0, r1)
- pta L4_7, tr0
- ldlo.l r6, -4, r7
- or r0, r1, r0
- sthi.l r2, 3, r0
- ldhi.l r6, -1, r6
- blink tr0, r63
-
- /* 8 .. 15 byte memcpy */
- LDUAQ (r3, 0, r0, r1)
- pta L8_15, tr0
- ldlo.q r6, -8, r7
- or r0, r1, r0
- sthi.q r2, 7, r0
- ldhi.q r6, -1, r6
- blink tr0, r63
-
- /* 16 .. 24 byte memcpy */
- LDUAQ (r3, 0, r0, r1)
- LDUAQ (r3, 8, r8, r9)
- or r0, r1, r0
- sthi.q r2, 7, r0
- or r8, r9, r8
- sthi.q r2, 15, r8
- ldlo.q r6, -8, r7
- ldhi.q r6, -1, r6
- stlo.q r2, 8, r8
- stlo.q r2, 0, r0
- or r6, r7, r6
- sthi.q r5, -1, r6
- stlo.q r5, -8, r6
- blink tr1,r63
-
-Large:
- ld.b r2, 0, r63
- pta/l Loop_ua, tr1
- ori r3, -8, r7
- sub r2, r7, r22
- sub r3, r2, r6
- add r2, r4, r5
- ldlo.q r3, 0, r0
- addi r5, -16, r5
- movi 64+8, r27 // could subtract r7 from that.
- stlo.q r2, 0, r0
- sthi.q r2, 7, r0
- ldx.q r22, r6, r0
- bgtu/l r27, r4, tr1
-
- addi r5, -48, r27
- pta/l Loop_line, tr0
- addi r6, 64, r36
- addi r6, -24, r19
- addi r6, -16, r20
- addi r6, -8, r21
-
-Loop_line:
- ldx.q r22, r36, r63
- alloco r22, 32
- addi r22, 32, r22
- ldx.q r22, r19, r23
- sthi.q r22, -25, r0
- ldx.q r22, r20, r24
- ldx.q r22, r21, r25
- stlo.q r22, -32, r0
- ldx.q r22, r6, r0
- sthi.q r22, -17, r23
- sthi.q r22, -9, r24
- sthi.q r22, -1, r25
- stlo.q r22, -24, r23
- stlo.q r22, -16, r24
- stlo.q r22, -8, r25
- bgeu r27, r22, tr0
-
-Loop_ua:
- addi r22, 8, r22
- sthi.q r22, -1, r0
- stlo.q r22, -8, r0
- ldx.q r22, r6, r0
- bgtu/l r5, r22, tr1
-
- add r3, r4, r7
- ldlo.q r7, -8, r1
- sthi.q r22, 7, r0
- ldhi.q r7, -1, r7
- ptabs r18,tr1
- stlo.q r22, 0, r0
- or r1, r7, r1
- sthi.q r5, 15, r1
- stlo.q r5, 8, r1
- blink tr1, r63
-
- .size memcpy,.-memcpy
-
-libc_hidden_def(memcpy)
diff --git a/libc/string/sh64/memset.S b/libc/string/sh64/memset.S
deleted file mode 100644
index f588323f0..000000000
--- a/libc/string/sh64/memset.S
+++ /dev/null
@@ -1,96 +0,0 @@
-/* Cloned and hacked for uClibc by Paul Mundt, December 2003 */
-/* Modified by SuperH, Inc. September 2003 */
-!
-! Fast SH memset
-!
-! by Toshiyasu Morita (tm@netcom.com)
-!
-! SH5 code by J"orn Rennecke (joern.rennecke@superh.com)
-! Copyright 2002 SuperH Ltd.
-!
-
-#include <features.h>
-#include <endian.h>
-
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-#define SHHI shlld
-#define SHLO shlrd
-#else
-#define SHHI shlrd
-#define SHLO shlld
-#endif
-
- .section .text..SHmedia32,"ax"
- .globl memset
- .type memset, @function
-
- .align 5
-
-memset:
- pta/l multiquad, tr0
- andi r2, 7, r22
- ptabs r18, tr2
- mshflo.b r3,r3,r3
- add r4, r22, r23
- mperm.w r3, r63, r3 // Fill pattern now in every byte of r3
-
- movi 8, r9
- bgtu/u r23, r9, tr0 // multiquad
-
- beqi/u r4, 0, tr2 // Return with size 0 - ensures no mem accesses
- ldlo.q r2, 0, r7
- shlli r4, 2, r4
- movi -1, r8
- SHHI r8, r4, r8
- SHHI r8, r4, r8
- mcmv r7, r8, r3
- stlo.q r2, 0, r3
- blink tr2, r63
-
-multiquad:
- pta/l lastquad, tr0
- stlo.q r2, 0, r3
- shlri r23, 3, r24
- add r2, r4, r5
- beqi/u r24, 1, tr0 // lastquad
- pta/l loop, tr1
- sub r2, r22, r25
- andi r5, -8, r20 // calculate end address and
- addi r20, -7*8, r8 // loop end address; This might overflow, so we need
- // to use a different test before we start the loop
- bge/u r24, r9, tr1 // loop
- st.q r25, 8, r3
- st.q r20, -8, r3
- shlri r24, 1, r24
- beqi/u r24, 1, tr0 // lastquad
- st.q r25, 16, r3
- st.q r20, -16, r3
- beqi/u r24, 2, tr0 // lastquad
- st.q r25, 24, r3
- st.q r20, -24, r3
-lastquad:
- sthi.q r5, -1, r3
- blink tr2,r63
-
-loop:
-!!! alloco r25, 32 // QQQ comment out for short-term fix to SHUK #3895.
-			// QQQ commenting out is logically correct, but sub-optimal
- // QQQ Sean McGoogan - 4th April 2003.
- st.q r25, 8, r3
- st.q r25, 16, r3
- st.q r25, 24, r3
- st.q r25, 32, r3
- addi r25, 32, r25
- bgeu/l r8, r25, tr1 // loop
-
- st.q r20, -40, r3
- st.q r20, -32, r3
- st.q r20, -24, r3
- st.q r20, -16, r3
- st.q r20, -8, r3
- sthi.q r5, -1, r3
- blink tr2,r63
-
- .size memset,.-memset
-
-libc_hidden_def(memset)
diff --git a/libc/string/sh64/strcpy.S b/libc/string/sh64/strcpy.S
deleted file mode 100644
index da79d5143..000000000
--- a/libc/string/sh64/strcpy.S
+++ /dev/null
@@ -1,102 +0,0 @@
-/* Cloned and hacked for uClibc by Paul Mundt, December 2003 */
-/* Modified by SuperH, Inc. September 2003 */
-! Entry: arg0: destination
-! arg1: source
-! Exit: result: destination
-!
-! SH5 code Copyright 2002 SuperH Ltd.
-
-#include <features.h>
-#include <endian.h>
-
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-#define SHHI shlld
-#define SHLO shlrd
-#else
-#define SHHI shlrd
-#define SHLO shlld
-#endif
-
- .section .text..SHmedia32,"ax"
- .globl strcpy
- .type strcpy, @function
- .align 5
-
-strcpy:
-
- pta/l shortstring,tr1
- ldlo.q r3,0,r4
- ptabs r18,tr4
- shlli r3,3,r7
- addi r2, 8, r0
- mcmpeq.b r4,r63,r6
- SHHI r6,r7,r6
- bnei/u r6,0,tr1 // shortstring
- pta/l no_lddst, tr2
- ori r3,-8,r23
- sub r2, r23, r0
- sub r3, r2, r21
- addi r21, 8, r20
- ldx.q r0, r21, r5
- pta/l loop, tr0
- ori r2,-8,r22
- mcmpeq.b r5, r63, r6
- bgt/u r22, r23, tr2 // no_lddst
-
- // r22 < r23 : Need to do a load from the destination.
- // r22 == r23 : Doesn't actually need to load from destination,
- // but still can be handled here.
- ldlo.q r2, 0, r9
- movi -1, r8
- SHLO r8, r7, r8
- mcmv r4, r8, r9
- stlo.q r2, 0, r9
- beqi/l r6, 0, tr0 // loop
-
- add r5, r63, r4
- addi r0, 8, r0
- blink tr1, r63 // shortstring
-no_lddst:
- // r22 > r23: note that for r22 == r23 the sthi.q would clobber
- // bytes before the destination region.
- stlo.q r2, 0, r4
- SHHI r4, r7, r4
- sthi.q r0, -1, r4
- beqi/l r6, 0, tr0 // loop
-
- add r5, r63, r4
- addi r0, 8, r0
-shortstring:
-#if __BYTE_ORDER != __LITTLE_ENDIAN
- pta/l shortstring2,tr1
- byterev r4,r4
-#endif
-shortstring2:
- st.b r0,-8,r4
- andi r4,0xff,r5
- shlri r4,8,r4
- addi r0,1,r0
- bnei/l r5,0,tr1
- blink tr4,r63 // return
-
- .balign 8
-loop:
- stlo.q r0, 0, r5
- ldx.q r0, r20, r4
- addi r0, 16, r0
- sthi.q r0, -9, r5
- mcmpeq.b r4, r63, r6
- bnei/u r6, 0, tr1 // shortstring
- ldx.q r0, r21, r5
- stlo.q r0, -8, r4
- sthi.q r0, -1, r4
- mcmpeq.b r5, r63, r6
- beqi/l r6, 0, tr0 // loop
-
- add r5, r63, r4
- addi r0, 8, r0
- blink tr1, r63 // shortstring
-
- .size strcpy,.-strcpy
-
-libc_hidden_def(strcpy)
diff --git a/libc/string/sh64/strlen.S b/libc/string/sh64/strlen.S
deleted file mode 100644
index 18f4164ff..000000000
--- a/libc/string/sh64/strlen.S
+++ /dev/null
@@ -1,63 +0,0 @@
-/* vi: set sw=8 ts=8: */
-/*
- * libc/string/sh64/strlen.S
- *
- * Simplistic strlen() implementation for SHmedia.
- *
- * Copyright (C) 2003 Paul Mundt <lethal@linux-sh.org>
- *
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. The name of the above contributors may not be
- * used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <features.h>
-
- .section .text..SHmedia32,"ax"
- .globl strlen
- .type strlen,@function
-
- .balign 16
-strlen:
- ptabs r18, tr4
-
- /*
- * Note: We could easily deal with the NULL case here with a simple
- * sanity check, though it seems that the behavior we want is to fault
- * in the event that r2 == NULL, so we don't bother.
- */
-/* beqi r2, 0, tr4 */ ! Sanity check
-
- movi -1, r0
- pta/l loop, tr0
-loop:
- ld.b r2, 0, r1
- addi r2, 1, r2
- addi r0, 1, r0
- bnei/l r1, 0, tr0
-
- or r0, r63, r2
- blink tr4, r63
-
- .size strlen,.-strlen
-
-libc_hidden_def(strlen)
diff --git a/libc/string/sparc/sparc32/memchr.S b/libc/string/sparc/sparc32/memchr.S
index 4d57a553b..1949db2e5 100644
--- a/libc/string/sparc/sparc32/memchr.S
+++ b/libc/string/sparc/sparc32/memchr.S
@@ -24,9 +24,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
.text
.align 4
@@ -139,6 +138,4 @@ ENTRY(memchr)
END(memchr)
libc_hidden_def(memchr)
-#if !__BOUNDED_POINTERS__
weak_alias(memchr,__ubp_memchr)
-#endif
diff --git a/libc/string/sparc/sparc32/memcpy.S b/libc/string/sparc/sparc32/memcpy.S
index 25a48844d..2fb87bb17 100644
--- a/libc/string/sparc/sparc32/memcpy.S
+++ b/libc/string/sparc/sparc32/memcpy.S
@@ -17,9 +17,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <features.h>
diff --git a/libc/string/sparc/sparc32/memset.S b/libc/string/sparc/sparc32/memset.S
index 6c6424cf8..6d02fc1a8 100644
--- a/libc/string/sparc/sparc32/memset.S
+++ b/libc/string/sparc/sparc32/memset.S
@@ -16,9 +16,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include <features.h>
diff --git a/libc/string/sparc/sparc32/stpcpy.S b/libc/string/sparc/sparc32/stpcpy.S
index daf116eb1..2984ea156 100644
--- a/libc/string/sparc/sparc32/stpcpy.S
+++ b/libc/string/sparc/sparc32/stpcpy.S
@@ -15,9 +15,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Normally, this uses ((xword - 0x01010101) & 0x80808080) test
to find out if any byte in xword could be zero. This is fast, but
diff --git a/libc/string/sparc/sparc32/strcat.S b/libc/string/sparc/sparc32/strcat.S
index eda029a16..e968a18a3 100644
--- a/libc/string/sparc/sparc32/strcat.S
+++ b/libc/string/sparc/sparc32/strcat.S
@@ -15,9 +15,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Normally, this uses ((xword - 0x01010101) & 0x80808080) test
to find out if any byte in xword could be zero. This is fast, but
diff --git a/libc/string/sparc/sparc32/strchr.S b/libc/string/sparc/sparc32/strchr.S
index 16710d4e8..fabc3e7e5 100644
--- a/libc/string/sparc/sparc32/strchr.S
+++ b/libc/string/sparc/sparc32/strchr.S
@@ -16,9 +16,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Normally, this uses ((xword - 0x01010101) & 0x80808080) test
to find out if any byte in xword could be zero. This is fast, but
diff --git a/libc/string/sparc/sparc32/strcmp.S b/libc/string/sparc/sparc32/strcmp.S
index d43883de6..07284cd18 100644
--- a/libc/string/sparc/sparc32/strcmp.S
+++ b/libc/string/sparc/sparc32/strcmp.S
@@ -15,9 +15,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Normally, this uses ((xword - 0x01010101) & 0x80808080) test
to find out if any byte in xword could be zero. This is fast, but
diff --git a/libc/string/sparc/sparc32/strcpy.S b/libc/string/sparc/sparc32/strcpy.S
index 4d7742ebc..3287546f3 100644
--- a/libc/string/sparc/sparc32/strcpy.S
+++ b/libc/string/sparc/sparc32/strcpy.S
@@ -15,9 +15,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Normally, this uses ((xword - 0x01010101) & 0x80808080) test
to find out if any byte in xword could be zero. This is fast, but
diff --git a/libc/string/sparc/sparc32/strlen.S b/libc/string/sparc/sparc32/strlen.S
index 4edfe7e78..66c790cb6 100644
--- a/libc/string/sparc/sparc32/strlen.S
+++ b/libc/string/sparc/sparc32/strlen.S
@@ -15,9 +15,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
/* Normally, this uses ((xword - 0x01010101) & 0x80808080) test
to find out if any byte in xword could be zero. This is fast, but
diff --git a/libc/string/sparc/sparc64/memchr.S b/libc/string/sparc/sparc64/memchr.S
deleted file mode 100644
index 6096cc218..000000000
--- a/libc/string/sparc/sparc64/memchr.S
+++ /dev/null
@@ -1,261 +0,0 @@
-/* memchr (str, ch, n) -- Return pointer to first occurrence of CH in STR less
- than N.
- For SPARC v9.
- Copyright (C) 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
- Jakub Jelinek <jj@ultra.linux.cz>.
- This version is developed using the same algorithm as the fast C
- version which carries the following introduction:
- Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
- with help from Dan Sahlin (dan@sics.se) and
- commentary by Jim Blandy (jimb@ai.mit.edu);
- adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
- and implemented by Roland McGrath (roland@ai.mit.edu).
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <asm/asi.h>
-#ifndef XCC
-#define XCC xcc
-#define USE_BPR
- .register %g2, #scratch
- .register %g3, #scratch
-#endif
-
- /* Normally, this uses
- ((xword - 0x0101010101010101) & 0x8080808080808080) test
- to find out if any byte in xword could be zero. This is fast, but
- also gives false alarm for any byte in range 0x81-0xff. It does
- not matter for correctness, as if this test tells us there could
- be some zero byte, we check it byte by byte, but if bytes with
- high bits set are common in the strings, then this will give poor
- performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
- will use one tick slower, but more precise test
- ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
- which does not give any false alarms (but if some bits are set,
- one cannot assume from it which bytes are zero and which are not).
- It is yet to be measured, what is the correct default for glibc
- in these days for an average user.
- */
-
- .text
- .align 32
-ENTRY(memchr)
- and %o1, 0xff, %o1 /* IEU0 Group */
-#ifdef USE_BPR
- brz,pn %o2, 12f /* CTI+IEU1 */
-#else
- tst %o2 /* IEU1 */
- be,pn %XCC, 12f /* CTI */
-#endif
- sll %o1, 8, %g3 /* IEU0 Group */
- add %o0, %o2, %o2 /* IEU1 */
-
- sethi %hi(0x01010101), %g1 /* IEU0 Group */
- or %g3, %o1, %g3 /* IEU1 */
- ldub [%o0], %o3 /* Load */
- sllx %g3, 16, %g5 /* IEU0 Group */
-
- or %g1, %lo(0x01010101), %g1 /* IEU1 */
- sllx %g1, 32, %g2 /* IEU0 Group */
- or %g3, %g5, %g3 /* IEU1 */
- sllx %g3, 32, %g5 /* IEU0 Group */
-
- cmp %o3, %o1 /* IEU1 */
- be,pn %xcc, 13f /* CTI */
- or %g1, %g2, %g1 /* IEU0 Group */
- andcc %o0, 7, %g0 /* IEU1 */
-
- bne,a,pn %icc, 21f /* CTI */
- add %o0, 1, %o0 /* IEU0 Group */
- ldx [%o0], %o3 /* Load Group */
- sllx %g1, 7, %g2 /* IEU0 */
-
- or %g3, %g5, %g3 /* IEU1 */
-1: add %o0, 8, %o0 /* IEU0 Group */
- xor %o3, %g3, %o4 /* IEU1 */
- /* %g1 = 0101010101010101 *
- * %g2 = 8080088080808080 *
- * %g3 = c c c c c c c c *
- * %o3 = value *
- * %o4 = value XOR c */
-2: cmp %o0, %o2 /* IEU1 Group */
-
- bg,pn %XCC, 11f /* CTI */
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- sub %o4, %g1, %o5 /* IEU0 Group */
- add %o0, 8, %o0 /* IEU1 */
-#ifdef EIGHTBIT_NOT_RARE
- andn %o5, %o4, %o5 /* IEU0 Group */
-#endif
-
- andcc %o5, %g2, %g0 /* IEU1 Group */
- be,a,pt %xcc, 2b /* CTI */
- xor %o3, %g3, %o4 /* IEU0 */
- srlx %o4, 56, %g5 /* IEU0 */
-
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 3f /* CTI */
- srlx %o4, 48, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 4f /* CTI */
- srlx %o4, 40, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 5f /* CTI */
-
- srlx %o4, 32, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 6f /* CTI */
- srlx %o4, 24, %g5 /* IEU0 */
-
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 7f /* CTI */
- srlx %o4, 16, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 8f /* CTI */
- srlx %o4, 8, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 9f /* CTI */
-
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- bne,pt %icc, 2b /* CTI */
- xor %o3, %g3, %o4 /* IEU0 */
- retl /* CTI+IEU1 Group */
-
- add %o0, -9, %o0 /* IEU0 */
-
- .align 16
-3: retl /* CTI+IEU1 Group */
- add %o0, -16, %o0 /* IEU0 */
-4: retl /* CTI+IEU1 Group */
- add %o0, -15, %o0 /* IEU0 */
-
-5: retl /* CTI+IEU1 Group */
- add %o0, -14, %o0 /* IEU0 */
-6: retl /* CTI+IEU1 Group */
- add %o0, -13, %o0 /* IEU0 */
-
-7: retl /* CTI+IEU1 Group */
- add %o0, -12, %o0 /* IEU0 */
-8: retl /* CTI+IEU1 Group */
- add %o0, -11, %o0 /* IEU0 */
-
-9: retl /* CTI+IEU1 Group */
- add %o0, -10, %o0 /* IEU0 */
-11: sub %o4, %g1, %o5 /* IEU0 Group */
- sub %o0, 8, %o0 /* IEU1 */
-
- andcc %o5, %g2, %g0 /* IEU1 Group */
- be,pt %xcc, 12f /* CTI */
- sub %o2, %o0, %o2 /* IEU0 */
- tst %o2 /* IEU1 Group */
-
- be,pn %XCC, 12f /* CTI */
- srlx %o4, 56, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 13f /* CTI */
-
- cmp %o2, 1 /* IEU0 */
- be,pn %XCC, 12f /* CTI Group */
- srlx %o4, 48, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 14f /* CTI */
- cmp %o2, 2 /* IEU1 Group */
- be,pn %XCC, 12f /* CTI */
- srlx %o4, 40, %g5 /* IEU0 */
-
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 15f /* CTI */
- cmp %o2, 3 /* IEU1 Group */
- be,pn %XCC, 12f /* CTI */
-
- srlx %o4, 32, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 16f /* CTI */
- cmp %o2, 4 /* IEU1 Group */
-
- be,pn %XCC, 12f /* CTI */
- srlx %o4, 24, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 17f /* CTI */
-
- cmp %o2, 5 /* IEU1 Group */
- be,pn %XCC, 12f /* CTI */
- srlx %o4, 16, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 18f /* CTI */
- cmp %o2, 6 /* IEU1 Group */
- be,pn %XCC, 12f /* CTI */
- srlx %o4, 8, %g5 /* IEU0 */
-
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 19f /* CTI */
- nop /* IEU0 */
-12: retl /* CTI+IEU1 Group */
-
- clr %o0 /* IEU0 */
- nop /* Stub */
-13: retl /* CTI+IEU1 Group */
- nop /* IEU0 */
-
-14: retl /* CTI+IEU1 Group */
- add %o0, 1, %o0 /* IEU0 */
-15: retl /* CTI+IEU1 Group */
- add %o0, 2, %o0 /* IEU0 */
-
-16: retl /* CTI+IEU1 Group */
- add %o0, 3, %o0 /* IEU0 */
-17: retl /* CTI+IEU1 Group */
- add %o0, 4, %o0 /* IEU0 */
-
-18: retl /* CTI+IEU1 Group */
- add %o0, 5, %o0 /* IEU0 */
-19: retl /* CTI+IEU1 Group */
- add %o0, 6, %o0 /* IEU0 */
-
-21: cmp %o0, %o2 /* IEU1 */
- be,pn %XCC, 12b /* CTI */
- sllx %g1, 7, %g2 /* IEU0 Group */
- ldub [%o0], %o3 /* Load */
-
- or %g3, %g5, %g3 /* IEU1 */
-22: andcc %o0, 7, %g0 /* IEU1 Group */
- be,a,pn %icc, 1b /* CTI */
- ldx [%o0], %o3 /* Load */
-
- cmp %o3, %o1 /* IEU1 Group */
- be,pn %xcc, 23f /* CTI */
- add %o0, 1, %o0 /* IEU0 */
- cmp %o0, %o2 /* IEU1 Group */
-
- bne,a,pt %XCC, 22b /* CTI */
- ldub [%o0], %o3 /* Load */
- retl /* CTI+IEU1 Group */
- clr %o0 /* IEU0 */
-
-23: retl /* CTI+IEU1 Group */
- add %o0, -1, %o0 /* IEU0 */
-END(memchr)
-
-libc_hidden_def(memchr)
-#if !__BOUNDED_POINTERS__
-weak_alias(memchr,__ubp_memchr)
-#endif
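
The header comment of the removed file describes the two zero-byte tests in prose; in C, the memchr variant XORs each word with the search byte replicated into every lane before applying the test. A 32-bit sketch of what the comment describes (the SPARC code itself works on 64-bit words):

    #include <stdint.h>

    /* Nonzero if word w *may* contain byte c.  The fast test can give
     * false alarms for lanes in 0x81-0xff; the EIGHTBIT_NOT_RARE form
     * ((x - 0x01010101) & ~x & 0x80808080) never does. */
    static int word_may_contain(uint32_t w, uint8_t c)
    {
        uint32_t x = w ^ (c * 0x01010101u);  /* matching lane becomes 0 */
        return ((x - 0x01010101u) & 0x80808080u) != 0;
    }
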
diff --git a/libc/string/sparc/sparc64/memcpy.S b/libc/string/sparc/sparc64/memcpy.S
deleted file mode 100644
index db63d1da2..000000000
--- a/libc/string/sparc/sparc64/memcpy.S
+++ /dev/null
@@ -1,923 +0,0 @@
-/* Copy SIZE bytes from SRC to DEST.
- For UltraSPARC.
- Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by David S. Miller (davem@caip.rutgers.edu) and
- Jakub Jelinek (jakub@redhat.com).
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <features.h>
-#include <asm/asi.h>
-#ifndef XCC
-#define USE_BPR
- .register %g2, #scratch
- .register %g3, #scratch
- .register %g6, #scratch
-#define XCC xcc
-#endif
-#define FPRS_FEF 4
-
-#define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9) \
- faligndata %f1, %f2, %f48; \
- faligndata %f2, %f3, %f50; \
- faligndata %f3, %f4, %f52; \
- faligndata %f4, %f5, %f54; \
- faligndata %f5, %f6, %f56; \
- faligndata %f6, %f7, %f58; \
- faligndata %f7, %f8, %f60; \
- faligndata %f8, %f9, %f62;
-
-#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \
- ldda [%src] %asi, %fdest; \
- add %src, 0x40, %src; \
- add %dest, 0x40, %dest; \
- subcc %len, 0x40, %len; \
- be,pn %xcc, jmptgt; \
- stda %fsrc, [%dest - 0x40] %asi;
-
-#define LOOP_CHUNK1(src, dest, len, branch_dest) \
- MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest)
-#define LOOP_CHUNK2(src, dest, len, branch_dest) \
- MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
-#define LOOP_CHUNK3(src, dest, len, branch_dest) \
- MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
-
-#define STORE_SYNC(dest, fsrc) \
- stda %fsrc, [%dest] %asi; \
- add %dest, 0x40, %dest;
-
-#define STORE_JUMP(dest, fsrc, target) \
- stda %fsrc, [%dest] %asi; \
- add %dest, 0x40, %dest; \
- ba,pt %xcc, target;
-
-#define VISLOOP_PAD nop; nop; nop; nop; \
- nop; nop; nop; nop; \
- nop; nop; nop; nop; \
- nop; nop; nop;
-
-#define FINISH_VISCHUNK(dest, f0, f1, left) \
- subcc %left, 8, %left; \
- bl,pn %xcc, 205f; \
- faligndata %f0, %f1, %f48; \
- std %f48, [%dest]; \
- add %dest, 8, %dest;
-
-#define UNEVEN_VISCHUNK(dest, f0, f1, left) \
- subcc %left, 8, %left; \
- bl,pn %xcc, 205f; \
- fsrc1 %f0, %f1; \
- ba,a,pt %xcc, 204f;
-
- /* Macros for non-VIS memcpy code. */
-#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src + offset + 0x00], %t0; \
- ldx [%src + offset + 0x08], %t1; \
- ldx [%src + offset + 0x10], %t2; \
- ldx [%src + offset + 0x18], %t3; \
- stw %t0, [%dst + offset + 0x04]; \
- srlx %t0, 32, %t0; \
- stw %t0, [%dst + offset + 0x00]; \
- stw %t1, [%dst + offset + 0x0c]; \
- srlx %t1, 32, %t1; \
- stw %t1, [%dst + offset + 0x08]; \
- stw %t2, [%dst + offset + 0x14]; \
- srlx %t2, 32, %t2; \
- stw %t2, [%dst + offset + 0x10]; \
- stw %t3, [%dst + offset + 0x1c]; \
- srlx %t3, 32, %t3; \
- stw %t3, [%dst + offset + 0x18];
-
-#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src + offset + 0x00], %t0; \
- ldx [%src + offset + 0x08], %t1; \
- ldx [%src + offset + 0x10], %t2; \
- ldx [%src + offset + 0x18], %t3; \
- stx %t0, [%dst + offset + 0x00]; \
- stx %t1, [%dst + offset + 0x08]; \
- stx %t2, [%dst + offset + 0x10]; \
- stx %t3, [%dst + offset + 0x18]; \
- ldx [%src + offset + 0x20], %t0; \
- ldx [%src + offset + 0x28], %t1; \
- ldx [%src + offset + 0x30], %t2; \
- ldx [%src + offset + 0x38], %t3; \
- stx %t0, [%dst + offset + 0x20]; \
- stx %t1, [%dst + offset + 0x28]; \
- stx %t2, [%dst + offset + 0x30]; \
- stx %t3, [%dst + offset + 0x38];
-
-#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src - offset - 0x10], %t0; \
- ldx [%src - offset - 0x08], %t1; \
- stw %t0, [%dst - offset - 0x0c]; \
- srlx %t0, 32, %t2; \
- stw %t2, [%dst - offset - 0x10]; \
- stw %t1, [%dst - offset - 0x04]; \
- srlx %t1, 32, %t3; \
- stw %t3, [%dst - offset - 0x08];
-
-#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \
- ldx [%src - offset - 0x10], %t0; \
- ldx [%src - offset - 0x08], %t1; \
- stx %t0, [%dst - offset - 0x10]; \
- stx %t1, [%dst - offset - 0x08];
-
- /* Macros for non-VIS memmove code. */
-#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src - offset - 0x20], %t0; \
- ldx [%src - offset - 0x18], %t1; \
- ldx [%src - offset - 0x10], %t2; \
- ldx [%src - offset - 0x08], %t3; \
- stw %t0, [%dst - offset - 0x1c]; \
- srlx %t0, 32, %t0; \
- stw %t0, [%dst - offset - 0x20]; \
- stw %t1, [%dst - offset - 0x14]; \
- srlx %t1, 32, %t1; \
- stw %t1, [%dst - offset - 0x18]; \
- stw %t2, [%dst - offset - 0x0c]; \
- srlx %t2, 32, %t2; \
- stw %t2, [%dst - offset - 0x10]; \
- stw %t3, [%dst - offset - 0x04]; \
- srlx %t3, 32, %t3; \
- stw %t3, [%dst - offset - 0x08];
-
-#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src - offset - 0x20], %t0; \
- ldx [%src - offset - 0x18], %t1; \
- ldx [%src - offset - 0x10], %t2; \
- ldx [%src - offset - 0x08], %t3; \
- stx %t0, [%dst - offset - 0x20]; \
- stx %t1, [%dst - offset - 0x18]; \
- stx %t2, [%dst - offset - 0x10]; \
- stx %t3, [%dst - offset - 0x08]; \
- ldx [%src - offset - 0x40], %t0; \
- ldx [%src - offset - 0x38], %t1; \
- ldx [%src - offset - 0x30], %t2; \
- ldx [%src - offset - 0x28], %t3; \
- stx %t0, [%dst - offset - 0x40]; \
- stx %t1, [%dst - offset - 0x38]; \
- stx %t2, [%dst - offset - 0x30]; \
- stx %t3, [%dst - offset - 0x28];
-
-#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src + offset + 0x00], %t0; \
- ldx [%src + offset + 0x08], %t1; \
- stw %t0, [%dst + offset + 0x04]; \
- srlx %t0, 32, %t2; \
- stw %t2, [%dst + offset + 0x00]; \
- stw %t1, [%dst + offset + 0x0c]; \
- srlx %t1, 32, %t3; \
- stw %t3, [%dst + offset + 0x08];
-
-#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \
- ldx [%src + offset + 0x00], %t0; \
- ldx [%src + offset + 0x08], %t1; \
- stx %t0, [%dst + offset + 0x00]; \
- stx %t1, [%dst + offset + 0x08];
-
- .text
- .align 32
-
-#ifdef __UCLIBC_SUSV3_LEGACY__
-ENTRY(bcopy)
- sub %o1, %o0, %o4 /* IEU0 Group */
- mov %o0, %g3 /* IEU1 */
- cmp %o4, %o2 /* IEU1 Group */
- mov %o1, %o0 /* IEU0 */
- bgeu,pt %XCC, 210f /* CTI */
- mov %g3, %o1 /* IEU0 Group */
-#ifndef USE_BPR
- srl %o2, 0, %o2 /* IEU1 */
-#endif
- brnz,pn %o2, 220f /* CTI Group */
- add %o0, %o2, %o0 /* IEU0 */
- retl
- nop
-END(bcopy)
-#endif
-
- .align 32
-200: be,pt %xcc, 201f /* CTI */
- andcc %o0, 0x38, %g5 /* IEU1 Group */
- mov 8, %g1 /* IEU0 */
- sub %g1, %g2, %g2 /* IEU0 Group */
- andcc %o0, 1, %g0 /* IEU1 */
- be,pt %icc, 2f /* CTI */
- sub %o2, %g2, %o2 /* IEU0 Group */
-1: ldub [%o1], %o5 /* Load Group */
- add %o1, 1, %o1 /* IEU0 */
- add %o0, 1, %o0 /* IEU1 */
- subcc %g2, 1, %g2 /* IEU1 Group */
- be,pn %xcc, 3f /* CTI */
- stb %o5, [%o0 - 1] /* Store */
-2: ldub [%o1], %o5 /* Load Group */
- add %o0, 2, %o0 /* IEU0 */
- ldub [%o1 + 1], %g3 /* Load Group */
- subcc %g2, 2, %g2 /* IEU1 Group */
- stb %o5, [%o0 - 2] /* Store */
- add %o1, 2, %o1 /* IEU0 */
- bne,pt %xcc, 2b /* CTI Group */
- stb %g3, [%o0 - 1] /* Store */
-3: andcc %o0, 0x38, %g5 /* IEU1 Group */
-201: be,pt %icc, 202f /* CTI */
- mov 64, %g1 /* IEU0 */
- fmovd %f0, %f2 /* FPU */
- sub %g1, %g5, %g5 /* IEU0 Group */
- alignaddr %o1, %g0, %g1 /* GRU Group */
- ldd [%g1], %f4 /* Load Group */
- sub %o2, %g5, %o2 /* IEU0 */
-1: ldd [%g1 + 0x8], %f6 /* Load Group */
- add %g1, 0x8, %g1 /* IEU0 Group */
- subcc %g5, 8, %g5 /* IEU1 */
- faligndata %f4, %f6, %f0 /* GRU Group */
- std %f0, [%o0] /* Store */
- add %o1, 8, %o1 /* IEU0 Group */
- be,pn %xcc, 202f /* CTI */
- add %o0, 8, %o0 /* IEU1 */
- ldd [%g1 + 0x8], %f4 /* Load Group */
- add %g1, 8, %g1 /* IEU0 */
- subcc %g5, 8, %g5 /* IEU1 */
- faligndata %f6, %f4, %f0 /* GRU Group */
- std %f0, [%o0] /* Store */
- add %o1, 8, %o1 /* IEU0 */
- bne,pt %xcc, 1b /* CTI Group */
- add %o0, 8, %o0 /* IEU0 */
-202: membar #LoadStore | #StoreStore | #StoreLoad /* LSU Group */
- wr %g0, ASI_BLK_P, %asi /* LSU Group */
- subcc %o2, 0x40, %g6 /* IEU1 Group */
- mov %o1, %g1 /* IEU0 */
- andncc %g6, (0x40 - 1), %g6 /* IEU1 Group */
- srl %g1, 3, %g2 /* IEU0 */
- sub %o2, %g6, %g3 /* IEU0 Group */
- andn %o1, (0x40 - 1), %o1 /* IEU1 */
- and %g2, 7, %g2 /* IEU0 Group */
- andncc %g3, 0x7, %g3 /* IEU1 */
- fmovd %f0, %f2 /* FPU */
- sub %g3, 0x10, %g3 /* IEU0 Group */
- sub %o2, %g6, %o2 /* IEU1 */
- alignaddr %g1, %g0, %g0 /* GRU Group */
- add %g1, %g6, %g1 /* IEU0 Group */
- subcc %o2, %g3, %o2 /* IEU1 */
- ldda [%o1 + 0x00] %asi, %f0 /* LSU Group */
- add %g1, %g3, %g1 /* IEU0 */
- ldda [%o1 + 0x40] %asi, %f16 /* LSU Group */
- sub %g6, 0x80, %g6 /* IEU0 */
- ldda [%o1 + 0x80] %asi, %f32 /* LSU Group */
- /* Clk1 Group 8-( */
- /* Clk2 Group 8-( */
- /* Clk3 Group 8-( */
- /* Clk4 Group 8-( */
-203: rd %pc, %g5 /* PDU Group 8-( */
- addcc %g5, %lo(300f - 203b), %g5 /* IEU1 Group */
- sll %g2, 9, %g2 /* IEU0 */
- jmpl %g5 + %g2, %g0 /* CTI Group brk forced*/
- addcc %o1, 0xc0, %o1 /* IEU1 Group */
-
- .align 512 /* OK, here comes the fun part... */
-300: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) LOOP_CHUNK1(o1, o0, g6, 301f)
- FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) LOOP_CHUNK2(o1, o0, g6, 302f)
- FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) LOOP_CHUNK3(o1, o0, g6, 303f)
- b,pt %xcc, 300b+4; faligndata %f0, %f2, %f48
-301: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) STORE_JUMP(o0, f48, 400f) membar #Sync
-302: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) STORE_JUMP(o0, f48, 416f) membar #Sync
-303: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) STORE_JUMP(o0, f48, 432f) membar #Sync
- VISLOOP_PAD
-310: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) LOOP_CHUNK1(o1, o0, g6, 311f)
- FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) LOOP_CHUNK2(o1, o0, g6, 312f)
- FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) LOOP_CHUNK3(o1, o0, g6, 313f)
- b,pt %xcc, 310b+4; faligndata %f2, %f4, %f48
-311: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) STORE_JUMP(o0, f48, 402f) membar #Sync
-312: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) STORE_JUMP(o0, f48, 418f) membar #Sync
-313: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) STORE_JUMP(o0, f48, 434f) membar #Sync
- VISLOOP_PAD
-320: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) LOOP_CHUNK1(o1, o0, g6, 321f)
- FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) LOOP_CHUNK2(o1, o0, g6, 322f)
- FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) LOOP_CHUNK3(o1, o0, g6, 323f)
- b,pt %xcc, 320b+4; faligndata %f4, %f6, %f48
-321: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) STORE_JUMP(o0, f48, 404f) membar #Sync
-322: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) STORE_JUMP(o0, f48, 420f) membar #Sync
-323: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) STORE_JUMP(o0, f48, 436f) membar #Sync
- VISLOOP_PAD
-330: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) LOOP_CHUNK1(o1, o0, g6, 331f)
- FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) LOOP_CHUNK2(o1, o0, g6, 332f)
- FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) LOOP_CHUNK3(o1, o0, g6, 333f)
- b,pt %xcc, 330b+4; faligndata %f6, %f8, %f48
-331: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) STORE_JUMP(o0, f48, 406f) membar #Sync
-332: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) STORE_JUMP(o0, f48, 422f) membar #Sync
-333: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) STORE_JUMP(o0, f48, 438f) membar #Sync
- VISLOOP_PAD
-340: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) LOOP_CHUNK1(o1, o0, g6, 341f)
- FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) LOOP_CHUNK2(o1, o0, g6, 342f)
- FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) LOOP_CHUNK3(o1, o0, g6, 343f)
- b,pt %xcc, 340b+4; faligndata %f8, %f10, %f48
-341: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) STORE_JUMP(o0, f48, 408f) membar #Sync
-342: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) STORE_JUMP(o0, f48, 424f) membar #Sync
-343: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) STORE_JUMP(o0, f48, 440f) membar #Sync
- VISLOOP_PAD
-350: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) LOOP_CHUNK1(o1, o0, g6, 351f)
- FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) LOOP_CHUNK2(o1, o0, g6, 352f)
- FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) LOOP_CHUNK3(o1, o0, g6, 353f)
- b,pt %xcc, 350b+4; faligndata %f10, %f12, %f48
-351: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) STORE_JUMP(o0, f48, 410f) membar #Sync
-352: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) STORE_JUMP(o0, f48, 426f) membar #Sync
-353: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) STORE_JUMP(o0, f48, 442f) membar #Sync
- VISLOOP_PAD
-360: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) LOOP_CHUNK1(o1, o0, g6, 361f)
- FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) LOOP_CHUNK2(o1, o0, g6, 362f)
- FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) LOOP_CHUNK3(o1, o0, g6, 363f)
- b,pt %xcc, 360b+4; faligndata %f12, %f14, %f48
-361: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) STORE_JUMP(o0, f48, 412f) membar #Sync
-362: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) STORE_JUMP(o0, f48, 428f) membar #Sync
-363: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) STORE_JUMP(o0, f48, 444f) membar #Sync
- VISLOOP_PAD
-370: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) LOOP_CHUNK1(o1, o0, g6, 371f)
- FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) LOOP_CHUNK2(o1, o0, g6, 372f)
- FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) LOOP_CHUNK3(o1, o0, g6, 373f)
- b,pt %xcc, 370b+4; faligndata %f14, %f16, %f48
-371: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) STORE_JUMP(o0, f48, 414f) membar #Sync
-372: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) STORE_JUMP(o0, f48, 430f) membar #Sync
-373: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) STORE_SYNC(o0, f48) membar #Sync
- FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_JUMP(o0, f48, 446f) membar #Sync
- VISLOOP_PAD
-400: FINISH_VISCHUNK(o0, f0, f2, g3)
-402: FINISH_VISCHUNK(o0, f2, f4, g3)
-404: FINISH_VISCHUNK(o0, f4, f6, g3)
-406: FINISH_VISCHUNK(o0, f6, f8, g3)
-408: FINISH_VISCHUNK(o0, f8, f10, g3)
-410: FINISH_VISCHUNK(o0, f10, f12, g3)
-412: FINISH_VISCHUNK(o0, f12, f14, g3)
-414: UNEVEN_VISCHUNK(o0, f14, f0, g3)
-416: FINISH_VISCHUNK(o0, f16, f18, g3)
-418: FINISH_VISCHUNK(o0, f18, f20, g3)
-420: FINISH_VISCHUNK(o0, f20, f22, g3)
-422: FINISH_VISCHUNK(o0, f22, f24, g3)
-424: FINISH_VISCHUNK(o0, f24, f26, g3)
-426: FINISH_VISCHUNK(o0, f26, f28, g3)
-428: FINISH_VISCHUNK(o0, f28, f30, g3)
-430: UNEVEN_VISCHUNK(o0, f30, f0, g3)
-432: FINISH_VISCHUNK(o0, f32, f34, g3)
-434: FINISH_VISCHUNK(o0, f34, f36, g3)
-436: FINISH_VISCHUNK(o0, f36, f38, g3)
-438: FINISH_VISCHUNK(o0, f38, f40, g3)
-440: FINISH_VISCHUNK(o0, f40, f42, g3)
-442: FINISH_VISCHUNK(o0, f42, f44, g3)
-444: FINISH_VISCHUNK(o0, f44, f46, g3)
-446: UNEVEN_VISCHUNK(o0, f46, f0, g3)
-204: ldd [%o1], %f2 /* Load Group */
- add %o1, 8, %o1 /* IEU0 */
- subcc %g3, 8, %g3 /* IEU1 */
- faligndata %f0, %f2, %f8 /* GRU Group */
- std %f8, [%o0] /* Store */
- bl,pn %xcc, 205f /* CTI */
- add %o0, 8, %o0 /* IEU0 Group */
- ldd [%o1], %f0 /* Load Group */
- add %o1, 8, %o1 /* IEU0 */
- subcc %g3, 8, %g3 /* IEU1 */
- faligndata %f2, %f0, %f8 /* GRU Group */
- std %f8, [%o0] /* Store */
- bge,pt %xcc, 204b /* CTI */
- add %o0, 8, %o0 /* IEU0 Group */
-205: brz,pt %o2, 207f /* CTI Group */
- mov %g1, %o1 /* IEU0 */
-206: ldub [%o1], %g5 /* LOAD */
- add %o1, 1, %o1 /* IEU0 */
- add %o0, 1, %o0 /* IEU1 */
- subcc %o2, 1, %o2 /* IEU1 */
- bne,pt %xcc, 206b /* CTI */
- stb %g5, [%o0 - 1] /* Store Group */
-207: membar #StoreLoad | #StoreStore /* LSU Group */
- wr %g0, FPRS_FEF, %fprs
- retl
- mov %g4, %o0
-
-208: andcc %o2, 1, %g0 /* IEU1 Group */
- be,pt %icc, 2f+4 /* CTI */
-1: ldub [%o1], %g5 /* LOAD Group */
- add %o1, 1, %o1 /* IEU0 */
- add %o0, 1, %o0 /* IEU1 */
- subcc %o2, 1, %o2 /* IEU1 Group */
- be,pn %xcc, 209f /* CTI */
- stb %g5, [%o0 - 1] /* Store */
-2: ldub [%o1], %g5 /* LOAD Group */
- add %o0, 2, %o0 /* IEU0 */
- ldub [%o1 + 1], %o5 /* LOAD Group */
- add %o1, 2, %o1 /* IEU0 */
- subcc %o2, 2, %o2 /* IEU1 Group */
- stb %g5, [%o0 - 2] /* Store */
- bne,pt %xcc, 2b /* CTI */
- stb %o5, [%o0 - 1] /* Store */
-209: retl
- mov %g4, %o0
-
-#ifdef USE_BPR
-
- /* void *__align_cpy_4(void *dest, void *src, size_t n)
- * SPARC v9 SYSV ABI
- * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 3))
- */
-
- .align 32
-ENTRY(__align_cpy_4)
- mov %o0, %g4 /* IEU0 Group */
- cmp %o2, 15 /* IEU1 */
- bleu,pn %xcc, 208b /* CTI */
- cmp %o2, (64 * 6) /* IEU1 Group */
- bgeu,pn %xcc, 200b /* CTI */
- andcc %o0, 7, %g2 /* IEU1 Group */
- ba,pt %xcc, 216f /* CTI */
- andcc %o1, 4, %g0 /* IEU1 Group */
-END(__align_cpy_4)
-
- /* void *__align_cpy_8(void *dest, void *src, size_t n)
- * SPARC v9 SYSV ABI
- * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 7))
- */
-
- .align 32
-ENTRY(__align_cpy_8)
- mov %o0, %g4 /* IEU0 Group */
- cmp %o2, 15 /* IEU1 */
- bleu,pn %xcc, 208b /* CTI */
- cmp %o2, (64 * 6) /* IEU1 Group */
- bgeu,pn %xcc, 201b /* CTI */
- andcc %o0, 0x38, %g5 /* IEU1 Group */
- andcc %o2, -128, %g6 /* IEU1 Group */
- bne,a,pt %xcc, 82f + 4 /* CTI */
- ldx [%o1], %g1 /* Load */
- ba,pt %xcc, 41f /* CTI Group */
- andcc %o2, 0x70, %g6 /* IEU1 */
-END(__align_cpy_8)
-
- /* void *__align_cpy_16(void *dest, void *src, size_t n)
- * SPARC v9 SYSV ABI
- * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 15))
- */
-
- .align 32
-ENTRY(__align_cpy_16)
- mov %o0, %g4 /* IEU0 Group */
- cmp %o2, (64 * 6) /* IEU1 */
- bgeu,pn %xcc, 201b /* CTI */
- andcc %o0, 0x38, %g5 /* IEU1 Group */
- andcc %o2, -128, %g6 /* IEU1 Group */
- bne,a,pt %xcc, 82f + 4 /* CTI */
- ldx [%o1], %g1 /* Load */
- ba,pt %xcc, 41f /* CTI Group */
- andcc %o2, 0x70, %g6 /* IEU1 */
-END(__align_cpy_16)
-
-#endif
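
The __align_cpy_4/8/16 entry points above are the SPARC v9 SYSV ABI aligned-copy helpers: each behaves like memcpy but may assume that dest, src, and n all share the stated alignment, which is why they can branch straight into the tuned word- and block-copy paths. A minimal C model of the contract (the assert, the const qualifier, and the function name are illustrative; the real entry points perform no check):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Illustrative only: the ABI makes the behavior undefined unless the
   precondition holds, and the assembly never verifies it. */
static void *align_cpy_8(void *dest, const void *src, size_t n)
{
    assert(n != 0 && (((uintptr_t)dest | (uintptr_t)src | n) & 7) == 0);
    return memcpy(dest, src, n);
}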
-
- .align 32
-ENTRY(memcpy)
-210:
-#ifndef USE_BPR
- srl %o2, 0, %o2 /* IEU1 Group */
-#endif
- brz,pn %o2, 209b /* CTI Group */
- mov %o0, %g4 /* IEU0 */
-218: cmp %o2, 15 /* IEU1 Group */
- bleu,pn %xcc, 208b /* CTI */
- cmp %o2, (64 * 6) /* IEU1 Group */
- bgeu,pn %xcc, 200b /* CTI */
- andcc %o0, 7, %g2 /* IEU1 Group */
- sub %o0, %o1, %g5 /* IEU0 */
- andcc %g5, 3, %o5 /* IEU1 Group */
- bne,pn %xcc, 212f /* CTI */
- andcc %o1, 3, %g0 /* IEU1 Group */
- be,a,pt %xcc, 216f /* CTI */
- andcc %o1, 4, %g0 /* IEU1 Group */
- andcc %o1, 1, %g0 /* IEU1 Group */
- be,pn %xcc, 4f /* CTI */
- andcc %o1, 2, %g0 /* IEU1 Group */
- ldub [%o1], %g2 /* Load Group */
- add %o1, 1, %o1 /* IEU0 */
- add %o0, 1, %o0 /* IEU1 */
- sub %o2, 1, %o2 /* IEU0 Group */
- bne,pn %xcc, 5f /* CTI Group */
- stb %g2, [%o0 - 1] /* Store */
-4: lduh [%o1], %g2 /* Load Group */
- add %o1, 2, %o1 /* IEU0 */
- add %o0, 2, %o0 /* IEU1 */
- sub %o2, 2, %o2 /* IEU0 */
- sth %g2, [%o0 - 2] /* Store Group + bubble */
-5: andcc %o1, 4, %g0 /* IEU1 */
-216: be,a,pn %xcc, 2f /* CTI */
- andcc %o2, -128, %g6 /* IEU1 Group */
- lduw [%o1], %g5 /* Load Group */
- add %o1, 4, %o1 /* IEU0 */
- add %o0, 4, %o0 /* IEU1 */
- sub %o2, 4, %o2 /* IEU0 Group */
- stw %g5, [%o0 - 4] /* Store */
- andcc %o2, -128, %g6 /* IEU1 Group */
-2: be,pn %xcc, 215f /* CTI */
- andcc %o0, 4, %g0 /* IEU1 Group */
- be,pn %xcc, 82f + 4 /* CTI Group */
-5: MOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
- MOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
- MOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
- MOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
-35: subcc %g6, 128, %g6 /* IEU1 Group */
- add %o1, 128, %o1 /* IEU0 */
- bne,pt %xcc, 5b /* CTI */
- add %o0, 128, %o0 /* IEU0 Group */
-215: andcc %o2, 0x70, %g6 /* IEU1 Group */
-41: be,pn %xcc, 80f /* CTI */
- andcc %o2, 8, %g0 /* IEU1 Group */
- /* Clk1 8-( */
- /* Clk2 8-( */
- /* Clk3 8-( */
- /* Clk4 8-( */
-79: rd %pc, %o5 /* PDU Group */
- sll %g6, 1, %g5 /* IEU0 Group */
- add %o1, %g6, %o1 /* IEU1 */
- sub %o5, %g5, %o5 /* IEU0 Group */
-	jmpl	%o5 + %lo(80f - 79b), %g0	/* CTI Group brk forced */
- add %o0, %g6, %o0 /* IEU0 Group */
-36: MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
- MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
- MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
- MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
- MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
- MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
- MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
-80: be,pt %xcc, 81f /* CTI */
- andcc %o2, 4, %g0 /* IEU1 */
- ldx [%o1], %g2 /* Load Group */
- add %o0, 8, %o0 /* IEU0 */
- stw %g2, [%o0 - 0x4] /* Store Group */
- add %o1, 8, %o1 /* IEU1 */
- srlx %g2, 32, %g2 /* IEU0 Group */
- stw %g2, [%o0 - 0x8] /* Store */
-81: be,pt %xcc, 1f /* CTI */
- andcc %o2, 2, %g0 /* IEU1 Group */
- lduw [%o1], %g2 /* Load Group */
- add %o1, 4, %o1 /* IEU0 */
- stw %g2, [%o0] /* Store Group */
- add %o0, 4, %o0 /* IEU0 */
-1: be,pt %xcc, 1f /* CTI */
- andcc %o2, 1, %g0 /* IEU1 Group */
- lduh [%o1], %g2 /* Load Group */
- add %o1, 2, %o1 /* IEU0 */
- sth %g2, [%o0] /* Store Group */
- add %o0, 2, %o0 /* IEU0 */
-1: be,pt %xcc, 211f /* CTI */
- nop /* IEU1 */
- ldub [%o1], %g2 /* Load Group */
- stb %g2, [%o0] /* Store Group + bubble */
-211: retl
- mov %g4, %o0
-
-82: MOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
- MOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
-37: subcc %g6, 128, %g6 /* IEU1 Group */
- add %o1, 128, %o1 /* IEU0 */
- bne,pt %xcc, 82b /* CTI */
- add %o0, 128, %o0 /* IEU0 Group */
- andcc %o2, 0x70, %g6 /* IEU1 */
- be,pn %xcc, 84f /* CTI */
- andcc %o2, 8, %g0 /* IEU1 Group */
- /* Clk1 8-( */
- /* Clk2 8-( */
- /* Clk3 8-( */
- /* Clk4 8-( */
-83: rd %pc, %o5 /* PDU Group */
- add %o1, %g6, %o1 /* IEU0 Group */
- sub %o5, %g6, %o5 /* IEU1 */
-	jmpl	%o5 + %lo(84f - 83b), %g0	/* CTI Group brk forced */
- add %o0, %g6, %o0 /* IEU0 Group */
-38: MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
- MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
- MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
- MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
- MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
- MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
- MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
-84: be,pt %xcc, 85f /* CTI Group */
- andcc %o2, 4, %g0 /* IEU1 */
- ldx [%o1], %g2 /* Load Group */
- add %o0, 8, %o0 /* IEU0 */
- add %o1, 8, %o1 /* IEU0 Group */
- stx %g2, [%o0 - 0x8] /* Store */
-85: be,pt %xcc, 1f /* CTI */
- andcc %o2, 2, %g0 /* IEU1 Group */
- lduw [%o1], %g2 /* Load Group */
- add %o0, 4, %o0 /* IEU0 */
- add %o1, 4, %o1 /* IEU0 Group */
- stw %g2, [%o0 - 0x4] /* Store */
-1: be,pt %xcc, 1f /* CTI */
- andcc %o2, 1, %g0 /* IEU1 Group */
- lduh [%o1], %g2 /* Load Group */
- add %o0, 2, %o0 /* IEU0 */
- add %o1, 2, %o1 /* IEU0 Group */
- sth %g2, [%o0 - 0x2] /* Store */
-1: be,pt %xcc, 1f /* CTI */
- nop /* IEU0 Group */
- ldub [%o1], %g2 /* Load Group */
- stb %g2, [%o0] /* Store Group + bubble */
-1: retl
- mov %g4, %o0
-
-212: brz,pt %g2, 2f /* CTI Group */
- mov 8, %g1 /* IEU0 */
- sub %g1, %g2, %g2 /* IEU0 Group */
- sub %o2, %g2, %o2 /* IEU0 Group */
-1: ldub [%o1], %g5 /* Load Group */
- add %o1, 1, %o1 /* IEU0 */
- add %o0, 1, %o0 /* IEU1 */
- subcc %g2, 1, %g2 /* IEU1 Group */
- bne,pt %xcc, 1b /* CTI */
- stb %g5, [%o0 - 1] /* Store */
-2: andn %o2, 7, %g5 /* IEU0 Group */
- and %o2, 7, %o2 /* IEU1 */
- fmovd %f0, %f2 /* FPU */
- alignaddr %o1, %g0, %g1 /* GRU Group */
- ldd [%g1], %f4 /* Load Group */
-1: ldd [%g1 + 0x8], %f6 /* Load Group */
- add %g1, 0x8, %g1 /* IEU0 Group */
- subcc %g5, 8, %g5 /* IEU1 */
- faligndata %f4, %f6, %f0 /* GRU Group */
- std %f0, [%o0] /* Store */
- add %o1, 8, %o1 /* IEU0 Group */
- be,pn %xcc, 213f /* CTI */
- add %o0, 8, %o0 /* IEU1 */
- ldd [%g1 + 0x8], %f4 /* Load Group */
- add %g1, 8, %g1 /* IEU0 */
- subcc %g5, 8, %g5 /* IEU1 */
- faligndata %f6, %f4, %f0 /* GRU Group */
- std %f0, [%o0] /* Store */
- add %o1, 8, %o1 /* IEU0 */
- bne,pn %xcc, 1b /* CTI Group */
- add %o0, 8, %o0 /* IEU0 */
-213: brz,pn %o2, 214f /* CTI Group */
- nop /* IEU0 */
- ldub [%o1], %g5 /* LOAD */
- add %o1, 1, %o1 /* IEU0 */
- add %o0, 1, %o0 /* IEU1 */
- subcc %o2, 1, %o2 /* IEU1 */
- bne,pt %xcc, 206b /* CTI */
- stb %g5, [%o0 - 1] /* Store Group */
-214: wr %g0, FPRS_FEF, %fprs
- retl
- mov %g4, %o0
-END(memcpy)
-libc_hidden_def(memcpy)
-
- .align 32
-228: andcc %o2, 1, %g0 /* IEU1 Group */
- be,pt %icc, 2f+4 /* CTI */
-1: ldub [%o1 - 1], %o5 /* LOAD Group */
- sub %o1, 1, %o1 /* IEU0 */
- sub %o0, 1, %o0 /* IEU1 */
- subcc %o2, 1, %o2 /* IEU1 Group */
- be,pn %xcc, 229f /* CTI */
- stb %o5, [%o0] /* Store */
-2: ldub [%o1 - 1], %o5 /* LOAD Group */
- sub %o0, 2, %o0 /* IEU0 */
- ldub [%o1 - 2], %g5 /* LOAD Group */
- sub %o1, 2, %o1 /* IEU0 */
- subcc %o2, 2, %o2 /* IEU1 Group */
- stb %o5, [%o0 + 1] /* Store */
- bne,pt %xcc, 2b /* CTI */
- stb %g5, [%o0] /* Store */
-229: retl
- mov %g4, %o0
-219: retl
- nop
-
- .align 32
-ENTRY(memmove)
-#ifndef USE_BPR
- srl %o2, 0, %o2 /* IEU1 Group */
-#endif
- brz,pn %o2, 219b /* CTI Group */
- sub %o0, %o1, %o4 /* IEU0 */
- cmp %o4, %o2 /* IEU1 Group */
- bgeu,pt %XCC, 218b /* CTI */
- mov %o0, %g4 /* IEU0 */
- add %o0, %o2, %o0 /* IEU0 Group */
-220: add %o1, %o2, %o1 /* IEU1 */
- cmp %o2, 15 /* IEU1 Group */
- bleu,pn %xcc, 228b /* CTI */
- andcc %o0, 7, %g2 /* IEU1 Group */
- sub %o0, %o1, %g5 /* IEU0 */
- andcc %g5, 3, %o5 /* IEU1 Group */
- bne,pn %xcc, 232f /* CTI */
- andcc %o1, 3, %g0 /* IEU1 Group */
- be,a,pt %xcc, 236f /* CTI */
- andcc %o1, 4, %g0 /* IEU1 Group */
- andcc %o1, 1, %g0 /* IEU1 Group */
- be,pn %xcc, 4f /* CTI */
- andcc %o1, 2, %g0 /* IEU1 Group */
- ldub [%o1 - 1], %g2 /* Load Group */
- sub %o1, 1, %o1 /* IEU0 */
- sub %o0, 1, %o0 /* IEU1 */
- sub %o2, 1, %o2 /* IEU0 Group */
- be,pn %xcc, 5f /* CTI Group */
- stb %g2, [%o0] /* Store */
-4: lduh [%o1 - 2], %g2 /* Load Group */
- sub %o1, 2, %o1 /* IEU0 */
- sub %o0, 2, %o0 /* IEU1 */
- sub %o2, 2, %o2 /* IEU0 */
- sth %g2, [%o0] /* Store Group + bubble */
-5: andcc %o1, 4, %g0 /* IEU1 */
-236: be,a,pn %xcc, 2f /* CTI */
- andcc %o2, -128, %g6 /* IEU1 Group */
- lduw [%o1 - 4], %g5 /* Load Group */
- sub %o1, 4, %o1 /* IEU0 */
- sub %o0, 4, %o0 /* IEU1 */
- sub %o2, 4, %o2 /* IEU0 Group */
- stw %g5, [%o0] /* Store */
- andcc %o2, -128, %g6 /* IEU1 Group */
-2: be,pn %xcc, 235f /* CTI */
- andcc %o0, 4, %g0 /* IEU1 Group */
- be,pn %xcc, 282f + 4 /* CTI Group */
-5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
- subcc %g6, 128, %g6 /* IEU1 Group */
- sub %o1, 128, %o1 /* IEU0 */
- bne,pt %xcc, 5b /* CTI */
- sub %o0, 128, %o0 /* IEU0 Group */
-235: andcc %o2, 0x70, %g6 /* IEU1 Group */
-41: be,pn %xcc, 280f /* CTI */
- andcc %o2, 8, %g0 /* IEU1 Group */
- /* Clk1 8-( */
- /* Clk2 8-( */
- /* Clk3 8-( */
- /* Clk4 8-( */
-279: rd %pc, %o5 /* PDU Group */
- sll %g6, 1, %g5 /* IEU0 Group */
- sub %o1, %g6, %o1 /* IEU1 */
- sub %o5, %g5, %o5 /* IEU0 Group */
-	jmpl	%o5 + %lo(280f - 279b), %g0	/* CTI Group brk forced */
- sub %o0, %g6, %o0 /* IEU0 Group */
- RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
-280: be,pt %xcc, 281f /* CTI */
- andcc %o2, 4, %g0 /* IEU1 */
- ldx [%o1 - 8], %g2 /* Load Group */
- sub %o0, 8, %o0 /* IEU0 */
- stw %g2, [%o0 + 4] /* Store Group */
- sub %o1, 8, %o1 /* IEU1 */
- srlx %g2, 32, %g2 /* IEU0 Group */
- stw %g2, [%o0] /* Store */
-281: be,pt %xcc, 1f /* CTI */
- andcc %o2, 2, %g0 /* IEU1 Group */
- lduw [%o1 - 4], %g2 /* Load Group */
- sub %o1, 4, %o1 /* IEU0 */
- stw %g2, [%o0 - 4] /* Store Group */
- sub %o0, 4, %o0 /* IEU0 */
-1: be,pt %xcc, 1f /* CTI */
- andcc %o2, 1, %g0 /* IEU1 Group */
- lduh [%o1 - 2], %g2 /* Load Group */
- sub %o1, 2, %o1 /* IEU0 */
- sth %g2, [%o0 - 2] /* Store Group */
- sub %o0, 2, %o0 /* IEU0 */
-1: be,pt %xcc, 211f /* CTI */
- nop /* IEU1 */
- ldub [%o1 - 1], %g2 /* Load Group */
- stb %g2, [%o0 - 1] /* Store Group + bubble */
-211: retl
- mov %g4, %o0
-
-282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
- RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
- subcc %g6, 128, %g6 /* IEU1 Group */
- sub %o1, 128, %o1 /* IEU0 */
- bne,pt %xcc, 282b /* CTI */
- sub %o0, 128, %o0 /* IEU0 Group */
- andcc %o2, 0x70, %g6 /* IEU1 */
- be,pn %xcc, 284f /* CTI */
- andcc %o2, 8, %g0 /* IEU1 Group */
- /* Clk1 8-( */
- /* Clk2 8-( */
- /* Clk3 8-( */
- /* Clk4 8-( */
-283: rd %pc, %o5 /* PDU Group */
- sub %o1, %g6, %o1 /* IEU0 Group */
- sub %o5, %g6, %o5 /* IEU1 */
-	jmpl	%o5 + %lo(284f - 283b), %g0	/* CTI Group brk forced */
- sub %o0, %g6, %o0 /* IEU0 Group */
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
-284: be,pt %xcc, 285f /* CTI Group */
- andcc %o2, 4, %g0 /* IEU1 */
- ldx [%o1 - 8], %g2 /* Load Group */
- sub %o0, 8, %o0 /* IEU0 */
- sub %o1, 8, %o1 /* IEU0 Group */
- stx %g2, [%o0] /* Store */
-285: be,pt %xcc, 1f /* CTI */
- andcc %o2, 2, %g0 /* IEU1 Group */
- lduw [%o1 - 4], %g2 /* Load Group */
- sub %o0, 4, %o0 /* IEU0 */
- sub %o1, 4, %o1 /* IEU0 Group */
- stw %g2, [%o0] /* Store */
-1: be,pt %xcc, 1f /* CTI */
- andcc %o2, 1, %g0 /* IEU1 Group */
- lduh [%o1 - 2], %g2 /* Load Group */
- sub %o0, 2, %o0 /* IEU0 */
- sub %o1, 2, %o1 /* IEU0 Group */
- sth %g2, [%o0] /* Store */
-1: be,pt %xcc, 1f /* CTI */
- nop /* IEU0 Group */
- ldub [%o1 - 1], %g2 /* Load Group */
- stb %g2, [%o0 - 1] /* Store Group + bubble */
-1: retl
- mov %g4, %o0
-
-232: brz,pt %g2, 2f /* CTI Group */
- sub %o2, %g2, %o2 /* IEU0 Group */
-1: ldub [%o1 - 1], %g5 /* Load Group */
- sub %o1, 1, %o1 /* IEU0 */
- sub %o0, 1, %o0 /* IEU1 */
- subcc %g2, 1, %g2 /* IEU1 Group */
- bne,pt %xcc, 1b /* CTI */
- stb %g5, [%o0] /* Store */
-2: andn %o2, 7, %g5 /* IEU0 Group */
- and %o2, 7, %o2 /* IEU1 */
- fmovd %f0, %f2 /* FPU */
- alignaddr %o1, %g0, %g1 /* GRU Group */
- ldd [%g1], %f4 /* Load Group */
-1: ldd [%g1 - 8], %f6 /* Load Group */
- sub %g1, 8, %g1 /* IEU0 Group */
- subcc %g5, 8, %g5 /* IEU1 */
- faligndata %f6, %f4, %f0 /* GRU Group */
- std %f0, [%o0 - 8] /* Store */
- sub %o1, 8, %o1 /* IEU0 Group */
- be,pn %xcc, 233f /* CTI */
- sub %o0, 8, %o0 /* IEU1 */
- ldd [%g1 - 8], %f4 /* Load Group */
- sub %g1, 8, %g1 /* IEU0 */
- subcc %g5, 8, %g5 /* IEU1 */
- faligndata %f4, %f6, %f0 /* GRU Group */
- std %f0, [%o0 - 8] /* Store */
- sub %o1, 8, %o1 /* IEU0 */
- bne,pn %xcc, 1b /* CTI Group */
- sub %o0, 8, %o0 /* IEU0 */
-233: brz,pn %o2, 234f /* CTI Group */
- nop /* IEU0 */
-237: ldub [%o1 - 1], %g5 /* LOAD */
- sub %o1, 1, %o1 /* IEU0 */
- sub %o0, 1, %o0 /* IEU1 */
- subcc %o2, 1, %o2 /* IEU1 */
- bne,pt %xcc, 237b /* CTI */
- stb %g5, [%o0] /* Store Group */
-234: wr %g0, FPRS_FEF, %fprs
- retl
- mov %g4, %o0
-END(memmove)
-libc_hidden_def(memmove)
-
-#ifdef USE_BPR
-weak_alias(memcpy,__align_cpy_1)
-weak_alias(memcpy,__align_cpy_2)
-#endif
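
A recurring trick in the memcpy and memmove bodies above (labels 79, 83, 279, and 283) is the loop-free tail copy: the code reads %pc, subtracts a scaled multiple of the remaining (len & 0x70) byte count, and jmpl-s into an array of unrolled 16-byte chunk copies so that exactly the right number of chunks execute. The same dispatch idea in C is a fallthrough switch, Duff's-device style; the helper below is a hypothetical sketch, not the tuned code:

#include <stddef.h>
#include <string.h>

/* Copy the (len & 0x70) tail bytes with no loop: advance the pointers
   past the tail first (as the assembly does), then jump into the
   chunk array so the right number of 16-byte copies run. */
static void copy_tail_chunks(unsigned char *dst, const unsigned char *src,
                             size_t len)
{
    size_t rem = len & 0x70;              /* 0, 16, ..., 112 bytes */
    dst += rem;
    src += rem;
    switch (rem >> 4) {
    case 7: memcpy(dst - 0x70, src - 0x70, 16); /* fall through */
    case 6: memcpy(dst - 0x60, src - 0x60, 16); /* fall through */
    case 5: memcpy(dst - 0x50, src - 0x50, 16); /* fall through */
    case 4: memcpy(dst - 0x40, src - 0x40, 16); /* fall through */
    case 3: memcpy(dst - 0x30, src - 0x30, 16); /* fall through */
    case 2: memcpy(dst - 0x20, src - 0x20, 16); /* fall through */
    case 1: memcpy(dst - 0x10, src - 0x10, 16); /* fall through */
    case 0: break;
    }
}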
diff --git a/libc/string/sparc/sparc64/memset.S b/libc/string/sparc/sparc64/memset.S
deleted file mode 100644
index 50e404bcc..000000000
--- a/libc/string/sparc/sparc64/memset.S
+++ /dev/null
@@ -1,317 +0,0 @@
-/* Set a block of memory to some byte value.
- For UltraSPARC.
- Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by David S. Miller (davem@caip.rutgers.edu) and
- Jakub Jelinek (jj@ultra.linux.cz).
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <features.h>
-#include <asm/asi.h>
-#ifndef XCC
-#define XCC xcc
-#define USE_BPR
-#endif
-#define FPRS_FEF 4
-
-#define SET_BLOCKS(base, offset, source) \
- stx source, [base - offset - 0x18]; \
- stx source, [base - offset - 0x10]; \
- stx source, [base - offset - 0x08]; \
- stx source, [base - offset - 0x00];
-
- /* Well, memset is a lot easier to get right than bcopy... */
- .text
- .align 32
-ENTRY(memset)
- andcc %o1, 0xff, %o1
- mov %o0, %o5
- be,a,pt %icc, 50f
-#ifndef USE_BPR
- srl %o2, 0, %o1
-#else
- mov %o2, %o1
-#endif
- cmp %o2, 7
-#ifndef USE_BPR
- srl %o2, 0, %o2
-#endif
- bleu,pn %XCC, 17f
- andcc %o0, 3, %g5
- be,pt %xcc, 4f
- and %o1, 0xff, %o1
- cmp %g5, 3
- be,pn %xcc, 2f
- stb %o1, [%o0 + 0x00]
- cmp %g5, 2
- be,pt %xcc, 2f
- stb %o1, [%o0 + 0x01]
- stb %o1, [%o0 + 0x02]
-2: sub %g5, 4, %g5
- sub %o0, %g5, %o0
- add %o2, %g5, %o2
-4: sllx %o1, 8, %g1
- andcc %o0, 4, %g0
- or %o1, %g1, %o1
- sllx %o1, 16, %g1
- or %o1, %g1, %o1
- be,pt %xcc, 2f
- sllx %o1, 32, %g1
- stw %o1, [%o0]
- sub %o2, 4, %o2
- add %o0, 4, %o0
-2: cmp %o2, 128
- or %o1, %g1, %o1
- blu,pn %xcc, 9f
- andcc %o0, 0x38, %g5
- be,pn %icc, 6f
- mov 64, %o4
- andcc %o0, 8, %g0
- be,pn %icc, 1f
- sub %o4, %g5, %o4
- stx %o1, [%o0]
- add %o0, 8, %o0
-1: andcc %o4, 16, %g0
- be,pn %icc, 1f
- sub %o2, %o4, %o2
- stx %o1, [%o0]
- stx %o1, [%o0 + 8]
- add %o0, 16, %o0
-1: andcc %o4, 32, %g0
- be,pn %icc, 7f
- andncc %o2, 0x3f, %o3
- stw %o1, [%o0]
- stw %o1, [%o0 + 4]
- stw %o1, [%o0 + 8]
- stw %o1, [%o0 + 12]
- stw %o1, [%o0 + 16]
- stw %o1, [%o0 + 20]
- stw %o1, [%o0 + 24]
- stw %o1, [%o0 + 28]
- add %o0, 32, %o0
-7: be,pn %xcc, 9f
- nop
- ldd [%o0 - 8], %f0
-18: wr %g0, ASI_BLK_P, %asi
- membar #StoreStore | #LoadStore
- andcc %o3, 0xc0, %g5
- and %o2, 0x3f, %o2
- fmovd %f0, %f2
- fmovd %f0, %f4
- andn %o3, 0xff, %o3
- fmovd %f0, %f6
- cmp %g5, 64
- fmovd %f0, %f8
- fmovd %f0, %f10
- fmovd %f0, %f12
- brz,pn %g5, 10f
- fmovd %f0, %f14
- be,pn %icc, 2f
- stda %f0, [%o0 + 0x00] %asi
- cmp %g5, 128
- be,pn %icc, 2f
- stda %f0, [%o0 + 0x40] %asi
- stda %f0, [%o0 + 0x80] %asi
-2: brz,pn %o3, 12f
- add %o0, %g5, %o0
-10: stda %f0, [%o0 + 0x00] %asi
- stda %f0, [%o0 + 0x40] %asi
- stda %f0, [%o0 + 0x80] %asi
- stda %f0, [%o0 + 0xc0] %asi
-11: subcc %o3, 256, %o3
- bne,pt %xcc, 10b
- add %o0, 256, %o0
-12: wr %g0, FPRS_FEF, %fprs
- membar #StoreLoad | #StoreStore
-9: andcc %o2, 0x78, %g5
- be,pn %xcc, 13f
- andcc %o2, 7, %o2
-14: rd %pc, %o4
- srl %g5, 1, %o3
- sub %o4, %o3, %o4
- jmpl %o4 + (13f - 14b), %g0
- add %o0, %g5, %o0
-12: SET_BLOCKS (%o0, 0x68, %o1)
- SET_BLOCKS (%o0, 0x48, %o1)
- SET_BLOCKS (%o0, 0x28, %o1)
- SET_BLOCKS (%o0, 0x08, %o1)
-13: be,pn %xcc, 8f
- andcc %o2, 4, %g0
- be,pn %xcc, 1f
- andcc %o2, 2, %g0
- stw %o1, [%o0]
- add %o0, 4, %o0
-1: be,pn %xcc, 1f
- andcc %o2, 1, %g0
- sth %o1, [%o0]
- add %o0, 2, %o0
-1: bne,a,pn %xcc, 8f
- stb %o1, [%o0]
-8: retl
- mov %o5, %o0
-17: brz,pn %o2, 0f
-8: add %o0, 1, %o0
- subcc %o2, 1, %o2
- bne,pt %xcc, 8b
- stb %o1, [%o0 - 1]
-0: retl
- mov %o5, %o0
-
-6: stx %o1, [%o0]
- andncc %o2, 0x3f, %o3
- be,pn %xcc, 9b
- nop
- ba,pt %xcc, 18b
- ldd [%o0], %f0
-END(memset)
-libc_hidden_def(memset)
-
-#define ZERO_BLOCKS(base, offset, source) \
- stx source, [base - offset - 0x38]; \
- stx source, [base - offset - 0x30]; \
- stx source, [base - offset - 0x28]; \
- stx source, [base - offset - 0x20]; \
- stx source, [base - offset - 0x18]; \
- stx source, [base - offset - 0x10]; \
- stx source, [base - offset - 0x08]; \
- stx source, [base - offset - 0x00];
-
- .text
- .align 32
-#ifdef __UCLIBC_SUSV3_LEGACY__
-ENTRY(bzero)
-#ifndef USE_BPR
- srl %o1, 0, %o1
-#endif
- mov %o0, %o5
-#endif
-50: cmp %o1, 7
- bleu,pn %xcc, 17f
- andcc %o0, 3, %o2
- be,a,pt %xcc, 4f
- andcc %o0, 4, %g0
- cmp %o2, 3
- be,pn %xcc, 2f
- stb %g0, [%o0 + 0x00]
- cmp %o2, 2
- be,pt %xcc, 2f
- stb %g0, [%o0 + 0x01]
- stb %g0, [%o0 + 0x02]
-2: sub %o2, 4, %o2
- sub %o0, %o2, %o0
- add %o1, %o2, %o1
- andcc %o0, 4, %g0
-4: be,pt %xcc, 2f
- cmp %o1, 128
- stw %g0, [%o0]
- sub %o1, 4, %o1
- add %o0, 4, %o0
-2: blu,pn %xcc, 9f
- andcc %o0, 0x38, %o2
- be,pn %icc, 6f
- mov 64, %o4
- andcc %o0, 8, %g0
- be,pn %icc, 1f
- sub %o4, %o2, %o4
- stx %g0, [%o0]
- add %o0, 8, %o0
-1: andcc %o4, 16, %g0
- be,pn %icc, 1f
- sub %o1, %o4, %o1
- stx %g0, [%o0]
- stx %g0, [%o0 + 8]
- add %o0, 16, %o0
-1: andcc %o4, 32, %g0
- be,pn %icc, 7f
- andncc %o1, 0x3f, %o3
- stx %g0, [%o0]
- stx %g0, [%o0 + 8]
- stx %g0, [%o0 + 16]
- stx %g0, [%o0 + 24]
- add %o0, 32, %o0
-6: andncc %o1, 0x3f, %o3
-7: be,pn %xcc, 9f
- wr %g0, ASI_BLK_P, %asi
- membar #StoreLoad | #StoreStore | #LoadStore
- fzero %f0
- andcc %o3, 0xc0, %o2
- and %o1, 0x3f, %o1
- fzero %f2
- andn %o3, 0xff, %o3
- faddd %f0, %f2, %f4
- fmuld %f0, %f2, %f6
- cmp %o2, 64
- faddd %f0, %f2, %f8
- fmuld %f0, %f2, %f10
- faddd %f0, %f2, %f12
- brz,pn %o2, 10f
- fmuld %f0, %f2, %f14
- be,pn %icc, 2f
- stda %f0, [%o0 + 0x00] %asi
- cmp %o2, 128
- be,pn %icc, 2f
- stda %f0, [%o0 + 0x40] %asi
- stda %f0, [%o0 + 0x80] %asi
-2: brz,pn %o3, 12f
- add %o0, %o2, %o0
-10: stda %f0, [%o0 + 0x00] %asi
- stda %f0, [%o0 + 0x40] %asi
- stda %f0, [%o0 + 0x80] %asi
- stda %f0, [%o0 + 0xc0] %asi
-11: subcc %o3, 256, %o3
- bne,pt %xcc, 10b
- add %o0, 256, %o0
-12: wr %g0, FPRS_FEF, %fprs
- membar #StoreLoad | #StoreStore
-9: andcc %o1, 0xf8, %o2
- be,pn %xcc, 13f
- andcc %o1, 7, %o1
-14: rd %pc, %o4
- srl %o2, 1, %o3
- sub %o4, %o3, %o4
- jmpl %o4 + (13f - 14b), %g0
- add %o0, %o2, %o0
-12: ZERO_BLOCKS (%o0, 0xc8, %g0)
- ZERO_BLOCKS (%o0, 0x88, %g0)
- ZERO_BLOCKS (%o0, 0x48, %g0)
- ZERO_BLOCKS (%o0, 0x08, %g0)
-13: be,pn %xcc, 8f
- andcc %o1, 4, %g0
- be,pn %xcc, 1f
- andcc %o1, 2, %g0
- stw %g0, [%o0]
- add %o0, 4, %o0
-1: be,pn %xcc, 1f
- andcc %o1, 1, %g0
- sth %g0, [%o0]
- add %o0, 2, %o0
-1: bne,a,pn %xcc, 8f
- stb %g0, [%o0]
-8: retl
- mov %o5, %o0
-17: be,pn %xcc, 13b
- orcc %o1, 0, %g0
- be,pn %xcc, 0f
-8: add %o0, 1, %o0
- subcc %o1, 1, %o1
- bne,pt %xcc, 8b
- stb %g0, [%o0 - 1]
-0: retl
- mov %o5, %o0
-#ifdef __UCLIBC_SUSV3_LEGACY__
-END(bzero)
-#endif
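
Before the block stores begin, the memset above widens the fill byte to a 64-bit pattern with three shift/or steps (the sllx/or sequence around label 4). A short C sketch of that broadcast:

#include <stdint.h>

/* Widen one fill byte to the 64-bit store pattern: each step doubles
   the number of populated bytes (1 -> 2 -> 4 -> 8). */
static uint64_t broadcast_byte(unsigned char c)
{
    uint64_t v = c;
    v |= v << 8;    /* 0x00000000000000cc -> 0x000000000000cccc */
    v |= v << 16;   /*                    -> 0x00000000cccccccc */
    v |= v << 32;   /*                    -> 0xcccccccccccccccc */
    return v;
}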
diff --git a/libc/string/sparc/sparc64/sparcv9b/memcpy.S b/libc/string/sparc/sparc64/sparcv9b/memcpy.S
deleted file mode 100644
index 64f6a92e0..000000000
--- a/libc/string/sparc/sparc64/sparcv9b/memcpy.S
+++ /dev/null
@@ -1,612 +0,0 @@
-/* Copy SIZE bytes from SRC to DEST.
- For UltraSPARC-III.
- Copyright (C) 2001, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by David S. Miller (davem@redhat.com)
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <features.h>
-
-#define ASI_BLK_P 0xf0
-#define FPRS_FEF 0x04
-#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
-#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
-
-#ifndef XCC
-#define USE_BPR
-#define XCC xcc
-#endif
-
- .register %g2,#scratch
- .register %g3,#scratch
- .register %g6,#scratch
-
- .text
- .align 32
-
-#ifdef __UCLIBC_SUSV3_LEGACY__
-ENTRY(bcopy)
- sub %o1, %o0, %o4
- mov %o0, %g4
- cmp %o4, %o2
- mov %o1, %o0
- bgeu,pt %XCC, 100f
- mov %g4, %o1
-#ifndef USE_BPR
- srl %o2, 0, %o2
-#endif
- brnz,pn %o2, 220f
- add %o0, %o2, %o0
- retl
- nop
-END(bcopy)
-#endif
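
The legacy bcopy entry above only swaps its first two arguments, tests for overlap, and falls into the shared copy body, so it carries memmove semantics. An equivalent C sketch (the wrapper name is hypothetical):

#include <string.h>

/* bcopy(src, dst, n): argument order is reversed relative to
   memcpy/memmove, and overlapping buffers must be handled. */
static void bcopy_compat(const void *src, void *dst, size_t n)
{
    memmove(dst, src, n);
}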
-
- /* Special/non-trivial issues of this code:
- *
- * 1) %o5 is preserved from VISEntryHalf to VISExitHalf
-	 * 2) Only the low 32 FPU registers are used so that only the
- * lower half of the FPU register set is dirtied by this
- * code. This is especially important in the kernel.
- * 3) This code never prefetches cachelines past the end
- * of the source buffer.
- *
- * The cheetah's flexible spine, oversized liver, enlarged heart,
- * slender muscular body, and claws make it the swiftest hunter
- * in Africa and the fastest animal on land. Can reach speeds
- * of up to 2.4GB per second.
- */
- .align 32
-ENTRY(memcpy)
-
-100: /* %o0=dst, %o1=src, %o2=len */
- mov %o0, %g5
- cmp %o2, 0
- be,pn %XCC, out
-218: or %o0, %o1, %o3
- cmp %o2, 16
- bleu,a,pn %XCC, small_copy
- or %o3, %o2, %o3
-
- cmp %o2, 256
- blu,pt %XCC, medium_copy
- andcc %o3, 0x7, %g0
-
- ba,pt %xcc, enter
- andcc %o0, 0x3f, %g2
-
- /* Here len >= 256 and condition codes reflect execution
-	 * of "andcc %o0, 0x3f, %g2", done by the caller.
- */
- .align 64
-enter:
-	/* Is 'dst' already aligned on a 64-byte boundary? */
- be,pt %XCC, 2f
-
- /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number
- * of bytes to copy to make 'dst' 64-byte aligned. We pre-
- * subtract this from 'len'.
- */
- sub %g2, 0x40, %g2
- sub %g0, %g2, %g2
- sub %o2, %g2, %o2
-
- /* Copy %g2 bytes from src to dst, one byte at a time. */
-1: ldub [%o1 + 0x00], %o3
- add %o1, 0x1, %o1
- add %o0, 0x1, %o0
- subcc %g2, 0x1, %g2
-
- bg,pt %XCC, 1b
- stb %o3, [%o0 + -1]
-
-2: VISEntryHalf
- and %o1, 0x7, %g1
- ba,pt %xcc, begin
- alignaddr %o1, %g0, %o1
-
- .align 64
-begin:
- prefetch [%o1 + 0x000], #one_read
- prefetch [%o1 + 0x040], #one_read
- andn %o2, (0x40 - 1), %o4
- prefetch [%o1 + 0x080], #one_read
- prefetch [%o1 + 0x0c0], #one_read
- ldd [%o1 + 0x000], %f0
- prefetch [%o1 + 0x100], #one_read
- ldd [%o1 + 0x008], %f2
- prefetch [%o1 + 0x140], #one_read
- ldd [%o1 + 0x010], %f4
- prefetch [%o1 + 0x180], #one_read
- faligndata %f0, %f2, %f16
- ldd [%o1 + 0x018], %f6
- faligndata %f2, %f4, %f18
- ldd [%o1 + 0x020], %f8
- faligndata %f4, %f6, %f20
- ldd [%o1 + 0x028], %f10
- faligndata %f6, %f8, %f22
-
- ldd [%o1 + 0x030], %f12
- faligndata %f8, %f10, %f24
- ldd [%o1 + 0x038], %f14
- faligndata %f10, %f12, %f26
- ldd [%o1 + 0x040], %f0
-
- sub %o4, 0x80, %o4
- add %o1, 0x40, %o1
- ba,pt %xcc, loop
- srl %o4, 6, %o3
-
- .align 64
-loop:
- ldd [%o1 + 0x008], %f2
- faligndata %f12, %f14, %f28
- ldd [%o1 + 0x010], %f4
- faligndata %f14, %f0, %f30
- stda %f16, [%o0] ASI_BLK_P
- ldd [%o1 + 0x018], %f6
- faligndata %f0, %f2, %f16
-
- ldd [%o1 + 0x020], %f8
- faligndata %f2, %f4, %f18
- ldd [%o1 + 0x028], %f10
- faligndata %f4, %f6, %f20
- ldd [%o1 + 0x030], %f12
- faligndata %f6, %f8, %f22
- ldd [%o1 + 0x038], %f14
- faligndata %f8, %f10, %f24
-
- ldd [%o1 + 0x040], %f0
- prefetch [%o1 + 0x180], #one_read
- faligndata %f10, %f12, %f26
- subcc %o3, 0x01, %o3
- add %o1, 0x40, %o1
- bg,pt %XCC, loop
- add %o0, 0x40, %o0
-
- /* Finally we copy the last full 64-byte block. */
-loopfini:
- ldd [%o1 + 0x008], %f2
- faligndata %f12, %f14, %f28
- ldd [%o1 + 0x010], %f4
- faligndata %f14, %f0, %f30
- stda %f16, [%o0] ASI_BLK_P
- ldd [%o1 + 0x018], %f6
- faligndata %f0, %f2, %f16
- ldd [%o1 + 0x020], %f8
- faligndata %f2, %f4, %f18
- ldd [%o1 + 0x028], %f10
- faligndata %f4, %f6, %f20
- ldd [%o1 + 0x030], %f12
- faligndata %f6, %f8, %f22
- ldd [%o1 + 0x038], %f14
- faligndata %f8, %f10, %f24
- cmp %g1, 0
- be,pt %XCC, 1f
- add %o0, 0x40, %o0
- ldd [%o1 + 0x040], %f0
-1: faligndata %f10, %f12, %f26
- faligndata %f12, %f14, %f28
- faligndata %f14, %f0, %f30
- stda %f16, [%o0] ASI_BLK_P
- add %o0, 0x40, %o0
- add %o1, 0x40, %o1
- membar #Sync
-
- /* Now we copy the (len modulo 64) bytes at the end.
- * Note how we borrow the %f0 loaded above.
- *
- * Also notice how this code is careful not to perform a
- * load past the end of the src buffer.
- */
-loopend:
- and %o2, 0x3f, %o2
- andcc %o2, 0x38, %g2
- be,pn %XCC, endcruft
- subcc %g2, 0x8, %g2
- be,pn %XCC, endcruft
- cmp %g1, 0
-
- be,a,pt %XCC, 1f
- ldd [%o1 + 0x00], %f0
-
-1: ldd [%o1 + 0x08], %f2
- add %o1, 0x8, %o1
- sub %o2, 0x8, %o2
- subcc %g2, 0x8, %g2
- faligndata %f0, %f2, %f8
- std %f8, [%o0 + 0x00]
- be,pn %XCC, endcruft
- add %o0, 0x8, %o0
- ldd [%o1 + 0x08], %f0
- add %o1, 0x8, %o1
- sub %o2, 0x8, %o2
- subcc %g2, 0x8, %g2
- faligndata %f2, %f0, %f8
- std %f8, [%o0 + 0x00]
- bne,pn %XCC, 1b
- add %o0, 0x8, %o0
-
- /* If anything is left, we copy it one byte at a time.
-	 * Note that %g1 is (src & 0x7) saved above before the
- * alignaddr was performed.
- */
-endcruft:
- cmp %o2, 0
- add %o1, %g1, %o1
- VISExitHalf
- be,pn %XCC, out
- sub %o0, %o1, %o3
-
- andcc %g1, 0x7, %g0
- bne,pn %icc, small_copy_unaligned
- andcc %o2, 0x8, %g0
- be,pt %icc, 1f
- nop
- ldx [%o1], %o5
- stx %o5, [%o1 + %o3]
- add %o1, 0x8, %o1
-
-1: andcc %o2, 0x4, %g0
- be,pt %icc, 1f
- nop
- lduw [%o1], %o5
- stw %o5, [%o1 + %o3]
- add %o1, 0x4, %o1
-
-1: andcc %o2, 0x2, %g0
- be,pt %icc, 1f
- nop
- lduh [%o1], %o5
- sth %o5, [%o1 + %o3]
- add %o1, 0x2, %o1
-
-1: andcc %o2, 0x1, %g0
- be,pt %icc, out
- nop
- ldub [%o1], %o5
- ba,pt %xcc, out
- stb %o5, [%o1 + %o3]
-
-medium_copy: /* 16 < len < 256 */
- bne,pn %XCC, small_copy_unaligned
- sub %o0, %o1, %o3
-
-medium_copy_aligned:
- andn %o2, 0x7, %o4
- and %o2, 0x7, %o2
-1: subcc %o4, 0x8, %o4
- ldx [%o1], %o5
- stx %o5, [%o1 + %o3]
- bgu,pt %XCC, 1b
- add %o1, 0x8, %o1
- andcc %o2, 0x4, %g0
- be,pt %XCC, 1f
- nop
- sub %o2, 0x4, %o2
- lduw [%o1], %o5
- stw %o5, [%o1 + %o3]
- add %o1, 0x4, %o1
-1: cmp %o2, 0
- be,pt %XCC, out
- nop
- ba,pt %xcc, small_copy_unaligned
- nop
-
-small_copy: /* 0 < len <= 16 */
- andcc %o3, 0x3, %g0
- bne,pn %XCC, small_copy_unaligned
- sub %o0, %o1, %o3
-
-small_copy_aligned:
- subcc %o2, 4, %o2
- lduw [%o1], %g1
- stw %g1, [%o1 + %o3]
- bgu,pt %XCC, small_copy_aligned
- add %o1, 4, %o1
-
-out: retl
- mov %g5, %o0
-
- .align 32
-small_copy_unaligned:
- subcc %o2, 1, %o2
- ldub [%o1], %g1
- stb %g1, [%o1 + %o3]
- bgu,pt %XCC, small_copy_unaligned
- add %o1, 1, %o1
- retl
- mov %g5, %o0
-
-END(memcpy)
-libc_hidden_def(memcpy)
-
-#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src - offset - 0x20], %t0; \
- ldx [%src - offset - 0x18], %t1; \
- ldx [%src - offset - 0x10], %t2; \
- ldx [%src - offset - 0x08], %t3; \
- stw %t0, [%dst - offset - 0x1c]; \
- srlx %t0, 32, %t0; \
- stw %t0, [%dst - offset - 0x20]; \
- stw %t1, [%dst - offset - 0x14]; \
- srlx %t1, 32, %t1; \
- stw %t1, [%dst - offset - 0x18]; \
- stw %t2, [%dst - offset - 0x0c]; \
- srlx %t2, 32, %t2; \
- stw %t2, [%dst - offset - 0x10]; \
- stw %t3, [%dst - offset - 0x04]; \
- srlx %t3, 32, %t3; \
- stw %t3, [%dst - offset - 0x08];
-
-#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src - offset - 0x20], %t0; \
- ldx [%src - offset - 0x18], %t1; \
- ldx [%src - offset - 0x10], %t2; \
- ldx [%src - offset - 0x08], %t3; \
- stx %t0, [%dst - offset - 0x20]; \
- stx %t1, [%dst - offset - 0x18]; \
- stx %t2, [%dst - offset - 0x10]; \
- stx %t3, [%dst - offset - 0x08]; \
- ldx [%src - offset - 0x40], %t0; \
- ldx [%src - offset - 0x38], %t1; \
- ldx [%src - offset - 0x30], %t2; \
- ldx [%src - offset - 0x28], %t3; \
- stx %t0, [%dst - offset - 0x40]; \
- stx %t1, [%dst - offset - 0x38]; \
- stx %t2, [%dst - offset - 0x30]; \
- stx %t3, [%dst - offset - 0x28];
-
-#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldx [%src + offset + 0x00], %t0; \
- ldx [%src + offset + 0x08], %t1; \
- stw %t0, [%dst + offset + 0x04]; \
- srlx %t0, 32, %t2; \
- stw %t2, [%dst + offset + 0x00]; \
- stw %t1, [%dst + offset + 0x0c]; \
- srlx %t1, 32, %t3; \
- stw %t3, [%dst + offset + 0x08];
-
-#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \
- ldx [%src + offset + 0x00], %t0; \
- ldx [%src + offset + 0x08], %t1; \
- stx %t0, [%dst + offset + 0x00]; \
- stx %t1, [%dst + offset + 0x08];
-
- .align 32
-228: andcc %o2, 1, %g0 /* IEU1 Group */
- be,pt %icc, 2f+4 /* CTI */
-1: ldub [%o1 - 1], %o5 /* LOAD Group */
- sub %o1, 1, %o1 /* IEU0 */
- sub %o0, 1, %o0 /* IEU1 */
- subcc %o2, 1, %o2 /* IEU1 Group */
- be,pn %xcc, 229f /* CTI */
- stb %o5, [%o0] /* Store */
-2: ldub [%o1 - 1], %o5 /* LOAD Group */
- sub %o0, 2, %o0 /* IEU0 */
- ldub [%o1 - 2], %g5 /* LOAD Group */
- sub %o1, 2, %o1 /* IEU0 */
- subcc %o2, 2, %o2 /* IEU1 Group */
- stb %o5, [%o0 + 1] /* Store */
- bne,pt %xcc, 2b /* CTI */
- stb %g5, [%o0] /* Store */
-229: retl
- mov %g4, %o0
-
- .align 32
-ENTRY(memmove)
- mov %o0, %g5
-#ifndef USE_BPR
- srl %o2, 0, %o2 /* IEU1 Group */
-#endif
- brz,pn %o2, out /* CTI Group */
- sub %o0, %o1, %o4 /* IEU0 */
- cmp %o4, %o2 /* IEU1 Group */
- bgeu,pt %XCC, 218b /* CTI */
- mov %o0, %g4 /* IEU0 */
- add %o0, %o2, %o0 /* IEU0 Group */
-220: add %o1, %o2, %o1 /* IEU1 */
- cmp %o2, 15 /* IEU1 Group */
- bleu,pn %xcc, 228b /* CTI */
- andcc %o0, 7, %g2 /* IEU1 Group */
- sub %o0, %o1, %g5 /* IEU0 */
- andcc %g5, 3, %o5 /* IEU1 Group */
- bne,pn %xcc, 232f /* CTI */
- andcc %o1, 3, %g0 /* IEU1 Group */
- be,a,pt %xcc, 236f /* CTI */
- andcc %o1, 4, %g0 /* IEU1 Group */
- andcc %o1, 1, %g0 /* IEU1 Group */
- be,pn %xcc, 4f /* CTI */
- andcc %o1, 2, %g0 /* IEU1 Group */
- ldub [%o1 - 1], %g2 /* Load Group */
- sub %o1, 1, %o1 /* IEU0 */
- sub %o0, 1, %o0 /* IEU1 */
- sub %o2, 1, %o2 /* IEU0 Group */
- be,pn %xcc, 5f /* CTI Group */
- stb %g2, [%o0] /* Store */
-4: lduh [%o1 - 2], %g2 /* Load Group */
- sub %o1, 2, %o1 /* IEU0 */
- sub %o0, 2, %o0 /* IEU1 */
- sub %o2, 2, %o2 /* IEU0 */
- sth %g2, [%o0] /* Store Group + bubble */
-5: andcc %o1, 4, %g0 /* IEU1 */
-236: be,a,pn %xcc, 2f /* CTI */
- andcc %o2, -128, %g6 /* IEU1 Group */
- lduw [%o1 - 4], %g5 /* Load Group */
- sub %o1, 4, %o1 /* IEU0 */
- sub %o0, 4, %o0 /* IEU1 */
- sub %o2, 4, %o2 /* IEU0 Group */
- stw %g5, [%o0] /* Store */
- andcc %o2, -128, %g6 /* IEU1 Group */
-2: be,pn %xcc, 235f /* CTI */
- andcc %o0, 4, %g0 /* IEU1 Group */
- be,pn %xcc, 282f + 4 /* CTI Group */
-5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
- RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
- subcc %g6, 128, %g6 /* IEU1 Group */
- sub %o1, 128, %o1 /* IEU0 */
- bne,pt %xcc, 5b /* CTI */
- sub %o0, 128, %o0 /* IEU0 Group */
-235: andcc %o2, 0x70, %g6 /* IEU1 Group */
-41: be,pn %xcc, 280f /* CTI */
- andcc %o2, 8, %g0 /* IEU1 Group */
- /* Clk1 8-( */
- /* Clk2 8-( */
- /* Clk3 8-( */
- /* Clk4 8-( */
-279: rd %pc, %o5 /* PDU Group */
- sll %g6, 1, %g5 /* IEU0 Group */
- sub %o1, %g6, %o1 /* IEU1 */
- sub %o5, %g5, %o5 /* IEU0 Group */
-	jmpl	%o5 + %lo(280f - 279b), %g0	/* CTI Group brk forced */
- sub %o0, %g6, %o0 /* IEU0 Group */
- RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
- RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
-280: be,pt %xcc, 281f /* CTI */
- andcc %o2, 4, %g0 /* IEU1 */
- ldx [%o1 - 8], %g2 /* Load Group */
- sub %o0, 8, %o0 /* IEU0 */
- stw %g2, [%o0 + 4] /* Store Group */
- sub %o1, 8, %o1 /* IEU1 */
- srlx %g2, 32, %g2 /* IEU0 Group */
- stw %g2, [%o0] /* Store */
-281: be,pt %xcc, 1f /* CTI */
- andcc %o2, 2, %g0 /* IEU1 Group */
- lduw [%o1 - 4], %g2 /* Load Group */
- sub %o1, 4, %o1 /* IEU0 */
- stw %g2, [%o0 - 4] /* Store Group */
- sub %o0, 4, %o0 /* IEU0 */
-1: be,pt %xcc, 1f /* CTI */
- andcc %o2, 1, %g0 /* IEU1 Group */
- lduh [%o1 - 2], %g2 /* Load Group */
- sub %o1, 2, %o1 /* IEU0 */
- sth %g2, [%o0 - 2] /* Store Group */
- sub %o0, 2, %o0 /* IEU0 */
-1: be,pt %xcc, 211f /* CTI */
- nop /* IEU1 */
- ldub [%o1 - 1], %g2 /* Load Group */
- stb %g2, [%o0 - 1] /* Store Group + bubble */
-211: retl
- mov %g4, %o0
-
-282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
- RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
- subcc %g6, 128, %g6 /* IEU1 Group */
- sub %o1, 128, %o1 /* IEU0 */
- bne,pt %xcc, 282b /* CTI */
- sub %o0, 128, %o0 /* IEU0 Group */
- andcc %o2, 0x70, %g6 /* IEU1 */
- be,pn %xcc, 284f /* CTI */
- andcc %o2, 8, %g0 /* IEU1 Group */
- /* Clk1 8-( */
- /* Clk2 8-( */
- /* Clk3 8-( */
- /* Clk4 8-( */
-283: rd %pc, %o5 /* PDU Group */
- sub %o1, %g6, %o1 /* IEU0 Group */
- sub %o5, %g6, %o5 /* IEU1 */
-	jmpl	%o5 + %lo(284f - 283b), %g0	/* CTI Group brk forced */
- sub %o0, %g6, %o0 /* IEU0 Group */
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
- RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
-284: be,pt %xcc, 285f /* CTI Group */
- andcc %o2, 4, %g0 /* IEU1 */
- ldx [%o1 - 8], %g2 /* Load Group */
- sub %o0, 8, %o0 /* IEU0 */
- sub %o1, 8, %o1 /* IEU0 Group */
- stx %g2, [%o0] /* Store */
-285: be,pt %xcc, 1f /* CTI */
- andcc %o2, 2, %g0 /* IEU1 Group */
- lduw [%o1 - 4], %g2 /* Load Group */
- sub %o0, 4, %o0 /* IEU0 */
- sub %o1, 4, %o1 /* IEU0 Group */
- stw %g2, [%o0] /* Store */
-1: be,pt %xcc, 1f /* CTI */
- andcc %o2, 1, %g0 /* IEU1 Group */
- lduh [%o1 - 2], %g2 /* Load Group */
- sub %o0, 2, %o0 /* IEU0 */
- sub %o1, 2, %o1 /* IEU0 Group */
- sth %g2, [%o0] /* Store */
-1: be,pt %xcc, 1f /* CTI */
- nop /* IEU0 Group */
- ldub [%o1 - 1], %g2 /* Load Group */
- stb %g2, [%o0 - 1] /* Store Group + bubble */
-1: retl
- mov %g4, %o0
-
-232: brz,pt %g2, 2f /* CTI Group */
- sub %o2, %g2, %o2 /* IEU0 Group */
-1: ldub [%o1 - 1], %g5 /* Load Group */
- sub %o1, 1, %o1 /* IEU0 */
- sub %o0, 1, %o0 /* IEU1 */
- subcc %g2, 1, %g2 /* IEU1 Group */
- bne,pt %xcc, 1b /* CTI */
- stb %g5, [%o0] /* Store */
-2: andn %o2, 7, %g5 /* IEU0 Group */
- and %o2, 7, %o2 /* IEU1 */
- fmovd %f0, %f2 /* FPU */
- alignaddr %o1, %g0, %g1 /* GRU Group */
- ldd [%g1], %f4 /* Load Group */
-1: ldd [%g1 - 8], %f6 /* Load Group */
- sub %g1, 8, %g1 /* IEU0 Group */
- subcc %g5, 8, %g5 /* IEU1 */
- faligndata %f6, %f4, %f0 /* GRU Group */
- std %f0, [%o0 - 8] /* Store */
- sub %o1, 8, %o1 /* IEU0 Group */
- be,pn %xcc, 233f /* CTI */
- sub %o0, 8, %o0 /* IEU1 */
- ldd [%g1 - 8], %f4 /* Load Group */
- sub %g1, 8, %g1 /* IEU0 */
- subcc %g5, 8, %g5 /* IEU1 */
- faligndata %f4, %f6, %f0 /* GRU Group */
- std %f0, [%o0 - 8] /* Store */
- sub %o1, 8, %o1 /* IEU0 */
- bne,pn %xcc, 1b /* CTI Group */
- sub %o0, 8, %o0 /* IEU0 */
-233: brz,pn %o2, 234f /* CTI Group */
- nop /* IEU0 */
-237: ldub [%o1 - 1], %g5 /* LOAD */
- sub %o1, 1, %o1 /* IEU0 */
- sub %o0, 1, %o0 /* IEU1 */
- subcc %o2, 1, %o2 /* IEU1 */
- bne,pt %xcc, 237b /* CTI */
- stb %g5, [%o0] /* Store Group */
-234: wr %g0, FPRS_FEF, %fprs
- retl
- mov %g4, %o0
-END(memmove)
-libc_hidden_def(memmove)
-
-#ifdef USE_BPR
-weak_alias(memcpy,__align_cpy_1)
-weak_alias(memcpy,__align_cpy_2)
-weak_alias(memcpy,__align_cpy_4)
-weak_alias(memcpy,__align_cpy_8)
-weak_alias(memcpy,__align_cpy_16)
-#endif
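
Both this memmove and the one in the previous file pick the copy direction with a single unsigned comparison: if (dst - src), taken as an unsigned value, is at least len, the forward memcpy path cannot clobber source bytes it has not read yet; otherwise the code copies backward. A byte-wise C sketch of the test (illustrative, not the tuned implementation):

#include <stddef.h>
#include <stdint.h>

static void *overlap_safe_copy(void *dst, const void *src, size_t n)
{
    unsigned char *d = (unsigned char *)dst;
    const unsigned char *s = (const unsigned char *)src;

    if ((uintptr_t)d - (uintptr_t)s >= n) {
        /* No destructive overlap.  When dst < src the subtraction
           wraps to a huge unsigned value, so one compare covers
           both orderings. */
        for (size_t i = 0; i < n; i++)
            d[i] = s[i];
    } else {
        /* dst starts inside [src, src + n): copy backward. */
        for (size_t i = n; i-- > 0; )
            d[i] = s[i];
    }
    return dst;
}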
diff --git a/libc/string/sparc/sparc64/stpcpy.S b/libc/string/sparc/sparc64/stpcpy.S
deleted file mode 100644
index 8c26c6bec..000000000
--- a/libc/string/sparc/sparc64/stpcpy.S
+++ /dev/null
@@ -1,271 +0,0 @@
-/* Copy SRC to DEST returning the address of the terminating '\0' in DEST.
- For SPARC v9.
- Copyright (C) 1998, 1999, 2002, 2003, 2004 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
- Jakub Jelinek <jj@ultra.linux.cz>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <asm/asi.h>
-#ifndef XCC
- .register %g2, #scratch
- .register %g3, #scratch
- .register %g6, #scratch
-#endif
-
-	/* Normally, this uses the
-	   ((xword - 0x0101010101010101) & 0x8080808080808080) test
-	   to find out whether any byte in xword could be zero. This is
-	   fast, but also gives a false alarm for any byte in the range
-	   0x81-0xff. That does not matter for correctness: if the test
-	   says there could be a zero byte, we check byte by byte. But
-	   if bytes with the high bit set are common in the strings,
-	   this gives poor performance. You can #define EIGHTBIT_NOT_RARE
-	   and the algorithm will use the one-tick-slower but more
-	   precise test
-	   ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
-	   which gives no false alarms (though when some bits are set,
-	   one cannot tell from it which bytes are zero and which are not).
-	   It has yet to be measured what the correct default for glibc
-	   should be these days for the average user. */
-
- .text
- .align 32
-ENTRY(stpcpy)
- sethi %hi(0x01010101), %g1 /* IEU0 Group */
- or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
- andcc %o0, 7, %g0 /* IEU1 */
- sllx %g1, 32, %g2 /* IEU0 Group */
-
- bne,pn %icc, 12f /* CTI */
- andcc %o1, 7, %g3 /* IEU1 */
- or %g1, %g2, %g1 /* IEU0 Group */
- bne,pn %icc, 14f /* CTI */
-
- sllx %g1, 7, %g2 /* IEU0 Group */
-1: ldx [%o1], %o3 /* Load */
- add %o1, 8, %o1 /* IEU1 */
-2: mov %o3, %g3 /* IEU0 Group */
-
- sub %o3, %g1, %o2 /* IEU1 */
-3: ldxa [%o1] ASI_PNF, %o3 /* Load */
-#ifdef EIGHTBIT_NOT_RARE
- andn %o2, %g3, %o2 /* IEU0 Group */
-#endif
- add %o0, 8, %o0 /* IEU0 Group */
- andcc %o2, %g2, %g0 /* IEU1 */
-
- add %o1, 8, %o1 /* IEU0 Group */
- be,a,pt %xcc, 2b /* CTI */
- stx %g3, [%o0 - 8] /* Store */
- srlx %g3, 56, %g5 /* IEU0 Group */
-
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 11f /* CTI */
- srlx %g3, 48, %g4 /* IEU0 */
- andcc %g4, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 10f /* CTI */
- srlx %g3, 40, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 9f /* CTI */
-
- srlx %g3, 32, %g4 /* IEU0 */
- andcc %g4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 8f /* CTI */
- srlx %g3, 24, %g5 /* IEU0 */
-
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 7f /* CTI */
- srlx %g3, 16, %g4 /* IEU0 */
- andcc %g4, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 6f /* CTI */
- srlx %g3, 8, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 5f /* CTI */
-
- sub %o3, %g1, %o2 /* IEU0 */
- stx %g3, [%o0 - 8] /* Store Group */
- andcc %g3, 0xff, %g0 /* IEU1 */
- bne,pt %icc, 3b /* CTI */
-
- mov %o3, %g3 /* IEU0 Group */
-4: retl /* CTI+IEU1 Group */
- sub %o0, 1, %o0 /* IEU0 */
-
- .align 16
-6: ba,pt %xcc, 23f /* CTI Group */
- sub %o0, 3, %g6 /* IEU0 */
-5: sub %o0, 2, %g6 /* IEU0 Group */
- stb %g5, [%o0 - 2] /* Store */
-
- srlx %g3, 16, %g4 /* IEU0 Group */
-23: sth %g4, [%o0 - 4] /* Store */
- srlx %g3, 32, %g4 /* IEU0 Group */
- stw %g4, [%o0 - 8] /* Store */
-
- retl /* CTI+IEU1 Group */
- mov %g6, %o0 /* IEU0 */
-8: ba,pt %xcc, 24f /* CTI Group */
- sub %o0, 5, %g6 /* IEU0 */
-
-7: sub %o0, 4, %g6 /* IEU0 Group */
- stb %g5, [%o0 - 4] /* Store */
- srlx %g3, 32, %g4 /* IEU0 Group */
-24: stw %g4, [%o0 - 8] /* Store */
-
- retl /* CTI+IEU1 Group */
- mov %g6, %o0 /* IEU0 */
-10: ba,pt %xcc, 25f /* CTI Group */
- sub %o0, 7, %g6 /* IEU0 */
-
-9: sub %o0, 6, %g6 /* IEU0 Group */
- stb %g5, [%o0 - 6] /* Store */
- srlx %g3, 48, %g4 /* IEU0 */
-25: sth %g4, [%o0 - 8] /* Store Group */
-
- retl /* CTI+IEU1 Group */
- mov %g6, %o0 /* IEU0 */
-11: stb %g5, [%o0 - 8] /* Store Group */
- retl /* CTI+IEU1 Group */
-
- sub %o0, 8, %o0 /* IEU0 */
-
- .align 16
-12: or %g1, %g2, %g1 /* IEU0 Group */
- ldub [%o1], %o3 /* Load */
- sllx %g1, 7, %g2 /* IEU0 Group */
- stb %o3, [%o0] /* Store Group */
-
-13: add %o0, 1, %o0 /* IEU0 */
- add %o1, 1, %o1 /* IEU1 */
- andcc %o3, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4b /* CTI */
-
- lduba [%o1] ASI_PNF, %o3 /* Load */
- andcc %o0, 7, %g0 /* IEU1 Group */
- bne,a,pt %icc, 13b /* CTI */
- stb %o3, [%o0] /* Store */
-
- andcc %o1, 7, %g3 /* IEU1 Group */
- be,a,pt %icc, 1b /* CTI */
- ldx [%o1], %o3 /* Load */
-14: orcc %g0, 64, %g4 /* IEU1 Group */
-
- sllx %g3, 3, %g5 /* IEU0 */
- sub %o1, %g3, %o1 /* IEU0 Group */
- sub %g4, %g5, %g4 /* IEU1 */
- /* %g1 = 0101010101010101 *
- * %g2 = 8080808080808080 *
- * %g3 = source alignment *
- * %g5 = number of bits to shift left *
- * %g4 = number of bits to shift right */
- ldxa [%o1] ASI_PNF, %o5 /* Load Group */
-
- addcc %o1, 8, %o1 /* IEU1 */
-15: sllx %o5, %g5, %o3 /* IEU0 Group */
- ldxa [%o1] ASI_PNF, %o5 /* Load */
- srlx %o5, %g4, %o4 /* IEU0 Group */
-
- add %o0, 8, %o0 /* IEU1 */
- or %o3, %o4, %o3 /* IEU0 Group */
- add %o1, 8, %o1 /* IEU1 */
- sub %o3, %g1, %o4 /* IEU0 Group */
-
-#ifdef EIGHTBIT_NOT_RARE
- andn %o4, %o3, %o4 /* IEU0 Group */
-#endif
- andcc %o4, %g2, %g0 /* IEU1 Group */
- be,a,pt %xcc, 15b /* CTI */
- stx %o3, [%o0 - 8] /* Store */
- srlx %o3, 56, %o4 /* IEU0 Group */
-
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 22f /* CTI */
- srlx %o3, 48, %o4 /* IEU0 */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 21f /* CTI */
- srlx %o3, 40, %o4 /* IEU0 */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 20f /* CTI */
-
- srlx %o3, 32, %o4 /* IEU0 */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 19f /* CTI */
- srlx %o3, 24, %o4 /* IEU0 */
-
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 18f /* CTI */
- srlx %o3, 16, %o4 /* IEU0 */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 17f /* CTI */
- srlx %o3, 8, %o4 /* IEU0 */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 16f /* CTI */
-
- andcc %o3, 0xff, %g0 /* IEU1 Group */
- bne,pn %icc, 15b /* CTI */
- stx %o3, [%o0 - 8] /* Store */
- retl /* CTI+IEU1 Group */
-
- sub %o0, 1, %o0 /* IEU0 */
-
- .align 16
-17: ba,pt %xcc, 26f /* CTI Group */
- subcc %o0, 3, %g6 /* IEU1 */
-18: ba,pt %xcc, 27f /* CTI Group */
- subcc %o0, 4, %g6 /* IEU1 */
-
-19: ba,pt %xcc, 28f /* CTI Group */
- subcc %o0, 5, %g6 /* IEU1 */
-16: subcc %o0, 2, %g6 /* IEU1 Group */
- srlx %o3, 8, %o4 /* IEU0 */
-
- stb %o4, [%o0 - 2] /* Store */
-26: srlx %o3, 16, %o4 /* IEU0 Group */
- stb %o4, [%o0 - 3] /* Store */
-27: srlx %o3, 24, %o4 /* IEU0 Group */
-
- stb %o4, [%o0 - 4] /* Store */
-28: srlx %o3, 32, %o4 /* IEU0 Group */
- stw %o4, [%o0 - 8] /* Store */
- retl /* CTI+IEU1 Group */
-
- mov %g6, %o0 /* IEU0 */
-
- .align 16
-21: ba,pt %xcc, 29f /* CTI Group */
- subcc %o0, 7, %g6 /* IEU1 */
-22: ba,pt %xcc, 30f /* CTI Group */
- subcc %o0, 8, %g6 /* IEU1 */
-
-20: subcc %o0, 6, %g6 /* IEU1 Group */
- srlx %o3, 40, %o4 /* IEU0 */
- stb %o4, [%o0 - 6] /* Store */
-29: srlx %o3, 48, %o4 /* IEU0 Group */
-
- stb %o4, [%o0 - 7] /* Store */
-30: srlx %o3, 56, %o4 /* IEU0 Group */
- stb %o4, [%o0 - 8] /* Store */
- retl /* CTI+IEU1 Group */
-
- mov %g6, %o0 /* IEU0 */
-END(stpcpy)
-libc_hidden_def(stpcpy)
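
The comment block in the stpcpy above (repeated in strcat below) describes the word-at-a-time zero-byte probe and its more exact EIGHTBIT_NOT_RARE variant. The two tests in C (function names are illustrative):

#include <stdint.h>

#define ONES  0x0101010101010101ULL
#define HIGHS 0x8080808080808080ULL

/* Fast test: may report a false positive for bytes in 0x81-0xff,
   which only costs a byte-by-byte recheck. */
static int may_have_zero_byte(uint64_t x)
{
    return ((x - ONES) & HIGHS) != 0;
}

/* EIGHTBIT_NOT_RARE variant: slightly slower, but no false
   positives. */
static int has_zero_byte(uint64_t x)
{
    return ((x - ONES) & ~x & HIGHS) != 0;
}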
diff --git a/libc/string/sparc/sparc64/strcat.S b/libc/string/sparc/sparc64/strcat.S
deleted file mode 100644
index fcc4ba59c..000000000
--- a/libc/string/sparc/sparc64/strcat.S
+++ /dev/null
@@ -1,339 +0,0 @@
-/* strcat (dest, src) -- Append SRC on the end of DEST.
- For SPARC v9.
- Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jakub Jelinek <jj@ultra.linux.cz> and
- Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <asm/asi.h>
-#ifndef XCC
-#define XCC xcc
-#define USE_BPR
- .register %g2, #scratch
- .register %g3, #scratch
- .register %g6, #scratch
-#endif
-
-	/* Normally, this uses the
-	   ((xword - 0x0101010101010101) & 0x8080808080808080) test
-	   to find out whether any byte in xword could be zero. This is
-	   fast, but also gives a false alarm for any byte in the range
-	   0x81-0xff. That does not matter for correctness: if the test
-	   says there could be a zero byte, we check byte by byte. But
-	   if bytes with the high bit set are common in the strings,
-	   this gives poor performance. You can #define EIGHTBIT_NOT_RARE
-	   and the algorithm will use the one-tick-slower but more
-	   precise test
-	   ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
-	   which gives no false alarms (though when some bits are set,
-	   one cannot tell from it which bytes are zero and which are not).
-	   It has yet to be measured what the correct default for glibc
-	   should be these days for the average user. */
-
- .text
- .align 32
-ENTRY(strcat)
- sethi %hi(0x01010101), %g1 /* IEU0 Group */
- ldub [%o0], %o3 /* Load */
- or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
- mov %o0, %g6 /* IEU1 */
-
- sllx %g1, 32, %g2 /* IEU0 Group */
- andcc %o0, 7, %g0 /* IEU1 */
- or %g1, %g2, %g1 /* IEU0 Group */
- bne,pn %icc, 32f /* CTI */
-
- sllx %g1, 7, %g2 /* IEU0 Group */
- brz,pn %o3, 30f /* CTI+IEU1 */
- ldx [%o0], %o3 /* Load */
-48: add %o0, 8, %o0 /* IEU0 Group */
-
-49: sub %o3, %g1, %o2 /* IEU0 Group */
-#ifdef EIGHTBIT_NOT_RARE
- andn %o2, %o3, %g5 /* IEU0 Group */
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- andcc %g5, %g2, %g0 /* IEU1 Group */
-#else
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- andcc %o2, %g2, %g0 /* IEU1 Group */
-#endif
- be,pt %xcc, 49b /* CTI */
-
- add %o0, 8, %o0 /* IEU0 */
- addcc %o2, %g1, %g3 /* IEU1 Group */
- srlx %o2, 32, %o2 /* IEU0 */
-50: andcc %o2, %g2, %g0 /* IEU1 Group */
-
- be,pn %xcc, 51f /* CTI */
- srlx %g3, 56, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 29f /* CTI */
-
- srlx %g3, 48, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 28f /* CTI */
- srlx %g3, 40, %o2 /* IEU0 */
-
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 27f /* CTI */
- srlx %g3, 32, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 26f /* CTI */
-51: srlx %g3, 24, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 25f /* CTI */
-
- srlx %g3, 16, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 24f /* CTI */
- srlx %g3, 8, %o2 /* IEU0 */
-
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 23f /* CTI */
- sub %o3, %g1, %o2 /* IEU0 */
- andcc %g3, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 52f /* CTI */
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- andcc %o2, %g2, %g0 /* IEU1 Group */
- be,pt %xcc, 49b /* CTI */
-
- add %o0, 8, %o0 /* IEU0 */
- addcc %o2, %g1, %g3 /* IEU1 Group */
- ba,pt %xcc, 50b /* CTI */
- srlx %o2, 32, %o2 /* IEU0 */
-
- .align 16
-52: ba,pt %xcc, 12f /* CTI Group */
- add %o0, -9, %o0 /* IEU0 */
-23: ba,pt %xcc, 12f /* CTI Group */
- add %o0, -10, %o0 /* IEU0 */
-
-24: ba,pt %xcc, 12f /* CTI Group */
- add %o0, -11, %o0 /* IEU0 */
-25: ba,pt %xcc, 12f /* CTI Group */
- add %o0, -12, %o0 /* IEU0 */
-
-26: ba,pt %xcc, 12f /* CTI Group */
- add %o0, -13, %o0 /* IEU0 */
-27: ba,pt %xcc, 12f /* CTI Group */
- add %o0, -14, %o0 /* IEU0 */
-
-28: ba,pt %xcc, 12f /* CTI Group */
- add %o0, -15, %o0 /* IEU0 */
-29: add %o0, -16, %o0 /* IEU0 Group */
-30: andcc %o1, 7, %g3 /* IEU1 */
-
-31: bne,pn %icc, 14f /* CTI */
- orcc %g0, 64, %g4 /* IEU1 Group */
-1: ldx [%o1], %o3 /* Load */
- add %o1, 8, %o1 /* IEU1 */
-
-2: mov %o3, %g3 /* IEU0 Group */
-3: sub %o3, %g1, %o2 /* IEU1 */
- ldxa [%o1] ASI_PNF, %o3 /* Load */
-#ifdef EIGHTBIT_NOT_RARE
- andn %o2, %g3, %o2 /* IEU0 Group */
-#endif
- add %o0, 8, %o0 /* IEU0 Group */
-
- andcc %o2, %g2, %g0 /* IEU1 */
- add %o1, 8, %o1 /* IEU0 Group */
- be,a,pt %xcc, 2b /* CTI */
- stx %g3, [%o0 - 8] /* Store */
-
- srlx %g3, 56, %g5 /* IEU0 Group */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 11f /* CTI */
- srlx %g3, 48, %g4 /* IEU0 */
-
- andcc %g4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 10f /* CTI */
- srlx %g3, 40, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 9f /* CTI */
- srlx %g3, 32, %g4 /* IEU0 */
- andcc %g4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 8f /* CTI */
-
- srlx %g3, 24, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 7f /* CTI */
- srlx %g3, 16, %g4 /* IEU0 */
-
- andcc %g4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 6f /* CTI */
- srlx %g3, 8, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 5f /* CTI */
- sub %o3, %g1, %o2 /* IEU0 */
- stx %g3, [%o0 - 8] /* Store Group */
- andcc %g3, 0xff, %g0 /* IEU1 */
-
- bne,pt %icc, 3b /* CTI */
- mov %o3, %g3 /* IEU0 Group */
-4: retl /* CTI+IEU1 Group */
- mov %g6, %o0 /* IEU0 */
-
- .align 16
-5: stb %g5, [%o0 - 2] /* Store Group */
- srlx %g3, 16, %g4 /* IEU0 */
-6: sth %g4, [%o0 - 4] /* Store Group */
- srlx %g3, 32, %g4 /* IEU0 */
-
- stw %g4, [%o0 - 8] /* Store Group */
- retl /* CTI+IEU1 Group */
- mov %g6, %o0 /* IEU0 */
-7: stb %g5, [%o0 - 4] /* Store Group */
-
- srlx %g3, 32, %g4 /* IEU0 */
-8: stw %g4, [%o0 - 8] /* Store Group */
- retl /* CTI+IEU1 Group */
- mov %g6, %o0 /* IEU0 */
-
-9: stb %g5, [%o0 - 6] /* Store Group */
- srlx %g3, 48, %g4 /* IEU0 */
-10: sth %g4, [%o0 - 8] /* Store Group */
- retl /* CTI+IEU1 Group */
-
- mov %g6, %o0 /* IEU0 */
-11: stb %g5, [%o0 - 8] /* Store Group */
- retl /* CTI+IEU1 Group */
- mov %g6, %o0 /* IEU0 */
-
- .align 16
-32: andcc %o0, 7, %g0 /* IEU1 Group */
- be,a,pn %icc, 48b /* CTI */
- ldx [%o0], %o3 /* Load */
- add %o0, 1, %o0 /* IEU0 Group */
-
- brnz,a,pt %o3, 32b /* CTI+IEU1 */
- lduba [%o0] ASI_PNF, %o3 /* Load */
- add %o0, -1, %o0 /* IEU0 Group */
- andcc %o0, 7, %g0 /* IEU1 Group */
-
- be,a,pn %icc, 31b /* CTI */
- andcc %o1, 7, %g3 /* IEU1 Group */
-12: ldub [%o1], %o3 /* Load */
- stb %o3, [%o0] /* Store Group */
-
-13: add %o0, 1, %o0 /* IEU0 */
- add %o1, 1, %o1 /* IEU1 */
- andcc %o3, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4b /* CTI */
-
- lduba [%o1] ASI_PNF, %o3 /* Load */
- andcc %o0, 7, %g0 /* IEU1 Group */
- bne,a,pt %icc, 13b /* CTI */
- stb %o3, [%o0] /* Store */
-
- andcc %o1, 7, %g3 /* IEU1 Group */
- be,a,pt %icc, 1b /* CTI */
- ldx [%o1], %o3 /* Load */
- orcc %g0, 64, %g4 /* IEU1 Group */
-
-14: sllx %g3, 3, %g5 /* IEU0 */
- sub %o1, %g3, %o1 /* IEU0 Group */
- sub %g4, %g5, %g4 /* IEU1 */
- /* %g1 = 0101010101010101 *
- * %g2 = 8080808080808080 *
- * %g3 = source alignment *
- * %g5 = number of bits to shift left *
- * %g4 = number of bits to shift right */
- ldxa [%o1] ASI_PNF, %o5 /* Load Group */
-
- addcc %o1, 8, %o1 /* IEU1 */
-15: sllx %o5, %g5, %o3 /* IEU0 Group */
- ldxa [%o1] ASI_PNF, %o5 /* Load */
- srlx %o5, %g4, %o4 /* IEU0 Group */
-
- add %o0, 8, %o0 /* IEU1 */
- or %o3, %o4, %o3 /* IEU0 Group */
- add %o1, 8, %o1 /* IEU1 */
- sub %o3, %g1, %o4 /* IEU0 Group */
-
-#ifdef EIGHTBIT_NOT_RARE
- andn %o4, %o3, %o4 /* IEU0 Group */
-#endif
- andcc %o4, %g2, %g0 /* IEU1 Group */
- be,a,pt %xcc, 15b /* CTI */
- stx %o3, [%o0 - 8] /* Store */
- srlx %o3, 56, %o4 /* IEU0 Group */
-
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 22f /* CTI */
- srlx %o3, 48, %o4 /* IEU0 */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 21f /* CTI */
- srlx %o3, 40, %o4 /* IEU0 */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 20f /* CTI */
-
- srlx %o3, 32, %o4 /* IEU0 */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 19f /* CTI */
- srlx %o3, 24, %o4 /* IEU0 */
-
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 18f /* CTI */
- srlx %o3, 16, %o4 /* IEU0 */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 17f /* CTI */
- srlx %o3, 8, %o4 /* IEU0 */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 16f /* CTI */
-
- andcc %o3, 0xff, %g0 /* IEU1 Group */
- bne,pn %icc, 15b /* CTI */
- stx %o3, [%o0 - 8] /* Store */
- retl /* CTI+IEU1 Group */
-
- mov %g6, %o0 /* IEU0 */
-
- .align 16
-16: srlx %o3, 8, %o4 /* IEU0 Group */
- stb %o4, [%o0 - 2] /* Store */
-17: srlx %o3, 16, %o4 /* IEU0 Group */
- stb %o4, [%o0 - 3] /* Store */
-
-18: srlx %o3, 24, %o4 /* IEU0 Group */
- stb %o4, [%o0 - 4] /* Store */
-19: srlx %o3, 32, %o4 /* IEU0 Group */
- stw %o4, [%o0 - 8] /* Store */
-
- retl /* CTI+IEU1 Group */
- mov %g6, %o0 /* IEU0 */
- nop
- nop
-
-20: srlx %o3, 40, %o4 /* IEU0 Group */
- stb %o4, [%o0 - 6] /* Store */
-21: srlx %o3, 48, %o4 /* IEU0 Group */
- stb %o4, [%o0 - 7] /* Store */
-
-22: srlx %o3, 56, %o4 /* IEU0 Group */
- stb %o4, [%o0 - 8] /* Store */
- retl /* CTI+IEU1 Group */
- mov %g6, %o0 /* IEU0 */
-END(strcat)
-libc_hidden_def(strcat)
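
All of the deleted sparc64 routines in this commit lean on the ((xword - 0x0101010101010101) & 0x8080808080808080) zero-byte test described in their header comments. A minimal C sketch of the fast test and the precise EIGHTBIT_NOT_RARE variant, assuming 64-bit words (illustrative names, not library code):

    #include <stdint.h>

    #define LO 0x0101010101010101ULL   /* the %g1 constant */
    #define HI 0x8080808080808080ULL   /* the %g2 constant */

    /* Fast test: nonzero if any byte of x might be zero.  As the header
     * comments warn, it also fires for bytes in the range 0x81-0xff. */
    static int maybe_has_zero(uint64_t x)
    {
        return ((x - LO) & HI) != 0;
    }

    /* Precise variant (EIGHTBIT_NOT_RARE): nonzero iff a byte is zero. */
    static int has_zero(uint64_t x)
    {
        return ((x - LO) & ~x & HI) != 0;
    }
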
diff --git a/libc/string/sparc/sparc64/strchr.S b/libc/string/sparc/sparc64/strchr.S
deleted file mode 100644
index da26d1f9c..000000000
--- a/libc/string/sparc/sparc64/strchr.S
+++ /dev/null
@@ -1,486 +0,0 @@
-/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
- For SPARC v9.
- Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
- Jakub Jelinek <jj@ultra.linux.cz>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <features.h>
-#include <asm/asi.h>
-#ifndef XCC
-#define XCC xcc
-#define USE_BPR
- .register %g2, #scratch
- .register %g3, #scratch
- .register %g6, #scratch
-#endif
-
- /* Normally, this uses
- ((xword - 0x0101010101010101) & 0x8080808080808080) test
- to find out if any byte in xword could be zero. This is fast, but
- also gives false alarm for any byte in range 0x81-0xff. It does
- not matter for correctness, as if this test tells us there could
- be some zero byte, we check it byte by byte, but if bytes with
- high bits set are common in the strings, then this will give poor
- performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
- will use one tick slower, but more precise test
- ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
- which does not give any false alarms (but if some bits are set,
- one cannot assume from it which bytes are zero and which are not).
- It is yet to be measured, what is the correct default for glibc
- in these days for an average user.
- */
-
- .text
- .align 32
-ENTRY(strchr)
- andcc %o1, 0xff, %o1 /* IEU1 Group */
- be,pn %icc, 17f /* CTI */
- sllx %o1, 8, %g3 /* IEU0 Group */
- sethi %hi(0x01010101), %g1 /* IEU1 */
-
- or %g3, %o1, %g3 /* IEU0 Group */
- ldub [%o0], %o3 /* Load */
- sllx %g3, 16, %g5 /* IEU0 Group */
- or %g1, %lo(0x01010101), %g1 /* IEU1 */
-
- sllx %g1, 32, %g2 /* IEU0 Group */
- brz,pn %o3, 5f /* CTI+IEU1 */
- orcc %g3, %g5, %g3 /* IEU1 Group */
- sllx %g3, 32, %g5 /* IEU0 */
-
- cmp %o3, %o1 /* IEU1 Group */
- be,pn %xcc, 14f /* CTI */
- or %g1, %g2, %g1 /* IEU0 */
- andcc %o0, 7, %g0 /* IEU1 Group */
-
- bne,a,pn %icc, 15f /* CTI */
- add %o0, 1, %o0 /* IEU0 */
- ldx [%o0], %o3 /* Load Group */
-1: sllx %g1, 7, %g2 /* IEU0 */
-
- or %g3, %g5, %g3 /* IEU1 */
- add %o0, 8, %o0 /* IEU0 Group */
- xor %o3, %g3, %o4 /* IEU1 */
- /* %g1 = 0101010101010101 *
-	 * %g2 = 8080808080808080	 *
- * %g3 = c c c c c c c c *
- * %o3 = value *
- * %o4 = value XOR c */
-2: sub %o3, %g1, %o2 /* IEU0 Group */
-
- sub %o4, %g1, %o5 /* IEU1 */
-#ifdef EIGHTBIT_NOT_RARE
- andn %o2, %o3, %g6 /* IEU0 Group */
- andn %o5, %o4, %o5 /* IEU1 */
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- or %o5, %g6, %o5 /* IEU0 Group */
-#else
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- or %o5, %o2, %o5 /* IEU0 Group */
-#endif
- add %o0, 8, %o0 /* IEU1 */
-
- andcc %o5, %g2, %g0 /* IEU1 Group */
- be,a,pt %xcc, 2b /* CTI */
- xor %o3, %g3, %o4 /* IEU0 */
- srlx %o5, 32, %g5 /* IEU0 Group */
-
- add %o2, %g1, %o2 /* IEU1 */
-3: andcc %g5, %g2, %g0 /* IEU1 Group */
- be,pn %xcc, 4f /* CTI */
- srlx %o2, 56, %g5 /* IEU0 */
-
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 5f /* CTI */
- srlx %o4, 56, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 6f /* CTI */
- srlx %o2, 48, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 5f /* CTI */
-
- srlx %o4, 48, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 7f /* CTI */
- srlx %o2, 40, %g5 /* IEU0 */
-
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 5f /* CTI */
- srlx %o4, 40, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 8f /* CTI */
- srlx %o2, 32, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 5f /* CTI */
-
- srlx %o4, 32, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 9f /* CTI */
-4: srlx %o2, 24, %g5 /* IEU0 */
-
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 5f /* CTI */
- srlx %o4, 24, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 10f /* CTI */
- srlx %o2, 16, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 5f /* CTI */
-
- srlx %o4, 16, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 11f /* CTI */
- srlx %o2, 8, %g5 /* IEU0 */
-
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 5f /* CTI */
- srlx %o4, 8, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 12f /* CTI */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 5f /* CTI */
- sub %o3, %g1, %o2 /* IEU0 */
-
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 13f /* CTI */
- xor %o3, %g3, %o4 /* IEU0 */
- ldxa [%o0] ASI_PNF, %o3 /* Load Group */
-
- sub %o4, %g1, %o5 /* IEU0 */
- or %o5, %o2, %o5 /* IEU1 */
- add %o0, 8, %o0 /* IEU0 Group */
- andcc %o5, %g2, %g0 /* IEU1 */
-
- be,a,pt %xcc, 2b /* CTI */
- xor %o3, %g3, %o4 /* IEU0 Group */
- srlx %o5, 32, %g5 /* IEU0 Group */
- ba,pt %xcc, 3b /* CTI */
-
- add %o2, %g1, %o2 /* IEU1 */
-
- .align 16
-5: retl /* CTI+IEU1 Group */
- clr %o0 /* IEU0 */
-6: retl /* CTI+IEU1 Group */
- add %o0, -16, %o0 /* IEU0 */
-
-7: retl /* CTI+IEU1 Group */
- add %o0, -15, %o0 /* IEU0 */
-8: retl /* CTI+IEU1 Group */
- add %o0, -14, %o0 /* IEU0 */
-
-9: retl /* CTI+IEU1 Group */
- add %o0, -13, %o0 /* IEU0 */
-10: retl /* CTI+IEU1 Group */
- add %o0, -12, %o0 /* IEU0 */
-
-11: retl /* CTI+IEU1 Group */
- add %o0, -11, %o0 /* IEU0 */
-12: retl /* CTI+IEU1 Group */
- add %o0, -10, %o0 /* IEU0 */
-
-13: retl /* CTI+IEU1 Group */
- add %o0, -9, %o0 /* IEU0 */
-14: retl /* CTI+IEU1 Group */
- nop /* IEU0 */
-
- .align 16
-15: ldub [%o0], %o3 /* Load Group */
-16: andcc %o0, 7, %g0 /* IEU1 */
- be,a,pn %icc, 1b /* CTI */
- ldx [%o0], %o3 /* Load Group */
-
- andcc %o3, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 5b /* CTI */
- add %o0, 1, %o0 /* IEU0 */
- cmp %o3, %o1 /* IEU1 Group */
-
- bne,a,pn %icc, 16b /* CTI */
- ldub [%o0], %o3 /* Load */
- retl /* CTI+IEU1 Group */
- add %o0, -1, %o0 /* IEU0 */
-
- /* strchr (str, 0) */
- .align 32
- nop
- .align 16
-17: sethi %hi(0x01010101), %g1 /* IEU0 Group */
- ldub [%o0], %o3 /* Load */
- or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
- sllx %g1, 32, %g2 /* IEU0 Group */
-
- andcc %o0, 7, %g0 /* IEU1 */
- or %g1, %g2, %g1 /* IEU0 Group */
- bne,pn %icc, 32f /* CTI */
- sllx %g1, 7, %g2 /* IEU0 Group */
-
- brz,pn %o3, 30f /* CTI+IEU1 */
- ldx [%o0], %o3 /* Load */
-18: add %o0, 8, %o0 /* IEU0 Group */
-19: sub %o3, %g1, %o2 /* IEU0 Group */
-
-#ifdef EIGHTBIT_NOT_RARE
- andn %o2, %o3, %g6 /* IEU0 Group */
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- andcc %g6, %g2, %g0 /* IEU1 Group */
-#else
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- andcc %o2, %g2, %g0 /* IEU1 Group */
-#endif
- be,pt %xcc, 19b /* CTI */
- add %o0, 8, %o0 /* IEU0 */
-
- addcc %o2, %g1, %g3 /* IEU1 Group */
- srlx %o2, 32, %o2 /* IEU0 */
-20: andcc %o2, %g2, %g0 /* IEU1 Group */
- be,pn %xcc, 21f /* CTI */
-
- srlx %g3, 56, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 29f /* CTI */
- srlx %g3, 48, %o2 /* IEU0 */
-
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 28f /* CTI */
- srlx %g3, 40, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 27f /* CTI */
- srlx %g3, 32, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 26f /* CTI */
-
-21: srlx %g3, 24, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 25f /* CTI */
- srlx %g3, 16, %o2 /* IEU0 */
-
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 24f /* CTI */
- srlx %g3, 8, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 23f /* CTI */
- sub %o3, %g1, %o2 /* IEU0 */
- andcc %g3, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 22f /* CTI */
-
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- andcc %o2, %g2, %g0 /* IEU1 Group */
- be,pt %xcc, 19b /* CTI */
- add %o0, 8, %o0 /* IEU0 */
-
- addcc %o2, %g1, %g3 /* IEU1 Group */
- ba,pt %xcc, 20b /* CTI */
- srlx %o2, 32, %o2 /* IEU0 */
-
- .align 16
-22: retl /* CTI+IEU1 Group */
- add %o0, -9, %o0 /* IEU0 */
-23: retl /* CTI+IEU1 Group */
- add %o0, -10, %o0 /* IEU0 */
-
-24: retl /* CTI+IEU1 Group */
- add %o0, -11, %o0 /* IEU0 */
-25: retl /* CTI+IEU1 Group */
- add %o0, -12, %o0 /* IEU0 */
-
-26: retl /* CTI+IEU1 Group */
- add %o0, -13, %o0 /* IEU0 */
-27: retl /* CTI+IEU1 Group */
- add %o0, -14, %o0 /* IEU0 */
-
-28: retl /* CTI+IEU1 Group */
- add %o0, -15, %o0 /* IEU0 */
-29: retl /* CTI+IEU1 Group */
- add %o0, -16, %o0 /* IEU0 */
-
-30: retl /* CTI+IEU1 Group */
- nop /* IEU0 */
-
- .align 16
-32: andcc %o0, 7, %g0 /* IEU1 Group */
- be,a,pn %icc, 18b /* CTI */
- ldx [%o0], %o3 /* Load */
- add %o0, 1, %o0 /* IEU0 Group */
-
- brnz,a,pt %o3, 32b /* CTI+IEU1 */
- lduba [%o0] ASI_PNF, %o3 /* Load */
- retl /* CTI+IEU1 Group */
- add %o0, -1, %o0 /* IEU0 */
-END(strchr)
-libc_hidden_def(strchr)
-#ifdef __UCLIBC_SUSV3_LEGACY__
-strong_alias(strchr,index)
-#endif
-
- .align 32
-ENTRY(strrchr)
- andcc %o1, 0xff, %o1 /* IEU1 Group */
- be,pn %icc, 17b /* CTI */
- clr %g4 /* IEU0 */
- andcc %o0, 7, %g0 /* IEU1 Group */
-
- bne,pn %icc, 13f /* CTI */
- sllx %o1, 8, %g3 /* IEU0 */
- ldx [%o0], %o3 /* Load Group */
-1: sethi %hi(0x01010101), %g1 /* IEU0 */
-
- or %g3, %o1, %g3 /* IEU1 */
- sllx %g3, 16, %g5 /* IEU0 Group */
- or %g1, %lo(0x01010101), %g1 /* IEU1 */
- sllx %g1, 32, %g2 /* IEU0 Group */
-
- or %g3, %g5, %g3 /* IEU1 */
- sllx %g3, 32, %g5 /* IEU0 Group */
- or %g1, %g2, %g1 /* IEU1 */
- sllx %g1, 7, %g2 /* IEU0 Group */
-
- or %g3, %g5, %g3 /* IEU1 */
- add %o0, 8, %o0 /* IEU0 Group */
- xor %o3, %g3, %o4 /* IEU1 */
- /* %g1 = 0101010101010101 *
-	 * %g2 = 8080808080808080	 *
- * %g3 = c c c c c c c c *
- * %o3 = value *
- * %o4 = value XOR c */
-2: sub %o3, %g1, %o2 /* IEU0 Group */
-
-3: sub %o4, %g1, %o5 /* IEU1 */
-#ifdef EIGHTBIT_NOT_RARE
- andn %o2, %o3, %g6 /* IEU0 Group */
- andn %o5, %o4, %o5 /* IEU1 */
- ldxa [%o0] ASI_PNF, %o3 /* Load */
-
- or %o5, %g6, %o5 /* IEU0 Group */
-#else
- ldxa [%o0] ASI_PNF, %o3 /* Load */
-
- or %o5, %o2, %o5 /* IEU0 Group */
-#endif
- add %o0, 8, %o0 /* IEU1 */
- andcc %o5, %g2, %g0 /* IEU1 Group */
- be,a,pt %xcc, 2b /* CTI */
-
- xor %o3, %g3, %o4 /* IEU0 */
- srlx %o5, 32, %g5 /* IEU0 Group */
- add %o2, %g1, %o2 /* IEU1 */
- andcc %g5, %g2, %g0 /* IEU1 Group */
-
- be,pn %xcc, 7f /* CTI */
- srlx %o2, 56, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 12f /* CTI */
-
- srlx %o4, 56, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- srlx %o2, 48, %g5 /* IEU0 */
- be,a,pn %icc, 4f /* CTI */
-
- add %o0, -16, %g4 /* IEU0 Group */
-4: andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 12f /* CTI */
- srlx %o4, 48, %g5 /* IEU0 */
-
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- srlx %o2, 40, %g5 /* IEU0 */
- be,a,pn %icc, 5f /* CTI */
- add %o0, -15, %g4 /* IEU0 Group */
-
-5: andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 12f /* CTI */
- srlx %o4, 40, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
-
- srlx %o2, 32, %g5 /* IEU0 */
- be,a,pn %icc, 6f /* CTI */
- add %o0, -14, %g4 /* IEU0 Group */
-6: andcc %g5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 12f /* CTI */
- srlx %o4, 32, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,a,pn %icc, 7f /* CTI */
-
- add %o0, -13, %g4 /* IEU0 */
-7: srlx %o2, 24, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 12f /* CTI */
-
- srlx %o4, 24, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- srlx %o2, 16, %g5 /* IEU0 */
- be,a,pn %icc, 8f /* CTI */
-
- add %o0, -12, %g4 /* IEU0 Group */
-8: andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 12f /* CTI */
- srlx %o4, 16, %g5 /* IEU0 */
-
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- srlx %o2, 8, %g5 /* IEU0 */
- be,a,pn %icc, 9f /* CTI */
- add %o0, -11, %g4 /* IEU0 Group */
-
-9: andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 12f /* CTI */
- srlx %o4, 8, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
-
- be,a,pn %icc, 10f /* CTI */
- add %o0, -10, %g4 /* IEU0 */
-10: andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 12f /* CTI */
-
- sub %o3, %g1, %o2 /* IEU0 */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,a,pn %icc, 11f /* CTI */
- add %o0, -9, %g4 /* IEU0 */
-
-11: ba,pt %xcc, 3b /* CTI Group */
- xor %o3, %g3, %o4 /* IEU0 Group */
-12: retl /* CTI+IEU1 Group */
- mov %g4, %o0 /* IEU0 */
-
- .align 16
-13: ldub [%o0], %o3 /* Load Group */
- add %o0, 1, %o0 /* IEU0 */
-14: andcc %o3, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 12b /* CTI */
-
- cmp %o3, %o1 /* IEU1 Group */
- ldub [%o0], %o3 /* Load */
- be,a,pn %icc, 15f /* CTI */
- add %o0, -1, %g4 /* IEU0 Group */
-
-15: andcc %o0, 7, %g0 /* IEU1 Group */
- bne,a,pt %icc, 14b /* CTI */
- add %o0, 1, %o0 /* IEU0 */
- ba,pt %xcc, 1b /* CTI Group */
-
- ldx [%o0], %o3 /* Load */
-END(strrchr)
-libc_hidden_def(strrchr)
-#ifdef __UCLIBC_SUSV3_LEGACY__
-strong_alias(strrchr,rindex)
-#endif
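
strchr above runs that test twice per doubleword: once on the loaded value to catch the terminator, and once on value XOR c (%o4) to catch the search character. A sketch of the idea; the multiply-based broadcast is shorthand for the shift/or sequence the entry code uses:

    #include <stdint.h>

    #define LO 0x0101010101010101ULL
    #define HI 0x8080808080808080ULL

    /* Replicate c into all eight byte lanes (the %g3 value). */
    static uint64_t broadcast(unsigned char c)
    {
        return (uint64_t)c * LO;
    }

    /* x ^ broadcast(c) has a zero byte exactly where x has a byte equal
     * to c, so the zero-byte test doubles as a match test.  Like the
     * fast test, this may report false positives that a byte-by-byte
     * check then dismisses. */
    static int maybe_has_char(uint64_t x, unsigned char c)
    {
        uint64_t t = x ^ broadcast(c);
        return ((t - LO) & HI) != 0;
    }
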
diff --git a/libc/string/sparc/sparc64/strcmp.S b/libc/string/sparc/sparc64/strcmp.S
deleted file mode 100644
index df9e69179..000000000
--- a/libc/string/sparc/sparc64/strcmp.S
+++ /dev/null
@@ -1,279 +0,0 @@
-/* Compare two strings for differences.
- For SPARC v9.
- Copyright (C) 1997, 1999, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
- Jakub Jelinek <jj@ultra.linux.cz>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <asm/asi.h>
-#ifndef XCC
- .register %g2, #scratch
- .register %g3, #scratch
- .register %g6, #scratch
-#endif
-
- /* Normally, this uses
- ((xword - 0x0101010101010101) & 0x8080808080808080) test
- to find out if any byte in xword could be zero. This is fast, but
- also gives false alarm for any byte in range 0x81-0xff. It does
- not matter for correctness, as if this test tells us there could
- be some zero byte, we check it byte by byte, but if bytes with
- high bits set are common in the strings, then this will give poor
- performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
- will use one tick slower, but more precise test
- ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
- which does not give any false alarms (but if some bits are set,
- one cannot assume from it which bytes are zero and which are not).
- It is yet to be measured, what is the correct default for glibc
- in these days for an average user.
- */
-
- .text
- .align 32
-ENTRY(strcmp)
- sethi %hi(0x01010101), %g1 /* IEU0 Group */
- andcc %o0, 7, %g0 /* IEU1 */
- bne,pn %icc, 7f /* CTI */
- or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
-
- andcc %o1, 7, %g3 /* IEU1 */
- bne,pn %icc, 9f /* CTI */
- sllx %g1, 32, %g2 /* IEU0 Group */
- ldx [%o0], %o2 /* Load */
-
- or %g1, %g2, %g1 /* IEU0 Group */
-1: ldx [%o1], %o3 /* Load */
- sub %o1, %o0, %o1 /* IEU1 */
- sllx %g1, 7, %g2 /* IEU0 Group */
-
-2: add %o0, 8, %o0 /* IEU1 */
- sub %o2, %g1, %g3 /* IEU0 Group */
- subcc %o2, %o3, %g0 /* IEU1 */
- bne,pn %xcc, 13f /* CTI */
-
-#ifdef EIGHTBIT_NOT_RARE
- andn %g3, %o2, %g4 /* IEU0 Group */
- ldxa [%o0] ASI_PNF, %o2 /* Load */
- andcc %g4, %g2, %g0 /* IEU1 Group */
-#else
- ldxa [%o0] ASI_PNF, %o2 /* Load Group */
- andcc %g3, %g2, %g0 /* IEU1 */
-#endif
- be,a,pt %xcc, 2b /* CTI */
- ldxa [%o1 + %o0] ASI_PNF, %o3 /* Load Group */
-
- addcc %g3, %g1, %o4 /* IEU1 */
- srlx %g3, 32, %g3 /* IEU0 */
- andcc %g3, %g2, %g0 /* IEU1 Group */
- be,pt %xcc, 3f /* CTI */
-
- srlx %o4, 56, %o5 /* IEU0 */
- andcc %o5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4f /* CTI */
- srlx %o4, 48, %o5 /* IEU0 */
-
- andcc %o5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4f /* CTI */
- srlx %o4, 40, %o5 /* IEU0 */
- andcc %o5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 4f /* CTI */
- srlx %o4, 32, %o5 /* IEU0 */
- andcc %o5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4f /* CTI */
-
-3: srlx %o4, 24, %o5 /* IEU0 */
- andcc %o5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4f /* CTI */
- srlx %o4, 16, %o5 /* IEU0 */
-
- andcc %o5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4f /* CTI */
- srlx %o4, 8, %o5 /* IEU0 */
- andcc %o5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 4f /* CTI */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- bne,a,pn %icc, 2b /* CTI */
- ldxa [%o1 + %o0] ASI_PNF, %o3 /* Load */
-
-4: retl /* CTI+IEU1 Group */
- clr %o0 /* IEU0 */
-
- .align 32
-13: mov 0xff, %g6 /* IEU0 Group */
-#ifdef EIGHTBIT_NOT_RARE
- andcc %g4, %g2, %g0 /* IEU1 */
-#else
- andcc %g3, %g2, %g0 /* IEU1 */
-#endif
- be,pt %xcc, 25f /* CTI */
- addcc %g3, %g1, %o4 /* IEU1 Group */
-
- srlx %g3, 32, %g3 /* IEU0 */
- andcc %g3, %g2, %g0 /* IEU1 Group */
- be,pt %xcc, 23f /* CTI */
- sllx %g6, 56, %o5 /* IEU0 */
-
- andcc %o4, %o5, %g0 /* IEU1 Group */
- be,pn %xcc, 24f /* CTI */
- sllx %g6, 48, %o5 /* IEU0 */
- andcc %o4, %o5, %g0 /* IEU1 Group */
-
- be,pn %xcc, 24f /* CTI */
- sllx %g6, 40, %o5 /* IEU0 */
- andcc %o4, %o5, %g0 /* IEU1 Group */
- be,pn %xcc, 24f /* CTI */
-
- sllx %g6, 32, %o5 /* IEU0 */
- andcc %o4, %o5, %g0 /* IEU1 Group */
- be,pn %xcc, 24f /* CTI */
-23: sllx %g6, 24, %o5 /* IEU0 */
-
- andcc %o4, %o5, %g0 /* IEU1 Group */
- be,pn %icc, 24f /* CTI */
- sllx %g6, 16, %o5 /* IEU0 */
- andcc %o4, %o5, %g0 /* IEU1 Group */
-
- be,pn %icc, 24f /* CTI */
- sllx %g6, 8, %o5 /* IEU0 */
- andcc %o4, %o5, %g0 /* IEU1 Group */
- be,pn %icc, 24f /* CTI */
-
- mov %g6, %o5 /* IEU0 */
-25: cmp %o4, %o3 /* IEU1 Group */
-5: mov -1, %o0 /* IEU0 */
- retl /* CTI+IEU1 Group */
-
- movgu %xcc, 1, %o0 /* Single Group */
-
- .align 16
-24: sub %o5, 1, %g6 /* IEU0 Group */
- clr %o0 /* IEU1 */
- or %o5, %g6, %o5 /* IEU0 Group */
- andn %o4, %o5, %o4 /* IEU0 Group */
-
- andn %o3, %o5, %o3 /* IEU1 */
- cmp %o4, %o3 /* IEU1 Group */
- movgu %xcc, 1, %o0 /* Single Group */
- retl /* CTI+IEU1 Group */
-
- movlu %xcc, -1, %o0 /* Single Group */
-6: retl /* CTI+IEU1 Group */
- mov %o4, %o0 /* IEU0 */
-
- .align 16
-7: ldub [%o0], %o2 /* Load */
- add %o0, 1, %o0 /* IEU1 */
- ldub [%o1], %o3 /* Load Group */
- sllx %g1, 32, %g2 /* IEU0 */
-
-8: add %o1, 1, %o1 /* IEU1 */
- subcc %o2, %o3, %o4 /* IEU1 Group */
- bne,pn %xcc, 6b /* CTI */
- lduba [%o0] ASI_PNF, %o2 /* Load */
-
- brz,pn %o3, 4b /* CTI+IEU1 Group */
- lduba [%o1] ASI_PNF, %o3 /* Load */
- andcc %o0, 7, %g0 /* IEU1 Group */
- bne,a,pn %icc, 8b /* CTI */
-
- add %o0, 1, %o0 /* IEU0 */
- or %g1, %g2, %g1 /* IEU0 Group */
- andcc %o1, 7, %g3 /* IEU1 */
- be,a,pn %icc, 1b /* CTI */
-
- ldxa [%o0] ASI_PNF, %o2 /* Load Group */
-9: sllx %g3, 3, %g5 /* IEU0 */
- mov 64, %o5 /* IEU1 */
- sub %o1, %g3, %o1 /* IEU0 Group */
-
- sub %o5, %g5, %o5 /* IEU1 */
- ldxa [%o1] ASI_PNF, %g6 /* Load Group */
- or %g1, %g2, %g1 /* IEU0 */
- sub %o1, %o0, %o1 /* IEU1 */
-
- sllx %g1, 7, %g2 /* IEU0 Group */
- add %o1, 8, %o1 /* IEU1 */
- /* %g1 = 0101010101010101
-	 * %g2 = 8080808080808080
- * %g5 = number of bits to shift left
- * %o5 = number of bits to shift right */
-10: sllx %g6, %g5, %o3 /* IEU0 Group */
- ldxa [%o1 + %o0] ASI_PNF, %g6 /* Load */
-
-11: srlx %g6, %o5, %o4 /* IEU0 Group */
- ldxa [%o0] ASI_PNF, %o2 /* Load */
- or %o3, %o4, %o3 /* IEU1 */
- add %o0, 8, %o0 /* IEU0 Group */
-
- subcc %o2, %o3, %g0 /* IEU1 */
-#ifdef EIGHTBIT_NOT_RARE
- sub %o2, %g1, %g3 /* IEU0 Group */
- bne,pn %xcc, 13b /* CTI */
- andn %g3, %o2, %g4 /* IEU0 Group */
-
- andcc %g4, %g2, %g0 /* IEU1 Group */
- be,pt %xcc, 10b /* CTI */
- srlx %g4, 32, %g4 /* IEU0 */
- andcc %g4, %g2, %g0 /* IEU1 Group */
-#else
- bne,pn %xcc, 13b /* CTI */
- sub %o2, %g1, %g3 /* IEU0 Group */
- andcc %g3, %g2, %g0 /* IEU1 Group */
-
- be,pt %xcc, 10b /* CTI */
- srlx %g3, 32, %g3 /* IEU0 */
- andcc %g3, %g2, %g0 /* IEU1 Group */
-#endif
- be,pt %xcc, 12f /* CTI */
-
- srlx %o2, 56, %g3 /* IEU0 */
- andcc %g3, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4b /* CTI */
- srlx %o2, 48, %g3 /* IEU0 */
-
- andcc %g3, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4b /* CTI */
- srlx %o2, 40, %g3 /* IEU0 */
- andcc %g3, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 4b /* CTI */
- srlx %o2, 32, %g3 /* IEU0 */
- andcc %g3, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4b /* CTI */
-
-12: srlx %o2, 24, %g3 /* IEU0 */
- andcc %g3, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4b /* CTI */
- srlx %o2, 16, %g3 /* IEU0 */
-
- andcc %g3, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4b /* CTI */
- srlx %o2, 8, %g3 /* IEU0 */
- andcc %g3, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 4b /* CTI */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4b /* CTI */
- sllx %g6, %g5, %o3 /* IEU0 */
-
- ba,pt %xcc, 11b /* CTI Group */
- ldxa [%o1 + %o0] ASI_PNF, %g6 /* Load */
-END(strcmp)
-libc_hidden_def(strcmp)
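
In C terms, the strcmp loop compares whole doublewords and falls back to byte resolution on any difference or suspected NUL. A rough sketch, assuming both inputs are 8-byte aligned (the deleted code reaches misaligned inputs through the shift-and-merge path at label 9):

    #include <stdint.h>

    #define LO 0x0101010101010101ULL
    #define HI 0x8080808080808080ULL

    /* Sketch only: like the assembly, this may read a few bytes past
     * the terminator, but never past the enclosing aligned word. */
    static int strcmp64(const char *s1, const char *s2)
    {
        const uint64_t *a = (const uint64_t *)s1;
        const uint64_t *b = (const uint64_t *)s2;

        for (;;) {
            uint64_t x = *a++, y = *b++;

            /* Stay on the fast path while the words are equal and
             * cannot contain a NUL. */
            if (x == y && !((x - LO) & HI))
                continue;

            /* Resolve in memory order, which is string order. */
            const unsigned char *p = (const unsigned char *)(a - 1);
            const unsigned char *q = (const unsigned char *)(b - 1);
            for (int i = 0; i < 8; i++) {
                if (p[i] != q[i] || p[i] == '\0')
                    return p[i] - q[i];
            }
            /* A 0x81-0xff byte tripped the fast test: keep scanning. */
        }
    }
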
diff --git a/libc/string/sparc/sparc64/strcpy.S b/libc/string/sparc/sparc64/strcpy.S
deleted file mode 100644
index 1317d5489..000000000
--- a/libc/string/sparc/sparc64/strcpy.S
+++ /dev/null
@@ -1,245 +0,0 @@
-/* Copy SRC to DEST returning DEST.
- For SPARC v9.
- Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
- Jakub Jelinek <jj@ultra.linux.cz>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <asm/asi.h>
-#ifndef XCC
- .register %g2, #scratch
- .register %g3, #scratch
- .register %g6, #scratch
-#endif
-
- /* Normally, this uses
- ((xword - 0x0101010101010101) & 0x8080808080808080) test
- to find out if any byte in xword could be zero. This is fast, but
- also gives false alarm for any byte in range 0x81-0xff. It does
- not matter for correctness, as if this test tells us there could
- be some zero byte, we check it byte by byte, but if bytes with
- high bits set are common in the strings, then this will give poor
- performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
- will use one tick slower, but more precise test
- ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
- which does not give any false alarms (but if some bits are set,
- one cannot assume from it which bytes are zero and which are not).
- It is yet to be measured, what is the correct default for glibc
- in these days for an average user.
- */
-
- .text
- .align 32
-ENTRY(strcpy)
- sethi %hi(0x01010101), %g1 /* IEU0 Group */
- mov %o0, %g6 /* IEU1 */
- or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
- andcc %o0, 7, %g0 /* IEU1 */
-
- sllx %g1, 32, %g2 /* IEU0 Group */
- bne,pn %icc, 12f /* CTI */
- andcc %o1, 7, %g3 /* IEU1 */
- or %g1, %g2, %g1 /* IEU0 Group */
-
- bne,pn %icc, 14f /* CTI */
- sllx %g1, 7, %g2 /* IEU0 Group */
-1: ldx [%o1], %o3 /* Load */
- add %o1, 8, %o1 /* IEU1 */
-
-2: mov %o3, %g3 /* IEU0 Group */
-3: sub %o3, %g1, %o2 /* IEU1 */
- ldxa [%o1] ASI_PNF, %o3 /* Load */
-#ifdef EIGHTBIT_NOT_RARE
- andn %o2, %g3, %o2 /* IEU0 Group */
-#endif
- add %o0, 8, %o0 /* IEU0 Group */
-
- andcc %o2, %g2, %g0 /* IEU1 */
- add %o1, 8, %o1 /* IEU0 Group */
- be,a,pt %xcc, 2b /* CTI */
- stx %g3, [%o0 - 8] /* Store */
-
- srlx %g3, 56, %g5 /* IEU0 Group */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 11f /* CTI */
- srlx %g3, 48, %g4 /* IEU0 */
-
- andcc %g4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 10f /* CTI */
- srlx %g3, 40, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 9f /* CTI */
- srlx %g3, 32, %g4 /* IEU0 */
- andcc %g4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 8f /* CTI */
-
- srlx %g3, 24, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 7f /* CTI */
- srlx %g3, 16, %g4 /* IEU0 */
-
- andcc %g4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 6f /* CTI */
- srlx %g3, 8, %g5 /* IEU0 */
- andcc %g5, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 5f /* CTI */
- sub %o3, %g1, %o2 /* IEU0 */
- stx %g3, [%o0 - 8] /* Store Group */
- andcc %g3, 0xff, %g0 /* IEU1 */
-
- bne,pt %icc, 3b /* CTI */
- mov %o3, %g3 /* IEU0 Group */
-4: retl /* CTI+IEU1 Group */
- mov %g6, %o0 /* IEU0 */
-
- .align 16
-5: stb %g5, [%o0 - 2] /* Store Group */
- srlx %g3, 16, %g4 /* IEU0 */
-6: sth %g4, [%o0 - 4] /* Store Group */
- srlx %g3, 32, %g4 /* IEU0 */
-
- stw %g4, [%o0 - 8] /* Store Group */
- retl /* CTI+IEU1 Group */
- mov %g6, %o0 /* IEU0 */
-7: stb %g5, [%o0 - 4] /* Store Group */
-
- srlx %g3, 32, %g4 /* IEU0 */
-8: stw %g4, [%o0 - 8] /* Store Group */
- retl /* CTI+IEU1 Group */
- mov %g6, %o0 /* IEU0 */
-
-9: stb %g5, [%o0 - 6] /* Store Group */
- srlx %g3, 48, %g4 /* IEU0 */
-10: sth %g4, [%o0 - 8] /* Store Group */
- retl /* CTI+IEU1 Group */
-
- mov %g6, %o0 /* IEU0 */
-11: stb %g5, [%o0 - 8] /* Store Group */
- retl /* CTI+IEU1 Group */
- mov %g6, %o0 /* IEU0 */
-
-12: or %g1, %g2, %g1 /* IEU0 Group */
- ldub [%o1], %o3 /* Load */
- sllx %g1, 7, %g2 /* IEU0 Group */
- stb %o3, [%o0] /* Store Group */
-
-13: add %o0, 1, %o0 /* IEU0 */
- add %o1, 1, %o1 /* IEU1 */
- andcc %o3, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 4b /* CTI */
-
- lduba [%o1] ASI_PNF, %o3 /* Load */
- andcc %o0, 7, %g0 /* IEU1 Group */
- bne,a,pt %icc, 13b /* CTI */
- stb %o3, [%o0] /* Store */
-
- andcc %o1, 7, %g3 /* IEU1 Group */
- be,a,pt %icc, 1b /* CTI */
- ldx [%o1], %o3 /* Load */
-14: orcc %g0, 64, %g4 /* IEU1 Group */
-
- sllx %g3, 3, %g5 /* IEU0 */
- sub %o1, %g3, %o1 /* IEU0 Group */
- sub %g4, %g5, %g4 /* IEU1 */
- /* %g1 = 0101010101010101 *
- * %g2 = 8080808080808080 *
- * %g3 = source alignment *
- * %g5 = number of bits to shift left *
- * %g4 = number of bits to shift right */
- ldxa [%o1] ASI_PNF, %o5 /* Load Group */
-
- addcc %o1, 8, %o1 /* IEU1 */
-15: sllx %o5, %g5, %o3 /* IEU0 Group */
- ldxa [%o1] ASI_PNF, %o5 /* Load */
- srlx %o5, %g4, %o4 /* IEU0 Group */
-
- add %o0, 8, %o0 /* IEU1 */
- or %o3, %o4, %o3 /* IEU0 Group */
- add %o1, 8, %o1 /* IEU1 */
- sub %o3, %g1, %o4 /* IEU0 Group */
-
-#ifdef EIGHTBIT_NOT_RARE
- andn %o4, %o3, %o4 /* IEU0 Group */
-#endif
- andcc %o4, %g2, %g0 /* IEU1 Group */
- be,a,pt %xcc, 15b /* CTI */
- stx %o3, [%o0 - 8] /* Store */
- srlx %o3, 56, %o4 /* IEU0 Group */
-
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 22f /* CTI */
- srlx %o3, 48, %o4 /* IEU0 */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 21f /* CTI */
- srlx %o3, 40, %o4 /* IEU0 */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 20f /* CTI */
-
- srlx %o3, 32, %o4 /* IEU0 */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 19f /* CTI */
- srlx %o3, 24, %o4 /* IEU0 */
-
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 18f /* CTI */
- srlx %o3, 16, %o4 /* IEU0 */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 17f /* CTI */
- srlx %o3, 8, %o4 /* IEU0 */
- andcc %o4, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 16f /* CTI */
-
- andcc %o3, 0xff, %g0 /* IEU1 Group */
- bne,pn %icc, 15b /* CTI */
- stx %o3, [%o0 - 8] /* Store */
- retl /* CTI+IEU1 Group */
-
- mov %g6, %o0 /* IEU0 */
-
- .align 16
-16: srlx %o3, 8, %o4 /* IEU0 Group */
- stb %o4, [%o0 - 2] /* Store */
-17: srlx %o3, 16, %o4 /* IEU0 Group */
- stb %o4, [%o0 - 3] /* Store */
-
-18: srlx %o3, 24, %o4 /* IEU0 Group */
- stb %o4, [%o0 - 4] /* Store */
-19: srlx %o3, 32, %o4 /* IEU0 Group */
- stw %o4, [%o0 - 8] /* Store */
-
- retl /* CTI+IEU1 Group */
- mov %g6, %o0 /* IEU0 */
- nop
- nop
-
-20: srlx %o3, 40, %o4 /* IEU0 Group */
- stb %o4, [%o0 - 6] /* Store */
-21: srlx %o3, 48, %o4 /* IEU0 Group */
- stb %o4, [%o0 - 7] /* Store */
-
-22: srlx %o3, 56, %o4 /* IEU0 Group */
- stb %o4, [%o0 - 8] /* Store */
- retl /* CTI+IEU1 Group */
- mov %g6, %o0 /* IEU0 */
-END(strcpy)
-
-libc_hidden_def(strcpy)
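
When the source and destination disagree in alignment (label 14 above), the copy reads aligned doublewords and splices each adjacent pair with one left and one right shift. A sketch of the splice, assuming SPARC's big-endian byte order and a source misalignment of 1 to 7 bytes:

    #include <stdint.h>

    /* Rebuild the 8 bytes that start `align` bytes into `prev`, where
     * `prev` and `next` are consecutive aligned big-endian doublewords.
     * align is 1..7 on this path (align == 0 takes the aligned loop),
     * so both shift counts stay in range. */
    static uint64_t splice(uint64_t prev, uint64_t next, unsigned align)
    {
        unsigned lshift = align * 8;    /* %g5 in the assembly */
        unsigned rshift = 64 - lshift;  /* %g4 in the assembly */
        return (prev << lshift) | (next >> rshift);
    }
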
diff --git a/libc/string/sparc/sparc64/strlen.S b/libc/string/sparc/sparc64/strlen.S
deleted file mode 100644
index 1fe854961..000000000
--- a/libc/string/sparc/sparc64/strlen.S
+++ /dev/null
@@ -1,173 +0,0 @@
-/* Determine the length of a string. For SPARC v9.
- Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
- Jakub Jelinek <jj@ultra.linux.cz>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <asm/asi.h>
-
- /* Normally, this uses
- ((xword - 0x0101010101010101) & 0x8080808080808080) test
- to find out if any byte in xword could be zero. This is fast, but
- also gives false alarm for any byte in range 0x81-0xff. It does
- not matter for correctness, as if this test tells us there could
- be some zero byte, we check it byte by byte, but if bytes with
- high bits set are common in the strings, then this will give poor
- performance. You can #define EIGHTBIT_NOT_RARE and the algorithm
- will use one tick slower, but more precise test
- ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
- which does not give any false alarms (but if some bits are set,
- one cannot assume from it which bytes are zero and which are not).
- It is yet to be measured, what is the correct default for glibc
- in these days for an average user.
- */
-
- .text
- .align 32
-ENTRY(strlen)
- sethi %hi(0x01010101), %g1 /* IEU0 Group */
- ldub [%o0], %o3 /* Load */
- or %g1, %lo(0x01010101), %g1 /* IEU0 Group */
- mov %o0, %o1 /* IEU1 */
-
- sllx %g1, 32, %g4 /* IEU0 Group */
- andcc %o0, 7, %g0 /* IEU1 */
- or %g1, %g4, %g1 /* IEU0 Group */
- brz,pn %o3, 13f /* CTI+IEU1 */
-
- sllx %g1, 7, %g4 /* IEU0 Group */
- bne,a,pn %icc, 15f /* CTI */
- add %o0, 1, %o0 /* IEU1 */
- /* %g1 = 0x0101010101010101 *
- * %g4 = 0x8080808080808080 *
- * %o0 = string pointer *
- * %o1 = start of string */
-1: ldx [%o0], %o3 /* Load Group */
-
- add %o0, 8, %o0 /* IEU1 */
-2: sub %o3, %g1, %o2 /* IEU0 Group */
-#ifdef EIGHTBIT_NOT_RARE
- andn %o2, %o3, %o5 /* IEU0 Group */
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- andcc %o5, %g4, %g0 /* IEU1 Group */
-#else
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- andcc %o2, %g4, %g0 /* IEU1 Group */
-#endif
-
- be,pt %xcc, 2b /* CTI */
- add %o0, 8, %o0 /* IEU0 */
- addcc %o2, %g1, %g5 /* IEU1 Group */
-#ifdef EIGHTBIT_NOT_RARE
- srlx %o5, 32, %o5 /* IEU0 */
-
-3: andcc %o5, %g4, %g0 /* IEU1 Group */
-#else
- srlx %o2, 32, %o2 /* IEU0 */
-
-3: andcc %o2, %g4, %g0 /* IEU1 Group */
-#endif
- be,pn %xcc, 4f /* CTI */
- srlx %g5, 56, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 12f /* CTI */
- srlx %g5, 48, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 11f /* CTI */
-
- srlx %g5, 40, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 10f /* CTI */
- srlx %g5, 32, %o2 /* IEU0 */
-
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 9f /* CTI */
-4: srlx %g5, 24, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
-
- be,pn %icc, 8f /* CTI */
- srlx %g5, 16, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 7f /* CTI */
-
- srlx %g5, 8, %o2 /* IEU0 */
- andcc %o2, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 6f /* CTI */
- sub %o3, %g1, %o2 /* IEU0 */
-
- andcc %g5, 0xff, %g0 /* IEU1 Group */
- be,pn %icc, 5f /* CTI */
- ldxa [%o0] ASI_PNF, %o3 /* Load */
- andcc %o2, %g4, %g0 /* IEU1 Group */
-
- be,pt %xcc, 2b /* CTI */
- add %o0, 8, %o0 /* IEU0 */
- addcc %o2, %g1, %g5 /* IEU1 Group */
- ba,pt %xcc, 3b /* CTI */
-
- srlx %o2, 32, %o2 /* IEU0 */
-5: add %o0, -9, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-
-6: add %o0, -10, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-7: add %o0, -11, %o0 /* IEU0 Group */
-
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-8: add %o0, -12, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
-
- sub %o0, %o1, %o0 /* IEU0 */
-9: add %o0, -13, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-
-10: add %o0, -14, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-11: add %o0, -15, %o0 /* IEU0 Group */
-
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-12: add %o0, -16, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
-
- sub %o0, %o1, %o0 /* IEU0 */
-13: retl /* CTI+IEU1 Group */
- mov 0, %o0 /* IEU0 */
- nop
-
-15: ldub [%o0], %o3 /* Load Group */
-16: andcc %o0, 7, %g0 /* IEU1 */
- be,pn %icc, 1b /* CTI */
- nop /* IEU0 Group */
-
- add %o0, 1, %o0 /* IEU1 */
- andcc %o3, 0xff, %g0 /* IEU1 Group */
- bne,a,pt %icc, 16b /* CTI */
- lduba [%o0] ASI_PNF, %o3 /* Load */
-
- add %o0, -1, %o0 /* IEU0 Group */
- retl /* CTI+IEU1 Group */
- sub %o0, %o1, %o0 /* IEU0 */
-END(strlen)
-libc_hidden_def(strlen)
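
The strlen inner loop is the classic word-at-a-time scan built from the same test. A portable sketch using the precise form throughout, assuming an 8-byte-aligned start (the deleted code walks any unaligned head one byte at a time first):

    #include <stddef.h>
    #include <stdint.h>

    #define LO 0x0101010101010101ULL
    #define HI 0x8080808080808080ULL

    static size_t strlen64(const char *s)
    {
        const uint64_t *w = (const uint64_t *)s;

        /* Skip doublewords that provably contain no NUL byte. */
        while (!((*w - LO) & ~*w & HI))
            w++;

        /* Locate the exact NUL within the final doubleword. */
        const char *p = (const char *)w;
        while (*p)
            p++;
        return (size_t)(p - s);
    }
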
diff --git a/libc/string/stpcpy.c b/libc/string/stpcpy.c
index 8a487584e..2fd2c0648 100644
--- a/libc/string/stpcpy.c
+++ b/libc/string/stpcpy.c
@@ -10,19 +10,13 @@
#ifdef WANT_WIDE
# define Wstpcpy wcpcpy
#else
-/* Experimentally off - libc_hidden_proto(stpcpy) */
+# undef stpcpy
# define Wstpcpy stpcpy
#endif
Wchar *Wstpcpy(register Wchar * __restrict s1, const Wchar * __restrict s2)
{
-#ifdef __BCC__
- do {
- *s1 = *s2++;
- } while (*s1++ != 0);
-#else
while ( (*s1++ = *s2++) != 0 );
-#endif
return s1 - 1;
}
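
The return value, s1 - 1, is the address of the NUL just written; that is what distinguishes stpcpy from strcpy and makes chained copies cheap. A small usage sketch:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char buf[64];
        char *p = buf;

        /* Each call resumes exactly where the previous one ended. */
        p = stpcpy(p, "hello");
        p = stpcpy(p, ", ");
        p = stpcpy(p, "world");

        printf("%s (%zu bytes)\n", buf, (size_t)(p - buf));  /* 12 bytes */
        return 0;
    }
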
diff --git a/libc/string/stpncpy.c b/libc/string/stpncpy.c
index dac8471fd..50d83a131 100644
--- a/libc/string/stpncpy.c
+++ b/libc/string/stpncpy.c
@@ -10,7 +10,6 @@
#ifdef WANT_WIDE
# define Wstpncpy wcpncpy
#else
-/* Experimentally off - libc_hidden_proto(stpncpy) */
# define Wstpncpy stpncpy
#endif
@@ -21,22 +20,10 @@ Wchar *Wstpncpy(register Wchar * __restrict s1,
Wchar *s = s1;
const Wchar *p = s2;
-#ifdef __BCC__
- while (n--) {
- if ((*s = *s2) != 0) s2++; /* Need to fill tail with 0s. */
- ++s;
- }
- return s1 + (s2 - p);
-#else
while (n) {
if ((*s = *s2) != 0) s2++; /* Need to fill tail with 0s. */
++s;
--n;
}
return s1 + (s2 - p);
-#endif
}
-
-#ifndef WANT_WIDE
-libc_hidden_def(stpncpy)
-#endif
diff --git a/libc/string/strcasecmp.c b/libc/string/strcasecmp.c
index f9852236b..f894e426e 100644
--- a/libc/string/strcasecmp.c
+++ b/libc/string/strcasecmp.c
@@ -12,28 +12,15 @@
#ifdef WANT_WIDE
# define strcasecmp wcscasecmp
# define strcasecmp_l wcscasecmp_l
-libc_hidden_proto(wcscasecmp)
-# if defined(__USE_GNU) && defined(__UCLIBC_HAS_XLOCALE__)
-libc_hidden_proto(wcscasecmp_l)
-# endif
# ifdef __UCLIBC_DO_XLOCALE
-libc_hidden_proto(towlower_l)
# define TOLOWER(C) towlower_l((C), locale_arg)
# else
-libc_hidden_proto(towlower)
# define TOLOWER(C) towlower((C))
# endif
#else
-/* Experimentally off - libc_hidden_proto(strcasecmp) */
-/* Experimentally off - libc_hidden_proto(strcasecmp_l) */
# ifdef __UCLIBC_DO_XLOCALE
-libc_hidden_proto(tolower_l)
# define TOLOWER(C) tolower_l((C), locale_arg)
# else
-#if !defined __UCLIBC_HAS_XLOCALE__ && defined __UCLIBC_HAS_CTYPE_TABLES__
-libc_hidden_proto(__ctype_tolower)
-#endif
-libc_hidden_proto(tolower)
# define TOLOWER(C) tolower((C))
# endif
#endif
@@ -44,11 +31,12 @@ int strcasecmp(register const Wchar *s1, register const Wchar *s2)
{
return strcasecmp_l(s1, s2, __UCLIBC_CURLOCALE);
}
+#ifndef WANT_WIDE
libc_hidden_def(strcasecmp)
+#endif
#else /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */
-/* Experimentally off - libc_hidden_proto(__XL_NPP(strcasecmp)) */
int __XL_NPP(strcasecmp)(register const Wchar *s1, register const Wchar *s2
__LOCALE_PARAM )
{
@@ -73,6 +61,8 @@ int __XL_NPP(strcasecmp)(register const Wchar *s1, register const Wchar *s2
return r;
#endif
}
+#if !defined WANT_WIDE || (defined WANT_WIDE && defined __UCLIBC_DO_XLOCALE)
libc_hidden_def(__XL_NPP(strcasecmp))
+#endif
#endif /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */
diff --git a/libc/string/strcasestr.c b/libc/string/strcasestr.c
index 2671b4b98..3334086bf 100644
--- a/libc/string/strcasestr.c
+++ b/libc/string/strcasestr.c
@@ -8,13 +8,6 @@
#include "_string.h"
#include <ctype.h>
-#ifdef __UCLIBC_HAS_XLOCALE__
-libc_hidden_proto(__ctype_tolower_loc)
-#elif defined __UCLIBC_HAS_CTYPE_TABLES__
-libc_hidden_proto(__ctype_tolower)
-#endif
-libc_hidden_proto(tolower)
-
char *strcasestr(const char *s1, const char *s2)
{
register const char *s = s1;
diff --git a/libc/string/strcat.c b/libc/string/strcat.c
index 40a9be111..63619bcc8 100644
--- a/libc/string/strcat.c
+++ b/libc/string/strcat.c
@@ -13,8 +13,6 @@
# define Wstrcat strcat
#endif
-libc_hidden_proto(Wstrcat)
-
Wchar *Wstrcat(Wchar * __restrict s1, register const Wchar * __restrict s2)
{
register Wchar *s = s1;
diff --git a/libc/string/strchr.c b/libc/string/strchr.c
index 329545e9f..7ea477362 100644
--- a/libc/string/strchr.c
+++ b/libc/string/strchr.c
@@ -13,8 +13,6 @@
# define Wstrchr strchr
#endif
-libc_hidden_proto(Wstrchr)
-
Wchar *Wstrchr(register const Wchar *s, Wint c)
{
do {
@@ -25,8 +23,9 @@ Wchar *Wstrchr(register const Wchar *s, Wint c)
return NULL;
}
-libc_hidden_def(Wstrchr)
-
-#if !defined WANT_WIDE && defined __UCLIBC_SUSV3_LEGACY__
+#ifndef WANT_WIDE
+libc_hidden_def(strchr)
+# ifdef __UCLIBC_SUSV3_LEGACY__
weak_alias(strchr,index)
+# endif
#endif
diff --git a/libc/string/strchrnul.c b/libc/string/strchrnul.c
index 6fe7f6c3d..9c10e1fc8 100644
--- a/libc/string/strchrnul.c
+++ b/libc/string/strchrnul.c
@@ -15,13 +15,13 @@
# define Wstrchrnul strchrnul
#endif
-libc_hidden_proto(Wstrchrnul)
-
Wchar *Wstrchrnul(register const Wchar *s, Wint c)
{
--s;
while (*++s && (*s != ((Wchar)c)));
return (Wchar *) s;
}
-libc_hidden_def(Wstrchrnul)
+# ifndef WANT_WIDE
+libc_hidden_def(strchrnul)
+# endif
#endif
diff --git a/libc/string/strcmp.c b/libc/string/strcmp.c
index 5477adf3a..abae61812 100644
--- a/libc/string/strcmp.c
+++ b/libc/string/strcmp.c
@@ -15,8 +15,6 @@
# define Wstrcoll strcoll
#endif
-libc_hidden_proto(Wstrcmp)
-
int Wstrcmp(register const Wchar *s1, register const Wchar *s2)
{
#ifdef WANT_WIDE
@@ -40,7 +38,6 @@ int Wstrcmp(register const Wchar *s1, register const Wchar *s2)
libc_hidden_def(Wstrcmp)
#ifndef __UCLIBC_HAS_LOCALE__
-libc_hidden_proto(Wstrcoll)
strong_alias(Wstrcmp,Wstrcoll)
libc_hidden_def(Wstrcoll)
#endif
diff --git a/libc/string/strcpy.c b/libc/string/strcpy.c
index cda4094ac..549360c22 100644
--- a/libc/string/strcpy.c
+++ b/libc/string/strcpy.c
@@ -13,20 +13,15 @@
# define Wstrcpy strcpy
#endif
-libc_hidden_proto(Wstrcpy)
-
Wchar *Wstrcpy(Wchar * __restrict s1, const Wchar * __restrict s2)
{
register Wchar *s = s1;
-#ifdef __BCC__
- do {
- *s = *s2++;
- } while (*s++ != 0);
-#else
while ( (*s++ = *s2++) != 0 );
-#endif
return s1;
}
-libc_hidden_def(Wstrcpy)
+
+#ifndef WANT_WIDE
+libc_hidden_def(strcpy)
+#endif
diff --git a/libc/string/strcspn.c b/libc/string/strcspn.c
index 1ec460a15..0466af99b 100644
--- a/libc/string/strcspn.c
+++ b/libc/string/strcspn.c
@@ -10,7 +10,6 @@
#ifdef WANT_WIDE
# define Wstrcspn wcscspn
#else
-/* Experimentally off - libc_hidden_proto(strcspn) */
# define Wstrcspn strcspn
#endif
diff --git a/libc/string/strdup.c b/libc/string/strdup.c
index 61fc186c8..049a23f63 100644
--- a/libc/string/strdup.c
+++ b/libc/string/strdup.c
@@ -9,16 +9,12 @@
#include <stdlib.h>
#ifdef WANT_WIDE
-libc_hidden_proto(wcslen)
# define Wstrdup wcsdup
# define Wstrlen wcslen
#else
-/* Experimentally off - libc_hidden_proto(strdup) */
-/* Experimentally off - libc_hidden_proto(strlen) */
# define Wstrdup strdup
# define Wstrlen strlen
#endif
-/* Experimentally off - libc_hidden_proto(memcpy) */
Wchar *Wstrdup(register const Wchar *s1)
{
diff --git a/libc/string/strerror.c b/libc/string/strerror.c
index 355c7bdda..7250da07d 100644
--- a/libc/string/strerror.c
+++ b/libc/string/strerror.c
@@ -9,8 +9,6 @@
#include <string.h>
#include "_syserrmsg.h"
-/* Experimentally off - libc_hidden_proto(strerror) */
-libc_hidden_proto(__xpg_strerror_r)
char *strerror(int errnum)
{
diff --git a/libc/string/strlcpy.c b/libc/string/strlcpy.c
index cdad4dc5d..83787049a 100644
--- a/libc/string/strlcpy.c
+++ b/libc/string/strlcpy.c
@@ -11,21 +11,14 @@
# define Wstrlcpy __wcslcpy
# define Wstrxfrm wcsxfrm
#else
-/* Experimentally off - libc_hidden_proto(strlcpy) */
# define Wstrlcpy strlcpy
# define Wstrxfrm strxfrm
#endif
-
/* OpenBSD function:
* Copy at most n-1 chars from src to dst and nul-terminate dst.
* Returns strlen(src), so truncation occurred if the return value is >= n. */
-#ifdef WANT_WIDE
-size_t Wstrlcpy(register Wchar *__restrict dst,
- register const Wchar *__restrict src,
- size_t n) attribute_hidden;
-#endif
size_t Wstrlcpy(register Wchar *__restrict dst,
register const Wchar *__restrict src,
size_t n)
@@ -51,13 +44,8 @@ size_t Wstrlcpy(register Wchar *__restrict dst,
}
#ifndef WANT_WIDE
libc_hidden_def(strlcpy)
-#ifndef __UCLIBC_HAS_LOCALE__
-/* Experimentally off - libc_hidden_proto(strxfrm) */
-strong_alias(strlcpy,strxfrm)
-libc_hidden_def(strxfrm)
#endif
-#else
+
#ifndef __UCLIBC_HAS_LOCALE__
-strong_alias(__wcslcpy,wcsxfrm)
-#endif
+strong_alias(Wstrlcpy,Wstrxfrm)
#endif
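
The OpenBSD comment states the contract: the return value is strlen(src), so a caller detects truncation by comparing it against the destination size. A usage sketch (strlcpy is a BSD extension, so the prototype may need a feature-test macro on other C libraries):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char dst[8];

        size_t n = strlcpy(dst, "a fairly long string", sizeof(dst));
        if (n >= sizeof(dst))
            printf("truncated to \"%s\" (%zu chars needed)\n", dst, n);
        return 0;
    }
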
diff --git a/libc/string/strlen.c b/libc/string/strlen.c
index 2edb6e4e8..021a8cabc 100644
--- a/libc/string/strlen.c
+++ b/libc/string/strlen.c
@@ -13,8 +13,6 @@
# define Wstrlen strlen
#endif
-libc_hidden_proto(Wstrlen)
-
size_t Wstrlen(const Wchar *s)
{
register const Wchar *p;
diff --git a/libc/string/strncasecmp.c b/libc/string/strncasecmp.c
index ed052fa21..2eac47dd4 100644
--- a/libc/string/strncasecmp.c
+++ b/libc/string/strncasecmp.c
@@ -12,28 +12,15 @@
#ifdef WANT_WIDE
# define strncasecmp wcsncasecmp
# define strncasecmp_l wcsncasecmp_l
-libc_hidden_proto(wcsncasecmp)
-# if defined(__USE_GNU) && defined(__UCLIBC_HAS_XLOCALE__)
-libc_hidden_proto(wcsncasecmp_l)
-# endif
# ifdef __UCLIBC_DO_XLOCALE
-libc_hidden_proto(towlower_l)
# define TOLOWER(C) towlower_l((C), locale_arg)
# else
-libc_hidden_proto(towlower)
# define TOLOWER(C) towlower((C))
# endif
#else
-/* Experimentally off - libc_hidden_proto(strncasecmp) */
-/* Experimentally off - libc_hidden_proto(strncasecmp_l) */
# ifdef __UCLIBC_DO_XLOCALE
-libc_hidden_proto(tolower_l)
# define TOLOWER(C) tolower_l((C), locale_arg)
# else
-#if !defined __UCLIBC_HAS_XLOCALE__ && defined __UCLIBC_HAS_CTYPE_TABLES__
-libc_hidden_proto(__ctype_tolower)
-#endif
-libc_hidden_proto(tolower)
# define TOLOWER(C) tolower((C))
# endif
#endif
@@ -44,11 +31,12 @@ int strncasecmp(register const Wchar *s1, register const Wchar *s2, size_t n)
{
return strncasecmp_l(s1, s2, n, __UCLIBC_CURLOCALE);
}
+#ifndef WANT_WIDE
libc_hidden_def(strncasecmp)
+#endif
#else /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */
-/* Experimentally off - libc_hidden_proto(__XL_NPP(strncasecmp)) */
int __XL_NPP(strncasecmp)(register const Wchar *s1, register const Wchar *s2,
size_t n __LOCALE_PARAM )
{
@@ -76,6 +64,8 @@ int __XL_NPP(strncasecmp)(register const Wchar *s1, register const Wchar *s2,
return r;
#endif
}
+#if !defined WANT_WIDE || (defined WANT_WIDE && defined __UCLIBC_DO_XLOCALE)
libc_hidden_def(__XL_NPP(strncasecmp))
+#endif
#endif /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */
diff --git a/libc/string/strncat.c b/libc/string/strncat.c
index 0180d1328..0fa9b4ae1 100644
--- a/libc/string/strncat.c
+++ b/libc/string/strncat.c
@@ -10,7 +10,6 @@
#ifdef WANT_WIDE
# define Wstrncat wcsncat
#else
-/* Experimentally off - libc_hidden_proto(strncat) */
# define Wstrncat strncat
#endif
@@ -21,14 +20,10 @@ Wchar *Wstrncat(Wchar * __restrict s1, register const Wchar * __restrict s2,
while (*s++);
--s;
-#ifdef __BCC__
- while (n-- && ((*s = *s2++) != 0)) ++s;
-#else
while (n && ((*s = *s2++) != 0)) {
--n;
++s;
}
-#endif
*s = 0;
return s1;
diff --git a/libc/string/strncmp.c b/libc/string/strncmp.c
index 59e4a2c22..2da61771c 100644
--- a/libc/string/strncmp.c
+++ b/libc/string/strncmp.c
@@ -10,7 +10,6 @@
#ifdef WANT_WIDE
# define Wstrncmp wcsncmp
#else
-/* Experimentally off - libc_hidden_proto(strncmp) */
# define Wstrncmp strncmp
#endif
diff --git a/libc/string/strncpy.c b/libc/string/strncpy.c
index d93561294..4a44e1f02 100644
--- a/libc/string/strncpy.c
+++ b/libc/string/strncpy.c
@@ -10,7 +10,6 @@
#ifdef WANT_WIDE
# define Wstrncpy wcsncpy
#else
-/* Experimentally off - libc_hidden_proto(strncpy) */
# define Wstrncpy strncpy
#endif
@@ -19,18 +18,11 @@ Wchar *Wstrncpy(Wchar * __restrict s1, register const Wchar * __restrict s2,
{
register Wchar *s = s1;
-#ifdef __BCC__
- while (n--) {
- if ((*s = *s2) != 0) s2++; /* Need to fill tail with 0s. */
- ++s;
- }
-#else
while (n) {
if ((*s = *s2) != 0) s2++; /* Need to fill tail with 0s. */
++s;
--n;
}
-#endif
return s1;
}
diff --git a/libc/string/strndup.c b/libc/string/strndup.c
index 96a36d404..8e608669c 100644
--- a/libc/string/strndup.c
+++ b/libc/string/strndup.c
@@ -8,9 +8,6 @@
#include "_string.h"
#include <stdlib.h>
-/* Experimentally off - libc_hidden_proto(strndup) */
-/* Experimentally off - libc_hidden_proto(strnlen) */
-/* Experimentally off - libc_hidden_proto(memcpy) */
char *strndup(register const char *s1, size_t n)
{
diff --git a/libc/string/strnlen.c b/libc/string/strnlen.c
index 8fbc25c11..08de0887d 100644
--- a/libc/string/strnlen.c
+++ b/libc/string/strnlen.c
@@ -15,26 +15,17 @@
# define Wstrnlen strnlen
#endif
-libc_hidden_proto(Wstrnlen)
-
size_t Wstrnlen(const Wchar *s, size_t max)
{
register const Wchar *p = s;
-#ifdef __BCC__
- /* bcc can optimize the counter if it thinks it is a pointer... */
- register const char *maxp = (const char *) max;
-#else
-# define maxp max
-#endif
- while (maxp && *p) {
+ while (max && *p) {
++p;
- --maxp;
+ --max;
}
return p - s;
}
-#undef maxp
libc_hidden_def(Wstrnlen)
#endif
diff --git a/libc/string/strpbrk.c b/libc/string/strpbrk.c
index abeb84380..ddfc75172 100644
--- a/libc/string/strpbrk.c
+++ b/libc/string/strpbrk.c
@@ -13,8 +13,6 @@
# define Wstrpbrk strpbrk
#endif
-libc_hidden_proto(Wstrpbrk)
-
Wchar *Wstrpbrk(const Wchar *s1, const Wchar *s2)
{
register const Wchar *s;
diff --git a/libc/string/strrchr.c b/libc/string/strrchr.c
index 253c4166d..db12bbc7c 100644
--- a/libc/string/strrchr.c
+++ b/libc/string/strrchr.c
@@ -10,7 +10,6 @@
#ifdef WANT_WIDE
# define Wstrrchr wcsrchr
#else
-/* Experimentally off - libc_hidden_proto(strrchr) */
# define Wstrrchr strrchr
#endif
diff --git a/libc/string/strsep.c b/libc/string/strsep.c
index 373b00a71..ce17dcf89 100644
--- a/libc/string/strsep.c
+++ b/libc/string/strsep.c
@@ -9,10 +9,7 @@
#ifdef __USE_BSD
-/* Experimentally off - libc_hidden_proto(strpbrk) */
-/* Experimentally off - libc_hidden_proto(strcspn) */
-/* Experimentally off - libc_hidden_proto(strsep) */
char *strsep(char ** __restrict s1, const char * __restrict s2)
{
register char *s = *s1;
diff --git a/libc/string/strsignal.c b/libc/string/strsignal.c
index ee083d649..0fbbf8504 100644
--- a/libc/string/strsignal.c
+++ b/libc/string/strsignal.c
@@ -18,16 +18,13 @@
#include <bits/uClibc_uintmaxtostr.h>
#include <signal.h>
-/* Experimentally off - libc_hidden_proto(strsignal) */
-/* Experimentally off - libc_hidden_proto(memcpy) */
-
#define _SYS_NSIG 32
#ifdef __UCLIBC_HAS_SIGNUM_MESSAGES__
# define _SYS_SIGMSG_MAXLEN 25
-#else /* __UCLIBC_HAS_SIGNUM_MESSAGES__ */
+#else
# define _SYS_SIGMSG_MAXLEN 0
-#endif /* __UCLIBC_HAS_SIGNUM_MESSAGES__ */
+#endif
#if _SYS_SIGMSG_MAXLEN < __UIM_BUFLEN_INT + 15
# define _STRSIGNAL_BUFSIZE (__UIM_BUFLEN_INT + 15)
@@ -85,16 +82,16 @@ static const unsigned char sstridx[] = {
char *strsignal(int signum)
{
- register char *s;
- int i;
- static char buf[_STRSIGNAL_BUFSIZE];
- static const char unknown[] = {
+ register char *s;
+ int i;
+ static char buf[_STRSIGNAL_BUFSIZE];
+ static const char unknown[] = {
'U', 'n', 'k', 'n', 'o', 'w', 'n', ' ', 's', 'i', 'g', 'n', 'a', 'l', ' '
- };
+ };
#if defined(__alpha__) || defined(__mips__) || defined(__hppa__) || defined(__sparc__)
/* Need to translate signum to string index. */
- for (i = 0 ; i < sizeof(sstridx)/sizeof(sstridx[0]) ; i++) {
+ for (i = 0; i < sizeof(sstridx)/sizeof(sstridx[0]); i++) {
if (sstridx[i] == signum) {
goto GOT_SSTRIDX;
}
@@ -106,12 +103,12 @@ char *strsignal(int signum)
i = signum;
#endif
- if (((unsigned int) signum) < _SYS_NSIG) {
+ if (((unsigned int) signum) < _SYS_NSIG) {
/* Trade time for space. This function should rarely be called
* so rather than keeping an array of pointers for the different
* messages, just run through the buffer until we find the
* correct string. */
- for (s = (char *) _string_syssigmsgs ; i ; ++s) {
+ for (s = (char *) _string_syssigmsgs; i; ++s) {
if (!*s) {
--i;
}
@@ -119,10 +116,10 @@ char *strsignal(int signum)
if (*s) { /* Make sure we have an actual message. */
goto DONE;
}
- }
+ }
- s = _int10tostr(buf+sizeof(buf)-1, signum) - sizeof(unknown);
- memcpy(s, unknown, sizeof(unknown));
+ s = _int10tostr(buf + sizeof(buf)-1, signum) - sizeof(unknown);
+ memcpy(s, unknown, sizeof(unknown));
DONE:
return s;
@@ -132,13 +129,12 @@ char *strsignal(int signum)
char *strsignal(int signum)
{
- static char buf[_STRSIGNAL_BUFSIZE];
- static const char unknown[] = {
+ static char buf[_STRSIGNAL_BUFSIZE];
+ static const char unknown[] = {
'U', 'n', 'k', 'n', 'o', 'w', 'n', ' ', 's', 'i', 'g', 'n', 'a', 'l', ' '
- };
+ };
- return (char *) memcpy(_int10tostr(buf+sizeof(buf)-1, signum)
- - sizeof(unknown),
+ return memcpy(_int10tostr(buf + sizeof(buf)-1, signum) - sizeof(unknown),
unknown, sizeof(unknown));
}
diff --git a/libc/string/strspn.c b/libc/string/strspn.c
index ca83ef900..942b6f308 100644
--- a/libc/string/strspn.c
+++ b/libc/string/strspn.c
@@ -13,8 +13,6 @@
# define Wstrspn strspn
#endif
-libc_hidden_proto(Wstrspn)
-
size_t Wstrspn(const Wchar *s1, const Wchar *s2)
{
register const Wchar *s = s1;
diff --git a/libc/string/strstr.c b/libc/string/strstr.c
index 05712e62b..7e2a64e7d 100644
--- a/libc/string/strstr.c
+++ b/libc/string/strstr.c
@@ -10,7 +10,6 @@
#ifdef WANT_WIDE
# define Wstrstr wcsstr
#else
-/* Experimentally off - libc_hidden_proto(strstr) */
# define Wstrstr strstr
#endif
@@ -39,6 +38,6 @@ Wchar *Wstrstr(const Wchar *s1, const Wchar *s2)
}
#ifndef WANT_WIDE
libc_hidden_def(strstr)
-#else
+#elif defined __UCLIBC_SUSV3_LEGACY__
strong_alias(wcsstr,wcswcs)
#endif
diff --git a/libc/string/strtok.c b/libc/string/strtok.c
index 159dd6b6a..c337d81a7 100644
--- a/libc/string/strtok.c
+++ b/libc/string/strtok.c
@@ -15,7 +15,6 @@
# define Wstrtok_r strtok_r
#endif
-/* Experimentally off - libc_hidden_proto(Wstrtok_r) */
Wchar *Wstrtok(Wchar * __restrict s1, const Wchar * __restrict s2)
{
diff --git a/libc/string/strtok_r.c b/libc/string/strtok_r.c
index 2ad7746b1..2026888f8 100644
--- a/libc/string/strtok_r.c
+++ b/libc/string/strtok_r.c
@@ -8,15 +8,10 @@
#include "_string.h"
#ifdef WANT_WIDE
-libc_hidden_proto(wcsspn)
-libc_hidden_proto(wcspbrk)
# define Wstrtok_r wcstok
# define Wstrspn wcsspn
# define Wstrpbrk wcspbrk
#else
-/* Experimentally off - libc_hidden_proto(strtok_r) */
-/* Experimentally off - libc_hidden_proto(strspn) */
-/* Experimentally off - libc_hidden_proto(strpbrk) */
# define Wstrtok_r strtok_r
# define Wstrspn strspn
# define Wstrpbrk strpbrk
diff --git a/libc/string/strverscmp.c b/libc/string/strverscmp.c
new file mode 100644
index 000000000..7818a9186
--- /dev/null
+++ b/libc/string/strverscmp.c
@@ -0,0 +1,106 @@
+/* Compare strings while treating digit characters numerically.
+ Copyright (C) 1997-2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Jean-François Bignolles <bignolle@ecoledoc.ibp.fr>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdint.h>
+#include <string.h>
+#include <ctype.h>
+
+/* states: S_N: normal, S_I: comparing integral part, S_F: comparing
+   fractional parts, S_Z: same but with leading zeroes only */
+#define S_N 0x0
+#define S_I 0x3
+#define S_F 0x6
+#define S_Z 0x9
+
+/* result_type: CMP: return diff; LEN: compare using len_diff/diff */
+#define CMP 2
+#define LEN 3
+
+
+/* Compare S1 and S2 as strings holding indices/version numbers,
+ returning less than, equal to or greater than zero if S1 is less than,
+ equal to or greater than S2 (for more info, see the texinfo doc).
+*/
+
+int strverscmp (const char *s1, const char *s2)
+{
+ const unsigned char *p1 = (const unsigned char *) s1;
+ const unsigned char *p2 = (const unsigned char *) s2;
+
+ /* Symbol(s) 0 [1-9] others
+ Transition (10) 0 (01) d (00) x */
+ static const uint8_t next_state[] =
+ {
+ /* state x d 0 */
+ /* S_N */ S_N, S_I, S_Z,
+ /* S_I */ S_N, S_I, S_I,
+ /* S_F */ S_N, S_F, S_F,
+ /* S_Z */ S_N, S_F, S_Z
+ };
+
+ static const int8_t result_type[] =
+ {
+ /* state x/x x/d x/0 d/x d/d d/0 0/x 0/d 0/0 */
+
+ /* S_N */ CMP, CMP, CMP, CMP, LEN, CMP, CMP, CMP, CMP,
+ /* S_I */ CMP, -1, -1, +1, LEN, LEN, +1, LEN, LEN,
+ /* S_F */ CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP,
+ /* S_Z */ CMP, +1, +1, -1, CMP, CMP, -1, CMP, CMP
+ };
+ unsigned char c1, c2;
+ int state, diff;
+
+ if (p1 == p2)
+ return 0;
+
+ c1 = *p1++;
+ c2 = *p2++;
+ /* Hint: '0' is a digit too. */
+ state = S_N + ((c1 == '0') + (isdigit (c1) != 0));
+
+ while ((diff = c1 - c2) == 0)
+ {
+ if (c1 == '\0')
+ return diff;
+
+ state = next_state[state];
+ c1 = *p1++;
+ c2 = *p2++;
+ state += (c1 == '0') + (isdigit (c1) != 0);
+ }
+
+ state = result_type[state * 3 + (((c2 == '0') + (isdigit (c2) != 0)))];
+
+ switch (state)
+ {
+ case CMP:
+ return diff;
+
+ case LEN:
+ while (isdigit (*p1++))
+ if (!isdigit (*p2++))
+ return 1;
+
+ return isdigit (*p2) ? -1 : diff;
+
+ default:
+ return state;
+ }
+}
+libc_hidden_def(strverscmp)
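The new function orders embedded decimal runs numerically instead of lexicographically. A quick illustration (not part of the patch) of the resulting ordering:

#define _GNU_SOURCE
#include <string.h>
#include <assert.h>

int main(void)
{
    assert(strverscmp("item9", "item10") < 0);   /* 9 < 10 numerically */
    assert(strcmp("item9", "item10") > 0);       /* but '9' > '1' lexically */
    assert(strverscmp("item10", "item10") == 0);
    return 0;
}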
diff --git a/libc/string/sys_errlist.c b/libc/string/sys_errlist.c
index 17ed4d62c..682ff0e7e 100644
--- a/libc/string/sys_errlist.c
+++ b/libc/string/sys_errlist.c
@@ -12,8 +12,6 @@ extern const char _string_syserrmsgs[] attribute_hidden;
#ifdef __UCLIBC_HAS_SYS_ERRLIST__
-link_warning(_sys_errlist, "sys_nerr and sys_errlist are obsolete and uClibc support for them (in at least some configurations) will probably be unavailable in the near future.")
-
const char *const sys_errlist[] = {
[0] = _string_syserrmsgs + 0,
[EPERM] = _string_syserrmsgs + 8,
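The table is built with C99 designated initializers, so each entry is keyed directly by its errno value and unnamed slots default to NULL. A standalone illustration of the idiom:

#include <errno.h>
#include <stdio.h>

static const char *const msgs[] = {
    [0]     = "Success",
    [EPERM] = "Operation not permitted",
};

int main(void)
{
    printf("%s\n", msgs[EPERM]);
    return 0;
}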
diff --git a/libc/string/x86_64/bzero.S b/libc/string/x86_64/bzero.S
index 4d179ec4e..231d7cb41 100644
--- a/libc/string/x86_64/bzero.S
+++ b/libc/string/x86_64/bzero.S
@@ -1,5 +1,6 @@
#include <features.h>
#ifdef __UCLIBC_SUSV3_LEGACY__
# define memset bzero
+# define __memset_chk __bzero_chk
# include "memset.S"
#endif
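The added define keeps the rename trick consistent: when memset.S is assembled a second time with its entry point renamed to bzero, the fortified entry must also be renamed (to __bzero_chk) or it would collide with the __memset_chk already emitted from memset.o. The rename is sound because the two interfaces are semantically identical:

#include <string.h>
#include <strings.h>

int main(void)
{
    char a[16], b[16];
    memset(a, 0x55, sizeof a);
    memset(b, 0x55, sizeof b);

    bzero(a, sizeof a);       /* legacy SUSv3 interface */
    memset(b, 0, sizeof b);   /* equivalent modern spelling */
    return memcmp(a, b, sizeof a) != 0;  /* expect 0: identical buffers */
}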
diff --git a/libc/string/x86_64/memcpy.S b/libc/string/x86_64/memcpy.S
index 697b992d0..e164278df 100644
--- a/libc/string/x86_64/memcpy.S
+++ b/libc/string/x86_64/memcpy.S
@@ -14,9 +14,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include "_glibc_inc.h"
@@ -26,7 +25,7 @@
#define MEMPCPY_P (defined memcpy)
.text
-#if defined PIC && !defined NOT_IN_libc
+#if defined __PIC__ && !defined NOT_IN_libc && defined __UCLIBC_HAS_FORTIFY__
ENTRY (__memcpy_chk)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
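The tightened guard builds the checked entry only for PIC libc builds with fortify enabled. The two instructions shown are the entire contract: %rcx carries the destination object's size, %rdx the requested length, and the copy is refused when the object is too small. A C sketch of those semantics (an illustration, not the uClibc source; memcpy_chk_sketch is a made-up name):

#include <stddef.h>
#include <string.h>

extern void __chk_fail(void);  /* fortify failure hook: diagnoses and aborts */

void *memcpy_chk_sketch(void *dst, const void *src, size_t n, size_t dstlen)
{
    if (dstlen < n)            /* cmpq %rdx, %rcx ; jb __chk_fail */
        __chk_fail();
    return memcpy(dst, src, n);
}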
diff --git a/libc/string/x86_64/mempcpy.S b/libc/string/x86_64/mempcpy.S
index 3816d9f72..b0607aa57 100644
--- a/libc/string/x86_64/mempcpy.S
+++ b/libc/string/x86_64/mempcpy.S
@@ -1,3 +1,4 @@
#define memcpy mempcpy
+#define __memcpy_chk __mempcpy_chk
#include "memcpy.S"
libc_hidden_def(mempcpy)
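mempcpy can share memcpy's body because it differs only in the return value: it yields the first byte past the copy, which lets appends chain without recomputing lengths, e.g.:

#define _GNU_SOURCE
#include <string.h>
#include <stdio.h>

int main(void)
{
    char buf[16];
    char *p = buf;
    p = mempcpy(p, "foo", 3);  /* returns buf + 3 */
    p = mempcpy(p, "bar", 3);  /* returns buf + 6 */
    *p = '\0';
    puts(buf);                 /* prints "foobar" */
    return 0;
}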
diff --git a/libc/string/x86_64/memset.S b/libc/string/x86_64/memset.S
index 46751006b..d6744129d 100644
--- a/libc/string/x86_64/memset.S
+++ b/libc/string/x86_64/memset.S
@@ -15,9 +15,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include "_glibc_inc.h"
@@ -29,7 +28,7 @@
#define LARGE $120000
.text
-#if !BZERO_P && defined PIC && !defined NOT_IN_libc
+#if defined __PIC__ && !defined NOT_IN_libc && defined __UCLIBC_HAS_FORTIFY__
ENTRY (__memset_chk)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
@@ -142,6 +141,6 @@ END (memset)
libc_hidden_def(memset)
#endif
-#if !BZERO_P && defined PIC && !defined NOT_IN_libc
+#if !BZERO_P && defined __PIC__ && !defined NOT_IN_libc && defined __UCLIBC_HAS_FORTIFY__
strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
#endif
diff --git a/libc/string/x86_64/strcat.S b/libc/string/x86_64/strcat.S
index 23d068fea..55e09e5f1 100644
--- a/libc/string/x86_64/strcat.S
+++ b/libc/string/x86_64/strcat.S
@@ -15,9 +15,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include "_glibc_inc.h"
diff --git a/libc/string/x86_64/strchr.S b/libc/string/x86_64/strchr.S
index 9ef46b7f2..256b97911 100644
--- a/libc/string/x86_64/strchr.S
+++ b/libc/string/x86_64/strchr.S
@@ -14,9 +14,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include "_glibc_inc.h"
diff --git a/libc/string/x86_64/strcmp.S b/libc/string/x86_64/strcmp.S
index 437e145bf..05d6f39c1 100644
--- a/libc/string/x86_64/strcmp.S
+++ b/libc/string/x86_64/strcmp.S
@@ -15,9 +15,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include "_glibc_inc.h"
diff --git a/libc/string/x86_64/strcpy.S b/libc/string/x86_64/strcpy.S
index 612a30d1a..3ada70fbd 100644
--- a/libc/string/x86_64/strcpy.S
+++ b/libc/string/x86_64/strcpy.S
@@ -14,9 +14,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include "_glibc_inc.h"
diff --git a/libc/string/x86_64/strcspn.S b/libc/string/x86_64/strcspn.S
index fd9b09c48..7a06c8867 100644
--- a/libc/string/x86_64/strcspn.S
+++ b/libc/string/x86_64/strcspn.S
@@ -19,9 +19,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include "_glibc_inc.h"
diff --git a/libc/string/x86_64/strlen.S b/libc/string/x86_64/strlen.S
index 4213f0ab6..9e84326c2 100644
--- a/libc/string/x86_64/strlen.S
+++ b/libc/string/x86_64/strlen.S
@@ -14,9 +14,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include "_glibc_inc.h"
diff --git a/libc/string/x86_64/strspn.S b/libc/string/x86_64/strspn.S
index 41cff0490..366377649 100644
--- a/libc/string/x86_64/strspn.S
+++ b/libc/string/x86_64/strspn.S
@@ -19,9 +19,8 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#include "_glibc_inc.h"
diff --git a/libc/string/xtensa/memcpy.S b/libc/string/xtensa/memcpy.S
index 19f3a6818..244205611 100644
--- a/libc/string/xtensa/memcpy.S
+++ b/libc/string/xtensa/memcpy.S
@@ -13,11 +13,10 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
- Boston, MA 02110-1301, USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
-#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <sysdep.h>
#include <bits/xtensa-config.h>
.macro src_b r, w0, w1
@@ -83,7 +82,7 @@ __memcpy_aux:
loopnez a4, 2f
#else
beqz a4, 2f
- add a7, a3, a4 // a7 = end address for source
+ add a7, a3, a4 /* a7 = end address for source */
#endif
1: l8ui a6, a3, 0
addi a3, a3, 1
@@ -92,13 +91,13 @@ __memcpy_aux:
#if !XCHAL_HAVE_LOOPS
blt a3, a7, 1b
#endif
-2: retw
+2: abi_ret
/* Destination is unaligned. */
.align 4
-.Ldst1mod2: // dst is only byte aligned
+.Ldst1mod2: /* dst is only byte aligned */
/* Do short copies byte-by-byte. */
_bltui a4, 7, .Lbytecopy
@@ -113,7 +112,7 @@ __memcpy_aux:
/* Return to main algorithm if dst is now aligned. */
_bbci.l a5, 1, .Ldstaligned
-.Ldst2mod4: // dst has 16-bit alignment
+.Ldst2mod4: /* dst has 16-bit alignment */
/* Do short copies byte-by-byte. */
_bltui a4, 6, .Lbytecopy
@@ -134,7 +133,7 @@ __memcpy_aux:
ENTRY (memcpy)
/* a2 = dst, a3 = src, a4 = len */
- mov a5, a2 // copy dst so that a2 is return value
+ mov a5, a2 /* copy dst so that a2 is return value */
_bbsi.l a2, 0, .Ldst1mod2
_bbsi.l a2, 1, .Ldst2mod4
.Ldstaligned:
@@ -152,7 +151,7 @@ ENTRY (memcpy)
#else
beqz a7, 2f
slli a8, a7, 4
- add a8, a8, a3 // a8 = end of last 16B source chunk
+ add a8, a8, a3 /* a8 = end of last 16B source chunk */
#endif
1: l32i a6, a3, 0
l32i a7, a3, 4
@@ -182,7 +181,7 @@ ENTRY (memcpy)
3: bbsi.l a4, 2, 4f
bbsi.l a4, 1, 5f
bbsi.l a4, 0, 6f
- retw
+ abi_ret
/* Copy 4 bytes. */
4: l32i a6, a3, 0
@@ -191,7 +190,7 @@ ENTRY (memcpy)
addi a5, a5, 4
bbsi.l a4, 1, 5f
bbsi.l a4, 0, 6f
- retw
+ abi_ret
/* Copy 2 bytes. */
5: l16ui a6, a3, 0
@@ -199,14 +198,14 @@ ENTRY (memcpy)
s16i a6, a5, 0
addi a5, a5, 2
bbsi.l a4, 0, 6f
- retw
+ abi_ret
/* Copy 1 byte. */
6: l8ui a6, a3, 0
s8i a6, a5, 0
.Ldone:
- retw
+ abi_ret
/* Destination is aligned; source is unaligned. */
@@ -218,18 +217,18 @@ ENTRY (memcpy)
/* Copy 16 bytes per iteration for word-aligned dst and
unaligned src. */
- ssa8 a3 // set shift amount from byte offset
+ ssa8 a3 /* set shift amount from byte offset */
#if UNALIGNED_ADDRESSES_CHECKED
- and a11, a3, a8 // save unalignment offset for below
- sub a3, a3, a11 // align a3
+ and a11, a3, a8 /* save unalignment offset for below */
+ sub a3, a3, a11 /* align a3 */
#endif
- l32i a6, a3, 0 // load first word
+ l32i a6, a3, 0 /* load first word */
#if XCHAL_HAVE_LOOPS
loopnez a7, 2f
#else
beqz a7, 2f
slli a10, a7, 4
- add a10, a10, a3 // a10 = end of last 16B source chunk
+ add a10, a10, a3 /* a10 = end of last 16B source chunk */
#endif
1: l32i a7, a3, 4
l32i a8, a3, 8
@@ -273,11 +272,11 @@ ENTRY (memcpy)
mov a6, a7
4:
#if UNALIGNED_ADDRESSES_CHECKED
- add a3, a3, a11 // readjust a3 with correct misalignment
+ add a3, a3, a11 /* readjust a3 with correct misalignment */
#endif
bbsi.l a4, 1, 5f
bbsi.l a4, 0, 6f
- retw
+ abi_ret
/* Copy 2 bytes. */
5: l8ui a6, a3, 0
@@ -287,11 +286,11 @@ ENTRY (memcpy)
s8i a7, a5, 1
addi a5, a5, 2
bbsi.l a4, 0, 6f
- retw
+ abi_ret
/* Copy 1 byte. */
6: l8ui a6, a3, 0
s8i a6, a5, 0
- retw
+ abi_ret
libc_hidden_def (memcpy)
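The retw -> abi_ret substitutions throughout the xtensa files (together with the switch to the installed <sysdep.h>) make these routines usable under both Xtensa ABIs: retw is only valid for the windowed ABI. A plausible sketch of the macro, assuming it is defined along these lines in the xtensa sysdep.h (the real header may differ):

#ifdef __XTENSA_CALL0_ABI__
# define abi_ret  ret   /* call0 ABI: plain return */
#else
# define abi_ret  retw  /* windowed ABI: return, rotating the register window */
#endif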
diff --git a/libc/string/xtensa/memset.S b/libc/string/xtensa/memset.S
index c0928825d..20bf14c75 100644
--- a/libc/string/xtensa/memset.S
+++ b/libc/string/xtensa/memset.S
@@ -13,11 +13,10 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
- Boston, MA 02110-1301, USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
-#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <sysdep.h>
#include <bits/xtensa-config.h>
/* Do not use .literal_position in the ENTRY macro. */
@@ -29,7 +28,7 @@
The algorithm is as follows:
Create a word with c in all byte positions.
-
+
If the destination is aligned, set 16B chunks with a loop, and then
finish up with 8B, 4B, 2B, and 1B stores conditional on the length.
@@ -57,21 +56,21 @@ __memset_aux:
loopnez a4, 2f
#else
beqz a4, 2f
- add a6, a5, a4 // a6 = ending address
+ add a6, a5, a4 /* a6 = ending address */
#endif
1: s8i a3, a5, 0
addi a5, a5, 1
#if !XCHAL_HAVE_LOOPS
blt a5, a6, 1b
#endif
-2: retw
+2: abi_ret
/* Destination is unaligned. */
.align 4
-.Ldst1mod2: // dst is only byte aligned
+.Ldst1mod2: /* dst is only byte aligned */
/* Do short sizes byte-by-byte. */
bltui a4, 8, .Lbyteset
@@ -84,7 +83,7 @@ __memset_aux:
/* Now retest if dst is aligned. */
_bbci.l a5, 1, .Ldstaligned
-.Ldst2mod4: // dst has 16-bit alignment
+.Ldst2mod4: /* dst has 16-bit alignment */
/* Do short sizes byte-by-byte. */
bltui a4, 8, .Lbyteset
@@ -108,7 +107,7 @@ ENTRY (memset)
slli a7, a3, 16
or a3, a3, a7
- mov a5, a2 // copy dst so that a2 is return value
+ mov a5, a2 /* copy dst so that a2 is return value */
/* Check if dst is unaligned. */
_bbsi.l a2, 0, .Ldst1mod2
@@ -124,7 +123,7 @@ ENTRY (memset)
#else
beqz a7, 2f
slli a6, a7, 4
- add a6, a6, a5 // a6 = end of last 16B chunk
+ add a6, a6, a5 /* a6 = end of last 16B chunk */
#endif
/* Set 16 bytes per iteration. */
1: s32i a3, a5, 0
@@ -160,6 +159,6 @@ ENTRY (memset)
/* Set 1 byte. */
s8i a3, a5, 0
-6: retw
+6: abi_ret
libc_hidden_def (memset)
diff --git a/libc/string/xtensa/strcmp.S b/libc/string/xtensa/strcmp.S
index 622bb27ed..2dce590db 100644
--- a/libc/string/xtensa/strcmp.S
+++ b/libc/string/xtensa/strcmp.S
@@ -13,11 +13,10 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
- Boston, MA 02110-1301, USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
-#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <sysdep.h>
#include <bits/xtensa-config.h>
#include <features.h>
@@ -35,45 +34,46 @@
#define MASK4 0x40404040
+ .text
+ .align 4
+ .literal_position
.literal .Lmask0, MASK0
.literal .Lmask1, MASK1
.literal .Lmask2, MASK2
.literal .Lmask3, MASK3
.literal .Lmask4, MASK4
-
- .text
ENTRY (strcmp)
/* a2 = s1, a3 = s2 */
- l8ui a8, a2, 0 // byte 0 from s1
- l8ui a9, a3, 0 // byte 0 from s2
- movi a10, 3 // mask
+ l8ui a8, a2, 0 /* byte 0 from s1 */
+ l8ui a9, a3, 0 /* byte 0 from s2 */
+ movi a10, 3 /* mask */
bne a8, a9, .Lretdiff
or a11, a2, a3
bnone a11, a10, .Laligned
- xor a11, a2, a3 // compare low two bits of s1 and s2
- bany a11, a10, .Lunaligned // if they have different alignment
+ xor a11, a2, a3 /* compare low two bits of s1 and s2 */
+ bany a11, a10, .Lunaligned /* if they have different alignment */
/* s1/s2 are not word-aligned. */
- addi a2, a2, 1 // advance s1
- beqz a8, .Leq // bytes equal, if zero, strings are equal
- addi a3, a3, 1 // advance s2
- bnone a2, a10, .Laligned // if s1/s2 now aligned
- l8ui a8, a2, 0 // byte 1 from s1
- l8ui a9, a3, 0 // byte 1 from s2
- addi a2, a2, 1 // advance s1
- bne a8, a9, .Lretdiff // if different, return difference
- beqz a8, .Leq // bytes equal, if zero, strings are equal
- addi a3, a3, 1 // advance s2
- bnone a2, a10, .Laligned // if s1/s2 now aligned
- l8ui a8, a2, 0 // byte 2 from s1
- l8ui a9, a3, 0 // byte 2 from s2
- addi a2, a2, 1 // advance s1
- bne a8, a9, .Lretdiff // if different, return difference
- beqz a8, .Leq // bytes equal, if zero, strings are equal
- addi a3, a3, 1 // advance s2
+ addi a2, a2, 1 /* advance s1 */
+ beqz a8, .Leq /* bytes equal, if zero, strings are equal */
+ addi a3, a3, 1 /* advance s2 */
+ bnone a2, a10, .Laligned /* if s1/s2 now aligned */
+ l8ui a8, a2, 0 /* byte 1 from s1 */
+ l8ui a9, a3, 0 /* byte 1 from s2 */
+ addi a2, a2, 1 /* advance s1 */
+ bne a8, a9, .Lretdiff /* if different, return difference */
+ beqz a8, .Leq /* bytes equal, if zero, strings are equal */
+ addi a3, a3, 1 /* advance s2 */
+ bnone a2, a10, .Laligned /* if s1/s2 now aligned */
+ l8ui a8, a2, 0 /* byte 2 from s1 */
+ l8ui a9, a3, 0 /* byte 2 from s2 */
+ addi a2, a2, 1 /* advance s1 */
+ bne a8, a9, .Lretdiff /* if different, return difference */
+ beqz a8, .Leq /* bytes equal, if zero, strings are equal */
+ addi a3, a3, 1 /* advance s2 */
j .Laligned
/* s1 and s2 have different alignment.
@@ -92,8 +92,8 @@ ENTRY (strcmp)
/* (2 mod 4) alignment for loop instruction */
.Lunaligned:
#if XCHAL_HAVE_LOOPS
- _movi.n a8, 0 // set up for the maximum loop count
- loop a8, .Lretdiff // loop forever (almost anyway)
+ _movi.n a8, 0 /* set up for the maximum loop count */
+ loop a8, .Lretdiff /* loop forever (almost anyway) */
#endif
.Lnextbyte:
l8ui a8, a2, 0
@@ -108,7 +108,7 @@ ENTRY (strcmp)
#endif
.Lretdiff:
sub a2, a8, a9
- retw
+ abi_ret
/* s1 is word-aligned; s2 is word-aligned.
@@ -131,32 +131,32 @@ ENTRY (strcmp)
#if XCHAL_HAVE_LOOPS
.Laligned:
.begin no-transform
- l32r a4, .Lmask0 // mask for byte 0
+ l32r a4, .Lmask0 /* mask for byte 0 */
l32r a7, .Lmask4
/* Loop forever. (a4 is more than the maximum number
of iterations) */
loop a4, .Laligned_done
/* First unrolled loop body. */
- l32i a8, a2, 0 // get word from s1
- l32i a9, a3, 0 // get word from s2
+ l32i a8, a2, 0 /* get word from s1 */
+ l32i a9, a3, 0 /* get word from s2 */
slli a5, a8, 1
bne a8, a9, .Lwne2
or a9, a8, a5
bnall a9, a7, .Lprobeq
/* Second unrolled loop body. */
- l32i a8, a2, 4 // get word from s1+4
- l32i a9, a3, 4 // get word from s2+4
+ l32i a8, a2, 4 /* get word from s1+4 */
+ l32i a9, a3, 4 /* get word from s2+4 */
slli a5, a8, 1
bne a8, a9, .Lwne2
or a9, a8, a5
bnall a9, a7, .Lprobeq2
- addi a2, a2, 8 // advance s1 pointer
- addi a3, a3, 8 // advance s2 pointer
+ addi a2, a2, 8 /* advance s1 pointer */
+ addi a3, a3, 8 /* advance s2 pointer */
.Laligned_done:
- or a1, a1, a1 // nop
+ or a1, a1, a1 /* nop */
.Lprobeq2:
/* Adjust pointers to account for the loop unrolling. */
@@ -166,15 +166,15 @@ ENTRY (strcmp)
#else /* !XCHAL_HAVE_LOOPS */
.Laligned:
- movi a4, MASK0 // mask for byte 0
+ movi a4, MASK0 /* mask for byte 0 */
movi a7, MASK4
j .Lfirstword
.Lnextword:
- addi a2, a2, 4 // advance s1 pointer
- addi a3, a3, 4 // advance s2 pointer
+ addi a2, a2, 4 /* advance s1 pointer */
+ addi a3, a3, 4 /* advance s2 pointer */
.Lfirstword:
- l32i a8, a2, 0 // get word from s1
- l32i a9, a3, 0 // get word from s2
+ l32i a8, a2, 0 /* get word from s1 */
+ l32i a9, a3, 0 /* get word from s2 */
slli a5, a8, 1
bne a8, a9, .Lwne2
or a9, a8, a5
@@ -186,50 +186,50 @@ ENTRY (strcmp)
/* Words are probably equal, but check for sure.
If not, loop over the rest of string using normal algorithm. */
- bnone a8, a4, .Leq // if byte 0 is zero
- l32r a5, .Lmask1 // mask for byte 1
- l32r a6, .Lmask2 // mask for byte 2
- bnone a8, a5, .Leq // if byte 1 is zero
- l32r a7, .Lmask3 // mask for byte 3
- bnone a8, a6, .Leq // if byte 2 is zero
- bnone a8, a7, .Leq // if byte 3 is zero
- addi.n a2, a2, 4 // advance s1 pointer
- addi.n a3, a3, 4 // advance s2 pointer
+ bnone a8, a4, .Leq /* if byte 0 is zero */
+ l32r a5, .Lmask1 /* mask for byte 1 */
+ l32r a6, .Lmask2 /* mask for byte 2 */
+ bnone a8, a5, .Leq /* if byte 1 is zero */
+ l32r a7, .Lmask3 /* mask for byte 3 */
+ bnone a8, a6, .Leq /* if byte 2 is zero */
+ bnone a8, a7, .Leq /* if byte 3 is zero */
+ addi.n a2, a2, 4 /* advance s1 pointer */
+ addi.n a3, a3, 4 /* advance s2 pointer */
#if XCHAL_HAVE_LOOPS
/* align (1 mod 4) */
- loop a4, .Leq // loop forever (a4 is bigger than max iters)
+ loop a4, .Leq /* loop forever (a4 is bigger than max iters) */
.end no-transform
- l32i a8, a2, 0 // get word from s1
- l32i a9, a3, 0 // get word from s2
- addi a2, a2, 4 // advance s1 pointer
+ l32i a8, a2, 0 /* get word from s1 */
+ l32i a9, a3, 0 /* get word from s2 */
+ addi a2, a2, 4 /* advance s1 pointer */
bne a8, a9, .Lwne
- bnone a8, a4, .Leq // if byte 0 is zero
- bnone a8, a5, .Leq // if byte 1 is zero
- bnone a8, a6, .Leq // if byte 2 is zero
- bnone a8, a7, .Leq // if byte 3 is zero
- addi a3, a3, 4 // advance s2 pointer
+ bnone a8, a4, .Leq /* if byte 0 is zero */
+ bnone a8, a5, .Leq /* if byte 1 is zero */
+ bnone a8, a6, .Leq /* if byte 2 is zero */
+ bnone a8, a7, .Leq /* if byte 3 is zero */
+ addi a3, a3, 4 /* advance s2 pointer */
#else /* !XCHAL_HAVE_LOOPS */
j .Lfirstword2
.Lnextword2:
- addi a3, a3, 4 // advance s2 pointer
+ addi a3, a3, 4 /* advance s2 pointer */
.Lfirstword2:
- l32i a8, a2, 0 // get word from s1
- l32i a9, a3, 0 // get word from s2
- addi a2, a2, 4 // advance s1 pointer
+ l32i a8, a2, 0 /* get word from s1 */
+ l32i a9, a3, 0 /* get word from s2 */
+ addi a2, a2, 4 /* advance s1 pointer */
bne a8, a9, .Lwne
- bnone a8, a4, .Leq // if byte 0 is zero
- bnone a8, a5, .Leq // if byte 1 is zero
- bnone a8, a6, .Leq // if byte 2 is zero
- bany a8, a7, .Lnextword2 // if byte 3 is zero
+ bnone a8, a4, .Leq /* if byte 0 is zero */
+ bnone a8, a5, .Leq /* if byte 1 is zero */
+ bnone a8, a6, .Leq /* if byte 2 is zero */
+ bany a8, a7, .Lnextword2 /* if byte 3 is zero */
#endif /* !XCHAL_HAVE_LOOPS */
/* Words are equal; some byte is zero. */
-.Leq: movi a2, 0 // return equal
- retw
+.Leq: movi a2, 0 /* return equal */
+ abi_ret
.Lwne2: /* Words are not equal. On big-endian processors, if none of the
bytes are zero, the return value can be determined by a simple
@@ -239,22 +239,22 @@ ENTRY (strcmp)
bnall a10, a7, .Lsomezero
bgeu a8, a9, .Lposreturn
movi a2, -1
- retw
+ abi_ret
.Lposreturn:
movi a2, 1
- retw
-.Lsomezero: // There is probably some zero byte.
+ abi_ret
+.Lsomezero: /* There is probably some zero byte. */
#endif /* __XTENSA_EB__ */
.Lwne: /* Words are not equal. */
- xor a2, a8, a9 // get word with nonzero in byte that differs
- bany a2, a4, .Ldiff0 // if byte 0 differs
- movi a5, MASK1 // mask for byte 1
- bnone a8, a4, .Leq // if byte 0 is zero
- bany a2, a5, .Ldiff1 // if byte 1 differs
- movi a6, MASK2 // mask for byte 2
- bnone a8, a5, .Leq // if byte 1 is zero
- bany a2, a6, .Ldiff2 // if byte 2 differs
- bnone a8, a6, .Leq // if byte 2 is zero
+ xor a2, a8, a9 /* get word with nonzero in byte that differs */
+ bany a2, a4, .Ldiff0 /* if byte 0 differs */
+ movi a5, MASK1 /* mask for byte 1 */
+ bnone a8, a4, .Leq /* if byte 0 is zero */
+ bany a2, a5, .Ldiff1 /* if byte 1 differs */
+ movi a6, MASK2 /* mask for byte 2 */
+ bnone a8, a5, .Leq /* if byte 1 is zero */
+ bany a2, a6, .Ldiff2 /* if byte 2 differs */
+ bnone a8, a6, .Leq /* if byte 2 is zero */
#ifdef __XTENSA_EB__
.Ldiff3:
.Ldiff2:
@@ -263,14 +263,14 @@ ENTRY (strcmp)
byte. Just subtract words to get the return value.
The high order equal bytes cancel, leaving room for the sign. */
sub a2, a8, a9
- retw
+ abi_ret
.Ldiff0:
/* Need to make room for the sign, so can't subtract whole words. */
extui a10, a8, 24, 8
extui a11, a9, 24, 8
sub a2, a10, a11
- retw
+ abi_ret
#else /* !__XTENSA_EB__ */
/* Little-endian is a little more difficult because we can't subtract
@@ -281,28 +281,28 @@ ENTRY (strcmp)
extui a10, a8, 24, 8
extui a11, a9, 24, 8
sub a2, a10, a11
- retw
+ abi_ret
.Ldiff0:
/* Byte 0 is different. */
extui a10, a8, 0, 8
extui a11, a9, 0, 8
sub a2, a10, a11
- retw
+ abi_ret
.Ldiff1:
/* Byte 0 is equal; byte 1 is different. */
extui a10, a8, 8, 8
extui a11, a9, 8, 8
sub a2, a10, a11
- retw
+ abi_ret
.Ldiff2:
/* Bytes 0-1 are equal; byte 2 is different. */
extui a10, a8, 16, 8
extui a11, a9, 16, 8
sub a2, a10, a11
- retw
+ abi_ret
#endif /* !__XTENSA_EB */
diff --git a/libc/string/xtensa/strcpy.S b/libc/string/xtensa/strcpy.S
index 108070384..9f42b34e6 100644
--- a/libc/string/xtensa/strcpy.S
+++ b/libc/string/xtensa/strcpy.S
@@ -13,11 +13,10 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
- Boston, MA 02110-1301, USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
-#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <sysdep.h>
#include <bits/xtensa-config.h>
#ifdef __XTENSA_EB__
@@ -36,7 +35,7 @@
ENTRY (strcpy)
/* a2 = dst, a3 = src */
- mov a10, a2 // leave dst in return value register
+ mov a10, a2 /* leave dst in return value register */
movi a4, MASK0
movi a5, MASK1
movi a6, MASK2
@@ -51,25 +50,25 @@ ENTRY (strcpy)
j .Ldstunaligned
-.Lsrc1mod2: // src address is odd
- l8ui a8, a3, 0 // get byte 0
- addi a3, a3, 1 // advance src pointer
- s8i a8, a10, 0 // store byte 0
- beqz a8, 1f // if byte 0 is zero
- addi a10, a10, 1 // advance dst pointer
- bbci.l a3, 1, .Lsrcaligned // if src is now word-aligned
+.Lsrc1mod2: /* src address is odd */
+ l8ui a8, a3, 0 /* get byte 0 */
+ addi a3, a3, 1 /* advance src pointer */
+ s8i a8, a10, 0 /* store byte 0 */
+ beqz a8, 1f /* if byte 0 is zero */
+ addi a10, a10, 1 /* advance dst pointer */
+ bbci.l a3, 1, .Lsrcaligned /* if src is now word-aligned */
-.Lsrc2mod4: // src address is 2 mod 4
- l8ui a8, a3, 0 // get byte 0
+.Lsrc2mod4: /* src address is 2 mod 4 */
+ l8ui a8, a3, 0 /* get byte 0 */
/* 1-cycle interlock */
- s8i a8, a10, 0 // store byte 0
- beqz a8, 1f // if byte 0 is zero
- l8ui a8, a3, 1 // get byte 0
- addi a3, a3, 2 // advance src pointer
- s8i a8, a10, 1 // store byte 0
- addi a10, a10, 2 // advance dst pointer
+ s8i a8, a10, 0 /* store byte 0 */
+ beqz a8, 1f /* if byte 0 is zero */
+ l8ui a8, a3, 1 /* get byte 1 */
+ addi a3, a3, 2 /* advance src pointer */
+ s8i a8, a10, 1 /* store byte 1 */
+ addi a10, a10, 2 /* advance dst pointer */
bnez a8, .Lsrcaligned
-1: retw
+1: abi_ret
/* dst is word-aligned; src is word-aligned. */
@@ -78,46 +77,46 @@ ENTRY (strcpy)
#if XCHAL_HAVE_LOOPS
/* (2 mod 4) alignment for loop instruction */
.Laligned:
- _movi.n a8, 0 // set up for the maximum loop count
- loop a8, .Lz3 // loop forever (almost anyway)
- l32i a8, a3, 0 // get word from src
- addi a3, a3, 4 // advance src pointer
- bnone a8, a4, .Lz0 // if byte 0 is zero
- bnone a8, a5, .Lz1 // if byte 1 is zero
- bnone a8, a6, .Lz2 // if byte 2 is zero
- s32i a8, a10, 0 // store word to dst
- bnone a8, a7, .Lz3 // if byte 3 is zero
- addi a10, a10, 4 // advance dst pointer
+ _movi.n a8, 0 /* set up for the maximum loop count */
+ loop a8, .Lz3 /* loop forever (almost anyway) */
+ l32i a8, a3, 0 /* get word from src */
+ addi a3, a3, 4 /* advance src pointer */
+ bnone a8, a4, .Lz0 /* if byte 0 is zero */
+ bnone a8, a5, .Lz1 /* if byte 1 is zero */
+ bnone a8, a6, .Lz2 /* if byte 2 is zero */
+ s32i a8, a10, 0 /* store word to dst */
+ bnone a8, a7, .Lz3 /* if byte 3 is zero */
+ addi a10, a10, 4 /* advance dst pointer */
#else /* !XCHAL_HAVE_LOOPS */
-1: addi a10, a10, 4 // advance dst pointer
+1: addi a10, a10, 4 /* advance dst pointer */
.Laligned:
- l32i a8, a3, 0 // get word from src
- addi a3, a3, 4 // advance src pointer
- bnone a8, a4, .Lz0 // if byte 0 is zero
- bnone a8, a5, .Lz1 // if byte 1 is zero
- bnone a8, a6, .Lz2 // if byte 2 is zero
- s32i a8, a10, 0 // store word to dst
- bany a8, a7, 1b // if byte 3 is zero
+ l32i a8, a3, 0 /* get word from src */
+ addi a3, a3, 4 /* advance src pointer */
+ bnone a8, a4, .Lz0 /* if byte 0 is zero */
+ bnone a8, a5, .Lz1 /* if byte 1 is zero */
+ bnone a8, a6, .Lz2 /* if byte 2 is zero */
+ s32i a8, a10, 0 /* store word to dst */
+ bany a8, a7, 1b /* if byte 3 is zero */
#endif /* !XCHAL_HAVE_LOOPS */
.Lz3: /* Byte 3 is zero. */
- retw
+ abi_ret
.Lz0: /* Byte 0 is zero. */
#ifdef __XTENSA_EB__
movi a8, 0
#endif
s8i a8, a10, 0
- retw
+ abi_ret
.Lz1: /* Byte 1 is zero. */
#ifdef __XTENSA_EB__
extui a8, a8, 16, 16
#endif
s16i a8, a10, 0
- retw
+ abi_ret
.Lz2: /* Byte 2 is zero. */
#ifdef __XTENSA_EB__
@@ -126,15 +125,15 @@ ENTRY (strcpy)
s16i a8, a10, 0
movi a8, 0
s8i a8, a10, 2
- retw
+ abi_ret
.align 4
/* (2 mod 4) alignment for loop instruction */
.Ldstunaligned:
#if XCHAL_HAVE_LOOPS
- _movi.n a8, 0 // set up for the maximum loop count
- loop a8, 2f // loop forever (almost anyway)
+ _movi.n a8, 0 /* set up for the maximum loop count */
+ loop a8, 2f /* loop forever (almost anyway) */
#endif
1: l8ui a8, a3, 0
addi a3, a3, 1
@@ -145,6 +144,6 @@ ENTRY (strcpy)
#else
bnez a8, 1b
#endif
-2: retw
+2: abi_ret
libc_hidden_def (strcpy)
diff --git a/libc/string/xtensa/strlen.S b/libc/string/xtensa/strlen.S
index dd72c16fa..e1c98c8f0 100644
--- a/libc/string/xtensa/strlen.S
+++ b/libc/string/xtensa/strlen.S
@@ -13,11 +13,10 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
- Boston, MA 02110-1301, USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
-#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <sysdep.h>
#include <bits/xtensa-config.h>
#ifdef __XTENSA_EB__
@@ -36,7 +35,7 @@
ENTRY (strlen)
/* a2 = s */
- addi a3, a2, -4 // because we overincrement at the end
+ addi a3, a2, -4 /* because we overincrement at the end */
movi a4, MASK0
movi a5, MASK1
movi a6, MASK2
@@ -45,22 +44,22 @@ ENTRY (strlen)
bbsi.l a2, 1, .L2mod4
j .Laligned
-.L1mod2: // address is odd
- l8ui a8, a3, 4 // get byte 0
- addi a3, a3, 1 // advance string pointer
- beqz a8, .Lz3 // if byte 0 is zero
- bbci.l a3, 1, .Laligned // if string pointer is now word-aligned
+.L1mod2: /* address is odd */
+ l8ui a8, a3, 4 /* get byte 0 */
+ addi a3, a3, 1 /* advance string pointer */
+ beqz a8, .Lz3 /* if byte 0 is zero */
+ bbci.l a3, 1, .Laligned /* if string pointer is now word-aligned */
-.L2mod4: // address is 2 mod 4
- addi a3, a3, 2 // advance ptr for aligned access
- l32i a8, a3, 0 // get word with first two bytes of string
- bnone a8, a6, .Lz2 // if byte 2 (of word, not string) is zero
- bany a8, a7, .Laligned // if byte 3 (of word, not string) is nonzero
+.L2mod4: /* address is 2 mod 4 */
+ addi a3, a3, 2 /* advance ptr for aligned access */
+ l32i a8, a3, 0 /* get word with first two bytes of string */
+ bnone a8, a6, .Lz2 /* if byte 2 (of word, not string) is zero */
+ bany a8, a7, .Laligned /* if byte 3 (of word, not string) is nonzero */
/* Byte 3 is zero. */
- addi a3, a3, 3 // point to zero byte
- sub a2, a3, a2 // subtract to get length
- retw
+ addi a3, a3, 3 /* point to zero byte */
+ sub a2, a3, a2 /* subtract to get length */
+ abi_ret
/* String is word-aligned. */
@@ -69,36 +68,36 @@ ENTRY (strlen)
/* (2 mod 4) alignment for loop instruction */
.Laligned:
#if XCHAL_HAVE_LOOPS
- _movi.n a8, 0 // set up for the maximum loop count
- loop a8, .Lz3 // loop forever (almost anyway)
+ _movi.n a8, 0 /* set up for the maximum loop count */
+ loop a8, .Lz3 /* loop forever (almost anyway) */
#endif
-1: l32i a8, a3, 4 // get next word of string
- addi a3, a3, 4 // advance string pointer
- bnone a8, a4, .Lz0 // if byte 0 is zero
- bnone a8, a5, .Lz1 // if byte 1 is zero
- bnone a8, a6, .Lz2 // if byte 2 is zero
+1: l32i a8, a3, 4 /* get next word of string */
+ addi a3, a3, 4 /* advance string pointer */
+ bnone a8, a4, .Lz0 /* if byte 0 is zero */
+ bnone a8, a5, .Lz1 /* if byte 1 is zero */
+ bnone a8, a6, .Lz2 /* if byte 2 is zero */
#if XCHAL_HAVE_LOOPS
- bnone a8, a7, .Lz3 // if byte 3 is zero
+ bnone a8, a7, .Lz3 /* if byte 3 is zero */
#else
- bany a8, a7, 1b // repeat if byte 3 is non-zero
+ bany a8, a7, 1b /* repeat if byte 3 is non-zero */
#endif
.Lz3: /* Byte 3 is zero. */
- addi a3, a3, 3 // point to zero byte
+ addi a3, a3, 3 /* point to zero byte */
/* Fall through.... */
.Lz0: /* Byte 0 is zero. */
- sub a2, a3, a2 // subtract to get length
- retw
+ sub a2, a3, a2 /* subtract to get length */
+ abi_ret
.Lz1: /* Byte 1 is zero. */
- addi a3, a3, 1 // point to zero byte
- sub a2, a3, a2 // subtract to get length
- retw
+ addi a3, a3, 1 /* point to zero byte */
+ sub a2, a3, a2 /* subtract to get length */
+ abi_ret
.Lz2: /* Byte 2 is zero. */
- addi a3, a3, 2 // point to zero byte
- sub a2, a3, a2 // subtract to get length
- retw
+ addi a3, a3, 2 /* point to zero byte */
+ sub a2, a3, a2 /* subtract to get length */
+ abi_ret
libc_hidden_def (strlen)
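For reference, the MASK0..MASK3 tests above each probe one byte lane of the loaded word. A rough C rendering of the same zero-byte scan (little-endian lane order assumed; the assembly swaps the mask values for __XTENSA_EB__, and its aligned loads cannot fault mid-word, a guarantee plain C cannot make, so treat this purely as an illustration):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

size_t wordwise_strlen(const char *s)
{
    const char *p = s;
    for (;;) {
        uint32_t w;
        memcpy(&w, p, sizeof w);  /* one 32-bit load per iteration */
        if (!(w & 0x000000FFu)) return (size_t)(p - s);      /* byte 0 zero */
        if (!(w & 0x0000FF00u)) return (size_t)(p - s) + 1;  /* byte 1 zero */
        if (!(w & 0x00FF0000u)) return (size_t)(p - s) + 2;  /* byte 2 zero */
        if (!(w & 0xFF000000u)) return (size_t)(p - s) + 3;  /* byte 3 zero */
        p += sizeof w;
    }
}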
diff --git a/libc/string/xtensa/strncpy.S b/libc/string/xtensa/strncpy.S
index 7ba2ef77d..aa8db5da1 100644
--- a/libc/string/xtensa/strncpy.S
+++ b/libc/string/xtensa/strncpy.S
@@ -13,11 +13,10 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
- Boston, MA 02110-1301, USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
-#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <sysdep.h>
#include <bits/xtensa-config.h>
#ifdef __XTENSA_EB__
@@ -41,41 +40,41 @@
.literal_position
__strncpy_aux:
-.Lsrc1mod2: // src address is odd
- l8ui a8, a3, 0 // get byte 0
- addi a3, a3, 1 // advance src pointer
- s8i a8, a10, 0 // store byte 0
- addi a4, a4, -1 // decrement n
- beqz a4, .Lret // if n is zero
- addi a10, a10, 1 // advance dst pointer
- beqz a8, .Lfill // if byte 0 is zero
- bbci.l a3, 1, .Lsrcaligned // if src is now word-aligned
-
-.Lsrc2mod4: // src address is 2 mod 4
- l8ui a8, a3, 0 // get byte 0
- addi a4, a4, -1 // decrement n
- s8i a8, a10, 0 // store byte 0
- beqz a4, .Lret // if n is zero
- addi a10, a10, 1 // advance dst pointer
- beqz a8, .Lfill // if byte 0 is zero
- l8ui a8, a3, 1 // get byte 0
- addi a3, a3, 2 // advance src pointer
- s8i a8, a10, 0 // store byte 0
- addi a4, a4, -1 // decrement n
- beqz a4, .Lret // if n is zero
- addi a10, a10, 1 // advance dst pointer
+.Lsrc1mod2: /* src address is odd */
+ l8ui a8, a3, 0 /* get byte 0 */
+ addi a3, a3, 1 /* advance src pointer */
+ s8i a8, a10, 0 /* store byte 0 */
+ addi a4, a4, -1 /* decrement n */
+ beqz a4, .Lret /* if n is zero */
+ addi a10, a10, 1 /* advance dst pointer */
+ beqz a8, .Lfill /* if byte 0 is zero */
+ bbci.l a3, 1, .Lsrcaligned /* if src is now word-aligned */
+
+.Lsrc2mod4: /* src address is 2 mod 4 */
+ l8ui a8, a3, 0 /* get byte 0 */
+ addi a4, a4, -1 /* decrement n */
+ s8i a8, a10, 0 /* store byte 0 */
+ beqz a4, .Lret /* if n is zero */
+ addi a10, a10, 1 /* advance dst pointer */
+ beqz a8, .Lfill /* if byte 0 is zero */
+ l8ui a8, a3, 1 /* get byte 1 */
+ addi a3, a3, 2 /* advance src pointer */
+ s8i a8, a10, 0 /* store byte 1 */
+ addi a4, a4, -1 /* decrement n */
+ beqz a4, .Lret /* if n is zero */
+ addi a10, a10, 1 /* advance dst pointer */
bnez a8, .Lsrcaligned
j .Lfill
.Lret:
- retw
+ abi_ret
ENTRY (strncpy)
/* a2 = dst, a3 = src */
- mov a10, a2 // leave dst in return value register
- beqz a4, .Lret // if n is zero
+ mov a10, a2 /* leave dst in return value register */
+ beqz a4, .Lret /* if n is zero */
movi a11, MASK0
movi a5, MASK1
@@ -125,28 +124,28 @@ ENTRY (strncpy)
.Lfillcleanup:
/* Fill leftover (1 to 3) bytes with zero. */
- s8i a9, a10, 0 // store byte 0
- addi a4, a4, -1 // decrement n
+ s8i a9, a10, 0 /* store byte 0 */
+ addi a4, a4, -1 /* decrement n */
addi a10, a10, 1
- bnez a4, .Lfillcleanup
-
-2: retw
-
-.Lfill1mod2: // dst address is odd
- s8i a9, a10, 0 // store byte 0
- addi a4, a4, -1 // decrement n
- beqz a4, 2b // if n is zero
- addi a10, a10, 1 // advance dst pointer
- bbci.l a10, 1, .Lfillaligned // if dst is now word-aligned
-
-.Lfill2mod4: // dst address is 2 mod 4
- s8i a9, a10, 0 // store byte 0
- addi a4, a4, -1 // decrement n
- beqz a4, 2b // if n is zero
- s8i a9, a10, 1 // store byte 1
- addi a4, a4, -1 // decrement n
- beqz a4, 2b // if n is zero
- addi a10, a10, 2 // advance dst pointer
+ bnez a4, .Lfillcleanup
+
+2: abi_ret
+
+.Lfill1mod2: /* dst address is odd */
+ s8i a9, a10, 0 /* store byte 0 */
+ addi a4, a4, -1 /* decrement n */
+ beqz a4, 2b /* if n is zero */
+ addi a10, a10, 1 /* advance dst pointer */
+ bbci.l a10, 1, .Lfillaligned /* if dst is now word-aligned */
+
+.Lfill2mod4: /* dst address is 2 mod 4 */
+ s8i a9, a10, 0 /* store byte 0 */
+ addi a4, a4, -1 /* decrement n */
+ beqz a4, 2b /* if n is zero */
+ s8i a9, a10, 1 /* store byte 1 */
+ addi a4, a4, -1 /* decrement n */
+ beqz a4, 2b /* if n is zero */
+ addi a10, a10, 2 /* advance dst pointer */
j .Lfillaligned
@@ -156,32 +155,32 @@ ENTRY (strncpy)
/* (2 mod 4) alignment for loop instruction */
.Laligned:
#if XCHAL_HAVE_LOOPS
- _movi.n a8, 0 // set up for the maximum loop count
- loop a8, 1f // loop forever (almost anyway)
- blti a4, 5, .Ldstunaligned // n is near limit; do one at a time
- l32i a8, a3, 0 // get word from src
- addi a3, a3, 4 // advance src pointer
- bnone a8, a11, .Lz0 // if byte 0 is zero
- bnone a8, a5, .Lz1 // if byte 1 is zero
- bnone a8, a6, .Lz2 // if byte 2 is zero
- s32i a8, a10, 0 // store word to dst
- addi a4, a4, -4 // decrement n
- addi a10, a10, 4 // advance dst pointer
- bnone a8, a7, .Lfill // if byte 3 is zero
-1:
+ _movi.n a8, 0 /* set up for the maximum loop count */
+ loop a8, 1f /* loop forever (almost anyway) */
+ blti a4, 5, .Ldstunaligned /* n is near limit; do one at a time */
+ l32i a8, a3, 0 /* get word from src */
+ addi a3, a3, 4 /* advance src pointer */
+ bnone a8, a11, .Lz0 /* if byte 0 is zero */
+ bnone a8, a5, .Lz1 /* if byte 1 is zero */
+ bnone a8, a6, .Lz2 /* if byte 2 is zero */
+ s32i a8, a10, 0 /* store word to dst */
+ addi a4, a4, -4 /* decrement n */
+ addi a10, a10, 4 /* advance dst pointer */
+ bnone a8, a7, .Lfill /* if byte 3 is zero */
+1:
#else /* !XCHAL_HAVE_LOOPS */
-1: blti a4, 5, .Ldstunaligned // n is near limit; do one at a time
- l32i a8, a3, 0 // get word from src
- addi a3, a3, 4 // advance src pointer
- bnone a8, a11, .Lz0 // if byte 0 is zero
- bnone a8, a5, .Lz1 // if byte 1 is zero
- bnone a8, a6, .Lz2 // if byte 2 is zero
- s32i a8, a10, 0 // store word to dst
- addi a4, a4, -4 // decrement n
- addi a10, a10, 4 // advance dst pointer
- bany a8, a7, 1b // no zeroes
+1: blti a4, 5, .Ldstunaligned /* n is near limit; do one at a time */
+ l32i a8, a3, 0 /* get word from src */
+ addi a3, a3, 4 /* advance src pointer */
+ bnone a8, a11, .Lz0 /* if byte 0 is zero */
+ bnone a8, a5, .Lz1 /* if byte 1 is zero */
+ bnone a8, a6, .Lz2 /* if byte 2 is zero */
+ s32i a8, a10, 0 /* store word to dst */
+ addi a4, a4, -4 /* decrement n */
+ addi a10, a10, 4 /* advance dst pointer */
+ bany a8, a7, 1b /* no zeroes */
#endif /* !XCHAL_HAVE_LOOPS */
j .Lfill
@@ -191,8 +190,8 @@ ENTRY (strncpy)
movi a8, 0
#endif
s8i a8, a10, 0
- addi a4, a4, -1 // decrement n
- addi a10, a10, 1 // advance dst pointer
+ addi a4, a4, -1 /* decrement n */
+ addi a10, a10, 1 /* advance dst pointer */
j .Lfill
.Lz1: /* Byte 1 is zero. */
@@ -200,8 +199,8 @@ ENTRY (strncpy)
extui a8, a8, 16, 16
#endif
s16i a8, a10, 0
- addi a4, a4, -2 // decrement n
- addi a10, a10, 2 // advance dst pointer
+ addi a4, a4, -2 /* decrement n */
+ addi a10, a10, 2 /* advance dst pointer */
j .Lfill
.Lz2: /* Byte 2 is zero. */
@@ -211,8 +210,8 @@ ENTRY (strncpy)
s16i a8, a10, 0
movi a8, 0
s8i a8, a10, 2
- addi a4, a4, -3 // decrement n
- addi a10, a10, 3 // advance dst pointer
+ addi a4, a4, -3 /* decrement n */
+ addi a10, a10, 3 /* advance dst pointer */
j .Lfill
.align 4
@@ -220,8 +219,8 @@ ENTRY (strncpy)
.Ldstunaligned:
#if XCHAL_HAVE_LOOPS
- _movi.n a8, 0 // set up for the maximum loop count
- loop a8, 2f // loop forever (almost anyway)
+ _movi.n a8, 0 /* set up for the maximum loop count */
+ loop a8, 2f /* loop forever (almost anyway) */
#endif
1: l8ui a8, a3, 0
addi a3, a3, 1
@@ -236,6 +235,6 @@ ENTRY (strncpy)
#endif
2: j .Lfill
-3: retw
+3: abi_ret
libc_hidden_def (strncpy)