summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenis Vlasenko <vda.linux@googlemail.com>2008-12-17 01:36:31 +0000
committerDenis Vlasenko <vda.linux@googlemail.com>2008-12-17 01:36:31 +0000
commit41c15785a9b620a8c85944649c20cca853f40e84 (patch)
tree2676af638efdebba3a9e9b0cf2aa0eab1ec54819
parent3d21a36bcd1441e88529eb990d0f9d8ac41a2a4d (diff)
since gcc -Os hates us and does not inline string ops,
implement inline versions of some of them. Enable only those which result in roughly the same code size as using out-of-line versions. None of this affects users; installed headers won't have any trace of it.
-rw-r--r--include/libc-string_i386.h314
-rw-r--r--include/string.h29
-rw-r--r--libc/string/generic/memchr.c4
-rw-r--r--libc/string/generic/mempcpy.c3
-rw-r--r--libc/string/i386/memcpy.c2
-rw-r--r--libc/string/i386/memset.c1
-rw-r--r--libc/string/i386/strcpy.c2
-rw-r--r--libc/string/i386/strlen.c2
-rw-r--r--libc/string/memchr.c1
-rw-r--r--libc/string/mempcpy.c1
-rw-r--r--libc/string/stpcpy.c2
11 files changed, 341 insertions, 20 deletions
diff --git a/include/libc-string_i386.h b/include/libc-string_i386.h
new file mode 100644
index 000000000..3ed9c8783
--- /dev/null
+++ b/include/libc-string_i386.h
@@ -0,0 +1,314 @@
+/*
+ * Copyright (C) 2008 Denys Vlasenko <vda.linux@googlemail.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball
+ */
+
+#if !defined _STRING_H
+#error "Never use <libc-string_i386.h> directly; include <string.h> instead"
+#endif
+
+#ifndef _LIBC_STRING_i386_H
+#define _LIBC_STRING_i386_H 1
+
+/*
+ * Inline memset() for a compile-time constant fill byte and byte count.
+ *
+ * s:     destination buffer
+ * eax:   fill value; only its low 8 bits are used, as with memset()
+ * count: number of bytes to store. Counts 0..6*4 are handled exactly.
+ *        Larger counts go through "rep; stosl", which stores count / 4
+ *        dwords and therefore drops any count % 4 remainder; the memset()
+ *        macro only passes larger counts that are multiples of 4.
+ *
+ * Returns s.
+ */
+static __always_inline
+void *inlined_memset_const_c_count4(void *s, unsigned eax, unsigned count)
+{
+ int ecx, edi;
+
+ if (count == 0)
+ return s;
+
+ /* Very small (2 stores or less) are best done with direct
+ * mov <const>,<mem> instructions (they do not clobber registers) */
+ if (count == 1) {
+ *(char *)(s + 0) = eax;
+ return s;
+ }
+
+ /* Replicate the fill byte into all four bytes of eax.
+ * The "& 0xff" is required: memset(p, -1, n), or '\xff' where char
+ * is signed, arrives here as 0xffffffff, and
+ * 0xffffffff * 0x01010101 is 0xfefefeff, not 0xffffffff. */
+ eax = (eax & 0xff) * 0x01010101; /* done at compile time */
+
+ if (count == 2) {
+ *(short *)(s + 0) = eax;
+ return s;
+ }
+ if (count == 3) {
+ *(short *)(s + 0) = eax;
+ *(char *) (s + 2) = eax;
+ return s;
+ }
+ if (count == 1*4 + 0) {
+ *(int *)(s + 0) = eax;
+ return s;
+ }
+ if (count == 1*4 + 1) {
+ *(int *) (s + 0) = eax;
+ *(char *)(s + 4) = eax;
+ return s;
+ }
+ if (count == 1*4 + 2) {
+ *(int *) (s + 0) = eax;
+ *(short *)(s + 4) = eax;
+ return s;
+ }
+
+ /* Small string stores: don't clobber ecx
+ * (clobbers only eax and edi) */
+#define small_store(arg) { \
+ __asm__ __volatile__( \
+ arg \
+ : "=&D" (edi) \
+ : "a" (eax), "0" (s) \
+ : "memory" \
+ ); \
+ return s; \
+}
+ if (count == 1*4 + 3) small_store("stosl; stosw; stosb");
+ if (count == 2*4 + 0) {
+ ((int *)s)[0] = eax;
+ ((int *)s)[1] = eax;
+ return s;
+ }
+ if (count == 2*4 + 1) small_store("stosl; stosl; stosb");
+ if (count == 2*4 + 2) small_store("stosl; stosl; stosw");
+ if (count == 2*4 + 3) small_store("stosl; stosl; stosw; stosb");
+ if (count == 3*4 + 0) small_store("stosl; stosl; stosl");
+ if (count == 3*4 + 1) small_store("stosl; stosl; stosl; stosb");
+ if (count == 3*4 + 2) small_store("stosl; stosl; stosl; stosw");
+ if (count == 3*4 + 3) small_store("stosl; stosl; stosl; stosw; stosb");
+ if (count == 4*4 + 0) small_store("stosl; stosl; stosl; stosl");
+ if (count == 4*4 + 1) small_store("stosl; stosl; stosl; stosl; stosb");
+ /* going over 7 bytes is suboptimal */
+ /* stosw is 2-byte insn, so this one takes 6 bytes: */
+ if (count == 4*4 + 2) small_store("stosl; stosl; stosl; stosl; stosw");
+ /* 7 bytes */
+ if (count == 4*4 + 3) small_store("stosl; stosl; stosl; stosl; stosw; stosb");
+ /* 5 bytes */
+ if (count == 5*4 + 0) small_store("stosl; stosl; stosl; stosl; stosl");
+ /* 6 bytes */
+ if (count == 5*4 + 1) small_store("stosl; stosl; stosl; stosl; stosl; stosb");
+ /* 7 bytes */
+ if (count == 5*4 + 2) small_store("stosl; stosl; stosl; stosl; stosl; stosw");
+ /* 8 bytes, but oh well... */
+ if (count == 5*4 + 3) small_store("stosl; stosl; stosl; stosl; stosl; stosw; stosb");
+ /* 6 bytes */
+ if (count == 6*4 + 0) small_store("stosl; stosl; stosl; stosl; stosl; stosl");
+ /* the rest would be 7+ bytes and is handled below instead */
+#undef small_store
+
+ /* Not small, but multiple-of-4 store.
+ * "mov <const>,%ecx; rep; stosl" sequence is 7 bytes */
+ __asm__ __volatile__(
+ " rep; stosl\n"
+ : "=&c" (ecx), "=&D" (edi)
+ : "a" (eax), "0" (count / 4), "1" (s)
+ : "memory"
+ );
+ return s;
+}
+#if 1 /* -51 bytes on shared i386 build with gcc 4.3.0 */
+/*
+ * Use the inline version only when both c and count are compile-time
+ * constants and count is either at most 6*4 or a multiple of 4.
+ * Every other call goes to the out-of-line memset(); the inner "memset"
+ * is not expanded again because a macro name is not re-expanded inside
+ * its own replacement.
+ */
+#define memset(s, c, count) ( \
+ ( !(__builtin_constant_p(c) && __builtin_constant_p(count)) \
+ || ((count) > (6*4 + 0) && ((count) % 4) != 0) \
+ ) \
+ ? memset((s), (c), (count)) \
+ : inlined_memset_const_c_count4((s), (c), (count)) \
+ )
+#endif
+
+
+/*
+ * Inline mempcpy() for a compile-time constant byte count.
+ * Copies count bytes from s to d and returns a pointer just past the
+ * last byte written to d.
+ * Counts 0..6*4 are handled exactly. Larger counts go through
+ * "rep; movsl", which copies count / 4 dwords, so a count % 4 remainder
+ * would not be copied; the mempcpy()/memcpy() macros only pass larger
+ * counts that are multiples of 4.
+ */
+static __always_inline
+void *inlined_mempcpy_const_count4(void *d, const void *s, unsigned count)
+{
+ int ecx;
+ char *esi, *edi;
+
+ if (count == 0)
+ return d;
+
+ if (count == 1) {
+ *(char *)d = *(char *)s;
+ return d + 1;
+ }
+ if (count == 2) {
+ *(short *)d = *(short *)s;
+ return d + 2;
+ }
+ /* Small string moves: don't clobber ecx
+ * (clobbers only esi and edi) */
+#define small_move(arg) { \
+ __asm__ __volatile__( \
+ arg \
+ : "=&S" (esi), "=&D" (edi) \
+ : "0" (s), "1" (d) \
+ : "memory" \
+ ); \
+ return edi; \
+}
+ if (count == 3) small_move("movsw; movsb");
+ if (count == 1*4 + 0) {
+ *(int *)d = *(int *)s;
+ return d + 4;
+ }
+ if (count == 1*4 + 1) small_move("movsl; movsb");
+ if (count == 1*4 + 2) small_move("movsl; movsw");
+ if (count == 1*4 + 3) small_move("movsl; movsw; movsb");
+ if (count == 2*4 + 0) small_move("movsl; movsl");
+ if (count == 2*4 + 1) small_move("movsl; movsl; movsb");
+ if (count == 2*4 + 2) small_move("movsl; movsl; movsw");
+ if (count == 2*4 + 3) small_move("movsl; movsl; movsw; movsb");
+ if (count == 3*4 + 0) small_move("movsl; movsl; movsl");
+ if (count == 3*4 + 1) small_move("movsl; movsl; movsl; movsb");
+ if (count == 3*4 + 2) small_move("movsl; movsl; movsl; movsw");
+ if (count == 3*4 + 3) small_move("movsl; movsl; movsl; movsw; movsb");
+ if (count == 4*4 + 0) small_move("movsl; movsl; movsl; movsl");
+ if (count == 4*4 + 1) small_move("movsl; movsl; movsl; movsl; movsb");
+ /* going over 7 bytes is suboptimal */
+ /* movsw is 2-byte insn, so this one takes 6 bytes: */
+ if (count == 4*4 + 2) small_move("movsl; movsl; movsl; movsl; movsw");
+ /* 7 bytes */
+ if (count == 4*4 + 3) small_move("movsl; movsl; movsl; movsl; movsw; movsb");
+ /* 5 bytes */
+ if (count == 5*4 + 0) small_move("movsl; movsl; movsl; movsl; movsl");
+ /* 6 bytes */
+ if (count == 5*4 + 1) small_move("movsl; movsl; movsl; movsl; movsl; movsb");
+ /* 7 bytes */
+ if (count == 5*4 + 2) small_move("movsl; movsl; movsl; movsl; movsl; movsw");
+ /* 8 bytes, but oh well... */
+ if (count == 5*4 + 3) small_move("movsl; movsl; movsl; movsl; movsl; movsw; movsb");
+ /* 6 bytes */
+ if (count == 6*4 + 0) small_move("movsl; movsl; movsl; movsl; movsl; movsl");
+ /* the rest would be 7+ bytes and is handled below instead */
+#undef small_move
+
+ /* Not small, but multiple-of-4 move.
+ * "mov <const>,%ecx; rep; movsl" sequence is 7 bytes */
+ __asm__ __volatile__(
+ " rep; movsl\n"
+ : "=&c" (ecx), "=&S" (esi), "=&D" (edi)
+ : "0" (count / 4), "1" (s), "2" (d)
+ : "memory"
+ );
+ return edi;
+}
+/* memcpy() flavour of inlined_mempcpy_const_count4(): performs the same
+ * copy but hands back the start of the destination instead of its end. */
+static __always_inline
+void *inlined_memcpy_const_count4(void *d, const void *s, unsigned count)
+{
+ void *const dest_start = d;
+
+ (void) inlined_mempcpy_const_count4(dest_start, s, count);
+ return dest_start;
+}
+#if 1 /* +34 bytes on shared i386 build with gcc 4.3.0 */
+/*
+ * Both macros pick the inline copy only when count is a compile-time
+ * constant that is either at most 6*4 or a multiple of 4; every other
+ * call goes to the out-of-line function of the same name.
+ */
+#define mempcpy(d, s, count) ( \
+ ( !(__builtin_constant_p(count)) \
+ || ((count) > (6*4 + 0) && ((count) % 4) != 0) \
+ ) \
+ ? mempcpy((d), (s), (count)) \
+ : inlined_mempcpy_const_count4((d), (s), (count)) \
+ )
+#define memcpy(d, s, count) ( \
+ ( !(__builtin_constant_p(count)) \
+ || ((count) > (6*4 + 0) && ((count) % 4) != 0) \
+ ) \
+ ? memcpy((d), (s), (count)) \
+ : inlined_memcpy_const_count4((d), (s), (count)) \
+ )
+#endif
+
+
+/*
+ * Inline strlen(): scan for the terminating NUL with "repne; scasb".
+ * Returns the number of bytes that precede the NUL.
+ */
+static __always_inline
+size_t inlined_strlen(const char *s)
+{
+ int edi;
+ int ecx;
+ __asm__ __volatile__(
+ " repne; scasb\n"
+ /* " notl %0\n" */
+ /* " decl %0\n" */
+ : "=c" (ecx), "=&D" (edi)
+ : "1" (s), "a" (0), "0" (0xffffffffu)
+ /* The string is read through %edi, which the operand list does
+ * not describe, so gcc must be told that memory is accessed. */
+ : "memory"
+ );
+ /* ecx starts at -1 and is decremented once per byte scanned,
+ * the NUL included, so for length n it ends at -(n + 2).
+ * ~ecx - 1 (the notl/decl pair above, done in C) gives n;
+ * "-ecx - 1" would give n + 1. */
+ return ~ecx - 1;
+}
+#if 0 /* +1108 bytes on shared i386 build with gcc 4.3.0 */
+/* Kept disabled: routing every strlen() to the inline version grew the
+ * library instead of shrinking it (see the size delta above). */
+#define strlen(s) inlined_strlen(s)
+#endif
+
+
+/*
+ * Inline stpcpy(): copy src to dest one byte at a time (lodsb/stosb),
+ * up to and including the terminating NUL.
+ * Returns a pointer to the NUL written into dest.
+ */
+static __always_inline
+char *inlined_stpcpy(char *dest, const char *src)
+{
+ char *esi, *edi;
+ int eax;
+ __asm__ __volatile__(
+ "1: lodsb\n"
+ " stosb\n"
+ " testb %%al, %%al\n"
+ " jnz 1b\n"
+ : "=&S" (esi), "=&D" (edi), "=&a" (eax)
+ : "0" (src), "1" (dest)
+ : "memory"
+ );
+ /* stosb left edi one past the NUL it just stored */
+ return edi - 1;
+}
+/* strcpy() flavour of inlined_stpcpy(): performs the same copy but hands
+ * back the start of the destination instead of its terminating NUL. */
+static __always_inline
+char *inlined_strcpy(char *dest, const char *src)
+{
+ char *const dest_start = dest;
+
+ (void) inlined_stpcpy(dest_start, src);
+ return dest_start;
+}
+#if 0 /* +562 bytes on shared i386 build with gcc 4.3.0 */
+/* Kept disabled: the inline versions grew the library (see the size
+ * delta above). */
+#define stpcpy(dest, src) inlined_stpcpy(dest, src)
+#define strcpy(dest, src) inlined_strcpy(dest, src)
+#endif
+
+
+/*
+ * Inline memchr(): search the first count bytes at s for the byte c.
+ * scasb compares against %al only, so just the low 8 bits of c matter.
+ * Returns a pointer to the first match, or NULL when there is no match
+ * or count is 0.
+ */
+static __always_inline
+void *inlined_memchr(const void *s, int c, size_t count)
+{
+ void *edi;
+ int ecx;
+ /* Unfortunately, c gets loaded to %eax (wide insn), not %al */
+ __asm__ __volatile__(
+ " jecxz 1f\n"
+ " repne; scasb\n"
+ " leal -1(%%edi), %%edi\n"
+ " je 2f\n"
+ "1:\n"
+ " xorl %%edi, %%edi\n"
+ "2:\n"
+ : "=&D" (edi), "=&c" (ecx)
+ : "a" (c), "0" (s), "1" (count)
+ /* The buffer is read through %edi, which the operand list does
+ * not describe; without this gcc may delay or drop earlier
+ * stores to the buffer being searched. */
+ : "memory"
+ );
+ return edi;
+}
+/*
+ * Inline memchr() for a compile-time constant c: the byte is loaded
+ * into %al as an immediate ("i" constraint), after the count == 0 check.
+ * Returns a pointer to the first match in the first count bytes at s,
+ * or NULL when there is no match or count is 0.
+ */
+static __always_inline
+void *inlined_memchr_const_c(const void *s, int c, size_t count)
+{
+ void *edi;
+ int ecx, eax;
+ __asm__ __volatile__(
+ " jecxz 1f\n"
+ " movb %4, %%al\n" /* const c to %%al */
+ " repne; scasb\n"
+ " leal -1(%%edi), %%edi\n"
+ " je 2f\n"
+ "1:\n"
+ " xorl %%edi, %%edi\n"
+ "2:\n"
+ : "=&D" (edi), "=&c" (ecx), "=&a" (eax)
+ : "0" (s), "i" (c), "1" (count)
+ /* The buffer is read through %edi, which the operand list does
+ * not describe; without this gcc may delay or drop earlier
+ * stores to the buffer being searched. */
+ : "memory"
+ );
+ return edi;
+}
+#if 1 /* +2 bytes on shared i386 build with gcc 4.3.0 */
+/*
+ * Every memchr() call is inlined. A compile-time constant c is reduced
+ * to its low byte and passed to the immediate-operand variant; any
+ * other c goes to the variant that takes c in a register.
+ */
+#define memchr(s, c, count) ( \
+ __builtin_constant_p(c) \
+ ? inlined_memchr_const_c(s, (c) & 0xff, count) \
+ : inlined_memchr(s, c, count) \
+ )
+#endif
+
+#endif /* _LIBC_STRING_i386_H */
diff --git a/include/string.h b/include/string.h
index e889dc11a..ab1076565 100644
--- a/include/string.h
+++ b/include/string.h
@@ -378,7 +378,7 @@ libc_hidden_proto(ffs)
/* The following two functions are non-standard but necessary for non-32 bit
platforms. */
-#if 0 /*def __USE_GNU*/
+# if 0 /*#ifdef __USE_GNU*/
extern int ffsl (long int __l) __THROW __attribute__ ((__const__));
# ifdef __GNUC__
__extension__ extern int ffsll (long long int __ll)
@@ -422,44 +422,44 @@ libc_hidden_proto(strsep)
#ifdef __USE_GNU
/* Compare S1 and S2 as strings holding name & indices/version numbers. */
-#if 0
+# if 0
extern int strverscmp (__const char *__s1, __const char *__s2)
__THROW __attribute_pure__ __nonnull ((1, 2));
libc_hidden_proto(strverscmp)
-#endif
+# endif
/* Return a string describing the meaning of the signal number in SIG. */
extern char *strsignal (int __sig) __THROW;
libc_hidden_proto(strsignal)
/* Copy SRC to DEST, returning the address of the terminating '\0' in DEST. */
-#if 0 /* uClibc: disabled */
+# if 0 /* uClibc: disabled */
extern char *__stpcpy (char *__restrict __dest, __const char *__restrict __src)
__THROW __nonnull ((1, 2));
-#endif
+# endif
extern char *stpcpy (char *__restrict __dest, __const char *__restrict __src)
__THROW __nonnull ((1, 2));
libc_hidden_proto(stpcpy)
/* Copy no more than N characters of SRC to DEST, returning the address of
the last character written into DEST. */
-#if 0 /* uClibc: disabled */
+# if 0 /* uClibc: disabled */
extern char *__stpncpy (char *__restrict __dest,
__const char *__restrict __src, size_t __n)
__THROW __nonnull ((1, 2));
-#endif
+# endif
extern char *stpncpy (char *__restrict __dest,
__const char *__restrict __src, size_t __n)
__THROW __nonnull ((1, 2));
libc_hidden_proto(stpncpy)
-#if 0 /* uClibc does not support strfry or memfrob. */
+# if 0 /* uClibc does not support strfry or memfrob. */
/* Sautee STRING briskly. */
extern char *strfry (char *__string) __THROW __nonnull ((1));
/* Frobnicate N bytes of S. */
extern void *memfrob (void *__s, size_t __n) __THROW __nonnull ((1));
-#endif
+# endif
# ifndef basename
/* Return the file name within directory of FILENAME. We don't
@@ -469,7 +469,7 @@ extern void *memfrob (void *__s, size_t __n) __THROW __nonnull ((1));
extern char *basename (__const char *__filename) __THROW __nonnull ((1));
libc_hidden_proto(basename)
# endif
-#endif
+#endif /* __USE_GNU */
#ifdef __USE_BSD
@@ -484,4 +484,11 @@ libc_hidden_proto(strlcpy)
__END_DECLS
-#endif /* string.h */
+
+#ifdef UCLIBC_INTERNAL
+# if defined __i386__
+# include <libc-string_i386.h>
+# endif
+#endif
+
+#endif /* string.h */
diff --git a/libc/string/generic/memchr.c b/libc/string/generic/memchr.c
index 8ea3f539a..d5cd0005e 100644
--- a/libc/string/generic/memchr.c
+++ b/libc/string/generic/memchr.c
@@ -25,14 +25,12 @@
#include <stdlib.h>
#include <limits.h>
-/* Experimentally off - libc_hidden_proto(memchr) */
-/* libc_hidden_proto(abort) */
-
#include "memcopy.h"
#define LONG_MAX_32_BITS 2147483647
/* Search no more than N bytes of S for C. */
+#undef memchr
void *memchr (const void * s, int c_in, size_t n)
{
const unsigned char *char_ptr;
diff --git a/libc/string/generic/mempcpy.c b/libc/string/generic/mempcpy.c
index 8d7356486..d7fa79ef5 100644
--- a/libc/string/generic/mempcpy.c
+++ b/libc/string/generic/mempcpy.c
@@ -8,9 +8,8 @@
#include <string.h>
#ifdef __USE_GNU
-/* Experimentally off - libc_hidden_proto(mempcpy) */
-/* Experimentally off - libc_hidden_proto(memcpy) */
+# undef mempcpy
void *mempcpy (void *dstpp, const void *srcpp, size_t len)
{
memcpy(dstpp, srcpp, len);
diff --git a/libc/string/i386/memcpy.c b/libc/string/i386/memcpy.c
index 216ddfd1a..af86cf255 100644
--- a/libc/string/i386/memcpy.c
+++ b/libc/string/i386/memcpy.c
@@ -32,7 +32,7 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(memcpy) */
+#undef memcpy
void *memcpy(void * to, const void * from, size_t n)
{
int d0, d1, d2;
diff --git a/libc/string/i386/memset.c b/libc/string/i386/memset.c
index bbaa45215..cfc16983c 100644
--- a/libc/string/i386/memset.c
+++ b/libc/string/i386/memset.c
@@ -33,6 +33,7 @@
#include <string.h>
/* Experimentally off - libc_hidden_proto(memset) */
+#undef memset
void *memset(void *s, int c, size_t count)
{
int d0, d1;
diff --git a/libc/string/i386/strcpy.c b/libc/string/i386/strcpy.c
index 09065a9b7..fff1bd006 100644
--- a/libc/string/i386/strcpy.c
+++ b/libc/string/i386/strcpy.c
@@ -32,7 +32,7 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strcpy) */
+#undef strcpy
char *strcpy(char * dest, const char * src)
{
int d0, d1, d2;
diff --git a/libc/string/i386/strlen.c b/libc/string/i386/strlen.c
index 61a178393..761d27aae 100644
--- a/libc/string/i386/strlen.c
+++ b/libc/string/i386/strlen.c
@@ -32,7 +32,7 @@
#include <string.h>
-/* Experimentally off - libc_hidden_proto(strlen) */
+#undef strlen
size_t strlen(const char *s)
{
int d0;
diff --git a/libc/string/memchr.c b/libc/string/memchr.c
index 5e60f6554..438f4fa4a 100644
--- a/libc/string/memchr.c
+++ b/libc/string/memchr.c
@@ -10,6 +10,7 @@
#ifdef WANT_WIDE
# define Wmemchr wmemchr
#else
+# undef memchr
# define Wmemchr memchr
#endif
diff --git a/libc/string/mempcpy.c b/libc/string/mempcpy.c
index e7605146a..d79bd1937 100644
--- a/libc/string/mempcpy.c
+++ b/libc/string/mempcpy.c
@@ -12,6 +12,7 @@
#ifdef WANT_WIDE
# define Wmempcpy wmempcpy
#else
+# undef mempcpy
# define Wmempcpy mempcpy
#endif
diff --git a/libc/string/stpcpy.c b/libc/string/stpcpy.c
index 8a487584e..58ace8fc7 100644
--- a/libc/string/stpcpy.c
+++ b/libc/string/stpcpy.c
@@ -10,7 +10,7 @@
#ifdef WANT_WIDE
# define Wstpcpy wcpcpy
#else
-/* Experimentally off - libc_hidden_proto(stpcpy) */
+# undef stpcpy
# define Wstpcpy stpcpy
#endif