diff options
author | Eric Andersen <andersen@codepoet.org> | 2003-11-06 09:14:26 +0000 |
---|---|---|
committer | Eric Andersen <andersen@codepoet.org> | 2003-11-06 09:14:26 +0000 |
commit | 44d0d21408e72de5ecbcf3d9831b3575eb8f7848 (patch) | |
tree | 2d65c0a97b72acac8925e87fa2b6c497a3327fd8 | |
parent | 9c071de3cf25b10b3c05815b819331c43a4e1822 (diff) |
Add some initial x86 string optimizations. These make no attempt to use nifty
things like mmx/3dnow/etc. These are not inline, and will therefore not be as
fast as modifying the headers to use inlines (and cannot therefore do tricky
things when dealing with const memory). But they should (I hope!) be faster
than their generic equivalents....
More importantly, these should provide a good example for others to follow when
adding arch-specific optimizations.
-Erik
-rw-r--r-- | libc/string/Makefile | 21 | ||||
-rw-r--r-- | libc/string/i386/Makefile | 50 | ||||
-rw-r--r-- | libc/string/i386/string.c | 316 |
3 files changed, 385 insertions, 2 deletions
diff --git a/libc/string/Makefile b/libc/string/Makefile index d9fec82a8..38b2eaae0 100644 --- a/libc/string/Makefile +++ b/libc/string/Makefile @@ -24,6 +24,12 @@ TOPDIR=../../ include $(TOPDIR)Rules.mak +DIRS = +ifeq ($(TARGET_ARCH),$(wildcard $(TARGET_ARCH))) +DIRS = $(TARGET_ARCH) +endif +ALL_SUBDIRS = i386 + MSRC= wstring.c MOBJ= basename.o bcopy.o bzero.o dirname.o ffs.o memccpy.o memchr.o memcmp.o \ memcpy.o memmove.o mempcpy.o memrchr.o memset.o rawmemchr.o stpcpy.o \ @@ -58,7 +64,7 @@ ifeq ($(UCLIBC_HAS_WCHAR),y) OBJS += $(MOBJW) $(MOBJWx) endif -all: $(OBJS) $(LIBC) +all: $(OBJS) $(LIBC) subdirs $(LIBC): ar-target @@ -85,6 +91,17 @@ $(COBJS): %.o : %.c $(CC) $(CFLAGS) -c $< -o $@ $(STRIPTOOL) -x -R .note -R .comment $*.o -clean: +clean: subdirs_clean $(RM) *.[oa] *~ core +subdirs: $(patsubst %, _dir_%, $(DIRS)) +subdirs_clean: $(patsubst %, _dirclean_%, $(ALL_SUBDIRS)) + +$(patsubst %, _dir_%, $(DIRS)) : dummy + $(MAKE) -C $(patsubst _dir_%, %, $@) + +$(patsubst %, _dirclean_%, $(ALL_SUBDIRS)) : dummy + $(MAKE) -C $(patsubst _dirclean_%, %, $@) clean + +.PHONY: dummy + diff --git a/libc/string/i386/Makefile b/libc/string/i386/Makefile new file mode 100644 index 000000000..fad7ec3cf --- /dev/null +++ b/libc/string/i386/Makefile @@ -0,0 +1,50 @@ +# Makefile for uClibc +# +# Copyright (C) 2000 by Lineo, inc. +# Copyright (C) 2000,2001 Erik Andersen <andersen@uclibc.org> +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU Library General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more +# details. 
+# +# You should have received a copy of the GNU Library General Public License +# along with this program; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# Derived in part from the Linux-8086 C library, the GNU C Library, and several +# other sundry sources. Files within this library are copyright by their +# respective copyright holders. + +TOPDIR=../../../ +include $(TOPDIR)Rules.mak + +MSRC= string.c +MOBJ= strcpy.o strncpy.o strcat.o strncat.o strcmp.o \ + strncmp.o strchr.o strrchr.o strlen.o strnlen.o \ + memcpy.o memmove.o memchr.o memset.o +OBJS=$(MOBJ) + +all: $(OBJS) $(LIBC) + +$(LIBC): ar-target + +ar-target: $(OBJS) + $(AR) $(ARFLAGS) $(LIBC) $(OBJS) + +$(MOBJ): $(MSRC) + $(CC) $(CFLAGS) -DL_$* $< -c -o $*.o + $(STRIPTOOL) -x -R .note -R .comment $*.o + +$(COBJS): %.o : %.c + $(CC) $(CFLAGS) -c $< -o $@ + $(STRIPTOOL) -x -R .note -R .comment $*.o + +clean: + $(RM) *.[oa] *~ core + diff --git a/libc/string/i386/string.c b/libc/string/i386/string.c new file mode 100644 index 000000000..bc85b46a1 --- /dev/null +++ b/libc/string/i386/string.c @@ -0,0 +1,316 @@ +/* + * This string-include defines all string functions as inline + * functions. Use gcc. It also assumes ds=es=data space, this should be + * normal. Most of the string-functions are rather heavily hand-optimized, + * see especially strtok,strstr,str[c]spn. They should work, but are not + * very easy to understand. Everything is done entirely within the register + * set, making the functions fast and clean. String instructions have been + * used through-out, making for "slightly" unclear code :-) + * + * NO Copyright (C) 1991, 1992 Linus Torvalds, + * consider these trivial functions to be PD. + * + * Modified for uClibc by Erik Andersen <andersen@codepoet.org> + * These make no attempt to use nifty things like mmx/3dnow/etc. 
+ * These are not inline, and will therefore not be as fast as
+ * modifying the headers to use inlines (and cannot therefore
+ * do tricky things when dealing with const memory). But they
+ * should (I hope!) be faster than their generic equivalents....
+ *
+ * More importantly, these should provide a good example for
+ * others to follow when adding arch specific optimizations.
+ * -Erik
+ */
+
+#define _STDIO_UTILITY
+#define _GNU_SOURCE
+#include <string.h>
+
+/*
+ * Notes that apply to every routine below:
+ *  - Each function is compiled only when the matching L_<name> macro is
+ *    defined (the Makefile builds one object per function with -DL_$*).
+ *  - The string instructions (lods/stos/scas/movs) use %esi/%edi/%ecx/%eax
+ *    implicitly, which is why the operands are pinned with the "S", "D",
+ *    "c" and "a" constraints.  The d0..d3 dummies only exist to tell gcc
+ *    that those registers are modified.
+ *  - Every asm carries a "memory" clobber because it reads (and sometimes
+ *    writes) memory that is not listed as an operand.
+ */
+
+#ifdef L_strcpy
+/* Copy src to dest, up to and including the terminating NUL.  Returns dest. */
+char * strcpy(char * dest, const char * src)
+{
+	int d0, d1, d2;
+	__asm__ __volatile__(
+		"1:\tlodsb\n\t"
+		"stosb\n\t"
+		"testb %%al,%%al\n\t"
+		"jne 1b"
+		: "=&S" (d0), "=&D" (d1), "=&a" (d2)
+		:"0" (src),"1" (dest) : "memory");
+	return dest;
+}
+#endif
+
+
+#ifdef L_strncpy
+/*
+ * Copy at most count bytes from src to dest.  If a NUL is copied before
+ * count is used up, the remainder of dest is filled with NUL bytes (the
+ * "rep stosb" runs with %al == 0 and %ecx == bytes left).  If src has no
+ * NUL in its first count bytes, dest is left unterminated.  Returns dest.
+ */
+char * strncpy(char * dest, const char * src, size_t count)
+{
+	int d0, d1, d2, d3;
+	__asm__ __volatile__(
+		/* count is unsigned: "subl/jb" stops exactly when it was 0,
+		 * whereas "decl/js" also stopped for any count > INT_MAX. */
+		"1:\tsubl $1,%2\n\t"
+		"jb 2f\n\t"
+		"lodsb\n\t"
+		"stosb\n\t"
+		"testb %%al,%%al\n\t"
+		"jne 1b\n\t"
+		"rep\n\t"
+		"stosb\n"
+		"2:"
+		: "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
+		:"0" (src),"1" (dest),"2" (count) : "memory");
+	return dest;
+}
+#endif
+
+
+#ifdef L_strcat
+/*
+ * Append src (including its NUL) to the end of dest.  The end of dest is
+ * found with "repne scasb" (%al == 0, %ecx == 0xffffffff); %edi is then
+ * stepped back onto the NUL before copying.  Returns dest.
+ */
+char *strcat(char * dest, const char * src)
+{
+	int d0, d1, d2, d3;
+	__asm__ __volatile__(
+		"repne\n\t"
+		"scasb\n\t"
+		"decl %1\n"
+		"1:\tlodsb\n\t"
+		"stosb\n\t"
+		"testb %%al,%%al\n\t"
+		"jne 1b"
+		: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
+		: "0" (src), "1" (dest), "2" (0), "3" (0xffffffff):"memory");
+	return dest;
+}
+#endif
+
+
+#ifdef L_strncat
+/*
+ * Append at most count bytes of src to the end of dest and always leave
+ * dest NUL-terminated.  Returns dest.
+ */
+char *strncat(char * dest,
+	const char * src, size_t count)
+{
+	int d0, d1, d2, d3;
+	__asm__ __volatile__(
+		"repne\n\t"
+		"scasb\n\t"
+		"decl %1\n\t"
+		"movl %8,%3\n"
+		/* Unsigned countdown (see strncpy): counts > INT_MAX are valid. */
+		"1:\tsubl $1,%3\n\t"
+		"jb 2f\n\t"
+		"lodsb\n\t"
+		"stosb\n\t"
+		"testb %%al,%%al\n\t"
+		"jne 1b\n\t"
+		/* src's own NUL has just been stored; skip the explicit
+		 * terminator so we never write one byte past it. */
+		"jmp 3f\n"
+		/* count exhausted before src ended: terminate dest ourselves. */
+		"2:\txorl %2,%2\n\t"
+		"stosb\n"
+		"3:"
+		: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
+		: "0" (src),"1" (dest),"2" (0),"3" (0xffffffff), "g" (count)
+		: "memory");
+	return dest;
+}
+#endif
+
+
+#ifdef L_strcmp
+/*
+ * Compare cs and ct byte by byte.  Returns 0 if they are equal; otherwise
+ * "sbbl/orb" turns the carry flag of the failing "scasb" into -1 (cs byte
+ * below ct byte, unsigned) or 1.
+ */
+int strcmp(const char *cs, const char *ct)
+{
+	int d0, d1;
+	register int __res;
+	__asm__ __volatile__(
+		"1:\tlodsb\n\t"
+		"scasb\n\t"
+		"jne 2f\n\t"
+		"testb %%al,%%al\n\t"
+		"jne 1b\n\t"
+		"xorl %%eax,%%eax\n\t"
+		"jmp 3f\n"
+		"2:\tsbbl %%eax,%%eax\n\t"
+		"orb $1,%%al\n"
+		"3:"
+		:"=a" (__res), "=&S" (d0), "=&D" (d1)
+		:"1" (cs),"2" (ct)
+		: "memory");
+	return __res;
+}
+#endif
+
+
+#ifdef L_strncmp
+/*
+ * Like strcmp, but compares at most count bytes.  Returns 0 if the first
+ * count bytes (or both strings up to their NUL) match, else -1 or 1.
+ */
+int strncmp(const char *cs, const char *ct, size_t count)
+{
+	register int __res;
+	int d0, d1, d2;
+	__asm__ __volatile__(
+		/* Unsigned countdown: with "decl/js" a call such as
+		 * strncmp(a, b, (size_t)-1) returned 0 without comparing. */
+		"1:\tsubl $1,%3\n\t"
+		"jb 2f\n\t"
+		"lodsb\n\t"
+		"scasb\n\t"
+		"jne 3f\n\t"
+		"testb %%al,%%al\n\t"
+		"jne 1b\n"
+		"2:\txorl %%eax,%%eax\n\t"
+		"jmp 4f\n"
+		"3:\tsbbl %%eax,%%eax\n\t"
+		"orb $1,%%al\n"
+		"4:"
+		:"=a" (__res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
+		:"1" (cs),"2" (ct),"3" (count)
+		: "memory");
+	return __res;
+}
+#endif
+
+
+#ifdef L_strchr
+/*
+ * Return a pointer to the first byte in s equal to (char)c, or NULL if the
+ * terminating NUL is reached first.  The NUL itself matches when c is 0.
+ */
+char * strchr(const char *s, int c)
+{
+	int d0;
+	register char * __res;
+	__asm__ __volatile__(
+		"movb %%al,%%ah\n"
+		"1:\tlodsb\n\t"
+		"cmpb %%ah,%%al\n\t"
+		"je 2f\n\t"
+		"testb %%al,%%al\n\t"
+		"jne 1b\n\t"
+		/* not found: 1 - 1 below yields NULL */
+		"movl $1,%1\n"
+		/* lodsb left %esi one past the byte just examined */
+		"2:\tmovl %1,%0\n\t"
+		"decl %0"
+		:"=a" (__res), "=&S" (d0) : "1" (s),"0" (c)
+		: "memory");
+	return __res;
+}
+#endif
+
+
+#ifdef L_strrchr
+/*
+ * Return a pointer to the last byte in s equal to (char)c, or NULL if
+ * there is none.  The result starts out as 0 and is overwritten on every
+ * match while the whole string is scanned.
+ */
+char *strrchr(const char *s, int c)
+{
+	int d0, d1;
+	register char * __res;
+	__asm__ __volatile__(
+		"movb %%al,%%ah\n"
+		"1:\tlodsb\n\t"
+		"cmpb %%ah,%%al\n\t"
+		"jne 2f\n\t"
+		"leal -1(%%esi),%0\n"
+		"2:\ttestb %%al,%%al\n\t"
+		"jne 1b"
+		/* %0 is the destination of leal, so it must be a register
+		 * ("=r"); "=g" would also have allowed a memory operand. */
+		:"=r" (__res), "=&S" (d0), "=&a" (d1) :"0" (0),"1" (s),"2" (c)
+		: "memory");
+	return __res;
+}
+#endif
+
+
+#ifdef L_strlen
+/*
+ * Return the number of bytes in s before the terminating NUL.
+ * %ecx starts at 0xffffffff and is decremented once per byte scanned
+ * (NUL included), so ~%ecx - 1 is the length.
+ */
+size_t strlen(const char *s)
+{
+	int d0;
+	register int __res;
+	__asm__ __volatile__(
+		"repne\n\t"
+		"scasb\n\t"
+		"notl %0\n\t"
+		"decl %0"
+		:"=c" (__res), "=&D" (d0) :"1" (s),"a" (0), "0" (0xffffffff)
+		: "memory");
+	return __res;
+}
+#endif
+
+
+#ifdef L_strnlen
+/*
+ * Return the length of s, but examine at most count bytes: the result is
+ * count if no NUL is found among the first count bytes.
+ */
+size_t strnlen(const char *s, size_t count)
+{
+	int d0;
+	register int __res;
+	__asm__ __volatile__(
+		"movl %2,%0\n\t"
+		"jmp 2f\n"
+		"1:\tcmpb $0,(%0)\n\t"
+		"je 3f\n\t"
+		"incl %0\n"
+		"2:\tdecl %1\n\t"
+		"cmpl $-1,%1\n\t"
+		"jne 1b\n"
+		/* %0 is the scan pointer; subtracting s gives the length */
+		"3:\tsubl %2,%0"
+		:"=a" (__res), "=&d" (d0)
+		:"c" (s),"1" (count)
+		: "memory");
+	return __res;
+}
+#endif
+
+
+#ifdef L_memcpy
+/*
+ * Copy n bytes from 'from' to 'to' (the areas must not overlap): n/4
+ * dwords with "rep movsl", then one word if bit 1 of n is set and one
+ * byte if bit 0 is set.  Returns to.
+ */
+void *memcpy(void * to, const void * from, size_t n)
+{
+	int d0, d1, d2;
+	__asm__ __volatile__(
+		"rep ; movsl\n\t"
+		"testb $2,%b4\n\t"
+		"je 1f\n\t"
+		"movsw\n"
+		"1:\ttestb $1,%b4\n\t"
+		"je 2f\n\t"
+		"movsb\n"
+		"2:"
+		: "=&c" (d0), "=&D" (d1), "=&S" (d2)
+		:"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
+		: "memory");
+	return (to);
+}
+#endif
+
+
+#ifdef L_memmove
+/*
+ * Copy n bytes from src to dest; the areas may overlap.  When dest lies
+ * below src the copy runs forwards, otherwise it runs backwards from the
+ * last byte with the direction flag set (and cleared again afterwards).
+ * Returns dest.
+ */
+void *memmove(void *dest, const void *src, size_t n)
+{
+	int d0, d1, d2;
+	if (dest<src)
+		__asm__ __volatile__(
+			"rep\n\t"
+			"movsb"
+			: "=&c" (d0), "=&S" (d1), "=&D" (d2)
+			:"0" (n),"1" (src),"2" (dest)
+			: "memory");
+	else
+		__asm__ __volatile__(
+			"std\n\t"
+			"rep\n\t"
+			"movsb\n\t"
+			"cld"
+			: "=&c" (d0), "=&S" (d1), "=&D" (d2)
+			:"0" (n),
+			"1" (n-1+(const char *)src),
+			"2" (n-1+(char *)dest)
+			:"memory");
+	return dest;
+}
+#endif
+
+#ifdef L_memchr
+/*
+ * Return a pointer to the first of the count bytes at cs that equals
+ * (char)c, or NULL if there is none (or count is 0).
+ */
+void *memchr(const void *cs, int c, size_t count)
+{
+	int d0;
+	register void * __res;
+	if (!count)
+		return NULL;
+	__asm__ __volatile__(
+		"repne\n\t"
+		"scasb\n\t"
+		"je 1f\n\t"
+		/* not found: 1 - 1 below yields NULL */
+		"movl $1,%0\n"
+		/* scasb left %edi one past the matching byte */
+		"1:\tdecl %0"
+		:"=D" (__res), "=&c" (d0) : "a" (c),"0" (cs),"1" (count)
+		: "memory");
+	return __res;
+}
+#endif
+
+#ifdef L_memset
+/* Fill the count bytes at s with the low byte of c.  Returns s. */
+void *memset(void *s, int c, size_t count)
+{
+	int d0, d1;
+	__asm__ __volatile__(
+		"rep\n\t"
+		"stosb"
+		: "=&c" (d0), "=&D" (d1)
+		:"a" (c),"1" (s),"0" (count)
+		:"memory");
+	return s;
+}
+#endif
+