From e34f25503ad8f9d5b7e8d1801cbe345ce3daa949 Mon Sep 17 00:00:00 2001
From: Denis Vlasenko
Date: Tue, 9 Dec 2008 13:23:59 +0000
Subject: smaller memcpy

text data bss dec hex filename
- 39 0 0 39 27 libc/string/i386/memcpy.os
+ 35 0 0 35 23 libc/string/i386/memcpy.os
---
 libc/string/i386/memcpy.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

(limited to 'libc')

diff --git a/libc/string/i386/memcpy.c b/libc/string/i386/memcpy.c
index 285583f3b..216ddfd1a 100644
--- a/libc/string/i386/memcpy.c
+++ b/libc/string/i386/memcpy.c
@@ -35,19 +35,20 @@
 /* Experimentally off - libc_hidden_proto(memcpy) */
 void *memcpy(void * to, const void * from, size_t n)
 {
- int d0, d1, d2;
- __asm__ __volatile__(
- "rep ; movsl\n\t"
- "testb $2,%b4\n\t"
- "je 1f\n\t"
- "movsw\n"
- "1:\ttestb $1,%b4\n\t"
- "je 2f\n\t"
- "movsb\n"
- "2:"
- : "=&c" (d0), "=&D" (d1), "=&S" (d2)
- :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
- : "memory");
- return (to);
+ int d0, d1, d2;
+ __asm__ __volatile__(
+ " rep; movsl\n"
+ " movl %4,%%ecx\n"
+ " andl $3,%%ecx\n"
+ /* jz is optional. avoids "rep; movsb" with ecx == 0,
+ * but adds a branch, which is currently (2008) faster */
+ " jz 1f\n"
+ " rep; movsb\n"
+ "1:\n"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
+ : "memory"
+ );
+ return to;
 }
 libc_hidden_def(memcpy)
-- 
cgit v1.2.3