diff options
-rw-r--r-- | libc/string/i386/memcpy.c | 29 |
1 files changed, 15 insertions, 14 deletions
diff --git a/libc/string/i386/memcpy.c b/libc/string/i386/memcpy.c index 285583f3b..216ddfd1a 100644 --- a/libc/string/i386/memcpy.c +++ b/libc/string/i386/memcpy.c @@ -35,19 +35,20 @@ /* Experimentally off - libc_hidden_proto(memcpy) */ void *memcpy(void * to, const void * from, size_t n) { - int d0, d1, d2; - __asm__ __volatile__( - "rep ; movsl\n\t" - "testb $2,%b4\n\t" - "je 1f\n\t" - "movsw\n" - "1:\ttestb $1,%b4\n\t" - "je 2f\n\t" - "movsb\n" - "2:" - : "=&c" (d0), "=&D" (d1), "=&S" (d2) - :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from) - : "memory"); - return (to); + int d0, d1, d2; + __asm__ __volatile__( + " rep; movsl\n" + " movl %4,%%ecx\n" + " andl $3,%%ecx\n" + /* jz is optional. avoids "rep; movsb" with ecx == 0, + * but adds a branch, which is currently (2008) faster */ + " jz 1f\n" + " rep; movsb\n" + "1:\n" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from) + : "memory" + ); + return to; } libc_hidden_def(memcpy) |