diff options
| -rw-r--r-- | libc/string/i386/memcpy.c | 29 | 
1 files changed, 15 insertions, 14 deletions
diff --git a/libc/string/i386/memcpy.c b/libc/string/i386/memcpy.c index 285583f3b..216ddfd1a 100644 --- a/libc/string/i386/memcpy.c +++ b/libc/string/i386/memcpy.c @@ -35,19 +35,20 @@  /* Experimentally off - libc_hidden_proto(memcpy) */  void *memcpy(void * to, const void * from, size_t n)  { -    int d0, d1, d2; -    __asm__ __volatile__( -	    "rep ; movsl\n\t" -	    "testb $2,%b4\n\t" -	    "je 1f\n\t" -	    "movsw\n" -	    "1:\ttestb $1,%b4\n\t" -	    "je 2f\n\t" -	    "movsb\n" -	    "2:" -	    : "=&c" (d0), "=&D" (d1), "=&S" (d2) -	    :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from) -	    : "memory"); -    return (to); +	int d0, d1, d2; +	__asm__ __volatile__( +		"	rep; movsl\n" +		"	movl %4,%%ecx\n" +		"	andl $3,%%ecx\n" +		/* jz is optional. avoids "rep; movsb" with ecx == 0, +		 * but adds a branch, which is currently (2008) faster */ +		"	jz 1f\n" +		"	rep; movsb\n" +		"1:\n" +		: "=&c" (d0), "=&D" (d1), "=&S" (d2) +		: "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from) +		: "memory" +	); +	return to;  }  libc_hidden_def(memcpy)  | 
