summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--libc/string/i386/memcpy.c29
1 files changed, 15 insertions, 14 deletions
diff --git a/libc/string/i386/memcpy.c b/libc/string/i386/memcpy.c
index 285583f3b..216ddfd1a 100644
--- a/libc/string/i386/memcpy.c
+++ b/libc/string/i386/memcpy.c
@@ -35,19 +35,20 @@
/* Experimentally off - libc_hidden_proto(memcpy) */
void *memcpy(void * to, const void * from, size_t n)
{
- int d0, d1, d2;
- __asm__ __volatile__(
- "rep ; movsl\n\t"
- "testb $2,%b4\n\t"
- "je 1f\n\t"
- "movsw\n"
- "1:\ttestb $1,%b4\n\t"
- "je 2f\n\t"
- "movsb\n"
- "2:"
- : "=&c" (d0), "=&D" (d1), "=&S" (d2)
- :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
- : "memory");
- return (to);
+ int d0, d1, d2;
+ __asm__ __volatile__(
+ " rep; movsl\n"
+ " movl %4,%%ecx\n"
+ " andl $3,%%ecx\n"
+ /* jz is optional. avoids "rep; movsb" with ecx == 0,
+ * but adds a branch, which is currently (2008) faster */
+ " jz 1f\n"
+ " rep; movsb\n"
+ "1:\n"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
+ : "memory"
+ );
+ return to;
}
libc_hidden_def(memcpy)