diff options
Diffstat (limited to 'libc/string/x86_64/strcat.S')
-rw-r--r-- | libc/string/x86_64/strcat.S | 19 |
1 files changed, 14 insertions, 5 deletions
diff --git a/libc/string/x86_64/strcat.S b/libc/string/x86_64/strcat.S index 9e2da50f2..23d068fea 100644 --- a/libc/string/x86_64/strcat.S +++ b/libc/string/x86_64/strcat.S @@ -45,7 +45,9 @@ ENTRY (BP_SYM (strcat)) /* Now the source is aligned. Scan for NUL byte. */ - .p2align 4 + + /* Next 3 insns are 10 bytes total, make sure we decode them in one go */ + .p2align 4,,10 4: /* First unroll. */ movq (%rax), %rcx /* get double word (= 8 bytes) in question */ @@ -103,8 +105,11 @@ ENTRY (BP_SYM (strcat)) the addition will not result in 0. */ jz 4b /* no NUL found => continue loop */ - .p2align 4 /* Align, it is a jump target. */ -3: subq $8,%rax /* correct pointer increment. */ + /* Align, it is a jump target. */ + /* Next 3 insns are 8 bytes total, make sure we decode them in one go */ + .p2align 3,,8 +3: + subq $8,%rax /* correct pointer increment. */ testb %cl, %cl /* is first byte NUL? */ jz 2f /* yes => return */ @@ -160,7 +165,9 @@ ENTRY (BP_SYM (strcat)) /* Now the source is aligned. Unfortunately we cannot force to have both source and destination aligned, so ignore the alignment of the destination. */ - .p2align 4 + + /* Next 3 insns are 10 bytes total, make sure we decode them in one go */ + .p2align 4,,10 22: /* 1st unroll. */ movq (%rsi), %rax /* Read double word (8 bytes). */ @@ -237,7 +244,9 @@ ENTRY (BP_SYM (strcat)) /* Do the last few bytes. %rax contains the value to write. The loop is unrolled twice. */ - .p2align 4 + + /* Next 3 insns are 6 bytes total, make sure we decode them in one go */ + .p2align 3,,6 23: movb %al, (%rdx) /* 1st byte. */ testb %al, %al /* Is it NUL. */ |