summary refs log tree commit diff
path: root/libc/string/x86_64/strcat.S
diff options
context:
space:
mode:
Diffstat (limited to 'libc/string/x86_64/strcat.S')
-rw-r--r-- libc/string/x86_64/strcat.S 19
1 files changed, 14 insertions, 5 deletions
diff --git a/libc/string/x86_64/strcat.S b/libc/string/x86_64/strcat.S
index 9e2da50f2..23d068fea 100644
--- a/libc/string/x86_64/strcat.S
+++ b/libc/string/x86_64/strcat.S
@@ -45,7 +45,9 @@ ENTRY (BP_SYM (strcat))
/* Now the source is aligned. Scan for NUL byte. */
- .p2align 4
+
+ /* Next 3 insns are 10 bytes total, make sure we decode them in one go */
+ .p2align 4,,10
4:
/* First unroll. */
movq (%rax), %rcx /* get double word (= 8 bytes) in question */
@@ -103,8 +105,11 @@ ENTRY (BP_SYM (strcat))
the addition will not result in 0. */
jz 4b /* no NUL found => continue loop */
- .p2align 4 /* Align, it is a jump target. */
-3: subq $8,%rax /* correct pointer increment. */
+ /* Align, it is a jump target. */
+ /* Next 3 insns are 8 bytes total, make sure we decode them in one go */
+ .p2align 3,,8
+3:
+ subq $8,%rax /* correct pointer increment. */
testb %cl, %cl /* is first byte NUL? */
jz 2f /* yes => return */
@@ -160,7 +165,9 @@ ENTRY (BP_SYM (strcat))
/* Now the sources is aligned. Unfortunatly we cannot force
to have both source and destination aligned, so ignore the
alignment of the destination. */
- .p2align 4
+
+ /* Next 3 insns are 10 bytes total, make sure we decode them in one go */
+ .p2align 4,,10
22:
/* 1st unroll. */
movq (%rsi), %rax /* Read double word (8 bytes). */
@@ -237,7 +244,9 @@ ENTRY (BP_SYM (strcat))
/* Do the last few bytes. %rax contains the value to write.
The loop is unrolled twice. */
- .p2align 4
+
+ /* Next 3 insns are 6 bytes total, make sure we decode them in one go */
+ .p2align 3,,6
23:
movb %al, (%rdx) /* 1st byte. */
testb %al, %al /* Is it NUL. */