summary refs log tree commit diff
path: root/libc/string/x86_64/strspn.S
diff options
context:
space:
mode:
Diffstat (limited to 'libc/string/x86_64/strspn.S')
-rw-r--r-- libc/string/x86_64/strspn.S 16
1 files changed, 12 insertions, 4 deletions
diff --git a/libc/string/x86_64/strspn.S b/libc/string/x86_64/strspn.S
index 416424565..41cff0490 100644
--- a/libc/string/x86_64/strspn.S
+++ b/libc/string/x86_64/strspn.S
@@ -50,8 +50,10 @@ ENTRY (strspn)
Although all the following instruction only modify %cl we always
have a correct zero-extended 64-bit value in %rcx. */
- .p2align 4
-L(2): movb (%rax), %cl /* get byte from stopset */
+ /* Next 3 insns are 6 bytes total; make sure we decode them in one go. */
+ .p2align 3,,6
+L(2):
+ movb (%rax), %cl /* get byte from stopset */
testb %cl, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
@@ -83,8 +85,14 @@ L(1): leaq -4(%rdx), %rax /* prepare loop */
value in the table. But the value of NUL is NUL so the loop
terminates for NUL in every case. */
- .p2align 4
-L(3): addq $4, %rax /* adjust pointer for full loop round */
+ /* Next 3 insns are 9 bytes total. */
+ /* .p2align 4,,9 would make sure we decode them in one go, */
+ /* but it will also align entire function to 16 bytes, */
+ /* potentially creating largish padding at link time. */
+ /* We are aligning to 8 bytes instead: */
+ .p2align 3,,8
+L(3):
+ addq $4, %rax /* adjust pointer for full loop round */
movb (%rax), %cl /* get byte from string */
testb %cl, (%rsp,%rcx) /* is it contained in skipset? */