diff options
Diffstat (limited to 'libc/string/x86_64/strspn.S')
-rw-r--r-- | libc/string/x86_64/strspn.S | 16 |
1 file changed, 12 insertions, 4 deletions
diff --git a/libc/string/x86_64/strspn.S b/libc/string/x86_64/strspn.S index 416424565..41cff0490 100644 --- a/libc/string/x86_64/strspn.S +++ b/libc/string/x86_64/strspn.S @@ -50,8 +50,10 @@ ENTRY (strspn) Although all the following instruction only modify %cl we always have a correct zero-extended 64-bit value in %rcx. */ - .p2align 4 -L(2): movb (%rax), %cl /* get byte from stopset */ + /* Next 3 insns are 6 bytes total, make sure we decode them in one go */ + .p2align 3,,6 +L(2): + movb (%rax), %cl /* get byte from stopset */ testb %cl, %cl /* is NUL char? */ jz L(1) /* yes => start compare loop */ movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */ @@ -83,8 +85,14 @@ L(1): leaq -4(%rdx), %rax /* prepare loop */ value in the table. But the value of NUL is NUL so the loop terminates for NUL in every case. */ - .p2align 4 -L(3): addq $4, %rax /* adjust pointer for full loop round */ + /* Next 3 insns are 9 bytes total. */ + /* .p2align 4,,9 would make sure we decode them in one go, */ + /* but it will also align entire function to 16 bytes, */ + /* potentially creating largish padding at link time. */ + /* We are aligning to 8 bytes instead: */ + .p2align 3,,8 +L(3): + addq $4, %rax /* adjust pointer for full loop round */ movb (%rax), %cl /* get byte from string */ testb %cl, (%rsp,%rcx) /* is it contained in skipset? */ |