summaryrefslogtreecommitdiff
path: root/libc/string/x86_64/strcspn.S
diff options
context:
space:
mode:
authorDenis Vlasenko <vda.linux@googlemail.com>2008-04-15 08:27:24 +0000
committerDenis Vlasenko <vda.linux@googlemail.com>2008-04-15 08:27:24 +0000
commitdf7958a9606a342e3c3ac5a40fc41f3a79669d62 (patch)
treef22657788d4ca4bd427f7ff7247dfa353590ac9c /libc/string/x86_64/strcspn.S
parent534dfb536f19737f2642ee56dd67a97c5db6a74e (diff)
amd64 string ops: use alignment more carefully, and comment it.
By capping max padding to not be bigger than three next insns, we avoid having ridiculously big NOPs like this one: 53:66 66 66 66 2e 0f 1f nopw %cs:0x0(%rax,%rax,1) 5a:84 00 00 00 00 00 which was bigger than next three insns combined! Size changes: text data bss dec hex filename 102 0 0 102 66 x86_64/memcpy.o 102 0 0 102 66 x86_64.old/memcpy.o 90 0 0 90 5a x86_64/mempcpy.o 102 0 0 102 66 x86_64.old/mempcpy.o 210 0 0 210 d2 x86_64/memset.o 242 0 0 242 f2 x86_64.old/memset.o 213 0 0 213 d5 x86_64/stpcpy.o 220 0 0 220 dc x86_64.old/stpcpy.o 428 0 0 428 1ac x86_64/strcat.o 444 0 0 444 1bc x86_64.old/strcat.o 417 0 0 417 1a1 x86_64/strchr.o 418 0 0 418 1a2 x86_64.old/strchr.o 33 0 0 33 21 x86_64/strcmp.o 33 0 0 33 21 x86_64.old/strcmp.o 213 0 0 213 d5 x86_64/strcpy.o 220 0 0 220 dc x86_64.old/strcpy.o 135 0 0 135 87 x86_64/strcspn.o 151 0 0 151 97 x86_64.old/strcspn.o 225 0 0 225 e1 x86_64/strlen.o 233 0 0 233 e9 x86_64.old/strlen.o 140 0 0 140 8c x86_64/strpbrk.o 156 0 0 156 9c x86_64.old/strpbrk.o 135 0 0 135 87 x86_64/strspn.o 151 0 0 151 97 x86_64.old/strspn.o Also, a few files got their .text alignment relaxed from 16 to 8 bytes, which reduces padding at link time.
Diffstat (limited to 'libc/string/x86_64/strcspn.S')
-rw-r--r--libc/string/x86_64/strcspn.S12
1 files changed, 10 insertions, 2 deletions
diff --git a/libc/string/x86_64/strcspn.S b/libc/string/x86_64/strcspn.S
index 9563de496..fd9b09c48 100644
--- a/libc/string/x86_64/strcspn.S
+++ b/libc/string/x86_64/strcspn.S
@@ -55,7 +55,9 @@ ENTRY (strcspn)
Although all the following instruction only modify %cl we always
have a correct zero-extended 64-bit value in %rcx. */
- .p2align 4
+ /* Next 3 insns are 6 bytes total, make sure we decode them in one go */
+ .p2align 3,,6
+
L(2): movb (%rax), %cl /* get byte from skipset */
testb %cl, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
@@ -88,7 +90,13 @@ L(1): leaq -4(%rdx), %rax /* prepare loop */
value in the table. But the value of NUL is NUL so the loop
terminates for NUL in every case. */
- .p2align 4
+ /* Next 3 insns are 9 bytes total. */
+ /* .p2align 4,,9 would make sure we decode them in one go, */
+ /* but it will also align entire function to 16 bytes, */
+ /* potentially creating largish padding at link time. */
+ /* We are aligning to 8 bytes instead: */
+ .p2align 3,,8
+
L(3): addq $4, %rax /* adjust pointer for full loop round */
movb (%rax), %cl /* get byte from string */