summaryrefslogtreecommitdiff
path: root/libc/string
diff options
context:
space:
mode:
Diffstat (limited to 'libc/string')
-rw-r--r--libc/string/arm/_memcpy.S182
-rw-r--r--libc/string/arm/bcopy.S12
-rw-r--r--libc/string/arm/bzero.S12
-rw-r--r--libc/string/arm/memcmp.S28
-rw-r--r--libc/string/arm/memcpy.S11
-rw-r--r--libc/string/arm/memmove.S11
-rw-r--r--libc/string/arm/memset.S62
-rw-r--r--libc/string/arm/strcmp.S19
-rw-r--r--libc/string/arm/strlen.S25
-rw-r--r--libc/string/arm/strncmp.S33
10 files changed, 377 insertions, 18 deletions
diff --git a/libc/string/arm/_memcpy.S b/libc/string/arm/_memcpy.S
index 3704f96b5..5ef63c45a 100644
--- a/libc/string/arm/_memcpy.S
+++ b/libc/string/arm/_memcpy.S
@@ -39,7 +39,9 @@
#include <features.h>
#include <endian.h>
+#include <bits/arm_asm.h>
+#if !defined(THUMB1_ONLY)
/*
* This is one fun bit of code ...
* Some easy listening music is suggested while trying to understand this
@@ -77,11 +79,36 @@
.type _memcpy,%function
.align 4
+/* XXX: The Thumb-2 conditionals can be removed if/when we require an
+ assembler that supports unified syntax. */
+.macro copy regs /* if GE: load the register list from [r1] and store it to [r0], post-incrementing both pointers */
+#if defined(__thumb2__)
+ ittt ge /* IT block covers three instructions: the two below plus the "ge" instruction each call site places right after the macro */
+ ldmiage r1!, \regs
+ stmiage r0!, \regs
+#else
+ ldmgeia r1!, \regs
+ stmgeia r0!, \regs
+#endif
+.endm
+
+.macro copydb regs /* if GE: load the register list from below [r1] and store it below [r0], pre-decrementing both pointers */
+#if defined(__thumb2__)
+ ittt ge /* IT block covers three instructions: the two below plus the "ge" instruction each call site places right after the macro */
+ ldmdbge r1!, \regs
+ stmdbge r0!, \regs
+#else
+ ldmgedb r1!, \regs
+ stmgedb r0!, \regs
+#endif
+.endm
+
_memcpy:
/* Determine copy direction */
cmp r1, r0
bcc .Lmemcpy_backwards
+ IT(tt, eq)
moveq r0, #0 /* Quick abort for len=0 */
#if defined(__USE_BX__)
bxeq lr
@@ -102,7 +129,7 @@ _memcpy:
blt .Lmemcpy_fl12 /* less than 12 bytes (4 from above) */
subs r2, r2, #0x14
blt .Lmemcpy_fl32 /* less than 32 bytes (12 from above) */
- stmdb sp!, {r4} /* borrow r4 */
+ str r4, [sp, #-4]! /* borrow r4 */
/* blat 32 bytes at a time */
/* XXX for really big copies perhaps we should use more registers */
@@ -115,19 +142,22 @@ _memcpy:
bge .Lmemcpy_floop32
cmn r2, #0x10
- ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
- stmgeia r0!, {r3, r4, r12, lr}
+ /* blat a remaining 16 bytes */
+ copy "{r3, r4, r12, lr}"
subge r2, r2, #0x10
- ldmia sp!, {r4} /* return r4 */
+ ldr r4, [sp], #4 /* restore r4 */
.Lmemcpy_fl32:
adds r2, r2, #0x14
/* blat 12 bytes at a time */
.Lmemcpy_floop12:
- ldmgeia r1!, {r3, r12, lr}
- stmgeia r0!, {r3, r12, lr}
+ copy "{r3, r12, lr}"
+#if defined(__thumb2__)
+ subsge r2, r2, #0x0c
+#else
subges r2, r2, #0x0c
+#endif
bge .Lmemcpy_floop12
.Lmemcpy_fl12:
@@ -135,26 +165,48 @@ _memcpy:
blt .Lmemcpy_fl4
subs r2, r2, #4
+ IT(tt, lt)
ldrlt r3, [r1], #4
strlt r3, [r0], #4
- ldmgeia r1!, {r3, r12}
- stmgeia r0!, {r3, r12}
+ copy "{r3, r12}"
subge r2, r2, #4
.Lmemcpy_fl4:
/* less than 4 bytes to go */
adds r2, r2, #4
+#if defined(__thumb2__)
+ it eq
+ popeq {r0, pc} /* done */
+#elif defined(__ARM_ARCH_4T__)
+ ldmeqia sp!, {r0, r3} /* done */
+ bxeq r3
+#else
ldmeqia sp!, {r0, pc} /* done */
+#endif
/* copy the crud byte at a time */
cmp r2, #2
ldrb r3, [r1], #1
strb r3, [r0], #1
+#if defined(__thumb2__)
+ itt ge
+ ldrbge r3, [r1], #1
+ strbge r3, [r0], #1
+ itt gt
+ ldrbgt r3, [r1], #1
+ strbgt r3, [r0], #1
+#else
ldrgeb r3, [r1], #1
strgeb r3, [r0], #1
ldrgtb r3, [r1], #1
strgtb r3, [r0], #1
+#endif
+#if defined(__ARM_ARCH_4T__)
+ ldmia sp!, {r0, r3}
+ bx r3
+#else
ldmia sp!, {r0, pc}
+#endif
/* erg - unaligned destination */
.Lmemcpy_fdestul:
@@ -164,10 +216,19 @@ _memcpy:
/* align destination with byte copies */
ldrb r3, [r1], #1
strb r3, [r0], #1
+#if defined(__thumb2__)
+ itt ge
+ ldrbge r3, [r1], #1
+ strbge r3, [r0], #1
+ itt gt
+ ldrbgt r3, [r1], #1
+ strbgt r3, [r0], #1
+#else
ldrgeb r3, [r1], #1
strgeb r3, [r0], #1
ldrgtb r3, [r1], #1
strgtb r3, [r0], #1
+#endif
subs r2, r2, r12
blt .Lmemcpy_fl4 /* less the 4 bytes */
@@ -370,12 +431,12 @@ _memcpy:
.Lmemcpy_bl32:
cmn r2, #0x10
- ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
- stmgedb r0!, {r3, r4, r12, lr}
+ /* blat a remaining 16 bytes */
+ copydb "{r3, r4, r12, lr}"
subge r2, r2, #0x10
adds r2, r2, #0x14
- ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
- stmgedb r0!, {r3, r12, lr}
+ /* blat a remaining 12 bytes */
+ copydb "{r3, r12, lr}"
subge r2, r2, #0x0c
ldmia sp!, {r4, lr}
@@ -383,15 +444,16 @@ _memcpy:
adds r2, r2, #8
blt .Lmemcpy_bl4
subs r2, r2, #4
+ IT(tt, lt)
ldrlt r3, [r1, #-4]!
strlt r3, [r0, #-4]!
- ldmgedb r1!, {r3, r12}
- stmgedb r0!, {r3, r12}
+ copydb "{r3, r12}"
subge r2, r2, #4
.Lmemcpy_bl4:
/* less than 4 bytes to go */
adds r2, r2, #4
+ IT(t, eq)
#if defined(__USE_BX__)
bxeq lr
#else
@@ -401,10 +463,19 @@ _memcpy:
cmp r2, #2
ldrb r3, [r1, #-1]!
strb r3, [r0, #-1]!
+#ifdef __thumb2__
+ itt ge
+ ldrbge r3, [r1, #-1]!
+ strbge r3, [r0, #-1]!
+ itt gt
+ ldrbgt r3, [r1, #-1]!
+ strbgt r3, [r0, #-1]!
+#else
ldrgeb r3, [r1, #-1]!
strgeb r3, [r0, #-1]!
ldrgtb r3, [r1, #-1]!
strgtb r3, [r0, #-1]!
+#endif
#if defined(__USE_BX__)
bx lr
#else
@@ -417,10 +488,19 @@ _memcpy:
/* align destination with byte copies */
ldrb r3, [r1, #-1]!
strb r3, [r0, #-1]!
+#ifdef __thumb2__
+ itt ge
+ ldrbge r3, [r1, #-1]!
+ strbge r3, [r0, #-1]!
+ itt gt
+ ldrbgt r3, [r1, #-1]!
+ strbgt r3, [r0, #-1]!
+#else
ldrgeb r3, [r1, #-1]!
strgeb r3, [r0, #-1]!
ldrgtb r3, [r1, #-1]!
strgtb r3, [r0, #-1]!
+#endif
subs r2, r2, r12
blt .Lmemcpy_bl4 /* less than 4 bytes to go */
ands r12, r1, #3
@@ -591,3 +671,77 @@ _memcpy:
.Lmemcpy_bsrcul1l4:
add r1, r1, #1
b .Lmemcpy_bl4
+
+#else /* THUMB1_ONLY */
+
+/* This is a fairly dumb implementation for when we can't use the 32-bit code
+ above. */
+.text
+.global _memcpy
+.hidden _memcpy
+.type _memcpy,%function
+.align 4
+.thumb
+_memcpy: /* Thumb-1 copy that tolerates overlapping regions; in: r0 = dest, r1 = src, r2 = length; out: r0 = original dest */
+ push {r0, r4} /* save dest (popped back into r0 on exit) and r4, which is used as scratch */
+ cmp r2, #0
+ beq .Lmemcpy_exit /* zero length: nothing to copy */
+ @ See if we have overlapping regions, and need to reverse the
+ @ direction of the copy
+ cmp r0, r1
+ bls .Lmemcpy_forwards /* dest <= src: copying forwards is safe */
+ add r4, r1, r2 /* r4 = src + length */
+ cmp r0, r4
+ bcc .Lmemcpy_backwards /* src < dest < src + length: copy from the end */
+.Lmemcpy_forwards:
+ /* Forwards. */
+ mov r3, r0
+ eor r3, r1
+ mov r4, #3
+ tst r3, r4 /* do dest and src differ in their low two bits? */
+ bne .Lmemcpy_funaligned /* yes: they can never be word aligned at the same time */
+ cmp r2, #8
+ bcc .Lmemcpy_funaligned /* fewer than 8 bytes: use the byte loop */
+1: @ copy up to the first word boundary.
+ tst r0, r4 /* r4 is still 3: test the low two bits of dest */
+ beq 1f
+ ldrb r3, [r1]
+ add r1, r1, #1
+ strb r3, [r0]
+ add r0, r0, #1
+ sub r2, r2, #1
+ b 1b
+1: @ Copy aligned words
+ ldr r3, [r1]
+ add r1, r1, #4
+ str r3, [r0]
+ add r0, r0, #4
+ sub r2, r2, #4
+ cmp r2, #4
+ bcs 1b /* loop while at least 4 bytes remain */
+ cmp r2, #0
+ beq .Lmemcpy_exit /* no tail bytes left */
+.Lmemcpy_funaligned:
+1:
+ ldrb r3, [r1]
+ add r1, r1, #1
+ strb r3, [r0]
+ add r0, r0, #1
+ sub r2, r2, #1 /* the following bne relies on this sub updating the flags */
+ bne 1b
+.Lmemcpy_exit:
+ pop {r0, r4} /* r0 = dest as passed in; restore r4 */
+ bx lr
+
+.Lmemcpy_backwards:
+ add r0, r0, r2 /* r0 = dest + length */
+ add r1, r1, r2 /* r1 = src + length */
+1:
+ sub r0, r0, #1
+ sub r1, r1, #1
+ ldrb r3, [r1]
+ strb r3, [r0]
+ sub r2, r2, #1 /* the following bne relies on this sub updating the flags */
+ bne 1b
+ b .Lmemcpy_exit
+#endif
diff --git a/libc/string/arm/bcopy.S b/libc/string/arm/bcopy.S
index db3c9e6c1..2d6e90d13 100644
--- a/libc/string/arm/bcopy.S
+++ b/libc/string/arm/bcopy.S
@@ -40,6 +40,7 @@
/* bcopy = memcpy/memmove with arguments reversed. */
#include <features.h>
+#include <bits/arm_asm.h>
#ifdef __UCLIBC_SUSV3_LEGACY__
@@ -48,12 +49,23 @@
.type bcopy,%function
.align 4
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+bcopy: /* Thumb-1 stub: swap the first two arguments and call _memcpy */
+ push {r2, lr} /* save the return address; r2 fills the second slot. NOTE(review): presumably sized to match POP_RET -- confirm */
+ mov ip, r0 /* swap r0 and r1 through ip */
+ mov r0, r1
+ mov r1, ip
+ bl _memcpy
+ POP_RET /* macro from <bits/arm_asm.h>, not visible here; presumably pops the pair pushed above and returns -- confirm */
+#else
bcopy:
/* switch the source and destination registers */
eor r0, r1, r0
eor r1, r0, r1
eor r0, r1, r0
b _memcpy /* (PLT) */
+#endif
.size bcopy,.-bcopy
diff --git a/libc/string/arm/bzero.S b/libc/string/arm/bzero.S
index ee49cf560..e576a12e9 100644
--- a/libc/string/arm/bzero.S
+++ b/libc/string/arm/bzero.S
@@ -38,6 +38,7 @@
*/
#include <features.h>
+#include <bits/arm_asm.h>
#ifdef __UCLIBC_SUSV3_LEGACY__
@@ -46,10 +47,21 @@
.type bzero,%function
.align 4
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+bzero: /* Thumb-1 stub: call memset with a zero fill value; in: r0 = buffer, r1 = length */
+ push {r2, lr} /* save the return address; r2 fills the second slot. NOTE(review): presumably sized to match POP_RET -- confirm */
+ mov r2, r1 /* the length becomes the third memset argument */
+ mov r1, #0 /* fill value 0 */
+ bl HIDDEN_JUMPTARGET(memset)
+ POP_RET /* macro from <bits/arm_asm.h>, not visible here; presumably pops the pair pushed above and returns -- confirm */
+#else
+
bzero:
mov r2, r1
mov r1, #0
b HIDDEN_JUMPTARGET(memset)
+#endif
.size bzero,.-bzero
diff --git a/libc/string/arm/memcmp.S b/libc/string/arm/memcmp.S
index 4f78b5128..65409f43a 100644
--- a/libc/string/arm/memcmp.S
+++ b/libc/string/arm/memcmp.S
@@ -30,15 +30,41 @@
*/
#include <features.h>
+#include <bits/arm_asm.h>
.text
.global memcmp
.type memcmp,%function
.align 4
+#if defined(THUMB1_ONLY)
+.thumb_func
+memcmp: /* Thumb-1 byte-wise compare; in: r0, r1 = buffers, r2 = length; out: r0 = difference of the last byte pair loaded, or 0 when length is 0 */
+ cmp r2, #0
+ bne 1f
+ mov r0, #0 /* zero length: return 0 */
+ bx lr
+1:
+ push {r4} /* r4 is used below to hold the end address */
+ add r4, r0, r2 /* r4 = r0 + length; r2 is reused for data from here on */
+2:
+ ldrb r2, [r0] /* r2 = next byte of the first buffer */
+ add r0, r0, #1
+ ldrb r3, [r1] /* r3 = next byte of the second buffer */
+ add r1, r1, #1
+ cmp r4, r0
+ beq 3f /* that was the last byte: go compute the result */
+ cmp r2, r3
+ beq 2b /* bytes equal: keep scanning */
+3:
+ sub r0, r2, r3 /* result = byte from first buffer minus byte from second */
+ pop {r4}
+ bx lr
+#else
memcmp:
/* if ((len - 1) < 0) return 0 */
subs r2, r2, #1
+ IT(tt, mi)
movmi r0, #0
#if defined(__USE_BX__)
bxmi lr
@@ -51,6 +77,7 @@ memcmp:
ldrb r2, [r0], #1
ldrb r3, [r1], #1
cmp ip, r0
+ IT(t, cs)
cmpcs r2, r3
beq 1b
sub r0, r2, r3
@@ -59,6 +86,7 @@ memcmp:
#else
mov pc, lr
#endif
+#endif
.size memcmp,.-memcmp
diff --git a/libc/string/arm/memcpy.S b/libc/string/arm/memcpy.S
index 7a5b6ab76..d2013d211 100644
--- a/libc/string/arm/memcpy.S
+++ b/libc/string/arm/memcpy.S
@@ -38,16 +38,23 @@
*/
#include <features.h>
+#include <bits/arm_asm.h>
.text
.global memcpy
.type memcpy,%function
.align 4
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
memcpy:
- stmfd sp!, {r0, lr}
+ push {r0, lr} /* save the return address; r0 fills the second slot. NOTE(review): POP_RET is defined in <bits/arm_asm.h>, not visible here -- confirm which registers it pops */
bl _memcpy
- ldmfd sp!, {r0, pc}
+ POP_RET /* presumably pops the pair pushed above and returns -- confirm */
+#else
+memcpy:
+ b _memcpy /* plain branch, no link: lr still holds our caller's return address */
+#endif
.size memcpy,.-memcpy
diff --git a/libc/string/arm/memmove.S b/libc/string/arm/memmove.S
index 45cd9b4d4..c11b98dd4 100644
--- a/libc/string/arm/memmove.S
+++ b/libc/string/arm/memmove.S
@@ -38,16 +38,23 @@
*/
#include <features.h>
+#include <bits/arm_asm.h>
.text
.global memmove
.type memmove,%function
.align 4
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
memmove:
- stmfd sp!, {r0, lr}
+ push {r2, lr} /* save the return address; r2 fills the second slot. NOTE(review): presumably sized to match POP_RET -- confirm */
bl _memcpy
- ldmfd sp!, {r0, pc}
+ POP_RET /* macro from <bits/arm_asm.h>, not visible here; presumably pops the pair pushed above and returns -- confirm */
+#else
+memmove:
+ b _memcpy /* plain branch, no link: lr still holds our caller's return address */
+#endif
.size memmove,.-memmove
diff --git a/libc/string/arm/memset.S b/libc/string/arm/memset.S
index 16bfe0dc5..66aa6039c 100644
--- a/libc/string/arm/memset.S
+++ b/libc/string/arm/memset.S
@@ -19,12 +19,52 @@
#include <features.h>
#include <sys/syscall.h>
+#include <bits/arm_asm.h>
.text
.global memset
.type memset,%function
.align 4
+#if defined(THUMB1_ONLY)
+/* void *memset(void *s, int c, size_t n) -- Thumb-1 version.
+   In:  r0 = s, r1 = c, r2 = n.
+   Out: r0 = s.
+   r3 is scratch; ip keeps the original s for the return value.
+   Buffers of 8 bytes or more are filled bytewise up to a word boundary,
+   then a word at a time, then bytewise for the tail.  */
+.thumb_func
+memset:
+ mov ip, r0
+ cmp r2, #8 @ at least 8 bytes to do?
+ bcc 2f
+
+ @ memset fills with c converted to unsigned char, so drop every bit
+ @ above bit 7 before building the word pattern. Without this, a value
+ @ such as 0x100 would leak into the neighbouring bytes of each word.
+ lsl r1, r1, #24
+ lsr r1, r1, #24
+ @ Replicate the fill byte into all four bytes of r1.
+ lsl r3, r1, #8
+ orr r1, r3
+ lsl r3, r1, #16
+ orr r1, r3
+
+ mov r3, #3
+1: @ Fill up to the first word boundary
+ tst r0, r3
+ beq 1f
+ strb r1, [r0]
+ add r0, r0, #1
+ sub r2, r2, #1
+ b 1b
+1: @ Fill aligned words
+ str r1, [r0]
+ add r0, r0, #4
+ sub r2, r2, #4
+ cmp r2, #4
+ bcs 1b
+
+2: @ Fill the remaining bytes
+ cmp r2, #0
+ beq 2f
+1:
+ strb r1, [r0] @ strb stores only the low byte, so no mask is needed here
+ add r0, r0, #1
+ sub r2, r2, #1 @ the following bne relies on this sub updating the flags
+ bne 1b
+2:
+ mov r0, ip @ return the original pointer
+ bx lr
+#else
memset:
mov a4, a1
cmp a3, $8 @ at least 8 bytes to do?
@@ -33,8 +73,14 @@ memset:
orr a2, a2, a2, lsl $16
1:
tst a4, $3 @ aligned yet?
+#if defined(__thumb2__)
+ itt ne
+ strbne a2, [a4], $1
+ subne a3, a3, $1
+#else
strneb a2, [a4], $1
subne a3, a3, $1
+#endif
bne 1b
mov ip, a2
1:
@@ -51,16 +97,30 @@ memset:
stmia a4!, {a2, ip}
sub a3, a3, $8
cmp a3, $8 @ 8 bytes still to do?
+#if defined(__thumb2__)
+ itt ge
+ stmiage a4!, {a2, ip}
+ subge a3, a3, $8
+#else
stmgeia a4!, {a2, ip}
subge a3, a3, $8
+#endif
bge 1b
2:
movs a3, a3 @ anything left?
+ IT(t, eq)
#if defined(__USE_BX__)
bxeq lr
#else
moveq pc, lr @ nope
#endif
+#if defined (__thumb2__)
+1:
+ strb a2, [a4], #1
+ subs a3, a3, #1
+ bne 1b
+ bx lr
+#else
rsb a3, a3, $7
add pc, pc, a3, lsl $2
mov r0, r0
@@ -76,6 +136,8 @@ memset:
#else
mov pc, lr
#endif
+#endif
+#endif
.size memset,.-memset
diff --git a/libc/string/arm/strcmp.S b/libc/string/arm/strcmp.S
index 89aa38874..97363c1c2 100644
--- a/libc/string/arm/strcmp.S
+++ b/libc/string/arm/strcmp.S
@@ -30,17 +30,35 @@
*/
#include <features.h>
+#include <bits/arm_asm.h>
.text
.global strcmp
.type strcmp,%function
.align 4
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+strcmp: /* Thumb-1 byte-wise string compare; in: r0, r1 = strings; out: r0 = difference of the last byte pair loaded */
+1:
+ ldrb r2, [r0] /* r2 = next byte of the first string */
+ add r0, r0, #1
+ ldrb r3, [r1] /* r3 = next byte of the second string */
+ add r1, r1, #1
+ cmp r2, #0
+ beq 2f /* first string ended: stop */
+ cmp r2, r3
+ beq 1b /* bytes equal: keep scanning */
+2:
+ sub r0, r2, r3 /* result = byte from first string minus byte from second */
+ bx lr
+#else
strcmp:
1:
ldrb r2, [r0], #1
ldrb r3, [r1], #1
cmp r2, #1
+ IT(t, cs)
cmpcs r2, r3
beq 1b
sub r0, r2, r3
@@ -49,6 +67,7 @@ strcmp:
#else
mov pc, lr
#endif
+#endif
.size strcmp,.-strcmp
diff --git a/libc/string/arm/strlen.S b/libc/string/arm/strlen.S
index 5b4b02e17..949e918f4 100644
--- a/libc/string/arm/strlen.S
+++ b/libc/string/arm/strlen.S
@@ -20,6 +20,7 @@
#include <features.h>
#include <endian.h>
#include <sys/syscall.h>
+#include <bits/arm_asm.h>
/* size_t strlen(const char *S)
* entry: r0 -> string
@@ -31,6 +32,19 @@
.type strlen,%function
.align 4
+#if defined(THUMB1_ONLY)
+/* A simple implementation for when the ARM implementation can't be used. */
+.thumb_func
+strlen: /* in: r0 = string; out: r0 = number of bytes before the first zero byte */
+ mov r2, #0 /* r2 = index of the next byte to examine */
+1:
+ ldrb r1, [r0, r2]
+ add r2, r2, #1
+ cmp r1, #0
+ bne 1b /* not the terminator: keep scanning */
+ sub r0, r2, #1 /* r2 was advanced past the zero byte; subtract it back out */
+ bx lr
+#else
strlen:
bic r1, r0, $3 @ addr of word containing first byte
ldr r2, [r1], $4 @ get the first word
@@ -41,38 +55,48 @@ strlen:
#if __BYTE_ORDER == __BIG_ENDIAN
orr r2, r2, $0xff000000 @ set this byte to non-zero
subs r3, r3, $1 @ any more to do?
+ IT(t, gt)
orrgt r2, r2, $0x00ff0000 @ if so, set this byte
subs r3, r3, $1 @ more?
+ IT(t, gt)
orrgt r2, r2, $0x0000ff00 @ then set.
#else
orr r2, r2, $0x000000ff @ set this byte to non-zero
subs r3, r3, $1 @ any more to do?
+ IT(t, gt)
orrgt r2, r2, $0x0000ff00 @ if so, set this byte
subs r3, r3, $1 @ more?
+ IT(t, gt)
orrgt r2, r2, $0x00ff0000 @ then set.
#endif
Laligned: @ here, we have a word in r2. Does it
tst r2, $0x000000ff @ contain any zeroes?
+ IT(tttt, ne)
tstne r2, $0x0000ff00 @
tstne r2, $0x00ff0000 @
tstne r2, $0xff000000 @
addne r0, r0, $4 @ if not, the string is 4 bytes longer
+ IT(t, ne)
ldrne r2, [r1], $4 @ and we continue to the next word
bne Laligned @
Llastword: @ drop through to here once we find a
#if __BYTE_ORDER == __BIG_ENDIAN
tst r2, $0xff000000 @ word that has a zero byte in it
+ IT(tttt, ne)
addne r0, r0, $1 @
tstne r2, $0x00ff0000 @ and add up to 3 bytes on to it
addne r0, r0, $1 @
tstne r2, $0x0000ff00 @ (if first three all non-zero, 4th
+ IT(t, ne)
addne r0, r0, $1 @ must be zero)
#else
tst r2, $0x000000ff @
+ IT(tttt, ne)
addne r0, r0, $1 @
tstne r2, $0x0000ff00 @ and add up to 3 bytes on to it
addne r0, r0, $1 @
tstne r2, $0x00ff0000 @ (if first three all non-zero, 4th
+ IT(t, ne)
addne r0, r0, $1 @ must be zero)
#endif
#if defined(__USE_BX__)
@@ -80,6 +104,7 @@ Llastword: @ drop through to here once we find a
#else
mov pc,lr
#endif
+#endif
.size strlen,.-strlen
diff --git a/libc/string/arm/strncmp.S b/libc/string/arm/strncmp.S
index eaf0620b4..8487639c8 100644
--- a/libc/string/arm/strncmp.S
+++ b/libc/string/arm/strncmp.S
@@ -30,15 +30,46 @@
*/
#include <features.h>
+#include <bits/arm_asm.h>
.text
.global strncmp
.type strncmp,%function
.align 4
+#if defined(THUMB1_ONLY)
+.thumb_func
strncmp:
/* if (len == 0) return 0 */
cmp r2, #0
+ bne 1f
+ mov r0, #0
+ bx lr
+1:
+ push {r4} /* r4 is used below to hold the end address */
+
+ /* r4 == r0 + len, the address just past the last byte to compare */
+ add r4, r0, r2
+2:
+ cmp r4, r0
+ beq 3f /* len bytes compared: r2 and r3 still hold the last pair loaded */
+ ldrb r2, [r0] /* r2 = next byte of the first string */
+ add r0, r0, #1
+ ldrb r3, [r1] /* r3 = next byte of the second string */
+ add r1, r1, #1
+ cmp r2, #0
+ beq 3f /* first string ended: stop */
+ cmp r2, r3
+ beq 2b /* bytes equal: keep scanning */
+3:
+ sub r0, r2, r3 /* result = byte from first string minus byte from second */
+ pop {r4}
+ bx lr
+#else
+strncmp:
+ /* if (len == 0) return 0 */
+ cmp r2, #0
+ IT(tt, eq)
moveq r0, #0
#if defined(__USE_BX__)
bxeq lr
@@ -53,6 +84,7 @@ strncmp:
ldrb r2, [r0], #1
ldrb r3, [r1], #1
cmp ip, r0
+ IT(tt, cs)
cmpcs r2, #1
cmpcs r2, r3
beq 1b
@@ -62,6 +94,7 @@ strncmp:
#else
mov pc, lr
#endif
+#endif
.size strncmp,.-strncmp