author		Bernhard Reutner-Fischer <rep.dot.nop@gmail.com>	2008-03-26 13:40:36 +0000
committer	Bernhard Reutner-Fischer <rep.dot.nop@gmail.com>	2008-03-26 13:40:36 +0000
commit		efce79f09ae6daa77cd322df0d532beec3f445f5 (patch)
tree		ae936850c5671b8bea0abf0d33bf2196f7abc796 /libc/string/arm
parent		17e961d9c708ab202760ce830f8efe73e91bb129 (diff)
Paul Brook writes:
The attached patch adds support for compiling arm uClibc as pure Thumb code.
This is needed because some recent ARM cores do not implement traditional ARM
mode. Specifically:
* Cortex-M1 - An extremely minimal FPGA-based core that only implements
Thumb-1 (aka ARMv6-M).
* Cortex-M3 - A Thumb-2-only ARMv7-M core.
Most of uClibc already builds in Thumb mode; all that is left is a handful of
assembly bits.
Tested on arm-uclinuxeabi.
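The pattern that recurs throughout the patch is conditional execution: in ARM
mode a condition is encoded directly in each instruction (moveq, bxeq), while
in Thumb-2 the same instructions must be preceded by an explicit IT (if-then)
block. The IT() macro used in the diff comes from the new <bits/arm_asm.h>
header, which is not part of this diff; the following sketch shows the likely
idea (an assumption, not the header's verbatim contents):

/* Sketch only: an IT() helper along the lines of what the patch assumes
   from <bits/arm_asm.h>.  In Thumb-2 a conditional instruction must sit
   inside an IT block; in ARM mode the condition suffix on the instruction
   itself is enough, so the macro expands to nothing.  */
#if defined(__thumb2__)
#define IT(t, cond)	i##t	cond
#else
#define IT(t, cond)	/* ARM mode: no IT block needed */
#endif

/* Usage as it appears in the patch: IT(tt, eq) declares that the next
   two instructions execute only when the EQ condition holds.  */
	cmp	r1, r0
	IT(tt, eq)
	moveq	r0, #0
	bxeq	lr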
Diffstat (limited to 'libc/string/arm')
-rw-r--r--	libc/string/arm/_memcpy.S	182
-rw-r--r--	libc/string/arm/bcopy.S		12
-rw-r--r--	libc/string/arm/bzero.S		12
-rw-r--r--	libc/string/arm/memcmp.S	28
-rw-r--r--	libc/string/arm/memcpy.S	11
-rw-r--r--	libc/string/arm/memmove.S	11
-rw-r--r--	libc/string/arm/memset.S	62
-rw-r--r--	libc/string/arm/strcmp.S	19
-rw-r--r--	libc/string/arm/strlen.S	25
-rw-r--r--	libc/string/arm/strncmp.S	33
10 files changed, 377 insertions, 18 deletions
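Each file in the diff below is split on a THUMB1_ONLY test: where ARM or
Thumb-2 is available, the existing 32-bit routines are kept (with IT blocks
added for Thumb-2), and a simple Thumb-1 fallback is added for cores that
support neither. THUMB1_ONLY itself is defined in <bits/arm_asm.h>, which is
not shown in this diff; presumably it is derived along these lines (an
assumption, not taken from the patch):

/* Sketch only: how <bits/arm_asm.h> presumably derives THUMB1_ONLY.
   Compiling for Thumb without Thumb-2 support means the 32-bit ARM
   string routines cannot be assembled at all, so a plain Thumb-1
   fallback is selected instead.  */
#if defined(__thumb__) && !defined(__thumb2__)
#define THUMB1_ONLY	1
#endif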
diff --git a/libc/string/arm/_memcpy.S b/libc/string/arm/_memcpy.S
index 3704f96b5..5ef63c45a 100644
--- a/libc/string/arm/_memcpy.S
+++ b/libc/string/arm/_memcpy.S
@@ -39,7 +39,9 @@
 #include <features.h>
 #include <endian.h>
+#include <bits/arm_asm.h>
 
+#if !defined(THUMB1_ONLY)
 /*
  * This is one fun bit of code ...
  * Some easy listening music is suggested while trying to understand this
@@ -77,11 +79,36 @@
 .type _memcpy,%function
 .align 4
 
+/* XXX: The Thumb-2 conditionals can be removed if/when we require an
+   assembler that supports unified syntax.  */
+.macro copy regs
+#if defined(__thumb2__)
+	ittt	ge
+	ldmiage	r1!, \regs
+	stmiage	r0!, \regs
+#else
+	ldmgeia	r1!, \regs
+	stmgeia	r0!, \regs
+#endif
+.endm
+
+.macro copydb regs
+#if defined(__thumb2__)
+	ittt	ge
+	ldmdbge	r1!, \regs
+	stmdbge	r0!, \regs
+#else
+	ldmgedb	r1!, \regs
+	stmgedb	r0!, \regs
+#endif
+.endm
+
 _memcpy:
 	/* Determine copy direction */
 	cmp	r1, r0
 	bcc	.Lmemcpy_backwards
 
+	IT(tt, eq)
 	moveq	r0, #0			/* Quick abort for len=0 */
 #if defined(__USE_BX__)
 	bxeq	lr
@@ -102,7 +129,7 @@ _memcpy:
 	blt	.Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
 	subs	r2, r2, #0x14
 	blt	.Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
-	stmdb	sp!, {r4}		/* borrow r4 */
+	str	r4, [sp, #-4]!		/* borrow r4 */
 
 	/* blat 32 bytes at a time */
 	/* XXX for really big copies perhaps we should use more registers */
@@ -115,19 +142,22 @@ _memcpy:
 	bge	.Lmemcpy_floop32
 
 	cmn	r2, #0x10
-	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
-	stmgeia	r0!, {r3, r4, r12, lr}
+	/* blat a remaining 16 bytes */
+	copy	"{r3, r4, r12, lr}"
 	subge	r2, r2, #0x10
-	ldmia	sp!, {r4}		/* return r4 */
+	ldr	r4, [sp], #4		/* restore r4 */
 
 .Lmemcpy_fl32:
 	adds	r2, r2, #0x14
 
 	/* blat 12 bytes at a time */
 .Lmemcpy_floop12:
-	ldmgeia	r1!, {r3, r12, lr}
-	stmgeia	r0!, {r3, r12, lr}
+	copy	"{r3, r12, lr}"
+#if defined(__thumb2__)
+	subsge	r2, r2, #0x0c
+#else
 	subges	r2, r2, #0x0c
+#endif
 	bge	.Lmemcpy_floop12
 
 .Lmemcpy_fl12:
@@ -135,26 +165,48 @@ _memcpy:
 	blt	.Lmemcpy_fl4
 
 	subs	r2, r2, #4
+	IT(tt, lt)
 	ldrlt	r3, [r1], #4
 	strlt	r3, [r0], #4
-	ldmgeia	r1!, {r3, r12}
-	stmgeia	r0!, {r3, r12}
+	copy	"{r3, r12}"
 	subge	r2, r2, #4
 
.Lmemcpy_fl4:
 	/* less than 4 bytes to go */
 	adds	r2, r2, #4
+#if defined(__thumb2__)
+	it	eq
+	popeq	{r0, pc}		/* done */
+#elif defined(__ARM_ARCH_4T__)
+	ldmeqia	sp!, {r0, r3}		/* done */
+	bxeq	r3
+#else
 	ldmeqia	sp!, {r0, pc}		/* done */
+#endif
 
 	/* copy the crud byte at a time */
 	cmp	r2, #2
 	ldrb	r3, [r1], #1
 	strb	r3, [r0], #1
+#if defined(__thumb2__)
+	itt	ge
+	ldrbge	r3, [r1], #1
+	strbge	r3, [r0], #1
+	itt	gt
+	ldrbgt	r3, [r1], #1
+	strbgt	r3, [r0], #1
+#else
 	ldrgeb	r3, [r1], #1
 	strgeb	r3, [r0], #1
 	ldrgtb	r3, [r1], #1
 	strgtb	r3, [r0], #1
+#endif
+#if defined(__ARM_ARCH_4T__)
+	ldmia	sp!, {r0, r3}
+	bx	r3
+#else
 	ldmia	sp!, {r0, pc}
+#endif
 
 	/* erg - unaligned destination */
 .Lmemcpy_fdestul:
@@ -164,10 +216,19 @@ _memcpy:
 	/* align destination with byte copies */
 	ldrb	r3, [r1], #1
 	strb	r3, [r0], #1
+#if defined(__thumb2__)
+	itt	ge
+	ldrbge	r3, [r1], #1
+	strbge	r3, [r0], #1
+	itt	gt
+	ldrbgt	r3, [r1], #1
+	strbgt	r3, [r0], #1
+#else
 	ldrgeb	r3, [r1], #1
 	strgeb	r3, [r0], #1
 	ldrgtb	r3, [r1], #1
 	strgtb	r3, [r0], #1
+#endif
 	subs	r2, r2, r12
 	blt	.Lmemcpy_fl4		/* less the 4 bytes */
 
@@ -370,12 +431,12 @@ _memcpy:
 .Lmemcpy_bl32:
 	cmn	r2, #0x10
-	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
-	stmgedb	r0!, {r3, r4, r12, lr}
+	/* blat a remaining 16 bytes */
+	copydb	"{r3, r4, r12, lr}"
 	subge	r2, r2, #0x10
 
 	adds	r2, r2, #0x14
-	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
-	stmgedb	r0!, {r3, r12, lr}
+	/* blat a remaining 12 bytes */
+	copydb	"{r3, r12, lr}"
 	subge	r2, r2, #0x0c
 
 	ldmia	sp!, {r4, lr}
@@ -383,15 +444,16 @@ _memcpy:
 	adds	r2, r2, #8
 	blt	.Lmemcpy_bl4
 	subs	r2, r2, #4
+	IT(tt, lt)
 	ldrlt	r3, [r1, #-4]!
 	strlt	r3, [r0, #-4]!
-	ldmgedb	r1!, {r3, r12}
-	stmgedb	r0!, {r3, r12}
+	copydb	"{r3, r12}"
 	subge	r2, r2, #4
 
 .Lmemcpy_bl4:
 	/* less than 4 bytes to go */
 	adds	r2, r2, #4
+	IT(t, eq)
 #if defined(__USE_BX__)
 	bxeq	lr
 #else
@@ -401,10 +463,19 @@ _memcpy:
 	cmp	r2, #2
 	ldrb	r3, [r1, #-1]!
 	strb	r3, [r0, #-1]!
+#ifdef __thumb2__
+	itt	ge
+	ldrbge	r3, [r1, #-1]!
+	strbge	r3, [r0, #-1]!
+	itt	gt
+	ldrbgt	r3, [r1, #-1]!
+	strbgt	r3, [r0, #-1]!
+#else
 	ldrgeb	r3, [r1, #-1]!
 	strgeb	r3, [r0, #-1]!
 	ldrgtb	r3, [r1, #-1]!
 	strgtb	r3, [r0, #-1]!
+#endif
 #if defined(__USE_BX__)
 	bx	lr
 #else
@@ -417,10 +488,19 @@ _memcpy:
 	/* align destination with byte copies */
 	ldrb	r3, [r1, #-1]!
 	strb	r3, [r0, #-1]!
+#ifdef __thumb2__
+	itt	ge
+	ldrbge	r3, [r1, #-1]!
+	strbge	r3, [r0, #-1]!
+	itt	gt
+	ldrbgt	r3, [r1, #-1]!
+	strbgt	r3, [r0, #-1]!
+#else
 	ldrgeb	r3, [r1, #-1]!
 	strgeb	r3, [r0, #-1]!
 	ldrgtb	r3, [r1, #-1]!
 	strgtb	r3, [r0, #-1]!
+#endif
 	subs	r2, r2, r12
 	blt	.Lmemcpy_bl4		/* less than 4 bytes to go */
 	ands	r12, r1, #3
@@ -591,3 +671,77 @@ _memcpy:
 .Lmemcpy_bsrcul1l4:
 	add	r1, r1, #1
 	b	.Lmemcpy_bl4
+
+#else /* THUMB1_ONLY */
+
+/* This is a fairly dumb implementation for when we can't use the 32-bit code
+   above.  */
+.text
+.global _memcpy
+.hidden _memcpy
+.type _memcpy,%function
+.align 4
+.thumb
+_memcpy:
+	push	{r0, r4}
+	cmp	r2, #0
+	beq	.Lmemcpy_exit
+	@ See if we have overlapping regions, and need to reverse the
+	@ direction of the copy
+	cmp	r0, r1
+	bls	.Lmemcpy_forwards
+	add	r4, r1, r2
+	cmp	r0, r4
+	bcc	.Lmemcpy_backwards
+.Lmemcpy_forwards:
+	/* Forwards.  */
+	mov	r3, r0
+	eor	r3, r1
+	mov	r4, #3
+	tst	r3, r4
+	bne	.Lmemcpy_funaligned
+	cmp	r2, #8
+	bcc	.Lmemcpy_funaligned
+1:	@ copy up to the first word boundary.
+	tst	r0, r4
+	beq	1f
+	ldrb	r3, [r1]
+	add	r1, r1, #1
+	strb	r3, [r0]
+	add	r0, r0, #1
+	sub	r2, r2, #1
+	b	1b
+1:	@ Copy aligned words
+	ldr	r3, [r1]
+	add	r1, r1, #4
+	str	r3, [r0]
+	add	r0, r0, #4
+	sub	r2, r2, #4
+	cmp	r2, #4
+	bcs	1b
+	cmp	r2, #0
+	beq	.Lmemcpy_exit
+.Lmemcpy_funaligned:
+1:
+	ldrb	r3, [r1]
+	add	r1, r1, #1
+	strb	r3, [r0]
+	add	r0, r0, #1
+	sub	r2, r2, #1
+	bne	1b
+.Lmemcpy_exit:
+	pop	{r0, r4}
+	bx	lr
+
+.Lmemcpy_backwards:
+	add	r0, r0, r2
+	add	r1, r1, r2
+1:
+	sub	r0, r0, #1
+	sub	r1, r1, #1
+	ldrb	r3, [r1]
+	strb	r3, [r0]
+	sub	r2, r2, #1
+	bne	1b
+	b	.Lmemcpy_exit
+#endif
diff --git a/libc/string/arm/bcopy.S b/libc/string/arm/bcopy.S
index db3c9e6c1..2d6e90d13 100644
--- a/libc/string/arm/bcopy.S
+++ b/libc/string/arm/bcopy.S
@@ -40,6 +40,7 @@
 /* bcopy = memcpy/memmove with arguments reversed.
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 #ifdef __UCLIBC_SUSV3_LEGACY__
@@ -48,12 +49,23 @@
 .type bcopy,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+bcopy:
+	push	{r2, lr}
+	mov	ip, r0
+	mov	r0, r1
+	mov	r1, ip
+	bl	_memcpy
+	POP_RET
+#else
 bcopy:
 	/* switch the source and destination registers */
 	eor	r0, r1, r0
 	eor	r1, r0, r1
 	eor	r0, r1, r0
 
 	b	_memcpy	/* (PLT) */
+#endif
 
 .size bcopy,.-bcopy
diff --git a/libc/string/arm/bzero.S b/libc/string/arm/bzero.S
index ee49cf560..e576a12e9 100644
--- a/libc/string/arm/bzero.S
+++ b/libc/string/arm/bzero.S
@@ -38,6 +38,7 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 #ifdef __UCLIBC_SUSV3_LEGACY__
@@ -46,10 +47,21 @@
 .type bzero,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+bzero:
+	push	{r2, lr}
+	mov	r2, r1
+	mov	r1, #0
+	bl	HIDDEN_JUMPTARGET(memset)
+	POP_RET
+#else
+
 bzero:
 	mov	r2, r1
 	mov	r1, #0
 	b	HIDDEN_JUMPTARGET(memset)
+#endif
 
 .size bzero,.-bzero
diff --git a/libc/string/arm/memcmp.S b/libc/string/arm/memcmp.S
index 4f78b5128..65409f43a 100644
--- a/libc/string/arm/memcmp.S
+++ b/libc/string/arm/memcmp.S
@@ -30,15 +30,41 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 .global memcmp
 .type memcmp,%function
 .align 4
 
+#if defined(THUMB1_ONLY)
+.thumb_func
+memcmp:
+	cmp	r2, #0
+	bne	1f
+	mov	r0, #0
+	bx	lr
+1:
+	push	{r4}
+	add	r4, r0, r2
+2:
+	ldrb	r2, [r0]
+	add	r0, r0, #1
+	ldrb	r3, [r1]
+	add	r1, r1, #1
+	cmp	r4, r0
+	beq	3f
+	cmp	r2, r3
+	beq	2b
+3:
+	sub	r0, r2, r3
+	pop	{r4}
+	bx	lr
+#else
 memcmp:
 	/* if ((len - 1) < 0) return 0 */
 	subs	r2, r2, #1
+	IT(tt, mi)
 	movmi	r0, #0
 #if defined(__USE_BX__)
 	bxmi	lr
@@ -51,6 +77,7 @@ memcmp:
 	ldrb	r2, [r0], #1
 	ldrb	r3, [r1], #1
 	cmp	ip, r0
+	IT(t, cs)
 	cmpcs	r2, r3
 	beq	1b
 	sub	r0, r2, r3
@@ -59,6 +86,7 @@ memcmp:
 #else
 	mov	pc, lr
 #endif
+#endif
 
 .size memcmp,.-memcmp
diff --git a/libc/string/arm/memcpy.S b/libc/string/arm/memcpy.S
index 7a5b6ab76..d2013d211 100644
--- a/libc/string/arm/memcpy.S
+++ b/libc/string/arm/memcpy.S
@@ -38,16 +38,23 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 .global memcpy
 .type memcpy,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
 memcpy:
-	stmfd	sp!, {r0, lr}
+	push	{r0, lr}
 	bl	_memcpy
-	ldmfd	sp!, {r0, pc}
+	POP_RET
+#else
+memcpy:
+	b	_memcpy
+#endif
 
 .size memcpy,.-memcpy
diff --git a/libc/string/arm/memmove.S b/libc/string/arm/memmove.S
index 45cd9b4d4..c11b98dd4 100644
--- a/libc/string/arm/memmove.S
+++ b/libc/string/arm/memmove.S
@@ -38,16 +38,23 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 .global memmove
 .type memmove,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
 memmove:
-	stmfd	sp!, {r0, lr}
+	push	{r2, lr}
 	bl	_memcpy
-	ldmfd	sp!, {r0, pc}
+	POP_RET
+#else
+memmove:
+	b	_memcpy
+#endif
 
 .size memmove,.-memmove
diff --git a/libc/string/arm/memset.S b/libc/string/arm/memset.S
index 16bfe0dc5..66aa6039c 100644
--- a/libc/string/arm/memset.S
+++ b/libc/string/arm/memset.S
@@ -19,12 +19,52 @@
 
 #include <features.h>
 #include <sys/syscall.h>
+#include <bits/arm_asm.h>
 
 .text
 .global memset
 .type memset,%function
 .align 4
 
+#if defined(THUMB1_ONLY)
+.thumb_func
+memset:
+	mov	ip, r0
+	cmp	r2, #8		@ at least 8 bytes to do?
+	bcc	2f
+
+	lsl	r3, r1, #8
+	orr	r1, r3
+	lsl	r3, r1, #16
+	orr	r1, r3
+
+	mov	r3, #3
+1:	@ Fill up to the first word boundary
+	tst	r0, r3
+	beq	1f
+	strb	r1, [r0]
+	add	r0, r0, #1
+	sub	r2, r2, #1
+	b	1b
+1:	@ Fill aligned words
+	str	r1, [r0]
+	add	r0, r0, #4
+	sub	r2, r2, #4
+	cmp	r2, #4
+	bcs	1b
+
+2:	@ Fill the remaining bytes
+	cmp	r2, #0
+	beq	2f
+1:
+	strb	r1, [r0]
+	add	r0, r0, #1
+	sub	r2, r2, #1
+	bne	1b
+2:
+	mov	r0, ip
+	bx	lr
+#else
 memset:
 	mov	a4, a1
 	cmp	a3, $8		@ at least 8 bytes to do?
@@ -33,8 +73,14 @@ memset:
 	orr	a2, a2, a2, lsl $16
 1:
 	tst	a4, $3		@ aligned yet?
+#if defined(__thumb2__)
+	itt	ne
+	strbne	a2, [a4], $1
+	subne	a3, a3, $1
+#else
 	strneb	a2, [a4], $1
 	subne	a3, a3, $1
+#endif
 	bne	1b
 	mov	ip, a2
 1:
@@ -51,16 +97,30 @@ memset:
 	stmia	a4!, {a2, ip}
 	sub	a3, a3, $8
 	cmp	a3, $8		@ 8 bytes still to do?
+#if defined(__thumb2__)
+	itt	ge
+	stmiage	a4!, {a2, ip}
+	subge	a3, a3, $8
+#else
 	stmgeia	a4!, {a2, ip}
 	subge	a3, a3, $8
+#endif
 	bge	1b
 2:
 	movs	a3, a3		@ anything left?
+	IT(t, eq)
 #if defined(__USE_BX__)
 	bxeq	lr
 #else
 	moveq	pc, lr		@ nope
 #endif
+#if defined (__thumb2__)
+1:
+	strb	a2, [a4], #1
+	subs	a3, a3, #1
+	bne	1b
+	bx	lr
+#else
 	rsb	a3, a3, $7
 	add	pc, pc, a3, lsl $2
 	mov	r0, r0
@@ -76,6 +136,8 @@ memset:
 #else
 	mov	pc, lr
 #endif
+#endif
+#endif
 
 .size memset,.-memset
diff --git a/libc/string/arm/strcmp.S b/libc/string/arm/strcmp.S
index 89aa38874..97363c1c2 100644
--- a/libc/string/arm/strcmp.S
+++ b/libc/string/arm/strcmp.S
@@ -30,17 +30,35 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 .global strcmp
 .type strcmp,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+strcmp:
+1:
+	ldrb	r2, [r0]
+	add	r0, r0, #1
+	ldrb	r3, [r1]
+	add	r1, r1, #1
+	cmp	r2, #0
+	beq	2f
+	cmp	r2, r3
+	beq	1b
+2:
+	sub	r0, r2, r3
+	bx	lr
+#else
 strcmp:
 1:
 	ldrb	r2, [r0], #1
 	ldrb	r3, [r1], #1
 	cmp	r2, #1
+	IT(t, cs)
 	cmpcs	r2, r3
 	beq	1b
 	sub	r0, r2, r3
@@ -49,6 +67,7 @@ strcmp:
 #else
 	mov	pc, lr
 #endif
+#endif
 
 .size strcmp,.-strcmp
diff --git a/libc/string/arm/strlen.S b/libc/string/arm/strlen.S
index 5b4b02e17..949e918f4 100644
--- a/libc/string/arm/strlen.S
+++ b/libc/string/arm/strlen.S
@@ -20,6 +20,7 @@
 #include <features.h>
 #include <endian.h>
 #include <sys/syscall.h>
+#include <bits/arm_asm.h>
 
 /* size_t strlen(const char *S)
  * entry: r0 -> string
@@ -31,6 +32,19 @@
 .type strlen,%function
 .align 4
 
+#if defined(THUMB1_ONLY)
+/* A simple implementation for when the ARM implementation can't be used.  */
+.thumb_func
+strlen:
+	mov	r2, #0
+1:
+	ldrb	r1, [r0, r2]
+	add	r2, r2, #1
+	cmp	r1, #0
+	bne	1b
+	sub	r0, r2, #1
+	bx	lr
+#else
 strlen:
 	bic	r1, r0, $3		@ addr of word containing first byte
 	ldr	r2, [r1], $4		@ get the first word
@@ -41,38 +55,48 @@ strlen:
 #if __BYTE_ORDER == __BIG_ENDIAN
 	orr	r2, r2, $0xff000000	@ set this byte to non-zero
 	subs	r3, r3, $1		@ any more to do?
+	IT(t, gt)
 	orrgt	r2, r2, $0x00ff0000	@ if so, set this byte
 	subs	r3, r3, $1		@ more?
+	IT(t, gt)
 	orrgt	r2, r2, $0x0000ff00	@ then set.
 #else
 	orr	r2, r2, $0x000000ff	@ set this byte to non-zero
 	subs	r3, r3, $1		@ any more to do?
+	IT(t, gt)
 	orrgt	r2, r2, $0x0000ff00	@ if so, set this byte
 	subs	r3, r3, $1		@ more?
+	IT(t, gt)
 	orrgt	r2, r2, $0x00ff0000	@ then set.
 #endif
 Laligned:				@ here, we have a word in r2.  Does it
 	tst	r2, $0x000000ff		@ contain any zeroes?
+	IT(tttt, ne)
 	tstne	r2, $0x0000ff00		@
 	tstne	r2, $0x00ff0000		@
 	tstne	r2, $0xff000000		@
 	addne	r0, r0, $4		@ if not, the string is 4 bytes longer
+	IT(t, ne)
 	ldrne	r2, [r1], $4		@ and we continue to the next word
 	bne	Laligned		@
 Llastword:				@ drop through to here once we find a
 #if __BYTE_ORDER == __BIG_ENDIAN
 	tst	r2, $0xff000000		@ word that has a zero byte in it
+	IT(tttt, ne)
 	addne	r0, r0, $1		@
 	tstne	r2, $0x00ff0000		@ and add up to 3 bytes on to it
 	addne	r0, r0, $1		@
 	tstne	r2, $0x0000ff00		@ (if first three all non-zero, 4th
+	IT(t, ne)
 	addne	r0, r0, $1		@  must be zero)
 #else
 	tst	r2, $0x000000ff		@
+	IT(tttt, ne)
 	addne	r0, r0, $1		@
 	tstne	r2, $0x0000ff00		@ and add up to 3 bytes on to it
 	addne	r0, r0, $1		@
 	tstne	r2, $0x00ff0000		@ (if first three all non-zero, 4th
+	IT(t, ne)
 	addne	r0, r0, $1		@  must be zero)
 #endif
 #if defined(__USE_BX__)
@@ -80,6 +104,7 @@ Llastword:				@ drop through to here once we find a
 #else
 	mov	pc,lr
 #endif
+#endif
 
 .size strlen,.-strlen
diff --git a/libc/string/arm/strncmp.S b/libc/string/arm/strncmp.S
index eaf0620b4..8487639c8 100644
--- a/libc/string/arm/strncmp.S
+++ b/libc/string/arm/strncmp.S
@@ -30,15 +30,46 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 .global strncmp
 .type strncmp,%function
 .align 4
 
+#if defined(THUMB1_ONLY)
+.thumb_func
 strncmp:
 	/* if (len == 0) return 0 */
 	cmp	r2, #0
+	bne	1f
+	mov	r0, #0
+	bx	lr
+1:
+	push	{r4}
+
+	/* ip == last src address to compare */
+	add	r4, r0, r2
+2:
+	cmp	r4, r0
+	beq	3f
+	ldrb	r2, [r0]
+	add	r0, r0, #1
+	ldrb	r3, [r1]
+	add	r1, r1, #1
+	cmp	r2, #0
+	beq	3f
+	cmp	r2, r3
+	beq	2b
+3:
+	sub	r0, r2, r3
+	pop	{r4}
+	bx	lr
+#else
+strncmp:
+	/* if (len == 0) return 0 */
+	cmp	r2, #0
+	IT(tt, eq)
 	moveq	r0, #0
 #if defined(__USE_BX__)
 	bxeq	lr
@@ -53,6 +84,7 @@ strncmp:
 	ldrb	r2, [r0], #1
 	ldrb	r3, [r1], #1
 	cmp	ip, r0
+	IT(tt, cs)
 	cmpcs	r2, #1
 	cmpcs	r2, r3
 	beq	1b
@@ -62,6 +94,7 @@ strncmp:
 #else
 	mov	pc, lr
 #endif
+#endif
 
 .size strncmp,.-strncmp
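The Thumb-1 wrappers above (memcpy, memmove, bcopy, bzero) push two registers
on entry and return through POP_RET, another macro supplied by
<bits/arm_asm.h> and not shown in this diff. On ARMv4T, popping directly into
pc from Thumb code does not switch back to an ARM-mode caller, so the return
presumably has to go through bx; a sketch under that assumption, with the
choice of r2/r3 as scratch registers being a guess:

/* Sketch only: a plausible POP_RET for the two-word frames the wrappers
   push ({r0, lr} or {r2, lr}).  r0 already holds the return value, so
   the first popped word can land in scratch register r2 regardless of
   which register was originally pushed.  */
#if defined(__ARM_ARCH_4T__) && defined(__thumb__)
#define POP_RET	pop	{r2, r3}; bx	r3	/* v4T: pop into pc would not interwork */
#else
#define POP_RET	pop	{r2, pc}		/* pop the saved lr straight into pc */
#endif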