diff options
26 files changed, 771 insertions, 23 deletions
| @@ -219,6 +219,8 @@ ifeq ($(TARGET_ARCH),arm)  	CPU_CFLAGS-$(CONFIG_ARM_XSCALE)+=$(call check_gcc,-mtune=xscale,-mtune=strongarm110)  	CPU_CFLAGS-$(CONFIG_ARM_XSCALE)+=-march=armv5te -Wa,-mcpu=xscale   	CPU_CFLAGS-$(CONFIG_ARM_IWMMXT)+=-march=iwmmxt -Wa,-mcpu=iwmmxt -mabi=iwmmxt + 	CPU_CFLAGS-$(CONFIG_ARM_CORTEX_M3)+=-mcpu=cortex-m3 -mthumb + 	CPU_CFLAGS-$(CONFIG_ARM_CORTEX_M1)+=-mcpu=cortex-m1 -mthumb  endif  ifeq ($(TARGET_ARCH),mips) diff --git a/extra/Configs/Config.arm b/extra/Configs/Config.arm index 7aec08ce2..26e1f3da4 100644 --- a/extra/Configs/Config.arm +++ b/extra/Configs/Config.arm @@ -107,6 +107,16 @@ config CONFIG_ARM1176JZF_S  	bool "Arm 1176JZF-S"  	select ARCH_HAS_MMU +config CONFIG_ARM_CORTEX_M3 +	bool "Arm Cortex-M3" +	select ARCH_HAS_NO_MMU +	select USE_BX + +config CONFIG_ARM_CORTEX_M1 +	bool "Arm Cortex-M1" +	select ARCH_HAS_NO_MMU +	select USE_BX +  config CONFIG_ARM_SA110  	bool "Intel StrongArm SA-110"  	select ARCH_HAS_MMU diff --git a/ldso/ldso/arm/resolve.S b/ldso/ldso/arm/resolve.S index cbeb2232d..b422c334d 100644 --- a/ldso/ldso/arm/resolve.S +++ b/ldso/ldso/arm/resolve.S @@ -91,12 +91,13 @@   */  #include <sys/syscall.h> +#include <bits/arm_asm.h>  #include <features.h>   .text   .align 4      @ 16 byte boundary and there are 32 bytes below (arm case) - #if !defined(__thumb__) + #if !defined(__thumb__) || defined(__thumb2__)   .arm   .globl _dl_linux_resolve   .type _dl_linux_resolve,%function diff --git a/libc/string/arm/_memcpy.S b/libc/string/arm/_memcpy.S index 3704f96b5..5ef63c45a 100644 --- a/libc/string/arm/_memcpy.S +++ b/libc/string/arm/_memcpy.S @@ -39,7 +39,9 @@  #include <features.h>  #include <endian.h> +#include <bits/arm_asm.h> +#if !defined(THUMB1_ONLY)  /*   * This is one fun bit of code ...   
* Some easy listening music is suggested while trying to understand this @@ -77,11 +79,36 @@  .type _memcpy,%function  .align 4 +/* XXX: The Thumb-2 conditionals can be removed if/when we require an +   assembler that supports unified syntax.  */ +.macro copy regs +#if defined(__thumb2__) +	ittt	ge +	ldmiage	r1!, \regs +	stmiage	r0!, \regs +#else +	ldmgeia	r1!, \regs +	stmgeia	r0!, \regs +#endif +.endm + +.macro copydb regs +#if defined(__thumb2__) +	ittt	ge +	ldmdbge	r1!, \regs +	stmdbge	r0!, \regs +#else +	ldmgedb	r1!, \regs +	stmgedb	r0!, \regs +#endif +.endm +  _memcpy:  	/* Determine copy direction */  	cmp	r1, r0  	bcc	.Lmemcpy_backwards +	/* Quick abort for src == dst.  r0 is left untouched so that callers +	   which tail-call _memcpy still return the destination pointer.  */ +	IT(t, eq) -	moveq	r0, #0			/* Quick abort for len=0 */  #if defined(__USE_BX__)          bxeq    lr @@ -102,7 +129,7 @@ _memcpy:  	blt	.Lmemcpy_fl12		/* less than 12 bytes (4 from above) */  	subs	r2, r2, #0x14           	blt	.Lmemcpy_fl32		/* less than 32 bytes (12 from above) */ -	stmdb	sp!, {r4}		/* borrow r4 */ +	str	r4, [sp, #-4]!		/* borrow r4 */  	/* blat 32 bytes at a time */  	/* XXX for really big copies perhaps we should use more registers */ @@ -115,19 +142,22 @@ _memcpy:  	bge	.Lmemcpy_floop32  	cmn	r2, #0x10 -	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */ -	stmgeia	r0!, {r3, r4, r12, lr} +	/* blat a remaining 16 bytes */ +	copy	"{r3, r4, r12, lr}"  	subge	r2, r2, #0x10          -	ldmia	sp!, {r4}		/* return r4 */ +	ldr	r4, [sp], #4		/* restore r4 */  .Lmemcpy_fl32:  	adds	r2, r2, #0x14           	/* blat 12 bytes at a time */  .Lmemcpy_floop12: -	ldmgeia	r1!, {r3, r12, lr} -	stmgeia	r0!, {r3, r12, lr} +	copy	"{r3, r12, lr}" +#if defined(__thumb2__) +	subsge	r2, r2, #0x0c          +#else  	subges	r2, r2, #0x0c          +#endif  	bge	.Lmemcpy_floop12  .Lmemcpy_fl12: @@ -135,26 +165,48 @@ _memcpy:  	blt	.Lmemcpy_fl4  	subs	r2, r2, #4 +	IT(tt, lt)  	ldrlt	r3, [r1], #4  	strlt	r3, [r0], #4 -	ldmgeia	r1!, {r3, r12} -	stmgeia	r0!, {r3, r12} +	copy	"{r3, r12}"  	subge	r2, r2, #4  .Lmemcpy_fl4:  	/* 
less than 4 bytes to go */  	adds	r2, r2, #4 +#if defined(__thumb2__) +	it	eq +	popeq	{r0, pc}		/* done */ +#elif defined(__ARM_ARCH_4T__) +	ldmeqia	sp!, {r0, r3}		/* done */ +	bxeq	r3 +#else  	ldmeqia	sp!, {r0, pc}		/* done */ +#endif  	/* copy the crud byte at a time */  	cmp	r2, #2  	ldrb	r3, [r1], #1  	strb	r3, [r0], #1 +#if defined(__thumb2__) +	itt	ge +	ldrbge	r3, [r1], #1 +	strbge	r3, [r0], #1 +	itt	gt +	ldrbgt	r3, [r1], #1 +	strbgt	r3, [r0], #1 +#else  	ldrgeb	r3, [r1], #1  	strgeb	r3, [r0], #1  	ldrgtb	r3, [r1], #1  	strgtb	r3, [r0], #1 +#endif +#if defined(__ARM_ARCH_4T__) +	ldmia	sp!, {r0, r3} +	bx	r3 +#else  	ldmia	sp!, {r0, pc} +#endif  	/* erg - unaligned destination */  .Lmemcpy_fdestul: @@ -164,10 +216,19 @@ _memcpy:  	/* align destination with byte copies */  	ldrb	r3, [r1], #1  	strb	r3, [r0], #1 +#if defined(__thumb2__) +	itt	ge +	ldrbge	r3, [r1], #1 +	strbge	r3, [r0], #1 +	itt	gt +	ldrbgt	r3, [r1], #1 +	strbgt	r3, [r0], #1 +#else  	ldrgeb	r3, [r1], #1  	strgeb	r3, [r0], #1  	ldrgtb	r3, [r1], #1  	strgtb	r3, [r0], #1 +#endif  	subs	r2, r2, r12  	blt	.Lmemcpy_fl4		/* less the 4 bytes */ @@ -370,12 +431,12 @@ _memcpy:  .Lmemcpy_bl32:  	cmn	r2, #0x10             -	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */ -	stmgedb	r0!, {r3, r4, r12, lr} +	/* blat a remaining 16 bytes */ +	copydb	"{r3, r4, r12, lr}"  	subge	r2, r2, #0x10           	adds	r2, r2, #0x14          -	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */ -	stmgedb	r0!, {r3, r12, lr} +	/* blat a remaining 12 bytes */ +	copydb	"{r3, r12, lr}"  	subge	r2, r2, #0x0c           	ldmia	sp!, {r4, lr} @@ -383,15 +444,16 @@ _memcpy:  	adds	r2, r2, #8  	blt	.Lmemcpy_bl4  	subs	r2, r2, #4 +	IT(tt, lt)  	ldrlt	r3, [r1, #-4]!  	strlt	r3, [r0, #-4]! 
-	ldmgedb	r1!, {r3, r12} -	stmgedb	r0!, {r3, r12} +	copydb	"{r3, r12}"  	subge	r2, r2, #4  .Lmemcpy_bl4:  	/* less than 4 bytes to go */  	adds	r2, r2, #4 +	IT(t, eq)  #if defined(__USE_BX__)          bxeq    lr  #else @@ -401,10 +463,19 @@ _memcpy:  	cmp	r2, #2  	ldrb	r3, [r1, #-1]!  	strb	r3, [r0, #-1]! +#ifdef __thumb2__ +	itt	ge +	ldrbge	r3, [r1, #-1]! +	strbge	r3, [r0, #-1]! +	itt	gt +	ldrbgt	r3, [r1, #-1]! +	strbgt	r3, [r0, #-1]! +#else  	ldrgeb	r3, [r1, #-1]!  	strgeb	r3, [r0, #-1]!  	ldrgtb	r3, [r1, #-1]!  	strgtb	r3, [r0, #-1]! +#endif  #if defined(__USE_BX__)          bx      lr  #else @@ -417,10 +488,19 @@ _memcpy:  	/* align destination with byte copies */  	ldrb	r3, [r1, #-1]!  	strb	r3, [r0, #-1]! +#ifdef __thumb2__ +	itt	ge +	ldrbge	r3, [r1, #-1]! +	strbge	r3, [r0, #-1]! +	itt	gt +	ldrbgt	r3, [r1, #-1]! +	strbgt	r3, [r0, #-1]! +#else  	ldrgeb	r3, [r1, #-1]!  	strgeb	r3, [r0, #-1]!  	ldrgtb	r3, [r1, #-1]!  	strgtb	r3, [r0, #-1]! +#endif  	subs	r2, r2, r12  	blt	.Lmemcpy_bl4		/* less than 4 bytes to go */  	ands	r12, r1, #3 @@ -591,3 +671,77 @@ _memcpy:  .Lmemcpy_bsrcul1l4:  	add	r1, r1, #1  	b	.Lmemcpy_bl4 + +#else /* THUMB1_ONLY */ + +/* This is a fairly dumb implementation for when we can't use the 32-bit code +   above.  */ +.text +.global _memcpy +.hidden _memcpy +.type _memcpy,%function +.align 4 +.thumb +_memcpy: +	push	{r0, r4} +	cmp	r2, #0 +	beq	.Lmemcpy_exit +	@ See if we have overlapping regions, and need to reverse the +	@ direction of the copy +	cmp	r0, r1 +	bls	.Lmemcpy_forwards +	add	r4, r1, r2 +	cmp	r0, r4 +	bcc	.Lmemcpy_backwards +.Lmemcpy_forwards: +	/* Forwards.  */ +	mov	r3, r0 +	eor	r3, r1 +	mov	r4, #3 +	tst	r3, r4 +	bne	.Lmemcpy_funaligned +	cmp	r2, #8 +	bcc	.Lmemcpy_funaligned +1:	@ copy up to the first word boundary. 
+	tst	r0, r4 +	beq	1f +	ldrb	r3, [r1] +	add	r1, r1, #1 +	strb	r3, [r0] +	add	r0, r0, #1 +	sub	r2, r2, #1 +	b	1b +1:	@ Copy aligned words +	ldr	r3, [r1] +	add	r1, r1, #4 +	str	r3, [r0] +	add	r0, r0, #4 +	sub	r2, r2, #4 +	cmp	r2, #4 +	bcs	1b +	cmp	r2, #0 +	beq	.Lmemcpy_exit +.Lmemcpy_funaligned: +1: +	ldrb	r3, [r1] +	add	r1, r1, #1 +	strb	r3, [r0] +	add	r0, r0, #1 +	sub	r2, r2, #1 +	bne	1b +.Lmemcpy_exit: +	pop	{r0, r4} +	bx	lr + +.Lmemcpy_backwards: +	add	r0, r0, r2 +	add	r1, r1, r2 +1: +	sub	r0, r0, #1 +	sub	r1, r1, #1 +	ldrb	r3, [r1] +	strb	r3, [r0] +	sub	r2, r2, #1 +	bne	1b +	b	.Lmemcpy_exit +#endif diff --git a/libc/string/arm/bcopy.S b/libc/string/arm/bcopy.S index db3c9e6c1..2d6e90d13 100644 --- a/libc/string/arm/bcopy.S +++ b/libc/string/arm/bcopy.S @@ -40,6 +40,7 @@  /* bcopy = memcpy/memmove with arguments reversed. */  #include <features.h> +#include <bits/arm_asm.h>  #ifdef __UCLIBC_SUSV3_LEGACY__ @@ -48,12 +49,23 @@  .type bcopy,%function  .align 4 +#if defined(__thumb__) && !defined(__thumb2__) +.thumb_func +bcopy: +	push	{r2, lr} +	mov	ip, r0 +	mov	r0, r1 +	mov	r1, ip +	bl	_memcpy +	POP_RET +#else  bcopy:  	/* switch the source and destination registers */  	eor     r0, r1, r0   	eor     r1, r0, r1   	eor     r0, r1, r0   	b	_memcpy /* (PLT) */ +#endif  .size bcopy,.-bcopy diff --git a/libc/string/arm/bzero.S b/libc/string/arm/bzero.S index ee49cf560..e576a12e9 100644 --- a/libc/string/arm/bzero.S +++ b/libc/string/arm/bzero.S @@ -38,6 +38,7 @@   */  #include <features.h> +#include <bits/arm_asm.h>  #ifdef __UCLIBC_SUSV3_LEGACY__ @@ -46,10 +47,21 @@  .type bzero,%function  .align 4 +#if defined(__thumb__) && !defined(__thumb2__) +.thumb_func +bzero: +	push	{r2, lr} +	mov	r2, r1 +	mov	r1, #0 +	bl	HIDDEN_JUMPTARGET(memset) +	POP_RET +#else +  bzero:  	mov	r2, r1  	mov	r1, #0  	b	HIDDEN_JUMPTARGET(memset) +#endif  .size bzero,.-bzero diff --git a/libc/string/arm/memcmp.S b/libc/string/arm/memcmp.S index 4f78b5128..65409f43a 100644 --- 
a/libc/string/arm/memcmp.S +++ b/libc/string/arm/memcmp.S @@ -30,15 +30,41 @@   */  #include <features.h> +#include <bits/arm_asm.h>  .text  .global memcmp  .type memcmp,%function  .align 4 +#if defined(THUMB1_ONLY) +.thumb_func +memcmp: +	cmp	r2, #0 +	bne	1f +	mov	r0, #0 +	bx	lr +1: +	push	{r4} +	add	r4, r0, r2 +2: +	ldrb	r2, [r0] +	add	r0, r0, #1 +	ldrb	r3, [r1] +	add	r1, r1, #1 +	cmp	r4, r0 +	beq	3f +	cmp	r2, r3 +	beq	2b +3: +	sub	r0, r2, r3 +        pop	{r4} +	bx	lr +#else  memcmp:  	/* if ((len - 1) < 0) return 0 */  	subs	r2, r2, #1 +	IT(tt, mi)  	movmi	r0, #0  #if defined(__USE_BX__)          bxmi    lr @@ -51,6 +77,7 @@ memcmp:  	ldrb	r2, [r0], #1  	ldrb	r3, [r1], #1  	cmp	ip, r0 +	IT(t, cs)  	cmpcs	r2, r3  	beq	1b  	sub	r0, r2, r3 @@ -59,6 +86,7 @@ memcmp:  #else   	mov	pc, lr  #endif +#endif  .size memcmp,.-memcmp diff --git a/libc/string/arm/memcpy.S b/libc/string/arm/memcpy.S index 7a5b6ab76..d2013d211 100644 --- a/libc/string/arm/memcpy.S +++ b/libc/string/arm/memcpy.S @@ -38,16 +38,23 @@   */  #include <features.h> +#include <bits/arm_asm.h>  .text  .global memcpy  .type memcpy,%function  .align 4 +#if defined(__thumb__) && !defined(__thumb2__) +.thumb_func  memcpy: -	stmfd	sp!, {r0, lr} +	push	{r0, lr}  	bl	_memcpy -	ldmfd	sp!, {r0, pc} +	POP_RET +#else +memcpy: +	b	_memcpy +#endif  .size memcpy,.-memcpy diff --git a/libc/string/arm/memmove.S b/libc/string/arm/memmove.S index 45cd9b4d4..c11b98dd4 100644 --- a/libc/string/arm/memmove.S +++ b/libc/string/arm/memmove.S @@ -38,16 +38,23 @@   */  #include <features.h> +#include <bits/arm_asm.h>  .text  .global memmove  .type memmove,%function  .align 4 +#if defined(__thumb__) && !defined(__thumb2__) +.thumb_func  memmove: -	stmfd	sp!, {r0, lr} +	push	{r2, lr}  	bl	_memcpy -	ldmfd	sp!, {r0, pc} +	POP_RET +#else +memmove: +	b	_memcpy +#endif  .size memmove,.-memmove diff --git a/libc/string/arm/memset.S b/libc/string/arm/memset.S index 16bfe0dc5..66aa6039c 100644 --- a/libc/string/arm/memset.S +++ 
b/libc/string/arm/memset.S @@ -19,12 +19,52 @@  #include <features.h>  #include <sys/syscall.h> +#include <bits/arm_asm.h>  .text  .global memset  .type memset,%function  .align 4 +#if defined(THUMB1_ONLY) +.thumb_func +memset: +	mov	ip, r0 +	cmp	r2, #8		@ at least 8 bytes to do? +	bcc	2f + +	lsl	r3, r1, #8 +	orr	r1, r3 +	lsl	r3, r1, #16 +	orr	r1, r3 + +	mov	r3, #3 +1:	@ Fill up to the first word boundary +	tst	r0, r3 +	beq	1f +	strb	r1, [r0] +	add	r0, r0, #1 +	sub	r2, r2, #1 +	b	1b +1:	@ Fill aligned words +	str	r1, [r0] +	add	r0, r0, #4 +	sub	r2, r2, #4 +	cmp	r2, #4 +	bcs	1b + +2:	@ Fill the remaining bytes +	cmp	r2, #0 +	beq	2f +1: +	strb	r1, [r0] +	add	r0, r0, #1 +	sub	r2, r2, #1 +	bne	1b +2: +	mov	r0, ip +	bx lr +#else  memset:  	mov	a4, a1  	cmp	a3, $8		@ at least 8 bytes to do? @@ -33,8 +73,14 @@ memset:  	orr	a2, a2, a2, lsl $16  1:  	tst	a4, $3		@ aligned yet? +#if defined(__thumb2__) +	itt	ne +	strbne	a2, [a4], $1 +	subne	a3, a3, $1 +#else  	strneb	a2, [a4], $1  	subne	a3, a3, $1 +#endif  	bne	1b  	mov	ip, a2  1: @@ -51,16 +97,30 @@ memset:  	stmia	a4!, {a2, ip}  	sub	a3, a3, $8  	cmp	a3, $8		@ 8 bytes still to do? +#if defined(__thumb2__) +	itt	ge +	stmiage	a4!, {a2, ip} +	subge	a3, a3, $8 +#else  	stmgeia	a4!, {a2, ip}  	subge	a3, a3, $8 +#endif  	bge	1b  2:  	movs	a3, a3		@ anything left? 
+	IT(t, eq)  #if defined(__USE_BX__)          bxeq    lr  #else          moveq	pc, lr		@ nope  #endif +#if defined (__thumb2__) +1: +	strb	a2, [a4], #1 +	subs	a3, a3, #1 +	bne	1b +	bx	lr +#else  	rsb	a3, a3, $7  	add	pc, pc, a3, lsl $2  	mov	r0, r0 @@ -76,6 +136,8 @@ memset:  #else   	mov	pc, lr  #endif +#endif +#endif  .size memset,.-memset diff --git a/libc/string/arm/strcmp.S b/libc/string/arm/strcmp.S index 89aa38874..97363c1c2 100644 --- a/libc/string/arm/strcmp.S +++ b/libc/string/arm/strcmp.S @@ -30,17 +30,35 @@   */  #include <features.h> +#include <bits/arm_asm.h>  .text  .global strcmp  .type strcmp,%function  .align 4 +#if defined(__thumb__) && !defined(__thumb2__) +.thumb_func +strcmp: +1: +	ldrb	r2, [r0] +	add	r0, r0, #1 +	ldrb	r3, [r1] +	add	r1, r1, #1 +	cmp	r2, #0 +	beq	2f +	cmp	r2, r3 +	beq	1b +2: +	sub	r0, r2, r3 +	bx      lr +#else  strcmp:  1:  	ldrb	r2, [r0], #1  	ldrb	r3, [r1], #1  	cmp	r2, #1 +	IT(t, cs)  	cmpcs	r2, r3  	beq	1b  	sub	r0, r2, r3 @@ -49,6 +67,7 @@ strcmp:  #else    	mov	pc, lr  #endif +#endif  .size strcmp,.-strcmp diff --git a/libc/string/arm/strlen.S b/libc/string/arm/strlen.S index 5b4b02e17..949e918f4 100644 --- a/libc/string/arm/strlen.S +++ b/libc/string/arm/strlen.S @@ -20,6 +20,7 @@  #include <features.h>  #include <endian.h>  #include <sys/syscall.h> +#include <bits/arm_asm.h>  /* size_t strlen(const char *S)   * entry: r0 -> string @@ -31,6 +32,19 @@  .type strlen,%function  .align 4 +#if defined(THUMB1_ONLY) +/* A simple implementation for when the ARM implementation can't be used.  */ +.thumb_func +strlen: +	mov r2, #0 +1: +	ldrb	r1, [r0, r2] +	add	r2, r2, #1 +	cmp	r1, #0 +	bne	1b +	sub	r0, r2, #1 +	bx lr +#else  strlen:  	bic     r1, r0, $3              @ addr of word containing first byte  	ldr     r2, [r1], $4            @ get the first word @@ -41,38 +55,48 @@ strlen:  #if __BYTE_ORDER == __BIG_ENDIAN  	orr     r2, r2, $0xff000000     @ set this byte to non-zero  	subs    r3, r3, $1              @ any more to do? 
+	IT(t, gt)  	orrgt   r2, r2, $0x00ff0000     @ if so, set this byte  	subs    r3, r3, $1              @ more? +	IT(t, gt)  	orrgt   r2, r2, $0x0000ff00     @ then set.  #else  	orr     r2, r2, $0x000000ff     @ set this byte to non-zero  	subs    r3, r3, $1              @ any more to do? +	IT(t, gt)  	orrgt   r2, r2, $0x0000ff00     @ if so, set this byte  	subs    r3, r3, $1              @ more? +	IT(t, gt)  	orrgt   r2, r2, $0x00ff0000     @ then set.  #endif  Laligned:				@ here, we have a word in r2.  Does it  	tst     r2, $0x000000ff         @ contain any zeroes? +	IT(tttt, ne)  	tstne   r2, $0x0000ff00         @  	tstne   r2, $0x00ff0000         @  	tstne   r2, $0xff000000         @  	addne   r0, r0, $4              @ if not, the string is 4 bytes longer +	IT(t, ne)  	ldrne   r2, [r1], $4            @ and we continue to the next word  	bne     Laligned                @  Llastword:				@ drop through to here once we find a  #if __BYTE_ORDER == __BIG_ENDIAN  	tst     r2, $0xff000000         @ word that has a zero byte in it +	IT(tttt, ne)  	addne   r0, r0, $1              @  	tstne   r2, $0x00ff0000         @ and add up to 3 bytes on to it  	addne   r0, r0, $1              @  	tstne   r2, $0x0000ff00         @ (if first three all non-zero, 4th +	IT(t, ne)  	addne   r0, r0, $1              @  must be zero)  #else  	tst     r2, $0x000000ff         @ +	IT(tttt, ne)  	addne   r0, r0, $1              @  	tstne   r2, $0x0000ff00         @ and add up to 3 bytes on to it  	addne   r0, r0, $1              @  	tstne   r2, $0x00ff0000         @ (if first three all non-zero, 4th +	IT(t, ne)  	addne   r0, r0, $1              @  must be zero)  #endif  #if defined(__USE_BX__) @@ -80,6 +104,7 @@ Llastword:				@ drop through to here once we find a  #else    	mov	pc,lr  #endif +#endif  .size strlen,.-strlen diff --git a/libc/string/arm/strncmp.S b/libc/string/arm/strncmp.S index eaf0620b4..8487639c8 100644 --- a/libc/string/arm/strncmp.S +++ b/libc/string/arm/strncmp.S @@ -30,15 
+30,46 @@   */  #include <features.h> +#include <bits/arm_asm.h>  .text  .global strncmp  .type strncmp,%function  .align 4 +#if defined(THUMB1_ONLY) +.thumb_func  strncmp:  	/* if (len == 0) return 0 */  	cmp	r2, #0 +	bne	1f +	mov	r0, #0 +	bx	lr +1: +	push	{r4} + +	/* ip == last src address to compare */ +	add	r4, r0, r2 +2: +	cmp	r4, r0 +	beq	3f +	ldrb	r2, [r0] +	add	r0, r0, #1 +	ldrb	r3, [r1] +	add	r1, r1, #1 +	cmp	r2, #0 +	beq	3f +	cmp	r2, r3 +	beq	2b +3: +	sub	r0, r2, r3 +	pop	{r4} +	bx	lr +#else +strncmp: +	/* if (len == 0) return 0 */ +	cmp	r2, #0 +	IT(tt, eq)  	moveq	r0, #0  #if defined(__USE_BX__)          bxeq    lr @@ -53,6 +84,7 @@ strncmp:  	ldrb	r2, [r0], #1  	ldrb	r3, [r1], #1  	cmp	ip, r0 +	IT(tt, cs)  	cmpcs	r2, #1  	cmpcs	r2, r3  	beq	1b @@ -62,6 +94,7 @@ strncmp:  #else    	mov	pc, lr  #endif +#endif  .size strncmp,.-strncmp diff --git a/libc/sysdeps/linux/arm/__longjmp.S b/libc/sysdeps/linux/arm/__longjmp.S index 4261797f8..5faf4ece9 100644 --- a/libc/sysdeps/linux/arm/__longjmp.S +++ b/libc/sysdeps/linux/arm/__longjmp.S @@ -18,6 +18,7 @@     02111-1307 USA.  */  #include <features.h> +#include <bits/arm_asm.h>  #define _SETJMP_H  #define _ASM  #include <bits/setjmp.h> @@ -26,13 +27,44 @@  .global __longjmp  .type __longjmp,%function  .align 2 +#if defined(THUMB1_ONLY) +.thumb_func +__longjmp: +	mov	r2, r0 +	movs	r0, r1 +	/* can't let setjmp() return zero! */ +	bne	1f +	mov	r0, #1 +1: +	mov	r1, r2 +	/* Restore registers, shuffling them through low regs.  */ +	add	r2, #(4 * 4) +	ldmia	r2!, {r4, r5, r6, r7} +	mov	r8, r4 +	mov	r9, r5 +	mov	sl, r6 +	mov	fp, r7 +	ldmia	r2!, {r4, r5} +	mov	sp, r4 +	mov	lr, r5 +	ldmia	r1!, {r4, r5, r6, r7} +	bx	lr +#else  __longjmp:  	mov	ip, r0		/* save jmp_buf pointer */  	movs	r0, r1		/* get the return value in place */ +	IT(t, eq)  	moveq	r0, #1		/* can't let setjmp() return zero! */ +#if defined(__thumb2__) +	/* Thumb-2 does not allow loading sp with ldm.  
*/ +	ldmia     ip!,  {v1-v6, sl, fp} +	ldr	  sp, [ip], #4 +	ldr	  lr, [ip], #4 +#else  	ldmia     ip!,  {v1-v6, sl, fp, sp, lr} +#endif  #if defined __UCLIBC_HAS_FLOATS__ && ! defined __UCLIBC_HAS_SOFT_FLOAT__  #ifdef __VFP_FP__ @@ -76,6 +108,7 @@ __longjmp:  #else  	mov pc, lr  #endif +#endif  .size __longjmp,.-__longjmp  libc_hidden_def(__longjmp) diff --git a/libc/sysdeps/linux/arm/bits/arm_asm.h b/libc/sysdeps/linux/arm/bits/arm_asm.h new file mode 100644 index 000000000..1d87df6eb --- /dev/null +++ b/libc/sysdeps/linux/arm/bits/arm_asm.h @@ -0,0 +1,28 @@ +/* Various definitions used by the ARM uClibc assembly code.  */ +#ifndef _ARM_ASM_H +#define _ARM_ASM_H + +#ifdef __thumb2__ +.thumb +.syntax unified +#define IT(t, cond) i##t cond +#else +/* XXX: This can be removed if/when we require an assembler that supports +   unified assembly syntax.  */ +#define IT(t, cond) +/* Code to return from a thumb function stub.  ARMv4T must return with bx: pop {pc} cannot interwork there.  */ +#ifdef __ARM_ARCH_4T__ +#define POP_RET pop	{r2, r3}; bx	r3 +#else +#define POP_RET pop	{r2, pc} +#endif +#endif + +#if defined(__ARM_ARCH_6M__) +/* ARMv6-M is Thumb-1 only: undefine IT so any stray use of the conditional ARM/Thumb-2 code paths fails to assemble.  */ +#undef IT +#define THUMB1_ONLY 1 +#endif + +#endif /* _ARM_ASM_H */ + diff --git a/libc/sysdeps/linux/arm/bsd-_setjmp.S b/libc/sysdeps/linux/arm/bsd-_setjmp.S index f70073266..a05570df7 100644 --- a/libc/sysdeps/linux/arm/bsd-_setjmp.S +++ b/libc/sysdeps/linux/arm/bsd-_setjmp.S @@ -17,13 +17,38 @@     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA     02111-1307 USA.  */ -/* This just does a tail-call to `__sigsetjmp (ARG, 1)'. +#include <bits/arm_asm.h> + +/* This just does a tail-call to `__sigsetjmp (ARG, 0)'.     We cannot do it in C because it must be a tail-call, so frame-unwinding     in setjmp doesn't clobber the state restored by longjmp.  
*/  .global _setjmp  .type _setjmp,%function  .align 2 +#if defined(THUMB1_ONLY) +.thumb_func +_setjmp: +	mov	r1, #0 +#ifdef __PIC__ +	ldr	r3, .L_GOT +	adr	r2, .L_GOT +	add	r3, r2, r3 + +	ldr	r2, .L_GOT+4	/* __sigsetjmp */ +	ldr	r2, [r2, r3] +	bx	r2 + +	.align 2 +.L_GOT: +	.word	_GLOBAL_OFFSET_TABLE_-.L_GOT +	.word	__sigsetjmp(GOT) +#else +	ldr	r2, =__sigsetjmp +	bx	r2 +.pool +#endif +#else  _setjmp:  	mov	r1, #0  #ifdef __PIC__ @@ -31,5 +56,6 @@ _setjmp:  #else  	b	__sigsetjmp  #endif +#endif  .size _setjmp,.-_setjmp diff --git a/libc/sysdeps/linux/arm/bsd-setjmp.S b/libc/sysdeps/linux/arm/bsd-setjmp.S index 6253c6675..d7ca72ad5 100644 --- a/libc/sysdeps/linux/arm/bsd-setjmp.S +++ b/libc/sysdeps/linux/arm/bsd-setjmp.S @@ -17,6 +17,8 @@     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA     02111-1307 USA.  */ +#include <bits/arm_asm.h> +  /* This just does a tail-call to `__sigsetjmp (ARG, 1)'.     We cannot do it in C because it must be a tail-call, so frame-unwinding     in setjmp doesn't clobber the state restored by longjmp.  
*/ @@ -24,6 +26,29 @@  .global setjmp  .type setjmp,%function  .align 2 +#if defined(THUMB1_ONLY) +.thumb_func +setjmp: +	mov	r1, #1 +#ifdef __PIC__ +	ldr	r3, .L_GOT +	adr	r2, .L_GOT +	add	r3, r2, r3 + +	ldr	r2, .L_GOT+4	/* __sigsetjmp */ +	ldr	r2, [r2, r3] +	bx	r2 + +	.align 2 +.L_GOT: +	.word	_GLOBAL_OFFSET_TABLE_-.L_GOT +	.word	__sigsetjmp(GOT) +#else +	ldr	r2, =__sigsetjmp +	bx	r2 +.pool +#endif +#else  setjmp:  	mov	r1, #1  #ifdef __PIC__ @@ -31,5 +56,6 @@ setjmp:  #else  	b	__sigsetjmp  #endif +#endif  .size setjmp,.-setjmp diff --git a/libc/sysdeps/linux/arm/clone.S b/libc/sysdeps/linux/arm/clone.S index a5a847d1e..d9483735d 100644 --- a/libc/sysdeps/linux/arm/clone.S +++ b/libc/sysdeps/linux/arm/clone.S @@ -24,17 +24,66 @@  #include <features.h>  #include <bits/errno.h>  #include <sys/syscall.h> +#include <bits/arm_asm.h> -#ifdef __NR_clone +#if defined(__NR_clone)  /* int clone(int (*fn)(void *arg), void *child_stack, int flags, void *arg); */  .text  .global clone  .type clone,%function  .align 2 +#if defined(THUMB1_ONLY) +.thumb_func  clone:  	@ sanity check args  	cmp	r0, #0 +	beq	__einval +	cmp	r1, #0 +	beq	__einval + +	@ insert the args onto the new stack +	sub	r1, r1, #8 +	str	r3, [r1, #4] +	@ save the function pointer as the 0th element +	str	r0, [r1] + +	@ do the system call +	@ get flags +	mov	r0, r2 +	@ new sp is already in r1 +	DO_CALL (clone) +	movs	a1, a1 +	blt	__error +	beq	1f +	bx	lr +1: + +	@ pick the function arg and call address off the stack and execute +	ldr	r0, [sp, #4] +	ldr	r1, [sp] +	bl	2f	@ blx r1 + +	@ and we are done, passing the return value through r0 +	bl	HIDDEN_JUMPTARGET(_exit) +	@ Should never return +	b	. 
+ +2: +	bx	r1 + +__einval: +	ldr	r0, =-EINVAL +__error: +	push	{r3, lr} +	bl	__syscall_error +	POP_RET +.pool +#else +clone: +	@ sanity check args +	cmp	r0, #0 +	IT(te, ne)  	cmpne	r1, #0  	moveq	r0, #-EINVAL  	beq	__error @@ -52,6 +101,7 @@ clone:  	DO_CALL (clone)  	movs	a1, a1  	blt	__error +	IT(t, ne)  #if defined(__USE_BX__)  	bxne	lr  #else @@ -68,6 +118,7 @@ clone:  __error:  	b	__syscall_error +#endif  .size clone,.-clone diff --git a/libc/sysdeps/linux/arm/crt1.S b/libc/sysdeps/linux/arm/crt1.S index 8d4d230a7..082348e39 100644 --- a/libc/sysdeps/linux/arm/crt1.S +++ b/libc/sysdeps/linux/arm/crt1.S @@ -94,6 +94,7 @@ ARM register quick reference:  */  #include <features.h> +#include <bits/arm_asm.h>  .text  	.globl	_start @@ -105,6 +106,73 @@ ARM register quick reference:  	.weak	_fini  #endif +#if defined(THUMB1_ONLY) +.thumb_func +_start: +	/* Clear the frame pointer since this is the outermost frame.  */ +	mov r3, #0 +	mov fp, r3 + +#ifdef __ARCH_USE_MMU__ +	/* Pop argc off the stack and save a pointer to argv */ +	pop {a2} +	mov a3, sp +#else +	/* +	 * uClinux/arm stacks look a little different from normal +	 * MMU-full Linux/arm stacks (for no good reason) +	 */ +	/* pull argc and argv off the stack.  We are going to push 3 +	 * arguments, so pop one here to maintain doubleword alignment.  */ +	pop {a2} +	ldr a3, [sp] +#endif + +	/* Push stack limit and rtld_fini */ +	push {a1, a3} + +#ifdef __PIC__ +	ldr r4, .L_GOT +.L_GOT_OFF: +	adr r5, .L_GOT +	add r4, r5, r4 + +	ldr r5, .L_GOT+4	/* _fini */ +	ldr a1, [r4, r5] +	push {a1}		/* Push _fini */ + +	ldr r5, .L_GOT+8	/* _init */ +	ldr a4, [r4, r5] +	 +	ldr r5, .L_GOT+12	/* main */ +	ldr a1, [r4, r5] + +#else +	/* Fetch address of fini */ +	ldr r4, =_fini +	/* Push fini */ +	push {r4} + +	/* Set up the other arguments in registers */ +	ldr a1, =main +	ldr a4, =_init +#endif +	/* __uClibc_main (main, argc, argv, init, fini, rtld_fini, stack_end) */ +	/* Let the libc call main and exit with its return code.  
*/ +	bl __uClibc_main + +	/* should never get here....*/ +	bl abort +.pool + +#ifdef __PIC__ +.L_GOT: +	.word	_GLOBAL_OFFSET_TABLE_-.L_GOT +	.word _fini(GOT) +	.word _init(GOT) +	.word main(GOT) +#endif +#else /* !THUMB1_ONLY */  _start:  	/* Clear the frame pointer and link register since this is the outermost frame.  */  	mov fp, #0 @@ -175,6 +243,7 @@ _start:  	.word _init(GOT)  	.word main(GOT)  #endif +#endif  /* Define a symbol for the first piece of initialized data.  */  	.data diff --git a/libc/sysdeps/linux/arm/crti.S b/libc/sysdeps/linux/arm/crti.S index 4835b8331..e335b7140 100644 --- a/libc/sysdeps/linux/arm/crti.S +++ b/libc/sysdeps/linux/arm/crti.S @@ -1,5 +1,6 @@  	.file	"initfini.c" +#include <bits/arm_asm.h>  	.section .init  	.global	_init  	.type	_init, %function diff --git a/libc/sysdeps/linux/arm/crtn.S b/libc/sysdeps/linux/arm/crtn.S index 7a1ca1ab1..de01b38dc 100644 --- a/libc/sysdeps/linux/arm/crtn.S +++ b/libc/sysdeps/linux/arm/crtn.S @@ -1,5 +1,6 @@  	.file	"initfini.c" +#include <bits/arm_asm.h>  	.section .init  	.global	_init  	.type	_init, %function diff --git a/libc/sysdeps/linux/arm/mmap64.S b/libc/sysdeps/linux/arm/mmap64.S index ba8cb2fca..73d6b51ce 100644 --- a/libc/sysdeps/linux/arm/mmap64.S +++ b/libc/sysdeps/linux/arm/mmap64.S @@ -20,6 +20,7 @@  #define _ERRNO_H  #include <bits/errno.h>  #include <sys/syscall.h> +#include <bits/arm_asm.h>  #if defined __UCLIBC_HAS_LFS__ && defined __NR_mmap2 @@ -28,9 +29,46 @@  .global mmap64  .type mmap64,%function  .align 2 -mmap64:  #ifdef __ARM_EABI__ +#if defined(THUMB1_ONLY) +.thumb_func +mmap64: +#ifdef __ARMEB__ +/* Offsets are after pushing 3 words.  
*/ +# define LOW_OFFSET  12 + 8 + 4 +# define HIGH_OFFSET 12 + 8 + 0 +#else +# define LOW_OFFSET  12 + 8 + 0 +# define HIGH_OFFSET 12 + 8 + 4 +#endif +	push	{r4, r5, r6} +	ldr	r6, [sp, $LOW_OFFSET] +	ldr	r5, [sp, $HIGH_OFFSET] +	lsl	r4, r6, #20		@ check that offset is page-aligned +	bne	.Linval +	lsr	r4, r5, #12		@ check for overflow +	bne	.Linval +	@ compose page offset +	lsr	r6, r6, #12 +	lsl	r5, r5, #20 +	orr	r5, r5, r6 +	ldr	r4, [sp, #12]		@ load fd: first stack argument, just above the 3 words pushed here +	DO_CALL (mmap2) +	pop	{r4, r5, r6}		@ rebalance the stack and restore r4-r6 on both the success and error paths +	ldr	r1, =0xfffff000 +	cmp	r0, r1 +	bcs	.Lerror +	bx	lr +.Linval: +	ldr	r0, =-EINVAL +	pop	{r4, r5, r6} +.Lerror: +	push	{r3, lr} +	bl	__syscall_error +	POP_RET +.pool +#else /* !THUMB1_ONLY */ +mmap64:  #ifdef __ARMEB__  # define LOW_OFFSET      8 + 4  /* The initial + 4 is for the stack postdecrement.  */ @@ -45,6 +83,7 @@ mmap64:  	str	r4, [sp, #-4]!  	movs	r4, ip, lsl $20		@ check that offset is page-aligned  	mov	ip, ip, lsr $12 +	IT(t, eq)  	moveqs	r4, r5, lsr $12		@ check for overflow  	bne	.Linval  	ldr	r4, [sp, $8]		@ load fd @@ -52,6 +91,7 @@ mmap64:  	DO_CALL (mmap2)  	cmn	r0, $4096  	ldmfd	sp!, {r4, r5} +	IT(t, cc)  #if defined(__USE_BX__)  	bxcc	lr  #else @@ -62,7 +102,8 @@ mmap64:  	mov	r0, $-EINVAL  	ldmfd	sp!, {r4, r5}  	b	__syscall_error -#else +#endif +#else /* !__ARM_EABI__ */  	stmfd	sp!, {r4, r5, lr}  	ldr	r5, [sp, $16]  	ldr	r4, [sp, $12] diff --git a/libc/sysdeps/linux/arm/setjmp.S b/libc/sysdeps/linux/arm/setjmp.S index 8d15b8324..2df7d551a 100644 --- a/libc/sysdeps/linux/arm/setjmp.S +++ b/libc/sysdeps/linux/arm/setjmp.S @@ -18,15 +18,41 @@     02111-1307 USA.  
*/  #include <features.h> +#include <bits/arm_asm.h>  .global __sigsetjmp  .type __sigsetjmp,%function  .align 2 +#if defined(THUMB1_ONLY) +.thumb_func  __sigsetjmp: +	push	{r3, r4, r5, r6, r7, lr}  	mov	ip, r0 +	stmia	r0!, {r4, r5, r6, r7} +	mov	r2, r8 +	mov	r3, r9 +	mov	r4, sl +	mov	r5, fp +	add	r6, sp, #(6 * 4) +	mov	r7, lr +	stmia	r0!, {r2, r3, r4, r5, r6, r7} +	mov	r0, ip +	bl	__sigjmp_save +	pop	{r3, r4, r5, r6, r7, pc} + +#else +__sigsetjmp: +	/* Save registers */ +	mov	ip, r0 +#if defined(__thumb2__) +	stmia	ip!, {v1-v6, sl, fp} +	mov	r2, sp			/* plain mov: flag-setting movs cannot take sp, and flags are unused */ +	stmia	ip!, {r2, lr} +#else  	/* Save registers */  	stmia	ip!, {v1-v6, sl, fp, sp, lr} +#endif  #if defined __UCLIBC_HAS_FLOATS__ && ! defined __UCLIBC_HAS_SOFT_FLOAT__  # ifdef __VFP_FP__  	/* Store the VFP registers.  */ @@ -70,5 +96,6 @@ __sigsetjmp:  #else  	B	__sigjmp_save  #endif +#endif  .size __sigsetjmp,.-__sigsetjmp diff --git a/libc/sysdeps/linux/arm/sigrestorer.S b/libc/sysdeps/linux/arm/sigrestorer.S index 194228a38..79728fd40 100644 --- a/libc/sysdeps/linux/arm/sigrestorer.S +++ b/libc/sysdeps/linux/arm/sigrestorer.S @@ -16,6 +16,7 @@     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA     02111-1307 USA.  */ +#include <bits/arm_asm.h>  #include <sys/syscall.h>  #include <linux/version.h> @@ -38,6 +39,9 @@  .type __default_sa_restorer,%function  .align 2  #ifdef __ARM_EABI__ +#ifdef __thumb__ +.thumb_func +#endif  	.fnstart  	.save {r0-r15}  #if LINUX_VERSION_CODE >= 0x020612 @@ -62,6 +66,9 @@ __default_sa_restorer:  .type __default_rt_sa_restorer,%function  .align 2  #ifdef __ARM_EABI__ +#ifdef __thumb__ +.thumb_func +#endif  	.fnstart  	.save {r0-r15}  #if LINUX_VERSION_CODE >= 0x020612 diff --git a/libc/sysdeps/linux/arm/syscall-eabi.S b/libc/sysdeps/linux/arm/syscall-eabi.S index efc30690c..b9318821b 100644 --- a/libc/sysdeps/linux/arm/syscall-eabi.S +++ b/libc/sysdeps/linux/arm/syscall-eabi.S @@ -17,6 +17,7 @@     02111-1307 USA.  
*/  #include <sys/syscall.h> +#include <bits/arm_asm.h>  /* In the EABI syscall interface, we don't need a special syscall to     implement syscall().  It won't work reliably with 64-bit arguments @@ -26,6 +27,29 @@  .global syscall  .type syscall,%function  .align 4 +#if defined(THUMB1_ONLY) +.thumb_func +syscall: +	push	{r4, r5, r6, r7} +	mov	ip, r0 +	mov	r0, r1 +	mov	r1, r2 +	mov	r2, r3 +	add	r7, sp, #(4 * 4) +	ldmia	r7!, {r3, r4, r5, r6} +	mov	r7, ip +	swi	0x0 +	pop	{r4, r5, r6, r7} +	ldr	r1, =0xfffff000 +	cmp	r0, r1 +	bcs	1f +	bx lr +1: +	push	{r3, lr} +	bl	__syscall_error +	POP_RET +.pool +#else  syscall:  	mov	ip, sp  	stmfd	sp!, {r4, r5, r6, r7} @@ -37,11 +61,13 @@ syscall:  	swi	0x0  	ldmfd	sp!, {r4, r5, r6, r7}  	cmn	r0, #4096 +	IT(t, cc)  #if defined(__USE_BX__)  	bxcc	lr  #else  	movcc	pc, lr  #endif  	b	__syscall_error +#endif  .size syscall,.-syscall diff --git a/libc/sysdeps/linux/arm/vfork.S b/libc/sysdeps/linux/arm/vfork.S index e9f63d46e..42595b026 100644 --- a/libc/sysdeps/linux/arm/vfork.S +++ b/libc/sysdeps/linux/arm/vfork.S @@ -6,6 +6,7 @@   */  #include <features.h> +#include <bits/arm_asm.h>  #define _ERRNO_H  #include <bits/errno.h> @@ -18,11 +19,47 @@  .type	__vfork,%function  .align 4 +#if defined(__thumb__) && !defined(__thumb2__) +.thumb_func +__vfork: +#ifdef __NR_vfork +	DO_CALL (vfork) +	ldr		r1, =0xfffff000 +	cmp		r0, r1 +	bcs		1f +	bx		lr +1: + +	/* Check if vfork even exists.  */ +	ldr		r1, =-ENOSYS +	cmp		r0, r1 +	bne		__error + +	/* If we don't have vfork, use fork.  */ +	DO_CALL (fork) +	ldr		r1, =0xfffff000 +	cmp		r0, r1 + +	/* Syscall worked.  Return to child/parent */ +	bcs		1f +	bx		lr +1: + +__error: +	push	{r3, lr} +	bl	__syscall_error +	POP_RET +.pool + +#endif + +#else  __vfork:  #ifdef __NR_vfork  	DO_CALL (vfork)  	cmn	r0, #4096 +	IT(t, cc)  #if defined(__USE_BX__)  	bxcc	lr  #else @@ -40,6 +77,7 @@ __vfork:  	cmn     r0, #4096  	/* Syscall worked.  
Return to child/parent */ +	IT(t, cc)  #if defined(__USE_BX__)  	bxcc	lr  #else @@ -48,8 +86,10 @@ __vfork:  __error:  	b	__syscall_error +#endif  .size __vfork,.-__vfork +  weak_alias(__vfork,vfork)  libc_hidden_weak(vfork)  #endif | 
