From efce79f09ae6daa77cd322df0d532beec3f445f5 Mon Sep 17 00:00:00 2001
From: Bernhard Reutner-Fischer <rep.dot.nop@gmail.com>
Date: Wed, 26 Mar 2008 13:40:36 +0000
Subject: Paul Brook writes: The attached patch adds support for compiling ARM
 uClibc as pure Thumb code. This is needed because some recent ARM cores do
 not implement the traditional ARM instruction set.  Specifically:

* Cortex-M1 - An extremely minimal FPGA-based core that implements only
Thumb-1 (the ARMv6-M architecture).
* Cortex-M3 - A Thumb-2-only ARMv7-M core.

Most of uClibc already builds in Thumb mode; all that is left is a handful
of assembly bits.

Tested on arm-uclinuxeabi.
---
 libc/string/arm/_memcpy.S             | 182 +++++++++++++++++++++++++++++++---
 libc/string/arm/bcopy.S               |  12 +++
 libc/string/arm/bzero.S               |  12 +++
 libc/string/arm/memcmp.S              |  28 ++++++
 libc/string/arm/memcpy.S              |  11 +-
 libc/string/arm/memmove.S             |  11 +-
 libc/string/arm/memset.S              |  62 ++++++++++++
 libc/string/arm/strcmp.S              |  19 ++++
 libc/string/arm/strlen.S              |  25 +++++
 libc/string/arm/strncmp.S             |  33 ++++++
 libc/sysdeps/linux/arm/__longjmp.S    |  33 ++++++
 libc/sysdeps/linux/arm/bits/arm_asm.h |  28 ++++++
 libc/sysdeps/linux/arm/bsd-_setjmp.S  |  28 +++++-
 libc/sysdeps/linux/arm/bsd-setjmp.S   |  26 +++++
 libc/sysdeps/linux/arm/clone.S        |  53 +++++++++-
 libc/sysdeps/linux/arm/crt1.S         |  69 +++++++++++++
 libc/sysdeps/linux/arm/crti.S         |   1 +
 libc/sysdeps/linux/arm/crtn.S         |   1 +
 libc/sysdeps/linux/arm/mmap64.S       |  45 ++++++++-
 libc/sysdeps/linux/arm/setjmp.S       |  27 +++++
 libc/sysdeps/linux/arm/sigrestorer.S  |   7 ++
 libc/sysdeps/linux/arm/syscall-eabi.S |  26 +++++
 libc/sysdeps/linux/arm/vfork.S        |  40 ++++++++
 23 files changed, 757 insertions(+), 22 deletions(-)
 create mode 100644 libc/sysdeps/linux/arm/bits/arm_asm.h

diff --git a/libc/string/arm/_memcpy.S b/libc/string/arm/_memcpy.S
index 3704f96b5..5ef63c45a 100644
--- a/libc/string/arm/_memcpy.S
+++ b/libc/string/arm/_memcpy.S
@@ -39,7 +39,9 @@
 
 #include <features.h>
 #include <endian.h>
+#include <bits/arm_asm.h>
 
+#if !defined(THUMB1_ONLY)
 /*
  * This is one fun bit of code ...
  * Some easy listening music is suggested while trying to understand this
@@ -77,11 +79,36 @@
 .type _memcpy,%function
 .align 4
 
+/* XXX: The Thumb-2 conditionals can be removed if/when we require an
+   assembler that supports unified syntax.  */
+.macro copy regs
+#if defined(__thumb2__)
+	ittt	ge
+	ldmiage	r1!, \regs
+	stmiage	r0!, \regs
+#else
+	ldmgeia	r1!, \regs
+	stmgeia	r0!, \regs
+#endif
+.endm
+
+.macro copydb regs
+#if defined(__thumb2__)
+	ittt	ge
+	ldmdbge	r1!, \regs
+	stmdbge	r0!, \regs
+#else
+	ldmgedb	r1!, \regs
+	stmgedb	r0!, \regs
+#endif
+.endm
+
 _memcpy:
 	/* Determine copy direction */
 	cmp	r1, r0
 	bcc	.Lmemcpy_backwards
 
+	IT(tt, eq)
 	moveq	r0, #0			/* Quick abort for len=0 */
 #if defined(__USE_BX__)
         bxeq    lr
@@ -102,7 +129,7 @@ _memcpy:
 	blt	.Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
 	subs	r2, r2, #0x14         
 	blt	.Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
-	stmdb	sp!, {r4}		/* borrow r4 */
+	str	r4, [sp, #-4]!		/* borrow r4 */
 
 	/* blat 32 bytes at a time */
 	/* XXX for really big copies perhaps we should use more registers */
@@ -115,19 +142,22 @@ _memcpy:
 	bge	.Lmemcpy_floop32
 
 	cmn	r2, #0x10
-	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
-	stmgeia	r0!, {r3, r4, r12, lr}
+	/* blat a remaining 16 bytes */
+	copy	"{r3, r4, r12, lr}"
 	subge	r2, r2, #0x10         
-	ldmia	sp!, {r4}		/* return r4 */
+	ldr	r4, [sp], #4		/* restore r4 */
 
 .Lmemcpy_fl32:
 	adds	r2, r2, #0x14         
 
 	/* blat 12 bytes at a time */
 .Lmemcpy_floop12:
-	ldmgeia	r1!, {r3, r12, lr}
-	stmgeia	r0!, {r3, r12, lr}
+	copy	"{r3, r12, lr}"
+#if defined(__thumb2__)
+	subsge	r2, r2, #0x0c         
+#else
 	subges	r2, r2, #0x0c         
+#endif
 	bge	.Lmemcpy_floop12
 
 .Lmemcpy_fl12:
@@ -135,26 +165,48 @@ _memcpy:
 	blt	.Lmemcpy_fl4
 
 	subs	r2, r2, #4
+	IT(tt, lt)
 	ldrlt	r3, [r1], #4
 	strlt	r3, [r0], #4
-	ldmgeia	r1!, {r3, r12}
-	stmgeia	r0!, {r3, r12}
+	copy	"{r3, r12}"
 	subge	r2, r2, #4
 
 .Lmemcpy_fl4:
 	/* less than 4 bytes to go */
 	adds	r2, r2, #4
+#if defined(__thumb2__)
+	it	eq
+	popeq	{r0, pc}		/* done */
+#elif defined(__ARM_ARCH_4T__)
+	ldmeqia	sp!, {r0, r3}		/* done */
+	bxeq	r3
+#else
 	ldmeqia	sp!, {r0, pc}		/* done */
+#endif
 
 	/* copy the crud byte at a time */
 	cmp	r2, #2
 	ldrb	r3, [r1], #1
 	strb	r3, [r0], #1
+#if defined(__thumb2__)
+	itt	ge
+	ldrbge	r3, [r1], #1
+	strbge	r3, [r0], #1
+	itt	gt
+	ldrbgt	r3, [r1], #1
+	strbgt	r3, [r0], #1
+#else
 	ldrgeb	r3, [r1], #1
 	strgeb	r3, [r0], #1
 	ldrgtb	r3, [r1], #1
 	strgtb	r3, [r0], #1
+#endif
+#if defined(__ARM_ARCH_4T__)
+	ldmia	sp!, {r0, r3}
+	bx	r3
+#else
 	ldmia	sp!, {r0, pc}
+#endif
 
 	/* erg - unaligned destination */
 .Lmemcpy_fdestul:
@@ -164,10 +216,19 @@ _memcpy:
 	/* align destination with byte copies */
 	ldrb	r3, [r1], #1
 	strb	r3, [r0], #1
+#if defined(__thumb2__)
+	itt	ge
+	ldrbge	r3, [r1], #1
+	strbge	r3, [r0], #1
+	itt	gt
+	ldrbgt	r3, [r1], #1
+	strbgt	r3, [r0], #1
+#else
 	ldrgeb	r3, [r1], #1
 	strgeb	r3, [r0], #1
 	ldrgtb	r3, [r1], #1
 	strgtb	r3, [r0], #1
+#endif
 	subs	r2, r2, r12
 	blt	.Lmemcpy_fl4		/* less the 4 bytes */
 
@@ -370,12 +431,12 @@ _memcpy:
 
 .Lmemcpy_bl32:
 	cmn	r2, #0x10            
-	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
-	stmgedb	r0!, {r3, r4, r12, lr}
+	/* blat a remaining 16 bytes */
+	copydb	"{r3, r4, r12, lr}"
 	subge	r2, r2, #0x10         
 	adds	r2, r2, #0x14         
-	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
-	stmgedb	r0!, {r3, r12, lr}
+	/* blat a remaining 12 bytes */
+	copydb	"{r3, r12, lr}"
 	subge	r2, r2, #0x0c         
 	ldmia	sp!, {r4, lr}
 
@@ -383,15 +444,16 @@ _memcpy:
 	adds	r2, r2, #8
 	blt	.Lmemcpy_bl4
 	subs	r2, r2, #4
+	IT(tt, lt)
 	ldrlt	r3, [r1, #-4]!
 	strlt	r3, [r0, #-4]!
-	ldmgedb	r1!, {r3, r12}
-	stmgedb	r0!, {r3, r12}
+	copydb	"{r3, r12}"
 	subge	r2, r2, #4
 
 .Lmemcpy_bl4:
 	/* less than 4 bytes to go */
 	adds	r2, r2, #4
+	IT(t, eq)
 #if defined(__USE_BX__)
         bxeq    lr
 #else
@@ -401,10 +463,19 @@ _memcpy:
 	cmp	r2, #2
 	ldrb	r3, [r1, #-1]!
 	strb	r3, [r0, #-1]!
+#ifdef __thumb2__
+	itt	ge
+	ldrbge	r3, [r1, #-1]!
+	strbge	r3, [r0, #-1]!
+	itt	gt
+	ldrbgt	r3, [r1, #-1]!
+	strbgt	r3, [r0, #-1]!
+#else
 	ldrgeb	r3, [r1, #-1]!
 	strgeb	r3, [r0, #-1]!
 	ldrgtb	r3, [r1, #-1]!
 	strgtb	r3, [r0, #-1]!
+#endif
 #if defined(__USE_BX__)
         bx      lr
 #else
@@ -417,10 +488,19 @@ _memcpy:
 	/* align destination with byte copies */
 	ldrb	r3, [r1, #-1]!
 	strb	r3, [r0, #-1]!
+#ifdef __thumb2__
+	itt	ge
+	ldrbge	r3, [r1, #-1]!
+	strbge	r3, [r0, #-1]!
+	itt	gt
+	ldrbgt	r3, [r1, #-1]!
+	strbgt	r3, [r0, #-1]!
+#else
 	ldrgeb	r3, [r1, #-1]!
 	strgeb	r3, [r0, #-1]!
 	ldrgtb	r3, [r1, #-1]!
 	strgtb	r3, [r0, #-1]!
+#endif
 	subs	r2, r2, r12
 	blt	.Lmemcpy_bl4		/* less than 4 bytes to go */
 	ands	r12, r1, #3
@@ -591,3 +671,77 @@ _memcpy:
 .Lmemcpy_bsrcul1l4:
 	add	r1, r1, #1
 	b	.Lmemcpy_bl4
+
+#else /* THUMB1_ONLY */
+
+/* This is a fairly dumb implementation for when we can't use the 32-bit code
+   above.  */
+.text
+.global _memcpy
+.hidden _memcpy
+.type _memcpy,%function
+.align 4
+.thumb
+_memcpy:
+	push	{r0, r4}
+	cmp	r2, #0
+	beq	.Lmemcpy_exit
+	@ See if we have overlapping regions, and need to reverse the
+	@ direction of the copy
+	cmp	r0, r1
+	bls	.Lmemcpy_forwards
+	add	r4, r1, r2
+	cmp	r0, r4
+	bcc	.Lmemcpy_backwards
+.Lmemcpy_forwards:
+	/* Forwards.  */
+	mov	r3, r0
+	eor	r3, r1
+	mov	r4, #3
+	tst	r3, r4
+	bne	.Lmemcpy_funaligned
+	cmp	r2, #8
+	bcc	.Lmemcpy_funaligned
+1:	@ copy up to the first word boundary.
+	tst	r0, r4
+	beq	1f
+	ldrb	r3, [r1]
+	add	r1, r1, #1
+	strb	r3, [r0]
+	add	r0, r0, #1
+	sub	r2, r2, #1
+	b	1b
+1:	@ Copy aligned words
+	ldr	r3, [r1]
+	add	r1, r1, #4
+	str	r3, [r0]
+	add	r0, r0, #4
+	sub	r2, r2, #4
+	cmp	r2, #4
+	bcs	1b
+	cmp	r2, #0
+	beq	.Lmemcpy_exit
+.Lmemcpy_funaligned:
+1:
+	ldrb	r3, [r1]
+	add	r1, r1, #1
+	strb	r3, [r0]
+	add	r0, r0, #1
+	sub	r2, r2, #1
+	bne	1b
+.Lmemcpy_exit:
+	pop	{r0, r4}
+	bx	lr
+
+.Lmemcpy_backwards:
+	add	r0, r0, r2
+	add	r1, r1, r2
+1:
+	sub	r0, r0, #1
+	sub	r1, r1, #1
+	ldrb	r3, [r1]
+	strb	r3, [r0]
+	sub	r2, r2, #1
+	bne	1b
+	b	.Lmemcpy_exit
+#endif
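
The Thumb-1 fallback above reduces to the following C shape (an
illustrative sketch, not the routine itself): copy backwards when the
destination overlaps the source, use word copies only when source and
destination are mutually word-aligned and the count makes it worthwhile,
and fall back to plain byte copies otherwise.

	#include <stddef.h>
	#include <stdint.h>

	static void *thumb1_memcpy_sketch(void *dst, const void *src, size_t n)
	{
		unsigned char *d = dst;
		const unsigned char *s = src;

		if ((uintptr_t)d > (uintptr_t)s && (uintptr_t)d < (uintptr_t)s + n) {
			d += n; s += n;			/* overlap: copy backwards */
			while (n--)
				*--d = *--s;
			return dst;
		}
		if (n >= 8 && (((uintptr_t)d ^ (uintptr_t)s) & 3) == 0) {
			while ((uintptr_t)d & 3) {	/* align dst (and thus src) */
				*d++ = *s++;
				n--;
			}
			while (n >= 4) {		/* copy aligned words */
				*(uint32_t *)d = *(const uint32_t *)s;
				d += 4; s += 4; n -= 4;
			}
		}
		while (n--)				/* byte tail */
			*d++ = *s++;
		return dst;
	}
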
diff --git a/libc/string/arm/bcopy.S b/libc/string/arm/bcopy.S
index db3c9e6c1..2d6e90d13 100644
--- a/libc/string/arm/bcopy.S
+++ b/libc/string/arm/bcopy.S
@@ -40,6 +40,7 @@
 /* bcopy = memcpy/memmove with arguments reversed. */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 #ifdef __UCLIBC_SUSV3_LEGACY__
 
@@ -48,12 +49,23 @@
 .type bcopy,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+bcopy:
+	push	{r2, lr}
+	mov	ip, r0
+	mov	r0, r1
+	mov	r1, ip
+	bl	_memcpy
+	POP_RET
+#else
 bcopy:
 	/* switch the source and destination registers */
 	eor     r0, r1, r0 
 	eor     r1, r0, r1 
 	eor     r0, r1, r0 
 	b	_memcpy /* (PLT) */
+#endif
 	
 .size bcopy,.-bcopy
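
The three eor instructions in the ARM-mode path swap r0 and r1 without a
scratch register.  The same identity in C, for reference (valid only for
two distinct objects, which r0 and r1 always are):

	static void xor_swap(unsigned int *a, unsigned int *b)
	{
		*a ^= *b;	/* a ^ b */
		*b ^= *a;	/* b ^ (a ^ b) == original a */
		*a ^= *b;	/* (a ^ b) ^ original a == original b */
	}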
 
diff --git a/libc/string/arm/bzero.S b/libc/string/arm/bzero.S
index ee49cf560..e576a12e9 100644
--- a/libc/string/arm/bzero.S
+++ b/libc/string/arm/bzero.S
@@ -38,6 +38,7 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 #ifdef __UCLIBC_SUSV3_LEGACY__
 
@@ -46,10 +47,21 @@
 .type bzero,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+bzero:
+	push	{r2, lr}
+	mov	r2, r1
+	mov	r1, #0
+	bl	HIDDEN_JUMPTARGET(memset)
+	POP_RET
+#else
+
 bzero:
 	mov	r2, r1
 	mov	r1, #0
 	b	HIDDEN_JUMPTARGET(memset)
+#endif
 
 .size bzero,.-bzero
 
diff --git a/libc/string/arm/memcmp.S b/libc/string/arm/memcmp.S
index 4f78b5128..65409f43a 100644
--- a/libc/string/arm/memcmp.S
+++ b/libc/string/arm/memcmp.S
@@ -30,15 +30,41 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 .global memcmp
 .type memcmp,%function
 .align 4
 
+#if defined(THUMB1_ONLY)
+.thumb_func
+memcmp:
+	cmp	r2, #0
+	bne	1f
+	mov	r0, #0
+	bx	lr
+1:
+	push	{r4}
+	add	r4, r0, r2
+2:
+	ldrb	r2, [r0]
+	add	r0, r0, #1
+	ldrb	r3, [r1]
+	add	r1, r1, #1
+	cmp	r4, r0
+	beq	3f
+	cmp	r2, r3
+	beq	2b
+3:
+	sub	r0, r2, r3
+	pop	{r4}
+	bx	lr
+#else
 memcmp:
 	/* if ((len - 1) < 0) return 0 */
 	subs	r2, r2, #1
+	IT(tt, mi)
 	movmi	r0, #0
 #if defined(__USE_BX__)
         bxmi    lr
@@ -51,6 +77,7 @@ memcmp:
 	ldrb	r2, [r0], #1
 	ldrb	r3, [r1], #1
 	cmp	ip, r0
+	IT(t, cs)
 	cmpcs	r2, r3
 	beq	1b
 	sub	r0, r2, r3
@@ -59,6 +86,7 @@ memcmp:
 #else
  	mov	pc, lr
 #endif
+#endif
 
 .size memcmp,.-memcmp
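
The new Thumb-1 memcmp is a byte-at-a-time loop.  As a C sketch (with
illustrative names, not the library source), it compares byte pairs until
the end of the buffer or the first mismatch, then returns the difference
of the last bytes loaded:

	#include <stddef.h>

	static int thumb1_memcmp_sketch(const void *a, const void *b, size_t n)
	{
		const unsigned char *p = a, *q = b, *end = p + n;
		int c1 = 0, c2 = 0;

		while (p < end) {
			c1 = *p++;
			c2 = *q++;
			if (c1 != c2)
				break;
		}
		return c1 - c2;		/* 0 when n == 0 or all bytes match */
	}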
 
diff --git a/libc/string/arm/memcpy.S b/libc/string/arm/memcpy.S
index 7a5b6ab76..d2013d211 100644
--- a/libc/string/arm/memcpy.S
+++ b/libc/string/arm/memcpy.S
@@ -38,16 +38,23 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 .global memcpy
 .type memcpy,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
 memcpy:
-	stmfd	sp!, {r0, lr}
+	push	{r0, lr}
 	bl	_memcpy
-	ldmfd	sp!, {r0, pc}
+	POP_RET
+#else
+memcpy:
+	b	_memcpy
+#endif
 
 .size memcpy,.-memcpy
 
diff --git a/libc/string/arm/memmove.S b/libc/string/arm/memmove.S
index 45cd9b4d4..c11b98dd4 100644
--- a/libc/string/arm/memmove.S
+++ b/libc/string/arm/memmove.S
@@ -38,16 +38,23 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 .global memmove
 .type memmove,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
 memmove:
-	stmfd	sp!, {r0, lr}
+	push	{r2, lr}
 	bl	_memcpy
-	ldmfd	sp!, {r0, pc}
+	POP_RET
+#else
+memmove:
+	b	_memcpy
+#endif
 
 .size memmove,.-memmove
 
diff --git a/libc/string/arm/memset.S b/libc/string/arm/memset.S
index 16bfe0dc5..66aa6039c 100644
--- a/libc/string/arm/memset.S
+++ b/libc/string/arm/memset.S
@@ -19,12 +19,52 @@
 
 #include <features.h>
 #include <sys/syscall.h>
+#include <bits/arm_asm.h>
 
 .text
 .global memset
 .type memset,%function
 .align 4
 
+#if defined(THUMB1_ONLY)
+.thumb_func
+memset:
+	mov	ip, r0
+	cmp	r2, #8		@ at least 8 bytes to do?
+	bcc	2f
+
+	lsl	r3, r1, #8
+	orr	r1, r3
+	lsl	r3, r1, #16
+	orr	r1, r3
+
+	mov	r3, #3
+1:	@ Fill up to the first word boundary
+	tst	r0, r3
+	beq	1f
+	strb	r1, [r0]
+	add	r0, r0, #1
+	sub	r2, r2, #1
+	b	1b
+1:	@ Fill aligned words
+	str	r1, [r0]
+	add	r0, r0, #4
+	sub	r2, r2, #4
+	cmp	r2, #4
+	bcs	1b
+
+2:	@ Fill the remaining bytes
+	cmp	r2, #0
+	beq	2f
+1:
+	strb	r1, [r0]
+	add	r0, r0, #1
+	sub	r2, r2, #1
+	bne	1b
+2:
+	mov	r0, ip
+	bx lr
+#else
 memset:
 	mov	a4, a1
 	cmp	a3, $8		@ at least 8 bytes to do?
@@ -33,8 +73,14 @@ memset:
 	orr	a2, a2, a2, lsl $16
 1:
 	tst	a4, $3		@ aligned yet?
+#if defined(__thumb2__)
+	itt	ne
+	strbne	a2, [a4], $1
+	subne	a3, a3, $1
+#else
 	strneb	a2, [a4], $1
 	subne	a3, a3, $1
+#endif
 	bne	1b
 	mov	ip, a2
 1:
@@ -51,16 +97,30 @@ memset:
 	stmia	a4!, {a2, ip}
 	sub	a3, a3, $8
 	cmp	a3, $8		@ 8 bytes still to do?
+#if defined(__thumb2__)
+	itt	ge
+	stmiage	a4!, {a2, ip}
+	subge	a3, a3, $8
+#else
 	stmgeia	a4!, {a2, ip}
 	subge	a3, a3, $8
+#endif
 	bge	1b
 2:
 	movs	a3, a3		@ anything left?
+	IT(t, eq)
 #if defined(__USE_BX__)
         bxeq    lr
 #else
         moveq	pc, lr		@ nope
 #endif
+#if defined (__thumb2__)
+1:
+	strb	a2, [a4], #1
+	subs	a3, a3, #1
+	bne	1b
+	bx	lr
+#else
 	rsb	a3, a3, $7
 	add	pc, pc, a3, lsl $2
 	mov	r0, r0
@@ -76,6 +136,8 @@ memset:
 #else
  	mov	pc, lr
 #endif
+#endif
+#endif
 
 .size memset,.-memset
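
Both new paths replicate the fill byte across a 32-bit word before the
store loops.  A C sketch of the Thumb-1 path (illustrative only):

	#include <stddef.h>
	#include <stdint.h>

	static void *thumb1_memset_sketch(void *dst, int c, size_t n)
	{
		unsigned char *d = dst;
		uint32_t w = (uint8_t)c;

		if (n >= 8) {
			w |= w << 8;			/* 0x000000cc -> 0x0000cccc */
			w |= w << 16;			/* 0x0000cccc -> 0xcccccccc */
			while ((uintptr_t)d & 3) {	/* reach a word boundary */
				*d++ = (unsigned char)w;
				n--;
			}
			while (n >= 4) {		/* fill aligned words */
				*(uint32_t *)d = w;
				d += 4;
				n -= 4;
			}
		}
		while (n--)				/* trailing bytes */
			*d++ = (unsigned char)w;
		return dst;
	}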
 
diff --git a/libc/string/arm/strcmp.S b/libc/string/arm/strcmp.S
index 89aa38874..97363c1c2 100644
--- a/libc/string/arm/strcmp.S
+++ b/libc/string/arm/strcmp.S
@@ -30,17 +30,35 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 .global strcmp
 .type strcmp,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+strcmp:
+1:
+	ldrb	r2, [r0]
+	add	r0, r0, #1
+	ldrb	r3, [r1]
+	add	r1, r1, #1
+	cmp	r2, #0
+	beq	2f
+	cmp	r2, r3
+	beq	1b
+2:
+	sub	r0, r2, r3
+	bx      lr
+#else
 strcmp:
 1:
 	ldrb	r2, [r0], #1
 	ldrb	r3, [r1], #1
 	cmp	r2, #1
+	IT(t, cs)
 	cmpcs	r2, r3
 	beq	1b
 	sub	r0, r2, r3
@@ -49,6 +67,7 @@ strcmp:
 #else
   	mov	pc, lr
 #endif
+#endif
 
 .size strcmp,.-strcmp
 
diff --git a/libc/string/arm/strlen.S b/libc/string/arm/strlen.S
index 5b4b02e17..949e918f4 100644
--- a/libc/string/arm/strlen.S
+++ b/libc/string/arm/strlen.S
@@ -20,6 +20,7 @@
 #include <features.h>
 #include <endian.h>
 #include <sys/syscall.h>
+#include <bits/arm_asm.h>
 
 /* size_t strlen(const char *S)
  * entry: r0 -> string
@@ -31,6 +32,19 @@
 .type strlen,%function
 .align 4
 
+#if defined(THUMB1_ONLY)
+/* A simple implementation for when the ARM implementation can't be used.  */
+.thumb_func
+strlen:
+	mov r2, #0
+1:
+	ldrb	r1, [r0, r2]
+	add	r2, r2, #1
+	cmp	r1, #0
+	bne	1b
+	sub	r0, r2, #1
+	bx lr
+#else
 strlen:
 	bic     r1, r0, $3              @ addr of word containing first byte
 	ldr     r2, [r1], $4            @ get the first word
@@ -41,38 +55,48 @@ strlen:
 #if __BYTE_ORDER == __BIG_ENDIAN
 	orr     r2, r2, $0xff000000     @ set this byte to non-zero
 	subs    r3, r3, $1              @ any more to do?
+	IT(t, gt)
 	orrgt   r2, r2, $0x00ff0000     @ if so, set this byte
 	subs    r3, r3, $1              @ more?
+	IT(t, gt)
 	orrgt   r2, r2, $0x0000ff00     @ then set.
 #else
 	orr     r2, r2, $0x000000ff     @ set this byte to non-zero
 	subs    r3, r3, $1              @ any more to do?
+	IT(t, gt)
 	orrgt   r2, r2, $0x0000ff00     @ if so, set this byte
 	subs    r3, r3, $1              @ more?
+	IT(t, gt)
 	orrgt   r2, r2, $0x00ff0000     @ then set.
 #endif
 Laligned:				@ here, we have a word in r2.  Does it
 	tst     r2, $0x000000ff         @ contain any zeroes?
+	IT(tttt, ne)
 	tstne   r2, $0x0000ff00         @
 	tstne   r2, $0x00ff0000         @
 	tstne   r2, $0xff000000         @
 	addne   r0, r0, $4              @ if not, the string is 4 bytes longer
+	IT(t, ne)
 	ldrne   r2, [r1], $4            @ and we continue to the next word
 	bne     Laligned                @
 Llastword:				@ drop through to here once we find a
 #if __BYTE_ORDER == __BIG_ENDIAN
 	tst     r2, $0xff000000         @ word that has a zero byte in it
+	IT(tttt, ne)
 	addne   r0, r0, $1              @
 	tstne   r2, $0x00ff0000         @ and add up to 3 bytes on to it
 	addne   r0, r0, $1              @
 	tstne   r2, $0x0000ff00         @ (if first three all non-zero, 4th
+	IT(t, ne)
 	addne   r0, r0, $1              @  must be zero)
 #else
 	tst     r2, $0x000000ff         @
+	IT(tttt, ne)
 	addne   r0, r0, $1              @
 	tstne   r2, $0x0000ff00         @ and add up to 3 bytes on to it
 	addne   r0, r0, $1              @
 	tstne   r2, $0x00ff0000         @ (if first three all non-zero, 4th
+	IT(t, ne)
 	addne   r0, r0, $1              @  must be zero)
 #endif
 #if defined(__USE_BX__)
@@ -80,6 +104,7 @@ Llastword:				@ drop through to here once we find a
 #else
   	mov	pc,lr
 #endif
+#endif
 
 .size strlen,.-strlen
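
The ARM-mode strlen scans a word at a time, testing each byte lane with
tst; the aligned loads never cross a page boundary mid-word, so they
cannot fault past the terminator.  A little-endian C rendering of the
same idea (a sketch, with illustrative names):

	#include <stddef.h>
	#include <stdint.h>

	static size_t wordwise_strlen(const char *s)
	{
		uintptr_t head = (uintptr_t)s & 3;
		const uint32_t *w = (const uint32_t *)((uintptr_t)s - head);
		uint32_t v = *w++;
		size_t len = 0;

		/* Pretend the bytes before the string start are non-zero. */
		switch (head) {
		case 3: v |= 0x00ff0000;	/* fall through */
		case 2: v |= 0x0000ff00;	/* fall through */
		case 1: v |= 0x000000ff;
		}

		while ((v & 0x000000ff) && (v & 0x0000ff00) &&
		       (v & 0x00ff0000) && (v & 0xff000000)) {
			len += 4;		/* no zero byte in this word */
			v = *w++;
		}
		if (v & 0x000000ff) {		/* count bytes before the zero */
			len++;
			if (v & 0x0000ff00) {
				len++;
				if (v & 0x00ff0000)
					len++;
			}
		}
		return len - head;
	}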
 
diff --git a/libc/string/arm/strncmp.S b/libc/string/arm/strncmp.S
index eaf0620b4..8487639c8 100644
--- a/libc/string/arm/strncmp.S
+++ b/libc/string/arm/strncmp.S
@@ -30,15 +30,46 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 .global strncmp
 .type strncmp,%function
 .align 4
 
+#if defined(THUMB1_ONLY)
+.thumb_func
 strncmp:
 	/* if (len == 0) return 0 */
 	cmp	r2, #0
+	bne	1f
+	mov	r0, #0
+	bx	lr
+1:
+	push	{r4}
+
+	/* r4 == last src address to compare */
+	add	r4, r0, r2
+2:
+	cmp	r4, r0
+	beq	3f
+	ldrb	r2, [r0]
+	add	r0, r0, #1
+	ldrb	r3, [r1]
+	add	r1, r1, #1
+	cmp	r2, #0
+	beq	3f
+	cmp	r2, r3
+	beq	2b
+3:
+	sub	r0, r2, r3
+	pop	{r4}
+	bx	lr
+#else
+strncmp:
+	/* if (len == 0) return 0 */
+	cmp	r2, #0
+	IT(tt, eq)
 	moveq	r0, #0
 #if defined(__USE_BX__)
         bxeq    lr
@@ -53,6 +84,7 @@ strncmp:
 	ldrb	r2, [r0], #1
 	ldrb	r3, [r1], #1
 	cmp	ip, r0
+	IT(tt, cs)
 	cmpcs	r2, #1
 	cmpcs	r2, r3
 	beq	1b
@@ -62,6 +94,7 @@ strncmp:
 #else
   	mov	pc, lr
 #endif
+#endif
 
 .size strncmp,.-strncmp
 
diff --git a/libc/sysdeps/linux/arm/__longjmp.S b/libc/sysdeps/linux/arm/__longjmp.S
index 4261797f8..5faf4ece9 100644
--- a/libc/sysdeps/linux/arm/__longjmp.S
+++ b/libc/sysdeps/linux/arm/__longjmp.S
@@ -18,6 +18,7 @@
    02111-1307 USA.  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 #define _SETJMP_H
 #define _ASM
 #include <bits/setjmp.h>
@@ -26,13 +27,44 @@
 .global __longjmp
 .type __longjmp,%function
 .align 2
+#if defined(THUMB1_ONLY)
+.thumb_func
+__longjmp:
+	mov	r2, r0
+	movs	r0, r1
+	/* can't let setjmp() return zero! */
+	bne	1f
+	mov	r0, #1
+1:
+	mov	r1, r2
+	/* Restore registers, shuffling them through low regs.  */
+	add	r2, #(4 * 4)
+	ldmia	r2!, {r4, r5, r6, r7}
+	mov	r8, r4
+	mov	r9, r5
+	mov	sl, r6
+	mov	fp, r7
+	ldmia	r2!, {r4, r5}
+	mov	sp, r4
+	mov	lr, r5
+	ldmia	r1!, {r4, r5, r6, r7}
+	bx	lr
+#else
 __longjmp:
 	mov	ip, r0		/* save jmp_buf pointer */
 	
 	movs	r0, r1		/* get the return value in place */
+	IT(t, eq)
 	moveq	r0, #1		/* can't let setjmp() return zero! */
 
+#if defined(__thumb2__)
+	/* Thumb-2 does not allow loading sp with ldm.  */
+	ldmia     ip!,  {v1-v6, sl, fp}
+	ldr	  sp, [ip], #4
+	ldr	  lr, [ip], #4
+#else
 	ldmia     ip!,  {v1-v6, sl, fp, sp, lr}
+#endif
 
 #if defined __UCLIBC_HAS_FLOATS__ && ! defined __UCLIBC_HAS_SOFT_FLOAT__
 #ifdef __VFP_FP__
@@ -76,6 +108,7 @@ __longjmp:
 #else
 	mov pc, lr
 #endif
+#endif
 
 .size __longjmp,.-__longjmp
 libc_hidden_def(__longjmp)
diff --git a/libc/sysdeps/linux/arm/bits/arm_asm.h b/libc/sysdeps/linux/arm/bits/arm_asm.h
new file mode 100644
index 000000000..1d87df6eb
--- /dev/null
+++ b/libc/sysdeps/linux/arm/bits/arm_asm.h
@@ -0,0 +1,28 @@
+/* Various definitions used by the ARM uClibc assembly code.  */
+#ifndef _ARM_ASM_H
+#define _ARM_ASM_H
+
+#ifdef __thumb2__
+.thumb
+.syntax unified
+#define IT(t, cond) i##t cond
+#else
+/* XXX: This can be removed if/when we require an assembler that supports
+   unified assembly syntax.  */
+#define IT(t, cond)
+/* Code to return from a thumb function stub.  */
+#ifdef __ARM_ARCH_4T__
+#define POP_RET pop	{r2, pc}
+#else
+#define POP_RET pop	{r2, r3}; bx	r3
+#endif
+#endif
+
+#if defined(__ARM_ARCH_6M__)
+/* M profile cores are Thumb-1 only; undefine IT so stray uses fail to assemble.  */
+#undef IT
+#define THUMB1_ONLY 1
+#endif
+
+#endif /* _ARM_ASM_H */
+
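
The IT() macro is the heart of the dual-mode support: under Thumb-2 it
pastes tokens into a real IT block ("IT(tt, eq)" becomes "itt eq"),
while classic ARM syntax needs nothing because the condition is encoded
in each instruction.  A stand-alone C demonstration of the token pasting
(compile with and without -D__thumb2__; the STR helpers are only for
printing and are not part of the patch):

	#include <stdio.h>

	#ifdef __thumb2__
	#define IT(t, cond) i##t cond
	#else
	#define IT(t, cond)
	#endif

	#define STR_(x) #x
	#define STR(x) STR_(x)

	int main(void)
	{
		puts(STR(IT(tt, eq)));	/* "itt eq" under Thumb-2, "" otherwise */
		return 0;
	}
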
diff --git a/libc/sysdeps/linux/arm/bsd-_setjmp.S b/libc/sysdeps/linux/arm/bsd-_setjmp.S
index f70073266..a05570df7 100644
--- a/libc/sysdeps/linux/arm/bsd-_setjmp.S
+++ b/libc/sysdeps/linux/arm/bsd-_setjmp.S
@@ -17,13 +17,38 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-/* This just does a tail-call to `__sigsetjmp (ARG, 1)'.
+#include <bits/arm_asm.h>
+
+/* This just does a tail-call to `__sigsetjmp (ARG, 0)'.
    We cannot do it in C because it must be a tail-call, so frame-unwinding
    in setjmp doesn't clobber the state restored by longjmp.  */
 
 .global _setjmp
 .type _setjmp,%function
 .align 2
+#if defined(THUMB1_ONLY)
+.thumb_func
+_setjmp:
+	mov	r1, #0
+#ifdef __PIC__
+	ldr	r3, .L_GOT
+	adr	r2, .L_GOT
+	add	r3, r2, r3
+
+	ldr	r2, .L_GOT+4	/* __sigsetjmp */
+	ldr	r2, [r2, r3]
+	bx	r2
+
+	.align 2
+.L_GOT:
+	.word	_GLOBAL_OFFSET_TABLE_-.L_GOT
+	.word	__sigsetjmp(GOT)
+#else
+	ldr	r2, =__sigsetjmp
+	bx	r2
+.pool
+#endif
+#else
 _setjmp:
 	mov	r1, #0
 #ifdef __PIC__
@@ -31,5 +56,6 @@ _setjmp:
 #else
 	b	__sigsetjmp
 #endif
+#endif
 
 .size _setjmp,.-_setjmp
diff --git a/libc/sysdeps/linux/arm/bsd-setjmp.S b/libc/sysdeps/linux/arm/bsd-setjmp.S
index 6253c6675..d7ca72ad5 100644
--- a/libc/sysdeps/linux/arm/bsd-setjmp.S
+++ b/libc/sysdeps/linux/arm/bsd-setjmp.S
@@ -17,6 +17,8 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+#include <bits/arm_asm.h>
+
 /* This just does a tail-call to `__sigsetjmp (ARG, 1)'.
    We cannot do it in C because it must be a tail-call, so frame-unwinding
    in setjmp doesn't clobber the state restored by longjmp.  */
@@ -24,6 +26,29 @@
 .global setjmp
 .type setjmp,%function
 .align 2
+#if defined(THUMB1_ONLY)
+.thumb_func
+setjmp:
+	mov	r1, #1
+#ifdef __PIC__
+	ldr	r3, .L_GOT
+	adr	r2, .L_GOT
+	add	r3, r2, r3
+
+	ldr	r2, .L_GOT+4	/* __sigsetjmp */
+	ldr	r2, [r2, r3]
+	bx	r2
+
+	.align 2
+.L_GOT:
+	.word	_GLOBAL_OFFSET_TABLE_-.L_GOT
+	.word	__sigsetjmp(GOT)
+#else
+	ldr	r2, =__sigsetjmp
+	bx	r2
+.pool
+#endif
+#else
 setjmp:
 	mov	r1, #1
 #ifdef __PIC__
@@ -31,5 +56,6 @@ setjmp:
 #else
 	b	__sigsetjmp
 #endif
+#endif
 
 .size setjmp,.-setjmp
diff --git a/libc/sysdeps/linux/arm/clone.S b/libc/sysdeps/linux/arm/clone.S
index a5a847d1e..d9483735d 100644
--- a/libc/sysdeps/linux/arm/clone.S
+++ b/libc/sysdeps/linux/arm/clone.S
@@ -24,17 +24,66 @@
 #include <features.h>
 #include <bits/errno.h>
 #include <sys/syscall.h>
+#include <bits/arm_asm.h>
 
-#ifdef __NR_clone
+#if defined(__NR_clone)
 /* int clone(int (*fn)(void *arg), void *child_stack, int flags, void *arg); */
 
 .text
 .global clone
 .type clone,%function
 .align 2
+#if defined(THUMB1_ONLY)
+.thumb_func
 clone:
 	@ sanity check args
 	cmp	r0, #0
+	beq	__einval
+	cmp	r1, #0
+	beq	__einval
+
+	@ insert the args onto the new stack
+	sub	r1, r1, #8
+	str	r3, [r1, #4]
+	@ save the function pointer as the 0th element
+	str	r0, [r1]
+
+	@ do the system call
+	@ get flags
+	mov	r0, r2
+	@ new sp is already in r1
+	DO_CALL (clone)
+	movs	a1, a1
+	blt	__error
+	beq	1f
+	bx	lr
+1:
+
+	@ pick the function arg and call address off the stack and execute
+	ldr	r0, [sp, #4]
+	ldr	r1, [sp]
+	bl	2f	@ blx r1
+
+	@ and we are done, passing the return value through r0
+	bl	HIDDEN_JUMPTARGET(_exit)
+	@ Should never return
+	b	.
+
+2:
+	bx	r1
+
+__einval:
+	ldr	r0, =-EINVAL
+__error:
+	push	{r3, lr}
+	bl	__syscall_error
+	POP_RET
+.pool
+#else
+clone:
+	@ sanity check args
+	cmp	r0, #0
+	IT(te, ne)
 	cmpne	r1, #0
 	moveq	r0, #-EINVAL
 	beq	__error
@@ -52,6 +101,7 @@ clone:
 	DO_CALL (clone)
 	movs	a1, a1
 	blt	__error
+	IT(t, ne)
 #if defined(__USE_BX__)
 	bxne	lr
 #else
@@ -68,6 +118,7 @@ clone:
 
 __error:
 	b	__syscall_error
+#endif
 
 .size clone,.-clone
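
For reference, a hedged usage sketch of the wrapper (ordinary Linux
clone() semantics; the 16 KiB stack size is an arbitrary choice): the
caller supplies the top of a downward-growing stack, and the assembly
above parks fn and arg there before the syscall.

	#define _GNU_SOURCE
	#include <sched.h>
	#include <signal.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/wait.h>

	static int child_fn(void *arg)
	{
		printf("child sees: %s\n", (const char *)arg);
		return 0;
	}

	int main(void)
	{
		size_t sz = 16 * 1024;
		char *stack = malloc(sz);

		if (!stack)
			return 1;
		/* ARM stacks grow down: pass the highest address. */
		int pid = clone(child_fn, stack + sz, SIGCHLD, "hello");
		if (pid > 0)
			waitpid(pid, NULL, 0);
		free(stack);
		return 0;
	}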
 
diff --git a/libc/sysdeps/linux/arm/crt1.S b/libc/sysdeps/linux/arm/crt1.S
index 8d4d230a7..082348e39 100644
--- a/libc/sysdeps/linux/arm/crt1.S
+++ b/libc/sysdeps/linux/arm/crt1.S
@@ -94,6 +94,7 @@ ARM register quick reference:
 */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 	.globl	_start
@@ -105,6 +106,73 @@ ARM register quick reference:
 	.weak	_fini
 #endif
 
+#if defined(THUMB1_ONLY)
+.thumb_func
+_start:
+	/* Clear the frame pointer since this is the outermost frame.  */
+	mov r3, #0
+	mov fp, r3
+
+#ifdef __ARCH_USE_MMU__
+	/* Pop argc off the stack and save a pointer to argv */
+	pop {a2}
+	mov a3, sp
+#else
+	/*
+	 * uClinux/arm stacks look a little different from normal
+	 * MMU-full Linux/arm stacks (for no good reason)
+	 */
+	/* pull argc and argv off the stack.  We are going to push 3
+	 * arguments, so pop one here to maintain doubleword alignment.  */
+	pop {a2}
+	ldr a3, [sp]
+#endif
+
+	/* Push stack limit and rtld_fini */
+	push {a1, a3}
+
+#ifdef __PIC__
+	ldr r4, .L_GOT
+.L_GOT_OFF:
+	adr r5, .L_GOT
+	add r4, r5, r4
+
+	ldr r5, .L_GOT+4	/* _fini */
+	ldr a1, [r4, r5]
+	push {a1}		/* Push _fini */
+
+	ldr r5, .L_GOT+8	/* _init */
+	ldr a4, [r4, r5]
+	
+	ldr r5, .L_GOT+12	/* main */
+	ldr a1, [r4, r5]
+
+#else
+	/* Fetch address of fini */
+	ldr r4, =_fini
+	/* Push fini */
+	push {r4}
+
+	/* Set up the other arguments in registers */
+	ldr a1, =main
+	ldr a4, =_init
+#endif
+	/* __uClibc_main (main, argc, argv, init, fini, rtld_fini, stack_end) */
+	/* Let the libc call main and exit with its return code.  */
+	bl __uClibc_main
+
+	/* should never get here....*/
+	bl abort
+.pool
+
+#ifdef __PIC__
+.L_GOT:
+	.word	_GLOBAL_OFFSET_TABLE_-.L_GOT
+	.word _fini(GOT)
+	.word _init(GOT)
+	.word main(GOT)
+#endif
+#else /* !THUMB1_ONLY */
 _start:
 	/* Clear the frame pointer and link register since this is the outermost frame.  */
 	mov fp, #0
@@ -175,6 +243,7 @@ _start:
 	.word _init(GOT)
 	.word main(GOT)
 #endif
+#endif
 
 /* Define a symbol for the first piece of initialized data.  */
 	.data
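
The register and stack shuffling in both _start variants implements the
call spelled out in the in-line comment.  Reconstructed as a C
declaration (a sketch based on that comment, not copied from uClibc's
internal headers), a1-a4 carry the first four arguments and fini,
rtld_fini and stack_end travel on the stack per the AAPCS:

	void __uClibc_main(int (*main)(int, char **, char **),
			   int argc, char **argv,
			   void (*app_init)(void), void (*app_fini)(void),
			   void (*rtld_fini)(void), void *stack_end)
		__attribute__((noreturn));
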
diff --git a/libc/sysdeps/linux/arm/crti.S b/libc/sysdeps/linux/arm/crti.S
index 4835b8331..e335b7140 100644
--- a/libc/sysdeps/linux/arm/crti.S
+++ b/libc/sysdeps/linux/arm/crti.S
@@ -1,5 +1,6 @@
 	.file	"initfini.c"
 	
+#include <bits/arm_asm.h>
 	.section .init
 	.global	_init
 	.type	_init, %function
diff --git a/libc/sysdeps/linux/arm/crtn.S b/libc/sysdeps/linux/arm/crtn.S
index 7a1ca1ab1..de01b38dc 100644
--- a/libc/sysdeps/linux/arm/crtn.S
+++ b/libc/sysdeps/linux/arm/crtn.S
@@ -1,5 +1,6 @@
 	.file	"initfini.c"
 	
+#include <bits/arm_asm.h>
 	.section .init
 	.global	_init
 	.type	_init, %function
diff --git a/libc/sysdeps/linux/arm/mmap64.S b/libc/sysdeps/linux/arm/mmap64.S
index ba8cb2fca..73d6b51ce 100644
--- a/libc/sysdeps/linux/arm/mmap64.S
+++ b/libc/sysdeps/linux/arm/mmap64.S
@@ -20,6 +20,7 @@
 #define _ERRNO_H
 #include <bits/errno.h>
 #include <sys/syscall.h>
+#include <bits/arm_asm.h>
 
 #if defined __UCLIBC_HAS_LFS__ && defined __NR_mmap2
 
@@ -28,9 +29,46 @@
 .global mmap64
 .type mmap64,%function
 .align 2
-mmap64:
 
 #ifdef __ARM_EABI__
+#if defined(THUMB1_ONLY)
+.thumb_func
+mmap64:
+#ifdef __ARMEB__
+/* Offsets are after pushing 3 words.  */
+# define LOW_OFFSET  12 + 8 + 4
+# define HIGH_OFFSET 12 + 8 + 0
+#else
+# define LOW_OFFSET  12 + 8 + 0
+# define HIGH_OFFSET 12 + 8 + 4
+#endif
+	push	{r4, r5, r6}
+	ldr	r6, [sp, $LOW_OFFSET]
+	ldr	r5, [sp, $HIGH_OFFSET]
+	lsl	r4, r6, #20		@ check that offset is page-aligned
+	bne	.Linval
+	lsr	r4, r5, #12		@ check for overflow
+	bne	.Linval
+	@ compose page offset
+	lsr	r6, r6, #12
+	lsl	r5, r5, #20
+	orr	r5, r5, r6
+	ldr	r4, [sp, #12]		@ load fd
+	DO_CALL (mmap2)
+	ldr	r1, =0xfffff000
+	cmp	r0, r1
+	bcs	.Lerror
+	bx	lr
+.Linval:
+	ldr	r0, =-EINVAL
+	pop	{r4, r5, r6}
+.Lerror:
+	push	{r3, lr}
+	bl	__syscall_error
+	POP_RET
+.pool
+#else /* !THUMB1_ONLY */
+mmap64:
 #ifdef __ARMEB__
 # define LOW_OFFSET      8 + 4
 /* The initial + 4 is for the stack postdecrement.  */
@@ -45,6 +83,7 @@ mmap64:
 	str	r4, [sp, #-4]!
 	movs	r4, ip, lsl $20		@ check that offset is page-aligned
 	mov	ip, ip, lsr $12
+	IT(t, eq)
 	moveqs	r4, r5, lsr $12		@ check for overflow
 	bne	.Linval
 	ldr	r4, [sp, $8]		@ load fd
@@ -52,6 +91,7 @@ mmap64:
 	DO_CALL (mmap2)
 	cmn	r0, $4096
 	ldmfd	sp!, {r4, r5}
+	IT(t, cc)
 #if defined(__USE_BX__)
 	bxcc	lr
 #else
@@ -62,7 +102,8 @@ mmap64:
 	mov	r0, $-EINVAL
 	ldmfd	sp!, {r4, r5}
 	b	__syscall_error
-#else
+#endif
+#else /* !__ARM_EABI__ */
 	stmfd	sp!, {r4, r5, lr}
 	ldr	r5, [sp, $16]
 	ldr	r4, [sp, $12]
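
The shifts before DO_CALL (mmap2) validate and repack the 64-bit file
offset into 4 KiB page units, as in this C sketch (an illustrative
helper, not library code): the low 12 bits must be clear, and the
result must fit in 32 bits of page units.

	#include <errno.h>
	#include <stdint.h>

	static int offset_to_pgoff(uint64_t offset, uint32_t *pgoff)
	{
		if (offset & 0xfff)		/* the lsl #20 test: not page-aligned */
			return -EINVAL;
		if (offset >> (32 + 12))	/* the lsr #12 test: overflow */
			return -EINVAL;
		*pgoff = (uint32_t)(offset >> 12);
		return 0;
	}
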
diff --git a/libc/sysdeps/linux/arm/setjmp.S b/libc/sysdeps/linux/arm/setjmp.S
index 8d15b8324..2df7d551a 100644
--- a/libc/sysdeps/linux/arm/setjmp.S
+++ b/libc/sysdeps/linux/arm/setjmp.S
@@ -18,15 +18,41 @@
    02111-1307 USA.  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .global __sigsetjmp
 .type __sigsetjmp,%function
 .align 2
+#if defined(THUMB1_ONLY)
+.thumb_func
 __sigsetjmp:
+	push	{r3, r4, r5, r6, r7, lr}
 	mov	ip, r0
+	stmia	r0!, {r4, r5, r6, r7}
+	mov	r2, r8
+	mov	r3, r9
+	mov	r4, sl
+	mov	r5, fp
+	add	r6, sp, #(6 * 4)
+	mov	r7, lr
+	stmia	r0!, {r2, r3, r4, r5, r6, r7}
 
+	mov	r0, ip
+	bl	__sigjmp_save
+	pop	{r3, r4, r5, r6, r7, pc}
+
+#else
+__sigsetjmp:
+	/* Save registers */
+	mov	ip, r0
+#if defined(__thumb2__)
+	stmia	ip!, {v1-v6, sl, fp}
+	movs	r2, sp
+	stmia	ip!, {r2, lr}
+#else
 	/* Save registers */
 	stmia	ip!, {v1-v6, sl, fp, sp, lr}
+#endif
 #if defined __UCLIBC_HAS_FLOATS__ && ! defined __UCLIBC_HAS_SOFT_FLOAT__
 # ifdef __VFP_FP__
 	/* Store the VFP registers.  */
@@ -70,5 +96,6 @@ __sigsetjmp:
 #else
 	B	__sigjmp_save
 #endif
+#endif
 
 .size __sigsetjmp,.-__sigsetjmp
diff --git a/libc/sysdeps/linux/arm/sigrestorer.S b/libc/sysdeps/linux/arm/sigrestorer.S
index 194228a38..79728fd40 100644
--- a/libc/sysdeps/linux/arm/sigrestorer.S
+++ b/libc/sysdeps/linux/arm/sigrestorer.S
@@ -16,6 +16,7 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+#include <bits/arm_asm.h>
 #include <sys/syscall.h>
 #include <linux/version.h>
 
@@ -38,6 +39,9 @@
 .type __default_sa_restorer,%function
 .align 2
 #ifdef __ARM_EABI__
+#ifdef __thumb__
+.thumb_func
+#endif
 	.fnstart
 	.save {r0-r15}
 #if LINUX_VERSION_CODE >= 0x020612
@@ -62,6 +66,9 @@ __default_sa_restorer:
 .type __default_rt_sa_restorer,%function
 .align 2
 #ifdef __ARM_EABI__
+#ifdef __thumb__
+.thumb_func
+#endif
 	.fnstart
 	.save {r0-r15}
 #if LINUX_VERSION_CODE >= 0x020612
diff --git a/libc/sysdeps/linux/arm/syscall-eabi.S b/libc/sysdeps/linux/arm/syscall-eabi.S
index efc30690c..b9318821b 100644
--- a/libc/sysdeps/linux/arm/syscall-eabi.S
+++ b/libc/sysdeps/linux/arm/syscall-eabi.S
@@ -17,6 +17,7 @@
    02111-1307 USA.  */
 
 #include <sys/syscall.h>
+#include <bits/arm_asm.h>
 
 /* In the EABI syscall interface, we don't need a special syscall to
    implement syscall().  It won't work reliably with 64-bit arguments
@@ -26,6 +27,29 @@
 .global syscall
 .type syscall,%function
 .align 4
+#if defined(THUMB1_ONLY)
+.thumb_func
+syscall:
+	push	{r4, r5, r6, r7}
+	mov	ip, r0
+	mov	r0, r1
+	mov	r1, r2
+	mov	r2, r3
+	add	r7, sp, #(4 * 4)
+	ldmia	r7!, {r3, r4, r5, r6}
+	mov	r7, ip
+	swi	0x0
+	pop	{r4, r5, r6, r7}
+	ldr	r1, =0xfffff000
+	cmp	r0, r1
+	bcs	1f
+	bx lr
+1:
+	push	{r3, lr}
+	bl	__syscall_error
+	POP_RET
+.pool
+#else
 syscall:
 	mov	ip, sp
 	stmfd	sp!, {r4, r5, r6, r7}
@@ -37,11 +61,13 @@ syscall:
 	swi	0x0
 	ldmfd	sp!, {r4, r5, r6, r7}
 	cmn	r0, #4096
+	IT(t, cc)
 #if defined(__USE_BX__)
 	bxcc	lr
 #else
 	movcc	pc, lr
 #endif
 	b	__syscall_error
+#endif
 
 .size syscall,.-syscall
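
The 0xfffff000 comparison (and the equivalent cmn r0, #4096 in the ARM
path) implements the usual Linux return convention: values at or above
0xfffff000 are negated errno codes.  As a C sketch:

	#include <errno.h>
	#include <stdint.h>

	static int32_t syscall_ret(uint32_t rv)
	{
		if (rv >= 0xfffff000u) {	/* negated errno range */
			errno = -(int32_t)rv;
			return -1;
		}
		return (int32_t)rv;
	}
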
diff --git a/libc/sysdeps/linux/arm/vfork.S b/libc/sysdeps/linux/arm/vfork.S
index e9f63d46e..42595b026 100644
--- a/libc/sysdeps/linux/arm/vfork.S
+++ b/libc/sysdeps/linux/arm/vfork.S
@@ -6,6 +6,7 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 #define _ERRNO_H
 #include <bits/errno.h>
@@ -18,11 +19,47 @@
 .type	__vfork,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+__vfork:
+#ifdef __NR_vfork
+	DO_CALL (vfork)
+	ldr		r1, =0xfffff000
+	cmp		r0, r1
+	bcs		1f
+	bx		lr
+1:
+
+	/* Check if vfork even exists.  */
+	ldr		r1, =-ENOSYS
+	cmp		r0, r1
+	bne		__error
+
+	/* If we don't have vfork, use fork.  */
+	DO_CALL (fork)
+	ldr		r1, =0xfffff000
+	cmp		r0, r1
+
+	/* Syscall worked.  Return to child/parent */
+	bcs		1f
+	bx		lr
+1:
+
+__error:
+	push	{r3, lr}
+	bl	__syscall_error
+	POP_RET
+.pool
+
+#endif
+
+#else
 __vfork:
 
 #ifdef __NR_vfork
 	DO_CALL (vfork)
 	cmn	r0, #4096
+	IT(t, cc)
 #if defined(__USE_BX__)
 	bxcc	lr
 #else
@@ -40,6 +77,7 @@ __vfork:
 	cmn     r0, #4096
 
 	/* Syscall worked.  Return to child/parent */
+	IT(t, cc)
 #if defined(__USE_BX__)
 	bxcc	lr
 #else
@@ -48,8 +86,10 @@ __vfork:
 
 __error:
 	b	__syscall_error
+#endif
 
 .size __vfork,.-__vfork
+
 weak_alias(__vfork,vfork)
 libc_hidden_weak(vfork)
 #endif
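
The control flow of the Thumb-1 __vfork, as a C sketch (raw_syscall and
syscall_error are hypothetical stand-ins for DO_CALL and
__syscall_error, and the syscall numbers are illustrative): try vfork
first, and fall back to fork only when the kernel reports ENOSYS.

	#include <errno.h>

	extern long raw_syscall(long nr);	/* hypothetical */
	extern long syscall_error(long err);	/* hypothetical */
	#define NR_vfork 190			/* illustrative */
	#define NR_fork  2

	static long vfork_sketch(void)
	{
		long ret = raw_syscall(NR_vfork);

		if ((unsigned long)ret < 0xfffff000ul)
			return ret;		/* parent or child */
		if (ret != -ENOSYS)
			return syscall_error(ret);
		ret = raw_syscall(NR_fork);	/* no vfork: plain fork */
		if ((unsigned long)ret < 0xfffff000ul)
			return ret;
		return syscall_error(ret);
	}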