From 6af3332a4cbd1ffbc81f74759ef7c5e1a87d2e10 Mon Sep 17 00:00:00 2001
From: Vincent Ren-Wei Chen <vincentc@andestech.com>
Date: Tue, 17 Jan 2017 07:31:24 +0100
Subject: nds32: add NPTL/TLS, *context function, libm changes and code cleanup

This commit includes the following features:
1. Support NPTL/TLS
2. Add libm functions used to handle FP rounding and exceptions
   (e.g. fclrexcpt, fedisblxcpt, feenablxcpt, ...)
3. Add *context functions for manipulating the user context
   (e.g. setcontext, getcontext, makecontext, ...)
4. Change the return flow from signal handler
5. Cleanup of old code

The test suite has only two failures, tst-cpuclock1 and tst-cputimer1,
both related to timing accuracy (math and locale tests are disabled).

Signed-off-by: Vincent Ren-Wei Chen <vincentc@andestech.com>
---
 ldso/ldso/nds32/dl-startup.h |  94 +++-------------------------------------
 ldso/ldso/nds32/dl-sysdep.h  |   5 ++-
 ldso/ldso/nds32/dl-tlsdesc.S | 100 +++++++++++++++++++++++++++++++++++++++++++
 ldso/ldso/nds32/elfinterp.c  |  94 +++++++++++++++++++++++++++-------------
 ldso/ldso/nds32/resolve.S    |  24 -----------
 ldso/libdl/Makefile.in       |   1 -
 6 files changed, 173 insertions(+), 145 deletions(-)
 create mode 100644 ldso/ldso/nds32/dl-tlsdesc.S

(limited to 'ldso')

diff --git a/ldso/ldso/nds32/dl-startup.h b/ldso/ldso/nds32/dl-startup.h
index f700531ca..56892a2ba 100644
--- a/ldso/ldso/nds32/dl-startup.h
+++ b/ldso/ldso/nds32/dl-startup.h
@@ -6,68 +6,7 @@
 /* Need bootstrap relocations */
 #define ARCH_NEEDS_BOOTSTRAP_RELOCS
 
-#if defined(NDS32_ABI_2) || defined(NDS32_ABI_2FP)
-# define STACK_PUSH
-# define STACK_POP
-#else
-# define STACK_PUSH     "addi   $sp,    $sp,    -24"
-# define STACK_POP      "addi   $sp,    $sp,    24"
-#endif
 
-
-#ifdef __NDS32_N1213_43U1H__
-__asm__("\
-	.text\n\
-	.globl 	_start\n\
-	.globl  _dl_start\n\
-	.globl 	_dl_start_user\n\
-    .type	_start,#function\n\
-    .type	_dl_start,#function\n\
-	.type 	_dl_start_user,#function\n\
-	.align	4\n\
-	.pic\n\
-1:\n\
-	ret\n\
-_start:\n\
-	! we are PIC code, so get global offset table\n\
-	jal	1b\n\
-	sethi	$gp, HI20(_GLOBAL_OFFSET_TABLE_)\n\
-	ori		$gp, $gp, LO12(_GLOBAL_OFFSET_TABLE_+4)\n\
-	add		$gp, $lp, $gp\n\
-\n\
-	! at start time, all the args are on the stack\n\
-	addi	$r0,	$sp,	0\n\
-    ! adjust stack\n\
-    !addi    $sp,    $sp,    -24\n\
-	"STACK_PUSH"\n\
-	bal	_dl_start@PLT\n\
-	! save user entry point in r6\n\
-	addi	$r6,	$r0,	0\n\
-    ! adjust sp and reload registers\n\
-    !addi    $sp,    $sp,    24\n\
-	"STACK_POP"\n\
-\n\
-_dl_start_user:\n\
-\n\
-	! See if we were run as a command with the executable file\n\
-	! name as an extra leading argument.\n\
-	! skip these arguments\n\
-	l.w		$r2,	_dl_skip_args@GOTOFF	! args to skip\n\
-	lwi		$r0,	[$sp+0]					! original argc\n\
-	slli	$r1,	$r2,	2				! offset for new sp\n\
-	add		$sp,	$sp,	$r1				! adjust sp to skip args\n\
-	sub		$r0,	$r0,	$r2				! set new argc\n\
-	swi		$r0,	[$sp+0]					! save new argc\n\
-\n\
-	! load address of _dl_fini finalizer function\n\
-	la		$r5, _dl_fini@GOTOFF\n\
-	! jump to the user_s entry point\n\
-	addi	$r15,	$r6,	0\n\
-	jr		$r15\n\
-	.size   _dl_start_user, . - _dl_start_user\n\
-	.previous\n\
-");
-#else
 __asm__("\
 	.text\n\
 	.globl 	_start\n\
@@ -89,13 +28,11 @@ _start:\n\
 	addi	$r0,	$sp,	0\n\
     ! adjust stack\n\
     !addi    $sp,    $sp,    -24\n\
-	"STACK_PUSH"\n\
 	bal	_dl_start@PLT\n\
 	! save user entry point in r6\n\
 	addi	$r6,	$r0,	0\n\
     ! adjust sp and reload registers\n\
     !addi    $sp,    $sp,    24\n\
-	"STACK_POP"\n\
 \n\
 _dl_start_user:\n\
 	! See if we were run as a command with the executable file\n\
@@ -115,31 +52,12 @@ _dl_start_user:\n\
 	.size   _dl_start_user, . - _dl_start_user\n\
 	.previous\n\
 ");
-#endif
 
-#define COPY_UNALIGNED_WORD(swp, twp, align) \
-  { \
-    void *__s = (swp), *__t = (twp); \
-    unsigned char *__s1 = __s, *__t1 = __t; \
-    unsigned short *__s2 = __s, *__t2 = __t; \
-    unsigned long *__s4 = __s, *__t4 = __t; \
-    switch ((align)) \
-    { \
-    case 0: \
-      *__t4 = *__s4; \
-      break; \
-    case 2: \
-      *__t2++ = *__s2++; \
-      *__t2 = *__s2; \
-      break; \
-    default: \
-      *__t1++ = *__s1++; \
-      *__t1++ = *__s1++; \
-      *__t1++ = *__s1++; \
-      *__t1 = *__s1; \
-      break; \
-    } \
-  }
+#define COPY_UNALIGNED_WORD(swp, twp) \
+{ \
+        __typeof (swp) __tmp = __builtin_nds32_unaligned_load_w ((unsigned int*)&swp); \
+        __builtin_nds32_unaligned_store_w ((unsigned int *)twp, __tmp); \
+}
 
 /* Get a pointer to the argv array.  On many platforms this can be just
  * the address if the first argument, on other platforms we need to
@@ -162,7 +80,7 @@ void PERFORM_BOOTSTRAP_RELOC(ELF_RELOC *rpnt, unsigned long *reloc_addr,
 			break;
 		case R_NDS32_32_RELA:
 			value = symbol_addr + rpnt->r_addend;
-			COPY_UNALIGNED_WORD (&value, reloc_addr, (int) reloc_addr & 3);
+			COPY_UNALIGNED_WORD (value, reloc_addr);
 			break;
 #undef COPY_UNALIGNED_WORD
 		case R_NDS32_RELATIVE:
diff --git a/ldso/ldso/nds32/dl-sysdep.h b/ldso/ldso/nds32/dl-sysdep.h
index c4a32ca71..5ff2aa9ae 100644
--- a/ldso/ldso/nds32/dl-sysdep.h
+++ b/ldso/ldso/nds32/dl-sysdep.h
@@ -57,7 +57,8 @@ unsigned long _dl_linux_resolver(struct elf_resolve * tpnt, int reloc_entry);
    ELF_RTYPE_CLASS_NOCOPY iff TYPE should not be allowed to resolve to one
    of the main executable's symbols, as for a COPY reloc.  */
 #define elf_machine_type_class(type) \
-  ((((type) == R_NDS32_JMP_SLOT) * ELF_RTYPE_CLASS_PLT)	\
+  ((((type) == R_NDS32_JMP_SLOT || (type) == R_NDS32_TLS_TPOFF \
+      || (type) == R_NDS32_TLS_DESC) * ELF_RTYPE_CLASS_PLT)	\
    | (((type) == R_NDS32_COPY) * ELF_RTYPE_CLASS_COPY))
 
 /* Return the link-time address of _DYNAMIC.  Conveniently, this is the
@@ -81,7 +82,7 @@ elf_machine_load_address (void)
      via the GOT to make sure the compiler initialized %ebx in time.  */
 
 	Elf32_Addr addr;
-	__asm__ ("la	%0, _dl_start@GOTOFF\n" : "=r" (addr) );
+	__asm__ ("la	%0, _DYNAMIC@GOTOFF\n" : "=r" (addr) );
 	return addr - elf_machine_dynamic();
 }
 
diff --git a/ldso/ldso/nds32/dl-tlsdesc.S b/ldso/ldso/nds32/dl-tlsdesc.S
new file mode 100644
index 000000000..a7ea1f2d1
--- /dev/null
+++ b/ldso/ldso/nds32/dl-tlsdesc.S
@@ -0,0 +1,100 @@
+/* Thread-local storage handling in the ELF dynamic linker.  NDS32 version.
+   Copyright (C) 2006-2013 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#if defined __UCLIBC_HAS_TLS__
+#include <tls.h>
+#include "tlsdesc.h"
+
+	.text
+	.hidden _dl_tlsdesc_return
+	.global	_dl_tlsdesc_return
+	.type	_dl_tlsdesc_return,#function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_return:
+	lwi	$r0,	[$r0	+ 4]
+	add	$r0,	$r0,	$r25
+	ret
+	cfi_endproc
+	.size   _dl_tlsdesc_return, .-_dl_tlsdesc_return
+
+#ifdef SHARED
+	.hidden _dl_tlsdesc_dynamic
+	.global	_dl_tlsdesc_dynamic
+	.type	_dl_tlsdesc_dynamic,#function
+	cfi_startproc
+	.pic
+/*
+	The assembly code that follows is a rendition of the following
+	C code, hand-optimized a little bit.
+
+ptrdiff_t
+_dl_tlsdesc_dynamic(struct tlsdesc *tdp)
+{
+       struct tlsdesc_dynamic_arg *td = tdp->argument.pointer;
+       dtv_t *dtv = (dtv_t *)THREAD_DTV();
+       if (__builtin_expect (td->gen_count <= dtv[0].counter
+                             && dtv[td->tlsinfo.ti_module].pointer.val
+                                != TLS_DTV_UNALLOCATED,
+                             1))
+               return dtv[td->tlsinfo.ti_module].pointer.val +
+                       td->tlsinfo.ti_offset - __builtin_thread_pointer();
+
+       return __tls_get_addr (&td->tlsinfo) - __builtin_thread_pointer();
+}
+*/
+	.align 2
+
+_dl_tlsdesc_dynamic:
+	lwi	$r0,	[$r0	+ 4]
+	lwi	$r1,	[$r0 +	#TLSDESC_GEN_COUNT]  	/* $r0=td $r1=td->gen_count*/
+	lwi	$r2,	[$r25 +	#DTV_OFFSET]	    	/* $r2=&dtv[0]*/
+	lwi	$r3,	[$r2]
+	sub	$r1,	$r1,	$r3
+	bgtz	$r1,	2f
+	lwi	$r3,	[$r0 +	#TLSDESC_MODID]	    	/* r3=module id */
+	slli	$r3,	$r3,	#3			/* r3=module offset=module id*8(byte) */
+	lw	$r3,	[$r2 +	$r3]		   	/* r3=dtv[module id].pointer.val, at &dtv[0]+module offset */
+	movi	$r1,	#-1
+	beq	$r3,	$r1,	2f
+	lwi	$r1,	[$r0 +	#TLSDESC_MODOFF]
+	add	$r0,	$r3,	$r1
+1:
+	ret
+2:
+	smw.adm $sp,[$sp],$sp,#0x6
+	cfi_adjust_cfa_offset(8)
+	cfi_rel_offset(gp, 0)
+	cfi_rel_offset(lp, 4)
+	mfusr 	$r15, $PC;
+	sethi 	$gp,  hi20(_GLOBAL_OFFSET_TABLE_ + 4);
+	ori   	$gp,  $gp,  lo12(_GLOBAL_OFFSET_TABLE_ + 8);
+	add   	$gp,  $r15, $gp;
+	sethi 	$r15, hi20(__tls_get_addr@PLT);
+	ori	$r15, $r15, lo12(__tls_get_addr@PLT);
+	add	$r15, $r15, $gp
+	jral 	$r15
+	lmw.bim $sp,[$sp],$sp,#0x6
+	cfi_adjust_cfa_offset(-8)
+	cfi_restore(gp)
+	cfi_restore(lp)
+	j	1b
+	cfi_endproc
+	.size   _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+#endif
+#endif // __UCLIBC_HAS_TLS__
diff --git a/ldso/ldso/nds32/elfinterp.c b/ldso/ldso/nds32/elfinterp.c
index 6a091f80a..9f671419c 100644
--- a/ldso/ldso/nds32/elfinterp.c
+++ b/ldso/ldso/nds32/elfinterp.c
@@ -44,6 +44,11 @@
 
 #include "ldso.h"
 
+#if defined(USE_TLS) && USE_TLS
+#include "dl-tls.h"
+#include "tlsdeschtab.h"
+#endif
+
 extern int _dl_linux_resolve(void);
 
 unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry)
@@ -95,7 +100,7 @@ unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry)
 		{
 			_dl_dprintf(_dl_debug_file, "\nresolve function: %s", symname);
 			if (_dl_debug_detail) _dl_dprintf(_dl_debug_file,
-					"\tpatch %x ==> %x @ %x", *got_addr, new_addr, got_addr);
+					"\tpatch %x ==> %x @ %x", (unsigned int)*got_addr, (unsigned int)new_addr, (unsigned int)got_addr);
 		}
 	}
 	if (!_dl_debug_nofixups) {
@@ -168,6 +173,9 @@ _dl_do_reloc (struct elf_resolve *tpnt, struct r_scope_elem *scope,
 	int reloc_type;
 	int symtab_index;
 	char *symname = NULL;
+#if defined USE_TLS && USE_TLS
+	struct elf_resolve *tls_tpnt = NULL;
+#endif
 	unsigned long *reloc_addr;
 	unsigned long symbol_addr;
 	int goof = 0;
@@ -190,40 +198,40 @@ _dl_do_reloc (struct elf_resolve *tpnt, struct r_scope_elem *scope,
 		 * have been intentional.  We should not be linking local symbols
 		 * here, so all bases should be covered.
 		 */
-		if (!symbol_addr && ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK) {
+		if (!symbol_addr
+		    && (ELF32_ST_TYPE(symtab[symtab_index].st_info) != STT_TLS)
+		    && (ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK)) {
 			_dl_dprintf (2, "%s: can't resolve symbol '%s'\n",
-				     _dl_progname, strtab + symtab[symtab_index].st_name);
+				     _dl_progname, symname);
 			_dl_exit (1);
 		}
+		if (_dl_trace_prelink) {
+			_dl_debug_lookup(symname, tpnt, &symtab[symtab_index],
+				&sym_ref, elf_machine_type_class(reloc_type));
+		}
+#if defined USE_TLS && USE_TLS
+		tls_tpnt = sym_ref.tpnt;
+#endif
 	}
 
-#define COPY_UNALIGNED_WORD(swp, twp, align) \
-  { \
-    void *__s = (swp), *__t = (twp); \
-    unsigned char *__s1 = __s, *__t1 = __t; \
-    unsigned short *__s2 = __s, *__t2 = __t; \
-    unsigned long *__s4 = __s, *__t4 = __t; \
-    switch ((align)) \
-    { \
-    case 0: \
-      *__t4 = *__s4; \
-      break; \
-    case 2: \
-      *__t2++ = *__s2++; \
-      *__t2 = *__s2; \
-      break; \
-    default: \
-      *__t1++ = *__s1++; \
-      *__t1++ = *__s1++; \
-      *__t1++ = *__s1++; \
-      *__t1 = *__s1; \
-      break; \
-    } \
-  }
+#if defined USE_TLS && USE_TLS
+	/* For a TLS reloc, a NULL tls_tpnt means the symbol is 'anonymous'.
+	   That is the case for a static TLS variable, so the lookup module
+	   is simply the module that references the variable, i.e. tpnt. */
+	if (!tls_tpnt)
+		tls_tpnt = tpnt;
+#endif
 
+#define COPY_UNALIGNED_WORD(swp, twp) \
+{ \
+	__typeof (swp) __tmp = __builtin_nds32_unaligned_load_w ((unsigned int*)&swp); \
+	__builtin_nds32_unaligned_store_w ((unsigned int *)twp, __tmp); \
+}
 #if defined (__SUPPORT_LD_DEBUG__)
 	{
-		unsigned long old_val = *reloc_addr;
+		unsigned long old_val = 0;
+		if(reloc_type != R_NDS32_NONE)
+			old_val = *reloc_addr;
 #endif
 		symbol_addr += rpnt->r_addend ;
 		switch (reloc_type) {
@@ -235,7 +243,7 @@ _dl_do_reloc (struct elf_resolve *tpnt, struct r_scope_elem *scope,
 				*reloc_addr = symbol_addr;
 				break;
 			case R_NDS32_32_RELA:
-				COPY_UNALIGNED_WORD (&symbol_addr, reloc_addr,(int) reloc_addr & 3);
+				COPY_UNALIGNED_WORD (symbol_addr, reloc_addr);
 				break;
 #undef COPY_UNALIGNED_WORD
 			case R_NDS32_RELATIVE:
@@ -245,12 +253,38 @@ _dl_do_reloc (struct elf_resolve *tpnt, struct r_scope_elem *scope,
 				_dl_memcpy((void *) reloc_addr,
 					   (void *) symbol_addr, symtab[symtab_index].st_size);
 				break;
+#if defined USE_TLS && USE_TLS
+			case R_NDS32_TLS_TPOFF:
+				CHECK_STATIC_TLS ((struct link_map *) tls_tpnt);
+				*reloc_addr = (symbol_addr + tls_tpnt->l_tls_offset);
+				break;
+			case R_NDS32_TLS_DESC:
+				{
+					struct tlsdesc volatile *td = 
+							(struct tlsdesc volatile *)reloc_addr;
+#ifndef SHARED
+					CHECK_STATIC_TLS((struct link_map *) tls_tpnt);
+#else
+					if (!TRY_STATIC_TLS ((struct link_map *) tls_tpnt))
+					{
+					        td->argument.pointer = _dl_make_tlsdesc_dynamic((struct link_map *) tls_tpnt, symbol_addr);
+					        td->entry = _dl_tlsdesc_dynamic;
+					}
+					else
+#endif
+					{
+					        td->argument.value = symbol_addr + tls_tpnt->l_tls_offset;
+					        td->entry = _dl_tlsdesc_return;
+					}
+				}
+				break;
+#endif
 			default:
 				return -1; /*call _dl_exit(1) */
 		}
 #if defined (__SUPPORT_LD_DEBUG__)
 		if (_dl_debug_reloc && _dl_debug_detail)
-			_dl_dprintf(_dl_debug_file, "\tpatch: %x ==> %x @ %x", old_val, *reloc_addr, reloc_addr);
+			_dl_dprintf(_dl_debug_file, "\tpatch: %x ==> %x @ %x", (unsigned int)old_val, (unsigned int)*reloc_addr, (unsigned int)reloc_addr);
 	}
 
 #endif
@@ -283,7 +317,7 @@ _dl_do_lazy_reloc (struct elf_resolve *tpnt, struct r_scope_elem *scope,
 		}
 #if defined (__SUPPORT_LD_DEBUG__)
 		if (_dl_debug_reloc && _dl_debug_detail)
-			_dl_dprintf(_dl_debug_file, "\tpatch: %x ==> %x @ %x", old_val, *reloc_addr, reloc_addr);
+			_dl_dprintf(_dl_debug_file, "\tpatch: %x ==> %x @ %x", (unsigned int)old_val, (unsigned int)*reloc_addr, (unsigned int)reloc_addr);
 	}
 
 #endif
diff --git a/ldso/ldso/nds32/resolve.S b/ldso/ldso/nds32/resolve.S
index 8c53850d7..e88d9ad60 100644
--- a/ldso/ldso/nds32/resolve.S
+++ b/ldso/ldso/nds32/resolve.S
@@ -3,14 +3,6 @@
  * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
  */
 
-#if defined(NDS32_ABI_2) || defined(NDS32_ABI_2FP)
-# define STACK_PUSH
-# define STACK_POP
-#else
-# define STACK_PUSH     addi   $sp,    $sp,    -24
-# define STACK_POP      addi   $sp,    $sp,    24
-#endif
-
  .text
  .align 4      ! 16 byte boundary
  .globl _dl_linux_resolve
@@ -28,22 +20,11 @@ _dl_linux_resolve:
 	smw.adm	$r0,	[$sp],	$r5,	6
 
 	! init gp
-#ifdef __NDS32_N1213_43U1H__
-	sethi	$gp,	HI20(_GLOBAL_OFFSET_TABLE_+4)
-	ori	$gp,	$gp,	LO12(_GLOBAL_OFFSET_TABLE_+8)
-	add	$gp,	$ta,	$gp
-#else
 	mfusr	$ta,	$PC
 	sethi	$gp,	HI20(_GLOBAL_OFFSET_TABLE_+4)
 	ori	$gp,	$gp,	LO12(_GLOBAL_OFFSET_TABLE_+8)
 	add	$gp,	$ta,	$gp
-#endif
 
-	! #ifdef __NDS32_ABI_1__
-	! adjust stack
-	!addi	$sp,	$sp,	-24
-	STACK_PUSH
-	! #endif
 
 	! set arguments
 	addi	$r0,	$r17,	0
@@ -61,11 +42,6 @@ _dl_linux_resolve:
 	! save the return
 	addi	$ta,	$r0,	0
 
-	! #ifdef __NDS32_ABI_1__
-	! adjust sp
-	!addi	$sp,	$sp,	24
-	STACK_POP
-	! #endif
 
 	! reload registers
 	lmw.bim	$r0,	[$sp],	$r5,	6
diff --git a/ldso/libdl/Makefile.in b/ldso/libdl/Makefile.in
index 24e00faf0..3fd8a6cd1 100644
--- a/ldso/libdl/Makefile.in
+++ b/ldso/libdl/Makefile.in
@@ -33,7 +33,6 @@ libdl_SRC := $(libdl_DIR)/libdl.c
 libdl_OBJ := $(patsubst $(libdl_DIR)/%.c,$(libdl_OUT)/%.o,$(libdl_SRC))
 
 resolve := $(top_builddir)ldso/ldso/$(TARGET_ARCH)/resolve.o
-
 libdl-a-y := $(libdl_OBJ) $(resolve)
 ifeq ($(DOPIC),y)
 libdl-a-y := $(libdl-a-y:.o=.os)
-- 
cgit v1.2.3