From 534661b91c98492995274c364c8177c45efc63db Mon Sep 17 00:00:00 2001 From: Austin Foxley Date: Sat, 19 Sep 2009 10:04:05 -0700 Subject: ldso/: tls support for dynamic linker Signed-off-by: Austin Foxley --- ldso/include/dl-hash.h | 61 ++- ldso/include/ldso.h | 6 + ldso/include/ldsodefs.h | 147 ++++++ ldso/ldso/Makefile.in | 11 + ldso/ldso/arm/aeabi_read_tp.S | 64 +++ ldso/ldso/arm/dl-debug.h | 4 +- ldso/ldso/arm/dl-sysdep.h | 21 +- ldso/ldso/arm/elfinterp.c | 52 +- ldso/ldso/arm/resolve.S | 4 + ldso/ldso/arm/thumb_atomics.S | 79 ++++ ldso/ldso/dl-elf.c | 67 +++ ldso/ldso/dl-hash.c | 87 ++-- ldso/ldso/dl-startup.c | 14 + ldso/ldso/dl-tls.c | 1048 +++++++++++++++++++++++++++++++++++++++++ ldso/ldso/i386/dl-sysdep.h | 7 +- ldso/ldso/i386/elfinterp.c | 32 +- ldso/ldso/ldso.c | 158 ++++++- ldso/ldso/mips/elfinterp.c | 71 ++- ldso/ldso/sh/dl-debug.h | 2 + ldso/ldso/sh/dl-sysdep.h | 9 + ldso/ldso/sh/elfinterp.c | 39 +- ldso/ldso/sparc/dl-sysdep.h | 4 +- ldso/ldso/sparc/elfinterp.c | 75 +-- ldso/libdl/libdl.c | 325 ++++++++++++- 24 files changed, 2239 insertions(+), 148 deletions(-) create mode 100644 ldso/include/ldsodefs.h create mode 100644 ldso/ldso/arm/aeabi_read_tp.S create mode 100644 ldso/ldso/arm/thumb_atomics.S create mode 100644 ldso/ldso/dl-tls.c diff --git a/ldso/include/dl-hash.h b/ldso/include/dl-hash.h index e7ca4aba8..1b28a34b6 100644 --- a/ldso/include/dl-hash.h +++ b/ldso/include/dl-hash.h @@ -34,7 +34,32 @@ struct elf_resolve { struct elf_resolve * next; struct elf_resolve * prev; /* Nothing after this address is used by gdb. */ - ElfW(Addr) mapaddr; /* Address at which ELF segments (either main app and DSO) are mapped into */ + +#if USE_TLS + /* Thread-local storage related info. */ + + /* Start of the initialization image. */ + void *l_tls_initimage; + /* Size of the initialization image. */ + size_t l_tls_initimage_size; + /* Size of the TLS block. */ + size_t l_tls_blocksize; + /* Alignment requirement of the TLS block. 
*/ + size_t l_tls_align; + /* Offset of first byte module alignment. */ + size_t l_tls_firstbyte_offset; +# ifndef NO_TLS_OFFSET +# define NO_TLS_OFFSET 0 +# endif + /* For objects present at startup time: offset in the static TLS block. */ + ptrdiff_t l_tls_offset; + /* Index of the module in the dtv array. */ + size_t l_tls_modid; + /* Nonzero if _dl_init_static_tls should be called for this module */ + unsigned int l_need_tls_init:1; +#endif + + ElfW(Addr) mapaddr; enum {elf_lib, elf_executable,program_interpreter, loaded_file} libtype; struct dyn_elf * symbol_scope; unsigned short usage_count; @@ -106,26 +131,31 @@ struct elf_resolve { extern struct dyn_elf * _dl_symbol_tables; extern struct elf_resolve * _dl_loaded_modules; -extern struct dyn_elf * _dl_handles; +extern struct dyn_elf * _dl_handles; extern struct elf_resolve * _dl_add_elf_hash_table(const char * libname, DL_LOADADDR_TYPE loadaddr, unsigned long * dynamic_info, unsigned long dynamic_addr, unsigned long dynamic_size); -extern char * _dl_lookup_hash(const char * name, struct dyn_elf * rpnt, - struct elf_resolve *mytpnt, int type_class -#ifdef __FDPIC__ - , struct elf_resolve **tpntp +#if USE_TLS || defined __FDPIC__ +#define _DL_LOOKUP_HASH_NEEDS_EXTRA_TPNT +#define _DL_LOOKUP_HASH_EXTRA_TPNT ,struct elf_resolve **tpntp +#else +#undef _DL_LOOKUP_HASH_NEEDS_EXTRA_TPNT +#define _DL_LOOKUP_HASH_EXTRA_TPNT #endif - ); +extern char * _dl_lookup_hash(const char * name, struct dyn_elf * rpnt, + struct elf_resolve *mytpnt, int type_class _DL_LOOKUP_HASH_EXTRA_TPNT); + static __always_inline char *_dl_find_hash(const char *name, struct dyn_elf *rpnt, - struct elf_resolve *mytpnt, int type_class) + struct elf_resolve *mytpnt, int type_class, + struct elf_resolve **tpntp) { -#ifdef __FDPIC__ - return _dl_lookup_hash(name, rpnt, mytpnt, type_class, NULL); +#ifdef _DL_LOOKUP_HASH_NEEDS_EXTRA_TPNT + return _dl_lookup_hash(name, rpnt, mytpnt, type_class, tpntp); #else - return _dl_lookup_hash(name, rpnt, mytpnt, 
type_class); + return _dl_lookup_hash(name, rpnt, mytpnt, type_class); #endif } @@ -148,8 +178,11 @@ static __inline__ int _dl_symbol(char * name) #define LD_ERROR_NOTDYN 5 #define LD_ERROR_MMAP_FAILED 6 #define LD_ERROR_NODYNAMIC 7 -#define LD_WRONG_RELOCS 8 -#define LD_BAD_HANDLE 9 -#define LD_NO_SYMBOL 10 +#define LD_ERROR_TLS_FAILED 8 +#define LD_WRONG_RELOCS 9 +#define LD_BAD_HANDLE 10 +#define LD_NO_SYMBOL 11 + + #endif /* _LD_HASH_H_ */ diff --git a/ldso/include/ldso.h b/ldso/include/ldso.h index dc4d92db6..1dd35febc 100644 --- a/ldso/include/ldso.h +++ b/ldso/include/ldso.h @@ -38,6 +38,10 @@ #include /* Now the ldso specific headers */ #include +#ifdef __UCLIBC_HAS_TLS__ +/* Defines USE_TLS */ +#include +#endif #include /* common align masks, if not specified by sysdep headers */ @@ -113,6 +117,8 @@ extern int _dl_debug_file; #endif extern void *_dl_malloc(size_t size); +extern void * _dl_calloc(size_t __nmemb, size_t __size); +extern void * _dl_realloc(void * __ptr, size_t __size); extern void _dl_free(void *); extern char *_dl_getenv(const char *symbol, char **envp); extern void _dl_unsetenv(const char *symbol, char **envp); diff --git a/ldso/include/ldsodefs.h b/ldso/include/ldsodefs.h new file mode 100644 index 000000000..432c7b848 --- /dev/null +++ b/ldso/include/ldsodefs.h @@ -0,0 +1,147 @@ +#ifndef _LDSODEFS_H +#define _LDSODEFS_H 1 + +#include + +#include +#include + +#ifdef __mips__ +/* The MIPS ABI specifies that the dynamic section has to be read-only. */ + +#define DL_RO_DYN_SECTION 1 + +/* TODO: Import in 64-bit relocations from glibc. */ +#endif + +#ifndef SHARED +# define EXTERN extern +#else +# ifdef IS_IN_rtld +# define EXTERN +# else +# define EXTERN extern +# endif +#endif + +/* Non-shared code has no support for multiple namespaces. */ +#ifdef SHARED +# define DL_NNS 16 +#else +# define DL_NNS 1 +#endif + +#define GL(x) _##x +#define GLRO(x) _##x + +/* Variable pointing to the end of the stack (or close to it). 
This value + must be constant over the runtime of the application. Some programs + might use the variable which results in copy relocations on some + platforms. But this does not matter, ld.so can always use the local + copy. */ +extern void *__libc_stack_end; + +/* Determine next available module ID. */ +extern size_t _dl_next_tls_modid (void) internal_function attribute_hidden; + +/* Calculate offset of the TLS blocks in the static TLS block. */ +extern void _dl_determine_tlsoffset (void) internal_function attribute_hidden; + +/* Set up the data structures for TLS, when they were not set up at startup. + Returns nonzero on malloc failure. + This is called from _dl_map_object_from_fd or by libpthread. */ +extern int _dl_tls_setup (void) internal_function; +rtld_hidden_proto (_dl_tls_setup) + +/* Allocate memory for static TLS block (unless MEM is nonzero) and dtv. */ +extern void *_dl_allocate_tls (void *mem) internal_function; + +/* Get size and alignment requirements of the static TLS block. */ +extern void _dl_get_tls_static_info (size_t *sizep, size_t *alignp) + internal_function; + +extern void _dl_allocate_static_tls (struct link_map *map) + internal_function attribute_hidden; + +/* Taken from glibc/elf/dl-reloc.c */ +#define CHECK_STATIC_TLS(sym_map) \ + do { \ + if (__builtin_expect ((sym_map)->l_tls_offset == NO_TLS_OFFSET, 0)) \ + _dl_allocate_static_tls (sym_map); \ + } while (0) + +/* These are internal entry points to the two halves of _dl_allocate_tls, + only used within rtld.c itself at startup time. */ +extern void *_dl_allocate_tls_storage (void) + internal_function attribute_hidden; +extern void *_dl_allocate_tls_init (void *) internal_function; + +/* Deallocate memory allocated with _dl_allocate_tls. */ +extern void _dl_deallocate_tls (void *tcb, bool dealloc_tcb) internal_function; + +extern void _dl_nothread_init_static_tls (struct link_map *) internal_function attribute_hidden; + +/* Highest dtv index currently needed. 
*/ +EXTERN size_t _dl_tls_max_dtv_idx; +/* Flag signalling whether there are gaps in the module ID allocation. */ +EXTERN bool _dl_tls_dtv_gaps; +/* Information about the dtv slots. */ +EXTERN struct dtv_slotinfo_list +{ + size_t len; + struct dtv_slotinfo_list *next; + struct dtv_slotinfo + { + size_t gen; + bool is_static; + struct link_map *map; + } slotinfo[0]; +} *_dl_tls_dtv_slotinfo_list; +/* Number of modules in the static TLS block. */ +EXTERN size_t _dl_tls_static_nelem; +/* Size of the static TLS block. */ +EXTERN size_t _dl_tls_static_size; +/* Size actually allocated in the static TLS block. */ +EXTERN size_t _dl_tls_static_used; +/* Alignment requirement of the static TLS block. */ +EXTERN size_t _dl_tls_static_align; +/* Function pointer for catching TLS errors. */ +EXTERN void **(*_dl_error_catch_tsd) (void) __attribute__ ((const)); + +/* Number of additional entries in the slotinfo array of each slotinfo + list element. A large number makes it almost certain that we never + have to iterate beyond the first element in the slotinfo list. */ +# define TLS_SLOTINFO_SURPLUS (62) + +/* Number of additional slots in the dtv allocated. */ +# define DTV_SURPLUS (14) + + /* Initial dtv of the main thread, not allocated with normal malloc. */ + EXTERN void *_dl_initial_dtv; + /* Generation counter for the dtv. */ + EXTERN size_t _dl_tls_generation; + + EXTERN void (*_dl_init_static_tls) (struct link_map *); + +/* We have the auxiliary vector. */ +#define HAVE_AUX_VECTOR + +/* We can assume that the kernel always provides the AT_UID, AT_EUID, + AT_GID, and AT_EGID values in the auxiliary vector from 2.4.0 or so on. */ +#if __ASSUME_AT_XID +# define HAVE_AUX_XID +#endif + +/* We can assume that the kernel always provides the AT_SECURE value + in the auxiliary vector from 2.5.74 or so on. */ +#if __ASSUME_AT_SECURE +# define HAVE_AUX_SECURE +#endif + +/* Starting with one of the 2.4.0 pre-releases the Linux kernel passes + up the page size information. 
*/ +#if __ASSUME_AT_PAGESIZE +# define HAVE_AUX_PAGESIZE +#endif + +#endif diff --git a/ldso/ldso/Makefile.in b/ldso/ldso/Makefile.in index a74c36e5e..350cc8108 100644 --- a/ldso/ldso/Makefile.in +++ b/ldso/ldso/Makefile.in @@ -15,6 +15,17 @@ CFLAGS-ldso += -fno-omit-frame-pointer CFLAGS-ldso += -I$(top_srcdir)ldso/ldso/$(TARGET_ARCH) -I$(top_srcdir)ldso/include -I$(top_srcdir)ldso/ldso CFLAGS-ldso += -DUCLIBC_RUNTIME_PREFIX=\"$(RUNTIME_PREFIX)\" -DUCLIBC_LDSO=\"$(UCLIBC_LDSO)\" +ifeq ($(DODEBUG),y) +# Not really much point in including debugging info, since gdb +# can't really debug ldso, since gdb requires help from ldso to +# debug things.... +# On arm, gcc-4.3.x onwards -Os emits calls to libgcc, which calls _div0, +# which tries to call raise(). And raise comes from libc so a catch 22. +# Using -O2 instead. We could have use -fno-early-inlining with -Os too. + +CFLAGS-ldso += -O2 -g +endif + CFLAGS-ldso/ldso/$(TARGET_ARCH)/ := $(CFLAGS-ldso) CFLAGS-ldso.c := -DLDSO_ELFINTERP=\"$(TARGET_ARCH)/elfinterp.c\" $(CFLAGS-ldso) diff --git a/ldso/ldso/arm/aeabi_read_tp.S b/ldso/ldso/arm/aeabi_read_tp.S new file mode 100644 index 000000000..f81bae676 --- /dev/null +++ b/ldso/ldso/arm/aeabi_read_tp.S @@ -0,0 +1,64 @@ +/* Copyright (C) 2005 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. 
(The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include + +#ifdef __UCLIBC_HAS_THREADS_NATIVE__ + +#include +#include + +/* GCC will emit calls to this routine under -mtp=soft. Linux has an + equivalent helper function (which clobbers fewer registers than + a normal function call) in a high page of memory; tail call to the + helper. + + This function is exported from libc for use by user code. libpthread, librt, + and the dynamic linker get their own private copies, for + performance (and in the case of ld.so, out of necessity); those are + all hidden. 
*/ + +#ifndef NOT_IN_libc + .global __aeabi_read_tp +#else + .hidden __aeabi_read_tp +#endif +ENTRY (__aeabi_read_tp) + mov r0, #0xffff0fff + sub pc, r0, #31 +END (__aeabi_read_tp) + +#endif /* __UCLIBC_HAS_THREADS_NATIVE__ */ + diff --git a/ldso/ldso/arm/dl-debug.h b/ldso/ldso/arm/dl-debug.h index d5103202c..1bca6ff36 100644 --- a/ldso/ldso/arm/dl-debug.h +++ b/ldso/ldso/arm/dl-debug.h @@ -33,12 +33,14 @@ static const char *_dl_reltypes_tab[] = [4] "R_ARM_PC13", "R_ARM_ABS16", "R_ARM_ABS12", "R_ARM_THM_ABS5", [8] "R_ARM_ABS8", "R_ARM_SBREL32","R_ARM_THM_PC22", "R_ARM_THM_PC8", [12] "R_ARM_AMP_VCALL9", "R_ARM_SWI24", "R_ARM_THM_SWI8", "R_ARM_XPC25", - [16] "R_ARM_THM_XPC22", + [16] "R_ARM_THM_XPC22", "R_ARM_TLS_DTPMOD32", "R_ARM_TLS_DTPOFF32", "R_ARM_TLS_TPOFF32", [20] "R_ARM_COPY", "R_ARM_GLOB_DAT","R_ARM_JUMP_SLOT", "R_ARM_RELATIVE", [24] "R_ARM_GOTOFF", "R_ARM_GOTPC", "R_ARM_GOT32", "R_ARM_PLT32", [32] "R_ARM_ALU_PCREL_7_0","R_ARM_ALU_PCREL_15_8","R_ARM_ALU_PCREL_23_15","R_ARM_LDR_SBREL_11_0", [36] "R_ARM_ALU_SBREL_19_12","R_ARM_ALU_SBREL_27_20", [100] "R_ARM_GNU_VTENTRY","R_ARM_GNU_VTINHERIT","R_ARM_THM_PC11","R_ARM_THM_PC9", + [104] "R_ARM_TLS_GD32","R_ARM_TLS_LDM32","R_ARM_TLS_LDO32","R_ARM_TLS_IE32", + [108] "R_ARM_TLS_LE32","R_ARM_TLS_LDO12","R_ARM_TLS_LE12","R_ARM_TLS_IE12GP", [249] "R_ARM_RXPC25", "R_ARM_RSBREL32", "R_ARM_THM_RPC22", "R_ARM_RREL32", [253] "R_ARM_RABS22", "R_ARM_RPC24", "R_ARM_RBASE", }; diff --git a/ldso/ldso/arm/dl-sysdep.h b/ldso/ldso/arm/dl-sysdep.h index 75c58b0ec..5a2912ab5 100644 --- a/ldso/ldso/arm/dl-sysdep.h +++ b/ldso/ldso/arm/dl-sysdep.h @@ -5,6 +5,9 @@ * Copyright (C) 2000-2004 by Erik Andersen */ +#ifndef _ARCH_DL_SYSDEP +#define _ARCH_DL_SYSDEP + /* Define this if the system uses RELOCA. 
*/ #undef ELF_USES_RELOCA #include @@ -55,12 +58,21 @@ static __always_inline unsigned long arm_modulus(unsigned long m, unsigned long struct elf_resolve; unsigned long _dl_linux_resolver(struct elf_resolve * tpnt, int reloc_entry); -/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry, so - PLT entries should not be allowed to define the value. +/* 4096 bytes alignment */ +#define PAGE_ALIGN 0xfffff000 +#define ADDR_ALIGN 0xfff +#define OFFS_ALIGN 0x7ffff000 + +/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry or + TLS variable, so undefined references should not be allowed to + define the value. + ELF_RTYPE_CLASS_NOCOPY iff TYPE should not be allowed to resolve to one of the main executable's symbols, as for a COPY reloc. */ -#define elf_machine_type_class(type) \ - ((((type) == R_ARM_JUMP_SLOT) * ELF_RTYPE_CLASS_PLT) \ +#define elf_machine_type_class(type) \ + ((((type) == R_ARM_JUMP_SLOT || (type) == R_ARM_TLS_DTPMOD32 \ + || (type) == R_ARM_TLS_DTPOFF32 || (type) == R_ARM_TLS_TPOFF32) \ + * ELF_RTYPE_CLASS_PLT) \ | (((type) == R_ARM_COPY) * ELF_RTYPE_CLASS_COPY)) /* Return the link-time address of _DYNAMIC. 
Conveniently, this is the @@ -136,6 +148,7 @@ elf_machine_relative (Elf32_Addr load_off, const Elf32_Addr rel_addr, *reloc_addr += load_off; } while (--relative_count); } +#endif /* !_ARCH_DL_SYSDEP */ #ifdef __ARM_EABI__ #define DL_MALLOC_ALIGN 8 /* EABI needs 8 byte alignment for STRD LDRD */ diff --git a/ldso/ldso/arm/elfinterp.c b/ldso/ldso/arm/elfinterp.c index 197975e4a..1469df016 100644 --- a/ldso/ldso/arm/elfinterp.c +++ b/ldso/ldso/arm/elfinterp.c @@ -50,7 +50,7 @@ unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry) Elf32_Sym *symtab; ELF_RELOC *rel_addr; int symtab_index; - char *new_addr; + unsigned long new_addr; char **got_addr; unsigned long instr_addr; @@ -70,7 +70,7 @@ unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry) /* Get the address of the GOT entry */ new_addr = _dl_find_hash(symname, tpnt->symbol_scope, - tpnt, ELF_RTYPE_CLASS_PLT); + tpnt, ELF_RTYPE_CLASS_PLT, NULL); if (unlikely(!new_addr)) { _dl_dprintf(2, "%s: can't resolve symbol '%s'\n", _dl_progname, symname); @@ -89,13 +89,13 @@ unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry) } } if (!_dl_debug_nofixups) { - *got_addr = new_addr; + *got_addr = (char*)new_addr; } #else - *got_addr = new_addr; + *got_addr = (char*)new_addr; #endif - return (unsigned long) new_addr; + return new_addr; } static int @@ -188,28 +188,40 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope, int symtab_index; unsigned long *reloc_addr; unsigned long symbol_addr; + const Elf32_Sym *def = 0; + struct elf_resolve *def_mod = 0; int goof = 0; - reloc_addr = (unsigned long *) (tpnt->loadaddr + (unsigned long) rpnt->r_offset); + reloc_addr = (unsigned long *) (tpnt->loadaddr + + (unsigned long) rpnt->r_offset); + reloc_type = ELF32_R_TYPE(rpnt->r_info); symtab_index = ELF32_R_SYM(rpnt->r_info); symbol_addr = 0; if (symtab_index) { - - symbol_addr = (unsigned long) _dl_find_hash(strtab + symtab[symtab_index].st_name, - scope, tpnt, 
elf_machine_type_class(reloc_type)); + symbol_addr = _dl_find_hash(strtab + symtab[symtab_index].st_name, + scope, tpnt, + elf_machine_type_class(reloc_type), + &def_mod); /* * We want to allow undefined references to weak symbols - this might * have been intentional. We should not be linking local symbols * here, so all bases should be covered. */ - if (!symbol_addr && ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK) { - _dl_dprintf (2, "%s: can't resolve symbol '%s'\n", - _dl_progname, strtab + symtab[symtab_index].st_name); - _dl_exit (1); + if (!symbol_addr && (ELF_ST_TYPE(symtab[symtab_index].st_info) != STT_TLS) + && (ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK)) { + /* This may be non-fatal if called from dlopen. */ + return 1; + } + } else { + /* Relocs against STN_UNDEF are usually treated as using a + symbol value of zero, and using the module containing the + reloc itself. */ + symbol_addr = symtab[symtab_index].st_value; + def_mod = tpnt; } #if defined (__SUPPORT_LD_DEBUG__) @@ -265,6 +277,20 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope, _dl_memcpy((void *) reloc_addr, (void *) symbol_addr, symtab[symtab_index].st_size); break; +#if USE_TLS + case R_ARM_TLS_DTPMOD32: + *reloc_addr = def_mod->l_tls_modid; + break; + + case R_ARM_TLS_DTPOFF32: + *reloc_addr += symbol_addr; + break; + + case R_ARM_TLS_TPOFF32: + CHECK_STATIC_TLS ((struct link_map *) def_mod); + *reloc_addr += (symbol_addr + def_mod->l_tls_offset); + break; +#endif default: return -1; /*call _dl_exit(1) */ } diff --git a/ldso/ldso/arm/resolve.S b/ldso/ldso/arm/resolve.S index b422c334d..08889d06e 100644 --- a/ldso/ldso/arm/resolve.S +++ b/ldso/ldso/arm/resolve.S @@ -95,6 +95,10 @@ #include +#define sl r10 +#define fp r11 +#define ip r12 + .text .align 4 @ 16 byte boundary and there are 32 bytes below (arm case) #if !defined(__thumb__) || defined(__thumb2__) diff --git a/ldso/ldso/arm/thumb_atomics.S b/ldso/ldso/arm/thumb_atomics.S new file mode 100644 
index 000000000..f6ae3db3c --- /dev/null +++ b/ldso/ldso/arm/thumb_atomics.S @@ -0,0 +1,79 @@ +/* Copyright (C) 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include + +#ifdef __UCLIBC_HAS_THREADS_NATIVE__ + +#include + +#if defined __thumb__ + +/* Out-of-line atomic operations that we can't do in Thumb mode. 
+ This ends up in various libraries where it is needed (and + a few .a archives where it isn't). */ + + .hidden __thumb_swpb +ENTRY (__thumb_swpb) + swpb r0, r0, [r1] + bx lr +END (__thumb_swpb) + + .hidden __thumb_swp +ENTRY (__thumb_swp) + swp r0, r0, [r1] + bx lr +END (__thumb_swp) + + .hidden __thumb_cmpxchg +ENTRY (__thumb_cmpxchg) + stmdb sp!, {r4, lr} + mov r4, r0 +0: ldr r3, [r2] + cmp r3, r4 + bne 1f + mov r0, r4 + mov r3, #0xffff0fff + mov lr, pc + add pc, r3, #(0xffff0fc0 - 0xffff0fff) + bcc 0b + mov r3, r4 +1: mov r0, r3 + ldmia sp!, {r4, pc} +END (__thumb_cmpxchg) + +#endif /* __thumb__ */ +#endif /* __UCLIBC_HAS_THREADS_NATIVE__ */ + diff --git a/ldso/ldso/dl-elf.c b/ldso/ldso/dl-elf.c index 89708497d..75e8f7186 100644 --- a/ldso/ldso/dl-elf.c +++ b/ldso/ldso/dl-elf.c @@ -329,6 +329,9 @@ struct elf_resolve *_dl_load_elf_shared_library(int secure, ElfW(Dyn) *dpnt; struct elf_resolve *tpnt; ElfW(Phdr) *ppnt; +#if USE_TLS + ElfW(Phdr) *tlsppnt = NULL; +#endif char *status, *header; unsigned long dynamic_info[DYNAMIC_SIZE]; unsigned long *lpnt; @@ -433,6 +436,29 @@ struct elf_resolve *_dl_load_elf_shared_library(int secure, maxvma = ppnt->p_vaddr + ppnt->p_memsz; } } + if (ppnt->p_type == PT_TLS) + { +#if USE_TLS + /* An empty segment needs no work; do not 'continue' here, + * because that would skip the ppnt++ at the bottom of the loop. + */ + if (ppnt->p_memsz != 0) + /* Save for after 'tpnt' is actually allocated. */ + tlsppnt = ppnt; +#else + /* + * Yup, the user was an idiot and tried to sneak in a library with + * TLS in it and we don't support it. Let's fall on our own sword + * and scream at the luser while we die. 
+ */ + _dl_dprintf(2, "%s: '%s' library contains unsupported TLS\n", + _dl_progname, libname); + _dl_internal_error_number = LD_ERROR_TLS_FAILED; + _dl_close(infile); + _dl_munmap(header, _dl_pagesize); + return NULL; +#endif + } ppnt++; } @@ -708,6 +734,37 @@ struct elf_resolve *_dl_load_elf_shared_library(int secure, tpnt->ppnt = (ElfW(Phdr) *) DL_RELOC_ADDR(tpnt->loadaddr, epnt->e_phoff); tpnt->n_phent = epnt->e_phnum; +#if USE_TLS + if (tlsppnt) + { + _dl_debug_early("Found TLS header for %s\n", libname); +#if NO_TLS_OFFSET != 0 + tpnt->l_tls_offset = NO_TLS_OFFSET; +#endif + tpnt->l_tls_blocksize = tlsppnt->p_memsz; + tpnt->l_tls_align = tlsppnt->p_align; + if (tlsppnt->p_align == 0) + tpnt->l_tls_firstbyte_offset = 0; + else + tpnt->l_tls_firstbyte_offset = tlsppnt->p_vaddr & + (tlsppnt->p_align - 1); + tpnt->l_tls_initimage_size = tlsppnt->p_filesz; + tpnt->l_tls_initimage = (void *) tlsppnt->p_vaddr; + + /* Assign the next available module ID. */ + tpnt->l_tls_modid = _dl_next_tls_modid (); + + /* We know the load address, so add it to the offset. */ + if (tpnt->l_tls_initimage != NULL) + { + unsigned int tmp = (unsigned int) tpnt->l_tls_initimage; + tpnt->l_tls_initimage = (char *) tlsppnt->p_vaddr + tpnt->loadaddr; + _dl_debug_early("Relocated TLS initial image from %x to %x (size = %x)\n", tmp, tpnt->l_tls_initimage, tpnt->l_tls_initimage_size); + tmp = 0; + } + } +#endif + /* * Add this object into the symbol chain */ @@ -816,6 +873,16 @@ int _dl_fixup(struct dyn_elf *rpnt, int now_flag) } tpnt->init_flag |= JMP_RELOCS_DONE; } + +#if 0 +/* _dl_add_to_slotinfo is called by init_tls() for initial DSO + or by dlopen() for dynamically loaded DSO. */ +#if USE_TLS + /* Add object to slot information data if necessasy. 
*/ + if (tpnt->l_tls_blocksize != 0 && tls_init_tp_called) + _dl_add_to_slotinfo ((struct link_map *) tpnt); +#endif +#endif return goof; } diff --git a/ldso/ldso/dl-hash.c b/ldso/ldso/dl-hash.c index 4809c4348..3103d9f0b 100644 --- a/ldso/ldso/dl-hash.c +++ b/ldso/ldso/dl-hash.c @@ -157,18 +157,29 @@ struct elf_resolve *_dl_add_elf_hash_table(const char *libname, static __attribute_noinline__ const ElfW(Sym) * check_match (const ElfW(Sym) *sym, char *strtab, const char* undef_name, int type_class) { - if (type_class & (sym->st_shndx == SHN_UNDEF)) - /* undefined symbol itself */ - return NULL; -#ifdef __mips__ - if (sym->st_shndx == SHN_UNDEF && !(sym->st_other & STO_MIPS_PLT)) - return NULL; -#endif - - if (sym->st_value == 0) - /* No value */ - return NULL; +#if USE_TLS + if((sym->st_value == 0 && (ELF_ST_TYPE(sym->st_info) != STT_TLS)) + || (type_class & (sym->st_shndx == SHN_UNDEF))) + /* No value or undefined symbol itself */ + return NULL; + + if(ELF_ST_TYPE(sym->st_info) > STT_FUNC + && ELF_ST_TYPE(sym->st_info) != STT_COMMON + && ELF_ST_TYPE(sym->st_info) != STT_TLS) + /* Ignore all but STT_NOTYPE, STT_OBJECT, STT_FUNC and STT_COMMON + * entries (and STT_TLS if TLS is supported) since these + * are no code/data definitions. + */ + return NULL; +#else + if (type_class & (sym->st_shndx == SHN_UNDEF)) + /* undefined symbol itself */ + return NULL; + + if (sym->st_value == 0) + /* No value */ + return NULL; if (ELF_ST_TYPE(sym->st_info) > STT_FUNC && ELF_ST_TYPE(sym->st_info) != STT_COMMON) @@ -177,7 +188,7 @@ check_match (const ElfW(Sym) *sym, char *strtab, const char* undef_name, int typ * code/data definitions */ return NULL; - +#endif if (_dl_strcmp(strtab + sym->st_name, undef_name) != 0) return NULL; @@ -257,12 +268,11 @@ _dl_lookup_sysv_hash(struct elf_resolve *tpnt, ElfW(Sym) *symtab, unsigned long * This function resolves externals, and this is either called when we process * relocations or when we call an entry in the PLT table for the first time. 
*/ -char *_dl_lookup_hash(const char *name, struct dyn_elf *rpnt, - struct elf_resolve *mytpnt, int type_class -#ifdef __FDPIC__ - , struct elf_resolve **tpntp -#endif - ) +char *_dl_lookup_hash(const char *name, struct dyn_elf *rpnt, struct elf_resolve *mytpnt, int type_class +#if USE_TLS +,struct elf_resolve **tls_tpnt +#endif +) { struct elf_resolve *tpnt = NULL; ElfW(Sym) *symtab; @@ -270,8 +280,7 @@ char *_dl_lookup_hash(const char *name, struct dyn_elf *rpnt, unsigned long elf_hash_number = 0xffffffff; const ElfW(Sym) *sym = NULL; - const ElfW(Sym) *weak_sym = 0; - struct elf_resolve *weak_tpnt = 0; + char *weak_result = NULL; #ifdef __LDSO_GNU_HASH_SUPPORT__ unsigned long gnu_hash_number = _dl_gnu_hash((const unsigned char *)name); @@ -329,37 +338,29 @@ char *_dl_lookup_hash(const char *name, struct dyn_elf *rpnt, if (sym) { /* At this point we have found the requested symbol, do binding */ +#if USE_TLS + if(ELF_ST_TYPE(sym->st_info) == STT_TLS) { + _dl_assert((tls_tpnt != NULL)); + *tls_tpnt = tpnt; + + return (char*)sym->st_value; + } +#endif + switch (ELF_ST_BIND(sym->st_info)) { case STB_WEAK: #if 0 -/* Perhaps we should support old style weak symbol handling - * per what glibc does when you export LD_DYNAMIC_WEAK */ - if (!weak_sym) { - weak_tpnt = tpnt; - weak_sym = sym; - } + /* Perhaps we should support old style weak symbol handling + * per what glibc does when you export LD_DYNAMIC_WEAK */ + if (!weak_result) + weak_result = (char *)tpnt->loadaddr + sym->st_value; break; #endif case STB_GLOBAL: -#ifdef __FDPIC__ - if (tpntp) - *tpntp = tpnt; -#endif - return (char *) DL_FIND_HASH_VALUE (tpnt, type_class, sym); + return (char*)tpnt->loadaddr + sym->st_value; default: /* Local symbols not handled here */ break; } } - if (weak_sym) { -#ifdef __FDPIC__ - if (tpntp) - *tpntp = weak_tpnt; -#endif - return (char *) DL_FIND_HASH_VALUE (weak_tpnt, type_class, weak_sym); - } -#ifdef __FDPIC__ - if (tpntp) - *tpntp = NULL; -#endif - return NULL; + return 
weak_result; } diff --git a/ldso/ldso/dl-startup.c b/ldso/ldso/dl-startup.c index de9c8bc4e..6f07b960a 100644 --- a/ldso/ldso/dl-startup.c +++ b/ldso/ldso/dl-startup.c @@ -209,6 +209,20 @@ DL_START(unsigned long args) _dl_parse_dynamic_info(dpnt, tpnt->dynamic_info, NULL, load_addr); #endif + /* + * BIG ASSUMPTION: We assume that the dynamic loader does not + * have any TLS data itself. If this ever occurs + * more work than what is done below for the + * loader will have to happen. + */ +#if USE_TLS + /* This was done by _dl_memset above. */ + /* tpnt->l_tls_modid = 0; */ +# if NO_TLS_OFFSET != 0 + tpnt->l_tls_offset = NO_TLS_OFFSET; +# endif +#endif + SEND_EARLY_STDERR_DEBUG("Done scanning DYNAMIC section\n"); #if defined(PERFORM_BOOTSTRAP_GOT) diff --git a/ldso/ldso/dl-tls.c b/ldso/ldso/dl-tls.c new file mode 100644 index 000000000..e718373cd --- /dev/null +++ b/ldso/ldso/dl-tls.c @@ -0,0 +1,1048 @@ +/* vi: set sw=4 ts=4: */ +/* + * Thread-local storage handling in the ELF dynamic linker. + * + * Copyright (C) 2005 by Steven J. Hill + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the above contributors may not be + * used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include + +void *(*_dl_calloc_function) (size_t __nmemb, size_t __size) = NULL; +void *(*_dl_realloc_function) (void *__ptr, size_t __size) = NULL; +void *(*_dl_memalign_function) (size_t __boundary, size_t __size) = NULL; + +void (*_dl_free_function) (void *__ptr); +void *_dl_memalign (size_t __boundary, size_t __size); +struct link_map *_dl_update_slotinfo (unsigned long int req_modid); + +/* Round up N to the nearest multiple of P, where P is a power of 2 + --- without using libgcc division routines. */ +#define roundup_pow2(n, p) (((n) + (p) - 1) & ~((p) - 1)) + +void * +_dl_calloc (size_t __nmemb, size_t __size) +{ + void *result; + size_t size = (__size * __nmemb); + + if (_dl_calloc_function) + return (*_dl_calloc_function) (__nmemb, __size); + + if ((result = _dl_malloc(size)) != NULL) { + _dl_memset(result, 0, size); + } + + return result; +} + +void * +_dl_realloc (void * __ptr, size_t __size) +{ + if (_dl_realloc_function) + return (*_dl_realloc_function) (__ptr, __size); + + _dl_debug_early("NOT IMPLEMENTED PROPERLY!!!\n"); + return NULL; +} + +void +_dl_free (void *__ptr) +{ + if (_dl_free_function) + (*_dl_free_function) (__ptr); + +#if 0 + _dl_debug_early("NOT IMPLEMENTED PROPERLY!!!\n"); +#endif +} + + +/* The __tls_get_addr function has two basic forms which differ in the + arguments. The IA-64 form takes two parameters, the module ID and + offset. 
The form used, among others, on IA-32 takes a reference to
+   a special structure which contain the same information. The second
+   form seems to be more often used (in the moment) so we default to
+   it. Users of the IA-64 form have to provide adequate definitions
+   of the following macros. */
+#ifndef GET_ADDR_ARGS
+# define GET_ADDR_ARGS		tls_index *ti
+#endif
+#ifndef GET_ADDR_MODULE
+# define GET_ADDR_MODULE	ti->ti_module
+#endif
+#ifndef GET_ADDR_OFFSET
+# define GET_ADDR_OFFSET	ti->ti_offset
+#endif
+
+/*
+ * Amount of excess space to allocate in the static TLS area
+ * to allow dynamic loading of modules defining IE-model TLS data.
+ * The expansion is parenthesized so that it stays a single operand
+ * wherever the macro is used.
+ */
+#define TLS_STATIC_SURPLUS	(64 + DL_NNS * 100)
+
+/* Value used for dtv entries for which the allocation is delayed. */
+#define TLS_DTV_UNALLOCATED	((void *) -1l)
+
+/*
+ * We are trying to perform a static TLS relocation in MAP, but it was
+ * dynamically loaded. This can only work if there is enough surplus in
+ * the static TLS area already allocated for each running thread. If this
+ * object's TLS segment is too big to fit, we fail. If it fits,
+ * we set MAP->l_tls_offset and return.
+ * This function intentionally does not return any value but signals error
+ * directly, as static TLS should be rare and code handling it should
+ * not be inlined as much as possible.
+ *
+ * On failure a message is written to file descriptor 2 and the process
+ * exits with status 30.  On success _dl_tls_static_used is advanced past
+ * the new block.
+ */
+void
+internal_function __attribute_noinline__
+_dl_allocate_static_tls (struct link_map *map)
+{
+	/* If the alignment requirements are too high fail. */
+	if (map->l_tls_align > _dl_tls_static_align)
+	{
+fail:
+		_dl_dprintf(2, "cannot allocate memory in static TLS block");
+		_dl_exit(30);
+	}
+
+# ifdef TLS_TCB_AT_TP
+	size_t freebytes;
+	size_t n;
+	size_t blsize;
+
+	/* Space not yet handed out, excluding the TCB itself. */
+	freebytes = _dl_tls_static_size - _dl_tls_static_used - TLS_TCB_SIZE;
+
+	blsize = map->l_tls_blocksize + map->l_tls_firstbyte_offset;
+	if (freebytes < blsize)
+		goto fail;
+
+	/* Slack left after the block, rounded down to the alignment. */
+	n = (freebytes - blsize) & ~(map->l_tls_align - 1);
+
+	size_t offset = _dl_tls_static_used + (freebytes - n
+										   - map->l_tls_firstbyte_offset);
+
+	map->l_tls_offset = _dl_tls_static_used = offset;
+# elif defined(TLS_DTV_AT_TP)
+	size_t used;
+	size_t check;
+
+	size_t offset = roundup_pow2 (_dl_tls_static_used, map->l_tls_align);
+	used = offset + map->l_tls_blocksize;
+	check = used;
+
+	/* dl_tls_static_used includes the TCB at the beginning. */
+	if (check > _dl_tls_static_size)
+		goto fail;
+
+	map->l_tls_offset = offset;
+	_dl_tls_static_used = used;
+# else
+#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
+# endif
+
+	/*
+	 * If the object is not yet relocated we cannot initialize the
+	 * static TLS region. Delay it.
+	 */
+	if (((struct elf_resolve *) map)->init_flag & RELOCS_DONE)
+	{
+#ifdef SHARED
+		/*
+		 * Update the slot information data for at least the generation of
+		 * the DSO we are allocating data for.
+		 */
+		if (__builtin_expect (THREAD_DTV()[0].counter != _dl_tls_generation, 0))
+			(void) _dl_update_slotinfo (map->l_tls_modid);
+#endif
+		_dl_init_static_tls (map);
+	}
+	else
+		map->l_need_tls_init = 1;
+}
+
+#ifdef SHARED
+/* Initialize static TLS area and DTV for current (only) thread.
+   libpthread implementations should provide their own hook
+   to handle all threads.
*/ +void +internal_function __attribute_noinline__ +_dl_nothread_init_static_tls (struct link_map *map) +{ +# ifdef TLS_TCB_AT_TP + void *dest = (char *) THREAD_SELF - map->l_tls_offset; +# elif defined(TLS_DTV_AT_TP) + void *dest = (char *) THREAD_SELF + map->l_tls_offset + TLS_PRE_TCB_SIZE; +# else +# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" +# endif + + /* Fill in the DTV slot so that a later LD/GD access will find it. */ + dtv_t *dtv = THREAD_DTV (); + if (!(map->l_tls_modid <= dtv[-1].counter)) { + _dl_dprintf(2, "map->l_tls_modid <= dtv[-1].counter FAILED!\n"); + _dl_exit(30); + } + dtv[map->l_tls_modid].pointer.val = dest; + dtv[map->l_tls_modid].pointer.is_static = true; + + /* Initialize the memory. */ + _dl_memcpy(dest, map->l_tls_initimage, map->l_tls_initimage_size); + _dl_memset((dest + map->l_tls_initimage_size), '\0', + map->l_tls_blocksize - map->l_tls_initimage_size); +} +#endif + +/* Taken from glibc/sysdeps/generic/dl-tls.c */ +static void +oom (void) +{ + _dl_debug_early("cannot allocate thread-local memory: ABORT\n"); + _dl_exit(30); +} + +size_t +internal_function +_dl_next_tls_modid (void) +{ + size_t result; + + if (__builtin_expect (_dl_tls_dtv_gaps, false)) + { + size_t disp = 0; + struct dtv_slotinfo_list *runp = _dl_tls_dtv_slotinfo_list; + + /* Note that this branch will never be executed during program + start since there are no gaps at that time. Therefore it + does not matter that the dl_tls_dtv_slotinfo is not allocated + yet when the function is called for the first times. + + NB: the offset +1 is due to the fact that DTV[0] is used + for something else. 
*/ + result = _dl_tls_static_nelem + 1; + if (result <= _dl_tls_max_dtv_idx) + do + { + while (result - disp < runp->len) + { + if (runp->slotinfo[result - disp].map == NULL) + break; + + ++result; + _dl_assert (result <= _dl_tls_max_dtv_idx + 1); + } + + if (result - disp < runp->len) + break; + + disp += runp->len; + } + while ((runp = runp->next) != NULL); + + if (result > _dl_tls_max_dtv_idx) + { + /* The new index must indeed be exactly one higher than the + previous high. */ + _dl_assert (result == _dl_tls_max_dtv_idx + 1); + /* There is no gap anymore. */ + _dl_tls_dtv_gaps = false; + + goto nogaps; + } + } + else + { + /* No gaps, allocate a new entry. */ + nogaps: + + result = ++_dl_tls_max_dtv_idx; + } + + return result; +} + +void +internal_function +_dl_determine_tlsoffset (void) +{ + size_t max_align = TLS_TCB_ALIGN; + size_t freetop = 0; + size_t freebottom = 0; + + /* The first element of the dtv slot info list is allocated. */ + _dl_assert (_dl_tls_dtv_slotinfo_list != NULL); + /* There is at this point only one element in the + dl_tls_dtv_slotinfo_list list. */ + _dl_assert (_dl_tls_dtv_slotinfo_list->next == NULL); + + struct dtv_slotinfo *slotinfo = _dl_tls_dtv_slotinfo_list->slotinfo; + + /* Determining the offset of the various parts of the static TLS + block has several dependencies. In addition we have to work + around bugs in some toolchains. + + Each TLS block from the objects available at link time has a size + and an alignment requirement. The GNU ld computes the alignment + requirements for the data at the positions *in the file*, though. + I.e, it is not simply possible to allocate a block with the size + of the TLS program header entry. The data is layed out assuming + that the first byte of the TLS block fulfills + + p_vaddr mod p_align == &TLS_BLOCK mod p_align + + This means we have to add artificial padding at the beginning of + the TLS block. 
These bytes are never used for the TLS data in + this module but the first byte allocated must be aligned + according to mod p_align == 0 so that the first byte of the TLS + block is aligned according to p_vaddr mod p_align. This is ugly + and the linker can help by computing the offsets in the TLS block + assuming the first byte of the TLS block is aligned according to + p_align. + + The extra space which might be allocated before the first byte of + the TLS block need not go unused. The code below tries to use + that memory for the next TLS block. This can work if the total + memory requirement for the next TLS block is smaller than the + gap. */ + +# ifdef TLS_TCB_AT_TP + /* We simply start with zero. */ + size_t offset = 0; + + for (size_t cnt = 1; slotinfo[cnt].map != NULL; ++cnt) + { + _dl_assert (cnt < _dl_tls_dtv_slotinfo_list->len); + + size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset + & (slotinfo[cnt].map->l_tls_align - 1)); + size_t off; + max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align); + + if (freebottom - freetop >= slotinfo[cnt].map->l_tls_blocksize) + { + off = roundup_pow2 (freetop + slotinfo[cnt].map->l_tls_blocksize + - firstbyte, slotinfo[cnt].map->l_tls_align) + + firstbyte; + if (off <= freebottom) + { + freetop = off; + + /* XXX For some architectures we perhaps should store the + negative offset. */ + slotinfo[cnt].map->l_tls_offset = off; + continue; + } + } + + off = roundup_pow2 (offset + slotinfo[cnt].map->l_tls_blocksize + - firstbyte, slotinfo[cnt].map->l_tls_align) + + firstbyte; + if (off > offset + slotinfo[cnt].map->l_tls_blocksize + + (freebottom - freetop)) + { + freetop = offset; + freebottom = off - slotinfo[cnt].map->l_tls_blocksize; + } + offset = off; + + /* XXX For some architectures we perhaps should store the + negative offset. 
*/ + slotinfo[cnt].map->l_tls_offset = off; + } + + _dl_tls_static_used = offset; + _dl_tls_static_size = (roundup_pow2 (offset + TLS_STATIC_SURPLUS, max_align) + + TLS_TCB_SIZE); +# elif defined(TLS_DTV_AT_TP) + /* The TLS blocks start right after the TCB. */ + size_t offset = TLS_TCB_SIZE; + size_t cnt; + + for (cnt = 1; slotinfo[cnt].map != NULL; ++cnt) + { + _dl_assert (cnt < _dl_tls_dtv_slotinfo_list->len); + + size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset + & (slotinfo[cnt].map->l_tls_align - 1)); + size_t off; + max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align); + + if (slotinfo[cnt].map->l_tls_blocksize <= freetop - freebottom) + { + off = roundup_pow2 (freebottom, slotinfo[cnt].map->l_tls_align); + if (off - freebottom < firstbyte) + off += slotinfo[cnt].map->l_tls_align; + if (off + slotinfo[cnt].map->l_tls_blocksize - firstbyte <= freetop) + { + slotinfo[cnt].map->l_tls_offset = off - firstbyte; + freebottom = (off + slotinfo[cnt].map->l_tls_blocksize + - firstbyte); + continue; + } + } + + off = roundup_pow2 (offset, slotinfo[cnt].map->l_tls_align); + if (off - offset < firstbyte) + off += slotinfo[cnt].map->l_tls_align; + + slotinfo[cnt].map->l_tls_offset = off - firstbyte; + if (off - firstbyte - offset > freetop - freebottom) + { + freebottom = offset; + freetop = off - firstbyte; + } + + offset = off + slotinfo[cnt].map->l_tls_blocksize - firstbyte; + } + + _dl_tls_static_used = offset; + _dl_tls_static_size = roundup_pow2 (offset + TLS_STATIC_SURPLUS, + TLS_TCB_ALIGN); +# else +# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" +# endif + + /* The alignment requirement for the static TLS block. */ + _dl_tls_static_align = max_align; +} + +/* This is called only when the data structure setup was skipped at startup, + when there was no need for it then. Now we have dynamically loaded + something needing TLS, or libpthread needs it. 
*/ +rtld_hidden_proto(_dl_tls_setup) +int +internal_function +_dl_tls_setup (void) +{ + _dl_assert (_dl_tls_dtv_slotinfo_list == NULL); + _dl_assert (_dl_tls_max_dtv_idx == 0); + + const size_t nelem = 2 + TLS_SLOTINFO_SURPLUS; + + _dl_tls_dtv_slotinfo_list + = _dl_calloc (1, (sizeof (struct dtv_slotinfo_list) + + nelem * sizeof (struct dtv_slotinfo))); + if (_dl_tls_dtv_slotinfo_list == NULL) + return -1; + + _dl_tls_dtv_slotinfo_list->len = nelem; + + /* Number of elements in the static TLS block. It can't be zero + because of various assumptions. The one element is null. */ + _dl_tls_static_nelem = _dl_tls_max_dtv_idx = 1; + + /* This initializes more variables for us. */ + _dl_determine_tlsoffset (); + + return 0; +} +rtld_hidden_def (_dl_tls_setup) + +static void * +internal_function +allocate_dtv (void *result) +{ + dtv_t *dtv; + size_t dtv_length; + + /* We allocate a few more elements in the dtv than are needed for the + initial set of modules. This should avoid in most cases expansions + of the dtv. */ + dtv_length = _dl_tls_max_dtv_idx + DTV_SURPLUS; + dtv = _dl_calloc (dtv_length + 2, sizeof (dtv_t)); + if (dtv != NULL) + { + /* This is the initial length of the dtv. */ + dtv[0].counter = dtv_length; + + /* The rest of the dtv (including the generation counter) is + Initialize with zero to indicate nothing there. */ + + /* Add the dtv to the thread data structures. */ + INSTALL_DTV (result, dtv); + } + else + result = NULL; + + return result; +} + +/* Get size and alignment requirements of the static TLS block. */ +void +internal_function +_dl_get_tls_static_info (size_t *sizep, size_t *alignp) +{ + *sizep = _dl_tls_static_size; + *alignp = _dl_tls_static_align; +} + +void * +internal_function +_dl_allocate_tls_storage (void) +{ + void *result; + size_t size = _dl_tls_static_size; + +# if defined(TLS_DTV_AT_TP) + /* Memory layout is: + [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ] + ^ This should be returned. 
*/ + size += (TLS_PRE_TCB_SIZE + _dl_tls_static_align - 1) + & ~(_dl_tls_static_align - 1); +# endif + + /* Allocate a correctly aligned chunk of memory. */ + result = _dl_memalign (_dl_tls_static_align, size); + if (__builtin_expect (result != NULL, 1)) + { + /* Allocate the DTV. */ + void *allocated = result; + +# ifdef TLS_TCB_AT_TP + /* The TCB follows the TLS blocks. */ + result = (char *) result + size - TLS_TCB_SIZE; + + /* Clear the TCB data structure. We can't ask the caller (i.e. + libpthread) to do it, because we will initialize the DTV et al. */ + _dl_memset (result, '\0', TLS_TCB_SIZE); +# elif defined(TLS_DTV_AT_TP) + result = (char *) result + size - _dl_tls_static_size; + + /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before it. + We can't ask the caller (i.e. libpthread) to do it, because we will + initialize the DTV et al. */ + _dl_memset ((char *) result - TLS_PRE_TCB_SIZE, '\0', + TLS_PRE_TCB_SIZE + TLS_TCB_SIZE); +# endif + + result = allocate_dtv (result); + if (result == NULL) + _dl_free (allocated); + } + + return result; +} + +void * +internal_function +_dl_allocate_tls_init (void *result) +{ + if (result == NULL) + /* The memory allocation failed. */ + return NULL; + + dtv_t *dtv = GET_DTV (result); + struct dtv_slotinfo_list *listp; + size_t total = 0; + size_t maxgen = 0; + + /* We have to prepare the dtv for all currently loaded modules using + TLS. For those which are dynamically loaded we add the values + indicating deferred allocation. */ + listp = _dl_tls_dtv_slotinfo_list; + while (1) + { + size_t cnt; + + for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt) + { + struct link_map *map; + void *dest; + + /* Check for the total number of used slots. */ + if (total + cnt > _dl_tls_max_dtv_idx) + break; + + map = listp->slotinfo[cnt].map; + if (map == NULL) + /* Unused entry. */ + continue; + + /* Keep track of the maximum generation number. This might + not be the generation counter. 
*/ + maxgen = MAX (maxgen, listp->slotinfo[cnt].gen); + + if (map->l_tls_offset == NO_TLS_OFFSET) + { + /* For dynamically loaded modules we simply store + the value indicating deferred allocation. */ + dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED; + dtv[map->l_tls_modid].pointer.is_static = false; + continue; + } + + _dl_assert (map->l_tls_modid == cnt); + _dl_assert (map->l_tls_blocksize >= map->l_tls_initimage_size); +# ifdef TLS_TCB_AT_TP + _dl_assert ((size_t) map->l_tls_offset >= map->l_tls_blocksize); + dest = (char *) result - map->l_tls_offset; +# elif defined(TLS_DTV_AT_TP) + dest = (char *) result + map->l_tls_offset; +# else +# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" +# endif + + /* Copy the initialization image and clear the BSS part. */ + dtv[map->l_tls_modid].pointer.val = dest; + dtv[map->l_tls_modid].pointer.is_static = true; + _dl_memcpy(dest, map->l_tls_initimage, map->l_tls_initimage_size); + _dl_memset((dest + map->l_tls_initimage_size), '\0', + map->l_tls_blocksize - map->l_tls_initimage_size); + + } + + total += cnt; + if (total >= _dl_tls_max_dtv_idx) + break; + + listp = listp->next; + _dl_assert (listp != NULL); + } + + /* The DTV version is up-to-date now. */ + dtv[0].counter = maxgen; + + return result; +} + +void * +internal_function +_dl_allocate_tls (void *mem) +{ + return _dl_allocate_tls_init (mem == NULL + ? _dl_allocate_tls_storage () + : allocate_dtv (mem)); +} + +void +internal_function +_dl_deallocate_tls (void *tcb, bool dealloc_tcb) +{ + dtv_t *dtv = GET_DTV (tcb); + size_t cnt; + + /* We need to free the memory allocated for non-static TLS. */ + for (cnt = 0; cnt < dtv[-1].counter; ++cnt) + if (! dtv[1 + cnt].pointer.is_static + && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED) + _dl_free (dtv[1 + cnt].pointer.val); + + /* The array starts with dtv[-1]. */ + if (dtv != _dl_initial_dtv) + _dl_free (dtv - 1); + + if (dealloc_tcb) + { +# ifdef TLS_TCB_AT_TP + /* The TCB follows the TLS blocks. 
Back up to free the whole block. */
+		tcb -= _dl_tls_static_size - TLS_TCB_SIZE;
+# elif defined(TLS_DTV_AT_TP)
+		/* Back up the TLS_PRE_TCB_SIZE bytes. */
+		tcb -= (TLS_PRE_TCB_SIZE + _dl_tls_static_align - 1)
+			& ~(_dl_tls_static_align - 1);
+# endif
+		_dl_free (tcb);
+	}
+}
+
+/* Allocate the TLS block for MAP with _dl_memalign, copy in the
+   initialization image and zero the rest of the block.  Exits with
+   status 1 if the allocation fails; otherwise returns the new block. */
+static void *
+allocate_and_init (struct link_map *map)
+{
+	void *newp;
+
+	newp = _dl_memalign (map->l_tls_align, map->l_tls_blocksize);
+	if (newp == NULL)
+	{
+		_dl_dprintf(2, "%s:%d: Out of memory!!!\n", __FUNCTION__, __LINE__);
+		_dl_exit(1);
+	}
+
+	/* Initialize the memory. */
+	_dl_memcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size);
+	_dl_memset ((newp + map->l_tls_initimage_size), '\0',
+		map->l_tls_blocksize - map->l_tls_initimage_size);
+
+	return newp;
+}
+
+/* Bring the calling thread's dtv up to the generation recorded in the
+   slotinfo entry for module REQ_MODID.  Entries created up to that
+   generation are reset to TLS_DTV_UNALLOCATED (freeing any dynamically
+   allocated block they held), and the dtv is enlarged when a module index
+   does not fit.  Returns the link map of REQ_MODID if its entry was
+   processed during this call, otherwise NULL. */
+struct link_map *
+_dl_update_slotinfo (unsigned long int req_modid)
+{
+	struct link_map *the_map = NULL;
+	dtv_t *dtv = THREAD_DTV ();
+
+	/* The global dl_tls_dtv_slotinfo array contains for each module
+	   index the generation counter current when the entry was created.
+	   This array never shrinks so that all module indices which were
+	   valid at some time can be used to access it. Before the first
+	   use of a new module index in this function the array was extended
+	   appropriately. Access also does not have to be guarded against
+	   modifications of the array. It is assumed that pointer-size
+	   values can be read atomically even in SMP environments. It is
+	   possible that other threads at the same time dynamically load
+	   code and therefore add to the slotinfo list. This is a problem
+	   since we must not pick up any information about incomplete work.
+	   The solution to this is to ignore all dtv slots which were
+	   created after the one we are currently interested. We know that
+	   dynamic loading for this module is completed and this is the last
+	   load operation we know finished. */
+	unsigned long int idx = req_modid;
+	struct dtv_slotinfo_list *listp = _dl_tls_dtv_slotinfo_list;
+
+	_dl_debug_early ("Updating slotinfo for module %d\n", req_modid);
+
+	while (idx >= listp->len)
+	{
+		idx -= listp->len;
+		listp = listp->next;
+	}
+
+	if (dtv[0].counter < listp->slotinfo[idx].gen)
+	{
+		/* The generation counter for the slot is higher than what the
+		   current dtv implements. We have to update the whole dtv but
+		   only those entries with a generation counter <= the one for
+		   the entry we need. */
+		size_t new_gen = listp->slotinfo[idx].gen;
+		size_t total = 0;
+
+		/* We have to look through the entire dtv slotinfo list. */
+		listp = _dl_tls_dtv_slotinfo_list;
+		do
+		{
+			size_t cnt;
+
+			for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
+			{
+				size_t gen = listp->slotinfo[cnt].gen;
+
+				if (gen > new_gen)
+					/* This is a slot for a generation younger than the
+					   one we are handling now. It might be incompletely
+					   set up so ignore it. */
+					continue;
+
+				/* If the entry is older than the current dtv layout we
+				   know we don't have to handle it. */
+				if (gen <= dtv[0].counter)
+					continue;
+
+				/* If there is no map this means the entry is empty. */
+				struct link_map *map = listp->slotinfo[cnt].map;
+				if (map == NULL)
+				{
+					/* If this modid was used at some point the memory
+					   might still be allocated. */
+					if (! dtv[total + cnt].pointer.is_static
+						&& dtv[total + cnt].pointer.val != TLS_DTV_UNALLOCATED)
+					{
+						_dl_free (dtv[total + cnt].pointer.val);
+						dtv[total + cnt].pointer.val = TLS_DTV_UNALLOCATED;
+					}
+
+					continue;
+				}
+
+				/* Check whether the current dtv array is large enough. */
+				size_t modid = map->l_tls_modid;
+				_dl_assert (total + cnt == modid);
+				if (dtv[-1].counter < modid)
+				{
+					/* Reallocate the dtv. */
+					dtv_t *newp;
+					size_t newsize = _dl_tls_max_dtv_idx + DTV_SURPLUS;
+					size_t oldsize = dtv[-1].counter;
+
+					_dl_assert (map->l_tls_modid <= newsize);
+
+					if (dtv == _dl_initial_dtv)
+					{
+						/* This is the initial dtv that was allocated
+						   during rtld startup using the dl-minimal.c
+						   malloc instead of the real malloc. We can't
+						   free it, we have to abandon the old storage. */
+
+						newp = _dl_malloc ((2 + newsize) * sizeof (dtv_t));
+						if (newp == NULL)
+							oom ();
+						/* The old array holds the length entry, the
+						   generation entry and oldsize module slots, so
+						   2 + oldsize entries must be carried over.  The
+						   memset below starts right after them. */
+						_dl_memcpy (newp, &dtv[-1],
+							(2 + oldsize) * sizeof (dtv_t));
+					}
+					else
+					{
+						newp = _dl_realloc (&dtv[-1],
+							(2 + newsize) * sizeof (dtv_t));
+						if (newp == NULL)
+							oom ();
+					}
+
+					newp[0].counter = newsize;
+
+					/* Clear the newly allocated part. */
+					_dl_memset (newp + 2 + oldsize, '\0',
+						(newsize - oldsize) * sizeof (dtv_t));
+
+					/* Point dtv to the generation counter. */
+					dtv = &newp[1];
+
+					/* Install this new dtv in the thread data
+					   structures. */
+					INSTALL_NEW_DTV (dtv);
+				}
+
+				/* If there is currently memory allocated for this
+				   dtv entry free it. */
+				/* XXX Ideally we will at some point create a memory
+				   pool. */
+				if (! dtv[modid].pointer.is_static
+					&& dtv[modid].pointer.val != TLS_DTV_UNALLOCATED)
+					/* Note that free is called for NULL as well. We
+					   deallocate even if it is this dtv entry we are
+					   supposed to load. The reason is that we call
+					   memalign and not malloc. */
+					_dl_free (dtv[modid].pointer.val);
+
+				/* This module is loaded dynamically.  We defer memory
+				   allocation. */
+				dtv[modid].pointer.is_static = false;
+				dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
+
+				if (modid == req_modid)
+					the_map = map;
+			}
+
+			total += listp->len;
+		}
+		while ((listp = listp->next) != NULL);
+
+		/* This will be the new maximum generation counter. */
+		dtv[0].counter = new_gen;
+	}
+
+	return the_map;
+}
+
+
+/* The generic dynamic and local dynamic model cannot be used in
+   statically linked applications.
*/ +void * +__tls_get_addr (GET_ADDR_ARGS) +{ + dtv_t *dtv = THREAD_DTV (); + struct link_map *the_map = NULL; + void *p; + + if (__builtin_expect (dtv[0].counter != _dl_tls_generation, 0)) + the_map = _dl_update_slotinfo (GET_ADDR_MODULE); + + p = dtv[GET_ADDR_MODULE].pointer.val; + + if (__builtin_expect (p == TLS_DTV_UNALLOCATED, 0)) + { + /* The allocation was deferred. Do it now. */ + if (the_map == NULL) + { + /* Find the link map for this module. */ + size_t idx = GET_ADDR_MODULE; + struct dtv_slotinfo_list *listp = _dl_tls_dtv_slotinfo_list; + + while (idx >= listp->len) + { + idx -= listp->len; + listp = listp->next; + } + + the_map = listp->slotinfo[idx].map; + } + + p = dtv[GET_ADDR_MODULE].pointer.val = allocate_and_init (the_map); + dtv[GET_ADDR_MODULE].pointer.is_static = false; + } + + return (char *) p + GET_ADDR_OFFSET; +} + +void +_dl_add_to_slotinfo (struct link_map *l) +{ + /* Now that we know the object is loaded successfully add + modules containing TLS data to the dtv info table. We + might have to increase its size. */ + struct dtv_slotinfo_list *listp; + struct dtv_slotinfo_list *prevp; + size_t idx = l->l_tls_modid; + + _dl_debug_early("Adding to slotinfo for %s\n", l->l_name); + + /* Find the place in the dtv slotinfo list. */ + listp = _dl_tls_dtv_slotinfo_list; + prevp = NULL; /* Needed to shut up gcc. */ + do + { + /* Does it fit in the array of this list element? */ + if (idx < listp->len) + break; + idx -= listp->len; + prevp = listp; + listp = listp->next; + } + while (listp != NULL); + + if (listp == NULL) + { + /* When we come here it means we have to add a new element + to the slotinfo list. And the new module must be in + the first slot. */ + _dl_assert (idx == 0); + + listp = prevp->next = (struct dtv_slotinfo_list *) + _dl_malloc (sizeof (struct dtv_slotinfo_list) + + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); + if (listp == NULL) + { + /* We ran out of memory. 
We will simply fail this + call but don't undo anything we did so far. The + application will crash or be terminated anyway very + soon. */ + + /* We have to do this since some entries in the dtv + slotinfo array might already point to this + generation. */ + ++_dl_tls_generation; + + _dl_dprintf (_dl_debug_file, + "cannot create TLS data structures: ABORT\n"); + _dl_exit (127); + } + + listp->len = TLS_SLOTINFO_SURPLUS; + listp->next = NULL; + _dl_memset (listp->slotinfo, '\0', + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); + } + + /* Add the information into the slotinfo data structure. */ + listp->slotinfo[idx].map = l; + listp->slotinfo[idx].gen = _dl_tls_generation + 1; + /* ??? ideally this would be done once per call to dlopen. However there's + no easy way to indicate whether a library used TLS, so do it here + instead. */ + /* Bump the TLS generation number. */ + _dl_tls_generation++; +} + +/* Taken from glibc/elf/rtld.c */ +static bool tls_init_tp_called; + +/* _dl_error_catch_tsd points to this for the single-threaded case. + It's reset by the thread library for multithreaded programs. */ +void ** __attribute__ ((const)) +_dl_initial_error_catch_tsd (void) +{ + static void *data; + return &data; +} + +#ifdef SHARED +void* +internal_function +init_tls (void); + +rtld_hidden_proto(init_tls) +void * +internal_function +init_tls (void) +{ + /* Number of elements in the static TLS block. */ + _dl_tls_static_nelem = _dl_tls_max_dtv_idx; + + /* Do not do this twice. The audit interface might have required + the DTV interfaces to be set up early. */ + if (_dl_initial_dtv != NULL) + return NULL; + + /* Allocate the array which contains the information about the + dtv slots. We allocate a few entries more than needed to + avoid the need for reallocation. */ + size_t nelem = _dl_tls_max_dtv_idx + 1 + TLS_SLOTINFO_SURPLUS; + + /* Allocate. 
*/ + _dl_assert (_dl_tls_dtv_slotinfo_list == NULL); + _dl_tls_dtv_slotinfo_list = (struct dtv_slotinfo_list *) + _dl_calloc (sizeof (struct dtv_slotinfo_list) + + nelem * sizeof (struct dtv_slotinfo), 1); + /* No need to check the return value. If memory allocation failed + the program would have been terminated. */ + + struct dtv_slotinfo *slotinfo = _dl_tls_dtv_slotinfo_list->slotinfo; + _dl_tls_dtv_slotinfo_list->len = nelem; + _dl_tls_dtv_slotinfo_list->next = NULL; + + /* Fill in the information from the loaded modules. No namespace + but the base one can be filled at this time. */ + int i = 0; + struct link_map *l; + for (l = (struct link_map *) _dl_loaded_modules; l != NULL; l = l->l_next) + if (l->l_tls_blocksize != 0) + { + /* This is a module with TLS data. Store the map reference. + The generation counter is zero. */ + + /* Skeep slot[0]: it will be never used */ + slotinfo[++i].map = l; + } + _dl_assert (i == _dl_tls_max_dtv_idx); + + /* Compute the TLS offsets for the various blocks. */ + _dl_determine_tlsoffset (); + + /* Construct the static TLS block and the dtv for the initial + thread. For some platforms this will include allocating memory + for the thread descriptor. The memory for the TLS block will + never be freed. It should be allocated accordingly. The dtv + array can be changed if dynamic loading requires it. */ + void *tcbp = _dl_allocate_tls_storage (); + if (tcbp == NULL) { + _dl_debug_early("\ncannot allocate TLS data structures for initial thread"); + _dl_exit(30); + } + + /* Store for detection of the special case by __tls_get_addr + so it knows not to pass this dtv to the normal realloc. */ + _dl_initial_dtv = GET_DTV (tcbp); + + /* And finally install it for the main thread. If ld.so itself uses + TLS we know the thread pointer was initialized earlier. 
*/ + const char *lossage = TLS_INIT_TP (tcbp, USE___THREAD); + if(__builtin_expect (lossage != NULL, 0)) { + _dl_debug_early("cannot set up thread-local storage: %s\n", lossage); + _dl_exit(30); + } + tls_init_tp_called = true; + + return tcbp; +} +rtld_hidden_def (init_tls) +#endif + diff --git a/ldso/ldso/i386/dl-sysdep.h b/ldso/ldso/i386/dl-sysdep.h index 6e84861e4..a66c80212 100644 --- a/ldso/ldso/i386/dl-sysdep.h +++ b/ldso/ldso/i386/dl-sysdep.h @@ -31,7 +31,9 @@ extern unsigned long _dl_l