Diffstat (limited to 'libpthread/nptl/allocatestack.c')
-rw-r--r--  libpthread/nptl/allocatestack.c  1216
1 file changed, 1216 insertions, 0 deletions
diff --git a/libpthread/nptl/allocatestack.c b/libpthread/nptl/allocatestack.c
new file mode 100644
index 000000000..7d4f9fd1c
--- /dev/null
+++ b/libpthread/nptl/allocatestack.c
@@ -0,0 +1,1216 @@
+/* Copyright (C) 2002-2007, 2009 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <tls.h>
+#include <lowlevellock.h>
+#include <link.h>
+#include <bits/kernel-features.h>
+
+
+#ifndef NEED_SEPARATE_REGISTER_STACK
+
+/* Most architectures have exactly one stack pointer.  Some have more.  */
+# define STACK_VARIABLES void *stackaddr = NULL
+
+/* How to pass the values to the 'create_thread' function.  */
+# define STACK_VARIABLES_ARGS stackaddr
+
+/* How to declare the function that gets these parameters.  */
+# define STACK_VARIABLES_PARMS void *stackaddr
+
+/* How to declare allocate_stack.  */
+# define ALLOCATE_STACK_PARMS void **stack
+
+/* This is how the function is called.  We do it this way to allow
+   other variants of the function to have more parameters.  */
+# define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)
+
+#else
+
+/* We need two stacks.  The kernel will place them but we have to tell
+   the kernel about the size of the reserved address space.  */
+# define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0
+
+/* How to pass the values to the 'create_thread' function.  */
+# define STACK_VARIABLES_ARGS stackaddr, stacksize
+
+/* How to declare the function that gets these parameters.  */
+# define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize
+
+/* How to declare allocate_stack.  */
+# define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize
+
+/* This is how the function is called.  We do it this way to allow
+   other variants of the function to have more parameters.  */
+# define ALLOCATE_STACK(attr, pd) \
+  allocate_stack (attr, pd, &stackaddr, &stacksize)
+
+#endif
+
+
+/* Default alignment of stack.  */
+#ifndef STACK_ALIGN
+# define STACK_ALIGN __alignof__ (long double)
+#endif
+
+/* Default value for minimal stack size after allocating thread
+   descriptor and guard.  */
+#ifndef MINIMAL_REST_STACK
+# define MINIMAL_REST_STACK	4096
+#endif
+
+
+/* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
+   a stack.  Use it when possible.  */
+#ifndef MAP_STACK
+# define MAP_STACK 0
+#endif
+
+/* This yields the pointer that TLS support code calls the thread pointer.  */
+#if defined(TLS_TCB_AT_TP)
+# define TLS_TPADJ(pd) (pd)
+#elif defined(TLS_DTV_AT_TP)
+# define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
+#endif
+
+/* Cache handling for not-yet free stacks.  */
+
+/* Maximum size in bytes of the stack cache.  */
+static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default.  */
+static size_t stack_cache_actsize;
+
+/* Mutex protecting this variable.  */
+static int stack_cache_lock = LLL_LOCK_INITIALIZER;
+
+/* List of queued stack frames.  */
+static LIST_HEAD (stack_cache);
+
+/* List of the stacks in use.  */
+static LIST_HEAD (stack_used);
+
+/* We need to record what list operations we are going to do so that,
+   in case of an asynchronous interruption due to a fork() call, we
+   can correct for the work.  */
+static uintptr_t in_flight_stack;
+
+/* List of the threads with user provided stacks in use.  No need to
+   initialize this, since it's done in __pthread_initialize_minimal.  */
+list_t __stack_user __attribute__ ((nocommon));
+hidden_data_def (__stack_user)
+
+#if COLORING_INCREMENT != 0
+/* Number of threads created.  */
+static unsigned int nptl_ncreated;
+#endif
+
+
+/* Check whether the stack is still used or not.  */
+#define FREE_P(descr) ((descr)->tid <= 0)
+
+
+static void
+stack_list_del (list_t *elem)
+{
+  in_flight_stack = (uintptr_t) elem;
+
+  atomic_write_barrier ();
+
+  list_del (elem);
+
+  atomic_write_barrier ();
+
+  in_flight_stack = 0;
+}
+
+
+static void
+stack_list_add (list_t *elem, list_t *list)
+{
+  in_flight_stack = (uintptr_t) elem | 1;
+
+  atomic_write_barrier ();
+
+  list_add (elem, list);
+
+  atomic_write_barrier ();
+
+  in_flight_stack = 0;
+}
+
+
+/* We create a doubly-linked list of all cache entries.  Doubly-linked
+   because this allows removing entries from the end.  */
+
+
+/* Get a stack frame from the cache.  We have to match by size since
+   some blocks might be too small or far too large.  */
+static struct pthread *
+get_cached_stack (size_t *sizep, void **memp)
+{
+  size_t size = *sizep;
+  struct pthread *result = NULL;
+  list_t *entry;
+
+  lll_lock (stack_cache_lock, LLL_PRIVATE);
+
+  /* Search the cache for a matching entry.  We search for the
+     smallest stack which has at least the required size.  Note that
+     in normal situations the size of all allocated stacks is the
+     same.  At the very least there are only a few different sizes.
+     Therefore this loop will exit early most of the time with an
+     exact match.  */
+  list_for_each (entry, &stack_cache)
+    {
+      struct pthread *curr;
+
+      curr = list_entry (entry, struct pthread, list);
+      if (FREE_P (curr) && curr->stackblock_size >= size)
+	{
+	  if (curr->stackblock_size == size)
+	    {
+	      result = curr;
+	      break;
+	    }
+
+	  if (result == NULL
+	      || result->stackblock_size > curr->stackblock_size)
+	    result = curr;
+	}
+    }
+
+  if (__builtin_expect (result == NULL, 0)
+      /* Make sure the size difference is not too excessive.  In that
+	 case we do not use the block.  */
+      || __builtin_expect (result->stackblock_size > 4 * size, 0))
+    {
+      /* Release the lock.  */
+      lll_unlock (stack_cache_lock, LLL_PRIVATE);
+
+      return NULL;
+    }
+
+  /* Dequeue the entry.  */
+  stack_list_del (&result->list);
+
+  /* And add to the list of stacks in use.  */
+  stack_list_add (&result->list, &stack_used);
+
+  /* And decrease the cache size.  */
+  stack_cache_actsize -= result->stackblock_size;
+
+  /* Release the lock early.  */
+  lll_unlock (stack_cache_lock, LLL_PRIVATE);
+
+  /* Report size and location of the stack to the caller.  */
+  *sizep = result->stackblock_size;
+  *memp = result->stackblock;
+
+  /* Cancellation handling is back to the default.  */
+  result->cancelhandling = 0;
+  result->cleanup = NULL;
+
+  /* No pending event.  */
+  result->nextevent = NULL;
+
+  /* Clear the DTV.  */
+  dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
+  memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
+
+  /* Re-initialize the TLS.  */
+  _dl_allocate_tls_init (TLS_TPADJ (result));
+
+  return result;
+}
+
+
+/* Free stacks until cache size is lower than LIMIT.  */
+void
+__free_stacks (size_t limit)
+{
+  /* We reduce the size of the cache.  Remove the last entries until
+     the size is below the limit.  */
+  list_t *entry;
+  list_t *prev;
+
+  /* Search from the end of the list.  */
+  list_for_each_prev_safe (entry, prev, &stack_cache)
+    {
+      struct pthread *curr;
+
+      curr = list_entry (entry, struct pthread, list);
+      if (FREE_P (curr))
+	{
+	  /* Unlink the block.  */
+	  stack_list_del (entry);
+
+	  /* Account for the freed memory.  */
+	  stack_cache_actsize -= curr->stackblock_size;
+
+	  /* Free the memory associated with the ELF TLS.  */
+	  _dl_deallocate_tls (TLS_TPADJ (curr), false);
+
+	  /* Remove this block.  This should never fail.  If it does
+	     something is really wrong.  */
+	  if (munmap (curr->stackblock, curr->stackblock_size) != 0)
+	    abort ();
+
+	  /* Maybe we have freed enough.  */
+	  if (stack_cache_actsize <= limit)
+	    break;
+	}
+    }
+}
+
+
+/* Add a stack which is not used anymore to the cache.  Must be
+   called with the cache lock held.  */
+static inline void
+__attribute ((always_inline))
+queue_stack (struct pthread *stack)
+{
+  /* We unconditionally add the stack to the list.  The memory may
+     still be in use but it will not be reused until the kernel marks
+     the stack as not used anymore.  */
+  stack_list_add (&stack->list, &stack_cache);
+
+  stack_cache_actsize += stack->stackblock_size;
+  if (__builtin_expect (stack_cache_actsize > stack_cache_maxsize, 0))
+    __free_stacks (stack_cache_maxsize);
+}
+
+
+static int
+internal_function
+change_stack_perm (struct pthread *pd
+#ifdef NEED_SEPARATE_REGISTER_STACK
+		   , size_t pagemask
+#endif
+		   )
+{
+#ifdef NEED_SEPARATE_REGISTER_STACK
+  void *stack = (pd->stackblock
+		 + (((((pd->stackblock_size - pd->guardsize) / 2)
+		      & pagemask) + pd->guardsize) & pagemask));
+  size_t len = pd->stackblock + pd->stackblock_size - stack;
+#elif _STACK_GROWS_DOWN
+  void *stack = pd->stackblock + pd->guardsize;
+  size_t len = pd->stackblock_size - pd->guardsize;
+#elif _STACK_GROWS_UP
+  void *stack = pd->stackblock;
+  size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
+#else
+# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
+#endif
+  if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
+    return errno;
+
+  return 0;
+}
+
+
+static int
+allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
+		ALLOCATE_STACK_PARMS)
+{
+  struct pthread *pd;
+  size_t size;
+  size_t pagesize_m1 = __getpagesize () - 1;
+  void *stacktop;
+
+  assert (attr != NULL);
+  assert (powerof2 (pagesize_m1 + 1));
+  assert (TCB_ALIGNMENT >= STACK_ALIGN);
+
+  /* Get the stack size from the attribute if it is set.  Otherwise we
+     use the default we determined at start time.  */
+  size = attr->stacksize ?: __default_stacksize;
+
+  /* Get memory for the stack.  */
+  if (__builtin_expect (attr->flags & ATTR_FLAG_STACKADDR, 0))
+    {
+      uintptr_t adj;
+
+      /* If the user also specified the size of the stack make sure it
+	 is large enough.  */
+      if (attr->stacksize != 0
+	  && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
+	return EINVAL;
+
+      /* Adjust stack size for alignment of the TLS block.  */
+#if defined(TLS_TCB_AT_TP)
+      adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE)
+	    & __static_tls_align_m1;
+      assert (size > adj + TLS_TCB_SIZE);
+#elif defined(TLS_DTV_AT_TP)
+      adj = ((uintptr_t) attr->stackaddr - __static_tls_size)
+	    & __static_tls_align_m1;
+      assert (size > adj);
+#endif
+
+      /* The user provided some memory.  Let's hope it matches the
+	 size...  We do not allocate guard pages if the user provided
+	 the stack.  It is the user's responsibility to do this if it
+	 is wanted.  */
+#if defined(TLS_TCB_AT_TP)
+      pd = (struct pthread *) ((uintptr_t) attr->stackaddr
+			       - TLS_TCB_SIZE - adj);
+#elif defined(TLS_DTV_AT_TP)
+      pd = (struct pthread *) (((uintptr_t) attr->stackaddr
+			        - __static_tls_size - adj)
+			       - TLS_PRE_TCB_SIZE);
+#endif
+
+      /* The user provided stack memory needs to be cleared.  */
+      memset (pd, '\0', sizeof (struct pthread));
+
+      /* The first TSD block is included in the TCB.  */
+      pd->specific[0] = pd->specific_1stblock;
+
+      /* Remember the stack-related values.  */
+      pd->stackblock = (char *) attr->stackaddr - size;
+      pd->stackblock_size = size;
+
+      /* This is a user-provided stack.  It will not be queued in the
+	 stack cache nor will the memory (except the TLS memory) be freed.  */
+      pd->user_stack = true;
+
+      /* This is at least the second thread.  */
+      pd->header.multiple_threads = 1;
+#ifndef TLS_MULTIPLE_THREADS_IN_TCB
+      __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
+#endif
+
+#ifndef __ASSUME_PRIVATE_FUTEX
+      /* The thread must know when private futexes are supported.  */
+      pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
+						header.private_futex);
+#endif
+
+#ifdef NEED_DL_SYSINFO
+      /* Copy the sysinfo value from the parent.  */
+      THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
+#endif
+
+      /* The process ID is also the same as that of the caller.  */
+      pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
+
+      /* Allocate the DTV for this thread.  */
+      if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
+	{
+	  /* Something went wrong.  */
+	  assert (errno == ENOMEM);
+	  return EAGAIN;
+	}
+
+
+      /* Prepare to modify global data.  */
+      lll_lock (stack_cache_lock, LLL_PRIVATE);
+
+      /* And add to the list of stacks in use.  */
+      list_add (&pd->list, &__stack_user);
+
+      lll_unlock (stack_cache_lock, LLL_PRIVATE);
+    }
+  else
+    {
+      /* Allocate some anonymous memory.  If possible use the cache.  */
+      size_t guardsize;
+      size_t reqsize;
+      void *mem = 0;
+      const int prot = (PROT_READ | PROT_WRITE);
+
+#if COLORING_INCREMENT != 0
+      /* Add one more page for stack coloring.  Don't do it for stacks
+	 with 16 times pagesize or larger.  This might just cause
+	 unnecessary misalignment.  */
+      if (size <= 16 * pagesize_m1)
+	size += pagesize_m1 + 1;
+#endif
+
+      /* Adjust the stack size for alignment.  */
+      size &= ~__static_tls_align_m1;
+      assert (size != 0);
+
+      /* Make sure the size of the stack is enough for the guard and
+	 possibly the thread descriptor.  */
+      guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
+      if (__builtin_expect (size < ((guardsize + __static_tls_size
+				     + MINIMAL_REST_STACK + pagesize_m1)
+				    & ~pagesize_m1),
+			    0))
+	/* The stack is too small (or the guard too large).  */
+	return EINVAL;
+
+      /* Try to get a stack from the cache.  */
+      reqsize = size;
+      pd = get_cached_stack (&size, &mem);
+      if (pd == NULL)
+	{
+	  /* To avoid aliasing effects on a larger scale than pages we
+	     adjust the allocated stack size if necessary.  This way
+	     allocations directly following each other will not have
+	     aliasing problems.  */
+#if MULTI_PAGE_ALIASING != 0
+	  if ((size % MULTI_PAGE_ALIASING) == 0)
+	    size += pagesize_m1 + 1;
+#endif
+
+	  mem = mmap (NULL, size, prot,
+		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+
+	  if (__builtin_expect (mem == MAP_FAILED, 0))
+	    {
+	      if (errno == ENOMEM)
+		__set_errno (EAGAIN);
+
+	       return errno;
+	    }
+
+	  /* SIZE is guaranteed to be greater than zero.
+	     So we can never get a null pointer back from mmap.  */
+	  assert (mem != NULL);
+
+#if COLORING_INCREMENT != 0
+	  /* Atomically increment NCREATED.  */
+	  unsigned int ncreated = atomic_increment_val (&nptl_ncreated);
+
+	  /* We choose the offset for coloring by incrementing it for
+	     every new thread by a fixed amount.  The offset is used
+	     modulo the page size.  Even if coloring would be better
+	     relative to higher alignment values it makes no sense to
+	     do it since the mmap() interface does not allow us to
+	     specify any alignment for the returned memory block.  */
+	  size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;
+
+	  /* Make sure the coloring offset does not disturb the alignment
+	     of the TCB and static TLS block.  */
+	  if (__builtin_expect ((coloring & __static_tls_align_m1) != 0, 0))
+	    coloring = (((coloring + __static_tls_align_m1)
+			 & ~(__static_tls_align_m1))
+			& ~pagesize_m1);
+#else
+	  /* Unless specified we do not make any adjustments.  */
+# define coloring 0
+#endif
+
+	  /* Place the thread descriptor at the end of the stack.  */
+#if defined(TLS_TCB_AT_TP)
+	  pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
+#elif defined(TLS_DTV_AT_TP)
+	  pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
+				    - __static_tls_size)
+				    & ~__static_tls_align_m1)
+				   - TLS_PRE_TCB_SIZE);
+#endif
+
+	  /* Remember the stack-related values.  */
+	  pd->stackblock = mem;
+	  pd->stackblock_size = size;
+
+	  /* We allocated the first block of the thread-specific data
+	     array.  This address will not change for the lifetime of
+	     this descriptor.  */
+	  pd->specific[0] = pd->specific_1stblock;
+
+	  /* This is at least the second thread.  */
+	  pd->header.multiple_threads = 1;
+#ifndef TLS_MULTIPLE_THREADS_IN_TCB
+	  __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
+#endif
+
+#ifndef __ASSUME_PRIVATE_FUTEX
+	  /* The thread must know when private futexes are supported.  */
+	  pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
+                                                    header.private_futex);
+#endif
+
+#ifdef NEED_DL_SYSINFO
+	  /* Copy the sysinfo value from the parent.  */
+	  THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
+#endif
+
+	  /* The process ID is also the same as that of the caller.  */
+	  pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
+
+	  /* Allocate the DTV for this thread.  */
+	  if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
+	    {
+	      /* Something went wrong.  */
+	      assert (errno == ENOMEM);
+
+	      /* Free the stack memory we just allocated.  */
+	      (void) munmap (mem, size);
+
+	      return EAGAIN;
+	    }
+
+
+	  /* Prepare to modify global data.  */
+	  lll_lock (stack_cache_lock, LLL_PRIVATE);
+
+	  /* And add to the list of stacks in use.  */
+	  stack_list_add (&pd->list, &stack_used);
+
+	  lll_unlock (stack_cache_lock, LLL_PRIVATE);
+
+
+	  /* Note that all of the stack and the thread descriptor is
+	     zeroed.  This means we do not have to initialize fields
+	     with initial value zero.  This is specifically true for
+	     the 'tid' field which is always set back to zero once the
+	     stack is not used anymore and for the 'guardsize' field
+	     which will be read next.  */
+	}
+
+      /* Create or resize the guard area if necessary.  */
+      if (__builtin_expect (guardsize > pd->guardsize, 0))
+	{
+#ifdef NEED_SEPARATE_REGISTER_STACK
+	  char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
+#elif _STACK_GROWS_DOWN
+	  char *guard = mem;
+#elif _STACK_GROWS_UP
+	  char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
+#endif
+	  if (mprotect (guard, guardsize, PROT_NONE) != 0)
+	    {
+	      int err;
+	    mprot_error:
+	      err = errno;
+
+	      lll_lock (stack_cache_lock, LLL_PRIVATE);
+
+	      /* Remove the thread from the list.  */
+	      stack_list_del (&pd->list);
+
+	      lll_unlock (stack_cache_lock, LLL_PRIVATE);
+
+	      /* Get rid of the TLS block we allocated.  */
+	      _dl_deallocate_tls (TLS_TPADJ (pd), false);
+
+	      /* Free the stack memory regardless of whether the size
+		 of the cache is over the limit or not.  If this piece
+		 of memory caused problems we better do not use it
+		 anymore.  Uh, and we ignore possible errors.  There
+		 is nothing we could do.  */
+	      (void) munmap (mem, size);
+
+	      return err;
+	    }
+
+	  pd->guardsize = guardsize;
+	}
+      else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
+				 0))
+	{
+	  /* The old guard area is too large.  */
+
+#ifdef NEED_SEPARATE_REGISTER_STACK
+	  char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
+	  char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);
+
+	  if (oldguard < guard
+	      && mprotect (oldguard, guard - oldguard, prot) != 0)
+	    goto mprot_error;
+
+	  if (mprotect (guard + guardsize,
+			oldguard + pd->guardsize - guard - guardsize,
+			prot) != 0)
+	    goto mprot_error;
+#elif _STACK_GROWS_DOWN
+	  if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
+			prot) != 0)
+	    goto mprot_error;
+#elif _STACK_GROWS_UP
+	  if (mprotect ((char *) pd - pd->guardsize,
+			pd->guardsize - guardsize, prot) != 0)
+	    goto mprot_error;
+#endif
+
+	  pd->guardsize = guardsize;
+	}
+      /* The pthread_getattr_np() calls need to get passed the size
+	 requested in the attribute, regardless of how large the
+	 actually used guardsize is.  */
+      pd->reported_guardsize = guardsize;
+    }
+
+  /* Initialize the lock.  We have to do this unconditionally since the
+     stillborn thread could be canceled while the lock is taken.  */
+  pd->lock = LLL_LOCK_INITIALIZER;
+
+  /* The robust mutex lists also need to be initialized
+     unconditionally because the cleanup for the previous stack owner
+     might have happened in the kernel.  */
+  pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
+				  - offsetof (pthread_mutex_t,
+					      __data.__list.__next));
+  pd->robust_head.list_op_pending = NULL;
+#ifdef __PTHREAD_MUTEX_HAVE_PREV
+  pd->robust_prev = &pd->robust_head;
+#endif
+  pd->robust_head.list = &pd->robust_head;
+
+  /* We place the thread descriptor at the end of the stack.  */
+  *pdp = pd;
+
+#if defined(TLS_TCB_AT_TP)
+  /* The stack begins before the TCB and the static TLS block.  */
+  stacktop = ((char *) (pd + 1) - __static_tls_size);
+#elif defined(TLS_DTV_AT_TP)
+  stacktop = (char *) (pd - 1);
+#endif
+
+#ifdef NEED_SEPARATE_REGISTER_STACK
+  *stack = pd->stackblock;
+  *stacksize = stacktop - *stack;
+#elif _STACK_GROWS_DOWN
+  *stack = stacktop;
+#elif _STACK_GROWS_UP
+  *stack = pd->stackblock;
+  assert (*stack > 0);
+#endif
+
+  return 0;
+}
+
+
+void
+internal_function
+__deallocate_stack (struct pthread *pd)
+{
+  lll_lock (stack_cache_lock, LLL_PRIVATE);
+
+  /* Remove the thread from the list of threads with user defined
+     stacks.  */
+  stack_list_del (&pd->list);
+
+  /* Not much to do.  Just free the mmap()ed memory.  Note that we do
+     not reset the 'used' flag in the 'tid' field.  This is done by
+     the kernel.  If no thread has been created yet this field is
+     still zero.  */
+  if (__builtin_expect (! pd->user_stack, 1))
+    (void) queue_stack (pd);
+  else
+    /* Free the memory associated with the ELF TLS.  */
+    _dl_deallocate_tls (TLS_TPADJ (pd), false);
+
+  lll_unlock (stack_cache_lock, LLL_PRIVATE);
+}
+
+
+int
+internal_function
+__make_stacks_executable (void **stack_endp)
+{
+  /* First the main thread's stack.  */
+  int err = EPERM;
+  if (err != 0)
+    return err;
+
+#ifdef NEED_SEPARATE_REGISTER_STACK
+  const size_t pagemask = ~(__getpagesize () - 1);
+#endif
+
+  lll_lock (stack_cache_lock, LLL_PRIVATE);
+
+  list_t *runp;
+  list_for_each (runp, &stack_used)
+    {
+      err = change_stack_perm (list_entry (runp, struct pthread, list)
+#ifdef NEED_SEPARATE_REGISTER_STACK
+			       , pagemask
+#endif
+			       );
+      if (err != 0)
+	break;
+    }
+
+  /* Also change the permission for the currently unused stacks.  This
+     might be wasted time but better spend it here than adding a check
+     in the fast path.  */
+  if (err == 0)
+    list_for_each (runp, &stack_cache)
+      {
+	err = change_stack_perm (list_entry (runp, struct pthread, list)
+#ifdef NEED_SEPARATE_REGISTER_STACK
+				 , pagemask
+#endif
+				 );
+	if (err != 0)
+	  break;
+      }
+
+  lll_unlock (stack_cache_lock, LLL_PRIVATE);
+
+  return err;
+}
+
+
+/* In case of a fork() call the memory allocation in the child will be
+   the same but only one thread is running.  All stacks except that of
+   the one running thread are not used anymore.  We have to recycle
+   them.  */
+void
+__reclaim_stacks (void)
+{
+  struct pthread *self = (struct pthread *) THREAD_SELF;
+
+  /* No locking necessary.  The caller is the only stack in use.  But
+     we have to be aware that we might have interrupted a list
+     operation.  */
+
+  if (in_flight_stack != 0)
+    {
+      bool add_p = in_flight_stack & 1;
+      list_t *elem = (list_t *) (in_flight_stack & ~UINTMAX_C (1));
+
+      if (add_p)
+	{
+	  /* We always add at the beginning of the list.  So in this
+	     case we only need to check the beginning of these lists.  */
+	  int check_list (list_t *l)
+	  {
+	    if (l->next->prev != l)
+	      {
+		assert (l->next->prev == elem);
+
+		elem->next = l->next;
+		elem->prev = l;
+		l->next = elem;
+
+		return 1;
+	      }
+
+	    return 0;
+	  }
+
+	  if (check_list (&stack_used) == 0)
+	    (void) check_list (&stack_cache);
+	}
+      else
+	{
+	  /* We can simply always replay the delete operation.  */
+	  elem->next->prev = elem->prev;
+	  elem->prev->next = elem->next;
+	}
+    }
+
+  /* Mark all stacks except the still running one as free.  */
+  list_t *runp;
+  list_for_each (runp, &stack_used)
+    {
+      struct pthread *curp = list_entry (runp, struct pthread, list);
+      if (curp != self)
+	{
+	  /* This marks the stack as free.  */
+	  curp->tid = 0;
+
+	  /* The PID field must be initialized for the new process.  */
+	  curp->pid = self->pid;
+
+	  /* Account for the size of the stack.  */
+	  stack_cache_actsize += curp->stackblock_size;
+
+	  if (curp->specific_used)
+	    {
+	      /* Clear the thread-specific data.  */
+	      memset (curp->specific_1stblock, '\0',
+		      sizeof (curp->specific_1stblock));
+
+	      curp->specific_used = false;
+
+	      for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
+		if (curp->specific[cnt] != NULL)
+		  {
+		    memset (curp->specific[cnt], '\0',
+			    sizeof (curp->specific_1stblock));
+
+		    /* We have allocated the block which we do not
+		       free here so re-set the bit.  */
+		    curp->specific_used = true;
+		  }
+	    }
+	}
+    }
+
+  /* Reset the PIDs in any cached stacks.  */
+  list_for_each (runp, &stack_cache)
+    {
+      struct pthread *curp = list_entry (runp, struct pthread, list);
+      curp->pid = self->pid;
+    }
+
+  /* Add the stack of all running threads to the cache.  */
+  list_splice (&stack_used, &stack_cache);
+
+  /* Remove the entry for the current thread from the cache list
+     and add it to the list of running threads.  Which of the two
+     lists is decided by the user_stack flag.  */
+  stack_list_del (&self->list);
+
+  /* Re-initialize the lists for all the threads.  */
+  INIT_LIST_HEAD (&stack_used);
+  INIT_LIST_HEAD (&__stack_user);
+
+  if (__builtin_expect (THREAD_GETMEM (self, user_stack), 0))
+    list_add (&self->list, &__stack_user);
+  else
+    list_add (&self->list, &stack_used);
+
+  /* There is one thread running.  */
+  __nptl_nthreads = 1;
+
+  in_flight_stack = 0;
+
+  /* Initialize the lock.  */
+  stack_cache_lock = LLL_LOCK_INITIALIZER;
+}
+
+
+#if HP_TIMING_AVAIL
+# undef __find_thread_by_id
+/* Find a thread given the thread ID.  */
+attribute_hidden
+struct pthread *
+__find_thread_by_id (pid_t tid)
+{
+  struct pthread *result = NULL;
+
+  lll_lock (stack_cache_lock, LLL_PRIVATE);
+
+  /* Iterate over the list with system-allocated threads first.  */
+  list_t *runp;
+  list_for_each (runp, &stack_used)
+    {
+      struct pthread *curp;
+
+      curp = list_entry (runp, struct pthread, list);
+
+      if (curp->tid == tid)
+	{
+	  result = curp;
+	  goto out;
+	}
+    }
+
+  /* Now the list with threads using user-allocated stacks.  */
+  list_for_each (runp, &__stack_user)
+    {
+      struct pthread *curp;
+
+      curp = list_entry (runp, struct pthread, list);
+
+      if (curp->tid == tid)
+	{
+	  result = curp;
+	  goto out;
+	}
+    }
+
+ out:
+  lll_unlock (stack_cache_lock, LLL_PRIVATE);
+
+  return result;
+}
+#endif
+
+
+static void
+internal_function
+setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
+{
+  int ch;
+
+  /* Don't let the thread exit before the setxid handler runs.  */
+  t->setxid_futex = 0;
+
+  do
+    {
+      ch = t->cancelhandling;
+
+      /* If the thread is exiting right now, ignore it.  */
+      if ((ch & EXITING_BITMASK) != 0)
+	return;
+    }
+  while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
+					       ch | SETXID_BITMASK, ch));
+}
+
+
+static void
+internal_function
+setxid_unmark_thread (struct xid_command *cmdp, struct pthread *t)
+{
+  int ch;
+
+  do
+    {
+      ch = t->cancelhandling;
+      if ((ch & SETXID_BITMASK) == 0)
+	return;
+    }
+  while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
+					       ch & ~SETXID_BITMASK, ch));
+
+  /* Release the futex just in case.  */
+  t->setxid_futex = 1;
+  lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE);
+}
+
+
+static int
+internal_function
+setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
+{
+  if ((t->cancelhandling & SETXID_BITMASK) == 0)
+    return 0;
+
+  int val;
+  INTERNAL_SYSCALL_DECL (err);
+#if __ASSUME_TGKILL
+  val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
+			  t->tid, SIGSETXID);
+#else
+# ifdef __NR_tgkill
+  val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
+			  t->tid, SIGSETXID);
+  if (INTERNAL_SYSCALL_ERROR_P (val, err)
+      && INTERNAL_SYSCALL_ERRNO (val, err) == ENOSYS)
+# endif
+    val = INTERNAL_SYSCALL (tkill, err, 2, t->tid, SIGSETXID);
+#endif
+
+  /* If this failed, the thread must not have started yet or else exited.  */
+  if (!INTERNAL_SYSCALL_ERROR_P (val, err))
+    {
+      atomic_increment (&cmdp->cntr);
+      return 1;
+    }
+  else
+    return 0;
+}
+
+
+int
+attribute_hidden
+__nptl_setxid (struct xid_command *cmdp)
+{
+  int signalled;
+  int result;
+  lll_lock (stack_cache_lock, LLL_PRIVATE);
+
+  __xidcmd = cmdp;
+  cmdp->cntr = 0;
+
+  struct pthread *self = THREAD_SELF;
+
+  /* Iterate over the list with system-allocated threads first.  */
+  list_t *runp;
+  list_for_each (runp, &stack_used)
+    {
+      struct pthread *t = list_entry (runp, struct pthread, list);
+      if (t == self)
+	continue;
+
+      setxid_mark_thread (cmdp, t);
+    }
+
+  /* Now the list with threads using user-allocated stacks.  */
+  list_for_each (runp, &__stack_user)
+    {
+      struct pthread *t = list_entry (runp, struct pthread, list);
+      if (t == self)
+	continue;
+
+      setxid_mark_thread (cmdp, t);
+    }
+
+  /* Iterate until we don't succeed in signalling anyone.  That means
+     we have gotten all running threads, and their children will be
+     automatically correct once started.  */
+  do
+    {
+      signalled = 0;
+
+      list_for_each (runp, &stack_used)
+	{
+	  struct pthread *t = list_entry (runp, struct pthread, list);
+	  if (t == self)
+	    continue;
+
+	  signalled += setxid_signal_thread (cmdp, t);
+	}
+
+      list_for_each (runp, &__stack_user)
+	{
+	  struct pthread *t = list_entry (runp, struct pthread, list);
+	  if (t == self)
+	    continue;
+
+	  signalled += setxid_signal_thread (cmdp, t);
+	}
+
+      int cur = cmdp->cntr;
+      while (cur != 0)
+	{
+	  lll_futex_wait (&cmdp->cntr, cur, LLL_PRIVATE);
+	  cur = cmdp->cntr;
+	}
+    }
+  while (signalled != 0);
+
+  /* Clean up flags, so that no thread blocks during exit waiting
+     for a signal which will never come.  */
+  list_for_each (runp, &stack_used)
+    {
+      struct pthread *t = list_entry (runp, struct pthread, list);
+      if (t == self)
+	continue;
+
+      setxid_unmark_thread (cmdp, t);
+    }
+
+  list_for_each (runp, &__stack_user)
+    {
+      struct pthread *t = list_entry (runp, struct pthread, list);
+      if (t == self)
+	continue;
+
+      setxid_unmark_thread (cmdp, t);
+    }
+
+  /* This must be last, otherwise the current thread might not have
+     permission to send SIGSETXID to the other threads.  */
+  INTERNAL_SYSCALL_DECL (err);
+  result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
+				 cmdp->id[0], cmdp->id[1], cmdp->id[2]);
+  if (INTERNAL_SYSCALL_ERROR_P (result, err))
+    {
+      __set_errno (INTERNAL_SYSCALL_ERRNO (result, err));
+      result = -1;
+    }
+
+  lll_unlock (stack_cache_lock, LLL_PRIVATE);
+  return result;
+}
+
+static inline void __attribute__((always_inline))
+init_one_static_tls (struct pthread *curp, struct link_map *map)
+{
+  dtv_t *dtv = GET_DTV (TLS_TPADJ (curp));
+# if defined(TLS_TCB_AT_TP)
+  void *dest = (char *) curp - map->l_tls_offset;
+# elif defined(TLS_DTV_AT_TP)
+  void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
+# else
+#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
+# endif
+
+  /* Fill in the DTV slot so that a later LD/GD access will find it.  */
+  dtv[map->l_tls_modid].pointer.val = dest;
+  dtv[map->l_tls_modid].pointer.is_static = true;
+
+  /* Initialize the memory.  */
+  memset (mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
+	  '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
+}
+
+void
+attribute_hidden
+__pthread_init_static_tls (struct link_map *map)
+{
+  lll_lock (stack_cache_lock, LLL_PRIVATE);
+
+  /* Iterate over the list with system-allocated threads first.  */
+  list_t *runp;
+  list_for_each (runp, &stack_used)
+    init_one_static_tls (list_entry (runp, struct pthread, list), map);
+
+  /* Now the list with threads using user-allocated stacks.  */
+  list_for_each (runp, &__stack_user)
+    init_one_static_tls (list_entry (runp, struct pthread, list), map);
+
+  lll_unlock (stack_cache_lock, LLL_PRIVATE);
+}
+
+
+void
+attribute_hidden
+__wait_lookup_done (void)
+{
+  lll_lock (stack_cache_lock, LLL_PRIVATE);
+
+  struct pthread *self = THREAD_SELF;
+
+  /* Iterate over the list with system-allocated threads first.  */
+  list_t *runp;
+  list_for_each (runp, &stack_used)
+    {
+      struct pthread *t = list_entry (runp, struct pthread, list);
+      if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
+	continue;
+
+      int *const gscope_flagp = &t->header.gscope_flag;
+
+      /* We have to wait until this thread is done with the global
+	 scope.  First tell the thread that we are waiting and
+	 possibly have to be woken.  */
+      if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
+						THREAD_GSCOPE_FLAG_WAIT,
+						THREAD_GSCOPE_FLAG_USED))
+	continue;
+
+      do
+	lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
+      while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
+    }
+
+  /* Now the list with threads using user-allocated stacks.  */
+  list_for_each (runp, &__stack_user)
+    {
+      struct pthread *t = list_entry (runp, struct pthread, list);
+      if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
+	continue;
+
+      int *const gscope_flagp = &t->header.gscope_flag;
+
+      /* We have to wait until this thread is done with the global
+	 scope.  First tell the thread that we are waiting and
+	 possibly have to be woken.  */
+      if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
+						THREAD_GSCOPE_FLAG_WAIT,
+						THREAD_GSCOPE_FLAG_USED))
+	continue;
+
+      do
+	lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
+      while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
+    }
+
+  lll_unlock (stack_cache_lock, LLL_PRIVATE);
+}

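Second, the in_flight_stack bookkeeping: stack_list_add and stack_list_del publish which list element is mid-operation, tagging the pointer's low bit to distinguish an add from a delete, so that __reclaim_stacks in a fork child can finish or replay the interrupted step. The sketch below is a standalone illustration, not part of the patch: the node type and the simulated interruption in main are hypothetical, but the store order follows NPTL's list_add and the recovery follows the check_list logic above.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef struct node { struct node *next, *prev; } node;

/* Element currently being (un)linked; low bit set means "add".  */
static uintptr_t in_flight;

static void
list_add_tracked (node *elem, node *head)
{
  in_flight = (uintptr_t) elem | 1;
  elem->next = head->next;
  elem->prev = head;
  head->next->prev = elem;	/* a fork() here leaves a half-done add */
  head->next = elem;
  in_flight = 0;
}

static void
list_del_tracked (node *elem)
{
  in_flight = (uintptr_t) elem;	/* low bit clear: a delete */
  elem->next->prev = elem->prev;
  elem->prev->next = elem->next;
  in_flight = 0;
}

/* What __reclaim_stacks does in the fork child.  */
static void
replay_in_flight (node *head)
{
  if (in_flight == 0)
    return;			/* no operation was interrupted */

  node *elem = (node *) (in_flight & ~(uintptr_t) 1);
  if (in_flight & 1)
    {
      /* Interrupted add: detectable because the first node's back
	 pointer already names ELEM; completing the forward link from
	 the head is all that remains.  */
      if (head->next->prev != head)
	{
	  assert (head->next->prev == elem);
	  elem->next = head->next;
	  elem->prev = head;
	  head->next = elem;
	}
    }
  else
    {
      /* A delete can always be replayed; both stores are idempotent.  */
      elem->next->prev = elem->prev;
      elem->prev->next = elem->next;
    }
  in_flight = 0;
}

int
main (void)
{
  node head = { &head, &head };
  node a, b;

  list_add_tracked (&a, &head);

  /* Simulate fork() arriving mid-add of B: three of the four stores
     of list_add_tracked have executed, the last has not.  */
  in_flight = (uintptr_t) &b | 1;
  b.next = head.next;
  b.prev = &head;
  head.next->prev = &b;

  replay_in_flight (&head);	/* the "child" repairs the list */
  assert (head.next == &b && b.next == &a && a.next == &head);
  assert (head.prev == &a && a.prev == &b && b.prev == &head);

  list_del_tracked (&a);
  assert (head.next == &b && b.next == &head && head.prev == &b);
  puts ("list consistent after replayed add");
  return 0;
}

The write barriers in the real stack_list_add/stack_list_del ensure the in_flight marker is visible before the list pointers change, which is what makes this single-word recovery protocol safe against an asynchronous fork().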