10 files changed, 2003 insertions, 0 deletions
diff --git a/ldso/ldso/sparc/DEFS.h b/ldso/ldso/sparc/DEFS.h
new file mode 100644
index 000000000..4b9abccfd
--- /dev/null
+++ b/ldso/ldso/sparc/DEFS.h
@@ -0,0 +1,5 @@
+/*
+ * FUNC(name): assembler boilerplate that opens a function.  It exports
+ * `name` as a global symbol, marks the symbol as a function, aligns to
+ * 4 bytes and emits the label itself.
+ */
+#define	FUNC(name)	\
+	.global name;	\
+	.type name,@function;     \
+	.align 4;	\
+	name:
diff --git a/ldso/ldso/sparc/dl-sysdep.h b/ldso/ldso/sparc/dl-sysdep.h
new file mode 100644
index 000000000..1d4c0354f
--- /dev/null
+++ b/ldso/ldso/sparc/dl-sysdep.h
@@ -0,0 +1,131 @@
+
+/*
+ * Various assembly language/system dependent hacks that are required
+ * so that we can minimize the amount of platform specific code.
+ */
+#define LINUXBIN
+
+/*
+ * Define this if the system uses RELOCA.
+ */
+#define ELF_USES_RELOCA
+
+/*
+ * Get the address of the Global offset table.  This must be absolute, not
+ * relative.
+ *
+ * The macro only copies register %l7 into X; it relies on %l7 already
+ * holding the GOT address at the point of use.
+ * NOTE(review): nothing in this header loads %l7 -- presumably the
+ * compiler's PIC prologue does; confirm for the toolchain in use.
+ */
+#define GET_GOT(X)     __asm__("\tmov %%l7,%0\n\t" : "=r" (X))
+
+/*
+ * Get a pointer to the argv array.  On many platforms this can be just
+ * the address of the first argument, on other platforms we need to
+ * do something a little more subtle here.  We assume that argc is stored
+ * at the word just below the argvp that we return here.
+ *
+ * On the Sparc the result is computed as %fp + 68; the ARGS parameter is
+ * not used.  Note that the expansion already ends in a semicolon.
+ */
+#define GET_ARGV(ARGVP, ARGS) __asm__("\tadd %%fp,68,%0\n" : "=r" (ARGVP));
+
+/*
+ * Initialization sequence for a GOT.  For the Sparc, this points to the
+ * PLT, and we need to initialize a couple of the slots.  The PLT should
+ * look like:
+ *
+ *		save %sp, -64, %sp
+ *		call _dl_linux_resolve
+ *		nop
+ *		.word implementation_dependent
+ *
+ * Slot 1 is built as the call opcode (0x40000000) or-ed with a word
+ * displacement: the byte distance from slot 1 itself (GOT_BASE + 4) to
+ * _dl_linux_resolve, shifted right by 2.  Slot 3 (the implementation
+ * dependent word) receives MODULE.
+ * NOTE(review): the "GOT_BASE + 4" reading assumes GOT_BASE points to
+ * 4-byte words -- confirm at the call site.
+ * The expansion is a brace block, not do { } while (0).
+ */
+#define INIT_GOT(GOT_BASE,MODULE) \
+{				\
+   GOT_BASE[0] = 0x9de3bfc0;  /* save %sp, -64, %sp */	\
+   GOT_BASE[1] = 0x40000000 | (((unsigned int) _dl_linux_resolve - (unsigned int) GOT_BASE - 4) >> 2);	\
+   GOT_BASE[2] = 0x01000000; /* nop */ 			\
+   GOT_BASE[3] = (int) MODULE;					\
+}
+
+/*
+ * Here is a macro to perform a relocation.  This is only used when
+ * bootstrapping the dynamic loader.
+ *
+ *   RELP    pointer to the relocation entry (r_info and r_addend are read)
+ *   REL     pointer to the word(s) being relocated
+ *   SYMBOL  resolved symbol value
+ *   LOAD    load address of the object
+ *
+ * Every argument is parenthesized in the expansion so that expression
+ * arguments are evaluated as a unit.  R_SPARC_NONE and R_SPARC_WDISP30 are
+ * accepted and ignored; any other unlisted type terminates the process via
+ * _dl_exit(1).  The expansion is a bare switch statement, so it does not
+ * need a trailing semicolon at the point of use.
+ */
+#define PERFORM_BOOTSTRAP_RELOC(RELP,REL,SYMBOL,LOAD) \
+	switch(ELF32_R_TYPE((RELP)->r_info)) {		\
+	case R_SPARC_32:				\
+	case R_SPARC_GLOB_DAT:				\
+	  *(REL) = (SYMBOL) + (RELP)->r_addend;		\
+	  break;					\
+	case R_SPARC_JMP_SLOT:				\
+	  /* sethi: upper 22 bits of SYMBOL into %g1 */	\
+	  (REL)[1] = 0x03000000 | (((SYMBOL) >> 10) & 0x3fffff);	\
+	  /* jmp %g1 + low 10 bits of SYMBOL */		\
+	  (REL)[2] = 0x81c06000 | ((SYMBOL) & 0x3ff);	\
+	  break;					\
+	case R_SPARC_NONE:				\
+	case R_SPARC_WDISP30:				\
+	  break;					\
+	case R_SPARC_RELATIVE:				\
+	  *(REL) += (unsigned int) (LOAD) + (RELP)->r_addend; \
+	  break;					\
+	default:					\
+	  _dl_exit(1);					\
+	}
+
+
+/*
+ * Transfer control to the user's application, once the dynamic loader
+ * is done.  The crt calls atexit with $g1 if not null, so we need to
+ * ensure that it contains NULL.
+ *
+ * The sequence clears %g1 (add %g0,%g0,%g1), jumps with jmpl to the
+ * address held in _dl_elf_main, and executes the restore in the delay
+ * slot of that jump.
+ */
+
+#define START()		\
+	__asm__ volatile ( \
+	                   "add %%g0,%%g0,%%g1\n\t" \
+			   "jmpl %0, %%o7\n\t"	\
+			   "restore %%g0,%%g0,%%g0\n\t" \
+		    	: /*"=r" (status) */ :	\
+		    	  "r" (_dl_elf_main): "g1", "o0", "o1")
+
+
+
+/* Here we define the magic numbers that this dynamic loader should accept */
+
+#define MAGIC1 EM_SPARC
+#undef  MAGIC2
+/* Used for error messages */
+#define ELF_TARGET "Sparc"
+
+#ifndef COMPILE_ASM
+extern unsigned int _dl_linux_resolver(unsigned int reloc_entry,
+					unsigned int * i);
+#endif
+
+/*
+ * Define this if you want a dynamic loader that works on Solaris.
+ */
+#define SOLARIS_COMPATIBLE
+
+/*
+ * Define this because we do not want to call .udiv in the library.
+ * Change on the plans -miguel:
+ * We just statically link against .udiv.  This is required
+ * if we want to be able to run on Sun4c machines.
+ *
+ * do_rem(result, n, base) stores the remainder of n / base in `result`.
+ * With USE_V8 it is computed inline as n - (n / base) * base using the
+ * sdiv and smul instructions with %l6 as scratch; otherwise it calls
+ * _dl_urem().
+ * NOTE(review): in the USE_V8 form the asm never writes __res, yet __res
+ * is the value of the statement expression -- only `result` is meaningful.
+ * NOTE(review): the non-V8 form is a plain assignment that already ends in
+ * a semicolon, so the two forms are not interchangeable as expressions.
+ */
+
+/* We now link .urem against this one */
+#ifdef USE_V8
+#define do_rem(result,n,base) ({ \
+volatile int __res; \
+__asm__("mov %%g0,%%Y\n\t" \
+	"sdiv %2,%3,%%l6\n\t" \
+	 "smul %%l6,%3,%%l6\n\t" \
+	 "sub  %2,%%l6,%0\n\t" \
+	 :"=r" (result),"=r" (__res):"r" (n),"r"(base) : "l6" ); __res; })
+#else
+#define do_rem(a,b,c) a = _dl_urem (b,c);
+#endif
+/*
+ * dbx wants the binder to have a specific name.  Mustn't disappoint it.
+ */
+#ifdef SOLARIS_COMPATIBLE
+#define _dl_linux_resolve _elf_rtbndr
+#endif
+
diff --git a/ldso/ldso/sparc/elfinterp.c b/ldso/ldso/sparc/elfinterp.c
new file mode 100644
index 000000000..6f0d9f8fd
--- /dev/null
+++ b/ldso/ldso/sparc/elfinterp.c
@@ -0,0 +1,355 @@
+/* Run an ELF binary on a linux system.
+
+   Copyright (C) 1995, Eric Youngdale.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+/*
+ * VERBOSE_DLINKER is forced on here, so the name table below is always
+ * compiled into this file.
+ */
+#ifndef VERBOSE_DLINKER
+#define VERBOSE_DLINKER
+#endif
+#ifdef VERBOSE_DLINKER
+/*
+ * Printable names for relocation types, indexed by the value returned by
+ * ELF32_R_TYPE().  The table has 24 entries (R_SPARC_NONE through
+ * R_SPARC_UA32); larger type values have no entry.
+ * NOTE(review): assumes the R_SPARC_* constants are numbered in exactly
+ * this order starting at 0 -- confirm against the ELF header in use.
+ */
+static char * _dl_reltypes[] = { "R_SPARC_NONE", "R_SPARC_8",
+  "R_SPARC_16", "R_SPARC_32", "R_SPARC_DISP8", "R_SPARC_DISP16",
+  "R_SPARC_DISP32", "R_SPARC_WDISP30", "R_SPARC_WDISP22",
+  "R_SPARC_HI22", "R_SPARC_22", "R_SPARC_13", "R_SPARC_LO10",
+  "R_SPARC_GOT10", "R_SPARC_GOT13", "R_SPARC_GOT22", "R_SPARC_PC10",
+  "R_SPARC_PC22", "R_SPARC_WPLT30", "R_SPARC_COPY",
+  "R_SPARC_GLOB_DAT", "R_SPARC_JMP_SLOT", "R_SPARC_RELATIVE",
+  "R_SPARC_UA32"};
+#endif
+
+/* Program to load an ELF binary on a linux system, and run it.
+References to symbols in sharable libraries can be resolved by either
+an ELF sharable library or a linux style of shared library. */
+
+/* Disclaimer:  I have never seen any AT&T source code for SVr4, nor have
+   I ever taken any courses on internals.  This program was developed using
+   information available through the book "UNIX SYSTEM V RELEASE 4,
+   Programmers guide: Ansi C and Programming Support Tools", which did
+   a more than adequate job of explaining everything required to get this
+   working. */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+/*#include <stdlib.h>*/
+#include "string.h"
+#include <linux/unistd.h>
+#include <linux/fcntl.h>
+#include "hash.h"
+#include "linuxelf.h"
+#include "sysdep.h"
+#include "../syscall.h"
+#include "../string.h"
+
+#define SVR4_COMPATIBILITY
+
+extern char *_dl_progname;
+
+/* Return type spelled out: implicit int is not valid C since C99. */
+extern int _dl_linux_resolve(void);
+
+/*
+ * Lazy-binding resolver, called from the _dl_linux_resolve trampoline.
+ *
+ *   reloc_entry  raw value handed over by the trampoline; it is converted
+ *                below into a byte offset into the DT_JMPREL table
+ *   plt          address handed over by the trampoline; plt[2] is read as
+ *                the struct elf_resolve pointer of the owning module
+ *
+ * Looks the symbol up, rewrites words 1 and 2 of the PLT entry so that
+ * later calls jump straight to the target, and returns the target address.
+ * Exits the process with code 30 if the relocation is not a
+ * R_SPARC_JMP_SLOT and with code 31 if the symbol cannot be resolved.
+ * Diagnostic tracing is only compiled in when DEBUG is defined.
+ */
+unsigned int _dl_linux_resolver(unsigned int reloc_entry, unsigned int * plt)
+{
+  int reloc_type;
+  struct elf32_rela * this_reloc;
+  char * strtab;
+  struct elf32_sym * symtab;
+  struct elf32_rela * rel_addr;
+  struct elf_resolve * tpnt;
+  int symtab_index;
+  char * new_addr;
+  char ** got_addr;
+  unsigned int instr_addr;
+
+  /* NOTE(review): presumably the word INIT_GOT stores in GOT slot 3. */
+  tpnt = (struct elf_resolve *) plt[2];
+
+  rel_addr = (struct elf32_rela *) (tpnt->dynamic_info[DT_JMPREL] +
+				   tpnt->loadaddr);
+
+  /*
+   * Generate the correct relocation index into the .rela.plt section.
+   * NOTE(review): the result is added to rel_addr as a BYTE offset below;
+   * confirm the ">> 12" / "- 0xc" scaling against the value the PLT
+   * entries actually leave in %g1.
+   */
+  reloc_entry = (reloc_entry >> 12) - 0xc;
+
+  this_reloc = (struct elf32_rela *) ((char *) rel_addr + reloc_entry);
+
+  reloc_type = ELF32_R_TYPE(this_reloc->r_info);
+  symtab_index = ELF32_R_SYM(this_reloc->r_info);
+
+  symtab =  (struct elf32_sym *) (tpnt->dynamic_info[DT_SYMTAB] + tpnt->loadaddr);
+  strtab = (char *) (tpnt->dynamic_info[DT_STRTAB] + tpnt->loadaddr);
+
+#ifdef DEBUG
+  _dl_fdprintf(2, "tpnt = %x\n", tpnt);
+  _dl_fdprintf(2, "reloc = %x\n", this_reloc);
+  _dl_fdprintf(2, "symtab = %x\n", symtab);
+  _dl_fdprintf(2, "strtab = %x\n", strtab);
+#endif
+
+  if (reloc_type != R_SPARC_JMP_SLOT) {
+    _dl_fdprintf(2, "%s: incorrect relocation type in jump relocations (%d)\n",
+		  _dl_progname, reloc_type);
+    _dl_exit(30);
+  }
+
+  /* Address of jump instruction to fix up */
+  instr_addr  = ((int)this_reloc->r_offset  + (int)tpnt->loadaddr);
+  got_addr = (char **) instr_addr;
+
+#ifdef DEBUG
+  _dl_fdprintf(2, "symtab_index %d\n", symtab_index);
+  _dl_fdprintf(2, "Resolving symbol %s\n",
+	strtab + symtab[symtab_index].st_name);
+#endif
+
+  /* Look up the address the PLT entry should jump to */
+  new_addr = _dl_find_hash(strtab + symtab[symtab_index].st_name,
+  			tpnt->symbol_scope, (int) got_addr, tpnt, 0);
+  if(!new_addr) {
+    _dl_fdprintf(2, "%s: can't resolve symbol '%s'\n",
+	       _dl_progname, strtab + symtab[symtab_index].st_name);
+    _dl_exit(31);
+  }
+
+  /*
+   * Patch the PLT entry: word 1 becomes "sethi <upper 22 bits>, %g1" and
+   * word 2 becomes "jmp %g1 + <lower 10 bits>".
+   */
+/* #define DEBUG_LIBRARY */
+#ifdef DEBUG_LIBRARY
+  if((unsigned int) got_addr < 0x40000000) {
+    /* Leave the entry unpatched so every call is reported. */
+    _dl_fdprintf(2, "Calling library function: %s\n",
+	       strtab + symtab[symtab_index].st_name);
+  } else {
+    got_addr[1] = (char *) (0x03000000 | (((unsigned int) new_addr >> 10) & 0x3fffff));
+    got_addr[2] = (char *) (0x81c06000 | ((unsigned int) new_addr & 0x3ff));
+  }
+#else
+  got_addr[1] = (char *) (0x03000000 | (((unsigned int) new_addr >> 10) & 0x3fffff));
+  got_addr[2] = (char *) (0x81c06000 | ((unsigned int) new_addr & 0x3ff));
+#endif
+
+#ifdef DEBUG
+  _dl_fdprintf(2, "Address = %x\n",new_addr);
+#endif
+
+  return (unsigned int) new_addr;
+}
+
+/*
+ * Walk the lazy (PLT) relocations of one module and check their types.
+ *
+ *   tpnt      module whose relocations are examined
+ *   rel_addr  offset of the relocation table, relative to tpnt->loadaddr
+ *   rel_size  size of the table in bytes
+ *   type      not used by this implementation
+ *
+ * R_SPARC_NONE and R_SPARC_JMP_SLOT entries are accepted and left
+ * untouched.  Any other type is fatal: a message is printed and the
+ * process exits with code 33.
+ */
+void _dl_parse_lazy_relocation_information(struct elf_resolve * tpnt, int rel_addr,
+       int rel_size, int type){
+  int i;
+  char * strtab;
+  int reloc_type;
+  int symtab_index;
+  struct elf32_sym * symtab;
+  struct elf32_rela * rpnt;
+
+  /* Now parse the relocation information */
+  rpnt = (struct elf32_rela *) (rel_addr + tpnt->loadaddr);
+
+  symtab =  (struct elf32_sym *) (tpnt->dynamic_info[DT_SYMTAB] + tpnt->loadaddr);
+  strtab = ( char *) (tpnt->dynamic_info[DT_STRTAB] + tpnt->loadaddr);
+
+  for(i=0; i< rel_size; i += sizeof(struct elf32_rela), rpnt++){
+    reloc_type = ELF32_R_TYPE(rpnt->r_info);
+    symtab_index = ELF32_R_SYM(rpnt->r_info);
+
+    /* When the dynamic linker bootstrapped itself, it resolved some symbols.
+       Make sure we do not do them again */
+    if(!symtab_index && tpnt->libtype == program_interpreter) continue;
+    if(symtab_index && tpnt->libtype == program_interpreter &&
+       _dl_symbol(strtab + symtab[symtab_index].st_name))
+      continue;
+
+    switch(reloc_type){
+    case R_SPARC_NONE:
+    case R_SPARC_JMP_SLOT:
+      break;
+    default:
+      _dl_fdprintf(2, "%s: (LAZY) can't handle reloc type ", _dl_progname);
+#ifdef VERBOSE_DLINKER
+      /* Only index the name table for types it actually covers. */
+      if ((unsigned int) reloc_type <
+	  sizeof(_dl_reltypes) / sizeof(_dl_reltypes[0]))
+	_dl_fdprintf(2, "%s ", _dl_reltypes[reloc_type]);
+      else
+	_dl_fdprintf(2, "%d ", reloc_type);
+#endif
+      if(symtab_index)
+	_dl_fdprintf(2, "'%s'\n", strtab + symtab[symtab_index].st_name);
+      else
+	_dl_fdprintf(2, "\n");
+      _dl_exit(33);
+    }
+  }
+}
+
+/*
+ * Apply the relocations of one module.
+ *
+ *   tpnt      module being relocated
+ *   rel_addr  offset of the relocation table, relative to tpnt->loadaddr
+ *   rel_size  size of the table in bytes
+ *   type      not used by this implementation
+ *
+ * Returns the number of STB_GLOBAL symbols that could not be resolved.
+ * R_SPARC_COPY entries are skipped here; _dl_parse_copy_information()
+ * handles them in a separate pass.  An unsupported relocation type is
+ * fatal: a message is printed and the process exits with code 34.
+ */
+int _dl_parse_relocation_information(struct elf_resolve * tpnt, int rel_addr,
+       int rel_size, int type){
+  int i;
+  char * strtab;
+  int reloc_type;
+  int goof = 0;
+  struct elf32_sym * symtab;
+  struct elf32_rela * rpnt;
+  unsigned int * reloc_addr;
+  unsigned int symbol_addr;
+  int symtab_index;
+
+  /* Now parse the relocation information */
+  rpnt = (struct elf32_rela *) (rel_addr + tpnt->loadaddr);
+
+  symtab =  (struct elf32_sym *) (tpnt->dynamic_info[DT_SYMTAB] + tpnt->loadaddr);
+  strtab = ( char *) (tpnt->dynamic_info[DT_STRTAB] + tpnt->loadaddr);
+
+  for(i=0; i< rel_size; i+= sizeof(struct elf32_rela), rpnt++){
+    reloc_addr = (unsigned int *) (tpnt->loadaddr + (int)rpnt->r_offset);
+    reloc_type = ELF32_R_TYPE(rpnt->r_info);
+    symtab_index = ELF32_R_SYM(rpnt->r_info);
+    symbol_addr = 0;
+
+    /* Entries the loader already handled while bootstrapping itself. */
+    if(!symtab_index && tpnt->libtype == program_interpreter) continue;
+
+    if(symtab_index) {
+
+      if(tpnt->libtype == program_interpreter &&
+	 _dl_symbol(strtab + symtab[symtab_index].st_name))
+	continue;
+
+      symbol_addr = (unsigned int)
+	_dl_find_hash(strtab + symtab[symtab_index].st_name,
+			      tpnt->symbol_scope, (int) reloc_addr,
+		      (reloc_type == R_SPARC_JMP_SLOT ? tpnt : NULL), 0);
+
+      /* Only an unresolved global symbol counts as an error. */
+      if(!symbol_addr &&
+	 ELF32_ST_BIND(symtab [symtab_index].st_info) == STB_GLOBAL) {
+	_dl_fdprintf(2, "%s: can't resolve symbol '%s'\n",
+		     _dl_progname, strtab + symtab[symtab_index].st_name);
+	goof++;
+      }
+    }
+
+    switch(reloc_type){
+    case R_SPARC_NONE:
+      break;
+    case R_SPARC_32:
+    case R_SPARC_GLOB_DAT:
+      *reloc_addr = symbol_addr + rpnt->r_addend;
+      break;
+    case R_SPARC_DISP32:
+      *reloc_addr = symbol_addr + rpnt->r_addend - (unsigned int) reloc_addr;
+      break;
+    case R_SPARC_JMP_SLOT:
+      /* Same two-instruction patch the lazy resolver writes. */
+      reloc_addr[1] = 0x03000000 | ((symbol_addr >> 10) & 0x3fffff);
+      reloc_addr[2] = 0x81c06000 | (symbol_addr & 0x3ff);
+      break;
+    case R_SPARC_RELATIVE:
+      *reloc_addr += (unsigned int) tpnt->loadaddr + rpnt->r_addend;
+      break;
+    case R_SPARC_HI22:
+      /* Without a symbol the value is relative to the load address. */
+      if (!symbol_addr)
+	symbol_addr = tpnt->loadaddr + rpnt->r_addend;
+      else
+	symbol_addr += rpnt->r_addend;
+      *reloc_addr = (*reloc_addr & 0xffc00000)|(symbol_addr >> 10);
+      break;
+    case R_SPARC_LO10:
+      if (!symbol_addr)
+	symbol_addr = tpnt->loadaddr + rpnt->r_addend;
+      else
+	symbol_addr += rpnt->r_addend;
+      *reloc_addr = (*reloc_addr & ~0x3ff)|(symbol_addr & 0x3ff);
+      break;
+    case R_SPARC_WDISP30:
+      *reloc_addr = (*reloc_addr & 0xc0000000)|
+	((symbol_addr - (unsigned int) reloc_addr) >> 2);
+      break;
+    case R_SPARC_COPY:
+      /* Done later, see _dl_parse_copy_information(). */
+      break;
+    default:
+      _dl_fdprintf(2, "%s: can't handle reloc type ", _dl_progname);
+#ifdef VERBOSE_DLINKER
+      /* Only index the name table for types it actually covers. */
+      if ((unsigned int) reloc_type <
+	  sizeof(_dl_reltypes) / sizeof(_dl_reltypes[0]))
+	_dl_fdprintf(2, "%s ", _dl_reltypes[reloc_type]);
+      else
+	_dl_fdprintf(2, "%d ", reloc_type);
+#endif
+      if (symtab_index)
+	_dl_fdprintf(2, "'%s'\n", strtab + symtab[symtab_index].st_name);
+      else
+	_dl_fdprintf(2, "\n");
+      _dl_exit(34);
+    }
+
+  }
+  return goof;
+}
+
+
+/* This is done as a separate step, because there are cases where
+   information is first copied and later initialized.  This results in
+   the wrong information being copied.  Someone at Sun was complaining about
+   a bug in the handling of _COPY by SVr4, and this may in fact be what he
+   was talking about.  Sigh. */
+
+/* No, there are cases where the SVr4 linker fails to emit COPY relocs
+   at all */
+
+/*
+ * Second relocation pass: perform the R_SPARC_COPY relocations of the
+ * module xpnt->dyn.
+ *
+ *   xpnt      list node of the module; symbols are searched from xpnt->next
+ *   rel_addr  offset of the relocation table, relative to the load address
+ *   rel_size  size of the table in bytes
+ *   type      not used by this implementation
+ *
+ * Returns the number of symbols that could not be resolved.  Once a lookup
+ * has failed, no further copies are made for the remaining entries.
+ */
+int _dl_parse_copy_information(struct dyn_elf * xpnt, int rel_addr,
+       int rel_size, int type)
+{
+  struct elf_resolve *tpnt = xpnt->dyn;
+  struct elf32_rela *rpnt =
+    (struct elf32_rela *) (rel_addr + tpnt->loadaddr);
+  struct elf32_sym *symtab =
+    (struct elf32_sym *) (tpnt->dynamic_info[DT_SYMTAB] + tpnt->loadaddr);
+  char *strtab = ( char *) (tpnt->dynamic_info[DT_STRTAB] + tpnt->loadaddr);
+  int failures = 0;
+  int done;
+
+  for (done = 0; done < rel_size;
+       done += sizeof(struct elf32_rela), rpnt++) {
+    unsigned int *target;
+    unsigned int source = 0;
+    char *name;
+    int sym;
+
+    /* Everything except COPY was dealt with in the first pass. */
+    if (ELF32_R_TYPE(rpnt->r_info) != R_SPARC_COPY)
+      continue;
+
+    target = (int *) (tpnt->loadaddr + (int)rpnt->r_offset);
+    sym = ELF32_R_SYM(rpnt->r_info);
+
+    if (sym == 0) {
+      if (tpnt->libtype == program_interpreter)
+	continue;
+    } else {
+      name = strtab + symtab[sym].st_name;
+
+      if (tpnt->libtype == program_interpreter && _dl_symbol(name))
+	continue;
+
+      source = (unsigned int)
+	_dl_find_hash(name, xpnt->next, (int) target, NULL, 1);
+      if (source == 0) {
+	_dl_fdprintf(2, "%s: can't resolve symbol '%s'\n",
+		   _dl_progname, name);
+	failures++;
+      }
+    }
+
+    /* Copy the definition's bytes to this module's st_value address. */
+    if (failures == 0)
+      _dl_memcpy((char *) symtab[sym].st_value,
+		  (char *) source,
+		  symtab[sym].st_size);
+  }
+
+  return failures;
+}
+
+
diff --git a/ldso/ldso/sparc/ld_sysdep.h b/ldso/ldso/sparc/ld_sysdep.h
new file mode 100644
index 000000000..1d4c0354f
--- /dev/null
+++ b/ldso/ldso/sparc/ld_sysdep.h
@@ -0,0 +1,131 @@
+
+/*
+ * Various assembly language/system dependent hacks that are required
+ * so that we can minimize the amount of platform specific code.
+ */
+#define LINUXBIN
+
+/*
+ * Define this if the system uses RELOCA.
+ */
+#define ELF_USES_RELOCA
+
+/*
+ * Get the address of the Global offset table.  This must be absolute, not
+ * relative.
+ *
+ * The macro only copies register %l7 into X; it relies on %l7 already
+ * holding the GOT address at the point of use.
+ * NOTE(review): nothing in this header loads %l7 -- presumably the
+ * compiler's PIC prologue does; confirm for the toolchain in use.
+ */
+#define GET_GOT(X)     __asm__("\tmov %%l7,%0\n\t" : "=r" (X))
+
+/*
+ * Get a pointer to the argv array.  On many platforms this can be just
+ * the address of the first argument, on other platforms we need to
+ * do something a little more subtle here.  We assume that argc is stored
+ * at the word just below the argvp that we return here.
+ *
+ * On the Sparc the result is computed as %fp + 68; the ARGS parameter is
+ * not used.  Note that the expansion already ends in a semicolon.
+ */
+#define GET_ARGV(ARGVP, ARGS) __asm__("\tadd %%fp,68,%0\n" : "=r" (ARGVP));
+
+/*
+ * Initialization sequence for a GOT.  For the Sparc, this points to the
+ * PLT, and we need to initialize a couple of the slots.  The PLT should
+ * look like:
+ *
+ *		save %sp, -64, %sp
+ *		call _dl_linux_resolve
+ *		nop
+ *		.word implementation_dependent
+ *
+ * Slot 1 is built as the call opcode (0x40000000) or-ed with a word
+ * displacement: the byte distance from slot 1 itself (GOT_BASE + 4) to
+ * _dl_linux_resolve, shifted right by 2.  Slot 3 (the implementation
+ * dependent word) receives MODULE.
+ * NOTE(review): the "GOT_BASE + 4" reading assumes GOT_BASE points to
+ * 4-byte words -- confirm at the call site.
+ * The expansion is a brace block, not do { } while (0).
+ */
+#define INIT_GOT(GOT_BASE,MODULE) \
+{				\
+   GOT_BASE[0] = 0x9de3bfc0;  /* save %sp, -64, %sp */	\
+   GOT_BASE[1] = 0x40000000 | (((unsigned int) _dl_linux_resolve - (unsigned int) GOT_BASE - 4) >> 2);	\
+   GOT_BASE[2] = 0x01000000; /* nop */ 			\
+   GOT_BASE[3] = (int) MODULE;					\
+}
+
+/*
+ * Here is a macro to perform a relocation.  This is only used when
+ * bootstrapping the dynamic loader.
+ *
+ *   RELP    pointer to the relocation entry (r_info and r_addend are read)
+ *   REL     pointer to the word(s) being relocated
+ *   SYMBOL  resolved symbol value
+ *   LOAD    load address of the object
+ *
+ * Every argument is parenthesized in the expansion so that expression
+ * arguments are evaluated as a unit.  R_SPARC_NONE and R_SPARC_WDISP30 are
+ * accepted and ignored; any other unlisted type terminates the process via
+ * _dl_exit(1).  The expansion is a bare switch statement, so it does not
+ * need a trailing semicolon at the point of use.
+ */
+#define PERFORM_BOOTSTRAP_RELOC(RELP,REL,SYMBOL,LOAD) \
+	switch(ELF32_R_TYPE((RELP)->r_info)) {		\
+	case R_SPARC_32:				\
+	case R_SPARC_GLOB_DAT:				\
+	  *(REL) = (SYMBOL) + (RELP)->r_addend;		\
+	  break;					\
+	case R_SPARC_JMP_SLOT:				\
+	  /* sethi: upper 22 bits of SYMBOL into %g1 */	\
+	  (REL)[1] = 0x03000000 | (((SYMBOL) >> 10) & 0x3fffff);	\
+	  /* jmp %g1 + low 10 bits of SYMBOL */		\
+	  (REL)[2] = 0x81c06000 | ((SYMBOL) & 0x3ff);	\
+	  break;					\
+	case R_SPARC_NONE:				\
+	case R_SPARC_WDISP30:				\
+	  break;					\
+	case R_SPARC_RELATIVE:				\
+	  *(REL) += (unsigned int) (LOAD) + (RELP)->r_addend; \
+	  break;					\
+	default:					\
+	  _dl_exit(1);					\
+	}
+
+
+/*
+ * Transfer control to the user's application, once the dynamic loader
+ * is done.  The crt calls atexit with $g1 if not null, so we need to
+ * ensure that it contains NULL.
+ *
+ * The sequence clears %g1 (add %g0,%g0,%g1), jumps with jmpl to the
+ * address held in _dl_elf_main, and executes the restore in the delay
+ * slot of that jump.
+ */
+
+#define START()		\
+	__asm__ volatile ( \
+	                   "add %%g0,%%g0,%%g1\n\t" \
+			   "jmpl %0, %%o7\n\t"	\
+			   "restore %%g0,%%g0,%%g0\n\t" \
+		    	: /*"=r" (status) */ :	\
+		    	  "r" (_dl_elf_main): "g1", "o0", "o1")
+
+
+
+/* Here we define the magic numbers that this dynamic loader should accept */
+
+#define MAGIC1 EM_SPARC
+#undef  MAGIC2
+/* Used for error messages */
+#define ELF_TARGET "Sparc"
+
+#ifndef COMPILE_ASM
+extern unsigned int _dl_linux_resolver(unsigned int reloc_entry,
+					unsigned int * i);
+#endif
+
+/*
+ * Define this if you want a dynamic loader that works on Solaris.
+ */
+#define SOLARIS_COMPATIBLE
+
+/*
+ * Define this because we do not want to call .udiv in the library.
+ * Change on the plans -miguel:
+ * We just statically link against .udiv.  This is required
+ * if we want to be able to run on Sun4c machines.
+ *
+ * do_rem(result, n, base) stores the remainder of n / base in `result`.
+ * With USE_V8 it is computed inline as n - (n / base) * base using the
+ * sdiv and smul instructions with %l6 as scratch; otherwise it calls
+ * _dl_urem().
+ * NOTE(review): in the USE_V8 form the asm never writes __res, yet __res
+ * is the value of the statement expression -- only `result` is meaningful.
+ * NOTE(review): the non-V8 form is a plain assignment that already ends in
+ * a semicolon, so the two forms are not interchangeable as expressions.
+ */
+
+/* We now link .urem against this one */
+#ifdef USE_V8
+#define do_rem(result,n,base) ({ \
+volatile int __res; \
+__asm__("mov %%g0,%%Y\n\t" \
+	"sdiv %2,%3,%%l6\n\t" \
+	 "smul %%l6,%3,%%l6\n\t" \
+	 "sub  %2,%%l6,%0\n\t" \
+	 :"=r" (result),"=r" (__res):"r" (n),"r"(base) : "l6" ); __res; })
+#else
+#define do_rem(a,b,c) a = _dl_urem (b,c);
+#endif
+/*
+ * dbx wants the binder to have a specific name.  Mustn't disappoint it.
+ */
+#ifdef SOLARIS_COMPATIBLE
+#define _dl_linux_resolve _elf_rtbndr
+#endif
+
diff --git a/ldso/ldso/sparc/resolve.S b/ldso/ldso/sparc/resolve.S
new file mode 100644
index 000000000..ea985b5c8
--- /dev/null
+++ b/ldso/ldso/sparc/resolve.S
@@ -0,0 +1,25 @@
+/*
+ * These are various helper routines that are needed to run an ELF image.
+ */
+#define COMPILE_ASM
+#include "sysdep.h"
+
+.text
+	.align 16
+
+.globl _dl_linux_resolve
+_dl_linux_resolve:
+	/*
+ 	 * Call the resolver - pass the address of the PLT so that we can
+	 * figure out which module we are in.
+	 *
+	 * %o7 is copied to %o1 (second argument) and %g1 to %o0 (first
+	 * argument; that mov sits in the delay slot of the call).  The
+	 * value the resolver returns in %o0 is then used as the jump
+	 * target, with the restore executed in the delay slot of the jmpl.
+	 * NOTE(review): the restore presumably undoes the save performed
+	 * by the first PLT slot -- confirm against INIT_GOT.
+	 */
+	mov %o7,%o1
+	call  _dl_linux_resolver
+	mov %g1,%o0
+
+	jmpl %o0,%o7
+	restore
+.LFE2:
+
+	.type	_dl_linux_resolve,#function
+	.size _dl_linux_resolve,.LFE2-_dl_linux_resolve
diff --git a/ldso/ldso/sparc/sdiv.S b/ldso/ldso/sparc/sdiv.S
new file mode 100644
index 000000000..5e52e1959
--- /dev/null
+++ b/ldso/ldso/sparc/sdiv.S
@@ -0,0 +1,369 @@
+   /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters (shown with the values used to generate this file):
+ *  name = .div	name of function to generate
+ *  op = div	op=div => %o0 / %o1; op=rem => %o0 % %o1
+ *  signed = true	true => signed; false => unsigned
+ *
+ * Algorithm parameters:
+ *  N		how many bits per iteration we try to get (4)
+ *  WORDSIZE	total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS	number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q		the partial quotient under development (initially 0)
+ *  R		the remainder so far, initially the dividend
+ *  ITER	number of main division loop iterations required;
+ *		equal to ceil(log2(quotient) / N).  Note that this
+ *		is the log base (2^N) of the quotient.
+ *  V		the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+
+
+
+#include "DEFS.h"
+#ifndef __linux__
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include "/usr/include/machine/trap.h"
+#endif
+#else
+#include <asm/traps.h>
+#endif
+
+/*
+ * _dl_div: signed 32-bit division.
+ *
+ *   In:   %o0 = dividend, %o1 = divisor
+ *   Out:  %o0 = quotient
+ *
+ * Both operands are made non-negative first; %g6 keeps the xor of the
+ * original operands and its sign decides whether the quotient is negated
+ * on the way out.  %o2 accumulates the quotient, %o3 the running
+ * remainder, %o5 the scaled comparand and %o4 the iteration count; %g1 and
+ * %g7 are used as scratch.  A zero divisor raises trap ST_DIV0 and, if
+ * the trap returns, 0 is returned.
+ */
+FUNC(_dl_div)
+	! compute sign of result; if neither is negative, no problem
+	orcc	%o1, %o0, %g0	! either negative?
+	bge	2f			! no, go do the divide
+	xor	%o1, %o0, %g6	! compute sign in any case
+	tst	%o1
+	bge	1f
+	tst	%o0
+	! %o1 is definitely negative; %o0 might also be negative
+	bge	2f			! if %o0 not negative...
+	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
+1:	! %o0 is negative, %o1 is nonnegative
+	sub	%g0, %o0, %o0	! make %o0 nonnegative
+2:
+
+	! Ready to divide.  Compute size of quotient; scale comparand.
+	orcc	%o1, %g0, %o5
+	bne	1f
+	mov	%o0, %o3
+
+		! Divide by zero trap.  If it returns, return 0 (about as
+		! wrong as possible, but that is what SunOS does...).
+		ta	ST_DIV0
+		retl
+		clr	%o0
+
+1:
+	cmp	%o3, %o5			! if %o1 exceeds %o0, done
+	blu	Lgot_result		! (and algorithm fails otherwise)
+	clr	%o2
+	sethi	%hi(1 << (32 - 4 - 1)), %g1
+	cmp	%o3, %g1
+	blu	Lnot_really_big
+	clr	%o4
+
+	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+	! as our usual N-at-a-shot divide step will cause overflow and havoc.
+	! The number of bits in the result here is N*ITER+SC, where SC <= N.
+	! Compute ITER in an unorthodox manner: know we need to shift V into
+	! the top decade: so do not even bother to compare to R.
+	1:
+		cmp	%o5, %g1
+		bgeu	3f
+		mov	1, %g7
+		sll	%o5, 4, %o5
+		b	1b
+		add	%o4, 1, %o4
+
+	! Now compute %g7.
+	2:	addcc	%o5, %o5, %o5
+		bcc	Lnot_too_big
+		add	%g7, 1, %g7
+
+		! We get here if the %o1 overflowed while shifting.
+		! This means that %o3 has the high-order bit set.
+		! Restore %o5 and subtract from %o3.
+		sll	%g1, 4, %g1	! high order bit
+		srl	%o5, 1, %o5		! rest of %o5
+		add	%o5, %g1, %o5
+		b	Ldo_single_div
+		sub	%g7, 1, %g7
+
+	Lnot_too_big:
+	3:	cmp	%o5, %o3
+		blu	2b
+		nop
+		be	Ldo_single_div
+		nop
+	/* NB: these are commented out in the V8-Sparc manual as well */
+	/* (I do not understand this) */
+	! %o5 > %o3: went too far: back up 1 step
+	!	srl	%o5, 1, %o5
+	!	dec	%g7
+	! do single-bit divide steps
+	!
+	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+	! first divide step without thinking.  BUT, the others are conditional,
+	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+	! order bit set in the first step, just falling into the regular
+	! division loop will mess up the first time around.
+	! So we unroll slightly...
+	Ldo_single_div:
+		subcc	%g7, 1, %g7
+		bl	Lend_regular_divide
+		nop
+		sub	%o3, %o5, %o3
+		mov	1, %o2
+		b	Lend_single_divloop
+		nop
+	Lsingle_divloop:
+		sll	%o2, 1, %o2
+		bl	1f
+		srl	%o5, 1, %o5
+		! %o3 >= 0
+		sub	%o3, %o5, %o3
+		b	2f
+		add	%o2, 1, %o2
+	1:	! %o3 < 0
+		add	%o3, %o5, %o3
+		sub	%o2, 1, %o2
+	2:
+	Lend_single_divloop:
+		subcc	%g7, 1, %g7
+		bge	Lsingle_divloop
+		tst	%o3
+		b,a	Lend_regular_divide
+
+Lnot_really_big:
+1:
+	sll	%o5, 4, %o5
+	cmp	%o5, %o3
+	bleu	1b
+	addcc	%o4, 1, %o4
+	be	Lgot_result
+	sub	%o4, 1, %o4
+
+	tst	%o3	! set up for initial iteration
+Ldivloop:
+	sll	%o2, 4, %o2
+		! depth 1, accumulated bits 0
+	bl	L.1.16
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 2, accumulated bits 1
+	bl	L.2.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 3
+	bl	L.3.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 7
+	bl	L.4.23
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2+1), %o2
+	
+L.4.23:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2-1), %o2
+	
+	
+L.3.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 5
+	bl	L.4.21
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2+1), %o2
+	
+L.4.21:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2-1), %o2
+	
+	
+	
+L.2.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 1
+	bl	L.3.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 3
+	bl	L.4.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2+1), %o2
+	
+L.4.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2-1), %o2
+	
+	
+L.3.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 1
+	bl	L.4.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2+1), %o2
+	
+L.4.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2-1), %o2
+	
+	
+	
+	
+L.1.16:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 2, accumulated bits -1
+	bl	L.2.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -1
+	bl	L.3.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -1
+	bl	L.4.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2+1), %o2
+	
+L.4.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2-1), %o2
+	
+	
+L.3.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -3
+	bl	L.4.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2+1), %o2
+	
+L.4.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2-1), %o2
+	
+	
+	
+L.2.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -3
+	bl	L.3.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -5
+	bl	L.4.11
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2+1), %o2
+	
+L.4.11:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2-1), %o2
+	
+	
+L.3.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -7
+	bl	L.4.9
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2+1), %o2
+	
+L.4.9:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2-1), %o2
+	
+	
+	
+	
+	9:
+Lend_regular_divide:
+	subcc	%o4, 1, %o4
+	bge	Ldivloop
+	tst	%o3
+	bl,a	Lgot_result
+	! non-restoring fixup here (one instruction only!)
+	sub	%o2, 1, %o2
+
+
+Lgot_result:
+	! check to see if answer should be < 0
+	tst	%g6
+	bl,a	1f
+	sub %g0, %o2, %o2
+1:
+	retl
+	mov %o2, %o0
diff --git a/ldso/ldso/sparc/sysdep.h b/ldso/ldso/sparc/sysdep.h
new file mode 100644
index 000000000..1d4c0354f
--- /dev/null
+++ b/ldso/ldso/sparc/sysdep.h
@@ -0,0 +1,131 @@
+
+/*
+ * Various assembly language/system dependent hacks that are required
+ * so that we can minimize the amount of platform specific code.
+ */
+#define LINUXBIN
+
+/*
+ * Define this if the system uses RELOCA.
+ */
+#define ELF_USES_RELOCA
+
+/*
+ * Get the address of the Global offset table.  This must be absolute, not
+ * relative.
+ *
+ * The macro only copies register %l7 into X; it relies on %l7 already
+ * holding the GOT address at the point of use.
+ * NOTE(review): nothing in this header loads %l7 -- presumably the
+ * compiler's PIC prologue does; confirm for the toolchain in use.
+ */
+#define GET_GOT(X)     __asm__("\tmov %%l7,%0\n\t" : "=r" (X))
+
+/*
+ * Get a pointer to the argv array.  On many platforms this can be just
+ * the address of the first argument, on other platforms we need to
+ * do something a little more subtle here.  We assume that argc is stored
+ * at the word just below the argvp that we return here.
+ *
+ * On the Sparc the result is computed as %fp + 68; the ARGS parameter is
+ * not used.  Note that the expansion already ends in a semicolon.
+ */
+#define GET_ARGV(ARGVP, ARGS) __asm__("\tadd %%fp,68,%0\n" : "=r" (ARGVP));
+
+/*
+ * Initialization sequence for a GOT.  For the Sparc, this points to the
+ * PLT, and we need to initialize a couple of the slots.  The PLT should
+ * look like:
+ *
+ *		save %sp, -64, %sp
+ *		call _dl_linux_resolve
+ *		nop
+ *		.word implementation_dependent
+ *
+ * Slot 1 is built as the call opcode (0x40000000) or-ed with a word
+ * displacement: the byte distance from slot 1 itself (GOT_BASE + 4) to
+ * _dl_linux_resolve, shifted right by 2.  Slot 3 (the implementation
+ * dependent word) receives MODULE.
+ * NOTE(review): the "GOT_BASE + 4" reading assumes GOT_BASE points to
+ * 4-byte words -- confirm at the call site.
+ * The expansion is a brace block, not do { } while (0).
+ */
+#define INIT_GOT(GOT_BASE,MODULE) \
+{				\
+   GOT_BASE[0] = 0x9de3bfc0;  /* save %sp, -64, %sp */	\
+   GOT_BASE[1] = 0x40000000 | (((unsigned int) _dl_linux_resolve - (unsigned int) GOT_BASE - 4) >> 2);	\
+   GOT_BASE[2] = 0x01000000; /* nop */ 			\
+   GOT_BASE[3] = (int) MODULE;					\
+}
+
+/*
+ * Here is a macro to perform a relocation.  This is only used when
+ * bootstrapping the dynamic loader.
+ *
+ *   RELP    pointer to the relocation entry (r_info and r_addend are read)
+ *   REL     pointer to the word(s) being relocated
+ *   SYMBOL  resolved symbol value
+ *   LOAD    load address of the object
+ *
+ * Every argument is parenthesized in the expansion so that expression
+ * arguments are evaluated as a unit.  R_SPARC_NONE and R_SPARC_WDISP30 are
+ * accepted and ignored; any other unlisted type terminates the process via
+ * _dl_exit(1).  The expansion is a bare switch statement, so it does not
+ * need a trailing semicolon at the point of use.
+ */
+#define PERFORM_BOOTSTRAP_RELOC(RELP,REL,SYMBOL,LOAD) \
+	switch(ELF32_R_TYPE((RELP)->r_info)) {		\
+	case R_SPARC_32:				\
+	case R_SPARC_GLOB_DAT:				\
+	  *(REL) = (SYMBOL) + (RELP)->r_addend;		\
+	  break;					\
+	case R_SPARC_JMP_SLOT:				\
+	  /* sethi: upper 22 bits of SYMBOL into %g1 */	\
+	  (REL)[1] = 0x03000000 | (((SYMBOL) >> 10) & 0x3fffff);	\
+	  /* jmp %g1 + low 10 bits of SYMBOL */		\
+	  (REL)[2] = 0x81c06000 | ((SYMBOL) & 0x3ff);	\
+	  break;					\
+	case R_SPARC_NONE:				\
+	case R_SPARC_WDISP30:				\
+	  break;					\
+	case R_SPARC_RELATIVE:				\
+	  *(REL) += (unsigned int) (LOAD) + (RELP)->r_addend; \
+	  break;					\
+	default:					\
+	  _dl_exit(1);					\
+	}
+
+
+/*
+ * Transfer control to the user's application, once the dynamic loader
+ * is done.  The crt calls atexit with $g1 if not null, so we need to
+ * ensure that it contains NULL.
+ *
+ * The sequence clears %g1 (add %g0,%g0,%g1), jumps with jmpl to the
+ * address held in _dl_elf_main, and executes the restore in the delay
+ * slot of that jump.
+ */
+
+#define START()		\
+	__asm__ volatile ( \
+	                   "add %%g0,%%g0,%%g1\n\t" \
+			   "jmpl %0, %%o7\n\t"	\
+			   "restore %%g0,%%g0,%%g0\n\t" \
+		    	: /*"=r" (status) */ :	\
+		    	  "r" (_dl_elf_main): "g1", "o0", "o1")
+
+
+
+/* Here we define the magic numbers that this dynamic loader should accept */
+
+#define MAGIC1 EM_SPARC
+#undef  MAGIC2
+/* Used for error messages */
+#define ELF_TARGET "Sparc"
+
+#ifndef COMPILE_ASM
+extern unsigned int _dl_linux_resolver(unsigned int reloc_entry,
+					unsigned int * i);
+#endif
+
+/*
+ * Define this if you want a dynamic loader that works on Solaris.
+ */
+#define SOLARIS_COMPATIBLE
+
+/*
+ * Define this because we do not want to call .udiv in the library.
+ * Change on the plans -miguel:
+ * We just statically link against .udiv.  This is required
+ * if we want to be able to run on Sun4c machines.
+ *
+ * do_rem(result, n, base) stores the remainder of n / base in `result`.
+ * With USE_V8 it is computed inline as n - (n / base) * base using the
+ * sdiv and smul instructions with %l6 as scratch; otherwise it calls
+ * _dl_urem().
+ * NOTE(review): in the USE_V8 form the asm never writes __res, yet __res
+ * is the value of the statement expression -- only `result` is meaningful.
+ * NOTE(review): the non-V8 form is a plain assignment that already ends in
+ * a semicolon, so the two forms are not interchangeable as expressions.
+ */
+
+/* We now link .urem against this one */
+#ifdef USE_V8
+#define do_rem(result,n,base) ({ \
+volatile int __res; \
+__asm__("mov %%g0,%%Y\n\t" \
+	"sdiv %2,%3,%%l6\n\t" \
+	 "smul %%l6,%3,%%l6\n\t" \
+	 "sub  %2,%%l6,%0\n\t" \
+	 :"=r" (result),"=r" (__res):"r" (n),"r"(base) : "l6" ); __res; })
+#else
+#define do_rem(a,b,c) a = _dl_urem (b,c);
+#endif
+/*
+ * dbx wants the binder to have a specific name.  Mustn't disappoint it.
+ */
+#ifdef SOLARIS_COMPATIBLE
+#define _dl_linux_resolve _elf_rtbndr
+#endif
+
diff --git a/ldso/ldso/sparc/udiv.S b/ldso/ldso/sparc/udiv.S
new file mode 100644
index 000000000..df4e5385e
--- /dev/null
+++ b/ldso/ldso/sparc/udiv.S
@@ -0,0 +1,351 @@
+   /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters (shown with the values used to generate this file):
+ *  NAME=.udiv	name of function to generate (exported here as _dl_udiv)
+ *  OP=div	OP=div => %o0 / %o1; OP=rem => %o0 % %o1
+ *  S=false	S=true => signed; S=false => unsigned
+ *
+ * Algorithm parameters:
+ *  N		how many bits per iteration we try to get (4)
+ *  WORDSIZE	total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS	number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q		the partial quotient under development (initially 0)
+ *  R		the remainder so far, initially the dividend
+ *  ITER	number of main division loop iterations required;
+ *		equal to ceil(log2(quotient) / N).  Note that this
+ *		is the log base (2^N) of the quotient.
+ *  V		the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+
+
+#include "DEFS.h"
+#ifndef __linux__
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include "/usr/include/machine/trap.h"
+#endif
+#else
+#include <asm/traps.h>
+#endif
+
+FUNC(_dl_udiv)
+	/* Unsigned 32-bit division: computes %o0 / %o1 and returns the quotient in %o0. */
+	! Ready to divide.  Compute size of quotient; scale comparand.
+	orcc	%o1, %g0, %o5
+	bne	1f
+	mov	%o0, %o3
+	/* Registers: %o3 = R (remainder), %o5 = V (comparand), %o2 = Q (quotient), %o4 = ITER, %g7 = single-bit step count. */
+		! Divide by zero trap.  If it returns, return 0 (about as
+		! wrong as possible, but that is what SunOS does...).
+		ta	ST_DIV0
+		retl
+		clr	%o0
+
+1:
+	cmp	%o3, %o5			! if %o1 exceeds %o0, done
+	blu	Lgot_result		! (and algorithm fails otherwise)
+	clr	%o2
+	sethi	%hi(1 << (32 - 4 - 1)), %g1
+	cmp	%o3, %g1
+	blu	Lnot_really_big
+	clr	%o4
+
+	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+	! as our usual N-at-a-shot divide step will cause overflow and havoc.
+	! The number of bits in the result here is N*ITER+SC, where SC <= N.
+	! Compute ITER in an unorthodox manner: know we need to shift V into
+	! the top decade: so do not even bother to compare to R.
+	1:
+		cmp	%o5, %g1
+		bgeu	3f
+		mov	1, %g7
+		sll	%o5, 4, %o5
+		b	1b
+		add	%o4, 1, %o4
+
+	! Now compute %g7.
+	2:	addcc	%o5, %o5, %o5
+		bcc	Lnot_too_big
+		add	%g7, 1, %g7
+
+		! We get here if the %o1 overflowed while shifting.
+		! This means that %o3 has the high-order bit set.
+		! Restore %o5 and subtract from %o3.
+		sll	%g1, 4, %g1	! high order bit
+		srl	%o5, 1, %o5		! rest of %o5
+		add	%o5, %g1, %o5
+		b	Ldo_single_div
+		sub	%g7, 1, %g7
+
+	Lnot_too_big:
+	3:	cmp	%o5, %o3
+		blu	2b
+		nop
+		be	Ldo_single_div
+		nop
+	/* NB: these are commented out in the V8-Sparc manual as well */
+	/* (I do not understand this) */
+	! %o5 > %o3: went too far: back up 1 step
+	!	srl	%o5, 1, %o5
+	!	dec	%g7
+	! do single-bit divide steps
+	!
+	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+	! first divide step without thinking.  BUT, the others are conditional,
+	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+	! order bit set in the first step, just falling into the regular
+	! division loop will mess up the first time around.
+	! So we unroll slightly...
+	Ldo_single_div:
+		subcc	%g7, 1, %g7
+		bl	Lend_regular_divide
+		nop
+		sub	%o3, %o5, %o3
+		mov	1, %o2
+		b	Lend_single_divloop
+		nop
+	Lsingle_divloop:
+		sll	%o2, 1, %o2
+		bl	1f
+		srl	%o5, 1, %o5
+		! %o3 >= 0
+		sub	%o3, %o5, %o3
+		b	2f
+		add	%o2, 1, %o2
+	1:	! %o3 < 0
+		add	%o3, %o5, %o3
+		sub	%o2, 1, %o2
+	2:
+	Lend_single_divloop:
+		subcc	%g7, 1, %g7
+		bge	Lsingle_divloop
+		tst	%o3
+		b,a	Lend_regular_divide
+
+Lnot_really_big:
+1:
+	sll	%o5, 4, %o5
+	cmp	%o5, %o3
+	bleu	1b
+	addcc	%o4, 1, %o4
+	be	Lgot_result
+	sub	%o4, 1, %o4
+
+	tst	%o3	! set up for initial iteration
+Ldivloop:
+	sll	%o2, 4, %o2
+		! depth 1, accumulated bits 0
+	bl	L.1.16
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 2, accumulated bits 1
+	bl	L.2.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 3
+	bl	L.3.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 7
+	bl	L.4.23
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2+1), %o2
+	
+L.4.23:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2-1), %o2
+	
+	
+L.3.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 5
+	bl	L.4.21
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2+1), %o2
+	
+L.4.21:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2-1), %o2
+	
+	
+	
+L.2.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 1
+	bl	L.3.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 3
+	bl	L.4.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2+1), %o2
+	
+L.4.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2-1), %o2
+	
+	
+L.3.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 1
+	bl	L.4.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2+1), %o2
+	
+L.4.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2-1), %o2
+	
+	
+	
+	
+L.1.16:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 2, accumulated bits -1
+	bl	L.2.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -1
+	bl	L.3.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -1
+	bl	L.4.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2+1), %o2
+	
+L.4.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2-1), %o2
+	
+	
+L.3.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -3
+	bl	L.4.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2+1), %o2
+	
+L.4.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2-1), %o2
+	
+	
+	
+L.2.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -3
+	bl	L.3.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -5
+	bl	L.4.11
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2+1), %o2
+	
+L.4.11:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2-1), %o2
+	
+	
+L.3.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -7
+	bl	L.4.9
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2+1), %o2
+	
+L.4.9:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2-1), %o2
+	
+	
+	
+	
+	9:
+Lend_regular_divide:
+	subcc	%o4, 1, %o4
+	bge	Ldivloop
+	tst	%o3
+	bl,a	Lgot_result
+	! non-restoring fixup here (one instruction only!)
+	sub	%o2, 1, %o2
+	/* (annulled delay slot: the quotient is decremented only when %o3 is negative) */
+
+Lgot_result:
+	/* The quotient has been accumulated in %o2; return it in %o0. */
+	retl
+	mov %o2, %o0
diff --git a/ldso/ldso/sparc/umul.S b/ldso/ldso/sparc/umul.S
new file mode 100644
index 000000000..7a26c295c
--- /dev/null
+++ b/ldso/ldso/sparc/umul.S
@@ -0,0 +1,153 @@
+/*
+ * Unsigned multiply.  Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the
+ * upper 32 bits of the 64-bit product).
+ *
+ * This code optimizes short (less than 13-bit) multiplies.  Short
+ * multiplies require 25 instruction cycles, and long ones require
+ * 45 instruction cycles.
+ *
+ * On return, overflow has occurred (%o1 is not zero) if and only if
+ * the Z condition code is clear, allowing, e.g., the following:
+ *
+ *	call	.umul
+ *	nop
+ *	bnz	overflow	(or tnz)
+ */
+
+#include "DEFS.h"
+FUNC(.umul)
+	or	%o0, %o1, %o4
+	mov	%o0, %y		! multiplier -> Y
+	andncc	%o4, 0xfff, %g0	! test bits 12..31 of *both* args
+	be	Lmul_shortway	! if zero, can do it the short way
+	andcc	%g0, %g0, %o4	! zero the partial product and clear N and V
+	/* Branch not taken: an operand has a bit set in 12..31, so do the full-length multiply. */
+	/*
+	 * Long multiply.  32 steps, followed by a final shift step.
+	 */
+	mulscc	%o4, %o1, %o4	! 1
+	mulscc	%o4, %o1, %o4	! 2
+	mulscc	%o4, %o1, %o4	! 3
+	mulscc	%o4, %o1, %o4	! 4
+	mulscc	%o4, %o1, %o4	! 5
+	mulscc	%o4, %o1, %o4	! 6
+	mulscc	%o4, %o1, %o4	! 7
+	mulscc	%o4, %o1, %o4	! 8
+	mulscc	%o4, %o1, %o4	! 9
+	mulscc	%o4, %o1, %o4	! 10
+	mulscc	%o4, %o1, %o4	! 11
+	mulscc	%o4, %o1, %o4	! 12
+	mulscc	%o4, %o1, %o4	! 13
+	mulscc	%o4, %o1, %o4	! 14
+	mulscc	%o4, %o1, %o4	! 15
+	mulscc	%o4, %o1, %o4	! 16
+	mulscc	%o4, %o1, %o4	! 17
+	mulscc	%o4, %o1, %o4	! 18
+	mulscc	%o4, %o1, %o4	! 19
+	mulscc	%o4, %o1, %o4	! 20
+	mulscc	%o4, %o1, %o4	! 21
+	mulscc	%o4, %o1, %o4	! 22
+	mulscc	%o4, %o1, %o4	! 23
+	mulscc	%o4, %o1, %o4	! 24
+	mulscc	%o4, %o1, %o4	! 25
+	mulscc	%o4, %o1, %o4	! 26
+	mulscc	%o4, %o1, %o4	! 27
+	mulscc	%o4, %o1, %o4	! 28
+	mulscc	%o4, %o1, %o4	! 29
+	mulscc	%o4, %o1, %o4	! 30
+	mulscc	%o4, %o1, %o4	! 31
+	mulscc	%o4, %o1, %o4	! 32
+	mulscc	%o4, %g0, %o4	! final shift
+
+
+	/*
+	 * Normally, with the shift-and-add approach, if both numbers are
+	 * positive you get the correct result.  With 32-bit two's-complement
+	 * numbers, -x is represented as
+	 *
+	 *		  x		    32
+	 *	( 2  -  ------ ) mod 2  *  2
+	 *		   32
+	 *		  2
+	 *
+	 * (the `mod 2' subtracts 1 from 1.bbbb).  To avoid lots of 2^32s,
+	 * we can treat this as if the radix point were just to the left
+	 * of the sign bit (multiply by 2^32), and get
+	 *
+	 *	-x  =  (2 - x) mod 2
+	 *
+	 * Then, ignoring the `mod 2's for convenience:
+	 *
+	 *   x *  y	= xy
+	 *  -x *  y	= 2y - xy
+	 *   x * -y	= 2x - xy
+	 *  -x * -y	= 4 - 2x - 2y + xy
+	 *
+	 * For signed multiplies, we subtract (x << 32) from the partial
+	 * product to fix this problem for negative multipliers (see mul.s).
+	 * Because of the way the shift into the partial product is calculated
+	 * (N xor V), this term is automatically removed for the multiplicand,
+	 * so we don't have to adjust.
+	 *
+	 * But for unsigned multiplies, the high order bit wasn't a sign bit,
+	 * and the correction is wrong.  So for unsigned multiplies where the
+	 * high order bit is one, we end up with xy - (y << 32).  To fix it
+	 * we add y << 32.
+	 */
+#if 0
+	tst	%o1
+	bl,a	1f		! if %o1 < 0 (high order bit = 1),
+	add	%o4, %o0, %o4	! %o4 += %o0 (add y to upper half)
+1:	rd	%y, %o0		! get lower half of product
+	retl
+	addcc	%o4, %g0, %o1	! put upper half in place and set Z for %o1==0
+#else
+	/* Faster code from tege@sics.se.  */
+	sra	%o1, 31, %o2	! make mask from sign bit
+	and	%o0, %o2, %o2	! %o2 = 0 or %o0, depending on sign of %o1
+	rd	%y, %o0		! get lower half of product
+	retl
+	addcc	%o4, %o2, %o1	! add compensation and put upper half in place
+#endif
+
+Lmul_shortway:
+	/*
+	 * Short multiply.  12 steps, followed by a final shift step.
+	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
+	 * but there is no problem with %o0 being negative (unlike above),
+	 * and overflow is impossible (the answer is at most 24 bits long).
+	 */
+	mulscc	%o4, %o1, %o4	! 1
+	mulscc	%o4, %o1, %o4	! 2
+	mulscc	%o4, %o1, %o4	! 3
+	mulscc	%o4, %o1, %o4	! 4
+	mulscc	%o4, %o1, %o4	! 5
+	mulscc	%o4, %o1, %o4	! 6
+	mulscc	%o4, %o1, %o4	! 7
+	mulscc	%o4, %o1, %o4	! 8
+	mulscc	%o4, %o1, %o4	! 9
+	mulscc	%o4, %o1, %o4	! 10
+	mulscc	%o4, %o1, %o4	! 11
+	mulscc	%o4, %o1, %o4	! 12
+	mulscc	%o4, %g0, %o4	! final shift
+
+	/*
+	 * %o4 has 20 of the bits that should be in the result; %y has
+	 * the bottom 12 (as %y's top 12).  That is:
+	 *
+	 *	  %o4		    %y
+	 * +----------------+----------------+
+	 * | -12- |   -20-  | -12- |   -20-  |
+	 * +------(---------+------)---------+
+	 *	   -----result-----
+	 *
+	 * The 12 bits of %o4 left of the `result' area are all zero;
+	 * in fact, all top 20 bits of %o4 are zero.
+	 */
+	/* Combine the two pieces into the low word of the product in %o0; the high word %o1 is zero. */
+	rd	%y, %o5
+	sll	%o4, 12, %o0	! shift middle bits left 12
+	srl	%o5, 20, %o5	! shift low bits right 20
+	or	%o5, %o0, %o0
+	retl
+	addcc	%g0, %g0, %o1	! %o1 = zero, and set Z
diff --git a/ldso/ldso/sparc/urem.S b/ldso/ldso/sparc/urem.S
new file mode 100644
index 000000000..8d304038b
--- /dev/null
+++ b/ldso/ldso/sparc/urem.S
@@ -0,0 +1,352 @@
+   /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters (shown with the values used to generate this file):
+ *  NAME=.urem	name of function to generate (exported here as _dl_urem)
+ *  OP=rem	OP=div => %o0 / %o1; OP=rem => %o0 % %o1
+ *  S=false	S=true => signed; S=false => unsigned
+ *
+ * Algorithm parameters:
+ *  N		how many bits per iteration we try to get (4)
+ *  WORDSIZE	total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS	number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q		the partial quotient under development (initially 0)
+ *  R		the remainder so far, initially the dividend
+ *  ITER	number of main division loop iterations required;
+ *		equal to ceil(log2(quotient) / N).  Note that this
+ *		is the log base (2^N) of the quotient.
+ *  V		the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+
+
+
+#include "DEFS.h"
+#ifdef __linux__
+#include <asm/traps.h>
+#else
+#ifdef __svr4__
+#include <sys/trap.h>
+#else
+#include "/usr/include/machine/trap.h"
+#endif
+#endif
+
+FUNC(_dl_urem)
+	/* Unsigned 32-bit remainder: computes %o0 % %o1 and returns the remainder in %o0. */
+	! Ready to divide.  Compute size of quotient; scale comparand.
+	orcc	%o1, %g0, %o5
+	bne	1f
+	mov	%o0, %o3
+	/* Registers: %o3 = R (remainder), %o5 = V (comparand), %o2 = Q (quotient), %o4 = ITER, %g7 = single-bit step count. */
+		! Divide by zero trap.  If it returns, return 0 (about as
+		! wrong as possible, but that is what SunOS does...).
+		ta	ST_DIV0
+		retl
+		clr	%o0
+
+1:
+	cmp	%o3, %o5			! if %o1 exceeds %o0, done
+	blu	Lgot_result		! (and algorithm fails otherwise)
+	clr	%o2
+	sethi	%hi(1 << (32 - 4 - 1)), %g1
+	cmp	%o3, %g1
+	blu	Lnot_really_big
+	clr	%o4
+
+	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+	! as our usual N-at-a-shot divide step will cause overflow and havoc.
+	! The number of bits in the result here is N*ITER+SC, where SC <= N.
+	! Compute ITER in an unorthodox manner: know we need to shift V into
+	! the top decade: so do not even bother to compare to R.
+	1:
+		cmp	%o5, %g1
+		bgeu	3f
+		mov	1, %g7
+		sll	%o5, 4, %o5
+		b	1b
+		add	%o4, 1, %o4
+
+	! Now compute %g7.
+	2:	addcc	%o5, %o5, %o5
+		bcc	Lnot_too_big
+		add	%g7, 1, %g7
+
+		! We get here if the %o1 overflowed while shifting.
+		! This means that %o3 has the high-order bit set.
+		! Restore %o5 and subtract from %o3.
+		sll	%g1, 4, %g1	! high order bit
+		srl	%o5, 1, %o5		! rest of %o5
+		add	%o5, %g1, %o5
+		b	Ldo_single_div
+		sub	%g7, 1, %g7
+
+	Lnot_too_big:
+	3:	cmp	%o5, %o3
+		blu	2b
+		nop
+		be	Ldo_single_div
+		nop
+	/* NB: these are commented out in the V8-Sparc manual as well */
+	/* (I do not understand this) */
+	! %o5 > %o3: went too far: back up 1 step
+	!	srl	%o5, 1, %o5
+	!	dec	%g7
+	! do single-bit divide steps
+	!
+	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+	! first divide step without thinking.  BUT, the others are conditional,
+	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+	! order bit set in the first step, just falling into the regular
+	! division loop will mess up the first time around.
+	! So we unroll slightly...
+	Ldo_single_div:
+		subcc	%g7, 1, %g7
+		bl	Lend_regular_divide
+		nop
+		sub	%o3, %o5, %o3
+		mov	1, %o2
+		b	Lend_single_divloop
+		nop
+	Lsingle_divloop:
+		sll	%o2, 1, %o2
+		bl	1f
+		srl	%o5, 1, %o5
+		! %o3 >= 0
+		sub	%o3, %o5, %o3
+		b	2f
+		add	%o2, 1, %o2
+	1:	! %o3 < 0
+		add	%o3, %o5, %o3
+		sub	%o2, 1, %o2
+	2:
+	Lend_single_divloop:
+		subcc	%g7, 1, %g7
+		bge	Lsingle_divloop
+		tst	%o3
+		b,a	Lend_regular_divide
+
+Lnot_really_big:
+1:
+	sll	%o5, 4, %o5
+	cmp	%o5, %o3
+	bleu	1b
+	addcc	%o4, 1, %o4
+	be	Lgot_result
+	sub	%o4, 1, %o4
+
+	tst	%o3	! set up for initial iteration
+Ldivloop:
+	sll	%o2, 4, %o2
+		! depth 1, accumulated bits 0
+	bl	L.1.16
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 2, accumulated bits 1
+	bl	L.2.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 3
+	bl	L.3.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 7
+	bl	L.4.23
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2+1), %o2
+	
+L.4.23:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2-1), %o2
+	
+	
+L.3.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 5
+	bl	L.4.21
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2+1), %o2
+	
+L.4.21:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2-1), %o2
+	
+	
+	
+L.2.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 1
+	bl	L.3.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 3
+	bl	L.4.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2+1), %o2
+	
+L.4.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2-1), %o2
+	
+	
+L.3.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 1
+	bl	L.4.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2+1), %o2
+	
+L.4.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2-1), %o2
+	
+	
+	
+	
+L.1.16:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 2, accumulated bits -1
+	bl	L.2.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -1
+	bl	L.3.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -1
+	bl	L.4.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2+1), %o2
+	
+L.4.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2-1), %o2
+	
+	
+L.3.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -3
+	bl	L.4.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2+1), %o2
+	
+L.4.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2-1), %o2
+	
+	
+	
+L.2.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -3
+	bl	L.3.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -5
+	bl	L.4.11
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2+1), %o2
+	
+L.4.11:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2-1), %o2
+	
+	
+L.3.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -7
+	bl	L.4.9
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2+1), %o2
+	
+L.4.9:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2-1), %o2
+	
+	
+	
+	
+	9:
+Lend_regular_divide:
+	subcc	%o4, 1, %o4
+	bge	Ldivloop
+	tst	%o3
+	bl,a	Lgot_result
+	! non-restoring fixup here (one instruction only!)
+	add	%o3, %o1, %o3
+	/* (annulled delay slot: the divisor %o1 is added back only when %o3 is negative) */
+
+Lgot_result:
+	/* The remainder is left in %o3; return it in %o0. */
+	retl
+	mov %o3, %o0