diff options
Diffstat (limited to 'ldso/ldso/sparc')
-rw-r--r-- | ldso/ldso/sparc/DEFS.h | 5 | ||||
-rw-r--r-- | ldso/ldso/sparc/dl-sysdep.h | 131 | ||||
-rw-r--r-- | ldso/ldso/sparc/elfinterp.c | 355 | ||||
-rw-r--r-- | ldso/ldso/sparc/ld_sysdep.h | 131 | ||||
-rw-r--r-- | ldso/ldso/sparc/resolve.S | 25 | ||||
-rw-r--r-- | ldso/ldso/sparc/sdiv.S | 369 | ||||
-rw-r--r-- | ldso/ldso/sparc/sysdep.h | 131 | ||||
-rw-r--r-- | ldso/ldso/sparc/udiv.S | 351 | ||||
-rw-r--r-- | ldso/ldso/sparc/umul.S | 153 | ||||
-rw-r--r-- | ldso/ldso/sparc/urem.S | 352 |
10 files changed, 2003 insertions, 0 deletions
diff --git a/ldso/ldso/sparc/DEFS.h b/ldso/ldso/sparc/DEFS.h new file mode 100644 index 000000000..4b9abccfd --- /dev/null +++ b/ldso/ldso/sparc/DEFS.h @@ -0,0 +1,5 @@ +#define FUNC(name) \ + .global name; \ + .type name,@function; \ + .align 4; \ + name: diff --git a/ldso/ldso/sparc/dl-sysdep.h b/ldso/ldso/sparc/dl-sysdep.h new file mode 100644 index 000000000..1d4c0354f --- /dev/null +++ b/ldso/ldso/sparc/dl-sysdep.h @@ -0,0 +1,131 @@ + +/* + * Various assmbly language/system dependent hacks that are required + * so that we can minimize the amount of platform specific code. + */ +#define LINUXBIN + +/* + * Define this if the system uses RELOCA. + */ +#define ELF_USES_RELOCA + +/* + * Get the address of the Global offset table. This must be absolute, not + * relative. + */ +#define GET_GOT(X) __asm__("\tmov %%l7,%0\n\t" : "=r" (X)) + +/* + * Get a pointer to the argv array. On many platforms this can be just + * the address if the first argument, on other platforms we need to + * do something a little more subtle here. We assume that argc is stored + * at the word just below the argvp that we return here. + */ +#define GET_ARGV(ARGVP, ARGS) __asm__("\tadd %%fp,68,%0\n" : "=r" (ARGVP)); + +/* + * Initialization sequence for a GOT. For the Sparc, this points to the + * PLT, and we need to initialize a couple of the slots. The PLT should + * look like: + * + * save %sp, -64, %sp + * call _dl_linux_resolve + * nop + * .word implementation_dependent + */ +#define INIT_GOT(GOT_BASE,MODULE) \ +{ \ + GOT_BASE[0] = 0x9de3bfc0; /* save %sp, -64, %sp */ \ + GOT_BASE[1] = 0x40000000 | (((unsigned int) _dl_linux_resolve - (unsigned int) GOT_BASE - 4) >> 2); \ + GOT_BASE[2] = 0x01000000; /* nop */ \ + GOT_BASE[3] = (int) MODULE; \ +} + +/* + * Here is a macro to perform a relocation. This is only used when + * bootstrapping the dynamic loader. + */ +#define PERFORM_BOOTSTRAP_RELOC(RELP,REL,SYMBOL,LOAD) \ + switch(ELF32_R_TYPE((RELP)->r_info)) { \ + case R_SPARC_32: \ + *REL = SYMBOL + (RELP)->r_addend; \ + break; \ + case R_SPARC_GLOB_DAT: \ + *REL = SYMBOL + (RELP)->r_addend; \ + break; \ + case R_SPARC_JMP_SLOT: \ + REL[1] = 0x03000000 | ((SYMBOL >> 10) & 0x3fffff); \ + REL[2] = 0x81c06000 | (SYMBOL & 0x3ff); \ + break; \ + case R_SPARC_NONE: \ + break; \ + case R_SPARC_WDISP30: \ + break; \ + case R_SPARC_RELATIVE: \ + *REL += (unsigned int) LOAD + (RELP)->r_addend; \ + break; \ + default: \ + _dl_exit(1); \ + } + + +/* + * Transfer control to the user's application, once the dynamic loader + * is done. The crt calls atexit with $g1 if not null, so we need to + * ensure that it contains NULL. + */ + +#define START() \ + __asm__ volatile ( \ + "add %%g0,%%g0,%%g1\n\t" \ + "jmpl %0, %%o7\n\t" \ + "restore %%g0,%%g0,%%g0\n\t" \ + : /*"=r" (status) */ : \ + "r" (_dl_elf_main): "g1", "o0", "o1") + + + +/* Here we define the magic numbers that this dynamic loader should accept */ + +#define MAGIC1 EM_SPARC +#undef MAGIC2 +/* Used for error messages */ +#define ELF_TARGET "Sparc" + +#ifndef COMPILE_ASM +extern unsigned int _dl_linux_resolver(unsigned int reloc_entry, + unsigned int * i); +#endif + +/* + * Define this if you want a dynamic loader that works on Solaris. + */ +#define SOLARIS_COMPATIBLE + +/* + * Define this because we do not want to call .udiv in the library. + * Change on the plans -miguel: + * We just statically link against .udiv. This is required + * if we want to be able to run on Sun4c machines. + */ + +/* We now link .urem against this one */ +#ifdef USE_V8 +#define do_rem(result,n,base) ({ \ +volatile int __res; \ +__asm__("mov %%g0,%%Y\n\t" \ + "sdiv %2,%3,%%l6\n\t" \ + "smul %%l6,%3,%%l6\n\t" \ + "sub %2,%%l6,%0\n\t" \ + :"=r" (result),"=r" (__res):"r" (n),"r"(base) : "l6" ); __res; }) +#else +#define do_rem(a,b,c) a = _dl_urem (b,c); +#endif +/* + * dbx wants the binder to have a specific name. Mustn't disappoint it. + */ +#ifdef SOLARIS_COMPATIBLE +#define _dl_linux_resolve _elf_rtbndr +#endif + diff --git a/ldso/ldso/sparc/elfinterp.c b/ldso/ldso/sparc/elfinterp.c new file mode 100644 index 000000000..6f0d9f8fd --- /dev/null +++ b/ldso/ldso/sparc/elfinterp.c @@ -0,0 +1,355 @@ +/* Run an ELF binary on a linux system. + + Copyright (C) 1995, Eric Youngdale. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef VERBOSE_DLINKER +#define VERBOSE_DLINKER +#endif +#ifdef VERBOSE_DLINKER +static char * _dl_reltypes[] = { "R_SPARC_NONE", "R_SPARC_8", + "R_SPARC_16", "R_SPARC_32", "R_SPARC_DISP8", "R_SPARC_DISP16", + "R_SPARC_DISP32", "R_SPARC_WDISP30", "R_SPARC_WDISP22", + "R_SPARC_HI22", "R_SPARC_22", "R_SPARC_13", "R_SPARC_LO10", + "R_SPARC_GOT10", "R_SPARC_GOT13", "R_SPARC_GOT22", "R_SPARC_PC10", + "R_SPARC_PC22", "R_SPARC_WPLT30", "R_SPARC_COPY", + "R_SPARC_GLOB_DAT", "R_SPARC_JMP_SLOT", "R_SPARC_RELATIVE", + "R_SPARC_UA32"}; +#endif + +/* Program to load an ELF binary on a linux system, and run it. +References to symbols in sharable libraries can be resolved by either +an ELF sharable library or a linux style of shared library. */ + +/* Disclaimer: I have never seen any AT&T source code for SVr4, nor have + I ever taken any courses on internals. This program was developed using + information available through the book "UNIX SYSTEM V RELEASE 4, + Programmers guide: Ansi C and Programming Support Tools", which did + a more than adequate job of explaining everything required to get this + working. */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/unistd.h> +/*#include <stdlib.h>*/ +#include "string.h" +#include <linux/unistd.h> +#include <linux/fcntl.h> +#include "hash.h" +#include "linuxelf.h" +#include "sysdep.h" +#include "../syscall.h" +#include "../string.h" + +#define SVR4_COMPATIBILITY + +extern char *_dl_progname; + +extern _dl_linux_resolve(void); + +unsigned int _dl_linux_resolver(unsigned int reloc_entry, unsigned int * plt) +{ + int reloc_type; + struct elf32_rela * this_reloc; + char * strtab; + struct elf32_sym * symtab; + struct elf32_rela * rel_addr; + struct elf_resolve * tpnt; + int symtab_index; + char * new_addr; + char ** got_addr; + unsigned int instr_addr; + tpnt = (struct elf_resolve *) plt[2]; + + rel_addr = (struct elf32_rela *) (tpnt->dynamic_info[DT_JMPREL] + + tpnt->loadaddr); + + /* + * Generate the correct relocation index into the .rela.plt section. + */ + reloc_entry = (reloc_entry >> 12) - 0xc; + + this_reloc = (struct elf32_rela *) ((char *) rel_addr + reloc_entry); + + reloc_type = ELF32_R_TYPE(this_reloc->r_info); + symtab_index = ELF32_R_SYM(this_reloc->r_info); + + symtab = (struct elf32_sym *) (tpnt->dynamic_info[DT_SYMTAB] + tpnt->loadaddr); + strtab = (char *) (tpnt->dynamic_info[DT_STRTAB] + tpnt->loadaddr); + + _dl_fdprintf(2, "tpnt = %x\n", tpnt); + _dl_fdprintf(2, "reloc = %x\n", this_reloc); + _dl_fdprintf(2, "symtab = %x\n", symtab); + _dl_fdprintf(2, "strtab = %x\n", strtab); + + + if (reloc_type != R_SPARC_JMP_SLOT) { + _dl_fdprintf(2, "%s: incorrect relocation type in jump relocations (%d)\n", + _dl_progname, reloc_type); + _dl_exit(30); + }; + + /* Address of jump instruction to fix up */ + instr_addr = ((int)this_reloc->r_offset + (int)tpnt->loadaddr); + got_addr = (char **) instr_addr; + + _dl_fdprintf(2, "symtab_index %d\n", symtab_index); + +#ifdef DEBUG + _dl_fdprintf(2, "Resolving symbol %s\n", + strtab + symtab[symtab_index].st_name); +#endif + + /* Get the address of the GOT entry */ + new_addr = _dl_find_hash(strtab + symtab[symtab_index].st_name, + tpnt->symbol_scope, (int) got_addr, tpnt, 0); + if(!new_addr) { + _dl_fdprintf(2, "%s: can't resolve symbol '%s'\n", + _dl_progname, strtab + symtab[symtab_index].st_name); + _dl_exit(31); + }; +/* #define DEBUG_LIBRARY */ +#ifdef DEBUG_LIBRARY + if((unsigned int) got_addr < 0x40000000) { + _dl_fdprintf(2, "Calling library function: %s\n", + strtab + symtab[symtab_index].st_name); + } else { + got_addr[1] = (char *) (0x03000000 | (((unsigned int) new_addr >> 10) & 0x3fffff)); + got_addr[2] = (char *) (0x81c06000 | ((unsigned int) new_addr & 0x3ff)); + } +#else + got_addr[1] = (char *) (0x03000000 | (((unsigned int) new_addr >> 10) & 0x3fffff)); + got_addr[2] = (char *) (0x81c06000 | ((unsigned int) new_addr & 0x3ff)); +#endif + _dl_fdprintf(2, "Address = %x\n",new_addr); + _dl_exit(32); + + return (unsigned int) new_addr; +} + +void _dl_parse_lazy_relocation_information(struct elf_resolve * tpnt, int rel_addr, + int rel_size, int type){ + int i; + char * strtab; + int reloc_type; + int symtab_index; + struct elf32_sym * symtab; + struct elf32_rela * rpnt; + unsigned int * reloc_addr; + + /* Now parse the relocation information */ + rpnt = (struct elf32_rela *) (rel_addr + tpnt->loadaddr); + + symtab = (struct elf32_sym *) (tpnt->dynamic_info[DT_SYMTAB] + tpnt->loadaddr); + strtab = ( char *) (tpnt->dynamic_info[DT_STRTAB] + tpnt->loadaddr); + + for(i=0; i< rel_size; i += sizeof(struct elf32_rela), rpnt++){ + reloc_addr = (int *) (tpnt->loadaddr + (int)rpnt->r_offset); + reloc_type = ELF32_R_TYPE(rpnt->r_info); + symtab_index = ELF32_R_SYM(rpnt->r_info); + + /* When the dynamic linker bootstrapped itself, it resolved some symbols. + Make sure we do not do them again */ + if(!symtab_index && tpnt->libtype == program_interpreter) continue; + if(symtab_index && tpnt->libtype == program_interpreter && + _dl_symbol(strtab + symtab[symtab_index].st_name)) + continue; + + switch(reloc_type){ + case R_SPARC_NONE: + break; + case R_SPARC_JMP_SLOT: + break; + default: + _dl_fdprintf(2, "%s: (LAZY) can't handle reloc type ", _dl_progname); +#ifdef VERBOSE_DLINKER + _dl_fdprintf(2, "%s ", _dl_reltypes[reloc_type]); +#endif + if(symtab_index) _dl_fdprintf(2, "'%s'\n", + strtab + symtab[symtab_index].st_name); + _dl_exit(33); + }; + }; +} + +int _dl_parse_relocation_information(struct elf_resolve * tpnt, int rel_addr, + int rel_size, int type){ + int i; + char * strtab; + int reloc_type; + int goof = 0; + struct elf32_sym * symtab; + struct elf32_rela * rpnt; + unsigned int * reloc_addr; + unsigned int symbol_addr; + int symtab_index; + /* Now parse the relocation information */ + + rpnt = (struct elf32_rela *) (rel_addr + tpnt->loadaddr); + + symtab = (struct elf32_sym *) (tpnt->dynamic_info[DT_SYMTAB] + tpnt->loadaddr); + strtab = ( char *) (tpnt->dynamic_info[DT_STRTAB] + tpnt->loadaddr); + + for(i=0; i< rel_size; i+= sizeof(struct elf32_rela), rpnt++){ + reloc_addr = (int *) (tpnt->loadaddr + (int)rpnt->r_offset); + reloc_type = ELF32_R_TYPE(rpnt->r_info); + symtab_index = ELF32_R_SYM(rpnt->r_info); + symbol_addr = 0; + + if(!symtab_index && tpnt->libtype == program_interpreter) continue; + + if(symtab_index) { + + if(tpnt->libtype == program_interpreter && + _dl_symbol(strtab + symtab[symtab_index].st_name)) + continue; + + symbol_addr = (unsigned int) + _dl_find_hash(strtab + symtab[symtab_index].st_name, + tpnt->symbol_scope, (int) reloc_addr, + (reloc_type == R_SPARC_JMP_SLOT ? tpnt : NULL), 0); + + if(!symbol_addr && + ELF32_ST_BIND(symtab [symtab_index].st_info) == STB_GLOBAL) { + _dl_fdprintf(2, "%s: can't resolve symbol '%s'\n", + _dl_progname, strtab + symtab[symtab_index].st_name); + goof++; + }; + }; + switch(reloc_type){ + case R_SPARC_NONE: + break; + case R_SPARC_32: + *reloc_addr = symbol_addr + rpnt->r_addend; + break; + case R_SPARC_DISP32: + *reloc_addr = symbol_addr + rpnt->r_addend - (unsigned int) reloc_addr; + break; + case R_SPARC_GLOB_DAT: + *reloc_addr = symbol_addr + rpnt->r_addend; + break; + case R_SPARC_JMP_SLOT: + reloc_addr[1] = 0x03000000 | ((symbol_addr >> 10) & 0x3fffff); + reloc_addr[2] = 0x81c06000 | (symbol_addr & 0x3ff); + break; + case R_SPARC_RELATIVE: + *reloc_addr += (unsigned int) tpnt->loadaddr + rpnt->r_addend; + break; + case R_SPARC_HI22: + if (!symbol_addr) + symbol_addr = tpnt->loadaddr + rpnt->r_addend; + else + symbol_addr += rpnt->r_addend; + *reloc_addr = (*reloc_addr & 0xffc00000)|(symbol_addr >> 10); + break; + case R_SPARC_LO10: + if (!symbol_addr) + symbol_addr = tpnt->loadaddr + rpnt->r_addend; + else + symbol_addr += rpnt->r_addend; + *reloc_addr = (*reloc_addr & ~0x3ff)|(symbol_addr & 0x3ff); + break; + case R_SPARC_WDISP30: + *reloc_addr = (*reloc_addr & 0xc0000000)| + ((symbol_addr - (unsigned int) reloc_addr) >> 2); + break; + case R_SPARC_COPY: +#if 0 /* This one is done later */ + _dl_fdprintf(2, "Doing copy for symbol "); + if(symtab_index) _dl_fdprintf(2, strtab + symtab[symtab_index].st_name); + _dl_fdprintf(2, "\n"); + _dl_memcpy((void *) symtab[symtab_index].st_value, + (void *) symbol_addr, + symtab[symtab_index].st_size); +#endif + break; + default: + _dl_fdprintf(2, "%s: can't handle reloc type ", _dl_progname); +#ifdef VERBOSE_DLINKER + _dl_fdprintf(2, "%s ", _dl_reltypes[reloc_type]); +#endif + if (symtab_index) + _dl_fdprintf(2, "'%s'\n", strtab + symtab[symtab_index].st_name); + _dl_exit(34); + }; + + }; + return goof; +} + + +/* This is done as a separate step, because there are cases where + information is first copied and later initialized. This results in + the wrong information being copied. Someone at Sun was complaining about + a bug in the handling of _COPY by SVr4, and this may in fact be what he + was talking about. Sigh. */ + +/* No, there are cases where the SVr4 linker fails to emit COPY relocs + at all */ + +int _dl_parse_copy_information(struct dyn_elf * xpnt, int rel_addr, + int rel_size, int type) +{ + int i; + char * strtab; + int reloc_type; + int goof = 0; + struct elf32_sym * symtab; + struct elf32_rela * rpnt; + unsigned int * reloc_addr; + unsigned int symbol_addr; + struct elf_resolve *tpnt; + int symtab_index; + /* Now parse the relocation information */ + + tpnt = xpnt->dyn; + + rpnt = (struct elf32_rela *) (rel_addr + tpnt->loadaddr); + + symtab = (struct elf32_sym *) (tpnt->dynamic_info[DT_SYMTAB] + tpnt->loadaddr); + strtab = ( char *) (tpnt->dynamic_info[DT_STRTAB] + tpnt->loadaddr); + + for(i=0; i< rel_size; i+= sizeof(struct elf32_rela), rpnt++){ + reloc_addr = (int *) (tpnt->loadaddr + (int)rpnt->r_offset); + reloc_type = ELF32_R_TYPE(rpnt->r_info); + if(reloc_type != R_SPARC_COPY) continue; + symtab_index = ELF32_R_SYM(rpnt->r_info); + symbol_addr = 0; + if(!symtab_index && tpnt->libtype == program_interpreter) continue; + if(symtab_index) { + + if(tpnt->libtype == program_interpreter && + _dl_symbol(strtab + symtab[symtab_index].st_name)) + continue; + + symbol_addr = (unsigned int) + _dl_find_hash(strtab + symtab[symtab_index].st_name, + xpnt->next, (int) reloc_addr, NULL, 1); + if(!symbol_addr) { + _dl_fdprintf(2, "%s: can't resolve symbol '%s'\n", + _dl_progname, strtab + symtab[symtab_index].st_name); + goof++; + }; + }; + if (!goof) + _dl_memcpy((char *) symtab[symtab_index].st_value, + (char *) symbol_addr, + symtab[symtab_index].st_size); + }; + return goof; +} + + diff --git a/ldso/ldso/sparc/ld_sysdep.h b/ldso/ldso/sparc/ld_sysdep.h new file mode 100644 index 000000000..1d4c0354f --- /dev/null +++ b/ldso/ldso/sparc/ld_sysdep.h @@ -0,0 +1,131 @@ + +/* + * Various assmbly language/system dependent hacks that are required + * so that we can minimize the amount of platform specific code. + */ +#define LINUXBIN + +/* + * Define this if the system uses RELOCA. + */ +#define ELF_USES_RELOCA + +/* + * Get the address of the Global offset table. This must be absolute, not + * relative. + */ +#define GET_GOT(X) __asm__("\tmov %%l7,%0\n\t" : "=r" (X)) + +/* + * Get a pointer to the argv array. On many platforms this can be just + * the address if the first argument, on other platforms we need to + * do something a little more subtle here. We assume that argc is stored + * at the word just below the argvp that we return here. + */ +#define GET_ARGV(ARGVP, ARGS) __asm__("\tadd %%fp,68,%0\n" : "=r" (ARGVP)); + +/* + * Initialization sequence for a GOT. For the Sparc, this points to the + * PLT, and we need to initialize a couple of the slots. The PLT should + * look like: + * + * save %sp, -64, %sp + * call _dl_linux_resolve + * nop + * .word implementation_dependent + */ +#define INIT_GOT(GOT_BASE,MODULE) \ +{ \ + GOT_BASE[0] = 0x9de3bfc0; /* save %sp, -64, %sp */ \ + GOT_BASE[1] = 0x40000000 | (((unsigned int) _dl_linux_resolve - (unsigned int) GOT_BASE - 4) >> 2); \ + GOT_BASE[2] = 0x01000000; /* nop */ \ + GOT_BASE[3] = (int) MODULE; \ +} + +/* + * Here is a macro to perform a relocation. This is only used when + * bootstrapping the dynamic loader. + */ +#define PERFORM_BOOTSTRAP_RELOC(RELP,REL,SYMBOL,LOAD) \ + switch(ELF32_R_TYPE((RELP)->r_info)) { \ + case R_SPARC_32: \ + *REL = SYMBOL + (RELP)->r_addend; \ + break; \ + case R_SPARC_GLOB_DAT: \ + *REL = SYMBOL + (RELP)->r_addend; \ + break; \ + case R_SPARC_JMP_SLOT: \ + REL[1] = 0x03000000 | ((SYMBOL >> 10) & 0x3fffff); \ + REL[2] = 0x81c06000 | (SYMBOL & 0x3ff); \ + break; \ + case R_SPARC_NONE: \ + break; \ + case R_SPARC_WDISP30: \ + break; \ + case R_SPARC_RELATIVE: \ + *REL += (unsigned int) LOAD + (RELP)->r_addend; \ + break; \ + default: \ + _dl_exit(1); \ + } + + +/* + * Transfer control to the user's application, once the dynamic loader + * is done. The crt calls atexit with $g1 if not null, so we need to + * ensure that it contains NULL. + */ + +#define START() \ + __asm__ volatile ( \ + "add %%g0,%%g0,%%g1\n\t" \ + "jmpl %0, %%o7\n\t" \ + "restore %%g0,%%g0,%%g0\n\t" \ + : /*"=r" (status) */ : \ + "r" (_dl_elf_main): "g1", "o0", "o1") + + + +/* Here we define the magic numbers that this dynamic loader should accept */ + +#define MAGIC1 EM_SPARC +#undef MAGIC2 +/* Used for error messages */ +#define ELF_TARGET "Sparc" + +#ifndef COMPILE_ASM +extern unsigned int _dl_linux_resolver(unsigned int reloc_entry, + unsigned int * i); +#endif + +/* + * Define this if you want a dynamic loader that works on Solaris. + */ +#define SOLARIS_COMPATIBLE + +/* + * Define this because we do not want to call .udiv in the library. + * Change on the plans -miguel: + * We just statically link against .udiv. This is required + * if we want to be able to run on Sun4c machines. + */ + +/* We now link .urem against this one */ +#ifdef USE_V8 +#define do_rem(result,n,base) ({ \ +volatile int __res; \ +__asm__("mov %%g0,%%Y\n\t" \ + "sdiv %2,%3,%%l6\n\t" \ + "smul %%l6,%3,%%l6\n\t" \ + "sub %2,%%l6,%0\n\t" \ + :"=r" (result),"=r" (__res):"r" (n),"r"(base) : "l6" ); __res; }) +#else +#define do_rem(a,b,c) a = _dl_urem (b,c); +#endif +/* + * dbx wants the binder to have a specific name. Mustn't disappoint it. + */ +#ifdef SOLARIS_COMPATIBLE +#define _dl_linux_resolve _elf_rtbndr +#endif + diff --git a/ldso/ldso/sparc/resolve.S b/ldso/ldso/sparc/resolve.S new file mode 100644 index 000000000..ea985b5c8 --- /dev/null +++ b/ldso/ldso/sparc/resolve.S @@ -0,0 +1,25 @@ +/* + * These are various helper routines that are needed to run an ELF image. + */ +#define COMPILE_ASM +#include "sysdep.h" + +.text + .align 16 + +.globl _dl_linux_resolve +_dl_linux_resolve: + /* + * Call the resolver - pass the address of the PLT so that we can + * figure out which module we are in. + */ + mov %o7,%o1 + call _dl_linux_resolver + mov %g1,%o0 + + jmpl %o0,%o7 + restore +.LFE2: + + .type _dl_linux_resolve,#function + .size _dl_linux_resolve,.LFE2-_dl_linux_resolve diff --git a/ldso/ldso/sparc/sdiv.S b/ldso/ldso/sparc/sdiv.S new file mode 100644 index 000000000..5e52e1959 --- /dev/null +++ b/ldso/ldso/sparc/sdiv.S @@ -0,0 +1,369 @@ + /* This file is generated from divrem.m4; DO NOT EDIT! */ +/* + * Division and remainder, from Appendix E of the Sparc Version 8 + * Architecture Manual, with fixes from Gordon Irlam. + */ + +/* + * Input: dividend and divisor in %o0 and %o1 respectively. + * + * m4 parameters: + * .div name of function to generate + * div div=div => %o0 / %o1; div=rem => %o0 % %o1 + * true true=true => signed; true=false => unsigned + * + * Algorithm parameters: + * N how many bits per iteration we try to get (4) + * WORDSIZE total number of bits (32) + * + * Derived constants: + * TOPBITS number of bits in the top decade of a number + * + * Important variables: + * Q the partial quotient under development (initially 0) + * R the remainder so far, initially the dividend + * ITER number of main division loop iterations required; + * equal to ceil(log2(quotient) / N). Note that this + * is the log base (2^N) of the quotient. + * V the current comparand, initially divisor*2^(ITER*N-1) + * + * Cost: + * Current estimate for non-large dividend is + * ceil(log2(quotient) / N) * (10 + 7N/2) + C + * A large dividend is one greater than 2^(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + */ + + + +#include "DEFS.h" +#ifndef __linux__ +#ifdef __svr4__ +#include <sys/trap.h> +#else +#include "/usr/include/machine/trap.h" +#endif +#else +#include <asm/traps.h> +#endif + +FUNC(_dl_div) + ! compute sign of result; if neither is negative, no problem + orcc %o1, %o0, %g0 ! either negative? + bge 2f ! no, go do the divide + xor %o1, %o0, %g6 ! compute sign in any case + tst %o1 + bge 1f + tst %o0 + ! %o1 is definitely negative; %o0 might also be negative + bge 2f ! if %o0 not negative... + sub %g0, %o1, %o1 ! in any case, make %o1 nonneg +1: ! %o0 is negative, %o1 is nonnegative + sub %g0, %o0, %o0 ! make %o0 nonnegative +2: + + ! Ready to divide. Compute size of quotient; scale comparand. + orcc %o1, %g0, %o5 + bne 1f + mov %o0, %o3 + + ! Divide by zero trap. If it returns, return 0 (about as + ! wrong as possible, but that is what SunOS does...). + ta ST_DIV0 + retl + clr %o0 + +1: + cmp %o3, %o5 ! if %o1 exceeds %o0, done + blu Lgot_result ! (and algorithm fails otherwise) + clr %o2 + sethi %hi(1 << (32 - 4 - 1)), %g1 + cmp %o3, %g1 + blu Lnot_really_big + clr %o4 + + ! Here the dividend is >= 2**(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The number of bits in the result here is N*ITER+SC, where SC <= N. + ! Compute ITER in an unorthodox manner: know we need to shift V into + ! the top decade: so do not even bother to compare to R. + 1: + cmp %o5, %g1 + bgeu 3f + mov 1, %g7 + sll %o5, 4, %o5 + b 1b + add %o4, 1, %o4 + + ! Now compute %g7. + 2: addcc %o5, %o5, %o5 + bcc Lnot_too_big + add %g7, 1, %g7 + + ! We get here if the %o1 overflowed while shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1, 4, %g1 ! high order bit + srl %o5, 1, %o5 ! rest of %o5 + add %o5, %g1, %o5 + b Ldo_single_div + sub %g7, 1, %g7 + + Lnot_too_big: + 3: cmp %o5, %o3 + blu 2b + nop + be Ldo_single_div + nop + /* NB: these are commented out in the V8-Sparc manual as well */ + /* (I do not understand this) */ + ! %o5 > %o3: went too far: back up 1 step + ! srl %o5, 1, %o5 + ! dec %g7 + ! do single-bit divide steps + ! + ! We have to be careful here. We know that %o3 >= %o5, so we can do the + ! first divide step without thinking. BUT, the others are conditional, + ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- + ! order bit set in the first step, just falling into the regular + ! division loop will mess up the first time around. + ! So we unroll slightly... + Ldo_single_div: + subcc %g7, 1, %g7 + bl Lend_regular_divide + nop + sub %o3, %o5, %o3 + mov 1, %o2 + b Lend_single_divloop + nop + Lsingle_divloop: + sll %o2, 1, %o2 + bl 1f + srl %o5, 1, %o5 + ! %o3 >= 0 + sub %o3, %o5, %o3 + b 2f + add %o2, 1, %o2 + 1: ! %o3 < 0 + add %o3, %o5, %o3 + sub %o2, 1, %o2 + 2: + Lend_single_divloop: + subcc %g7, 1, %g7 + bge Lsingle_divloop + tst %o3 + b,a Lend_regular_divide + +Lnot_really_big: +1: + sll %o5, 4, %o5 + cmp %o5, %o3 + bleu 1b + addcc %o4, 1, %o4 + be Lgot_result + sub %o4, 1, %o4 + + tst %o3 ! set up for initial iteration +Ldivloop: + sll %o2, 4, %o2 + ! depth 1, accumulated bits 0 + bl L.1.16 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 2, accumulated bits 1 + bl L.2.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits 3 + bl L.3.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 7 + bl L.4.23 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 + +L.4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 + + +L.3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 5 + bl L.4.21 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 + +L.4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 + + + +L.2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits 1 + bl L.3.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 3 + bl L.4.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 + +L.4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 + + +L.3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 1 + bl L.4.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 + +L.4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 + + + + +L.1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 2, accumulated bits -1 + bl L.2.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits -1 + bl L.3.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -1 + bl L.4.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 + +L.4.15: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 + + +L.3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -3 + bl L.4.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 + +L.4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 + + + +L.2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits -3 + bl L.3.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -5 + bl L.4.11 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 + +L.4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 + + +L.3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -7 + bl L.4.9 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 + +L.4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + + + + + 9: +Lend_regular_divide: + subcc %o4, 1, %o4 + bge Ldivloop + tst %o3 + bl,a Lgot_result + ! non-restoring fixup here (one instruction only!) + sub %o2, 1, %o2 + + +Lgot_result: + ! check to see if answer should be < 0 + tst %g6 + bl,a 1f + sub %g0, %o2, %o2 +1: + retl + mov %o2, %o0 diff --git a/ldso/ldso/sparc/sysdep.h b/ldso/ldso/sparc/sysdep.h new file mode 100644 index 000000000..1d4c0354f --- /dev/null +++ b/ldso/ldso/sparc/sysdep.h @@ -0,0 +1,131 @@ + +/* + * Various assmbly language/system dependent hacks that are required + * so that we can minimize the amount of platform specific code. + */ +#define LINUXBIN + +/* + * Define this if the system uses RELOCA. + */ +#define ELF_USES_RELOCA + +/* + * Get the address of the Global offset table. This must be absolute, not + * relative. + */ +#define GET_GOT(X) __asm__("\tmov %%l7,%0\n\t" : "=r" (X)) + +/* + * Get a pointer to the argv array. On many platforms this can be just + * the address if the first argument, on other platforms we need to + * do something a little more subtle here. We assume that argc is stored + * at the word just below the argvp that we return here. + */ +#define GET_ARGV(ARGVP, ARGS) __asm__("\tadd %%fp,68,%0\n" : "=r" (ARGVP)); + +/* + * Initialization sequence for a GOT. For the Sparc, this points to the + * PLT, and we need to initialize a couple of the slots. The PLT should + * look like: + * + * save %sp, -64, %sp + * call _dl_linux_resolve + * nop + * .word implementation_dependent + */ +#define INIT_GOT(GOT_BASE,MODULE) \ +{ \ + GOT_BASE[0] = 0x9de3bfc0; /* save %sp, -64, %sp */ \ + GOT_BASE[1] = 0x40000000 | (((unsigned int) _dl_linux_resolve - (unsigned int) GOT_BASE - 4) >> 2); \ + GOT_BASE[2] = 0x01000000; /* nop */ \ + GOT_BASE[3] = (int) MODULE; \ +} + +/* + * Here is a macro to perform a relocation. This is only used when + * bootstrapping the dynamic loader. + */ +#define PERFORM_BOOTSTRAP_RELOC(RELP,REL,SYMBOL,LOAD) \ + switch(ELF32_R_TYPE((RELP)->r_info)) { \ + case R_SPARC_32: \ + *REL = SYMBOL + (RELP)->r_addend; \ + break; \ + case R_SPARC_GLOB_DAT: \ + *REL = SYMBOL + (RELP)->r_addend; \ + break; \ + case R_SPARC_JMP_SLOT: \ + REL[1] = 0x03000000 | ((SYMBOL >> 10) & 0x3fffff); \ + REL[2] = 0x81c06000 | (SYMBOL & 0x3ff); \ + break; \ + case R_SPARC_NONE: \ + break; \ + case R_SPARC_WDISP30: \ + break; \ + case R_SPARC_RELATIVE: \ + *REL += (unsigned int) LOAD + (RELP)->r_addend; \ + break; \ + default: \ + _dl_exit(1); \ + } + + +/* + * Transfer control to the user's application, once the dynamic loader + * is done. The crt calls atexit with $g1 if not null, so we need to + * ensure that it contains NULL. + */ + +#define START() \ + __asm__ volatile ( \ + "add %%g0,%%g0,%%g1\n\t" \ + "jmpl %0, %%o7\n\t" \ + "restore %%g0,%%g0,%%g0\n\t" \ + : /*"=r" (status) */ : \ + "r" (_dl_elf_main): "g1", "o0", "o1") + + + +/* Here we define the magic numbers that this dynamic loader should accept */ + +#define MAGIC1 EM_SPARC +#undef MAGIC2 +/* Used for error messages */ +#define ELF_TARGET "Sparc" + +#ifndef COMPILE_ASM +extern unsigned int _dl_linux_resolver(unsigned int reloc_entry, + unsigned int * i); +#endif + +/* + * Define this if you want a dynamic loader that works on Solaris. + */ +#define SOLARIS_COMPATIBLE + +/* + * Define this because we do not want to call .udiv in the library. + * Change on the plans -miguel: + * We just statically link against .udiv. This is required + * if we want to be able to run on Sun4c machines. + */ + +/* We now link .urem against this one */ +#ifdef USE_V8 +#define do_rem(result,n,base) ({ \ +volatile int __res; \ +__asm__("mov %%g0,%%Y\n\t" \ + "sdiv %2,%3,%%l6\n\t" \ + "smul %%l6,%3,%%l6\n\t" \ + "sub %2,%%l6,%0\n\t" \ + :"=r" (result),"=r" (__res):"r" (n),"r"(base) : "l6" ); __res; }) +#else +#define do_rem(a,b,c) a = _dl_urem (b,c); +#endif +/* + * dbx wants the binder to have a specific name. Mustn't disappoint it. + */ +#ifdef SOLARIS_COMPATIBLE +#define _dl_linux_resolve _elf_rtbndr +#endif + diff --git a/ldso/ldso/sparc/udiv.S b/ldso/ldso/sparc/udiv.S new file mode 100644 index 000000000..df4e5385e --- /dev/null +++ b/ldso/ldso/sparc/udiv.S @@ -0,0 +1,351 @@ + /* This file is generated from divrem.m4; DO NOT EDIT! */ +/* + * Division and remainder, from Appendix E of the Sparc Version 8 + * Architecture Manual, with fixes from Gordon Irlam. + */ + +/* + * Input: dividend and divisor in %o0 and %o1 respectively. + * + * m4 parameters: + * .udiv name of function to generate + * div div=div => %o0 / %o1; div=rem => %o0 % %o1 + * false false=true => signed; false=false => unsigned + * + * Algorithm parameters: + * N how many bits per iteration we try to get (4) + * WORDSIZE total number of bits (32) + * + * Derived constants: + * TOPBITS number of bits in the top decade of a number + * + * Important variables: + * Q the partial quotient under development (initially 0) + * R the remainder so far, initially the dividend + * ITER number of main division loop iterations required; + * equal to ceil(log2(quotient) / N). Note that this + * is the log base (2^N) of the quotient. + * V the current comparand, initially divisor*2^(ITER*N-1) + * + * Cost: + * Current estimate for non-large dividend is + * ceil(log2(quotient) / N) * (10 + 7N/2) + C + * A large dividend is one greater than 2^(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + */ + + +#include "DEFS.h" +#ifndef __linux__ +#ifdef __svr4__ +#include <sys/trap.h> +#else +#include "/usr/include/machine/trap.h" +#endif +#else +#include <asm/traps.h> +#endif + +FUNC(_dl_udiv) + + ! Ready to divide. Compute size of quotient; scale comparand. + orcc %o1, %g0, %o5 + bne 1f + mov %o0, %o3 + + ! Divide by zero trap. If it returns, return 0 (about as + ! wrong as possible, but that is what SunOS does...). + ta ST_DIV0 + retl + clr %o0 + +1: + cmp %o3, %o5 ! if %o1 exceeds %o0, done + blu Lgot_result ! (and algorithm fails otherwise) + clr %o2 + sethi %hi(1 << (32 - 4 - 1)), %g1 + cmp %o3, %g1 + blu Lnot_really_big + clr %o4 + + ! Here the dividend is >= 2**(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The number of bits in the result here is N*ITER+SC, where SC <= N. + ! Compute ITER in an unorthodox manner: know we need to shift V into + ! the top decade: so do not even bother to compare to R. + 1: + cmp %o5, %g1 + bgeu 3f + mov 1, %g7 + sll %o5, 4, %o5 + b 1b + add %o4, 1, %o4 + + ! Now compute %g7. + 2: addcc %o5, %o5, %o5 + bcc Lnot_too_big + add %g7, 1, %g7 + + ! We get here if the %o1 overflowed while shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1, 4, %g1 ! high order bit + srl %o5, 1, %o5 ! rest of %o5 + add %o5, %g1, %o5 + b Ldo_single_div + sub %g7, 1, %g7 + + Lnot_too_big: + 3: cmp %o5, %o3 + blu 2b + nop + be Ldo_single_div + nop + /* NB: these are commented out in the V8-Sparc manual as well */ + /* (I do not understand this) */ + ! %o5 > %o3: went too far: back up 1 step + ! srl %o5, 1, %o5 + ! dec %g7 + ! do single-bit divide steps + ! + ! We have to be careful here. We know that %o3 >= %o5, so we can do the + ! first divide step without thinking. BUT, the others are conditional, + ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- + ! order bit set in the first step, just falling into the regular + ! division loop will mess up the first time around. + ! So we unroll slightly... + Ldo_single_div: + subcc %g7, 1, %g7 + bl Lend_regular_divide + nop + sub %o3, %o5, %o3 + mov 1, %o2 + b Lend_single_divloop + nop + Lsingle_divloop: + sll %o2, 1, %o2 + bl 1f + srl %o5, 1, %o5 + ! %o3 >= 0 + sub %o3, %o5, %o3 + b 2f + add %o2, 1, %o2 + 1: ! %o3 < 0 + add %o3, %o5, %o3 + sub %o2, 1, %o2 + 2: + Lend_single_divloop: + subcc %g7, 1, %g7 + bge Lsingle_divloop + tst %o3 + b,a Lend_regular_divide + +Lnot_really_big: +1: + sll %o5, 4, %o5 + cmp %o5, %o3 + bleu 1b + addcc %o4, 1, %o4 + be Lgot_result + sub %o4, 1, %o4 + + tst %o3 ! set up for initial iteration +Ldivloop: + sll %o2, 4, %o2 + ! depth 1, accumulated bits 0 + bl L.1.16 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 2, accumulated bits 1 + bl L.2.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits 3 + bl L.3.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 7 + bl L.4.23 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 + +L.4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 + + +L.3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 5 + bl L.4.21 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 + +L.4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 + + + +L.2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits 1 + bl L.3.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 3 + bl L.4.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 + +L.4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 + + +L.3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 1 + bl L.4.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 + +L.4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 + + + + +L.1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 2, accumulated bits -1 + bl L.2.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits -1 + bl L.3.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -1 + bl L.4.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 + +L.4.15: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 + + +L.3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -3 + bl L.4.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 + +L.4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 + + + +L.2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits -3 + bl L.3.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -5 + bl L.4.11 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 + +L.4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 + + +L.3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -7 + bl L.4.9 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 + +L.4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + + + + + 9: +Lend_regular_divide: + subcc %o4, 1, %o4 + bge Ldivloop + tst %o3 + bl,a Lgot_result + ! non-restoring fixup here (one instruction only!) + sub %o2, 1, %o2 + + +Lgot_result: + + retl + mov %o2, %o0 diff --git a/ldso/ldso/sparc/umul.S b/ldso/ldso/sparc/umul.S new file mode 100644 index 000000000..7a26c295c --- /dev/null +++ b/ldso/ldso/sparc/umul.S @@ -0,0 +1,153 @@ +/* + * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the + * upper 32 bits of the 64-bit product). + * + * This code optimizes short (less than 13-bit) multiplies. Short + * multiplies require 25 instruction cycles, and long ones require + * 45 instruction cycles. + * + * On return, overflow has occurred (%o1 is not zero) if and only if + * the Z condition code is clear, allowing, e.g., the following: + * + * call .umul + * nop + * bnz overflow (or tnz) + */ + +#include "DEFS.h" +FUNC(.umul) + or %o0, %o1, %o4 + mov %o0, %y ! multiplier -> Y + andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args + be Lmul_shortway ! if zero, can do it the short way + andcc %g0, %g0, %o4 ! zero the partial product and clear N and V + + /* + * Long multiply. 32 steps, followed by a final shift step. + */ + mulscc %o4, %o1, %o4 ! 1 + mulscc %o4, %o1, %o4 ! 2 + mulscc %o4, %o1, %o4 ! 3 + mulscc %o4, %o1, %o4 ! 4 + mulscc %o4, %o1, %o4 ! 5 + mulscc %o4, %o1, %o4 ! 6 + mulscc %o4, %o1, %o4 ! 7 + mulscc %o4, %o1, %o4 ! 8 + mulscc %o4, %o1, %o4 ! 9 + mulscc %o4, %o1, %o4 ! 10 + mulscc %o4, %o1, %o4 ! 11 + mulscc %o4, %o1, %o4 ! 12 + mulscc %o4, %o1, %o4 ! 13 + mulscc %o4, %o1, %o4 ! 14 + mulscc %o4, %o1, %o4 ! 15 + mulscc %o4, %o1, %o4 ! 16 + mulscc %o4, %o1, %o4 ! 17 + mulscc %o4, %o1, %o4 ! 18 + mulscc %o4, %o1, %o4 ! 19 + mulscc %o4, %o1, %o4 ! 20 + mulscc %o4, %o1, %o4 ! 21 + mulscc %o4, %o1, %o4 ! 22 + mulscc %o4, %o1, %o4 ! 23 + mulscc %o4, %o1, %o4 ! 24 + mulscc %o4, %o1, %o4 ! 25 + mulscc %o4, %o1, %o4 ! 26 + mulscc %o4, %o1, %o4 ! 27 + mulscc %o4, %o1, %o4 ! 28 + mulscc %o4, %o1, %o4 ! 29 + mulscc %o4, %o1, %o4 ! 30 + mulscc %o4, %o1, %o4 ! 31 + mulscc %o4, %o1, %o4 ! 32 + mulscc %o4, %g0, %o4 ! final shift + + + /* + * Normally, with the shift-and-add approach, if both numbers are + * positive you get the correct result. With 32-bit two's-complement + * numbers, -x is represented as + * + * x 32 + * ( 2 - ------ ) mod 2 * 2 + * 32 + * 2 + * + * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s, + * we can treat this as if the radix point were just to the left + * of the sign bit (multiply by 2^32), and get + * + * -x = (2 - x) mod 2 + * + * Then, ignoring the `mod 2's for convenience: + * + * x * y = xy + * -x * y = 2y - xy + * x * -y = 2x - xy + * -x * -y = 4 - 2x - 2y + xy + * + * For signed multiplies, we subtract (x << 32) from the partial + * product to fix this problem for negative multipliers (see mul.s). + * Because of the way the shift into the partial product is calculated + * (N xor V), this term is automatically removed for the multiplicand, + * so we don't have to adjust. + * + * But for unsigned multiplies, the high order bit wasn't a sign bit, + * and the correction is wrong. So for unsigned multiplies where the + * high order bit is one, we end up with xy - (y << 32). To fix it + * we add y << 32. + */ +#if 0 + tst %o1 + bl,a 1f ! if %o1 < 0 (high order bit = 1), + add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half) +1: rd %y, %o0 ! get lower half of product + retl + addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0 +#else + /* Faster code from tege@sics.se. */ + sra %o1, 31, %o2 ! make mask from sign bit + and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1 + rd %y, %o0 ! get lower half of product + retl + addcc %o4, %o2, %o1 ! add compensation and put upper half in place +#endif + +Lmul_shortway: + /* + * Short multiply. 12 steps, followed by a final shift step. + * The resulting bits are off by 12 and (32-12) = 20 bit positions, + * but there is no problem with %o0 being negative (unlike above), + * and overflow is impossible (the answer is at most 24 bits long). + */ + mulscc %o4, %o1, %o4 ! 1 + mulscc %o4, %o1, %o4 ! 2 + mulscc %o4, %o1, %o4 ! 3 + mulscc %o4, %o1, %o4 ! 4 + mulscc %o4, %o1, %o4 ! 5 + mulscc %o4, %o1, %o4 ! 6 + mulscc %o4, %o1, %o4 ! 7 + mulscc %o4, %o1, %o4 ! 8 + mulscc %o4, %o1, %o4 ! 9 + mulscc %o4, %o1, %o4 ! 10 + mulscc %o4, %o1, %o4 ! 11 + mulscc %o4, %o1, %o4 ! 12 + mulscc %o4, %g0, %o4 ! final shift + + /* + * %o4 has 20 of the bits that should be in the result; %y has + * the bottom 12 (as %y's top 12). That is: + * + * %o4 %y + * +----------------+----------------+ + * | -12- | -20- | -12- | -20- | + * +------(---------+------)---------+ + * -----result----- + * + * The 12 bits of %o4 left of the `result' area are all zero; + * in fact, all top 20 bits of %o4 are zero. + */ + + rd %y, %o5 + sll %o4, 12, %o0 ! shift middle bits left 12 + srl %o5, 20, %o5 ! shift low bits right 20 + or %o5, %o0, %o0 + retl + addcc %g0, %g0, %o1 ! %o1 = zero, and set Z diff --git a/ldso/ldso/sparc/urem.S b/ldso/ldso/sparc/urem.S new file mode 100644 index 000000000..8d304038b --- /dev/null +++ b/ldso/ldso/sparc/urem.S @@ -0,0 +1,352 @@ + /* This file is generated from divrem.m4; DO NOT EDIT! */ +/* + * Division and remainder, from Appendix E of the Sparc Version 8 + * Architecture Manual, with fixes from Gordon Irlam. + */ + +/* + * Input: dividend and divisor in %o0 and %o1 respectively. + * + * m4 parameters: + * .urem name of function to generate + * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1 + * false false=true => signed; false=false => unsigned + * + * Algorithm parameters: + * N how many bits per iteration we try to get (4) + * WORDSIZE total number of bits (32) + * + * Derived constants: + * TOPBITS number of bits in the top decade of a number + * + * Important variables: + * Q the partial quotient under development (initially 0) + * R the remainder so far, initially the dividend + * ITER number of main division loop iterations required; + * equal to ceil(log2(quotient) / N). Note that this + * is the log base (2^N) of the quotient. + * V the current comparand, initially divisor*2^(ITER*N-1) + * + * Cost: + * Current estimate for non-large dividend is + * ceil(log2(quotient) / N) * (10 + 7N/2) + C + * A large dividend is one greater than 2^(31-TOPBITS) and takes a + * different path, as the upper bits of the quotient must be developed + * one bit at a time. + */ + + + +#include "DEFS.h" +#ifdef __linux__ +#include <asm/traps.h> +#else +#ifdef __svr4__ +#include <sys/trap.h> +#else +#include "/usr/include/machine/trap.h" +#endif +#endif + +FUNC(_dl_urem) + + ! Ready to divide. Compute size of quotient; scale comparand. + orcc %o1, %g0, %o5 + bne 1f + mov %o0, %o3 + + ! Divide by zero trap. If it returns, return 0 (about as + ! wrong as possible, but that is what SunOS does...). + ta ST_DIV0 + retl + clr %o0 + +1: + cmp %o3, %o5 ! if %o1 exceeds %o0, done + blu Lgot_result ! (and algorithm fails otherwise) + clr %o2 + sethi %hi(1 << (32 - 4 - 1)), %g1 + cmp %o3, %g1 + blu Lnot_really_big + clr %o4 + + ! Here the dividend is >= 2**(31-N) or so. We must be careful here, + ! as our usual N-at-a-shot divide step will cause overflow and havoc. + ! The number of bits in the result here is N*ITER+SC, where SC <= N. + ! Compute ITER in an unorthodox manner: know we need to shift V into + ! the top decade: so do not even bother to compare to R. + 1: + cmp %o5, %g1 + bgeu 3f + mov 1, %g7 + sll %o5, 4, %o5 + b 1b + add %o4, 1, %o4 + + ! Now compute %g7. + 2: addcc %o5, %o5, %o5 + bcc Lnot_too_big + add %g7, 1, %g7 + + ! We get here if the %o1 overflowed while shifting. + ! This means that %o3 has the high-order bit set. + ! Restore %o5 and subtract from %o3. + sll %g1, 4, %g1 ! high order bit + srl %o5, 1, %o5 ! rest of %o5 + add %o5, %g1, %o5 + b Ldo_single_div + sub %g7, 1, %g7 + + Lnot_too_big: + 3: cmp %o5, %o3 + blu 2b + nop + be Ldo_single_div + nop + /* NB: these are commented out in the V8-Sparc manual as well */ + /* (I do not understand this) */ + ! %o5 > %o3: went too far: back up 1 step + ! srl %o5, 1, %o5 + ! dec %g7 + ! do single-bit divide steps + ! + ! We have to be careful here. We know that %o3 >= %o5, so we can do the + ! first divide step without thinking. BUT, the others are conditional, + ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- + ! order bit set in the first step, just falling into the regular + ! division loop will mess up the first time around. + ! So we unroll slightly... + Ldo_single_div: + subcc %g7, 1, %g7 + bl Lend_regular_divide + nop + sub %o3, %o5, %o3 + mov 1, %o2 + b Lend_single_divloop + nop + Lsingle_divloop: + sll %o2, 1, %o2 + bl 1f + srl %o5, 1, %o5 + ! %o3 >= 0 + sub %o3, %o5, %o3 + b 2f + add %o2, 1, %o2 + 1: ! %o3 < 0 + add %o3, %o5, %o3 + sub %o2, 1, %o2 + 2: + Lend_single_divloop: + subcc %g7, 1, %g7 + bge Lsingle_divloop + tst %o3 + b,a Lend_regular_divide + +Lnot_really_big: +1: + sll %o5, 4, %o5 + cmp %o5, %o3 + bleu 1b + addcc %o4, 1, %o4 + be Lgot_result + sub %o4, 1, %o4 + + tst %o3 ! set up for initial iteration +Ldivloop: + sll %o2, 4, %o2 + ! depth 1, accumulated bits 0 + bl L.1.16 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 2, accumulated bits 1 + bl L.2.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits 3 + bl L.3.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 7 + bl L.4.23 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (7*2+1), %o2 + +L.4.23: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (7*2-1), %o2 + + +L.3.19: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 5 + bl L.4.21 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (5*2+1), %o2 + +L.4.21: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (5*2-1), %o2 + + + +L.2.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits 1 + bl L.3.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits 3 + bl L.4.19 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (3*2+1), %o2 + +L.4.19: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (3*2-1), %o2 + + +L.3.17: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits 1 + bl L.4.17 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (1*2+1), %o2 + +L.4.17: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (1*2-1), %o2 + + + + +L.1.16: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 2, accumulated bits -1 + bl L.2.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 3, accumulated bits -1 + bl L.3.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -1 + bl L.4.15 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2+1), %o2 + +L.4.15: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-1*2-1), %o2 + + +L.3.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -3 + bl L.4.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2+1), %o2 + +L.4.13: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-3*2-1), %o2 + + + +L.2.15: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 3, accumulated bits -3 + bl L.3.13 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + ! depth 4, accumulated bits -5 + bl L.4.11 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2+1), %o2 + +L.4.11: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-5*2-1), %o2 + + +L.3.13: + ! remainder is negative + addcc %o3,%o5,%o3 + ! depth 4, accumulated bits -7 + bl L.4.9 + srl %o5,1,%o5 + ! remainder is positive + subcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2+1), %o2 + +L.4.9: + ! remainder is negative + addcc %o3,%o5,%o3 + b 9f + add %o2, (-7*2-1), %o2 + + + + + 9: +Lend_regular_divide: + subcc %o4, 1, %o4 + bge Ldivloop + tst %o3 + bl,a Lgot_result + ! non-restoring fixup here (one instruction only!) + add %o3, %o1, %o3 + + +Lgot_result: + + retl + mov %o3, %o0 |