From 124ec188720b6bdea85ade49e7ea195161b12fce Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Sat, 5 Jan 2008 10:05:27 +0000 Subject: Chris Zankel writes: The following patches add support for the Xtensa processor architecture to uClibc. They are based on a recent SVN checkout (12/05/2007). The first patch (attached to this post) adds Xtensa support to various shared configuration and make files. The following patches then include the Xtensa specific files and directories. I welcome any feedback and would appreciate it if you could include the patches into the mainline tree. I am certainly committed to maintain the port. Bob Wilson was kind enough to review the patches. Some notes about the architecture: Xtensa is a configurable and extensible processor architecture developed by Tensilica. For more information, please visit: www.linux-xtensa.org. --- Rules.mak | 3 +- extra/Configs/Config.in | 7 + extra/Configs/Config.xtensa | 12 + include/elf.h | 58 ++++ ldso/include/dl-string.h | 2 +- ldso/ldso/xtensa/dl-debug.h | 61 ++++ ldso/ldso/xtensa/dl-startup.h | 106 +++++++ ldso/ldso/xtensa/dl-syscalls.h | 7 + ldso/ldso/xtensa/dl-sysdep.h | 132 +++++++++ ldso/ldso/xtensa/elfinterp.c | 285 +++++++++++++++++++ ldso/ldso/xtensa/resolve.S | 61 ++++ libc/string/xtensa/Makefile | 13 + libc/string/xtensa/memcpy.S | 297 +++++++++++++++++++ libc/string/xtensa/memset.S | 165 +++++++++++ libc/string/xtensa/strcmp.S | 313 +++++++++++++++++++++ libc/string/xtensa/strcpy.S | 150 ++++++++++ libc/string/xtensa/strlen.S | 104 +++++++ libc/string/xtensa/strncpy.S | 241 ++++++++++++++++ libc/sysdeps/linux/xtensa/Makefile | 13 + libc/sysdeps/linux/xtensa/Makefile.arch | 14 + libc/sysdeps/linux/xtensa/__longjmp.S | 126 +++++++++ libc/sysdeps/linux/xtensa/__syscall_error.c | 18 ++ libc/sysdeps/linux/xtensa/bits/endian.h | 10 + libc/sysdeps/linux/xtensa/bits/fcntl.h | 196 +++++++++++++ libc/sysdeps/linux/xtensa/bits/ipc.h | 54 ++++ libc/sysdeps/linux/xtensa/bits/kernel_stat.h | 57 ++++ 
libc/sysdeps/linux/xtensa/bits/kernel_types.h | 48 ++++ libc/sysdeps/linux/xtensa/bits/mathdef.h | 43 +++ libc/sysdeps/linux/xtensa/bits/mman.h | 104 +++++++ libc/sysdeps/linux/xtensa/bits/msq.h | 88 ++++++ libc/sysdeps/linux/xtensa/bits/setjmp.h | 46 +++ libc/sysdeps/linux/xtensa/bits/shm.h | 115 ++++++++ libc/sysdeps/linux/xtensa/bits/sigcontextinfo.h | 33 +++ libc/sysdeps/linux/xtensa/bits/stackinfo.h | 28 ++ libc/sysdeps/linux/xtensa/bits/stat.h | 153 ++++++++++ libc/sysdeps/linux/xtensa/bits/syscalls.h | 140 +++++++++ .../linux/xtensa/bits/uClibc_arch_features.h | 44 +++ libc/sysdeps/linux/xtensa/bits/uClibc_page.h | 31 ++ libc/sysdeps/linux/xtensa/bits/wordsize.h | 19 ++ libc/sysdeps/linux/xtensa/bits/xtensa-config.h | 53 ++++ libc/sysdeps/linux/xtensa/brk.c | 43 +++ libc/sysdeps/linux/xtensa/bsd-_setjmp.S | 1 + libc/sysdeps/linux/xtensa/bsd-setjmp.S | 1 + libc/sysdeps/linux/xtensa/clone.S | 103 +++++++ libc/sysdeps/linux/xtensa/crt1.S | 119 ++++++++ libc/sysdeps/linux/xtensa/crti.S | 16 ++ libc/sysdeps/linux/xtensa/crtn.S | 8 + libc/sysdeps/linux/xtensa/fork.c | 25 ++ libc/sysdeps/linux/xtensa/mmap.S | 57 ++++ libc/sysdeps/linux/xtensa/posix_fadvise.c | 29 ++ libc/sysdeps/linux/xtensa/posix_fadvise64.c | 39 +++ libc/sysdeps/linux/xtensa/pread_write.c | 193 +++++++++++++ libc/sysdeps/linux/xtensa/setjmp.S | 131 +++++++++ libc/sysdeps/linux/xtensa/sys/procfs.h | 121 ++++++++ libc/sysdeps/linux/xtensa/sys/ptrace.h | 156 ++++++++++ libc/sysdeps/linux/xtensa/sys/ucontext.h | 49 ++++ libc/sysdeps/linux/xtensa/syscall.S | 42 +++ libc/sysdeps/linux/xtensa/sysdep.h | 160 +++++++++++ libc/sysdeps/linux/xtensa/vfork.S | 170 +++++++++++ libc/sysdeps/linux/xtensa/windowspill.S | 95 +++++++ .../linuxthreads.old/sysdeps/xtensa/pt-machine.h | 48 ++++ test/Rules.mak | 1 + 62 files changed, 5055 insertions(+), 2 deletions(-) create mode 100644 extra/Configs/Config.xtensa create mode 100644 ldso/ldso/xtensa/dl-debug.h create mode 100644 ldso/ldso/xtensa/dl-startup.h create 
mode 100644 ldso/ldso/xtensa/dl-syscalls.h create mode 100644 ldso/ldso/xtensa/dl-sysdep.h create mode 100644 ldso/ldso/xtensa/elfinterp.c create mode 100644 ldso/ldso/xtensa/resolve.S create mode 100644 libc/string/xtensa/Makefile create mode 100644 libc/string/xtensa/memcpy.S create mode 100644 libc/string/xtensa/memset.S create mode 100644 libc/string/xtensa/strcmp.S create mode 100644 libc/string/xtensa/strcpy.S create mode 100644 libc/string/xtensa/strlen.S create mode 100644 libc/string/xtensa/strncpy.S create mode 100644 libc/sysdeps/linux/xtensa/Makefile create mode 100644 libc/sysdeps/linux/xtensa/Makefile.arch create mode 100644 libc/sysdeps/linux/xtensa/__longjmp.S create mode 100644 libc/sysdeps/linux/xtensa/__syscall_error.c create mode 100644 libc/sysdeps/linux/xtensa/bits/endian.h create mode 100644 libc/sysdeps/linux/xtensa/bits/fcntl.h create mode 100644 libc/sysdeps/linux/xtensa/bits/ipc.h create mode 100644 libc/sysdeps/linux/xtensa/bits/kernel_stat.h create mode 100644 libc/sysdeps/linux/xtensa/bits/kernel_types.h create mode 100644 libc/sysdeps/linux/xtensa/bits/mathdef.h create mode 100644 libc/sysdeps/linux/xtensa/bits/mman.h create mode 100644 libc/sysdeps/linux/xtensa/bits/msq.h create mode 100644 libc/sysdeps/linux/xtensa/bits/setjmp.h create mode 100644 libc/sysdeps/linux/xtensa/bits/shm.h create mode 100644 libc/sysdeps/linux/xtensa/bits/sigcontextinfo.h create mode 100644 libc/sysdeps/linux/xtensa/bits/stackinfo.h create mode 100644 libc/sysdeps/linux/xtensa/bits/stat.h create mode 100644 libc/sysdeps/linux/xtensa/bits/syscalls.h create mode 100644 libc/sysdeps/linux/xtensa/bits/uClibc_arch_features.h create mode 100644 libc/sysdeps/linux/xtensa/bits/uClibc_page.h create mode 100644 libc/sysdeps/linux/xtensa/bits/wordsize.h create mode 100644 libc/sysdeps/linux/xtensa/bits/xtensa-config.h create mode 100644 libc/sysdeps/linux/xtensa/brk.c create mode 100644 libc/sysdeps/linux/xtensa/bsd-_setjmp.S create mode 100644 
libc/sysdeps/linux/xtensa/bsd-setjmp.S create mode 100644 libc/sysdeps/linux/xtensa/clone.S create mode 100644 libc/sysdeps/linux/xtensa/crt1.S create mode 100644 libc/sysdeps/linux/xtensa/crti.S create mode 100644 libc/sysdeps/linux/xtensa/crtn.S create mode 100644 libc/sysdeps/linux/xtensa/fork.c create mode 100644 libc/sysdeps/linux/xtensa/mmap.S create mode 100644 libc/sysdeps/linux/xtensa/posix_fadvise.c create mode 100644 libc/sysdeps/linux/xtensa/posix_fadvise64.c create mode 100644 libc/sysdeps/linux/xtensa/pread_write.c create mode 100644 libc/sysdeps/linux/xtensa/setjmp.S create mode 100644 libc/sysdeps/linux/xtensa/sys/procfs.h create mode 100644 libc/sysdeps/linux/xtensa/sys/ptrace.h create mode 100644 libc/sysdeps/linux/xtensa/sys/ucontext.h create mode 100644 libc/sysdeps/linux/xtensa/syscall.S create mode 100644 libc/sysdeps/linux/xtensa/sysdep.h create mode 100644 libc/sysdeps/linux/xtensa/vfork.S create mode 100644 libc/sysdeps/linux/xtensa/windowspill.S create mode 100644 libpthread/linuxthreads.old/sysdeps/xtensa/pt-machine.h diff --git a/Rules.mak b/Rules.mak index c14a907d7..99d7efa2d 100644 --- a/Rules.mak +++ b/Rules.mak @@ -50,7 +50,8 @@ BUILD_CFLAGS = -O2 -Wall export ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun.*/sparc/ -e s/sparc.*/sparc/ \ -e s/arm.*/arm/ -e s/sa110/arm/ -e s/sh.*/sh/ \ -e s/s390x/s390/ -e s/parisc.*/hppa/ \ - -e s/ppc.*/powerpc/ -e s/mips.*/mips/ ) + -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ + -e s/xtensa.*/xtensa/ ) #--------------------------------------------------------- diff --git a/extra/Configs/Config.in b/extra/Configs/Config.in index ec4cb4bdc..37dc0ba1e 100644 --- a/extra/Configs/Config.in +++ b/extra/Configs/Config.in @@ -82,6 +82,9 @@ config TARGET_vax config TARGET_x86_64 bool "x86_64" +config TARGET_xtensa + bool "xtensa" + endchoice @@ -183,6 +186,10 @@ if TARGET_x86_64 source "extra/Configs/Config.x86_64" endif +if TARGET_xtensa +source "extra/Configs/Config.xtensa" +endif + config 
TARGET_SUBARCH string default "e500" if CONFIG_E500 diff --git a/extra/Configs/Config.xtensa b/extra/Configs/Config.xtensa new file mode 100644 index 000000000..75132471a --- /dev/null +++ b/extra/Configs/Config.xtensa @@ -0,0 +1,12 @@ +# +# For a description of the syntax of this configuration file, +# see extra/config/kconfig-language.txt +# + +config TARGET_ARCH + string + default "xtensa" + +config ARCH_CFLAGS + string + diff --git a/include/elf.h b/include/elf.h index 5ce5c694a..4c6d09012 100644 --- a/include/elf.h +++ b/include/elf.h @@ -2977,6 +2977,64 @@ typedef Elf32_Addr Elf32_Conflict; /* Keep this the last entry. */ #define R_NIOS2_NUM 22 +/* Xtensa-specific declarations */ + +/* Xtensa values for the Dyn d_tag field. */ +#define DT_XTENSA_GOT_LOC_OFF (DT_LOPROC + 0) +#define DT_XTENSA_GOT_LOC_SZ (DT_LOPROC + 1) +#define DT_XTENSA_NUM 2 + +/* Xtensa relocations. */ +#define R_XTENSA_NONE 0 +#define R_XTENSA_32 1 +#define R_XTENSA_RTLD 2 +#define R_XTENSA_GLOB_DAT 3 +#define R_XTENSA_JMP_SLOT 4 +#define R_XTENSA_RELATIVE 5 +#define R_XTENSA_PLT 6 +#define R_XTENSA_OP0 8 +#define R_XTENSA_OP1 9 +#define R_XTENSA_OP2 10 +#define R_XTENSA_ASM_EXPAND 11 +#define R_XTENSA_ASM_SIMPLIFY 12 +#define R_XTENSA_GNU_VTINHERIT 15 +#define R_XTENSA_GNU_VTENTRY 16 +#define R_XTENSA_DIFF8 17 +#define R_XTENSA_DIFF16 18 +#define R_XTENSA_DIFF32 19 +#define R_XTENSA_SLOT0_OP 20 +#define R_XTENSA_SLOT1_OP 21 +#define R_XTENSA_SLOT2_OP 22 +#define R_XTENSA_SLOT3_OP 23 +#define R_XTENSA_SLOT4_OP 24 +#define R_XTENSA_SLOT5_OP 25 +#define R_XTENSA_SLOT6_OP 26 +#define R_XTENSA_SLOT7_OP 27 +#define R_XTENSA_SLOT8_OP 28 +#define R_XTENSA_SLOT9_OP 29 +#define R_XTENSA_SLOT10_OP 30 +#define R_XTENSA_SLOT11_OP 31 +#define R_XTENSA_SLOT12_OP 32 +#define R_XTENSA_SLOT13_OP 33 +#define R_XTENSA_SLOT14_OP 34 +#define R_XTENSA_SLOT0_ALT 35 +#define R_XTENSA_SLOT1_ALT 36 +#define R_XTENSA_SLOT2_ALT 37 +#define R_XTENSA_SLOT3_ALT 38 +#define R_XTENSA_SLOT4_ALT 39 +#define 
R_XTENSA_SLOT5_ALT 40 +#define R_XTENSA_SLOT6_ALT 41 +#define R_XTENSA_SLOT7_ALT 42 +#define R_XTENSA_SLOT8_ALT 43 +#define R_XTENSA_SLOT9_ALT 44 +#define R_XTENSA_SLOT10_ALT 45 +#define R_XTENSA_SLOT11_ALT 46 +#define R_XTENSA_SLOT12_ALT 47 +#define R_XTENSA_SLOT13_ALT 48 +#define R_XTENSA_SLOT14_ALT 49 +/* Keep this the last entry. */ +#define R_XTENSA_NUM 50 + __END_DECLS #endif /* elf.h */ diff --git a/ldso/include/dl-string.h b/ldso/include/dl-string.h index bf993b29c..746bd91c6 100644 --- a/ldso/include/dl-string.h +++ b/ldso/include/dl-string.h @@ -286,7 +286,7 @@ static __always_inline char * _dl_simple_ltoahex(char * local, unsigned long i) * This requires that load_addr must already be defined... */ #if defined(mc68000) || defined(__arm__) || defined(__thumb__) || \ defined(__mips__) || defined(__sh__) || defined(__powerpc__) || \ - defined(__avr32__) + defined(__avr32__) || defined(__xtensa__) # define CONSTANT_STRING_GOT_FIXUP(X) \ if ((X) < (const char *) load_addr) (X) += load_addr # define NO_EARLY_SEND_STDERR diff --git a/ldso/ldso/xtensa/dl-debug.h b/ldso/ldso/xtensa/dl-debug.h new file mode 100644 index 000000000..327defc07 --- /dev/null +++ b/ldso/ldso/xtensa/dl-debug.h @@ -0,0 +1,61 @@ +/* vi: set sw=4 ts=4: */ +/* Xtensa ELF shared library loader support + * + * Copyright (C) 2007 Tensilica Inc. + * + * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. 
+ */ + +static const char *_dl_reltypes_tab[] = +{ + "R_XTENSA_NONE", + "R_XTENSA_32", + "R_XTENSA_RTLD", + "R_XTENSA_GLOB_DAT", + "R_XTENSA_JMP_SLOT", + "R_XTENSA_RELATIVE", + "R_XTENSA_PLT", + "R_XTENSA_UNUSED7", + "R_XTENSA_OP0", + "R_XTENSA_OP1", + "R_XTENSA_OP2", + "R_XTENSA_ASM_EXPAND", + "R_XTENSA_ASM_SIMPLIFY", + "R_XTENSA_UNUSED13", + "R_XTENSA_UNUSED14", + "R_XTENSA_GNU_VTINHERIT", + "R_XTENSA_GNU_VTENTRY", + "R_XTENSA_DIFF8", + "R_XTENSA_DIFF16", + "R_XTENSA_DIFF32", + "R_XTENSA_SLOT0_OP", + "R_XTENSA_SLOT1_OP", + "R_XTENSA_SLOT2_OP", + "R_XTENSA_SLOT3_OP", + "R_XTENSA_SLOT4_OP", + "R_XTENSA_SLOT5_OP", + "R_XTENSA_SLOT6_OP", + "R_XTENSA_SLOT7_OP", + "R_XTENSA_SLOT8_OP", + "R_XTENSA_SLOT9_OP", + "R_XTENSA_SLOT10_OP", + "R_XTENSA_SLOT11_OP", + "R_XTENSA_SLOT12_OP", + "R_XTENSA_SLOT13_OP", + "R_XTENSA_SLOT14_OP", + "R_XTENSA_SLOT0_ALT", + "R_XTENSA_SLOT1_ALT", + "R_XTENSA_SLOT2_ALT", + "R_XTENSA_SLOT3_ALT", + "R_XTENSA_SLOT4_ALT", + "R_XTENSA_SLOT5_ALT", + "R_XTENSA_SLOT6_ALT", + "R_XTENSA_SLOT7_ALT", + "R_XTENSA_SLOT8_ALT", + "R_XTENSA_SLOT9_ALT", + "R_XTENSA_SLOT10_ALT", + "R_XTENSA_SLOT11_ALT", + "R_XTENSA_SLOT12_ALT", + "R_XTENSA_SLOT13_ALT", + "R_XTENSA_SLOT14_ALT" +}; diff --git a/ldso/ldso/xtensa/dl-startup.h b/ldso/ldso/xtensa/dl-startup.h new file mode 100644 index 000000000..2fd012846 --- /dev/null +++ b/ldso/ldso/xtensa/dl-startup.h @@ -0,0 +1,106 @@ +/* vi: set sw=4 ts=4: */ +/* + * Xtensa ELF code used by dl-startup.c. + * + * Copyright (C) 2007 Tensilica Inc. + * + * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. + * Parts taken from glibc/sysdeps/xtensa/dl-machine.h. 
+ */ + +__asm__ ( + " .text\n" + " .align 4\n" + " .global _start\n" + " .type _start, @function\n" + "_start:\n" + " # Compute load offset in a2: the GOT has not yet been relocated\n" + " # but the entries for local symbols contain the relative offsets\n" + " # and we can explicitly add the load offset in this code.\n" + " _call0 0f\n" + " .align 4\n" + "0: movi a3, _start+3\n" + " sub a2, a0, a3\n" + " # Make sure a0 is cleared to mark the top of stack.\n" + " movi a0, 0\n" + " # user_entry_point = _dl_start(pointer to argument block)\n" + " movi a4, _dl_start\n" + " mov a6, sp\n" + " add a4, a4, a2\n" + " callx4 a4\n" + " # Save user_entry_point so we can jump to it.\n" + " mov a3, a6\n" + " l32i a7, sp, 0 # load argc\n" + " # Load _dl_skip_args into a4.\n" + " movi a4, _dl_skip_args\n" + " l32i a4, a4, 0\n" + " bnez a4, .Lfixup_stack\n" + ".Lfixup_stack_ret:\n" + " # Pass finalizer (_dl_fini) in a2 to the user entry point.\n" + " movi a2, _dl_fini\n" + " # Jump to user's entry point (_start).\n" + " jx a3\n" + ".Lfixup_stack:\n" + " # argc -= _dl_skip_args (with argc @ sp+0)\n" + " sub a7, a7, a4\n" + " s32i a7, sp, 0\n" + " # Shift everything by _dl_skip_args.\n" + " addi a5, sp, 4 # a5 = destination ptr = argv\n" + " add a4, a5, a4 # a4 = source ptr = argv + _dl_skip_args\n" + " # Shift argv.\n" + "1: l32i a6, a4, 0\n" + " addi a4, a4, 4\n" + " s32i a6, a5, 0\n" + " addi a5, a5, 4\n" + " bnez a6, 1b\n" + " # Shift envp.\n" + "2: l32i a6, a4, 0\n" + " addi a4, a4, 4\n" + " s32i a6, a5, 0\n" + " addi a5, a5, 4\n" + " bnez a6, 2b\n" + " # Shift auxiliary table.\n" + "3: l32i a6, a4, 0\n" + " l32i a8, a4, 4\n" + " addi a4, a4, 8\n" + " s32i a6, a5, 0\n" + " s32i a8, a5, 4\n" + " addi a5, a5, 8\n" + " bnez a6, 3b\n" + " j .Lfixup_stack_ret"); + +/* Get a pointer to the argv value. */ +#define GET_ARGV(ARGVP, ARGS) ARGVP = (((unsigned long *) ARGS) + 1) + +/* Function calls are not safe until the GOT relocations have been done. 
*/ +#define NO_FUNCS_BEFORE_BOOTSTRAP + +#define PERFORM_BOOTSTRAP_GOT(tpnt) \ +do { \ + xtensa_got_location *got_loc; \ + unsigned long l_addr = tpnt->loadaddr; \ + Elf32_Word relative_count; \ + unsigned long rel_addr; \ + int x; \ +\ + got_loc = (xtensa_got_location *) \ + (tpnt->dynamic_info[DT_XTENSA (GOT_LOC_OFF)] + l_addr); \ +\ + for (x = 0; x < tpnt->dynamic_info[DT_XTENSA (GOT_LOC_SZ)]; x++) { \ + Elf32_Addr got_start, got_end; \ + got_start = got_loc[x].offset & ~(PAGE_SIZE - 1); \ + got_end = ((got_loc[x].offset + got_loc[x].length + PAGE_SIZE - 1) \ + & ~(PAGE_SIZE - 1)); \ + _dl_mprotect ((void *)(got_start + l_addr), got_end - got_start, \ + PROT_READ | PROT_WRITE | PROT_EXEC); \ + } \ +\ + /* The following is a stripped down version of the code following \ + the invocation of PERFORM_BOOTSTRAP_GOT in dl-startup.c. That \ + code is skipped when PERFORM_BOOTSTRAP_GOT is defined, so it has \ + to be done here instead. */ \ + relative_count = tpnt->dynamic_info[DT_RELCONT_IDX]; \ + rel_addr = tpnt->dynamic_info[DT_RELOC_TABLE_ADDR]; \ + if (rel_addr) \ + elf_machine_relative(load_addr, rel_addr, relative_count); \ +} while (0) diff --git a/ldso/ldso/xtensa/dl-syscalls.h b/ldso/ldso/xtensa/dl-syscalls.h new file mode 100644 index 000000000..4b42a57e0 --- /dev/null +++ b/ldso/ldso/xtensa/dl-syscalls.h @@ -0,0 +1,7 @@ +/* We can't use the real errno in ldso, since it has not yet + * been dynamically linked in. */ +#include "sys/syscall.h" +extern int _dl_errno; +#undef __set_errno +#define __set_errno(X) {(_dl_errno) = (X);} + diff --git a/ldso/ldso/xtensa/dl-sysdep.h b/ldso/ldso/xtensa/dl-sysdep.h new file mode 100644 index 000000000..afbbf3bfe --- /dev/null +++ b/ldso/ldso/xtensa/dl-sysdep.h @@ -0,0 +1,132 @@ +/* Machine-dependent ELF dynamic relocation. + Parts copied from glibc/sysdeps/xtensa/dl-machine.h + Copyright (C) 2001, 2007 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street - Fifth Floor, + Boston, MA 02110-1301, USA. */ + +/* Define this if the system uses RELOCA. */ +#define ELF_USES_RELOCA +#include +#include + +/* Translate a processor specific dynamic tag to the index + in l_info array. */ +#define DT_XTENSA(x) (DT_XTENSA_##x - DT_LOPROC + DT_NUM + OS_NUM) + +typedef struct xtensa_got_location_struct { + Elf32_Off offset; + Elf32_Word length; +} xtensa_got_location; + +/* Initialization sequence for the GOT. */ +#define INIT_GOT(GOT_BASE, MODULE) \ + do { \ + xtensa_got_location *got_loc; \ + Elf32_Addr l_addr = MODULE->loadaddr; \ + int x; \ + \ + got_loc = (xtensa_got_location *) \ + (MODULE->dynamic_info[DT_XTENSA (GOT_LOC_OFF)] + l_addr); \ + \ + for (x = 0; x < MODULE->dynamic_info[DT_XTENSA (GOT_LOC_SZ)]; x++) \ + { \ + Elf32_Addr got_start, got_end; \ + got_start = got_loc[x].offset & ~(PAGE_SIZE - 1); \ + got_end = ((got_loc[x].offset + got_loc[x].length + PAGE_SIZE - 1) \ + & ~(PAGE_SIZE - 1)); \ + _dl_mprotect ((void *)(got_start + l_addr) , got_end - got_start, \ + PROT_READ | PROT_WRITE | PROT_EXEC); \ + } \ + \ + /* Fill in first GOT entry according to the ABI. 
*/ \ + GOT_BASE[0] = (unsigned long) _dl_linux_resolve; \ + } while (0) + +/* Parse dynamic info */ +#define ARCH_NUM 2 +#define ARCH_DYNAMIC_INFO(dpnt, dynamic, debug_addr) \ + do { \ + if (dpnt->d_tag == DT_XTENSA_GOT_LOC_OFF) \ + dynamic[DT_XTENSA (GOT_LOC_OFF)] = dpnt->d_un.d_ptr; \ + else if (dpnt->d_tag == DT_XTENSA_GOT_LOC_SZ) \ + dynamic[DT_XTENSA (GOT_LOC_SZ)] = dpnt->d_un.d_val; \ + } while (0) + +/* Here we define the magic numbers that this dynamic loader should accept. */ +#define MAGIC1 EM_XTENSA +#undef MAGIC2 + +/* Used for error messages. */ +#define ELF_TARGET "Xtensa" + +struct elf_resolve; +extern unsigned long _dl_linux_resolver (struct elf_resolve *, int); + +/* 4096 bytes alignment */ +#define PAGE_ALIGN 0xfffff000 +#define ADDR_ALIGN 0xfff +#define OFFS_ALIGN 0x7ffff000 + +/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry, so + undefined references should not be allowed to define the value. */ +#define elf_machine_type_class(type) \ + (((type) == R_XTENSA_JMP_SLOT) * ELF_RTYPE_CLASS_PLT) + +/* Return the link-time address of _DYNAMIC. */ +static inline Elf32_Addr +elf_machine_dynamic (void) +{ + /* This function is only used while bootstrapping the runtime linker. + The "_DYNAMIC" symbol is always local so its GOT entry will initially + contain the link-time address. */ + return (Elf32_Addr) &_DYNAMIC; +} + +/* Return the run-time load address of the shared object. */ +static inline Elf32_Addr +elf_machine_load_address (void) +{ + Elf32_Addr addr, tmp; + + /* At this point, the runtime linker is being bootstrapped and the GOT + entry used for ".Lhere" will contain the link address. The CALL0 will + produce the dynamic address of ".Lhere" + 3. Thus, the end result is + equal to "dynamic_address(.Lhere) - link_address(.Lhere)". 
*/ + __asm__ ("\ + movi %0, .Lhere\n\ + mov %1, a0\n\ +.Lhere: _call0 0f\n\ + .align 4\n\ +0: sub %0, a0, %0\n\ + mov a0, %1" + : "=a" (addr), "=a" (tmp)); + + return addr - 3; +} + +static inline void +elf_machine_relative (Elf32_Addr load_off, const Elf32_Addr rel_addr, + Elf32_Word relative_count) +{ + Elf32_Rela *rpnt = (Elf32_Rela *) rel_addr; + while (relative_count--) + { + Elf32_Addr *const reloc_addr = (Elf32_Addr *) (load_off + rpnt->r_offset); + *reloc_addr += load_off + rpnt->r_addend; + rpnt++; + } +} diff --git a/ldso/ldso/xtensa/elfinterp.c b/ldso/ldso/xtensa/elfinterp.c new file mode 100644 index 000000000..a459431b1 --- /dev/null +++ b/ldso/ldso/xtensa/elfinterp.c @@ -0,0 +1,285 @@ +/* vi: set sw=4 ts=4: */ +/* Xtensa ELF shared library loader suppport + * + * Copyright (C) 2007 Tensilica Inc. + * Copyright (c) 1994-2000 Eric Youngdale, Peter MacDonald, + * David Engel, Hongjiu Lu and Mitch D'Souza + * Copyright (C) 2001-2004 Erik Andersen + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the above contributors may not be + * used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "ldso.h" + +unsigned long +_dl_linux_resolver (struct elf_resolve *tpnt, int reloc_entry) +{ + int reloc_type; + ELF_RELOC *this_reloc; + char *strtab; + Elf32_Sym *symtab; + int symtab_index; + char *rel_addr; + char *new_addr; + char **got_addr; + char *symname; + + rel_addr = (char *) tpnt->dynamic_info[DT_JMPREL]; + this_reloc = (ELF_RELOC *) (rel_addr + reloc_entry); + reloc_type = ELF32_R_TYPE (this_reloc->r_info); + symtab_index = ELF32_R_SYM (this_reloc->r_info); + + symtab = (Elf32_Sym *) tpnt->dynamic_info[DT_SYMTAB]; + strtab = (char *) tpnt->dynamic_info[DT_STRTAB]; + symname = strtab + symtab[symtab_index].st_name; + + if (unlikely (reloc_type != R_XTENSA_JMP_SLOT)) { + _dl_dprintf (2, "%s: Incorrect relocation type in jump relocations\n", + _dl_progname); + _dl_exit (1); + } + + /* Address of the literal to fix up. */ + got_addr = (char **) (this_reloc->r_offset + tpnt->loadaddr); + + /* Get the address of the GOT entry. 
*/ + new_addr = _dl_find_hash (symname, tpnt->symbol_scope, tpnt, + ELF_RTYPE_CLASS_PLT); + if (unlikely (!new_addr)) { + _dl_dprintf (2, "%s: can't resolve symbol '%s'\n", + _dl_progname, symname); + _dl_exit (1); + } + +#if defined (__SUPPORT_LD_DEBUG__) + if (_dl_debug_bindings) { + _dl_dprintf (_dl_debug_file, "\nresolve function: %s", symname); + if (_dl_debug_detail) + _dl_dprintf (_dl_debug_file, "\n\tpatched %x ==> %x @ %x\n", + *got_addr, new_addr, got_addr); + } + if (!_dl_debug_nofixups) + *got_addr = new_addr; +#else + *got_addr = new_addr; +#endif + + return (unsigned long) new_addr; +} + + +static int +_dl_parse (struct elf_resolve *tpnt, struct dyn_elf *scope, + unsigned long rel_addr, unsigned long rel_size, + int (*reloc_fnc) (struct elf_resolve *tpnt, struct dyn_elf *scope, + ELF_RELOC *rpnt, Elf32_Sym *symtab, char *strtab)) +{ + unsigned int i; + char *strtab; + Elf32_Sym *symtab; + ELF_RELOC *rpnt; + int symtab_index; + + /* Parse the relocation information. */ + rpnt = (ELF_RELOC *) rel_addr; + rel_size /= sizeof (ELF_RELOC); + + symtab = (Elf32_Sym *) tpnt->dynamic_info[DT_SYMTAB]; + strtab = (char *) tpnt->dynamic_info[DT_STRTAB]; + + for (i = 0; i < rel_size; i++, rpnt++) { + int res; + + symtab_index = ELF32_R_SYM (rpnt->r_info); + + debug_sym (symtab, strtab, symtab_index); + debug_reloc (symtab, strtab, rpnt); + + res = reloc_fnc (tpnt, scope, rpnt, symtab, strtab); + + if (res == 0) + continue; + + _dl_dprintf (2, "\n%s: ", _dl_progname); + + if (symtab_index) + _dl_dprintf (2, "symbol '%s': ", + strtab + symtab[symtab_index].st_name); + + if (unlikely (res < 0)) { + int reloc_type = ELF32_R_TYPE (rpnt->r_info); +#if defined (__SUPPORT_LD_DEBUG__) + _dl_dprintf (2, "can't handle reloc type %s\n", + _dl_reltypes (reloc_type)); +#else + _dl_dprintf (2, "can't handle reloc type %x\n", reloc_type); +#endif + _dl_exit (-res); + } + if (unlikely (res > 0)) { + _dl_dprintf (2, "can't resolve symbol\n"); + return res; + } + } + + return 0; +} + 
+ +static int +_dl_do_reloc (struct elf_resolve *tpnt, struct dyn_elf *scope, + ELF_RELOC *rpnt, Elf32_Sym *symtab, char *strtab) +{ + int reloc_type; + int symtab_index; + char *symname; + Elf32_Sym *sym; + Elf32_Addr *reloc_addr; + Elf32_Addr symbol_addr; +#if defined (__SUPPORT_LD_DEBUG__) + Elf32_Addr old_val; +#endif + + reloc_addr = (Elf32_Addr *) (tpnt->loadaddr + rpnt->r_offset); + reloc_type = ELF32_R_TYPE (rpnt->r_info); + symtab_index = ELF32_R_SYM (rpnt->r_info); + sym = &symtab[symtab_index]; + symbol_addr = 0; + symname = strtab + sym->st_name; + + if (symtab_index) { + symbol_addr = (Elf32_Addr) + _dl_find_hash (symname, scope, tpnt, + elf_machine_type_class (reloc_type)); + + /* + * We want to allow undefined references to weak symbols - this might + * have been intentional. We should not be linking local symbols + * here, so all bases should be covered. + */ + if (unlikely (!symbol_addr && + ELF32_ST_BIND (sym->st_info) != STB_WEAK)) { + _dl_dprintf (2, "%s: can't resolve symbol '%s'\n", + _dl_progname, symname); + _dl_exit (1); + } + } + +#if defined (__SUPPORT_LD_DEBUG__) + old_val = *reloc_addr; +#endif + + switch (reloc_type) { + case R_XTENSA_NONE: + break; + + case R_XTENSA_GLOB_DAT: + case R_XTENSA_JMP_SLOT: + *reloc_addr = symbol_addr + rpnt->r_addend; + break; + + case R_XTENSA_RTLD: + if (rpnt->r_addend == 1) { + /* Grab the function pointer stashed at the beginning of the + GOT by the GOT_INIT function. */ + *reloc_addr = *(Elf32_Addr *) tpnt->dynamic_info[DT_PLTGOT]; + } else if (rpnt->r_addend == 2) { + /* Store the link map for the object. */ + *reloc_addr = (Elf32_Addr) tpnt; + } else { + _dl_exit (1); + } + break; + + case R_XTENSA_RELATIVE: + *reloc_addr += tpnt->loadaddr + rpnt->r_addend; + break; + + default: + return -1; /* Calls _dl_exit(1). 
*/ + } +#if defined (__SUPPORT_LD_DEBUG__) + if (_dl_debug_reloc && _dl_debug_detail) + _dl_dprintf (_dl_debug_file, "\tpatched: %x ==> %x @ %x", + old_val, *reloc_addr, reloc_addr); +#endif + + return 0; +} + + +static int +_dl_do_lazy_reloc (struct elf_resolve *tpnt, struct dyn_elf *scope, + ELF_RELOC *rpnt, Elf32_Sym *symtab, char *strtab) +{ + int reloc_type; + Elf32_Addr *reloc_addr; +#if defined (__SUPPORT_LD_DEBUG__) + Elf32_Addr old_val; +#endif + + reloc_addr = (Elf32_Addr *) (tpnt->loadaddr + rpnt->r_offset); + reloc_type = ELF32_R_TYPE (rpnt->r_info); + +#if defined (__SUPPORT_LD_DEBUG__) + old_val = *reloc_addr; +#endif + + switch (reloc_type) { + case R_XTENSA_JMP_SLOT: + /* Perform a RELATIVE reloc on the GOT entry that transfers + to the stub function. */ + *reloc_addr += tpnt->loadaddr; + break; + case R_XTENSA_NONE: + break; + default: + _dl_exit (1); + } + +#if defined (__SUPPORT_LD_DEBUG__) + if (_dl_debug_reloc && _dl_debug_detail) + _dl_dprintf (_dl_debug_file, "\tpatched: %x ==> %x @ %x", + old_val, *reloc_addr, reloc_addr); +#endif + return 0; + +} + +void +_dl_parse_lazy_relocation_information (struct dyn_elf *rpnt, + unsigned long rel_addr, + unsigned long rel_size) +{ + (void) _dl_parse (rpnt->dyn, NULL, rel_addr, rel_size, _dl_do_lazy_reloc); +} + +int +_dl_parse_relocation_information (struct dyn_elf *rpnt, + unsigned long rel_addr, + unsigned long rel_size) +{ + return _dl_parse (rpnt->dyn, rpnt->dyn->symbol_scope, rel_addr, rel_size, + _dl_do_reloc); +} diff --git a/ldso/ldso/xtensa/resolve.S b/ldso/ldso/xtensa/resolve.S new file mode 100644 index 000000000..fb298391c --- /dev/null +++ b/ldso/ldso/xtensa/resolve.S @@ -0,0 +1,61 @@ +/* Xtensa dynamic resolver. + Parts copied from glibc/sysdeps/xtensa/dl-trampoline.S + Copyright (C) 2007 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street - Fifth Floor, + Boston, MA 02110-1301, USA. */ + +#define MIN_FRAME_SIZE 32 + +#ifdef __XTENSA_EB__ +#define XTENSA_IMM12_FLD_OFFSET 8 +#else /* __XTENSA_EL__ */ +#define XTENSA_IMM12_FLD_OFFSET 12 +#endif /* __XTENSA_EL__ */ + + .text + .align 4 + .global _dl_linux_resolve + .type _dl_linux_resolve, @function +_dl_linux_resolve: + /* Fix up the high 2 bits of the return address. */ + mov a14, a0 // save a0 temporarily + _call0 0f + .align 4 +0: extui a13, a0, 30, 2 + slli a13, a13, 30 + mov a0, a14 // restore a0 + slli a12, a0, 2 + srli a12, a12, 2 + or a12, a12, a13 + + /* Call the fixup function. */ + movi a8, _dl_linux_resolver + callx8 a8 + + /* Extract the target's frame size from the ENTRY instruction. */ + l32i a11, a10, 0 + extui a11, a11, XTENSA_IMM12_FLD_OFFSET, 12 + slli a11, a11, 3 + + addi a11, a11, -MIN_FRAME_SIZE + sub a11, sp, a11 + movsp sp, a11 + + /* Jump to the next instruction past the ENTRY. */ + addi a10, a10, 3 + jx a10 + .size _dl_linux_resolve, . 
- _dl_linux_resolve diff --git a/libc/string/xtensa/Makefile b/libc/string/xtensa/Makefile new file mode 100644 index 000000000..0a95346fd --- /dev/null +++ b/libc/string/xtensa/Makefile @@ -0,0 +1,13 @@ +# Makefile for uClibc +# +# Copyright (C) 2000-2005 Erik Andersen +# +# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. +# + +top_srcdir:=../../../ +top_builddir:=../../../ +all: objs +include $(top_builddir)Rules.mak +include ../Makefile.in +include $(top_srcdir)Makerules diff --git a/libc/string/xtensa/memcpy.S b/libc/string/xtensa/memcpy.S new file mode 100644 index 000000000..19f3a6818 --- /dev/null +++ b/libc/string/xtensa/memcpy.S @@ -0,0 +1,297 @@ +/* Optimized memcpy for Xtensa. + Copyright (C) 2001, 2007 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 51 Franklin Street - Fifth Floor, + Boston, MA 02110-1301, USA. */ + +#include "../../sysdeps/linux/xtensa/sysdep.h" +#include + + .macro src_b r, w0, w1 +#ifdef __XTENSA_EB__ + src \r, \w0, \w1 +#else + src \r, \w1, \w0 +#endif + .endm + + .macro ssa8 r +#ifdef __XTENSA_EB__ + ssa8b \r +#else + ssa8l \r +#endif + .endm + +/* If the Xtensa Unaligned Load Exception option is not used, this + code can run a few cycles faster by relying on the low address bits + being ignored. 
+   However, if the code is then run with an Xtensa ISS
+   client that checks for unaligned accesses, it will produce a lot of
+   warning messages.  Set this flag to disable the use of unaligned
+   accesses and keep the ISS happy.  */
+
+#define UNALIGNED_ADDRESSES_CHECKED 1
+
+/* Do not use .literal_position in the ENTRY macro.  */
+#undef LITERAL_POSITION
+#define LITERAL_POSITION
+
+
+/* void *memcpy (void *dst, const void *src, size_t len)
+
+   The algorithm is as follows:
+
+   If the destination is unaligned, align it by conditionally
+   copying 1- and/or 2-byte pieces.
+
+   If the source is aligned, copy 16 bytes with a loop, and then finish up
+   with 8, 4, 2, and 1-byte copies conditional on the length.
+
+   Else (if source is unaligned), do the same, but use SRC to align the
+   source data.
+
+   This code tries to use fall-through branches for the common
+   case of aligned source and destination and multiple of 4 (or 8) length.  */
+
+
+/* Byte by byte copy.  */
+
+	.text
+	.align	4
+	.literal_position
+__memcpy_aux:
+
+	/* Skip a byte to get 1 mod 4 alignment for LOOPNEZ
+	   (0 mod 4 alignment for LBEG).  */
+	.byte	0
+
+.Lbytecopy:
+#if XCHAL_HAVE_LOOPS
+	loopnez	a4, 2f
+#else
+	beqz	a4, 2f
+	add	a7, a3, a4	// a7 = end address for source
+#endif
+1:	l8ui	a6, a3, 0
+	addi	a3, a3, 1
+	s8i	a6, a5, 0
+	addi	a5, a5, 1
+#if !XCHAL_HAVE_LOOPS
+	blt	a3, a7, 1b
+#endif
+2:	retw
+
+
+/* Destination is unaligned.  */
+
+	.align	4
+.Ldst1mod2: // dst is only byte aligned
+
+	/* Do short copies byte-by-byte.  */
+	_bltui	a4, 7, .Lbytecopy
+
+	/* Copy 1 byte.  */
+	l8ui	a6, a3, 0
+	addi	a3, a3, 1
+	addi	a4, a4, -1
+	s8i	a6, a5, 0
+	addi	a5, a5, 1
+
+	/* Return to main algorithm if dst is now aligned.  */
+	_bbci.l	a5, 1, .Ldstaligned
+
+.Ldst2mod4: // dst has 16-bit alignment
+
+	/* Do short copies byte-by-byte.  */
+	_bltui	a4, 6, .Lbytecopy
+
+	/* Copy 2 bytes.  */
+	l8ui	a6, a3, 0
+	l8ui	a7, a3, 1
+	addi	a3, a3, 2
+	addi	a4, a4, -2
+	s8i	a6, a5, 0
+	s8i	a7, a5, 1
+	addi	a5, a5, 2
+
+	/* dst is now aligned; return to main algorithm.  */
+	j	.Ldstaligned
+
+
+ENTRY (memcpy)
+	/* a2 = dst, a3 = src, a4 = len */
+
+	mov	a5, a2		// copy dst so that a2 is return value
+	_bbsi.l	a2, 0, .Ldst1mod2
+	_bbsi.l	a2, 1, .Ldst2mod4
+.Ldstaligned:
+
+	/* Get number of loop iterations with 16B per iteration.  */
+	srli	a7, a4, 4
+
+	/* Check if source is aligned.  */
+	movi	a8, 3
+	_bany	a3, a8, .Lsrcunaligned
+
+	/* Destination and source are word-aligned, use word copy.  */
+#if XCHAL_HAVE_LOOPS
+	loopnez	a7, 2f
+#else
+	beqz	a7, 2f
+	slli	a8, a7, 4
+	add	a8, a8, a3	// a8 = end of last 16B source chunk
+#endif
+1:	l32i	a6, a3, 0
+	l32i	a7, a3, 4
+	s32i	a6, a5, 0
+	l32i	a6, a3, 8
+	s32i	a7, a5, 4
+	l32i	a7, a3, 12
+	s32i	a6, a5, 8
+	addi	a3, a3, 16
+	s32i	a7, a5, 12
+	addi	a5, a5, 16
+#if !XCHAL_HAVE_LOOPS
+	blt	a3, a8, 1b
+#endif
+
+	/* Copy any leftover pieces smaller than 16B.  */
+2:	bbci.l	a4, 3, 3f
+
+	/* Copy 8 bytes.  */
+	l32i	a6, a3, 0
+	l32i	a7, a3, 4
+	addi	a3, a3, 8
+	s32i	a6, a5, 0
+	s32i	a7, a5, 4
+	addi	a5, a5, 8
+
+3:	bbsi.l	a4, 2, 4f
+	bbsi.l	a4, 1, 5f
+	bbsi.l	a4, 0, 6f
+	retw
+
+	/* Copy 4 bytes.  */
+4:	l32i	a6, a3, 0
+	addi	a3, a3, 4
+	s32i	a6, a5, 0
+	addi	a5, a5, 4
+	bbsi.l	a4, 1, 5f
+	bbsi.l	a4, 0, 6f
+	retw
+
+	/* Copy 2 bytes.  */
+5:	l16ui	a6, a3, 0
+	addi	a3, a3, 2
+	s16i	a6, a5, 0
+	addi	a5, a5, 2
+	bbsi.l	a4, 0, 6f
+	retw
+
+	/* Copy 1 byte.  */
+6:	l8ui	a6, a3, 0
+	s8i	a6, a5, 0
+
+.Ldone:
+	retw
+
+
+/* Destination is aligned; source is unaligned.  */
+
+	.align	4
+.Lsrcunaligned:
+	/* Avoid loading anything for zero-length copies.  */
+	_beqz	a4, .Ldone
+
+	/* Copy 16 bytes per iteration for word-aligned dst and
+	   unaligned src.  */
+	ssa8	a3		// set shift amount from byte offset
+#if UNALIGNED_ADDRESSES_CHECKED
+	and	a11, a3, a8	// save unalignment offset for below
+	sub	a3, a3, a11	// align a3
+#endif
+	l32i	a6, a3, 0	// load first word
+#if XCHAL_HAVE_LOOPS
+	loopnez	a7, 2f
+#else
+	beqz	a7, 2f
+	slli	a10, a7, 4
+	add	a10, a10, a3	// a10 = end of last 16B source chunk
+#endif
+1:	l32i	a7, a3, 4
+	l32i	a8, a3, 8
+	src_b	a6, a6, a7
+	s32i	a6, a5, 0
+	l32i	a9, a3, 12
+	src_b	a7, a7, a8
+	s32i	a7, a5, 4
+	l32i	a6, a3, 16
+	src_b	a8, a8, a9
+	s32i	a8, a5, 8
+	addi	a3, a3, 16
+	src_b	a9, a9, a6
+	s32i	a9, a5, 12
+	addi	a5, a5, 16
+#if !XCHAL_HAVE_LOOPS
+	blt	a3, a10, 1b
+#endif
+
+2:	bbci.l	a4, 3, 3f
+
+	/* Copy 8 bytes.  */
+	l32i	a7, a3, 4
+	l32i	a8, a3, 8
+	src_b	a6, a6, a7
+	s32i	a6, a5, 0
+	addi	a3, a3, 8
+	src_b	a7, a7, a8
+	s32i	a7, a5, 4
+	addi	a5, a5, 8
+	mov	a6, a8
+
+3:	bbci.l	a4, 2, 4f
+
+	/* Copy 4 bytes.  */
+	l32i	a7, a3, 4
+	addi	a3, a3, 4
+	src_b	a6, a6, a7
+	s32i	a6, a5, 0
+	addi	a5, a5, 4
+	mov	a6, a7
+4:
+#if UNALIGNED_ADDRESSES_CHECKED
+	add	a3, a3, a11	// readjust a3 with correct misalignment
+#endif
+	bbsi.l	a4, 1, 5f
+	bbsi.l	a4, 0, 6f
+	retw
+
+	/* Copy 2 bytes.  */
+5:	l8ui	a6, a3, 0
+	l8ui	a7, a3, 1
+	addi	a3, a3, 2
+	s8i	a6, a5, 0
+	s8i	a7, a5, 1
+	addi	a5, a5, 2
+	bbsi.l	a4, 0, 6f
+	retw
+
+	/* Copy 1 byte.  */
+6:	l8ui	a6, a3, 0
+	s8i	a6, a5, 0
+	retw
+
+libc_hidden_def (memcpy)
diff --git a/libc/string/xtensa/memset.S b/libc/string/xtensa/memset.S
new file mode 100644
index 000000000..c0928825d
--- /dev/null
+++ b/libc/string/xtensa/memset.S
@@ -0,0 +1,165 @@
+/* Optimized memset for Xtensa.
+   Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+/* Do not use .literal_position in the ENTRY macro.  */
+#undef LITERAL_POSITION
+#define LITERAL_POSITION
+
+/* void *memset (void *dst, int c, size_t length)
+
+   The algorithm is as follows:
+
+   Create a word with c in all byte positions.
+
+   If the destination is aligned, set 16B chunks with a loop, and then
+   finish up with 8B, 4B, 2B, and 1B stores conditional on the length.
+
+   If the destination is unaligned, align it by conditionally
+   setting 1B and/or 2B and then go to aligned case.
+
+   This code tries to use fall-through branches for the common
+   case of an aligned destination (except for the branches to
+   the alignment labels).  */
+
+
+/* Byte-by-byte set.  */
+
+	.text
+	.align	4
+	.literal_position
+__memset_aux:
+
+	/* Skip a byte to get 1 mod 4 alignment for LOOPNEZ
+	   (0 mod 4 alignment for LBEG).  */
+	.byte	0
+
+.Lbyteset:
+#if XCHAL_HAVE_LOOPS
+	loopnez	a4, 2f
+#else
+	beqz	a4, 2f
+	add	a6, a5, a4	// a6 = ending address
+#endif
+1:	s8i	a3, a5, 0
+	addi	a5, a5, 1
+#if !XCHAL_HAVE_LOOPS
+	blt	a5, a6, 1b
+#endif
+2:	retw
+
+
+/* Destination is unaligned.  */
+
+	.align	4
+
+.Ldst1mod2: // dst is only byte aligned
+
+	/* Do short sizes byte-by-byte.  */
+	bltui	a4, 8, .Lbyteset
+
+	/* Set 1 byte.  */
+	s8i	a3, a5, 0
+	addi	a5, a5, 1
+	addi	a4, a4, -1
+
+	/* Now retest if dst is aligned.  */
+	_bbci.l	a5, 1, .Ldstaligned
+
+.Ldst2mod4: // dst has 16-bit alignment
+
+	/* Do short sizes byte-by-byte.  */
+	bltui	a4, 8, .Lbyteset
+
+	/* Set 2 bytes.  */
+	s16i	a3, a5, 0
+	addi	a5, a5, 2
+	addi	a4, a4, -2
+
+	/* dst is now aligned; return to main algorithm */
+	j	.Ldstaligned
+
+
+ENTRY (memset)
+	/* a2 = dst, a3 = c, a4 = length */
+
+	/* Duplicate character into all bytes of word.  */
+	extui	a3, a3, 0, 8
+	slli	a7, a3, 8
+	or	a3, a3, a7
+	slli	a7, a3, 16
+	or	a3, a3, a7
+
+	mov	a5, a2		// copy dst so that a2 is return value
+
+	/* Check if dst is unaligned.  */
+	_bbsi.l	a2, 0, .Ldst1mod2
+	_bbsi.l	a2, 1, .Ldst2mod4
+.Ldstaligned:
+
+	/* Get number of loop iterations with 16B per iteration.  */
+	srli	a7, a4, 4
+
+	/* Destination is word-aligned.  */
+#if XCHAL_HAVE_LOOPS
+	loopnez	a7, 2f
+#else
+	beqz	a7, 2f
+	slli	a6, a7, 4
+	add	a6, a6, a5	// a6 = end of last 16B chunk
+#endif
+	/* Set 16 bytes per iteration.  */
+1:	s32i	a3, a5, 0
+	s32i	a3, a5, 4
+	s32i	a3, a5, 8
+	s32i	a3, a5, 12
+	addi	a5, a5, 16
+#if !XCHAL_HAVE_LOOPS
+	blt	a5, a6, 1b
+#endif
+
+	/* Set any leftover pieces smaller than 16B.  */
+2:	bbci.l	a4, 3, 3f
+
+	/* Set 8 bytes.  */
+	s32i	a3, a5, 0
+	s32i	a3, a5, 4
+	addi	a5, a5, 8
+
+3:	bbci.l	a4, 2, 4f
+
+	/* Set 4 bytes.  */
+	s32i	a3, a5, 0
+	addi	a5, a5, 4
+
+4:	bbci.l	a4, 1, 5f
+
+	/* Set 2 bytes.  */
+	s16i	a3, a5, 0
+	addi	a5, a5, 2
+
+5:	bbci.l	a4, 0, 6f
+
+	/* Set 1 byte.  */
+	s8i	a3, a5, 0
+6:	retw
+
+libc_hidden_def (memset)
diff --git a/libc/string/xtensa/strcmp.S b/libc/string/xtensa/strcmp.S
new file mode 100644
index 000000000..90c418d12
--- /dev/null
+++ b/libc/string/xtensa/strcmp.S
@@ -0,0 +1,313 @@
+/* Optimized strcmp for Xtensa.
+   Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+/* Single-byte masks for each byte lane of a word, by endianness.  */
+#ifdef __XTENSA_EB__
+#define MASK0 0xff000000
+#define MASK1 0x00ff0000
+#define MASK2 0x0000ff00
+#define MASK3 0x000000ff
+#else
+#define MASK0 0x000000ff
+#define MASK1 0x0000ff00
+#define MASK2 0x00ff0000
+#define MASK3 0xff000000
+#endif
+
+#define MASK4 0x40404040
+
+	.literal .Lmask0, MASK0
+	.literal .Lmask1, MASK1
+	.literal .Lmask2, MASK2
+	.literal .Lmask3, MASK3
+	.literal .Lmask4, MASK4
+
+	.text
+ENTRY (strcmp)
+	/* a2 = s1, a3 = s2 */
+
+	l8ui	a8, a2, 0	// byte 0 from s1
+	l8ui	a9, a3, 0	// byte 0 from s2
+	movi	a10, 3		// mask
+	bne	a8, a9, .Lretdiff
+
+	or	a11, a2, a3
+	bnone	a11, a10, .Laligned
+
+	xor	a11, a2, a3	// compare low two bits of s1 and s2
+	bany	a11, a10, .Lunaligned	// if they have different alignment
+
+	/* s1/s2 are not word-aligned.  */
+	addi	a2, a2, 1	// advance s1
+	beqz	a8, .Leq	// bytes equal, if zero, strings are equal
+	addi	a3, a3, 1	// advance s2
+	bnone	a2, a10, .Laligned	// if s1/s2 now aligned
+	l8ui	a8, a2, 0	// byte 1 from s1
+	l8ui	a9, a3, 0	// byte 1 from s2
+	addi	a2, a2, 1	// advance s1
+	bne	a8, a9, .Lretdiff	// if different, return difference
+	beqz	a8, .Leq	// bytes equal, if zero, strings are equal
+	addi	a3, a3, 1	// advance s2
+	bnone	a2, a10, .Laligned	// if s1/s2 now aligned
+	l8ui	a8, a2, 0	// byte 2 from s1
+	l8ui	a9, a3, 0	// byte 2 from s2
+	addi	a2, a2, 1	// advance s1
+	bne	a8, a9, .Lretdiff	// if different, return difference
+	beqz	a8, .Leq	// bytes equal, if zero, strings are equal
+	addi	a3, a3, 1	// advance s2
+	j	.Laligned
+
+/* s1 and s2 have different alignment.
+
+   If the zero-overhead loop option is available, use an (almost)
+   infinite zero-overhead loop with conditional exits so we only pay
+   for taken branches when exiting the loop.
+
+   Note: It is important for this unaligned case to come before the
+   code for aligned strings, because otherwise some of the branches
+   above cannot reach and have to be transformed to branches around
+   jumps.  The unaligned code is smaller and the branches can reach
+   over it.  */
+
+	.align	4
+	/* (2 mod 4) alignment for loop instruction */
+.Lunaligned:
+#if XCHAL_HAVE_LOOPS
+	_movi.n	a8, 0		// set up for the maximum loop count
+	loop	a8, .Lretdiff	// loop forever (almost anyway)
+#endif
+.Lnextbyte:
+	l8ui	a8, a2, 0
+	l8ui	a9, a3, 0
+	addi	a2, a2, 1
+	bne	a8, a9, .Lretdiff
+	addi	a3, a3, 1
+#if XCHAL_HAVE_LOOPS
+	beqz	a8, .Lretdiff
+#else
+	bnez	a8, .Lnextbyte
+#endif
+.Lretdiff:
+	sub	a2, a8, a9
+	retw
+
+/* s1 is word-aligned; s2 is word-aligned.
+
+   If the zero-overhead loop option is available, use an (almost)
+   infinite zero-overhead loop with conditional exits so we only pay
+   for taken branches when exiting the loop.  */
+
+/* New algorithm, relying on the fact that all normal ASCII is between
+   32 and 127.
+
+   Rather than check all bytes for zero:
+   Take one word (4 bytes).  Call it w1.
+   Shift w1 left by one into w1'.
+   Or w1 and w1'.  For all normal ASCII bit 6 will be 1; for zero it won't.
+   Check that all 4 bit 6's (one for each byte) are one:
+   If they are, we are definitely not done.
+   If they are not, we are probably done, but need to check for zero.  */
+
+	.align	4
+#if XCHAL_HAVE_LOOPS
+.Laligned:
+	.begin	no-transform
+	l32r	a4, .Lmask0	// mask for byte 0
+	l32r	a7, .Lmask4
+	/* Loop forever.  (a4 is more than the maximum number
+	   of iterations) */
+	loop	a4, .Laligned_done
+
+	/* First unrolled loop body.  */
+	l32i	a8, a2, 0	// get word from s1
+	l32i	a9, a3, 0	// get word from s2
+	slli	a5, a8, 1
+	bne	a8, a9, .Lwne2
+	or	a9, a8, a5
+	bnall	a9, a7, .Lprobeq
+
+	/* Second unrolled loop body.  */
+	l32i	a8, a2, 4	// get word from s1+4
+	l32i	a9, a3, 4	// get word from s2+4
+	slli	a5, a8, 1
+	bne	a8, a9, .Lwne2
+	or	a9, a8, a5
+	bnall	a9, a7, .Lprobeq2
+
+	addi	a2, a2, 8	// advance s1 pointer
+	addi	a3, a3, 8	// advance s2 pointer
+.Laligned_done:
+	or	a1, a1, a1	// nop
+
+.Lprobeq2:
+	/* Adjust pointers to account for the loop unrolling.  */
+	addi	a2, a2, 4
+	addi	a3, a3, 4
+
+#else /* !XCHAL_HAVE_LOOPS */
+
+.Laligned:
+	movi	a4, MASK0	// mask for byte 0
+	movi	a7, MASK4
+	j	.Lfirstword
+.Lnextword:
+	addi	a2, a2, 4	// advance s1 pointer
+	addi	a3, a3, 4	// advance s2 pointer
+.Lfirstword:
+	l32i	a8, a2, 0	// get word from s1
+	l32i	a9, a3, 0	// get word from s2
+	slli	a5, a8, 1
+	bne	a8, a9, .Lwne2
+	or	a9, a8, a5
+	ball	a9, a7, .Lnextword
+#endif /* !XCHAL_HAVE_LOOPS */
+
+	/* align (0 mod 4) */
+.Lprobeq:
+	/* Words are probably equal, but check for sure.
+	   If not, loop over the rest of string using normal algorithm.  */
+
+	bnone	a8, a4, .Leq	// if byte 0 is zero
+	l32r	a5, .Lmask1	// mask for byte 1
+	l32r	a6, .Lmask2	// mask for byte 2
+	bnone	a8, a5, .Leq	// if byte 1 is zero
+	l32r	a7, .Lmask3	// mask for byte 3
+	bnone	a8, a6, .Leq	// if byte 2 is zero
+	bnone	a8, a7, .Leq	// if byte 3 is zero
+	addi.n	a2, a2, 4	// advance s1 pointer
+	addi.n	a3, a3, 4	// advance s2 pointer
+#if XCHAL_HAVE_LOOPS
+
+	/* align (1 mod 4) */
+	loop	a4, .Leq	// loop forever (a4 is bigger than max iters)
+	.end	no-transform
+
+	l32i	a8, a2, 0	// get word from s1
+	l32i	a9, a3, 0	// get word from s2
+	addi	a2, a2, 4	// advance s1 pointer
+	bne	a8, a9, .Lwne
+	bnone	a8, a4, .Leq	// if byte 0 is zero
+	bnone	a8, a5, .Leq	// if byte 1 is zero
+	bnone	a8, a6, .Leq	// if byte 2 is zero
+	bnone	a8, a7, .Leq	// if byte 3 is zero
+	addi	a3, a3, 4	// advance s2 pointer
+
+#else /* !XCHAL_HAVE_LOOPS */
+
+	j	.Lfirstword2
+.Lnextword2:
+	addi	a3, a3, 4	// advance s2 pointer
+.Lfirstword2:
+	l32i	a8, a2, 0	// get word from s1
+	l32i	a9, a3, 0	// get word from s2
+	addi	a2, a2, 4	// advance s1 pointer
+	bne	a8, a9, .Lwne
+	bnone	a8, a4, .Leq	// if byte 0 is zero
+	bnone	a8, a5, .Leq	// if byte 1 is zero
+	bnone	a8, a6, .Leq	// if byte 2 is zero
+	bany	a8, a7, .Lnextword2	// if byte 3 is nonzero
+#endif /* !XCHAL_HAVE_LOOPS */
+
+	/* Words are equal; some byte is zero.  */
+.Leq:	movi	a2, 0		// return equal
+	retw
+
+.Lwne2:	/* Words are not equal.  On big-endian processors, if none of the
+	   bytes are zero, the return value can be determined by a simple
+	   comparison.  */
+#ifdef __XTENSA_EB__
+	or	a10, a8, a5
+	bnall	a10, a7, .Lsomezero
+	bgeu	a8, a9, .Lposreturn
+	movi	a2, -1
+	retw
+.Lposreturn:
+	movi	a2, 1
+	retw
+.Lsomezero:	// There is probably some zero byte.
+#endif /* __XTENSA_EB__ */
+.Lwne:	/* Words are not equal.  */
+	xor	a2, a8, a9	// get word with nonzero in byte that differs
+	bany	a2, a4, .Ldiff0	// if byte 0 differs
+	movi	a5, MASK1	// mask for byte 1
+	bnone	a8, a4, .Leq	// if byte 0 is zero
+	bany	a2, a5, .Ldiff1	// if byte 1 differs
+	movi	a6, MASK2	// mask for byte 2
+	bnone	a8, a5, .Leq	// if byte 1 is zero
+	bany	a2, a6, .Ldiff2	// if byte 2 differs
+	bnone	a8, a6, .Leq	// if byte 2 is zero
+#ifdef __XTENSA_EB__
+.Ldiff3:
+.Ldiff2:
+.Ldiff1:
+	/* Byte 0 is equal (at least) and there is a difference before a zero
+	   byte.  Just subtract words to get the return value.
+	   The high order equal bytes cancel, leaving room for the sign.  */
+	sub	a2, a8, a9
+	retw
+
+.Ldiff0:
+	/* Need to make room for the sign, so can't subtract whole words.  */
+	extui	a10, a8, 24, 8
+	extui	a11, a9, 24, 8
+	sub	a2, a10, a11
+	retw
+
+#else /* !__XTENSA_EB__ */
+	/* Little-endian is a little more difficult because can't subtract
+	   whole words.  */
+.Ldiff3:
+	/* Bytes 0-2 are equal; byte 3 is different.
+	   For little-endian need to have a sign bit for the difference.  */
+	extui	a10, a8, 24, 8
+	extui	a11, a9, 24, 8
+	sub	a2, a10, a11
+	retw
+
+.Ldiff0:
+	/* Byte 0 is different.  */
+	extui	a10, a8, 0, 8
+	extui	a11, a9, 0, 8
+	sub	a2, a10, a11
+	retw
+
+.Ldiff1:
+	/* Byte 0 is equal; byte 1 is different.  */
+	extui	a10, a8, 8, 8
+	extui	a11, a9, 8, 8
+	sub	a2, a10, a11
+	retw
+
+.Ldiff2:
+	/* Bytes 0-1 are equal; byte 2 is different.  */
+	extui	a10, a8, 16, 8
+	extui	a11, a9, 16, 8
+	sub	a2, a10, a11
+	retw
+
+#endif /* !__XTENSA_EB */
+
+libc_hidden_def (strcmp)
+
+#ifndef __UCLIBC_HAS_LOCALE__
+strong_alias (strcmp, strcoll)
+libc_hidden_def (strcoll)
+#endif
diff --git a/libc/string/xtensa/strcpy.S b/libc/string/xtensa/strcpy.S
new file mode 100644
index 000000000..108070384
--- /dev/null
+++ b/libc/string/xtensa/strcpy.S
@@ -0,0 +1,150 @@
+/* Optimized strcpy for Xtensa.
+   Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+/* Single-byte masks for each byte lane of a word, by endianness.  */
+#ifdef __XTENSA_EB__
+#define MASK0 0xff000000
+#define MASK1 0x00ff0000
+#define MASK2 0x0000ff00
+#define MASK3 0x000000ff
+#else
+#define MASK0 0x000000ff
+#define MASK1 0x0000ff00
+#define MASK2 0x00ff0000
+#define MASK3 0xff000000
+#endif
+
+	.text
+ENTRY (strcpy)
+	/* a2 = dst, a3 = src */
+
+	mov	a10, a2		// leave dst in return value register
+	movi	a4, MASK0
+	movi	a5, MASK1
+	movi	a6, MASK2
+	movi	a7, MASK3
+	bbsi.l	a3, 0, .Lsrc1mod2
+	bbsi.l	a3, 1, .Lsrc2mod4
+.Lsrcaligned:
+
+	/* Check if the destination is aligned.  */
+	movi	a8, 3
+	bnone	a10, a8, .Laligned
+
+	j	.Ldstunaligned
+
+.Lsrc1mod2: // src address is odd
+	l8ui	a8, a3, 0	// get byte 0
+	addi	a3, a3, 1	// advance src pointer
+	s8i	a8, a10, 0	// store byte 0
+	beqz	a8, 1f		// if byte 0 is zero
+	addi	a10, a10, 1	// advance dst pointer
+	bbci.l	a3, 1, .Lsrcaligned	// if src is now word-aligned
+
+.Lsrc2mod4: // src address is 2 mod 4
+	l8ui	a8, a3, 0	// get byte 0
+	/* 1-cycle interlock */
+	s8i	a8, a10, 0	// store byte 0
+	beqz	a8, 1f		// if byte 0 is zero
+	l8ui	a8, a3, 1	// get byte 1
+	addi	a3, a3, 2	// advance src pointer
+	s8i	a8, a10, 1	// store byte 1
+	addi	a10, a10, 2	// advance dst pointer
+	bnez	a8, .Lsrcaligned
+1:	retw
+
+
+/* dst is word-aligned; src is word-aligned.  */
+
+	.align	4
+#if XCHAL_HAVE_LOOPS
+	/* (2 mod 4) alignment for loop instruction */
+.Laligned:
+	_movi.n	a8, 0		// set up for the maximum loop count
+	loop	a8, .Lz3	// loop forever (almost anyway)
+	l32i	a8, a3, 0	// get word from src
+	addi	a3, a3, 4	// advance src pointer
+	bnone	a8, a4, .Lz0	// if byte 0 is zero
+	bnone	a8, a5, .Lz1	// if byte 1 is zero
+	bnone	a8, a6, .Lz2	// if byte 2 is zero
+	s32i	a8, a10, 0	// store word to dst
+	bnone	a8, a7, .Lz3	// if byte 3 is zero
+	addi	a10, a10, 4	// advance dst pointer
+
+#else /* !XCHAL_HAVE_LOOPS */
+
+1:	addi	a10, a10, 4	// advance dst pointer
+.Laligned:
+	l32i	a8, a3, 0	// get word from src
+	addi	a3, a3, 4	// advance src pointer
+	bnone	a8, a4, .Lz0	// if byte 0 is zero
+	bnone	a8, a5, .Lz1	// if byte 1 is zero
+	bnone	a8, a6, .Lz2	// if byte 2 is zero
+	s32i	a8, a10, 0	// store word to dst
+	bany	a8, a7, 1b	// continue if byte 3 is nonzero
+#endif /* !XCHAL_HAVE_LOOPS */
+
+.Lz3:	/* Byte 3 is zero.  */
+	retw
+
+.Lz0:	/* Byte 0 is zero.  */
+#ifdef __XTENSA_EB__
+	movi	a8, 0
+#endif
+	s8i	a8, a10, 0
+	retw
+
+.Lz1:	/* Byte 1 is zero.  */
+#ifdef __XTENSA_EB__
+	extui	a8, a8, 16, 16
+#endif
+	s16i	a8, a10, 0
+	retw
+
+.Lz2:	/* Byte 2 is zero.  */
+#ifdef __XTENSA_EB__
+	extui	a8, a8, 16, 16
+#endif
+	s16i	a8, a10, 0
+	movi	a8, 0
+	s8i	a8, a10, 2
+	retw
+
+	.align	4
+	/* (2 mod 4) alignment for loop instruction */
+.Ldstunaligned:
+
+#if XCHAL_HAVE_LOOPS
+	_movi.n	a8, 0		// set up for the maximum loop count
+	loop	a8, 2f		// loop forever (almost anyway)
+#endif
+1:	l8ui	a8, a3, 0
+	addi	a3, a3, 1
+	s8i	a8, a10, 0
+	addi	a10, a10, 1
+#if XCHAL_HAVE_LOOPS
+	beqz	a8, 2f
+#else
+	bnez	a8, 1b
+#endif
+2:	retw
+
+libc_hidden_def (strcpy)
diff --git a/libc/string/xtensa/strlen.S b/libc/string/xtensa/strlen.S
new file mode 100644
index 000000000..dd72c16fa
--- /dev/null
+++ b/libc/string/xtensa/strlen.S
@@ -0,0 +1,104 @@
+/* Optimized strlen for Xtensa.
+   Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+   Boston, MA 02110-1301, USA.
*/
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+/* Single-byte masks for each byte lane of a word, by endianness.  */
+#ifdef __XTENSA_EB__
+#define MASK0 0xff000000
+#define MASK1 0x00ff0000
+#define MASK2 0x0000ff00
+#define MASK3 0x000000ff
+#else
+#define MASK0 0x000000ff
+#define MASK1 0x0000ff00
+#define MASK2 0x00ff0000
+#define MASK3 0xff000000
+#endif
+
+	.text
+ENTRY (strlen)
+	/* a2 = s */
+
+	addi	a3, a2, -4	// because we overincrement at the end
+	movi	a4, MASK0
+	movi	a5, MASK1
+	movi	a6, MASK2
+	movi	a7, MASK3
+	bbsi.l	a2, 0, .L1mod2
+	bbsi.l	a2, 1, .L2mod4
+	j	.Laligned
+
+.L1mod2: // address is odd
+	l8ui	a8, a3, 4	// get byte 0
+	addi	a3, a3, 1	// advance string pointer
+	beqz	a8, .Lz3	// if byte 0 is zero
+	bbci.l	a3, 1, .Laligned // if string pointer is now word-aligned
+
+.L2mod4: // address is 2 mod 4
+	addi	a3, a3, 2	// advance ptr for aligned access
+	l32i	a8, a3, 0	// get word with first two bytes of string
+	bnone	a8, a6, .Lz2	// if byte 2 (of word, not string) is zero
+	bany	a8, a7, .Laligned // if byte 3 (of word, not string) is nonzero
+
+	/* Byte 3 is zero.  */
+	addi	a3, a3, 3	// point to zero byte
+	sub	a2, a3, a2	// subtract to get length
+	retw
+
+
+/* String is word-aligned.  */
+
+	.align	4
+	/* (2 mod 4) alignment for loop instruction */
+.Laligned:
+#if XCHAL_HAVE_LOOPS
+	_movi.n	a8, 0		// set up for the maximum loop count
+	loop	a8, .Lz3	// loop forever (almost anyway)
+#endif
+1:	l32i	a8, a3, 4	// get next word of string
+	addi	a3, a3, 4	// advance string pointer
+	bnone	a8, a4, .Lz0	// if byte 0 is zero
+	bnone	a8, a5, .Lz1	// if byte 1 is zero
+	bnone	a8, a6, .Lz2	// if byte 2 is zero
+#if XCHAL_HAVE_LOOPS
+	bnone	a8, a7, .Lz3	// if byte 3 is zero
+#else
+	bany	a8, a7, 1b	// repeat if byte 3 is nonzero
+#endif
+
+.Lz3:	/* Byte 3 is zero.  */
+	addi	a3, a3, 3	// point to zero byte
+	/* Fall through....  */
+
+.Lz0:	/* Byte 0 is zero.  */
+	sub	a2, a3, a2	// subtract to get length
+	retw
+
+.Lz1:	/* Byte 1 is zero.  */
+	addi	a3, a3, 1	// point to zero byte
+	sub	a2, a3, a2	// subtract to get length
+	retw
+
+.Lz2:	/* Byte 2 is zero.  */
+	addi	a3, a3, 2	// point to zero byte
+	sub	a2, a3, a2	// subtract to get length
+	retw
+
+libc_hidden_def (strlen)
diff --git a/libc/string/xtensa/strncpy.S b/libc/string/xtensa/strncpy.S
new file mode 100644
index 000000000..7ba2ef77d
--- /dev/null
+++ b/libc/string/xtensa/strncpy.S
@@ -0,0 +1,241 @@
+/* Optimized strncpy for Xtensa.
+   Copyright (C) 2001, 2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+#include "../../sysdeps/linux/xtensa/sysdep.h"
+#include <bits/xtensa-config.h>
+
+/* Single-byte masks for each byte lane of a word, by endianness.  */
+#ifdef __XTENSA_EB__
+#define MASK0 0xff000000
+#define MASK1 0x00ff0000
+#define MASK2 0x0000ff00
+#define MASK3 0x000000ff
+#else
+#define MASK0 0x000000ff
+#define MASK1 0x0000ff00
+#define MASK2 0x00ff0000
+#define MASK3 0xff000000
+#endif
+
+/* Do not use .literal_position in the ENTRY macro.
*/ +#undef LITERAL_POSITION +#define LITERAL_POSITION + + .text + .align 4 + .literal_position +__strncpy_aux: + +.Lsrc1mod2: // src address is odd + l8ui a8, a3, 0 // get byte 0 + addi a3, a3, 1 // advance src pointer + s8i a8, a10, 0 // store byte 0 + addi a4, a4, -1 // decrement n + beqz a4, .Lret // if n is zero + addi a10, a10, 1 // advance dst pointer + beqz a8, .Lfill // if byte 0 is zero + bbci.l a3, 1, .Lsrcaligned // if src is now word-aligned + +.Lsrc2mod4: // src address is 2 mod 4 + l8ui a8, a3, 0 // get byte 0 + addi a4, a4, -1 // decrement n + s8i a8, a10, 0 // store byte 0 + beqz a4, .Lret // if n is zero + addi a10, a10, 1 // advance dst pointer + beqz a8, .Lfill // if byte 0 is zero + l8ui a8, a3, 1 // get byte 0 + addi a3, a3, 2 // advance src pointer + s8i a8, a10, 0 // store byte 0 + addi a4, a4, -1 // decrement n + beqz a4, .Lret // if n is zero + addi a10, a10, 1 // advance dst pointer + bnez a8, .Lsrcaligned + j .Lfill + +.Lret: + retw + + +ENTRY (strncpy) + /* a2 = dst, a3 = src */ + + mov a10, a2 // leave dst in return value register + beqz a4, .Lret // if n is zero + + movi a11, MASK0 + movi a5, MASK1 + movi a6, MASK2 + movi a7, MASK3 + bbsi.l a3, 0, .Lsrc1mod2 + bbsi.l a3, 1, .Lsrc2mod4 +.Lsrcaligned: + + /* Check if the destination is aligned. */ + movi a8, 3 + bnone a10, a8, .Laligned + + j .Ldstunaligned + + +/* Fill the dst with zeros -- n is at least 1. */ + +.Lfill: + movi a9, 0 + bbsi.l a10, 0, .Lfill1mod2 + bbsi.l a10, 1, .Lfill2mod4 +.Lfillaligned: + blti a4, 4, .Lfillcleanup + + /* Loop filling complete words with zero. */ +#if XCHAL_HAVE_LOOPS + + srai a8, a4, 2 + loop a8, 1f + s32i a9, a10, 0 + addi a10, a10, 4 + +1: slli a8, a8, 2 + sub a4, a4, a8 + +#else /* !XCHAL_HAVE_LOOPS */ + +1: s32i a9, a10, 0 + addi a10, a10, 4 + addi a4, a4, -4 + bgei a4, 4, 1b + +#endif /* !XCHAL_HAVE_LOOPS */ + + beqz a4, 2f + +.Lfillcleanup: + /* Fill leftover (1 to 3) bytes with zero. 
*/ + s8i a9, a10, 0 // store byte 0 + addi a4, a4, -1 // decrement n + addi a10, a10, 1 + bnez a4, .Lfillcleanup + +2: retw + +.Lfill1mod2: // dst address is odd + s8i a9, a10, 0 // store byte 0 + addi a4, a4, -1 // decrement n + beqz a4, 2b // if n is zero + addi a10, a10, 1 // advance dst pointer + bbci.l a10, 1, .Lfillaligned // if