diff options
author | Christian Bruel <christian.bruel@st.com> | 2010-12-17 09:58:25 +0100 |
---|---|---|
committer | Carmelo Amoroso <carmelo.amoroso@st.com> | 2010-12-17 10:02:44 +0100 |
commit | 6ac247452e646c2187f2f559143c8c087b0542e0 (patch) | |
tree | e514d6f6aead35f8b6f3a2385443b822c260378b | |
parent | c61707353e16b8e22cab1a150cc3a6bf9b178e1c (diff) |
libm_sh: add optimised assembly implementation of lroundf and lrintf
* libc/sysdeps/linux/sh/sysdep.h: Add LOCAL macro
* libm/sh/sh4/Makefile.arch: Include asm source in the build
* libm/sh/sh4/s_lrintf.S [NEW]: optimised asm lrintf
* libm/sh/sh4/s_lroundf.S [NEW]: optimised asm lroundf
Signed-off-by: Christian Bruel <christian.bruel@st.com>
Signed-off-by: Carmelo Amoroso <carmelo.amoroso@st.com>
-rw-r--r-- | libc/sysdeps/linux/sh/sysdep.h | 1 | ||||
-rw-r--r-- | libm/sh/sh4/Makefile.arch | 8 | ||||
-rw-r--r-- | libm/sh/sh4/s_lrintf.S | 52 | ||||
-rw-r--r-- | libm/sh/sh4/s_lroundf.S | 39 |
4 files changed, 97 insertions, 3 deletions
diff --git a/libc/sysdeps/linux/sh/sysdep.h b/libc/sysdeps/linux/sh/sysdep.h index 2ef0a3305..8b3c68220 100644 --- a/libc/sysdeps/linux/sh/sysdep.h +++ b/libc/sysdeps/linux/sh/sysdep.h @@ -26,6 +26,7 @@ /* Syntactic details of assembler. */ +#define LOCAL(X) .L_##X #define ALIGNARG(log2) log2 /* For ELF we need the `.type' directive to make shared libs work right. */ #define ASM_TYPE_DIRECTIVE(name,typearg) .type name,@##typearg; diff --git a/libm/sh/sh4/Makefile.arch b/libm/sh/sh4/Makefile.arch index 122d84da2..e38e99c15 100644 --- a/libm/sh/sh4/Makefile.arch +++ b/libm/sh/sh4/Makefile.arch @@ -7,11 +7,13 @@ # ifeq ($(UCLIBC_HAS_FENV),y) -libm_ARCH_SRC:=$(wildcard $(libm_SUBARCH_DIR)/*.c) -libm_ARCH_OBJ:=$(patsubst $(libm_SUBARCH_DIR)/%.c,$(libm_SUBARCH_OUT)/%.o,$(libm_ARCH_SRC)) +libm_ARCH_CSRC:=$(wildcard $(libm_SUBARCH_DIR)/*.c) +libm_ARCH_COBJ:=$(patsubst $(libm_SUBARCH_DIR)/%.c,$(libm_SUBARCH_OUT)/%.o,$(libm_ARCH_SRC)) +libm_ARCH_SSRC:=$(wildcard $(libm_SUBARCH_DIR)/*.S) +libm_ARCH_SOBJ:=$(patsubst $(libm_SUBARCH_DIR)/%.S,$(libm_SUBARCH_OUT)/%.o,$(libm_ARCH_SSRC)) endif -libm_ARCH_OBJS:=$(libm_ARCH_OBJ) +libm_ARCH_OBJS:=$(libm_ARCH_COBJ) $(libm_ARCH_SOBJ) ifeq ($(DOPIC),y) libm-a-y+=$(libm_ARCH_OBJS:.o=.os) diff --git a/libm/sh/sh4/s_lrintf.S b/libm/sh/sh4/s_lrintf.S new file mode 100644 index 000000000..d8cec329c --- /dev/null +++ b/libm/sh/sh4/s_lrintf.S @@ -0,0 +1,52 @@ +/* Round argument to nearest integer value. SH4 version. + * According to ISO/IEC 9899:1999. This version doesn't handle range error. + * If arg is not finite or if the result cannot be represented into a long, + * return an unspecified value. No exception raised. + * + * Copyright (C) 2010 STMicroelectronics Ltd. + * + * Author: Christian Bruel <christian.bruel@st.com> + * + * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. + */ + +#include <sysdep.h> + +ENTRY(lrintf) + mov #0,r0 + sts fpscr,r3 + lds r0,fpscr + flds fr5,fpul + mov.l LOCAL(mask),r1 + sts fpul,r2 + and r2,r1 + mov.l LOCAL(midway),r2 + or r1,r2 + lds r2,fpul + fsts fpul,fr2 + fadd fr2,fr5 + ftrc fr5,fpul + sts fpul,r0 + float fpul,fr2 + fcmp/eq fr5,fr2 + bf/s 0f + mov #1,r2 + tst r1,r1 + and r0,r2 + movt r1 + shal r1 + tst r2,r2 + add #-1,r1 + bt 0f + sub r1,r0 +0: + rts + lds r3,fpscr + + .align 2 +LOCAL(mask): + .long 0x80000000 +LOCAL(midway): + .long 1056964608 + +END(lrintf) diff --git a/libm/sh/sh4/s_lroundf.S b/libm/sh/sh4/s_lroundf.S new file mode 100644 index 000000000..fda3a4b91 --- /dev/null +++ b/libm/sh/sh4/s_lroundf.S @@ -0,0 +1,39 @@ +/* Round argument toward 0. SH4 version. + * According to ISO/IEC 9899:1999. This version doesn't handle range error. + * If arg is not finite or if the result cannot be represented into a long, + * return an unspecified value. No exception raised. + * + * Copyright (C) 2010 STMicroelectronics Ltd. + * + * Author: Christian Bruel <christian.bruel@st.com> + * + * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. + */ + +#include <sysdep.h> + +ENTRY(lroundf) + mov #0,r0 + sts fpscr,r3 + lds r0,fpscr + flds fr5,fpul + mov.l LOCAL(mask),r1 + sts fpul,r2 + and r2,r1 + mov.l LOCAL(midway),r2 + or r1,r2 + lds r2,fpul + fsts fpul,fr2 + fadd fr2,fr5 + ftrc fr5,fpul + sts fpul,r0 + rts + lds r3,fpscr + + .align 2 +LOCAL(mask): + .long 0x80000000 +LOCAL(midway): + .long 1056964608 + +END(lroundf) |