summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Bruel <christian.bruel@st.com>2010-12-17 09:58:25 +0100
committerCarmelo Amoroso <carmelo.amoroso@st.com>2010-12-17 10:02:44 +0100
commit6ac247452e646c2187f2f559143c8c087b0542e0 (patch)
treee514d6f6aead35f8b6f3a2385443b822c260378b
parentc61707353e16b8e22cab1a150cc3a6bf9b178e1c (diff)
libm_sh: add optimised assembly implementation of lroundf and lrintf
* libc/sysdeps/linux/sh/sysdep.h: Add LOCAL macro * libm/sh/sh4/Makefile.arch: Include asm source in the build * libm/sh/sh4/s_lrintf.S [NEW]: optimised asm lrintf * libm/sh/sh4/s_lroundf.S [NEW]: optimised asm lroundf Signed-off-by: Christian Bruel <christian.bruel@st.com> Signed-off-by: Carmelo Amoroso <carmelo.amoroso@st.com>
-rw-r--r--libc/sysdeps/linux/sh/sysdep.h1
-rw-r--r--libm/sh/sh4/Makefile.arch8
-rw-r--r--libm/sh/sh4/s_lrintf.S52
-rw-r--r--libm/sh/sh4/s_lroundf.S39
4 files changed, 97 insertions, 3 deletions
diff --git a/libc/sysdeps/linux/sh/sysdep.h b/libc/sysdeps/linux/sh/sysdep.h
index 2ef0a3305..8b3c68220 100644
--- a/libc/sysdeps/linux/sh/sysdep.h
+++ b/libc/sysdeps/linux/sh/sysdep.h
@@ -26,6 +26,7 @@
/* Syntactic details of assembler. */
+#define LOCAL(X) .L_##X /* Pastes X onto the ".L_" prefix to form a label name. */
#define ALIGNARG(log2) log2
/* For ELF we need the `.type' directive to make shared libs work right. */
#define ASM_TYPE_DIRECTIVE(name,typearg) .type name,@##typearg;
diff --git a/libm/sh/sh4/Makefile.arch b/libm/sh/sh4/Makefile.arch
index 122d84da2..e38e99c15 100644
--- a/libm/sh/sh4/Makefile.arch
+++ b/libm/sh/sh4/Makefile.arch
@@ -7,11 +7,13 @@
#
ifeq ($(UCLIBC_HAS_FENV),y)
-libm_ARCH_SRC:=$(wildcard $(libm_SUBARCH_DIR)/*.c)
-libm_ARCH_OBJ:=$(patsubst $(libm_SUBARCH_DIR)/%.c,$(libm_SUBARCH_OUT)/%.o,$(libm_ARCH_SRC))
+libm_ARCH_CSRC:=$(wildcard $(libm_SUBARCH_DIR)/*.c)
+libm_ARCH_COBJ:=$(patsubst $(libm_SUBARCH_DIR)/%.c,$(libm_SUBARCH_OUT)/%.o,$(libm_ARCH_CSRC))
+libm_ARCH_SSRC:=$(wildcard $(libm_SUBARCH_DIR)/*.S)
+libm_ARCH_SOBJ:=$(patsubst $(libm_SUBARCH_DIR)/%.S,$(libm_SUBARCH_OUT)/%.o,$(libm_ARCH_SSRC))
endif
-libm_ARCH_OBJS:=$(libm_ARCH_OBJ)
+libm_ARCH_OBJS:=$(libm_ARCH_COBJ) $(libm_ARCH_SOBJ)
ifeq ($(DOPIC),y)
libm-a-y+=$(libm_ARCH_OBJS:.o=.os)
diff --git a/libm/sh/sh4/s_lrintf.S b/libm/sh/sh4/s_lrintf.S
new file mode 100644
index 000000000..d8cec329c
--- /dev/null
+++ b/libm/sh/sh4/s_lrintf.S
@@ -0,0 +1,52 @@
+/* Round argument to nearest integer value. SH4 version.
+ * According to ISO/IEC 9899:1999. This version doesn't handle range error.
+ * If arg is not finite or if the result cannot be represented into a long,
+ * return an unspecified value. No exception raised.
+ *
+ * Copyright (C) 2010 STMicroelectronics Ltd.
+ *
+ * Author: Christian Bruel <christian.bruel@st.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+#include <sysdep.h>
+
+ENTRY(lrintf)			/* Result is returned in r0; uses r1-r3, fr2, fr5, fpul as scratch. */
+ mov #0,r0			/* r0 = 0, the value loaded into FPSCR below */
+ sts fpscr,r3			/* r3 = FPSCR on entry, restored in the rts delay slot */
+ lds r0,fpscr			/* FPSCR = 0; presumably single precision, round-to-nearest, FP traps off - confirm in SH-4 manual */
+ flds fr5,fpul			/* fpul = raw bits of fr5 (assumed to hold the float argument - confirm against ABI) */
+ mov.l LOCAL(mask),r1		/* r1 = 0x80000000 */
+ sts fpul,r2			/* r2 = raw bits of the argument */
+ and r2,r1			/* r1 = sign bit of the argument (0 or 0x80000000) */
+ mov.l LOCAL(midway),r2		/* r2 = bit pattern of 0.5f */
+ or r1,r2			/* r2 = bits of 0.5f carrying the argument's sign */
+ lds r2,fpul
+ fsts fpul,fr2			/* fr2 = +0.5 or -0.5 */
+ fadd fr2,fr5			/* fr5 = argument + sign-matched 0.5 */
+ ftrc fr5,fpul			/* fpul = fr5 converted to integer (truncating) */
+ sts fpul,r0			/* r0 = candidate result */
+ float fpul,fr2			/* fr2 = candidate converted back to float */
+ fcmp/eq fr5,fr2		/* T = 1 when the biased sum is itself integral, treated as a halfway case */
+ bf/s 0f			/* sum not integral: candidate is final */
+ mov #1,r2			/* (delay slot) r2 = 1, mask for the low bit */
+ tst r1,r1			/* T = 1 when the sign bit is clear */
+ and r0,r2			/* r2 = low bit of the candidate */
+ movt r1			/* r1 = 1 if sign bit clear, else 0 */
+ shal r1			/* r1 = 2 or 0 */
+ tst r2,r2			/* T = 1 when the candidate is even */
+ add #-1,r1			/* r1 = +1 or -1; the bt below still tests the T set by tst */
+ bt 0f				/* even candidate: keep it */
+ sub r1,r0			/* odd candidate: step one back toward zero to the even neighbour */
+0:				/* NOTE(review): fadd itself rounds, so an integral sum may not always be an exact tie - confirm for large magnitudes */
+ rts
+ lds r3,fpscr			/* (delay slot) restore the FPSCR saved on entry */
+
+ .align 2
+LOCAL(mask):
+ .long 0x80000000		/* bit 31 only: sign bit of a 32-bit float */
+LOCAL(midway):
+ .long 1056964608		/* 0x3F000000, the IEEE-754 single-precision encoding of 0.5 */
+
+END(lrintf)
diff --git a/libm/sh/sh4/s_lroundf.S b/libm/sh/sh4/s_lroundf.S
new file mode 100644
index 000000000..fda3a4b91
--- /dev/null
+++ b/libm/sh/sh4/s_lroundf.S
@@ -0,0 +1,39 @@
+/* Round argument to nearest integer value, halfway cases away from zero. SH4 version.
+ * According to ISO/IEC 9899:1999. This version doesn't handle range error.
+ * If arg is not finite or if the result cannot be represented into a long,
+ * return an unspecified value. No exception raised.
+ *
+ * Copyright (C) 2010 STMicroelectronics Ltd.
+ *
+ * Author: Christian Bruel <christian.bruel@st.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+#include <sysdep.h>
+
+ENTRY(lroundf)			/* Result is returned in r0; uses r1-r3, fr2, fr5, fpul as scratch. */
+ mov #0,r0			/* r0 = 0, the value loaded into FPSCR below */
+ sts fpscr,r3			/* r3 = FPSCR on entry, restored in the rts delay slot */
+ lds r0,fpscr			/* FPSCR = 0; presumably single precision, round-to-nearest, FP traps off - confirm in SH-4 manual */
+ flds fr5,fpul			/* fpul = raw bits of fr5 (assumed to hold the float argument - confirm against ABI) */
+ mov.l LOCAL(mask),r1		/* r1 = 0x80000000 */
+ sts fpul,r2			/* r2 = raw bits of the argument */
+ and r2,r1			/* r1 = sign bit of the argument (0 or 0x80000000) */
+ mov.l LOCAL(midway),r2		/* r2 = bit pattern of 0.5f */
+ or r1,r2			/* r2 = bits of 0.5f carrying the argument's sign */
+ lds r2,fpul
+ fsts fpul,fr2			/* fr2 = +0.5 or -0.5 */
+ fadd fr2,fr5			/* fr5 = argument + sign-matched 0.5 */
+ ftrc fr5,fpul			/* fpul = fr5 converted to integer (truncating) */
+ sts fpul,r0			/* r0 = result */
+ rts
+ lds r3,fpscr			/* (delay slot) restore the FPSCR saved on entry */
+
+ .align 2
+LOCAL(mask):
+ .long 0x80000000		/* bit 31 only: sign bit of a 32-bit float */
+LOCAL(midway):
+ .long 1056964608		/* 0x3F000000, the IEEE-754 single-precision encoding of 0.5 */
+
+END(lroundf)