summaryrefslogtreecommitdiff
path: root/libc/string
diff options
context:
space:
mode:
authorEric Andersen <andersen@codepoet.org>2004-05-14 10:29:45 +0000
committerEric Andersen <andersen@codepoet.org>2004-05-14 10:29:45 +0000
commit2baa0cccce48cc0c2fffcb0a6191dc383aceec72 (patch)
tree13a913121b861f10bd813b3259f9bbc524871508 /libc/string
parentf98d73a77145e0d56ae08c910ed4e9964fd28cff (diff)
Alexandre Oliva writes:
This patch introduces optimized versions of memcpy and memset for frv.
Diffstat (limited to 'libc/string')
-rw-r--r--libc/string/Makefile2
-rw-r--r--libc/string/frv/Makefile39
-rw-r--r--libc/string/frv/memcpy.S124
-rw-r--r--libc/string/frv/memset.S155
4 files changed, 319 insertions, 1 deletions
diff --git a/libc/string/Makefile b/libc/string/Makefile
index 19c62b32d..37a57cc26 100644
--- a/libc/string/Makefile
+++ b/libc/string/Makefile
@@ -23,7 +23,7 @@ DIRS=
ifeq ($(TARGET_ARCH),$(wildcard $(TARGET_ARCH)))
DIRS = $(TARGET_ARCH)
endif
-ALL_SUBDIRS = i386 arm sh64 powerpc
+ALL_SUBDIRS = arm frv i386 sh64 powerpc
MSRC= wstring.c
MOBJ= basename.o bcopy.o bzero.o dirname.o ffs.o memccpy.o memchr.o memcmp.o \
diff --git a/libc/string/frv/Makefile b/libc/string/frv/Makefile
new file mode 100644
index 000000000..5424e9b94
--- /dev/null
+++ b/libc/string/frv/Makefile
@@ -0,0 +1,39 @@
+# Makefile for uClibc
+#
+# Copyright (C) 2004 Alexandre Oliva <aoliva@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU Library General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Library General Public License
+# along with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+TOPDIR=../../../
+include $(TOPDIR)Rules.mak
+
+SSRC=memcpy.S memset.S
+SOBJS=$(patsubst %.S,%.o, $(SSRC))
+OBJS=$(SOBJS)
+
+all: $(OBJS) $(LIBC)
+
+$(LIBC): ar-target
+
+ar-target: $(OBJS)
+ $(AR) $(ARFLAGS) $(LIBC) $(OBJS)
+
+$(SOBJS): %.o : %.S
+ $(CC) $(CFLAGS) -c $< -o $@
+ $(STRIPTOOL) -x -R .note -R .comment $*.o
+
+clean:
+ $(RM) *.[oa] *~ core
+
diff --git a/libc/string/frv/memcpy.S b/libc/string/frv/memcpy.S
new file mode 100644
index 000000000..63cc523a9
--- /dev/null
+++ b/libc/string/frv/memcpy.S
@@ -0,0 +1,124 @@
+/* memcpy.S: optimised assembly memcpy
+ *
+ * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+ .text
+ .p2align 4
+
+###############################################################################
+#
+# void *memcpy(void *to, const char *from, size_t count)
+#
+# - NOTE: must not use any stack. exception detection performs function return
+# to caller's fixup routine, aborting the remainder of the copy
+#
+###############################################################################
+ .globl memcpy
+ .type memcpy,@function
+memcpy:
+ or.p gr8,gr9,gr4
+ orcc gr10,gr0,gr0,icc3
+ or.p gr10,gr4,gr4
+ beqlr icc3,#0
+
+ # optimise based on best common alignment for to, from & count
+ andicc.p gr4,#0x1f,gr0,icc0
+ setlos #8,gr11
+ andicc.p gr4,#0x0f,gr0,icc1
+ beq icc0,#0,memcpy_32
+ andicc.p gr4,#0x07,gr0,icc0
+ beq icc1,#0,memcpy_16
+ andicc.p gr4,#0x03,gr0,icc1
+ beq icc0,#0,memcpy_8
+ andicc.p gr4,#0x01,gr0,icc0
+ beq icc1,#0,memcpy_4
+ setlos.p #1,gr11
+ beq icc0,#0,memcpy_2
+
+ # do byte by byte copy
+ sub.p gr8,gr11,gr3
+ sub gr9,gr11,gr9
+0: ldubu.p @(gr9,gr11),gr4
+ subicc gr10,#1,gr10,icc0
+ stbu.p gr4,@(gr3,gr11)
+ bne icc0,#2,0b
+ bralr
+
+ # do halfword by halfword copy
+memcpy_2:
+ setlos #2,gr11
+ sub.p gr8,gr11,gr3
+ sub gr9,gr11,gr9
+0: lduhu.p @(gr9,gr11),gr4
+ subicc gr10,#2,gr10,icc0
+ sthu.p gr4,@(gr3,gr11)
+ bne icc0,#2,0b
+ bralr
+
+ # do word by word copy
+memcpy_4:
+ setlos #4,gr11
+ sub.p gr8,gr11,gr3
+ sub gr9,gr11,gr9
+0: ldu.p @(gr9,gr11),gr4
+ subicc gr10,#4,gr10,icc0
+ stu.p gr4,@(gr3,gr11)
+ bne icc0,#2,0b
+ bralr
+
+ # do double-word by double-word copy
+memcpy_8:
+ sub.p gr8,gr11,gr3
+ sub gr9,gr11,gr9
+0: lddu.p @(gr9,gr11),gr4
+ subicc gr10,#8,gr10,icc0
+ stdu.p gr4,@(gr3,gr11)
+ bne icc0,#2,0b
+ bralr
+
+ # do quad-word by quad-word copy
+memcpy_16:
+ sub.p gr8,gr11,gr3
+ sub gr9,gr11,gr9
+0: lddu @(gr9,gr11),gr4
+ lddu.p @(gr9,gr11),gr6
+ subicc gr10,#16,gr10,icc0
+ stdu gr4,@(gr3,gr11)
+ stdu.p gr6,@(gr3,gr11)
+ bne icc0,#2,0b
+ bralr
+
+ # do eight-word by eight-word copy
+memcpy_32:
+ sub.p gr8,gr11,gr3
+ sub gr9,gr11,gr9
+0: lddu @(gr9,gr11),gr4
+ lddu @(gr9,gr11),gr6
+ lddu @(gr9,gr11),gr12
+ lddu.p @(gr9,gr11),gr14
+ subicc gr10,#32,gr10,icc0
+ stdu gr4,@(gr3,gr11)
+ stdu gr6,@(gr3,gr11)
+ stdu gr12,@(gr3,gr11)
+ stdu.p gr14,@(gr3,gr11)
+ bne icc0,#2,0b
+ bralr
+
+ .size memcpy, .-memcpy
diff --git a/libc/string/frv/memset.S b/libc/string/frv/memset.S
new file mode 100644
index 000000000..50d4c24c9
--- /dev/null
+++ b/libc/string/frv/memset.S
@@ -0,0 +1,155 @@
+/* memset.S: optimised assembly memset
+ *
+ * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+ .text
+ .p2align 4
+
+###############################################################################
+#
+# void *memset(void *p, char ch, size_t count)
+#
+# - NOTE: must not use any stack. exception detection performs function return
+# to caller's fixup routine, aborting the remainder of the set
+# GR4, GR7, GR8, and GR11 must be managed
+#
+###############################################################################
+ .globl memset
+ .type memset,@function
+memset:
+ orcc.p gr10,gr0,gr5,icc3 ; GR5 = count
+ andi gr9,#0xff,gr9
+ or.p gr8,gr0,gr4 ; GR4 = address
+ beqlr icc3,#0
+
+ # conditionally write a byte to 2b-align the address
+ setlos.p #1,gr6
+ andicc gr4,#1,gr0,icc0
+ ckne icc0,cc7
+ cstb.p gr9,@(gr4,gr0) ,cc7,#1
+ csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3
+ cadd.p gr4,gr6,gr4 ,cc7,#1
+ beqlr icc3,#0
+
+ # conditionally write a word to 4b-align the address
+ andicc.p gr4,#2,gr0,icc0
+ subicc gr5,#2,gr0,icc1
+ setlos.p #2,gr6
+ ckne icc0,cc7
+ slli.p gr9,#8,gr12 ; need to double up the pattern
+ cknc icc1,cc5
+ or.p gr9,gr12,gr12
+ andcr cc7,cc5,cc7
+
+ csth.p gr12,@(gr4,gr0) ,cc7,#1
+ csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3
+ cadd.p gr4,gr6,gr4 ,cc7,#1
+ beqlr icc3,#0
+
+ # conditionally write a dword to 8b-align the address
+ andicc.p gr4,#4,gr0,icc0
+ subicc gr5,#4,gr0,icc1
+ setlos.p #4,gr6
+ ckne icc0,cc7
+ slli.p gr12,#16,gr13 ; need to quadruple-up the pattern
+ cknc icc1,cc5
+ or.p gr13,gr12,gr12
+ andcr cc7,cc5,cc7
+
+ cst.p gr12,@(gr4,gr0) ,cc7,#1
+ csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3
+ cadd.p gr4,gr6,gr4 ,cc7,#1
+ beqlr icc3,#0
+
+ or.p gr12,gr12,gr13 ; need to octuple-up the pattern
+
+ # the address is now 8b-aligned - loop around writing 64b chunks
+ setlos #8,gr7
+ subi.p gr4,#8,gr4 ; store with update index does weird stuff
+ setlos #64,gr6
+
+ subicc gr5,#64,gr0,icc0
+0: cknc icc0,cc7
+ cstdu gr12,@(gr4,gr7) ,cc7,#1
+ cstdu gr12,@(gr4,gr7) ,cc7,#1
+ cstdu gr12,@(gr4,gr7) ,cc7,#1
+ cstdu gr12,@(gr4,gr7) ,cc7,#1
+ cstdu gr12,@(gr4,gr7) ,cc7,#1
+ cstdu.p gr12,@(gr4,gr7) ,cc7,#1
+ csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3
+ cstdu.p gr12,@(gr4,gr7) ,cc7,#1
+ subicc gr5,#64,gr0,icc0
+ cstdu.p gr12,@(gr4,gr7) ,cc7,#1
+ beqlr icc3,#0
+ bnc icc0,#2,0b
+
+ # now do 32-byte remnant
+ subicc.p gr5,#32,gr0,icc0
+ setlos #32,gr6
+ cknc icc0,cc7
+ cstdu.p gr12,@(gr4,gr7) ,cc7,#1
+ csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3
+ cstdu.p gr12,@(gr4,gr7) ,cc7,#1
+ setlos #16,gr6
+ cstdu.p gr12,@(gr4,gr7) ,cc7,#1
+ subicc gr5,#16,gr0,icc0
+ cstdu.p gr12,@(gr4,gr7) ,cc7,#1
+ beqlr icc3,#0
+
+ # now do 16-byte remnant
+ cknc icc0,cc7
+ cstdu.p gr12,@(gr4,gr7) ,cc7,#1
+ csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3
+ cstdu.p gr12,@(gr4,gr7) ,cc7,#1
+ beqlr icc3,#0
+
+ # now do 8-byte remnant
+ subicc gr5,#8,gr0,icc1
+ cknc icc1,cc7
+ cstdu.p gr12,@(gr4,gr7) ,cc7,#1
+ csubcc gr5,gr7,gr5 ,cc7,#1 ; also set ICC3
+ setlos.p #4,gr7
+ beqlr icc3,#0
+
+ # now do 4-byte remnant
+ subicc gr5,#4,gr0,icc0
+ addi.p gr4,#4,gr4
+ cknc icc0,cc7
+ cstu.p gr12,@(gr4,gr7) ,cc7,#1
+ csubcc gr5,gr7,gr5 ,cc7,#1 ; also set ICC3
+ subicc.p gr5,#2,gr0,icc1
+ beqlr icc3,#0
+
+ # now do 2-byte remnant
+ setlos #2,gr7
+ addi.p gr4,#2,gr4
+ cknc icc1,cc7
+ csthu.p gr12,@(gr4,gr7) ,cc7,#1
+ csubcc gr5,gr7,gr5 ,cc7,#1 ; also set ICC3
+ subicc.p gr5,#1,gr0,icc0
+ beqlr icc3,#0
+
+ # now do 1-byte remnant
+ setlos #0,gr7
+ addi.p gr4,#2,gr4
+ cknc icc0,cc7
+ cstb.p gr12,@(gr4,gr0) ,cc7,#1
+ bralr
+ .size memset, .-memset