diff options
author | Eric Andersen <andersen@codepoet.org> | 2004-05-14 10:29:45 +0000 |
---|---|---|
committer | Eric Andersen <andersen@codepoet.org> | 2004-05-14 10:29:45 +0000 |
commit | 2baa0cccce48cc0c2fffcb0a6191dc383aceec72 (patch) | |
tree | 13a913121b861f10bd813b3259f9bbc524871508 | |
parent | f98d73a77145e0d56ae08c910ed4e9964fd28cff (diff) |
Alexandre Oliva writes:
This patch introduces optimized versions of memcpy and memset for
frv.
-rw-r--r-- | libc/string/Makefile | 2 | ||||
-rw-r--r-- | libc/string/frv/Makefile | 39 | ||||
-rw-r--r-- | libc/string/frv/memcpy.S | 124 | ||||
-rw-r--r-- | libc/string/frv/memset.S | 155 |
4 files changed, 319 insertions, 1 deletions
diff --git a/libc/string/Makefile b/libc/string/Makefile index 19c62b32d..37a57cc26 100644 --- a/libc/string/Makefile +++ b/libc/string/Makefile @@ -23,7 +23,7 @@ DIRS= ifeq ($(TARGET_ARCH),$(wildcard $(TARGET_ARCH))) DIRS = $(TARGET_ARCH) endif -ALL_SUBDIRS = i386 arm sh64 powerpc +ALL_SUBDIRS = arm frv i386 sh64 powerpc MSRC= wstring.c MOBJ= basename.o bcopy.o bzero.o dirname.o ffs.o memccpy.o memchr.o memcmp.o \ diff --git a/libc/string/frv/Makefile b/libc/string/frv/Makefile new file mode 100644 index 000000000..5424e9b94 --- /dev/null +++ b/libc/string/frv/Makefile @@ -0,0 +1,39 @@ +# Makefile for uClibc +# +# Copyright (C) 2004 Alexandre Oliva <aoliva@redhat.com> +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU Library General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more +# details. +# +# You should have received a copy of the GNU Library General Public License +# along with this program; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +TOPDIR=../../../ +include $(TOPDIR)Rules.mak + +SSRC=memcpy.S memset.S +SOBJS=$(patsubst %.S,%.o, $(SSRC)) +OBJS=$(SOBJS) + +all: $(OBJS) $(LIBC) + +$(LIBC): ar-target + +ar-target: $(OBJS) + $(AR) $(ARFLAGS) $(LIBC) $(OBJS) + +$(SOBJS): %.o : %.S + $(CC) $(CFLAGS) -c $< -o $@ + $(STRIPTOOL) -x -R .note -R .comment $*.o + +clean: + $(RM) *.[oa] *~ core + diff --git a/libc/string/frv/memcpy.S b/libc/string/frv/memcpy.S new file mode 100644 index 000000000..63cc523a9 --- /dev/null +++ b/libc/string/frv/memcpy.S @@ -0,0 +1,124 @@ +/* memcpy.S: optimised assembly memcpy + * + * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + + .text + .p2align 4 + +############################################################################### +# +# void *memcpy(void *to, const char *from, size_t count) +# +# - NOTE: must not use any stack. exception detection performs function return +# to caller's fixup routine, aborting the remainder of the copy +# +############################################################################### + .globl memcpy + .type memcpy,@function +memcpy: + or.p gr8,gr9,gr4 + orcc gr10,gr0,gr0,icc3 + or.p gr10,gr4,gr4 + beqlr icc3,#0 + + # optimise based on best common alignment for to, from & count + andicc.p gr4,#0x1f,gr0,icc0 + setlos #8,gr11 + andicc.p gr4,#0x0f,gr0,icc1 + beq icc0,#0,memcpy_32 + andicc.p gr4,#0x07,gr0,icc0 + beq icc1,#0,memcpy_16 + andicc.p gr4,#0x03,gr0,icc1 + beq icc0,#0,memcpy_8 + andicc.p gr4,#0x01,gr0,icc0 + beq icc1,#0,memcpy_4 + setlos.p #1,gr11 + beq icc0,#0,memcpy_2 + + # do byte by byte copy + sub.p gr8,gr11,gr3 + sub gr9,gr11,gr9 +0: ldubu.p @(gr9,gr11),gr4 + subicc gr10,#1,gr10,icc0 + stbu.p gr4,@(gr3,gr11) + bne icc0,#2,0b + bralr + + # do halfword by halfword copy +memcpy_2: + setlos #2,gr11 + sub.p gr8,gr11,gr3 + sub gr9,gr11,gr9 +0: lduhu.p @(gr9,gr11),gr4 + subicc gr10,#2,gr10,icc0 + sthu.p gr4,@(gr3,gr11) + bne icc0,#2,0b + bralr + + # do word by word copy +memcpy_4: + setlos #4,gr11 + sub.p gr8,gr11,gr3 + sub gr9,gr11,gr9 +0: ldu.p @(gr9,gr11),gr4 + subicc gr10,#4,gr10,icc0 + stu.p gr4,@(gr3,gr11) + bne icc0,#2,0b + bralr + + # do double-word by double-word copy +memcpy_8: + sub.p gr8,gr11,gr3 + sub gr9,gr11,gr9 +0: lddu.p @(gr9,gr11),gr4 + subicc gr10,#8,gr10,icc0 + stdu.p gr4,@(gr3,gr11) + bne icc0,#2,0b + bralr + + # do quad-word by quad-word copy +memcpy_16: + sub.p gr8,gr11,gr3 + sub gr9,gr11,gr9 +0: lddu @(gr9,gr11),gr4 + lddu.p @(gr9,gr11),gr6 + subicc gr10,#16,gr10,icc0 + stdu gr4,@(gr3,gr11) + stdu.p gr6,@(gr3,gr11) + bne icc0,#2,0b + bralr + + # do eight-word by eight-word copy +memcpy_32: + sub.p gr8,gr11,gr3 + sub gr9,gr11,gr9 +0: lddu @(gr9,gr11),gr4 + lddu @(gr9,gr11),gr6 + lddu @(gr9,gr11),gr12 + lddu.p @(gr9,gr11),gr14 + subicc gr10,#32,gr10,icc0 + stdu gr4,@(gr3,gr11) + stdu gr6,@(gr3,gr11) + stdu gr12,@(gr3,gr11) + stdu.p gr14,@(gr3,gr11) + bne icc0,#2,0b + bralr + + .size memcpy, .-memcpy diff --git a/libc/string/frv/memset.S b/libc/string/frv/memset.S new file mode 100644 index 000000000..50d4c24c9 --- /dev/null +++ b/libc/string/frv/memset.S @@ -0,0 +1,155 @@ +/* memset.S: optimised assembly memset + * + * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + + .text + .p2align 4 + +############################################################################### +# +# void *memset(void *p, char ch, size_t count) +# +# - NOTE: must not use any stack. exception detection performs function return +# to caller's fixup routine, aborting the remainder of the set +# GR4, GR7, GR8, and GR11 must be managed +# +############################################################################### + .globl memset + .type memset,@function +memset: + orcc.p gr10,gr0,gr5,icc3 ; GR5 = count + andi gr9,#0xff,gr9 + or.p gr8,gr0,gr4 ; GR4 = address + beqlr icc3,#0 + + # conditionally write a byte to 2b-align the address + setlos.p #1,gr6 + andicc gr4,#1,gr0,icc0 + ckne icc0,cc7 + cstb.p gr9,@(gr4,gr0) ,cc7,#1 + csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 + cadd.p gr4,gr6,gr4 ,cc7,#1 + beqlr icc3,#0 + + # conditionally write a word to 4b-align the address + andicc.p gr4,#2,gr0,icc0 + subicc gr5,#2,gr0,icc1 + setlos.p #2,gr6 + ckne icc0,cc7 + slli.p gr9,#8,gr12 ; need to double up the pattern + cknc icc1,cc5 + or.p gr9,gr12,gr12 + andcr cc7,cc5,cc7 + + csth.p gr12,@(gr4,gr0) ,cc7,#1 + csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 + cadd.p gr4,gr6,gr4 ,cc7,#1 + beqlr icc3,#0 + + # conditionally write a dword to 8b-align the address + andicc.p gr4,#4,gr0,icc0 + subicc gr5,#4,gr0,icc1 + setlos.p #4,gr6 + ckne icc0,cc7 + slli.p gr12,#16,gr13 ; need to quadruple-up the pattern + cknc icc1,cc5 + or.p gr13,gr12,gr12 + andcr cc7,cc5,cc7 + + cst.p gr12,@(gr4,gr0) ,cc7,#1 + csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 + cadd.p gr4,gr6,gr4 ,cc7,#1 + beqlr icc3,#0 + + or.p gr12,gr12,gr13 ; need to octuple-up the pattern + + # the address is now 8b-aligned - loop around writing 64b chunks + setlos #8,gr7 + subi.p gr4,#8,gr4 ; store with update index does weird stuff + setlos #64,gr6 + + subicc gr5,#64,gr0,icc0 +0: cknc icc0,cc7 + cstdu gr12,@(gr4,gr7) ,cc7,#1 + cstdu gr12,@(gr4,gr7) ,cc7,#1 + cstdu gr12,@(gr4,gr7) ,cc7,#1 + cstdu gr12,@(gr4,gr7) ,cc7,#1 + cstdu gr12,@(gr4,gr7) ,cc7,#1 + cstdu.p gr12,@(gr4,gr7) ,cc7,#1 + csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 + cstdu.p gr12,@(gr4,gr7) ,cc7,#1 + subicc gr5,#64,gr0,icc0 + cstdu.p gr12,@(gr4,gr7) ,cc7,#1 + beqlr icc3,#0 + bnc icc0,#2,0b + + # now do 32-byte remnant + subicc.p gr5,#32,gr0,icc0 + setlos #32,gr6 + cknc icc0,cc7 + cstdu.p gr12,@(gr4,gr7) ,cc7,#1 + csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 + cstdu.p gr12,@(gr4,gr7) ,cc7,#1 + setlos #16,gr6 + cstdu.p gr12,@(gr4,gr7) ,cc7,#1 + subicc gr5,#16,gr0,icc0 + cstdu.p gr12,@(gr4,gr7) ,cc7,#1 + beqlr icc3,#0 + + # now do 16-byte remnant + cknc icc0,cc7 + cstdu.p gr12,@(gr4,gr7) ,cc7,#1 + csubcc gr5,gr6,gr5 ,cc7,#1 ; also set ICC3 + cstdu.p gr12,@(gr4,gr7) ,cc7,#1 + beqlr icc3,#0 + + # now do 8-byte remnant + subicc gr5,#8,gr0,icc1 + cknc icc1,cc7 + cstdu.p gr12,@(gr4,gr7) ,cc7,#1 + csubcc gr5,gr7,gr5 ,cc7,#1 ; also set ICC3 + setlos.p #4,gr7 + beqlr icc3,#0 + + # now do 4-byte remnant + subicc gr5,#4,gr0,icc0 + addi.p gr4,#4,gr4 + cknc icc0,cc7 + cstu.p gr12,@(gr4,gr7) ,cc7,#1 + csubcc gr5,gr7,gr5 ,cc7,#1 ; also set ICC3 + subicc.p gr5,#2,gr0,icc1 + beqlr icc3,#0 + + # now do 2-byte remnant + setlos #2,gr7 + addi.p gr4,#2,gr4 + cknc icc1,cc7 + csthu.p gr12,@(gr4,gr7) ,cc7,#1 + csubcc gr5,gr7,gr5 ,cc7,#1 ; also set ICC3 + subicc.p gr5,#1,gr0,icc0 + beqlr icc3,#0 + + # now do 1-byte remnant + setlos #0,gr7 + addi.p gr4,#2,gr4 + cknc icc0,cc7 + cstb.p gr12,@(gr4,gr0) ,cc7,#1 + bralr + .size memset, .-memset |