summary refs log tree commit diff
path: root/libc/string/sparc/sparc64/memset.S
diff options
context:
space:
mode:
author Mike Frysinger <vapier@gentoo.org> 2005-08-26 00:01:54 +0000
committer Mike Frysinger <vapier@gentoo.org> 2005-08-26 00:01:54 +0000
commit d71ff82e7019e91ae9f76af391e7f677d421e205 (patch)
tree b28b78dd5619b59cd28d11f007543e69fbe4bf12 /libc/string/sparc/sparc64/memset.S
parent b11c2a4d0b7cf8095b93ae70fc187f0feadae5a1 (diff)
import sparc-optimized string functions from glibc
Diffstat (limited to 'libc/string/sparc/sparc64/memset.S')
-rw-r--r-- libc/string/sparc/sparc64/memset.S 313
1 files changed, 313 insertions, 0 deletions
diff --git a/libc/string/sparc/sparc64/memset.S b/libc/string/sparc/sparc64/memset.S
new file mode 100644
index 000000000..2e23f92a8
--- /dev/null
+++ b/libc/string/sparc/sparc64/memset.S
@@ -0,0 +1,313 @@
+/* Set a block of memory to some byte value.
+ For UltraSPARC.
+ Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by David S. Miller (davem@caip.rutgers.edu) and
+ Jakub Jelinek (jj@ultra.linux.cz).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <asm/asi.h>
+#ifndef XCC /* defaults below apply only when XCC was not defined before this point */
+#define XCC xcc /* %XCC in the code then expands to %xcc */
+#define USE_BPR /* selects the plain mov variant in memset and drops the srl-by-0 instructions */
+#endif
+#define FPRS_FEF 4 /* value written to %fprs once the block-store loops are finished */
+/* SET_BLOCKS(base, offset, source): emit four consecutive 8-byte stores
+   (stx) of SOURCE, covering the 32 bytes from base - offset - 0x18 up to
+   base - offset + 0x07.  memset instantiates it four times to build a
+   16-entry store table that is entered part-way through by a computed
+   jump, so each expansion must stay exactly four instructions long.  */
+#define SET_BLOCKS(base, offset, source) \
+ stx source, [base - offset - 0x18]; \
+ stx source, [base - offset - 0x10]; \
+ stx source, [base - offset - 0x08]; \
+ stx source, [base - offset - 0x00];
+
+ /* Well, memset is a lot easier to get right than bcopy...
+
+    memset: fill a block of memory with one byte value.
+
+    On entry:  %o0 = destination, %o1 = fill value (only the low 8 bits
+               are used), %o2 = byte count.
+    On return: %o0 = the original destination (kept in %o5 meanwhile).
+    Also written: %o1-%o5, %g1, %g5, and on the block-store path
+               %f0-%f14, %asi and %fprs.
+
+    Outline:
+      - fill byte == 0: continue at label 50 inside __bzero;
+      - count <= 7: plain byte loop (label 17);
+      - otherwise align the destination to 4 and then 8 bytes while
+        replicating the byte into an 8-byte pattern in %o1;
+      - count >= 128: align to 64 bytes, then store 64-byte blocks with
+        stda through ASI_BLK_P from %f0-%f14;
+      - finish with 8-byte stores (computed jump into a stx table) and a
+        4/2/1-byte tail.
+
+    Reading notes: the instruction after every branch is its delay slot
+    and executes before the branch target is reached; for conditional
+    branches written with ",a" it executes only when the branch is taken.
+    Numeric labels are reused; "Nf"/"Nb" mean the nearest label N
+    forward/backward.  */
+ .text
+ .align 32
+ENTRY(memset)
+ andcc %o1, 0xff, %o1 /* %o1 = fill byte; condition codes record whether it is zero */
+ mov %o0, %o5 /* keep the original destination for the return value */
+ be,a,pt %icc, 50f /* fill byte is zero: continue at label 50 inside __bzero */
+#ifndef USE_BPR
+ srl %o2, 0, %o1 /* annulled delay slot (runs only when branching): %o1 = low 32 bits of count */
+#else
+ mov %o2, %o1 /* annulled delay slot (runs only when branching): %o1 = count, as label 50 expects */
+#endif
+ cmp %o2, 7
+#ifndef USE_BPR
+ srl %o2, 0, %o2 /* keep only the low 32 bits of the count; does not disturb the compare result */
+#endif
+ bleu,pn %XCC, 17f /* count <= 7: byte loop at 17 */
+ andcc %o0, 3, %g5 /* delay slot: %g5 = dst & 3 */
+ be,pt %xcc, 4f /* dst already 4-byte aligned */
+ and %o1, 0xff, %o1 /* delay slot: mask the fill byte again (it was already masked above) */
+ cmp %g5, 3
+ be,pn %xcc, 2f /* dst & 3 == 3: one byte reaches alignment */
+ stb %o1, [%o0 + 0x00] /* delay slot: first byte is stored in every misaligned case */
+ cmp %g5, 2
+ be,pt %xcc, 2f /* dst & 3 == 2: two bytes reach alignment */
+ stb %o1, [%o0 + 0x01] /* delay slot: second byte */
+ stb %o1, [%o0 + 0x02] /* dst & 3 == 1: third byte */
+2: sub %g5, 4, %g5 /* %g5 = (dst & 3) - 4 = -(number of bytes just stored) */
+ sub %o0, %g5, %o0 /* advance dst to the 4-byte boundary */
+ add %o2, %g5, %o2 /* count -= bytes just stored */
+4: sllx %o1, 8, %g1 /* start replicating the fill byte */
+ andcc %o0, 4, %g0 /* test bit 2 of dst (dst is 4-aligned here); used by the be below */
+ or %o1, %g1, %o1 /* %o1 = byte x 2 */
+ sllx %o1, 16, %g1
+ or %o1, %g1, %o1 /* %o1 = byte x 4 */
+ be,pt %xcc, 2f /* dst already 8-byte aligned */
+ sllx %o1, 32, %g1 /* delay slot: upper half of the 8-byte pattern */
+ stw %o1, [%o0] /* one 4-byte store reaches 8-byte alignment */
+ sub %o2, 4, %o2
+ add %o0, 4, %o0
+2: cmp %o2, 128
+ or %o1, %g1, %o1 /* %o1 = byte x 8 */
+ blu,pn %xcc, 9f /* fewer than 128 bytes left: skip the block-store path */
+ andcc %o0, 0x38, %g5 /* delay slot: %g5 = dst offset inside its 64-byte-aligned chunk */
+ be,pn %icc, 6f /* dst already 64-byte aligned */
+ mov 64, %o4 /* delay slot */
+ andcc %o0, 8, %g0
+ be,pn %icc, 1f
+ sub %o4, %g5, %o4 /* delay slot: %o4 = bytes needed to reach 64-byte alignment */
+ stx %o1, [%o0] /* 8-byte alignment step */
+ add %o0, 8, %o0
+1: andcc %o4, 16, %g0
+ be,pn %icc, 1f
+ sub %o2, %o4, %o2 /* delay slot: count -= all alignment bytes */
+ stx %o1, [%o0] /* 16-byte alignment step */
+ stx %o1, [%o0 + 8]
+ add %o0, 16, %o0
+1: andcc %o4, 32, %g0
+ be,pn %icc, 7f
+ andncc %o2, 0x3f, %o3 /* delay slot: %o3 = count rounded down to a multiple of 64; tested at 7 */
+ stw %o1, [%o0] /* 32-byte alignment step, written as eight 4-byte stores */
+ stw %o1, [%o0 + 4]
+ stw %o1, [%o0 + 8]
+ stw %o1, [%o0 + 12]
+ stw %o1, [%o0 + 16]
+ stw %o1, [%o0 + 20]
+ stw %o1, [%o0 + 24]
+ stw %o1, [%o0 + 28]
+ add %o0, 32, %o0
+7: be,pn %xcc, 9f /* %o3 == 0: no whole 64-byte block to store */
+ nop
+ ldd [%o0 - 8], %f0 /* reload the 8 pattern bytes stored just below dst into %f0 */
+18: wr %g0, ASI_BLK_P, %asi /* select ASI_BLK_P (from <asm/asi.h>) for the stda stores below */
+ membar #StoreStore | #LoadStore
+ andcc %o3, 0xc0, %g5 /* %g5 = block bytes modulo 256: 0, 64, 128 or 192 */
+ and %o2, 0x3f, %o2 /* %o2 = bytes left over after the 64-byte blocks */
+ fmovd %f0, %f2 /* copy the pattern so that %f0-%f14 hold 64 bytes of it */
+ fmovd %f0, %f4
+ andn %o3, 0xff, %o3 /* %o3 = bytes to store in 256-byte groups */
+ fmovd %f0, %f6
+ cmp %g5, 64 /* result used by the be after the brz */
+ fmovd %f0, %f8
+ fmovd %f0, %f10
+ fmovd %f0, %f12
+ brz,pn %g5, 10f /* no odd 64-byte blocks: go straight to the 256-byte loop */
+ fmovd %f0, %f14 /* delay slot */
+ be,pn %icc, 2f /* %g5 == 64: one odd block */
+ stda %f0, [%o0 + 0x00] %asi /* delay slot: first odd block */
+ cmp %g5, 128
+ be,pn %icc, 2f /* %g5 == 128: two odd blocks */
+ stda %f0, [%o0 + 0x40] %asi /* delay slot: second odd block */
+ stda %f0, [%o0 + 0x80] %asi /* %g5 == 192: third odd block */
+2: brz,pn %o3, 12f /* no 256-byte groups left */
+ add %o0, %g5, %o0 /* delay slot: step dst past the odd blocks */
+10: stda %f0, [%o0 + 0x00] %asi /* main loop: four block stores = 256 bytes per pass */
+ stda %f0, [%o0 + 0x40] %asi
+ stda %f0, [%o0 + 0x80] %asi
+ stda %f0, [%o0 + 0xc0] %asi
+11: subcc %o3, 256, %o3
+ bne,pt %xcc, 10b
+ add %o0, 256, %o0 /* delay slot */
+12: wr %g0, FPRS_FEF, %fprs /* write FPRS_FEF (4) to %fprs now that the FP registers are no longer needed */
+ membar #StoreLoad | #StoreStore
+9: andcc %o2, 0x78, %g5 /* %g5 = remaining bytes that form whole 8-byte words (0..120) */
+ be,pn %xcc, 13f /* none: go to the sub-8-byte tail */
+ andcc %o2, 7, %o2 /* delay slot: %o2 = final 0..7 bytes; these condition codes are tested at 13 */
+14: rd %pc, %o4 /* %o4 = address of this instruction (label 14) */
+ srl %g5, 1, %o3 /* each stx is 4 bytes of code for 8 bytes of data, so code bytes = %g5 / 2 */
+ sub %o4, %o3, %o4
+ jmpl %o4 + (13f - 14b), %g0 /* jump %g5/2 bytes before label 13 so exactly %g5/8 stx execute */
+ add %o0, %g5, %o0 /* delay slot: dst += %g5; the table stores at negative offsets from it */
+12: SET_BLOCKS (%o0, 0x68, %o1) /* stx table: 16 stores covering dst-0x80 .. dst-0x01 */
+ SET_BLOCKS (%o0, 0x48, %o1)
+ SET_BLOCKS (%o0, 0x28, %o1)
+ SET_BLOCKS (%o0, 0x08, %o1)
+13: be,pn %xcc, 8f /* (count & 7) == 0: nothing left */
+ andcc %o2, 4, %g0 /* delay slot */
+ be,pn %xcc, 1f
+ andcc %o2, 2, %g0 /* delay slot */
+ stw %o1, [%o0] /* bit 2 set: 4 bytes */
+ add %o0, 4, %o0
+1: be,pn %xcc, 1f
+ andcc %o2, 1, %g0 /* delay slot */
+ sth %o1, [%o0] /* bit 1 set: 2 bytes */
+ add %o0, 2, %o0
+1: bne,a,pn %xcc, 8f
+ stb %o1, [%o0] /* annulled delay slot: last byte, stored only when bit 0 is set */
+8: retl
+ mov %o5, %o0 /* delay slot: return the original destination */
+17: brz,pn %o2, 0f /* short path (count <= 7): nothing to do when count is 0 */
+8: add %o0, 1, %o0 /* delay slot of the brz above and first instruction of the byte loop */
+ subcc %o2, 1, %o2
+ bne,pt %xcc, 8b
+ stb %o1, [%o0 - 1] /* delay slot: store at the pre-increment address */
+0: retl
+ mov %o5, %o0 /* delay slot: return the original destination */
+
+6: stx %o1, [%o0] /* dst was already 64-byte aligned: store the pattern once so it can be loaded into %f0 */
+ andncc %o2, 0x3f, %o3 /* %o3 = count rounded down to a multiple of 64 */
+ be,pn %xcc, 9b /* no whole block: tail handling at 9 */
+ nop
+ ba,pt %xcc, 18b /* join the block-store setup */
+ ldd [%o0], %f0 /* delay slot: %f0 = the 8-byte pattern just stored */
+END(memset)
+/* ZERO_BLOCKS(base, offset, source): emit eight consecutive 8-byte stores
+   (stx) of SOURCE, covering the 64 bytes from base - offset - 0x38 up to
+   base - offset + 0x07.  __bzero instantiates it four times to build a
+   32-entry store table that is entered part-way through by a computed
+   jump, so each expansion must stay exactly eight instructions long.  */
+#define ZERO_BLOCKS(base, offset, source) \
+ stx source, [base - offset - 0x38]; \
+ stx source, [base - offset - 0x30]; \
+ stx source, [base - offset - 0x28]; \
+ stx source, [base - offset - 0x20]; \
+ stx source, [base - offset - 0x18]; \
+ stx source, [base - offset - 0x10]; \
+ stx source, [base - offset - 0x08]; \
+ stx source, [base - offset - 0x00];
+/* __bzero: clear a block of memory.
+
+   On entry:  %o0 = destination, %o1 = byte count.
+   On return: %o0 is reloaded from %o5, which holds the original
+              destination.
+   Also written: %o1-%o5, and on the block-store path %f0-%f14, %asi
+              and %fprs.
+
+   Label 50 is a second entry point: memset branches to it when its fill
+   byte is zero, after it has put the count in %o1 and the original
+   destination in %o5.
+
+   The structure mirrors memset, with %g0 as the store source: byte loop
+   or 4/2/1 tail for count <= 7, alignment to 4, 8 and 64 bytes, 64-byte
+   block stores through ASI_BLK_P from zeroed %f0-%f14 when at least 128
+   bytes remained at the size check, then 8-byte stores via a computed
+   jump into a stx table and a 4/2/1-byte tail.
+
+   Reading notes: the instruction after every branch is its delay slot
+   and executes before the branch target is reached; for conditional
+   branches written with ",a" it executes only when the branch is taken.
+   Numeric labels are reused; "Nf"/"Nb" mean the nearest label N
+   forward/backward.  */
+ .text
+ .align 32
+ENTRY(__bzero)
+#ifndef USE_BPR
+ srl %o1, 0, %o1 /* keep only the low 32 bits of the count */
+#endif
+ mov %o0, %o5 /* keep the original destination for the return value */
+50: cmp %o1, 7 /* also entered from memset with %o1 = count, %o5 = original dst */
+ bleu,pn %xcc, 17f /* count <= 7: short path at 17 */
+ andcc %o0, 3, %o2 /* delay slot: %o2 = dst & 3 */
+ be,a,pt %xcc, 4f /* dst already 4-byte aligned */
+ andcc %o0, 4, %g0 /* annulled delay slot (runs only when branching): bit-2 test used at 4 */
+ cmp %o2, 3
+ be,pn %xcc, 2f /* dst & 3 == 3: one byte reaches alignment */
+ stb %g0, [%o0 + 0x00] /* delay slot: first byte is cleared in every misaligned case */
+ cmp %o2, 2
+ be,pt %xcc, 2f /* dst & 3 == 2: two bytes reach alignment */
+ stb %g0, [%o0 + 0x01] /* delay slot: second byte */
+ stb %g0, [%o0 + 0x02] /* dst & 3 == 1: third byte */
+2: sub %o2, 4, %o2 /* %o2 = (dst & 3) - 4 = -(number of bytes just cleared) */
+ sub %o0, %o2, %o0 /* advance dst to the 4-byte boundary */
+ add %o1, %o2, %o1 /* count -= bytes just cleared */
+ andcc %o0, 4, %g0 /* test bit 2 of dst; used by the be at 4 */
+4: be,pt %xcc, 2f /* dst already 8-byte aligned */
+ cmp %o1, 128 /* delay slot: size check consumed by the blu at 2 (made before the 4-byte adjustment below) */
+ stw %g0, [%o0] /* one 4-byte store reaches 8-byte alignment */
+ sub %o1, 4, %o1
+ add %o0, 4, %o0
+2: blu,pn %xcc, 9f /* count below 128 at the compare: skip the block-store path */
+ andcc %o0, 0x38, %o2 /* delay slot: %o2 = dst offset inside its 64-byte-aligned chunk */
+ be,pn %icc, 6f /* dst already 64-byte aligned */
+ mov 64, %o4 /* delay slot */
+ andcc %o0, 8, %g0
+ be,pn %icc, 1f
+ sub %o4, %o2, %o4 /* delay slot: %o4 = bytes needed to reach 64-byte alignment */
+ stx %g0, [%o0] /* 8-byte alignment step */
+ add %o0, 8, %o0
+1: andcc %o4, 16, %g0
+ be,pn %icc, 1f
+ sub %o1, %o4, %o1 /* delay slot: count -= all alignment bytes */
+ stx %g0, [%o0] /* 16-byte alignment step */
+ stx %g0, [%o0 + 8]
+ add %o0, 16, %o0
+1: andcc %o4, 32, %g0
+ be,pn %icc, 7f
+ andncc %o1, 0x3f, %o3 /* delay slot: %o3 = count rounded down to a multiple of 64; tested at 7 */
+ stx %g0, [%o0] /* 32-byte alignment step */
+ stx %g0, [%o0 + 8]
+ stx %g0, [%o0 + 16]
+ stx %g0, [%o0 + 24]
+ add %o0, 32, %o0
+6: andncc %o1, 0x3f, %o3 /* %o3 = count rounded down to a multiple of 64 (entry for an already aligned dst) */
+7: be,pn %xcc, 9f /* %o3 == 0: no whole 64-byte block to store */
+ wr %g0, ASI_BLK_P, %asi /* delay slot (runs on both paths): select ASI_BLK_P for the stda stores */
+ membar #StoreLoad | #StoreStore | #LoadStore
+ fzero %f0 /* %f0 = 0 */
+ andcc %o3, 0xc0, %o2 /* %o2 = block bytes modulo 256: 0, 64, 128 or 192 */
+ and %o1, 0x3f, %o1 /* %o1 = bytes left over after the 64-byte blocks */
+ fzero %f2 /* %f2 = 0 */
+ andn %o3, 0xff, %o3 /* %o3 = bytes to store in 256-byte groups */
+ faddd %f0, %f2, %f4 /* 0 + 0 and 0 * 0 both give zero: fills %f4-%f14 (add/mul alternation presumably spreads work over FP units - TODO confirm) */
+ fmuld %f0, %f2, %f6
+ cmp %o2, 64 /* result used by the be after the brz */
+ faddd %f0, %f2, %f8
+ fmuld %f0, %f2, %f10
+ faddd %f0, %f2, %f12
+ brz,pn %o2, 10f /* no odd 64-byte blocks: go straight to the 256-byte loop */
+ fmuld %f0, %f2, %f14 /* delay slot */
+ be,pn %icc, 2f /* %o2 == 64: one odd block */
+ stda %f0, [%o0 + 0x00] %asi /* delay slot: first odd block */
+ cmp %o2, 128
+ be,pn %icc, 2f /* %o2 == 128: two odd blocks */
+ stda %f0, [%o0 + 0x40] %asi /* delay slot: second odd block */
+ stda %f0, [%o0 + 0x80] %asi /* %o2 == 192: third odd block */
+2: brz,pn %o3, 12f /* no 256-byte groups left */
+ add %o0, %o2, %o0 /* delay slot: step dst past the odd blocks */
+10: stda %f0, [%o0 + 0x00] %asi /* main loop: four block stores = 256 bytes per pass */
+ stda %f0, [%o0 + 0x40] %asi
+ stda %f0, [%o0 + 0x80] %asi
+ stda %f0, [%o0 + 0xc0] %asi
+11: subcc %o3, 256, %o3
+ bne,pt %xcc, 10b
+ add %o0, 256, %o0 /* delay slot */
+12: wr %g0, FPRS_FEF, %fprs /* write FPRS_FEF (4) to %fprs now that the FP registers are no longer needed */
+ membar #StoreLoad | #StoreStore
+9: andcc %o1, 0xf8, %o2 /* %o2 = remaining bytes that form whole 8-byte words (0..248) */
+ be,pn %xcc, 13f /* none: go to the sub-8-byte tail */
+ andcc %o1, 7, %o1 /* delay slot: %o1 = final 0..7 bytes; these condition codes are tested at 13 */
+14: rd %pc, %o4 /* %o4 = address of this instruction (label 14) */
+ srl %o2, 1, %o3 /* each stx is 4 bytes of code for 8 bytes of data, so code bytes = %o2 / 2 */
+ sub %o4, %o3, %o4
+ jmpl %o4 + (13f - 14b), %g0 /* jump %o2/2 bytes before label 13 so exactly %o2/8 stx execute */
+ add %o0, %o2, %o0 /* delay slot: dst += %o2; the table stores at negative offsets from it */
+12: ZERO_BLOCKS (%o0, 0xc8, %g0) /* stx table: 32 stores covering dst-0x100 .. dst-0x01 */
+ ZERO_BLOCKS (%o0, 0x88, %g0)
+ ZERO_BLOCKS (%o0, 0x48, %g0)
+ ZERO_BLOCKS (%o0, 0x08, %g0)
+13: be,pn %xcc, 8f /* remaining count is 0: nothing left */
+ andcc %o1, 4, %g0 /* delay slot */
+ be,pn %xcc, 1f
+ andcc %o1, 2, %g0 /* delay slot */
+ stw %g0, [%o0] /* bit 2 set: 4 bytes */
+ add %o0, 4, %o0
+1: be,pn %xcc, 1f
+ andcc %o1, 1, %g0 /* delay slot */
+ sth %g0, [%o0] /* bit 1 set: 2 bytes */
+ add %o0, 2, %o0
+1: bne,a,pn %xcc, 8f
+ stb %g0, [%o0] /* annulled delay slot: last byte, cleared only when bit 0 is set */
+8: retl
+ mov %o5, %o0 /* delay slot: reload the original destination */
+17: be,pn %xcc, 13b /* short path (count <= 7): dst is 4-byte aligned, reuse the 4/2/1 tail at 13 */
+ orcc %o1, 0, %g0 /* delay slot: set condition codes from the count for the next test */
+ be,pn %xcc, 0f /* count == 0: return */
+8: add %o0, 1, %o0 /* delay slot of the be above and first instruction of the byte loop */
+ subcc %o1, 1, %o1
+ bne,pt %xcc, 8b
+ stb %g0, [%o0 - 1] /* delay slot: clear the byte at the pre-increment address */
+0: retl
+ mov %o5, %o0 /* delay slot: reload the original destination */
+END(__bzero)
+
+weak_alias (__bzero, bzero) /* presumably exports bzero as a weak alias of __bzero; the macro is defined outside this file */