summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Frysinger <vapier@gentoo.org>2006-05-30 09:13:53 +0000
committerMike Frysinger <vapier@gentoo.org>2006-05-30 09:13:53 +0000
commit4fcc031a7085a47b9a027a20a919574f8aab0768 (patch)
treeff657fef5414526db2af82bd7af016abaf8a6bd3
parent63e3fcd76cd372cc5a30c414237b6a64d1a210dd (diff)
import some optimized functions from blackfin cvs
-rw-r--r--libc/string/bfin/Makefile13
-rw-r--r--libc/string/bfin/memchr.S54
-rw-r--r--libc/string/bfin/memcmp.S101
-rw-r--r--libc/string/bfin/memcpy.S74
-rw-r--r--libc/string/bfin/memmove.S95
-rw-r--r--libc/string/bfin/memset.S86
-rw-r--r--libc/string/bfin/strcmp.S121
7 files changed, 544 insertions, 0 deletions
diff --git a/libc/string/bfin/Makefile b/libc/string/bfin/Makefile
new file mode 100644
index 000000000..0a95346fd
--- /dev/null
+++ b/libc/string/bfin/Makefile
@@ -0,0 +1,13 @@
+# Makefile for uClibc: Blackfin (bfin) string routines in libc/string/bfin
+#
+# Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org>
+#
+# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+#
+# Both tree roots are three directories up; "all" depends on "objs" (presumably defined by the included rules).
+top_srcdir:=../../../
+top_builddir:=../../../
+all: objs
+include $(top_builddir)Rules.mak
+include ../Makefile.in
+include $(top_srcdir)Makerules
diff --git a/libc/string/bfin/memchr.S b/libc/string/bfin/memchr.S
new file mode 100644
index 000000000..6ecaf37a1
--- /dev/null
+++ b/libc/string/bfin/memchr.S
@@ -0,0 +1,54 @@
+/* memchr.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* void *memchr(const void *s, int c, size_t n);
+ * R0 = address (s)
+ * R1 = sought byte (c), only the low 8 bits are compared
+ * R2 = count (n)
+ *
+ * Returns (in R0) a pointer to the first matching byte, or 0 if none in n bytes.
+ */
+
+.text
+
+.align 2
+
+.global _memchr
+.type _memchr, STT_FUNC
+_memchr:
+ P0 = R0; // P0 = address
+ P2 = R2; // P2 = count
+ R1 = R1.B(Z); // compare against c as an unsigned byte
+ CC = R2 == 0; // n == 0: nothing to search
+ IF CC JUMP failed;
+
+bytes:
+ LSETUP (byte_loop_s , byte_loop_e) LC0=P2; // one iteration per byte
+
+byte_loop_s:
+ R3 = B[P0++](Z); // R3 = *s++ (zero-extended)
+ CC = R3 == R1;
+ IF CC JUMP found; // early exit leaves LC0 non-zero
+byte_loop_e:
+ NOP; // loop bottom; kept separate from "failed", which the n == 0 path enters without LSETUP
+
+failed:
+ R0=0; // not found (or n == 0): return NULL
+ RTS;
+
+found:
+ R0 = P0; // P0 was post-incremented past the match,
+ R0 += -1; // so step back one byte
+ RTS;
+
+.size _memchr,.-_memchr
+
+libc_hidden_def (memchr)
diff --git a/libc/string/bfin/memcmp.S b/libc/string/bfin/memcmp.S
new file mode 100644
index 000000000..f2679d5ae
--- /dev/null
+++ b/libc/string/bfin/memcmp.S
@@ -0,0 +1,101 @@
+/* memcmp.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* int memcmp(const void *s1, const void *s2, size_t n);
+ * R0 = First Address (s1)
+ * R1 = Second Address (s2)
+ * R2 = count (n)
+ * Returns (in R0) 0 if the first n bytes match, else the s1 byte minus the s2 byte at the first mismatch.
+ * Favours word aligned data.
+ */
+
+.text
+
+.align 2
+
+.global _memcmp
+.type _memcmp, STT_FUNC
+_memcmp:
+ I1 = P3; // stash P3 in I1; it is restored before each RTS
+ P0 = R0; // P0 = s1 address
+ P3 = R1; // P3 = s2 Address
+ P2 = R2 ; // P2 = count
+ CC = R2 <= 7(IU); // short buffers skip the word loop
+ IF CC JUMP too_small;
+ I0 = R1; // s2
+ R1 = R1 | R0; // OR addresses together
+ R1 <<= 30; // check bottom two bits
+ CC = AZ; // AZ set if zero.
+ IF !CC JUMP bytes ; // Jump if addrs not aligned.
+
+ P1 = P2 >> 2; // count = n/4
+ R3 = 3; // mask for n % 4
+ R2 = R2 & R3; // remainder
+ P2 = R2; // set remainder
+
+ LSETUP (quad_loop_s , quad_loop_e) LC0=P1; // one iteration per 32-bit word
+quad_loop_s:
+#if !defined(__WORKAROUND_AVOID_DAG1)
+ MNOP || R0 = [P0++] || R1 = [I0++]; // load one word from s1 and one from s2 together
+#else
+ R0 = [P0++]; // same two loads, issued separately
+ R1 = [I0++];
+#endif
+ CC = R0 == R1;
+ IF !CC JUMP quad_different; // mismatch somewhere in this word
+quad_loop_e:
+ NOP; // loop bottom
+
+ P3 = I0; // s2
+too_small:
+ CC = P2 == 0; //Check zero count
+ IF CC JUMP finished; // very unlikely
+
+bytes:
+ LSETUP (byte_loop_s , byte_loop_e) LC0=P2; // one iteration per remaining byte
+byte_loop_s:
+ R1 = B[P3++](Z); // *s2
+ R0 = B[P0++](Z); // *s1
+ CC = R0 == R1;
+ IF !CC JUMP different;
+byte_loop_e:
+ NOP; // loop bottom; falling out means all bytes matched
+
+different:
+ R0 = R0 - R1; // *s1 - *s2; gives 0 when reached by falling out of the byte loop
+ P3 = I1; // restore P3
+ RTS;
+
+quad_different:
+ // We've read two quads which don't match.
+ // Can't just compare them, because we're
+ // a little-endian machine, so the MSBs of
+ // the regs occur at later addresses in the
+ // string.
+ // Arrange to re-read those two quads again,
+ // byte-by-byte.
+ P0 += -4; // back up to the start of the
+ P3 = I0; // quads, and increase the
+ P2 += 4; // remainder count
+ P3 += -4; // s2 pointer back to the start of its quad
+ JUMP bytes;
+
+finished:
+ R0 = 0; // equal (or n == 0)
+ P3 = I1; // restore P3
+ RTS;
+.size _memcmp,.-_memcmp
+
+libc_hidden_def (memcmp)
+
+#ifdef __UCLIBC_SUSV3_LEGACY__
+strong_alias (memcmp,bcmp)
+#endif
diff --git a/libc/string/bfin/memcpy.S b/libc/string/bfin/memcpy.S
new file mode 100644
index 000000000..e7ba7048e
--- /dev/null
+++ b/libc/string/bfin/memcpy.S
@@ -0,0 +1,74 @@
+/* memcpy.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* void *memcpy(void *dest, const void *src, size_t n);
+ * R0 = To Address (dest) (leave unchanged to form result)
+ * R1 = From Address (src)
+ * R2 = count
+ * P3 is pushed on entry and popped before each RTS.
+ * Note: Favours word alignment
+ */
+
+.text
+
+.align 2
+
+.global _memcpy
+.type _memcpy, STT_FUNC
+_memcpy:
+ [--SP] = P3; // save P3 on the stack
+ P0 = R0; // P0 = To address
+ P3 = R1; // P3 = From Address
+ P2 = R2 ; // P2 = count
+ CC = R2 <= 7(IU); // short copies go straight to the byte loop
+ IF CC JUMP too_small;
+ I0 = R1; // I0 = From address for the word loads
+ R3 = R1 | R0; // OR addresses together
+ R3 <<= 30; // check bottom two bits
+ CC = AZ; // AZ set if zero.
+ IF !CC JUMP bytes ; // Jump if addrs not aligned.
+ P1 = P2 >> 2; // count = n/4
+ P1 += -1; // one word is loaded before the loop and stored after it
+ R3 = 3; // mask for n % 4
+ R2 = R2 & R3; // remainder
+ P2 = R2; // set remainder
+ R1 = [I0++]; // pre-load the first word
+#if !defined(__WORKAROUND_AVOID_DAG1)
+ LSETUP (quad_loop , quad_loop) LC0=P1;
+quad_loop: MNOP || [P0++] = R1 || R1 = [I0++]; // store previous word, load next
+#else
+ LSETUP (quad_loop_s , quad_loop_e) LC0=P1;
+quad_loop_s: [P0++] = R1; // store previous word
+quad_loop_e: R1 = [I0++]; // load next word
+#endif
+ [P0++] = R1; // store the last loaded word
+
+ CC = P2 == 0; // any remaining bytes?
+ P3 = I0; // Amend P3 for remaining copy
+ IF !CC JUMP bytes;
+ P3 = [SP++]; // restore P3
+ RTS;
+
+too_small:
+ CC = P2 == 0; //Check zero count
+ IF CC JUMP finished; // very unlikely
+
+bytes:
+ LSETUP (byte_loop_s , byte_loop_e) LC0=P2; // one iteration per byte
+byte_loop_s: R1 = B[P3++](Z); // load byte from source
+byte_loop_e: B[P0++] = R1; // store byte to destination
+
+finished:
+ P3 = [SP++]; // restore P3
+ RTS;
+.size _memcpy,.-_memcpy
+
+libc_hidden_def (memcpy)
diff --git a/libc/string/bfin/memmove.S b/libc/string/bfin/memmove.S
new file mode 100644
index 000000000..3d446f326
--- /dev/null
+++ b/libc/string/bfin/memmove.S
@@ -0,0 +1,95 @@
+/* memmove.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* void *memmove(void *dest, const void *src, size_t n);
+ * R0 = To Address (dest) (leave unchanged to form result)
+ * R1 = From Address (src)
+ * R2 = count (n)
+ * Copies backwards byte-by-byte when src < dest <= src + n, forwards otherwise.
+ * Note: Data may overlap
+ */
+
+.text
+
+.align 2
+
+.global _memmove
+.type _memmove, STT_FUNC
+_memmove:
+ I1 = P3; // stash P3 in I1; it is restored before each RTS
+ P0 = R0; // P0 = To address
+ P3 = R1; // P3 = From Address
+ P2 = R2 ; // P2 = count
+ CC = P2 == 0; //Check zero count
+ IF CC JUMP finished; // very unlikely
+
+ CC = R1 < R0 (IU); // From < To
+ IF !CC JUMP no_overlap;
+ R3 = R1 + R2; // R3 = From + len
+ CC = R0 <= R3 (IU); // (From+len) >= To
+ IF CC JUMP overlap; // forward copy would overwrite unread source
+no_overlap:
+ R3 = 11;
+ CC = R2 <= R3 (IU); // n <= 11: byte loop; unsigned compare, n is a size_t
+ IF CC JUMP bytes;
+ R3 = R1 | R0; // OR addresses together
+ R3 <<= 30; // check bottom two bits
+ CC = AZ; // AZ set if zero.
+ IF !CC JUMP bytes ; // Jump if addrs not aligned.
+
+ I0 = P3; // I0 = From address for the word loads
+ P1 = P2 >> 2; // count = n/4
+ P1 += -1; // one word is loaded before the loop and stored after it
+ R3 = 3; // mask for n % 4
+ R2 = R2 & R3; // remainder
+ P2 = R2; // set remainder
+ R1 = [I0++]; // pre-load the first word
+
+#if !defined(__WORKAROUND_AVOID_DAG1)
+ LSETUP (quad_loop , quad_loop) LC0=P1;
+quad_loop: MNOP || [P0++] = R1 || R1 = [I0++]; // store previous word, load next
+#else
+ LSETUP (quad_loop_s, quad_loop_e) LC0=P1;
+quad_loop_s: [P0++] = R1; // store previous word
+quad_loop_e: R1 = [I0++]; // load next word
+#endif
+ [P0++] = R1; // store the last loaded word
+
+ CC = P2 == 0; // any remaining bytes?
+ P3 = I0; // Amend P3 to updated ptr.
+ IF !CC JUMP bytes;
+ P3 = I1; // restore P3
+ RTS;
+
+bytes: LSETUP (byte2_s , byte2_e) LC0=P2; // forward copy, one byte per iteration
+byte2_s: R1 = B[P3++](Z);
+byte2_e: B[P0++] = R1;
+
+finished:
+ P3 = I1; // restore P3
+ RTS;
+
+overlap:
+ P2 += -1; // P2 = n - 1
+ P0 = P0 + P2; // P0 = last destination byte
+ P3 = P3 + P2; // P3 = last source byte
+ R1 = B[P3--] (Z); // pre-load the last source byte
+ CC = P2 == 0; // n == 1: only the final store is needed
+ IF CC JUMP no_loop;
+ LSETUP (ol_s, ol_e) LC0 = P2; // n - 1 iterations, walking downwards
+ol_s: B[P0--] = R1; // store previously loaded byte
+ol_e: R1 = B[P3--] (Z); // load the next lower byte
+no_loop: B[P0] = R1; // store the first byte of the region
+ P3 = I1; // restore P3
+ RTS;
+.size _memmove,.-_memmove
+
+libc_hidden_def (memmove)
diff --git a/libc/string/bfin/memset.S b/libc/string/bfin/memset.S
new file mode 100644
index 000000000..bd8eb4b6a
--- /dev/null
+++ b/libc/string/bfin/memset.S
@@ -0,0 +1,86 @@
+/* memset.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* void *memset(void *s, int c, size_t n);
+ * R0 = address (s) (leave unchanged to form result)
+ * R1 = filler byte (c)
+ * R2 = count (n)
+ * Counts of 8 or more are word-aligned first, then filled a word at a time.
+ * Note: Favours word aligned data.
+ */
+
+.text
+
+.align 2
+
+.global _memset
+.type _memset, STT_FUNC
+_memset:
+ P0 = R0 ; // P0 = address
+ P2 = R2 ; // P2 = count
+ R3 = R0 + R2; // end
+ CC = R2 <= 7(IU); // short fills go straight to the byte loop
+ IF CC JUMP too_small;
+ R1 = R1.B (Z); // R1 = fill char
+ R2 = 3;
+ R2 = R0 & R2; // addr bottom two bits
+ CC = R2 == 0; // CC set if the address is word aligned
+ IF !CC JUMP force_align ; // Jump if addr not aligned.
+
+aligned:
+ P1 = P2 >> 2; // count = n/4
+ R2 = R1 << 8; // create quad filler
+ R2.L = R2.L + R1.L(NS); // low half = fill byte in both byte lanes
+ R2.H = R2.L + R1.H(NS); // R1.H is zero after the (Z) extract, so high half = low half
+ P2 = R3; // P2 = end address
+
+ LSETUP (quad_loop , quad_loop) LC0=P1;
+quad_loop:
+ [P0++] = R2; // store four fill bytes
+
+ CC = P0 == P2; // reached the end exactly?
+ IF !CC JUMP bytes_left;
+ RTS;
+
+bytes_left:
+ R2 = R3; // end point
+ R3 = P0; // current position
+ R2 = R2 - R3; // bytes left
+ P2 = R2; // byte-loop count
+
+too_small:
+ CC = P2 == 0; //Check zero count
+ IF CC JUMP finished; // Unusual
+
+bytes: LSETUP (byte_loop , byte_loop) LC0=P2;
+byte_loop: B[P0++] = R1; // store one fill byte (low 8 bits of R1)
+
+finished:
+ RTS;
+
+force_align:
+ CC = BITTST (R0, 0 ); // odd byte
+ R0 = 4; // R0 is scratch here; reloaded from P0 below
+ R0 = R0 - R2; // bytes needed to reach word alignment
+ P1 = R0;
+ R0 = P0; // Recover return value (P0 has not advanced yet)
+ IF !CC JUMP skip1;
+ B[P0++] = R1; // odd address: one byte makes it even
+skip1:
+ CC = R2 <= 2; // addr low bits were 1 or 2: two more bytes needed
+ P2 -= P1; // reduce count
+ IF !CC JUMP aligned;
+ B[P0++] = R1;
+ B[P0++] = R1;
+ JUMP aligned;
+.size _memset,.-_memset
+
+libc_hidden_def (memset)
diff --git a/libc/string/bfin/strcmp.S b/libc/string/bfin/strcmp.S
new file mode 100644
index 000000000..6365024ec
--- /dev/null
+++ b/libc/string/bfin/strcmp.S
@@ -0,0 +1,121 @@
+/* strcmp.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* Fast strcmp() for Blackfin: R0 = first string, R1 = second string, result in R0.
+ * When both strings are aligned, this processes four characters at
+ * a time. Uses a hw loop with "very big" count to loop "forever",
+ * until difference or a terminating zero is found.
+ * Once the end-case word has been identified, breaks out of the
+ * loop to check more carefully (same as the unaligned case).
+ */
+
+.text
+
+.align 2
+
+.global _strcmp
+.type _strcmp, STT_FUNC
+_strcmp:
+ [--sp] = (R7:4); // push R7..R4; popped again before rts
+ p1 = r0; // p1 = first string
+ p2 = r1; // p2 = second string
+
+ p0 = -1; // (need for loop counter init)
+
+ // check if both pointers are word (4-byte) aligned
+ r0 = r0 | r1; // check both pointers at same time
+ r0 <<= 30; // dump all but last 2 bits
+ cc = az; // are they zero?
+ if !cc jump unaligned; // no; use unaligned code.
+ // fall-thru for aligned case..
+
+ // note that r0 is zero from the previous...
+ // p0 set to -1
+
+ lsetup (beginloop, endloop) lc0=p0;
+ // pick up first words
+ r1 = [p1++];
+ r2 = [p2++];
+ // make up mask: 0x00FF00FF
+ r7 = 0xFF;
+ r7.h = 0xFF;
+ // loop : 9 cycles to check 4 characters
+ cc = r1 == r2;
+beginloop:
+ if !cc jump notequal4; // compare failure, exit loop
+
+ // starting with 44332211
+ // see if char 3 or char 1 is 0
+ r3 = r1 & r7; // form 00330011
+ // add to zero, and (r2 is free, reload)
+ r6 = r3 +|+ r0 || r2 = [p2++] || nop;
+ cc = az; // true if either is zero
+ r3 = r1 ^ r3; // form 44002200 (4321^0301 => 4020)
+ // (trick, saves having another mask)
+ // add to zero, and (r1 is free, reload)
+ r6 = r3 +|+ r0 || r1 = [p1++] || nop;
+ cc |= az; // true if either is zero
+ if cc jump zero4; // leave if a zero somewhere
+endloop:
+ cc = r1 == r2; // loop bottom: compare the freshly loaded words
+
+ // loop exits
+notequal4: // compare failure on 4-char compare
+ // address pointers are one word ahead;
+ // faster to use zero4 exit code
+ p1 += 4;
+ p2 += 4;
+
+zero4: // one of the bytes in word 1 is zero
+ // but we've already fetched the next word; so
+ // backup two to look at failing word again
+ p1 += -8;
+ p2 += -8;
+
+
+
+ // here when pointers are unaligned: checks one
+ // character at a time. Also use at the end of
+ // the word-check algorithm to figure out what happened
+unaligned:
+ // R0 may be non-zero on direct entry, so it is cleared below.
+ // p0 set to -1
+
+ r0 = 0 (Z);
+ r1 = B[p1++] (Z);
+ r2 = B[p2++] (Z);
+ lsetup (beginloop1, endloop1) lc0=p0;
+
+beginloop1:
+ cc = r1; // first char must be non-zero
+ // chars must be the same
+ r3 = r2 - r1 (NS) || r1 = B[p1++] (Z) || nop;
+ cc &= az;
+ r3 = r0 - r2; // second char must be non-zero
+ cc &= an;
+ if !cc jump exitloop1;
+endloop1:
+ r2 = B[p2++] (Z); // loop bottom: fetch next char of second string
+
+exitloop1: // here means we found a zero or a difference.
+ // we have r2(N), p2(N), r1(N+1), p1(N+2)
+ r1=B[p1+ -2] (Z); // re-read the first-string char that pairs with r2
+ r0 = r1 - r2; // result = first-string char minus second-string char
+ (r7:4) = [sp++]; // pop R7..R4
+ rts;
+.size _strcmp,.-_strcmp
+
+libc_hidden_def (strcmp)
+
+#ifndef __UCLIBC_HAS_LOCALE__
+strong_alias (strcmp,strcoll)
+libc_hidden_def (strcoll)
+#endif