From 4fcc031a7085a47b9a027a20a919574f8aab0768 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 30 May 2006 09:13:53 +0000 Subject: import some optimized functions from blackfin cvs --- libc/string/bfin/memmove.S | 95 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 libc/string/bfin/memmove.S (limited to 'libc/string/bfin/memmove.S') diff --git a/libc/string/bfin/memmove.S b/libc/string/bfin/memmove.S new file mode 100644 index 000000000..3d446f326 --- /dev/null +++ b/libc/string/bfin/memmove.S @@ -0,0 +1,95 @@ +/* memmove.S + * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved. + * + * This file is subject to the terms and conditions of the GNU Library General + * Public License. See the file "COPYING.LIB" in the main directory of this + * archive for more details. + * + * Non-LGPL License also available as part of VisualDSP++ + * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html + */ + +/* void *memmove(void *dest, const void *src, size_t n); copies n bytes from src to dest and is a no-op when n == 0. + * R0 = To Address (dest) (leave unchanged to form result) + * R1 = From Address (src) + * R2 = count (n) + * P3 is saved in I1 on entry and restored before every RTS; I0, I1, P0-P2, R1-R3 and hardware loop 0 are overwritten. + * Note: Data may overlap. If src < dest <= src + n the copy runs backwards one byte at a time. Otherwise it runs forwards: by 32-bit words (plus a byte loop for the n % 4 remainder) when n > 11 and both addresses are 4-byte aligned, else entirely by bytes. + */ + +.text + +.align 2 + +.global _memmove +.type _memmove, STT_FUNC +_memmove: + I1 = P3; /* stash P3 in I1; every exit path restores it */ + P0 = R0; // P0 = To address + P3 = R1; // P3 = From Address + P2 = R2 ; // P2 = count + CC = P2 == 0; // Check zero count + IF CC JUMP finished; // very unlikely + + CC = R1 < R0 (IU); // From < To + IF !CC JUMP no_overlap; /* src >= dest: copy forwards */ + R3 = R1 + R2; /* R3 = src + n */ + CC = R0 <= R3 (IU); // (From+len) >= To + IF CC JUMP overlap; /* dest lies in (src, src+n]: copy backwards */ +no_overlap: + R3 = 11; + CC = R2 <= R3; /* n <= 11; this compare has no (IU) suffix, unlike the ones above */ + IF CC JUMP bytes; /* short copy: use the byte loop */ + R3 = R1 | R0; // OR addresses together + R3 <<= 30; // check bottom two bits + CC = AZ; // AZ set if zero. + IF !CC JUMP bytes ; // Jump if addrs not aligned. 
+ + I0 = P3; /* I0 = source pointer for the word loop */ + P1 = P2 >> 2; // count = n/4 + P1 += -1; /* loop runs n/4 - 1 times; the last word is stored after the loop */ + R3 = 3; + R2 = R2 & R3; // remainder + P2 = R2; // set remainder + R1 = [I0++]; /* preload the first word */ + +#if !defined(__WORKAROUND_AVOID_DAG1) + LSETUP (quad_loop , quad_loop) LC0=P1; +quad_loop: MNOP || [P0++] = R1 || R1 = [I0++]; +#else + LSETUP (quad_loop_s, quad_loop_e) LC0=P1; +quad_loop_s: [P0++] = R1; +quad_loop_e: R1 = [I0++]; +#endif + [P0++] = R1; /* store the last word loaded by the loop */ + + CC = P2 == 0; // any remaining bytes? + P3 = I0; // Amend P3 to the updated source pointer. + IF !CC JUMP bytes; + P3 = I1; /* restore P3 */ + RTS; + +bytes: LSETUP (byte2_s , byte2_e) LC0=P2; /* forward byte copy of P2 bytes from P3 to P0 */ +byte2_s: R1 = B[P3++](Z); +byte2_e: B[P0++] = R1; + +finished: + P3 = I1; /* restore P3 */ + RTS; + +overlap: + P2 += -1; /* P2 = n - 1 */ + P0 = P0 + P2; /* P0 = address of last destination byte */ + P3 = P3 + P2; /* P3 = address of last source byte */ + R1 = B[P3--] (Z); /* preload the last source byte */ + CC = P2 == 0; + IF CC JUMP no_loop; /* n == 1: only the final store is needed */ + LSETUP (ol_s, ol_e) LC0 = P2; +ol_s: B[P0--] = R1; +ol_e: R1 = B[P3--] (Z); +no_loop: B[P0] = R1; /* store the final byte, at dest */ + P3 = I1; /* restore P3 */ + RTS; +.size _memmove,.-_memmove + +libc_hidden_def (memmove) -- cgit v1.2.3