From 4fcc031a7085a47b9a027a20a919574f8aab0768 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 30 May 2006 09:13:53 +0000 Subject: import some optimized functions from blackfin cvs --- libc/string/bfin/memcmp.S | 101 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 libc/string/bfin/memcmp.S (limited to 'libc/string/bfin/memcmp.S') diff --git a/libc/string/bfin/memcmp.S b/libc/string/bfin/memcmp.S new file mode 100644 index 000000000..f2679d5ae --- /dev/null +++ b/libc/string/bfin/memcmp.S @@ -0,0 +1,101 @@ +/* memcmp.S + * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved. + * + * This file is subject to the terms and conditions of the GNU Library General + * Public License. See the file "COPYING.LIB" in the main directory of this + * archive for more details. + * + * Non-LGPL License also available as part of VisualDSP++ + * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html + */ + +/* int memcmp(const void *s1, const void *s2, size_t n); + * R0 = First Address (s1) + * R1 = Second Address (s2) + * R2 = count (n) + * + * Favours word aligned data. + */ + +.text + +.align 2 + +.global _memcmp +.type _memcmp, STT_FUNC +_memcmp: + I1 = P3; + P0 = R0; // P0 = s1 address + P3 = R1; // P3 = s2 Address + P2 = R2 ; // P2 = count + CC = R2 <= 7(IU); + IF CC JUMP too_small; + I0 = R1; // s2 + R1 = R1 | R0; // OR addresses together + R1 <<= 30; // check bottom two bits + CC = AZ; // AZ set if zero. + IF !CC JUMP bytes ; // Jump if addrs not aligned. + + P1 = P2 >> 2; // count = n/4 + R3 = 3; + R2 = R2 & R3; // remainder + P2 = R2; // set remainder + + LSETUP (quad_loop_s , quad_loop_e) LC0=P1; +quad_loop_s: +#if !defined(__WORKAROUND_AVOID_DAG1) + MNOP || R0 = [P0++] || R1 = [I0++]; +#else + R0 = [P0++]; + R1 = [I0++]; +#endif + CC = R0 == R1; + IF !CC JUMP quad_different; +quad_loop_e: + NOP; + + P3 = I0; // s2 +too_small: + CC = P2 == 0; //Check zero count + IF CC JUMP finished; // very unlikely + +bytes: + LSETUP (byte_loop_s , byte_loop_e) LC0=P2; +byte_loop_s: + R1 = B[P3++](Z); // *s2 + R0 = B[P0++](Z); // *s1 + CC = R0 == R1; + IF !CC JUMP different; +byte_loop_e: + NOP; + +different: + R0 = R0 - R1; + P3 = I1; + RTS; + +quad_different: + // We've read two quads which don't match. + // Can't just compare them, because we're + // a little-endian machine, so the MSBs of + // the regs occur at later addresses in the + // string. + // Arrange to re-read those two quads again, + // byte-by-byte. + P0 += -4; // back up to the start of the + P3 = I0; // quads, and increase the + P2 += 4; // remainder count + P3 += -4; + JUMP bytes; + +finished: + R0 = 0; + P3 = I1; + RTS; +.size _memcmp,.-_memcmp + +libc_hidden_def (memcmp) + +#ifdef __UCLIBC_SUSV3_LEGACY__ +strong_alias (memcmp,bcmp) +#endif -- cgit v1.2.3