1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
/* strcmp.S
* Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
*
* This file is subject to the terms and conditions of the GNU Library General
* Public License. See the file "COPYING.LIB" in the main directory of this
* archive for more details.
*
* Non-LGPL License also available as part of VisualDSP++
* http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
*/
/* Fast strcmp() for Blackfin.
* When both strings are aligned, this processes four characters at
* a time. Uses a hw loop with "very big" count to loop "forever",
* until difference or a terminating zero is found.
* Once the end-case word has been identified, breaks out of the
* loop to check more carefully (same as the unaligned case).
*/
#include <features.h>
.text
.align 2
.global _strcmp
.type _strcmp, STT_FUNC
// _strcmp: compare the two zero-terminated byte strings addressed by r0 and r1.
//
// In:    r0 = address of the first string  (copied to p1)
//        r1 = address of the second string (copied to p2)
// Out:   r0 = (byte from first string) - (byte from second string), both
//        zero-extended, taken at the first position where the bytes
//        differ or where either byte is zero. The result is therefore 0
//        when both strings end at the same position with equal contents.
// Saved: r4-r7 are pushed on entry and popped before rts.
// Written without being restored: r0-r3, p0-p2 and hardware loop 0 (lc0).
//
// Structure:
//   1. If both addresses are 4-byte aligned, a word loop compares four
//      characters per iteration until the words differ or one contains
//      a zero byte. It then rewinds the pointers to that word.
//   2. The byte loop at "unaligned" walks one character at a time and
//      produces the return value. It is entered either directly (some
//      address was not 4-byte aligned) or after the rewind in step 1.
_strcmp:
[--sp] = (R7:4);
p1 = r0;
p2 = r1;
p0 = -1; // (need for loop counter init)
// check whether both pointers are word (4-byte) aligned
r0 = r0 | r1; // check both pointers at same time
r0 <<= 30; // dump all but last 2 bits
cc = az; // are they zero?
if !cc jump unaligned; // no; use unaligned code.
// fall-thru for aligned case..
// note that r0 is zero from the previous...
// p0 set to -1
// r0 == 0 is relied upon below as the zero addend of the +|+ tests.
// The loop count of -1 makes the hardware loop run "forever" in
// practice. The loop is left through the jumps inside it.
lsetup (beginloop, endloop) lc0=p0;
// pick up first words
r1 = [p1++];
r2 = [p2++];
// make up mask: 0x00FF00FF (low byte of each 16-bit half)
r7 = 0xFF;
r7.h = 0xFF;
// loop : 9 cycles to check 4 characters
// Prime cc for the first pass. On later passes cc comes from the
// compare at endloop, which is the last instruction of the loop body.
cc = r1 == r2;
beginloop:
if !cc jump notequal4; // compare failure, exit loop
// starting with 44332211
// see if char 3 or char 1 is 0
r3 = r1 & r7; // form 00330011
// add to zero, and (r2 is free, reload)
// r6 only receives the sum, the instruction is issued for its az flag.
// The load in the second slot already fetches the NEXT word of string 2.
r6 = r3 +|+ r0 || r2 = [p2++] || nop;
cc = az; // true if either is zero
r3 = r1 ^ r3; // form 44002200 (4321^0301 => 4020)
// (trick, saves having another mask)
// add to zero, and (r1 is free, reload)
// Same flag-only use of r6. The load fetches the NEXT word of string 1,
// before the zero test on the current word has been acted upon.
r6 = r3 +|+ r0 || r1 = [p1++] || nop;
cc |= az; // true if either is zero
if cc jump zero4; // leave if a zero somewhere
endloop:
cc = r1 == r2;
// loop exits
notequal4: // compare failure on 4-char compare
// address pointers are one word ahead;
// faster to use zero4 exit code
// (the +4 here and the -8 below give the net -4 this case needs)
p1 += 4;
p2 += 4;
zero4: // one of the bytes in word 1 is zero
// but we've already fetched the next word; so
// backup two to look at failing word again
p1 += -8;
p2 += -8;
// here when pointers are unaligned: checks one
// character at a time. Also use at the end of
// the word-check algorithm to figure out what happened
unaligned:
// r0 may be non-zero here (direct jump from the alignment check) or
// zero (arrival from the word loop), so it is cleared explicitly.
// p0 set to -1
r0 = 0 (Z);
r1 = B[p1++] (Z);
r2 = B[p2++] (Z);
lsetup (beginloop1, endloop1) lc0=p0;
beginloop1:
cc = r1; // first char must be non-zero
// chars must be the same
// The subtraction uses the current r1. The load in the second slot
// replaces r1 with the following character of string 1.
r3 = r2 - r1 (NS) || r1 = B[p1++] (Z) || nop;
cc &= az;
r3 = r0 - r2; // second char must be non-zero
cc &= an;
// cc is now true only if both characters are non-zero and equal.
if !cc jump exitloop1;
endloop1:
r2 = B[p2++] (Z);
exitloop1: // here means we found a zero or a difference.
// we have r2(N), p2(N), r1(N+1), p1(N+2)
// r1 already holds the character after the deciding one, so reload
// character N of string 1 from two bytes behind p1.
r1=B[p1+ -2] (Z);
r0 = r1 - r2;
(r7:4) = [sp++];
rts;
.size _strcmp,.-_strcmp
/* Symbol bookkeeping for strcmp. libc_hidden_def and strong_alias are
 * macros defined outside this file (presumably reached through
 * <features.h>), so confirm their exact effect against the libc headers. */
libc_hidden_def (strcmp)
#ifndef __UCLIBC_HAS_LOCALE__
/* Only when __UCLIBC_HAS_LOCALE__ is not defined: strcoll is presumably
 * emitted as an alias of strcmp, so both names share this code. */
strong_alias (strcmp,strcoll)
libc_hidden_def (strcoll)
#endif
|