1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
|
/*
 * GET_FRONT_BITS rx ry
 *
 * Shift register rx in place by the bit count held in register ry:
 * a right shift (lsr) when __cskyLE__ is defined, a left shift (lsl)
 * otherwise.  Wmemcpy applies it to the previously loaded source word so
 * that its remaining bytes land at the start of the next output word.
 * Only rx is modified.
 */
.macro GET_FRONT_BITS rx ry
#if !defined(__cskyLE__)
lsl \rx, \ry
#else
lsr \rx, \ry
#endif
.endm
/*
 * GET_AFTER_BITS rx ry
 *
 * Counterpart of GET_FRONT_BITS: shift register rx in place by the bit
 * count held in register ry, using a left shift (lsl) when __cskyLE__ is
 * defined and a right shift (lsr) otherwise.  Wmemcpy applies it to the
 * newly loaded source word so that its leading bytes fill the rest of the
 * output word.  Only rx is modified.
 */
.macro GET_AFTER_BITS rx ry
#if !defined(__cskyLE__)
lsr \rx, \ry
#else
lsl \rx, \ry
#endif
.endm
/* Pick the symbol name the routine is assembled under:
   wmemcpy when WANT_WIDE is defined, memcpy otherwise. */
#ifndef WANT_WIDE
# define Wmemcpy memcpy
#else
# define Wmemcpy wmemcpy
#endif
/*
 * void *memcpy(void *dest, const void *src, size_t n);
 *
 * Register usage, as read from the code below:
 *   r2      dest on entry; never written, so it still holds dest at rts
 *           (presumably the return-value register; confirm against the ABI).
 *   r3      src on entry, then the running read pointer.
 *   r4      number of bytes still to copy.
 *   r7      running write pointer (starts as a copy of r2).
 *   r1, r5, r6   scratch, used without being saved.
 *   r8-r13  used only by the 16-byte loops and the shift-merge path; each
 *           is saved on the stack before use and restored afterwards.
 *
 * Strategy:
 *   - fewer than 4 bytes: byte loop.
 *   - dest unaligned: copy single bytes until dest is 4-byte aligned.
 *   - dest and src both aligned: 16 bytes per iteration, then 4, then bytes.
 *   - dest aligned, src not: read aligned words from src and merge
 *     neighbouring words with shifts before storing.
 *   Before either unaligned path starts, the distance between dest and src
 *   is compared with the length; if the regions are closer than the length,
 *   the whole remaining copy is done one byte at a time.
 *
 * NOTE(review): r4 is treated as a byte count and no scaling is visible for
 * the WANT_WIDE (wmemcpy) build; confirm before enabling that configuration.
 */
.text
.align 2
.global Wmemcpy
.type Wmemcpy, @function
Wmemcpy:
mov r7, r2 /* r7 = write pointer; r2 itself is left untouched */
cmplti r4, 4 /* If len less than 4 bytes */
jbt .L_copy_by_byte
mov r6, r2
andi r6, 3 /* r6 = dest & 3 */
cmpnei r6, 0
jbt .L_dest_not_aligned /* If dest is not 4 bytes aligned */
/* dest (r7) is 4-byte aligned from here on; now classify src. */
.L0:
mov r6, r3
andi r6, 3 /* r6 = src & 3; kept for the shift-merge path below */
cmpnei r6, 0
jbt .L_dest_aligned_but_src_not_aligned /* If dest is aligned, but src is not aligned */
cmplti r4, 16 /* dest and src are all aligned */
jbt .L_aligned_and_len_less_16bytes /* If len less than 16 bytes */
/* The 16-byte loop needs r8 and r9 as extra data registers; save them. */
subi sp, 8
stw r8, (sp, 0)
stw r9, (sp, 4)
.L_aligned_and_len_larger_16bytes: /* src and dst are all aligned, and at least 16 bytes remain */
/* All four loads are issued before the four stores. */
ldw r1, (r3, 0)
ldw r5, (r3, 4)
ldw r8, (r3, 8)
ldw r9, (r3, 12)
stw r1, (r7, 0)
stw r5, (r7, 4)
stw r8, (r7, 8)
stw r9, (r7, 12)
subi r4, 16
addi r3, 16
addi r7, 16
cmplti r4, 16
jbf .L_aligned_and_len_larger_16bytes /* loop while 16 or more bytes remain */
/* Restore r8/r9 and release the 8-byte frame. */
ldw r8, (sp, 0)
ldw r9, (sp, 4)
addi sp, 8
/* Word-at-a-time loop for the aligned case; falls to the byte loop once
   fewer than 4 bytes remain. */
.L_aligned_and_len_less_16bytes:
cmplti r4, 4
jbt .L_copy_by_byte
ldw r1, (r3, 0)
stw r1, (r7, 0)
subi r4, 4
addi r3, 4
addi r7, 4
jbr .L_aligned_and_len_less_16bytes
/* Byte loop: copies the r4 remaining bytes from (r3) to (r7), lowest
   address first.  Used for short lengths, for tails, and as the fallback
   chosen by the distance checks below. */
.L_copy_by_byte:
cmpnei r4, 0
jbf .L_return /* nothing left to copy */
ldb r1, (r3, 0)
stb r1, (r7, 0)
subi r4, 1
addi r3, 1
addi r7, 1
jbr .L_copy_by_byte
.L_return:
rts
/* If dest is not aligned, we copy some bytes to make dest align.
Then we should judge whether src is aligned.
On entry r6 = dest & 3 (non-zero) and r4 >= 4. */
.L_dest_not_aligned:
mov r5, r3 /* consider overlapped case */
rsub r5, r5, r7
abs r5, r5 /* r5 = distance between dest and src */
cmplt r5, r4
jbt .L_copy_by_byte /* distance smaller than len: copy everything by bytes */
.L1:
ldb r1, (r3, 0) /* makes the dest align. */
stb r1, (r7, 0)
addi r6, 1 /* r6 counts up from dest & 3 ... */
subi r4, 1
addi r3, 1
addi r7, 1
cmpnei r6, 4 /* ... to 4, i.e. 4 - (dest & 3) bytes are copied */
jbt .L1
cmplti r4, 4
jbt .L_copy_by_byte
jbf .L0 /* judge whether the src is aligned. (always taken: the jbt above fell through) */
/* dest is word aligned, src is not.  On entry r6 = src & 3 (1..3). */
.L_dest_aligned_but_src_not_aligned:
mov r5, r3 /* consider overlapped case*/
rsub r5, r5, r7
abs r5, r5 /* r5 = distance between dest and src */
cmplt r5, r4
jbt .L_copy_by_byte /* distance smaller than len: copy everything by bytes */
/* Round r3 down to a word boundary and preload the word that holds the
   first source bytes; r3 then points at the following word. */
bclri r3, 0
bclri r3, 1
ldw r1, (r3, 0)
addi r3, 4
/* 16-byte frame: r11 at 0, r12 at 4, r13 at 8; slot 12 receives r8 only
   when the 16-byte loop is entered. */
subi sp, 16
stw r11, (sp,0)
stw r12, (sp,4)
stw r13, (sp,8)
movi r5, 8
mult r5, r6 /* r6 is used to store the misaligned bytes; r5 = 8 * r6 = misaligned bits */
mov r12, r5 /* r12 = shift count passed to GET_FRONT_BITS */
rsubi r5, 31 /* r5 = 31 - r5 */
addi r5, 1 /* r5 = 32 - 8 * r6 */
mov r13, r5 /* r13 = shift count passed to GET_AFTER_BITS */
cmplti r4, 16
jbt .L_not_aligned_and_len_less_16bytes
/* 16 or more bytes remain: save r8 in the spare slot and push a second
   8-byte frame for r9 and r10. */
stw r8, (sp, 12)
subi sp, 8
stw r9, (sp, 0)
stw r10, (sp, 4)
/* Each iteration loads four aligned words and produces four output words.
   r1 carries the last loaded source word from one output word to the next
   and across iterations. */
.L_not_aligned_and_len_larger_16bytes:
ldw r5, (r3, 0)
ldw r11, (r3, 4)
ldw r8, (r3, 8)
ldw r9, (r3, 12)
GET_FRONT_BITS r1 r12 /* shift direction depends on endianness, see the macro */
mov r10, r5 /* keep an unshifted copy of the word for the next output word */
GET_AFTER_BITS r5 r13
or r5, r1 /* output word 0 */
GET_FRONT_BITS r10 r12
mov r1, r11
GET_AFTER_BITS r11 r13
or r11, r10 /* output word 1 */
GET_FRONT_BITS r1 r12
mov r10, r8
GET_AFTER_BITS r8 r13
or r8, r1 /* output word 2 */
GET_FRONT_BITS r10 r12
mov r1, r9 /* unshifted last word becomes the carry for the next round */
GET_AFTER_BITS r9 r13
or r9, r10 /* output word 3 */
stw r5, (r7, 0)
stw r11, (r7, 4)
stw r8, (r7, 8)
stw r9, (r7, 12)
subi r4, 16
addi r3, 16
addi r7, 16
cmplti r4, 16
jbf .L_not_aligned_and_len_larger_16bytes /* loop while 16 or more bytes remain */
/* Pop the r9/r10 frame, then reload r8 from slot 12 of the outer frame. */
ldw r9, (sp, 0)
ldw r10, (sp, 4)
addi sp, 8
ldw r8, (sp,12)
/* Fewer than 16 bytes remain: merge one word at a time (.L2) while at
   least 4 bytes are left, then hand the tail to the byte loop. */
.L_not_aligned_and_len_less_16bytes:
cmplti r4, 4
jbf .L2
/* r3 is one aligned word ahead of the data consumed so far; step it back
   by 4 - (src & 3) so it points at the next uncopied source byte. */
rsubi r6, 4 /* r6 is used to store the misaligned bytes; now r6 = 4 - r6 */
subu r3, r6 /* initial the position */
ldw r11, (sp, 0)
ldw r12, (sp, 4)
ldw r13, (sp, 8)
addi sp, 16
jbr .L_copy_by_byte
.L2:
ldw r5, (r3, 0)
GET_FRONT_BITS r1 r12
mov r11, r1 /* r11 = leftover part of the previous word */
mov r1, r5 /* unshifted new word becomes the carry for the next round */
GET_AFTER_BITS r5 r13
or r5, r11
stw r5, (r7, 0)
subi r4, 4
addi r3, 4
addi r7, 4
jbr .L_not_aligned_and_len_less_16bytes
.size Wmemcpy, .-Wmemcpy
/* libc_hidden_def is defined outside this file. */
libc_hidden_def(Wmemcpy)
.weak Wmemcpy
|