1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
|
/*
* Copyright (C) 2004-2007 Atmel Corporation
*
* This file is subject to the terms and conditions of the GNU Lesser General
* Public License. See the file "COPYING.LIB" in the main directory of this
* archive for more details.
*/
/*
 * Register aliases used by memcpy below.
 * Don't use r12 as dst since we must return it unmodified
 * NOTE(review): r12/r11/r10 are presumably where the calling convention
 * delivers memcpy's three arguments -- confirm against the AVR32 ABI.
 */
/* Write cursor: memcpy copies r12 into it on entry, then advances it */
#define dst r9
/* Read cursor: advanced as data is loaded */
#define src r11
/* Byte count on entry; counted down (at times with a bias) during the copy */
#define len r10
/*
 * memcpy: copy len bytes from src to dst and return r12 as it was received.
 *
 * r12 is copied into dst on entry and never written afterwards, so every
 * exit hands back the caller's value.
 *
 * Strategy:
 *   - len < 32: simple byte loop using r8 as scratch, nothing is pushed.
 *   - otherwise: save r0-r7 and lr, byte-copy until src is on a 32-byte
 *     boundary, then copy 32 bytes per iteration with ldm/stm when dst is
 *     word-aligned, or one word per iteration when it is not. The leftover
 *     bytes are handled by jumping into an unrolled run of byte copies.
 *
 * All length tests are signed (brge/brlt/retlt).
 * NOTE(review): a len with the top bit set therefore takes the short path
 * and returns without copying anything -- confirm this is acceptable.
 */
.text
.global memcpy
.type memcpy, @function
memcpy:
/* Prefetch hint for the start of the source buffer */
pref src[0]
mov dst, r12
/* If we have less than 32 bytes, don't do anything fancy */
cp.w len, 32
brge .Lmore_than_31
/* Short path. len is pre-decremented so the loop can test its sign: it
   goes negative once every byte has been copied. A len of 0 returns
   straight away. */
sub len, 1
retlt r12
1: ld.ub r8, src++
st.b dst++, r8
sub len, 1
brge 1b
retal r12
.Lmore_than_31:
/* r0-r7 serve as copy buffers from here on; save them together with lr.
   Both exits of this long path undo the push with "popm r0-r7, pc",
   which returns by loading the saved lr into pc. */
pushm r0-r7, lr
/* Check alignment */
/* r8 = src & 31: non-zero means src is not on a 32-byte boundary */
mov r8, src
andl r8, 31, COH
brne .Lunaligned_src
/* r8 = dst & 3: non-zero means dst is not word-aligned */
mov r8, dst
andl r8, 3, COH
brne .Lunaligned_dst
.Laligned_copy:
/* Reached with src on a 32-byte boundary and dst word-aligned. From here
   len is kept biased by -32, i.e. len = (bytes left) - 32, so "brge"
   means at least one more full 32-byte chunk is available. */
sub len, 32
brlt .Lless_than_32
1: /* Copy 32 bytes at a time */
/* The pointers are advanced explicitly after each ldm/stm
   ("sub x, -32" adds 32). */
ldm src, r0-r7
sub src, -32
stm dst, r0-r7
sub dst, -32
sub len, 32
brge 1b
.Lless_than_32:
/* Here len = (bytes left) - 32, in the range -32..-1 */
/* Copy 16 more bytes if possible */
/* Adding 16 rebiases len to (bytes left) - 16 */
sub len, -16
brlt .Lless_than_16
ldm src, r0-r3
sub src, -16
/* Account for the 16 bytes being copied, keeping the -16 bias */
sub len, 16
stm dst, r0-r3
sub dst, -16
.Lless_than_16:
/* Do the remaining as byte copies */
/* Here len = (bytes left) - 16, in -16..-1, so after the neg it is
   16 - (bytes left), in 1..16. The add then jumps forward into the
   unrolled run of 15 byte copies so that only the last (bytes left)
   load/store pairs execute.
   NOTE(review): this depends on each ld.ub/st.b pair assembling to
   exactly 4 bytes and on the pc value the add observes; re-verify before
   inserting, removing or reordering anything between the add and the
   .endr. */
neg len
add pc, pc, len << 2
.rept 15
ld.ub r0, src++
st.b dst++, r0
.endr
/* Restore r0-r7 and return (saved lr goes into pc) */
popm r0-r7, pc
.Lunaligned_src:
/* Make src cacheline-aligned. r8 = (src & 31) */
/* After the rsub, r8 = 32 - (src & 31): the number of bytes up to the next
   32-byte boundary (1..31). len is at least 32 on this path, so it is
   still positive after those bytes are deducted up front. */
rsub r8, r8, 32
sub len, r8
/* Byte-copy the r8 leading bytes */
1: ld.ub r0, src++
st.b dst++, r0
sub r8, 1
brne 1b
/* If dst is word-aligned, we're ready to go */
/* src now sits on a 32-byte boundary; issue a prefetch hint for it */
pref src[0]
mov r8, 3
tst dst, r8
breq .Laligned_copy
/* Otherwise fall through into .Lunaligned_dst */
.Lunaligned_dst:
/* src is aligned, but dst is not. Expect bad performance */
/* One word per iteration; len is kept biased by -4, i.e.
   len = (bytes left) - 4.
   NOTE(review): st.w is issued to a dst that is not word-aligned, which
   assumes the target tolerates unaligned word stores -- confirm. */
sub len, 4
brlt 2f
1: ld.w r0, src++
st.w dst++, r0
sub len, 4
brge 1b
/* Here len = (bytes left) - 4, in -4..-1; after the neg it is
   4 - (bytes left), in 1..4. Same computed-jump technique as in
   .Lless_than_16, this time into a run of 3 byte copies, and with the
   same dependence on instruction sizes (see NOTE(review) there). */
2: neg len
add pc, pc, len << 2
.rept 3
ld.ub r0, src++
st.b dst++, r0
.endr
/* Restore r0-r7 and return (saved lr goes into pc) */
popm r0-r7, pc
.size memcpy, . - memcpy
/* libc_hidden_def is a macro defined outside this file */
libc_hidden_def(memcpy)
|