summaryrefslogtreecommitdiff
path: root/libc/string/xtensa/strncpy.S
blob: 7ba2ef77de0a8379d5a4af16d1644af26a99f8cb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
/* Optimized strncpy for Xtensa.
   Copyright (C) 2001, 2007 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
   Boston, MA 02110-1301, USA.  */

#include "../../sysdeps/linux/xtensa/sysdep.h"
#include <bits/xtensa-config.h>

/* MASKn selects byte n of a 32-bit word held in a register, where byte 0
   is the first byte of the word in the order the copy code handles them
   (byte 0 is tested and stored first, byte 3 last).  Which register bits
   hold that byte depends on the byte order, hence the two sets of
   definitions.  strncpy loads the four masks into registers and uses
   them to test each byte of a loaded word for NUL.  */
#ifdef __XTENSA_EB__
/* Byte 0 is in the most significant bits of the register.  */
#define	MASK0 0xff000000
#define	MASK1 0x00ff0000
#define	MASK2 0x0000ff00
#define	MASK3 0x000000ff
#else
/* Byte 0 is in the least significant bits of the register.  */
#define	MASK0 0x000000ff
#define	MASK1 0x0000ff00
#define	MASK2 0x00ff0000
#define	MASK3 0xff000000
#endif

/* Do not use .literal_position in the ENTRY macro: LITERAL_POSITION is
   redefined as empty here, and the directive is instead written out
   explicitly below, ahead of the code that precedes the strncpy entry
   point.
   NOTE(review): this assumes ENTRY (from sysdep.h, not visible in this
   file) expands LITERAL_POSITION -- confirm against sysdep.h.  */
#undef LITERAL_POSITION
#define LITERAL_POSITION

	.text
	.align	4
	/* Presumably where the assembler places the literals needed by the
	   movi MASKn instructions in strncpy -- TODO confirm.  */
	.literal_position
/* Out-of-line paths for strncpy, placed ahead of its entry point and
   reached from the src alignment tests at the top of strncpy.  They copy
   one byte at a time until src is word-aligned, n reaches zero, or a NUL
   byte has been copied.

   Register use (set up by strncpy):
     a3  = src pointer, not word-aligned on entry
     a4  = n, bytes remaining; non-zero on entry to each label
     a10 = dst pointer
     a8  = scratch, the byte being copied

   Exits:
     .Lsrcaligned  src is now word-aligned and n is non-zero
     .Lfill        a NUL was copied and n is non-zero: zero-fill the rest
     .Lret         n reached zero: return  */
__strncpy_aux:

.Lsrc1mod2: // src address is odd
	l8ui	a8, a3, 0	// get byte 0
	addi	a3, a3, 1	// advance src pointer
	s8i	a8, a10, 0	// store byte 0
	addi	a4, a4, -1	// decrement n
	beqz    a4, .Lret       // if n is zero, done: nothing left to pad
	addi	a10, a10, 1	// advance dst pointer
	beqz	a8, .Lfill	// if byte 0 is zero, pad the rest of dst
	bbci.l	a3, 1, .Lsrcaligned // if src is now word-aligned
	// Otherwise src is now 2 mod 4: fall through.

.Lsrc2mod4: // src address is 2 mod 4
	l8ui	a8, a3, 0	// get byte 0
	addi	a4, a4, -1	// decrement n
	s8i	a8, a10, 0	// store byte 0
	beqz    a4, .Lret       // if n is zero
	addi	a10, a10, 1	// advance dst pointer
	beqz	a8, .Lfill	// if byte 0 is zero
	l8ui	a8, a3, 1	// get byte 1
	addi	a3, a3, 2	// advance src pointer past both bytes
	s8i	a8, a10, 0	// store byte 1 (dst was already advanced)
	addi	a4, a4, -1	// decrement n
	beqz    a4, .Lret       // if n is zero
	addi	a10, a10, 1	// advance dst pointer
	bnez	a8, .Lsrcaligned // byte 1 non-zero: src is now word-aligned
	j	.Lfill		// byte 1 was zero: pad the rest of dst

	/* Shared return, also used by the n == 0 check at the top of
	   strncpy.  */
.Lret:
	retw

/* char *strncpy (char *dst, const char *src, size_t n)

   Copies bytes from src to dst, stopping after n bytes or after a NUL
   byte has been copied, whichever comes first.  If a NUL is copied while
   n is still non-zero, the remaining bytes of dst are set to zero (see
   .Lfill).  a2 is never written, so dst is what the caller gets back.

   Register use for the whole routine:
     a2  = dst as passed in; kept untouched as the return value
     a3  = src pointer
     a4  = n, bytes remaining
     a10 = working dst pointer
     a11, a5, a6, a7 = MASK0, MASK1, MASK2, MASK3
     a8  = scratch (byte or word being copied)
     a9  = zero, used only by the fill code

   Dispatch: src is first brought to word alignment by the byte copies at
   .Lsrc1mod2 / .Lsrc2mod4; then, if dst is also word-aligned, the word
   loop at .Laligned is used, otherwise the byte loop at .Ldstunaligned.  */
ENTRY (strncpy)
	/* a2 = dst, a3 = src, a4 = n */

	mov	a10, a2		// a10 = working dst; a2 is left as the return value
	beqz    a4, .Lret       // if n is zero

	movi	a11, MASK0
	movi	a5, MASK1
	movi	a6, MASK2
	movi	a7, MASK3
	bbsi.l	a3, 0, .Lsrc1mod2	// src is odd
	bbsi.l	a3, 1, .Lsrc2mod4	// src is 2 mod 4
.Lsrcaligned:
	/* src is word-aligned and n is non-zero here; this label is also
	   the re-entry point from .Lsrc1mod2 / .Lsrc2mod4.  */

	/* Check if the destination is aligned.  */
	movi	a8, 3
	bnone	a10, a8, .Laligned	// low two bits of dst clear: word loop

	j	.Ldstunaligned		// dst not word-aligned: byte loop


/* Fill the dst with zeros -- n is at least 1.

   Reached once a NUL byte has been copied while n is still non-zero;
   stores zero to the remaining n bytes of dst and returns.
     a10 = next dst byte to write
     a4  = n, number of bytes still to write
     a9  = 0, the value stored
     a8  = scratch (word count in the XCHAL_HAVE_LOOPS variant)
   dst is first brought to word alignment with byte stores, then whole
   words are stored, then any leftover 1 to 3 bytes.

   NOTE(review): blti/bgei compare n as a signed value, so an n with the
   top bit set skips the word loop and is filled one byte at a time by
   .Lfillcleanup; the result looks the same, only slower -- confirm this
   is acceptable.  */

.Lfill:
	movi	a9, 0
	bbsi.l	a10, 0, .Lfill1mod2	// dst is odd
	bbsi.l	a10, 1, .Lfill2mod4	// dst is 2 mod 4
.Lfillaligned:
	/* dst is word-aligned and n is at least 1 here.  */
	blti	a4, 4, .Lfillcleanup	// less than one word left

	/* Loop filling complete words with zero.  */
#if XCHAL_HAVE_LOOPS

	srai	a8, a4, 2	// a8 = number of whole words (n / 4)
	loop	a8, 1f
	s32i	a9, a10, 0
	addi	a10, a10, 4

1:	slli	a8, a8, 2	// a8 = bytes just written (words * 4)
	sub	a4, a4, a8	// n = leftover byte count, 0 to 3

#else /* !XCHAL_HAVE_LOOPS */

1:	s32i	a9, a10, 0
	addi	a10, a10, 4
	addi	a4, a4, -4
	bgei    a4, 4, 1b	// another whole word still fits

#endif /* !XCHAL_HAVE_LOOPS */

	beqz	a4, 2f		// n was a multiple of 4: done

.Lfillcleanup:
	/* Fill leftover (1 to 3) bytes with zero.  */
	s8i	a9, a10, 0	// store byte 0
	addi	a4, a4, -1	// decrement n
	addi	a10, a10, 1
	bnez    a4, .Lfillcleanup 

2:	retw
	
.Lfill1mod2: // dst address is odd
	s8i	a9, a10, 0	// store byte 0
	addi	a4, a4, -1	// decrement n
	beqz    a4, 2b		// if n is zero
	addi    a10, a10, 1	// advance dst pointer
	bbci.l	a10, 1, .Lfillaligned // if dst is now word-aligned
	// Otherwise dst is now 2 mod 4: fall through.

.Lfill2mod4: // dst address is 2 mod 4
	s8i	a9, a10, 0	// store byte 0
	addi	a4, a4, -1	// decrement n
	beqz    a4, 2b		// if n is zero
	s8i	a9, a10, 1	// store byte 1
	addi	a4, a4, -1	// decrement n
	beqz    a4, 2b		// if n is zero
	addi    a10, a10, 2	// advance dst pointer past both bytes
	j	.Lfillaligned


/* dst is word-aligned; src is word-aligned; n is at least 1.

   Word-at-a-time copy.  Each iteration first checks that n is at least
   5, so that a whole word can be consumed with n still non-zero
   afterwards; every exit to .Lfill therefore has n at least 1.  When n
   is below 5 the remaining bytes are handed to the byte loop at
   .Ldstunaligned.  Otherwise a word is loaded from src and its bytes are
   tested for NUL in order, using MASK0..MASK3 held in a11, a5, a6, a7:
     - NUL in byte 0, 1 or 2: .Lz0/.Lz1/.Lz2 store the bytes up to and
       including the NUL, adjust n and dst, and go to .Lfill;
     - NUL in byte 3: the whole word is stored, then .Lfill;
     - no NUL: the whole word is stored and the loop continues.

   NOTE(review): blti is a signed comparison, so an n with the top bit
   set is treated as below 5 and copied by the byte loop; the result
   looks the same, only slower -- confirm this is acceptable.
   NOTE(review): _movi.n is a narrow-encoding instruction that the
   assembler is told not to transform (underscore prefix) -- confirm all
   target configurations support it.  */

	.align	4
	/* (2 mod 4) alignment for loop instruction: the label is 4-byte
	   aligned and, in the XCHAL_HAVE_LOOPS variant, one instruction
	   (_movi.n, presumably 2 bytes) precedes the loop instruction.  */
.Laligned:
#if XCHAL_HAVE_LOOPS
	_movi.n	a8, 0		// set up for the maximum loop count
	loop	a8, 1f		// loop forever (almost anyway)
	// a8 is reused as the data word inside the loop body.
	blti	a4, 5, .Ldstunaligned // n is near limit; do one at a time
	l32i	a8, a3, 0	// get word from src
	addi	a3, a3, 4	// advance src pointer
	bnone	a8, a11, .Lz0	// if byte 0 is zero
	bnone	a8, a5, .Lz1	// if byte 1 is zero
	bnone	a8, a6, .Lz2	// if byte 2 is zero
	s32i	a8, a10, 0	// store word to dst
	addi	a4, a4, -4	// decrement n
	addi	a10, a10, 4	// advance dst pointer
	bnone	a8, a7, .Lfill	// if byte 3 is zero
1:	

#else /* !XCHAL_HAVE_LOOPS */

1:	blti	a4, 5, .Ldstunaligned // n is near limit; do one at a time
	l32i	a8, a3, 0	// get word from src
	addi	a3, a3, 4	// advance src pointer
	bnone	a8, a11, .Lz0	// if byte 0 is zero
	bnone	a8, a5, .Lz1	// if byte 1 is zero
	bnone	a8, a6, .Lz2	// if byte 2 is zero
	s32i	a8, a10, 0	// store word to dst
	addi	a4, a4, -4	// decrement n
	addi	a10, a10, 4	// advance dst pointer
	bany	a8, a7, 1b	// no zeroes
#endif /* !XCHAL_HAVE_LOOPS */

	/* Without loops: byte 3 was zero.  With loops: only reached if the
	   hardware loop count runs out.  */
	j	.Lfill

.Lz0:	/* Byte 0 is zero.  */
#ifdef __XTENSA_EB__
	/* Byte 0 is not in the low bits of a8 here, so load an explicit
	   zero for the byte store.  In the other byte order the low bits
	   of a8 are byte 0, which is already zero.  */
	movi	a8, 0
#endif
	s8i	a8, a10, 0	// store the NUL
	addi	a4, a4, -1	// decrement n
	addi	a10, a10, 1	// advance dst pointer
	j	.Lfill

.Lz1:	/* Byte 1 is zero.  */
#ifdef __XTENSA_EB__
	/* Move bytes 0 and 1 (the upper half of a8) down into the low
	   half for the 16-bit store.  */
        extui   a8, a8, 16, 16
#endif
	s16i	a8, a10, 0	// store bytes 0 and 1
	addi	a4, a4, -2	// decrement n
	addi	a10, a10, 2	// advance dst pointer
	j	.Lfill

.Lz2:	/* Byte 2 is zero.  */
#ifdef __XTENSA_EB__
	/* Move bytes 0 and 1 (the upper half of a8) down into the low
	   half for the 16-bit store.  */
        extui   a8, a8, 16, 16
#endif
	s16i	a8, a10, 0	// store bytes 0 and 1
	movi	a8, 0
	s8i	a8, a10, 2	// store the NUL as byte 2
	addi	a4, a4, -3	// decrement n
	addi	a10, a10, 3	// advance dst pointer
	j	.Lfill

	.align	4
	/* (2 mod 4) alignment for loop instruction: the label is 4-byte
	   aligned and, in the XCHAL_HAVE_LOOPS variant, one instruction
	   (_movi.n, presumably 2 bytes) precedes the loop instruction.

	   Byte-at-a-time copy; n is at least 1 on entry.  Used when dst is
	   not word-aligned, and also by the word loop at .Laligned once n
	   drops below 5.  Copies bytes from src (a3) to dst (a10) until n
	   (a4) reaches zero, in which case it returns, or until a NUL has
	   been copied with n still non-zero, in which case the rest of
	   dst is zero-filled by .Lfill.

	   NOTE(review): _movi.n is a narrow-encoding instruction that the
	   assembler is told not to transform (underscore prefix) --
	   confirm all target configurations support it.  */
.Ldstunaligned:

#if XCHAL_HAVE_LOOPS
	_movi.n	a8, 0		// set up for the maximum loop count
	loop	a8, 2f		// loop forever (almost anyway)
#endif
1:	l8ui	a8, a3, 0	// get next byte from src
	addi	a3, a3, 1	// advance src pointer
	s8i	a8, a10, 0	// store it to dst
	addi	a4, a4, -1	// decrement n
	beqz	a4, 3f		// n exhausted: return, nothing left to pad
	addi	a10, a10, 1	// advance dst pointer
#if XCHAL_HAVE_LOOPS
	/* NUL copied: branch out to the loop-end label.  The code relies
	   on this taken branch leaving the loop, while falling through
	   to the loop end starts the next iteration.  */
	beqz	a8, 2f
#else
	bnez	a8, 1b		// byte was not NUL: keep copying
#endif
2:	j	.Lfill		// NUL copied and n is non-zero: pad with zeros

3:	retw

libc_hidden_def (strncpy)