diff options
Diffstat (limited to 'libc')
| -rw-r--r-- | libc/string/arc/arcv2/memcpy.S | 236 | ||||
| -rw-r--r-- | libc/string/arc/arcv2/memset.S | 85 | ||||
| -rw-r--r-- | libc/string/arc/arcv2/strcmp.S | 83 | 
3 files changed, 404 insertions, 0 deletions
| diff --git a/libc/string/arc/arcv2/memcpy.S b/libc/string/arc/arcv2/memcpy.S new file mode 100644 index 000000000..7573daf51 --- /dev/null +++ b/libc/string/arc/arcv2/memcpy.S @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + */ + +#include <features.h> +#include <sysdep.h> + +#ifdef __LITTLE_ENDIAN__ +# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; << +# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >> +# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM +# define MERGE_2(RX,RY,IMM) +# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF +# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM +#else +# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >> +# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; << +# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; << +# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; << +# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM +# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08 +#endif + +#ifdef __LL64__ +# define PREFETCH_READ(RX)	prefetch [RX, 56] +# define PREFETCH_WRITE(RX)	prefetchw [RX, 64] +# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8] +# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8] +# define ZOLSHFT		5 +# define ZOLAND			0x1F +#else +# define PREFETCH_READ(RX)	prefetch [RX, 28] +# define PREFETCH_WRITE(RX)	prefetchw [RX, 32] +# define LOADX(DST,RX)		ld.ab	DST, [RX, 4] +# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4] +# define ZOLSHFT		4 +# define ZOLAND			0xF +#endif + +ENTRY(memcpy) +	prefetch  [r1]		; Prefetch the read location +	prefetchw [r0]		; Prefetch the write location +	mov.f	0, r2 +;;; if size is zero +	jz.d	[blink] +	mov	r3, r0		; don't clobber ret val + +;;; if size <= 8 +	cmp	r2, 8 +	bls.d	@.Lsmallchunk +	mov.f	lp_count, r2 + +	and.f	r4, r0, 0x03 +	rsub	lp_count, r4, 4 +	lpnz	@.Laligndestination +	;; LOOP BEGIN +	ldb.ab	r5, [r1,1] +	sub	r2, r2, 1 +	stb.ab	r5, [r3,1] +.Laligndestination: + +;;; Check the alignment of the source +	and.f	r4, r1, 0x03 +	bnz.d	@.Lsourceunaligned + +;;; CASE 0: Both source and destination are 32bit aligned +;;; Convert len to Dwords, unfold x4 +	lsr.f	lp_count, r2, ZOLSHFT +	lpnz	@.Lcopy32_64bytes +	;; LOOP START +	LOADX (r6, r1) +	PREFETCH_READ (r1) +	PREFETCH_WRITE (r3) +	LOADX (r8, r1) +	LOADX (r10, r1) +	LOADX (r4, r1) +	STOREX (r6, r3) +	STOREX (r8, r3) +	STOREX (r10, r3) +	STOREX (r4, r3) +.Lcopy32_64bytes: + +	and.f	lp_count, r2, ZOLAND ;Last remaining 31 bytes +.Lsmallchunk: +	lpnz	@.Lcopyremainingbytes +	;; LOOP START +	ldb.ab	r5, [r1,1] +	stb.ab	r5, [r3,1] +.Lcopyremainingbytes: + +	j	[blink] +;;; END CASE 0 + +.Lsourceunaligned: +	cmp	r4, 2 +	beq.d	@.LunalignedOffby2 +	sub	r2, r2, 1 + +	bhi.d	@.LunalignedOffby3 +	ldb.ab	r5, [r1, 1] + +;;; CASE 1: The source is unaligned, off by 1 +	;; Hence I need to read 1 byte for a 16bit alignment +	;; and 2bytes to reach 32bit alignment +	ldh.ab	r6, [r1, 2] +	sub	r2, r2, 2 +	;; Convert to words, unfold x2 +	lsr.f	lp_count, r2, 3 +	MERGE_1 (r6, r6, 8) +	MERGE_2 (r5, r5, 24) +	or	r5, r5, r6 + +	;; Both src and dst are aligned +	lpnz	@.Lcopy8bytes_1 +	;; LOOP START +	ld.ab	r6, [r1, 4] +	prefetch [r1, 28]	;Prefetch the next read location +	ld.ab	r8, [r1,4] +	prefetchw [r3, 32]	;Prefetch the next write location + +	SHIFT_1	(r7, r6, 24) +	or	r7, r7, r5 +	SHIFT_2	(r5, r6, 8) + +	SHIFT_1	(r9, r8, 24) +	or	r9, r9, r5 +	SHIFT_2	(r5, r8, 8) + +	st.ab	r7, [r3, 4] +	st.ab	r9, [r3, 4] +.Lcopy8bytes_1: + +	;; Write back the remaining 16bits +	EXTRACT_1 (r6, r5, 16) +	sth.ab	r6, [r3, 2] +	;; Write back the remaining 8bits +	EXTRACT_2 (r5, r5, 16) +	stb.ab	r5, [r3, 1] + +	and.f	lp_count, r2, 0x07 ;Last 8bytes +	lpnz	@.Lcopybytewise_1 +	;; LOOP START +	ldb.ab	r6, [r1,1] +	stb.ab	r6, [r3,1] +.Lcopybytewise_1: +	j	[blink] + +.LunalignedOffby2: +;;; CASE 2: The source is unaligned, off by 2 +	ldh.ab	r5, [r1, 2] +	sub	r2, r2, 1 + +	;; Both src and dst are aligned +	;; Convert to words, unfold x2 +	lsr.f	lp_count, r2, 3 +#ifdef __BIG_ENDIAN__ +	asl.nz	r5, r5, 16 +#endif +	lpnz	@.Lcopy8bytes_2 +	;; LOOP START +	ld.ab	r6, [r1, 4] +	prefetch [r1, 28]	;Prefetch the next read location +	ld.ab	r8, [r1,4] +	prefetchw [r3, 32]	;Prefetch the next write location + +	SHIFT_1	(r7, r6, 16) +	or	r7, r7, r5 +	SHIFT_2	(r5, r6, 16) + +	SHIFT_1	(r9, r8, 16) +	or	r9, r9, r5 +	SHIFT_2	(r5, r8, 16) + +	st.ab	r7, [r3, 4] +	st.ab	r9, [r3, 4] +.Lcopy8bytes_2: + +#ifdef __BIG_ENDIAN__ +	lsr.nz	r5, r5, 16 +#endif +	sth.ab	r5, [r3, 2] + +	and.f	lp_count, r2, 0x07 ;Last 8bytes +	lpnz	@.Lcopybytewise_2 +	;; LOOP START +	ldb.ab	r6, [r1,1] +	stb.ab	r6, [r3,1] +.Lcopybytewise_2: +	j	[blink] + +.LunalignedOffby3: +;;; CASE 3: The source is unaligned, off by 3 +;;; Hence, I need to read 1byte for achieve the 32bit alignment + +	;; Both src and dst are aligned +	;; Convert to words, unfold x2 +	lsr.f	lp_count, r2, 3 +#ifdef __BIG_ENDIAN__ +	asl.ne	r5, r5, 24 +#endif +	lpnz	@.Lcopy8bytes_3 +	;; LOOP START +	ld.ab	r6, [r1, 4] +	prefetch [r1, 28]	;Prefetch the next read location +	ld.ab	r8, [r1,4] +	prefetchw [r3, 32]	;Prefetch the next write location + +	SHIFT_1	(r7, r6, 8) +	or	r7, r7, r5 +	SHIFT_2	(r5, r6, 24) + +	SHIFT_1	(r9, r8, 8) +	or	r9, r9, r5 +	SHIFT_2	(r5, r8, 24) + +	st.ab	r7, [r3, 4] +	st.ab	r9, [r3, 4] +.Lcopy8bytes_3: + +#ifdef __BIG_ENDIAN__ +	lsr.nz	r5, r5, 24 +#endif +	stb.ab	r5, [r3, 1] + +	and.f	lp_count, r2, 0x07 ;Last 8bytes +	lpnz	@.Lcopybytewise_3 +	;; LOOP START +	ldb.ab	r6, [r1,1] +	stb.ab	r6, [r3,1] +.Lcopybytewise_3: +	j	[blink] + +END(memcpy) +libc_hidden_def(memcpy) diff --git a/libc/string/arc/arcv2/memset.S b/libc/string/arc/arcv2/memset.S new file mode 100644 index 000000000..d076ad1cd --- /dev/null +++ b/libc/string/arc/arcv2/memset.S @@ -0,0 +1,85 @@ + +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + */ + +#include <features.h> +#include <sysdep.h> + +#ifdef DONT_USE_PREALLOC +#define PREWRITE(A,B)	prefetchw [(A),(B)] +#else +#define PREWRITE(A,B)	prealloc [(A),(B)] +#endif + +ENTRY(memset) +	prefetchw [r0]		; Prefetch the write location +	mov.f	0, r2 +;;; if size is zero +	jz.d	[blink] +	mov	r3, r0		; don't clobber ret val + +;;; if length < 8 +	brls.d.nt	r2, 8, .Lsmallchunk +	mov.f	lp_count,r2 + +	and.f	r4, r0, 0x03 +	rsub	lp_count, r4, 4 +	lpnz	@.Laligndestination +	;; LOOP BEGIN +	stb.ab	r1, [r3,1] +	sub	r2, r2, 1 +.Laligndestination: + +;;; Destination is aligned +	and	r1, r1, 0xFF +	asl	r4, r1, 8 +	or	r4, r4, r1 +	asl	r5, r4, 16 +	or	r5, r5, r4 +	mov	r4, r5 + +	sub3	lp_count, r2, 8 +	cmp     r2, 64 +	bmsk.hi	r2, r2, 5 +	mov.ls	lp_count, 0 +	add3.hi	r2, r2, 8 + +;;; Convert len to Dwords, unfold x8 +	lsr.f	lp_count, lp_count, 6 +	lpnz	@.Lset64bytes +	;; LOOP START +	PREWRITE(r3, 64)	;Prefetch the next write location +	std.ab	r4, [r3, 8] +	std.ab	r4, [r3, 8] +	std.ab	r4, [r3, 8] +	std.ab	r4, [r3, 8] +	std.ab	r4, [r3, 8] +	std.ab	r4, [r3, 8] +	std.ab	r4, [r3, 8] +	std.ab	r4, [r3, 8] +.Lset64bytes: + +	lsr.f	lp_count, r2, 5 ;Last remaining  max 124 bytes +	lpnz	.Lset32bytes +	;; LOOP START +	prefetchw [r3, 32]	;Prefetch the next write location +	std.ab	r4, [r3, 8] +	std.ab	r4, [r3, 8] +	std.ab	r4, [r3, 8] +	std.ab	r4, [r3, 8] +.Lset32bytes: + +	and.f	lp_count, r2, 0x1F ;Last remaining 31 bytes +.Lsmallchunk: +	lpnz	.Lcopy3bytes +	;; LOOP START +	stb.ab	r1, [r3, 1] +.Lcopy3bytes: + +	j	[blink] + +END(memset) +libc_hidden_def(memset) diff --git a/libc/string/arc/arcv2/strcmp.S b/libc/string/arc/arcv2/strcmp.S new file mode 100644 index 000000000..2e0e64a0c --- /dev/null +++ b/libc/string/arc/arcv2/strcmp.S @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + */ + +#include <features.h> +#include <sysdep.h> + +ENTRY(strcmp) +	or	r2, r0, r1 +	bmsk_s	r2, r2, 1 +	brne	r2, 0, @.Lcharloop + +;;; s1 and s2 are word aligned +	ld.ab	r2, [r0, 4] + +	mov_s	r12, 0x01010101 +	ror	r11, r12 +	.align  4 +.LwordLoop: +	ld.ab	r3, [r1, 4] +	;; Detect NULL char in str1 +	sub	r4, r2, r12 +	ld.ab	r5, [r0, 4] +	bic	r4, r4, r2 +	and	r4, r4, r11 +	brne.d.nt	r4, 0, .LfoundNULL +	;; Check if the read locations are the same +	cmp	r2, r3 +	beq.d	.LwordLoop +	mov.eq	r2, r5 + +	;; A match is found, spot it out +#ifdef __LITTLE_ENDIAN__ +	swape	r3, r3 +	mov_s	r0, 1 +	swape	r2, r2 +#else +	mov_s	r0, 1 +#endif +	cmp_s	r2, r3 +	j_s.d	[blink] +	bset.lo	r0, r0, 31 + +	.align 4 +.LfoundNULL: +#ifdef __BIG_ENDIAN__ +	swape	r4, r4 +	swape	r2, r2 +	swape	r3, r3 +#endif +	;; Find null byte +	ffs	r0, r4 +	bmsk	r2, r2, r0 +	bmsk	r3, r3, r0 +	swape	r2, r2 +	swape	r3, r3 +	;; make the return value +	sub.f	r0, r2, r3 +	mov.hi	r0, 1 +	j_s.d	[blink] +	bset.lo	r0, r0, 31 + +	.align 4 +.Lcharloop: +	ldb.ab	r2, [r0, 1] +	ldb.ab	r3, [r1, 1] +	nop +	breq	r2, 0, .Lcmpend +	breq	r2, r3, .Lcharloop + +	.align 4 +.Lcmpend: +	j_s.d	[blink] +	sub	r0, r2, r3 +END(strcmp) +libc_hidden_def(strcmp) + +#ifndef __UCLIBC_HAS_LOCALE__ +strong_alias(strcmp,strcoll) +libc_hidden_def(strcoll) +#endif | 
