Diffstat (limited to 'libc/string/arc/memset.S')
 -rw-r--r--   libc/string/arc/memset.S   115
1 file changed, 113 insertions, 2 deletions
diff --git a/libc/string/arc/memset.S b/libc/string/arc/memset.S
index f4048455a..0b74ddc7f 100644
--- a/libc/string/arc/memset.S
+++ b/libc/string/arc/memset.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2013, 2014-2015, 2017 Synopsys, Inc. (www.synopsys.com)
  * Copyright (C) 2007 ARC International (UK) LTD
  *
  * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
@@ -7,10 +7,15 @@
 
 #include <sysdep.h>
 
-#define SMALL	7 /* Must be at least 6 to deal with alignment/loop issues.  */
+#if !defined(__ARC700__) && !defined(__ARCHS__)
+#error "Neither ARC700 nor ARCHS is defined!"
+#endif
 
 ENTRY(memset)
+#ifdef __ARC700__
+#define SMALL	7 /* Must be at least 6 to deal with alignment/loop issues.  */
+
 	mov_s	r4,r0
 	or	r12,r0,r2
 	bmsk.f	r12,r12,1
@@ -47,5 +52,111 @@ ENTRY(memset)
 	stb.ab	r1,[r4,1]
.Ltiny_end:
 	j_s	[blink]
+#endif /* __ARC700__ */
+
+#ifdef __ARCHS__
+#ifdef DONT_USE_PREALLOC
+#define PREWRITE(A,B)	prefetchw [(A),(B)]
+#else
+#define PREWRITE(A,B)	prealloc [(A),(B)]
+#endif
+
+	prefetchw [r0]		; Prefetch the write location
+	mov.f	0, r2
+;;; if size is zero
+	jz.d	[blink]
+	mov	r3, r0		; don't clobber ret val
+
+;;; if length < 8
+	brls.d.nt	r2, 8, .Lsmallchunk
+	mov.f	lp_count,r2
+
+	and.f	r4, r0, 0x03
+	rsub	lp_count, r4, 4
+	lpnz	@.Laligndestination
+	;; LOOP BEGIN
+	stb.ab	r1, [r3,1]
+	sub	r2, r2, 1
+.Laligndestination:
+
+;;; Destination is aligned
+	and	r1, r1, 0xFF
+	asl	r4, r1, 8
+	or	r4, r4, r1
+	asl	r5, r4, 16
+	or	r5, r5, r4
+	mov	r4, r5
+
+	sub3	lp_count, r2, 8
+	cmp     r2, 64
+	bmsk.hi	r2, r2, 5
+	mov.ls	lp_count, 0
+	add3.hi	r2, r2, 8
+
+;;; Convert len to Dwords, unfold x8
+	lsr.f	lp_count, lp_count, 6
+	lpnz	@.Lset64bytes
+	;; LOOP START
+	PREWRITE(r3, 64)	;Prefetch the next write location
+#if defined(__LL64__) || defined(__ARC_LL64__)
+	std.ab	r4, [r3, 8]
+	std.ab	r4, [r3, 8]
+	std.ab	r4, [r3, 8]
+	std.ab	r4, [r3, 8]
+	std.ab	r4, [r3, 8]
+	std.ab	r4, [r3, 8]
+	std.ab	r4, [r3, 8]
+	std.ab	r4, [r3, 8]
+#else
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+#endif
+.Lset64bytes:
+
+	lsr.f	lp_count, r2, 5 ;Last remaining  max 124 bytes
+	lpnz	.Lset32bytes
+	;; LOOP START
+	prefetchw [r3, 32]	;Prefetch the next write location
+#if defined(__LL64__) || defined(__ARC_LL64__)
+	std.ab	r4, [r3, 8]
+	std.ab	r4, [r3, 8]
+	std.ab	r4, [r3, 8]
+	std.ab	r4, [r3, 8]
+#else
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r4, [r3, 4]
+#endif
+.Lset32bytes:
+
+	and.f	lp_count, r2, 0x1F ;Last remaining 31 bytes
+.Lsmallchunk:
+	lpnz	.Lcopy3bytes
+	;; LOOP START
+	stb.ab	r1, [r3, 1]
+.Lcopy3bytes:
+
+	j	[blink]
+#endif /* __ARCHS__ */
+
 END(memset)
 libc_hidden_def(memset)
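
For readers skimming the diff, the new __ARCHS__ path follows a fairly standard memset pattern: store bytes until the destination is word aligned, broadcast the fill byte across a register (the asl/or sequence into r4), blast large prefetched chunks with word or double-word stores, then finish the tail byte by byte. The C sketch below only illustrates that structure; it is not part of the commit, and the memset_sketch name is made up for this note.

/*
 * Rough C equivalent of the structure of the __ARCHS__ memset path.
 * Illustrative only; names and chunk sizes are simplified.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

static void *memset_sketch(void *dst, int c, size_t n)
{
	unsigned char *p = dst;
	uint32_t pat = (uint8_t)c;

	/* Byte stores until the pointer is word aligned
	   (mirrors the .Laligndestination loop). */
	while (n && ((uintptr_t)p & 0x3)) {
		*p++ = (uint8_t)c;
		n--;
	}

	/* Broadcast the fill byte across a 32-bit word
	   (the and/asl/or sequence building r4). */
	pat |= pat << 8;
	pat |= pat << 16;

	/* Bulk word stores; stands in for the unrolled 64-/32-byte
	   st.ab/std.ab loops with prefetch. */
	while (n >= 4) {
		memcpy(p, &pat, 4);
		p += 4;
		n -= 4;
	}

	/* Trailing bytes (the .Lsmallchunk byte loop). */
	while (n--)
		*p++ = (uint8_t)c;

	return dst;
}

int main(void)
{
	char buf[37];
	memset_sketch(buf, 0xAB, sizeof buf);
	printf("%02x %02x\n", (unsigned char)buf[0], (unsigned char)buf[36]);
	return 0;
}

The std.ab double-word stores selected by __LL64__/__ARC_LL64__ have no separate C equivalent here; the word-sized loop stands in for both variants.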
