diff options
author | Vineet Gupta <vgupta@synopsys.com> | 2015-02-19 19:14:00 +0530 |
---|---|---|
committer | Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> | 2015-02-20 11:32:02 +0100 |
commit | df273fedf4fef25983ef6ec7e42b03d54f44960d (patch) | |
tree | d2194f5200e142d9e3a75c0b5106b776a0ee2a20 /libc/string/arc/arcv2/memset.S | |
parent | afab56958f1cbb47b831ee3ebff231dfbae74af2 (diff) |
ARCv2: optimised string routines
Signed-off-by: Claudiu Zissulescu <claziss@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Bernhard Reutner-Fischer <rep.dot.nop@gmail.com>
Diffstat (limited to 'libc/string/arc/arcv2/memset.S')
-rw-r--r-- | libc/string/arc/arcv2/memset.S | 85 |
1 files changed, 85 insertions, 0 deletions
diff --git a/libc/string/arc/arcv2/memset.S b/libc/string/arc/arcv2/memset.S new file mode 100644 index 000000000..d076ad1cd --- /dev/null +++ b/libc/string/arc/arcv2/memset.S @@ -0,0 +1,85 @@ + +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * + * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + */ + +#include <features.h> +#include <sysdep.h> + +#ifdef DONT_USE_PREALLOC +#define PREWRITE(A,B) prefetchw [(A),(B)] +#else +#define PREWRITE(A,B) prealloc [(A),(B)] +#endif + +ENTRY(memset) + prefetchw [r0] ; Prefetch the write location + mov.f 0, r2 +;;; if size is zero + jz.d [blink] + mov r3, r0 ; don't clobber ret val + +;;; if length < 8 + brls.d.nt r2, 8, .Lsmallchunk + mov.f lp_count,r2 + + and.f r4, r0, 0x03 + rsub lp_count, r4, 4 + lpnz @.Laligndestination + ;; LOOP BEGIN + stb.ab r1, [r3,1] + sub r2, r2, 1 +.Laligndestination: + +;;; Destination is aligned + and r1, r1, 0xFF + asl r4, r1, 8 + or r4, r4, r1 + asl r5, r4, 16 + or r5, r5, r4 + mov r4, r5 + + sub3 lp_count, r2, 8 + cmp r2, 64 + bmsk.hi r2, r2, 5 + mov.ls lp_count, 0 + add3.hi r2, r2, 8 + +;;; Convert len to Dwords, unfold x8 + lsr.f lp_count, lp_count, 6 + lpnz @.Lset64bytes + ;; LOOP START + PREWRITE(r3, 64) ;Prefetch the next write location + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] +.Lset64bytes: + + lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes + lpnz .Lset32bytes + ;; LOOP START + prefetchw [r3, 32] ;Prefetch the next write location + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] + std.ab r4, [r3, 8] +.Lset32bytes: + + and.f lp_count, r2, 0x1F ;Last remaining 31 bytes +.Lsmallchunk: + lpnz .Lcopy3bytes + ;; LOOP START + stb.ab r1, [r3, 1] +.Lcopy3bytes: + + j [blink] + +END(memset) +libc_hidden_def(memset) |