diff options
author | Austin Foxley <austinf@cetoncorp.com> | 2009-11-22 12:17:38 -0800 |
---|---|---|
committer | Austin Foxley <austinf@cetoncorp.com> | 2009-11-22 12:17:38 -0800 |
commit | 5c9ef58ec4bcb2def9e30f0b156f9cfcb1d0d163 (patch) | |
tree | f8f889678b653d5275c285a037b9b43f27a91192 /libc/string/sh/strlen.S | |
parent | f757db2d319ccc5f7034165046fb2bb58901afb1 (diff) |
sh: Add new optimisation to the SH4 memcpy
This optimization is based on prefetching and 64-bit data transfers via the FPU
(little-endian only).
Tests show that:
----------------------------------------
Memory bandwidth | Gain
| sh4-300 | sh4-200
----------------------------------------
512 bytes to 16KiB | ~20% | ~25%
from 32KiB to 16MiB | ~190% | ~5%
----------------------------------------
Signed-off-by: Austin Foxley <austinf@cetoncorp.com>
Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: Carmelo Amoroso <carmelo.amoroso@st.com>
Diffstat (limited to 'libc/string/sh/strlen.S')
-rw-r--r-- | libc/string/sh/strlen.S | 75 |
1 files changed, 75 insertions, 0 deletions
diff --git a/libc/string/sh/strlen.S b/libc/string/sh/strlen.S
new file mode 100644
index 000000000..1ccecc17b
--- /dev/null
+++ b/libc/string/sh/strlen.S
@@ -0,0 +1,75 @@
+/* $Id: strlen.S,v 1.2 2001/06/29 14:07:15 gniibe Exp $
+ *
+ * "strlen" implementation of SuperH
+ *
+ * Copyright (C) 1999 Kaz Kojima
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+/* Strategy: test single bytes up to a 4-byte boundary, then one aligned word per loop pass. */
+/* size_t strlen (const char *s) */
+/* In: r4 = s.  Out: r0 = length.  Scratch: r1 (data), r2 (running length), r3 (zero). */
+#include <sysdep.h>
+#include <endian.h>
+
+ENTRY(strlen)
+        mov     r4,r0
+        and     #3,r0           /* r0 = s & 3: offset of s inside its 4-byte word */
+        tst     r0,r0
+        bt/s    1f              /* already word-aligned: go straight to the word loop */
+         mov    #0,r2           /* (delay slot, always runs) running length r2 = 0 */
+/* Misaligned: jump into the unrolled byte checks so that exactly 4 - (s & 3) bytes are tested. */
+        add     #-1,r0
+        shll2   r0
+        shll    r0              /* r0 = ((s & 3) - 1) * 8 */
+        braf    r0              /* target = address of the first mov.b below + r0 */
+         nop                    /* (delay slot) */
+/* Three identical byte checks of 4 instructions (8 bytes) each; the braf offset relies on that size. */
+        mov.b   @r4+,r1         /* entry for (s & 3) == 1: three bytes left before alignment */
+        tst     r1,r1
+        bt      8f              /* terminator found: r2 already holds the length */
+        add     #1,r2
+/* Entry for (s & 3) == 2: two bytes left before alignment. */
+        mov.b   @r4+,r1
+        tst     r1,r1
+        bt      8f
+        add     #1,r2
+/* Entry for (s & 3) == 3: one byte left before alignment. */
+        mov.b   @r4+,r1
+        tst     r1,r1
+        bt      8f
+        add     #1,r2
+/* From here on r4 is 4-byte aligned. */
+1:
+        mov     #0,r3           /* r3 = 0: comparison pattern for cmp/str */
+2:
+        mov.l   @r4+,r1         /* load the next aligned word */
+        cmp/str r3,r1           /* T = 1 if any byte of r1 equals the matching byte of r3, i.e. is zero */
+        bf/s    2b              /* no zero byte in this word: keep scanning */
+         add    #4,r2           /* (delay slot, always runs) count the whole word */
+/* The word in r1 contains the terminator; work out which of its bytes it is. */
+        add     #-4,r2          /* undo the delay-slot add for this final word */
+#ifndef __LITTLE_ENDIAN__
+        swap.b  r1,r1           /* big-endian: reverse the four bytes so that the */
+        swap.w  r1,r1           /* lowest-addressed byte ends up in bits 7..0, */
+        swap.b  r1,r1           /* matching the little-endian register layout */
+#endif
+        extu.b  r1,r0           /* r0 = first (lowest-addressed) byte of the word */
+        tst     r0,r0
+        bt/s    8f
+         shlr8  r1              /* (delay slot, always runs) bring the next byte into bits 7..0 */
+        add     #1,r2
+        extu.b  r1,r0           /* r0 = second byte */
+        tst     r0,r0
+        bt/s    8f
+         shlr8  r1              /* (delay slot, always runs) bring the third byte into bits 7..0 */
+        add     #1,r2
+        extu.b  r1,r0           /* r0 = third byte */
+        tst     r0,r0
+        bt      8f
+        add     #1,r2           /* first three bytes are non-zero, so the terminator is the fourth */
+8:
+        rts
+         mov    r2,r0           /* (delay slot) return the length in r0 */
+END(strlen)
+libc_hidden_def (strlen) |