diff options
Diffstat (limited to 'libc/string')
-rw-r--r-- | libc/string/mips/memcpy.S | 119 | ||||
-rw-r--r-- | libc/string/mips/memset.S | 71 |
2 files changed, 186 insertions, 4 deletions
diff --git a/libc/string/mips/memcpy.S b/libc/string/mips/memcpy.S index 155bc1128..9b05ee6da 100644 --- a/libc/string/mips/memcpy.S +++ b/libc/string/mips/memcpy.S @@ -22,11 +22,124 @@ #include <endian.h> #include "sysdep.h" +/* void *memcpy(void *s1, const void *s2, size_t n); */ + #ifdef __mips64 -#error mips32 code being compiled for mips64! + +#include <sys/asm.h> + +#if __BYTE_ORDER == __BIG_ENDIAN +# define LDHI ldl /* high part is left in big-endian */ +# define SDHI sdl /* high part is left in big-endian */ +# define LDLO ldr /* low part is right in big-endian */ +# define SDLO sdr /* low part is right in big-endian */ +#else +# define LDHI ldr /* high part is right in little-endian */ +# define SDHI sdr /* high part is right in little-endian */ +# define LDLO ldl /* low part is left in little-endian */ +# define SDLO sdl /* low part is left in little-endian */ #endif -/* void *memcpy(void *s1, const void *s2, size_t n); */ +ENTRY (memcpy) + .set noreorder + + slti t0, a2, 16 # Less than 16? + bne t0, zero, L(last16) + move v0, a0 # Setup exit value before too late + + xor t0, a1, a0 # Find a0/a1 displacement + andi t0, 0x7 + bne t0, zero, L(shift) # Go handle the unaligned case + PTR_SUBU t1, zero, a1 + andi t1, 0x7 # a0/a1 are aligned, but are we + beq t1, zero, L(chk8w) # starting in the middle of a word? + PTR_SUBU a2, t1 + LDHI t0, 0(a1) # Yes we are... take care of that + PTR_ADDU a1, t1 + SDHI t0, 0(a0) + PTR_ADDU a0, t1 + +L(chk8w): + andi t0, a2, 0x3f # 64 or more bytes left? + beq t0, a2, L(chk1w) + PTR_SUBU a3, a2, t0 # Yes + PTR_ADDU a3, a1 # a3 = end address of loop + move a2, t0 # a2 = what will be left after loop +L(lop8w): + ld t0, 0(a1) # Loop taking 8 words at a time + ld t1, 8(a1) + ld t2, 16(a1) + ld t3, 24(a1) + ld ta0, 32(a1) + ld ta1, 40(a1) + ld ta2, 48(a1) + ld ta3, 56(a1) + PTR_ADDIU a0, 64 + PTR_ADDIU a1, 64 + sd t0, -64(a0) + sd t1, -56(a0) + sd t2, -48(a0) + sd t3, -40(a0) + sd ta0, -32(a0) + sd ta1, -24(a0) + sd ta2, -16(a0) + bne a1, a3, L(lop8w) + sd ta3, -8(a0) + +L(chk1w): + andi t0, a2, 0x7 # 8 or more bytes left? + beq t0, a2, L(last16) + PTR_SUBU a3, a2, t0 # Yes, handle them one dword at a time + PTR_ADDU a3, a1 # a3 again end address + move a2, t0 +L(lop1w): + ld t0, 0(a1) + PTR_ADDIU a0, 8 + PTR_ADDIU a1, 8 + bne a1, a3, L(lop1w) + sd t0, -8(a0) + +L(last16): + blez a2, L(lst16e) # Handle last 16 bytes, one at a time + PTR_ADDU a3, a2, a1 +L(lst16l): + lb t0, 0(a1) + PTR_ADDIU a0, 1 + PTR_ADDIU a1, 1 + bne a1, a3, L(lst16l) + sb t0, -1(a0) +L(lst16e): + jr ra # Bye, bye + nop + +L(shift): + PTR_SUBU a3, zero, a0 # Src and Dest unaligned + andi a3, 0x7 # (unoptimized case...) + beq a3, zero, L(shft1) + PTR_SUBU a2, a3 # a2 = bytes left + LDHI t0, 0(a1) # Take care of first odd part + LDLO t0, 7(a1) + PTR_ADDU a1, a3 + SDHI t0, 0(a0) + PTR_ADDU a0, a3 +L(shft1): + andi t0, a2, 0x7 + PTR_SUBU a3, a2, t0 + PTR_ADDU a3, a1 +L(shfth): + LDHI t1, 0(a1) # Limp through, dword by dword + LDLO t1, 7(a1) + PTR_ADDIU a0, 8 + PTR_ADDIU a1, 8 + bne a1, a3, L(shfth) + sd t1, -8(a0) + b L(last16) # Handle anything which may be left + move a2, t0 + + .set reorder +END (memcpy) + +#else /* !__mips64 */ #if __BYTE_ORDER == __BIG_ENDIAN # define LWHI lwl /* high part is left in big-endian */ @@ -139,4 +252,6 @@ L(shfth): .set reorder END (memcpy) +#endif /* !__mips64 */ + libc_hidden_def(memcpy) diff --git a/libc/string/mips/memset.S b/libc/string/mips/memset.S index 9169ad58a..ff0554ff9 100644 --- a/libc/string/mips/memset.S +++ b/libc/string/mips/memset.S @@ -22,11 +22,76 @@ #include <endian.h> #include "sysdep.h" +/* void *memset(void *s, int c, size_t n). */ + #ifdef __mips64 -#error mips32 code being compiled for mips64! + +#include <sys/asm.h> + +#if __BYTE_ORDER == __BIG_ENDIAN +# define SDHI sdl /* high part is left in big-endian */ +#else +# define SDHI sdr /* high part is right in little-endian */ #endif -/* void *memset(void *s, int c, size_t n). */ +ENTRY (memset) + .set noreorder + + slti ta1, a2, 16 # Less than 16? + bne ta1, zero, L(last16) + move v0, a0 # Setup exit value before too late + + beq a1, zero, L(ueven) # If zero pattern, no need to extend + andi a1, 0xff # Avoid problems with bogus arguments + dsll ta0, a1, 8 + or a1, ta0 + dsll ta0, a1, 16 + or a1, ta0 # a1 is now pattern in full word + dsll ta0, a1, 32 + or a1, ta0 # a1 is now pattern in double word + +L(ueven): + PTR_SUBU ta0, zero, a0 # Unaligned address? + andi ta0, 0x7 + beq ta0, zero, L(chkw) + PTR_SUBU a2, ta0 + SDHI a1, 0(a0) # Yes, handle first unaligned part + PTR_ADDU a0, ta0 # Now both a0 and a2 are updated + +L(chkw): + andi ta0, a2, 0xf # Enough left for one loop iteration? + beq ta0, a2, L(chkl) + PTR_SUBU a3, a2, ta0 + PTR_ADDU a3, a0 # a3 is last loop address +1 + move a2, ta0 # a2 is now # of bytes left after loop +L(loopw): + PTR_ADDIU a0, 16 # Handle 2 dwords pr. iteration + sd a1, -16(a0) + bne a0, a3, L(loopw) + sd a1, -8(a0) + +L(chkl): + andi ta0, a2, 0x8 # Check if there is at least a double + beq ta0, zero, L(last16) # word remaining after the loop + PTR_SUBU a2, ta0 + sd a1, 0(a0) # Yes... + PTR_ADDIU a0, 8 + +L(last16): + blez a2, L(exit) # Handle last 16 bytes (if cnt>0) + PTR_ADDU a3, a2, a0 # a3 is last address +1 +L(lst16l): + PTR_ADDIU a0, 1 + bne a0, a3, L(lst16l) + sb a1, -1(a0) +L(exit): + j ra # Bye, bye + nop + + .set reorder +END (memset) + +#else /* !__mips64 */ #if __BYTE_ORDER == __BIG_ENDIAN # define SWHI swl /* high part is left in big-endian */ @@ -89,4 +154,6 @@ L(exit): .set reorder END (memset) +#endif /* !__mips64 */ + libc_hidden_def(memset) |