diff options
Diffstat (limited to 'libc/string/csky/cskyv2/abiv2_memcpy.S')
-rw-r--r-- | libc/string/csky/cskyv2/abiv2_memcpy.S | 184 |
1 files changed, 184 insertions, 0 deletions
diff --git a/libc/string/csky/cskyv2/abiv2_memcpy.S b/libc/string/csky/cskyv2/abiv2_memcpy.S new file mode 100644 index 000000000..c112ec01b --- /dev/null +++ b/libc/string/csky/cskyv2/abiv2_memcpy.S @@ -0,0 +1,184 @@ +.macro GET_FRONT_BITS rx ry +#ifdef __cskyLE__ + lsr \rx, \ry +#else + lsl \rx, \ry +#endif +.endm + +.macro GET_AFTER_BITS rx ry +#ifdef __cskyLE__ + lsl \rx, \ry +#else + lsr \rx, \ry +#endif +.endm + + +#ifdef WANT_WIDE +# define Wmemcpy wmemcpy +#else +# define Wmemcpy memcpy +#endif + +/* void *memcpy(void *dest, const void *src, size_t n); */ + + .text + .align 2 + .global Wmemcpy + .type Wmemcpy, @function +Wmemcpy: + mov r3, r0 + cmplti r2, 4 /* If len less than 4 bytes */ + jbt .L_copy_by_byte + + mov r12, r0 + andi r12, 3 + bnez r12, .L_dest_not_aligned /* If dest is not 4 bytes aligned */ +.L0: + mov r12, r1 + andi r12, 3 + bnez r12, .L_dest_aligned_but_src_not_aligned /* If dest is aligned, but src is not aligned */ + + cmplti r2, 16 /* dest and src are all aligned */ + jbt .L_aligned_and_len_less_16bytes /* If len less than 16 bytes */ + +.L_aligned_and_len_larger_16bytes: /* src and dst are all aligned, and len > 16 bytes */ + ldw r18, (r1, 0) + ldw r19, (r1, 4) + ldw r20, (r1, 8) + ldw r21, (r1, 12) + stw r18, (r3, 0) + stw r19, (r3, 4) + stw r20, (r3, 8) + stw r21, (r3, 12) + subi r2, 16 + addi r1, 16 + addi r3, 16 + cmplti r2, 16 + jbf .L_aligned_and_len_larger_16bytes + +.L_aligned_and_len_less_16bytes: + cmplti r2, 4 + jbt .L_copy_by_byte + ldw r18, (r1, 0) + stw r18, (r3, 0) + subi r2, 4 + addi r1, 4 + addi r3, 4 + jbr .L_aligned_and_len_less_16bytes + +.L_copy_by_byte: /* len less than 4 bytes */ + cmpnei r2, 0 + jbf .L_return + ldb r18, (r1, 0) + stb r18, (r3, 0) + subi r2, 1 + addi r1, 1 + addi r3, 1 + jbr .L_copy_by_byte + +.L_return: + rts + +/* If dest is not aligned, just copying some bytes makes the dest align. + After that, we judge whether the src is aligned. */ + +.L_dest_not_aligned: + rsub r13, r1, r3 /* consider overlapped case */ + abs r13, r13 + cmplt r13, r2 + jbt .L_copy_by_byte + +.L1: + ldb r18, (r1, 0) /* makes the dest align. */ + stb r18, (r3, 0) + addi r12, 1 + subi r2, 1 + addi r1, 1 + addi r3, 1 + cmpnei r12, 4 + jbt .L1 + cmplti r2, 4 + jbt .L_copy_by_byte + jbf .L0 /* judge whether the src is aligned. */ + +.L_dest_aligned_but_src_not_aligned: + rsub r13, r1, r3 /* consider overlapped case */ + abs r13, r13 + cmplt r13, r2 + jbt .L_copy_by_byte + + bclri r1, 0 + bclri r1, 1 + ldw r18, (r1, 0) + addi r1, 4 + + movi r13, 8 + mult r13, r12 + mov r24, r13 /* r12 is used to store the misaligned bits */ + rsubi r13, 32 + mov r25, r13 + + cmplti r2, 16 + jbt .L_not_aligned_and_len_less_16bytes + +.L_not_aligned_and_len_larger_16bytes: + ldw r20, (r1, 0) + ldw r21, (r1, 4) + ldw r22, (r1, 8) + ldw r23, (r1, 12) + + GET_FRONT_BITS r18 r24 /* little or big endian? */ + mov r19, r20 + GET_AFTER_BITS r20 r25 + or r20, r18 + + GET_FRONT_BITS r19 r24 + mov r18, r21 + GET_AFTER_BITS r21 r13 + or r21, r19 + + GET_FRONT_BITS r18 r24 + mov r19, r22 + GET_AFTER_BITS r22 r25 + or r22, r18 + + GET_FRONT_BITS r19 r24 + mov r18, r23 + GET_AFTER_BITS r23 r25 + or r23, r19 + + stw r20, (r3, 0) + stw r21, (r3, 4) + stw r22, (r3, 8) + stw r23, (r3, 12) + subi r2, 16 + addi r1, 16 + addi r3, 16 + cmplti r2, 16 + jbf .L_not_aligned_and_len_larger_16bytes + +.L_not_aligned_and_len_less_16bytes: + cmplti r2, 4 + jbf .L2 + rsubi r12, 4 /* r12 is used to stored the misaligned bits */ + subu r1, r12 /* initial the position */ + jbr .L_copy_by_byte +.L2: + ldw r21, (r1, 0) + GET_FRONT_BITS r18 r24 + mov r19, r18 + mov r18, r21 + GET_AFTER_BITS r21 r25 + or r21, r19 + stw r21, (r3, 0) + subi r2, 4 + addi r1, 4 + addi r3, 4 + jbr .L_not_aligned_and_len_less_16bytes + +.size Wmemcpy, .-Wmemcpy + +libc_hidden_def(Wmemcpy) +.weak Wmemcpy |