Diffstat (limited to 'libc/string/csky/cskyv1/memcpy.S')
 libc/string/csky/cskyv1/memcpy.S | 211 +++++++++++++++++++++++++++++++++++++
 1 file changed, 211 insertions(+), 0 deletions(-)
diff --git a/libc/string/csky/cskyv1/memcpy.S b/libc/string/csky/cskyv1/memcpy.S
new file mode 100644
index 000000000..dfa7f64a4
--- /dev/null
+++ b/libc/string/csky/cskyv1/memcpy.S
@@ -0,0 +1,211 @@
+.macro GET_FRONT_BITS rx ry
+#ifdef __cskyLE__
+ lsr \rx, \ry
+#else
+ lsl \rx, \ry
+#endif
+.endm
+
+.macro GET_AFTER_BITS rx ry
+#ifdef __cskyLE__
+ lsl \rx, \ry
+#else
+ lsr \rx, \ry
+#endif
+.endm
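+
+/* A word copied from a misaligned src is assembled from two aligned words:
+   GET_FRONT_BITS extracts the not-yet-stored bytes of the earlier word, and
+   GET_AFTER_BITS moves the leading bytes of the later word into the
+   remaining bit positions.  The shift directions swap between little- and
+   big-endian because the lowest-addressed byte sits at opposite ends of
+   the register. */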
+
+
+#ifdef WANT_WIDE
+# define Wmemcpy wmemcpy
+#else
+# define Wmemcpy memcpy
+#endif
+
+/* void *memcpy(void *dest, const void *src, size_t n); */
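+
+/* On entry (as the code below uses them): dest arrives in r2, src in r3
+   and n in r4.  r2 must survive as the return value, so the copy walks
+   the destination through r7 instead. */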
+
+ .text
+ .align 2
+ .global Wmemcpy
+ .type Wmemcpy, @function
+Wmemcpy:
+ mov r7, r2 /* keep dest intact in r2 for the return value */
+ cmplti r4, 4 /* if len is less than 4 bytes */
+ jbt .L_copy_by_byte
+
+ mov r6, r2
+ andi r6, 3
+ cmpnei r6, 0
+ jbt .L_dest_not_aligned /* if dest is not 4-byte aligned */
+.L0:
+ mov r6, r3
+ andi r6, 3
+ cmpnei r6, 0
+ jbt .L_dest_aligned_but_src_not_aligned /* dest is aligned, but src is not */
+
+ cmplti r4, 16 /* dest and src are both aligned */
+ jbt .L_aligned_and_len_less_16bytes /* if len is less than 16 bytes */
+
+ subi sp, 8 /* spill r8/r9 for the unrolled loop */
+ stw r8, (sp, 0)
+ stw r9, (sp, 4)
+.L_aligned_and_len_larger_16bytes: /* src and dest are both aligned and len >= 16 bytes */
+ ldw r1, (r3, 0)
+ ldw r5, (r3, 4)
+ ldw r8, (r3, 8)
+ ldw r9, (r3, 12)
+ stw r1, (r7, 0)
+ stw r5, (r7, 4)
+ stw r8, (r7, 8)
+ stw r9, (r7, 12)
+ subi r4, 16
+ addi r3, 16
+ addi r7, 16
+ cmplti r4, 16
+ jbf .L_aligned_and_len_larger_16bytes
+ ldw r8, (sp, 0)
+ ldw r9, (sp, 4)
+ addi sp, 8
+
+.L_aligned_and_len_less_16bytes:
+ cmplti r4, 4
+ jbt .L_copy_by_byte
+ ldw r1, (r3, 0)
+ stw r1, (r7, 0)
+ subi r4, 4
+ addi r3, 4
+ addi r7, 4
+ jbr .L_aligned_and_len_less_16bytes
+
+.L_copy_by_byte: /* tail (or overlap fallback): copy one byte at a time */
+ cmpnei r4, 0
+ jbf .L_return
+ ldb r1, (r3, 0)
+ stb r1, (r7, 0)
+ subi r4, 1
+ addi r3, 1
+ addi r7, 1
+ jbr .L_copy_by_byte
+
+.L_return:
+ rts
+
+/* If dest is not aligned, copy bytes one at a time until it is,
+   then check whether src is aligned as well. */
+
+.L_dest_not_aligned:
+ mov r5, r3 /* overlap check: r5 = |dest - src| */
+ rsub r5, r5, r7
+ abs r5, r5
+ cmplt r5, r4 /* if the regions overlap within len bytes, */
+ jbt .L_copy_by_byte /* word copies are unsafe: copy by byte */
+
+.L1:
+ ldb r1, (r3, 0) /* copy bytes until dest is aligned */
+ stb r1, (r7, 0)
+ addi r6, 1
+ subi r4, 1
+ addi r3, 1
+ addi r7, 1
+ cmpnei r6, 4
+ jbt .L1
+ cmplti r4, 4
+ jbt .L_copy_by_byte
+ jbf .L0 /* back to .L0 to check whether src is aligned */
+
+.L_dest_aligned_but_src_not_aligned:
+ mov r5, r3 /* overlap check: r5 = |dest - src| */
+ rsub r5, r5, r7
+ abs r5, r5
+ cmplt r5, r4 /* overlapping regions within len bytes */
+ jbt .L_copy_by_byte /* go copy by byte */
+
+ bclri r3, 0 /* round src down to its enclosing word */
+ bclri r3, 1
+ ldw r1, (r3, 0) /* preload the word holding the first src bytes */
+ addi r3, 4
+
+ subi sp, 16
+ stw r11, (sp,0)
+ stw r12, (sp,4)
+ stw r13, (sp,8)
+ movi r5, 8
+ mult r5, r6 /* r6 holds the misalignment; r5 = 8 * (src & 3) */
+ mov r12, r5 /* r12 = shift applied to the previous word */
+ rsubi r5, 31
+ addi r5, 1 /* r5 = 32 - r12 */
+ mov r13, r5 /* r13 = shift applied to the next word */
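+ /* e.g. src & 3 == 1 on little-endian gives r12 = 8 and r13 = 24: each
+    output word takes 24 bits from the previous source word and 8 bits
+    from the next one. */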
+
+ cmplti r4, 16
+ jbt .L_not_aligned_and_len_less_16bytes
+
+ stw r8, (sp, 12)
+ subi sp, 8
+ stw r9, (sp, 0)
+ stw r10, (sp, 4)
+.L_not_aligned_and_len_larger_16bytes:
+ ldw r5, (r3, 0)
+ ldw r11, (r3, 4)
+ ldw r8, (r3, 8)
+ ldw r9, (r3, 12)
+
+ GET_FRONT_BITS r1 r12 /* bits of the previous word not yet stored */
+ mov r10, r5 /* keep the raw word for the next merge */
+ GET_AFTER_BITS r5 r13
+ or r5, r1 /* complete destination word */
+
+ GET_FRONT_BITS r10 r12
+ mov r1, r11
+ GET_AFTER_BITS r11 r13
+ or r11, r10
+
+ GET_FRONT_BITS r1 r12
+ mov r10, r8
+ GET_AFTER_BITS r8 r13
+ or r8, r1
+
+ GET_FRONT_BITS r10 r12
+ mov r1, r9
+ GET_AFTER_BITS r9 r13
+ or r9, r10
+
+ stw r5, (r7, 0)
+ stw r11, (r7, 4)
+ stw r8, (r7, 8)
+ stw r9, (r7, 12)
+ subi r4, 16
+ addi r3, 16
+ addi r7, 16
+ cmplti r4, 16
+ jbf .L_not_aligned_and_len_larger_16bytes
+ ldw r9, (sp, 0)
+ ldw r10, (sp, 4)
+ addi sp, 8
+ ldw r8, (sp,12)
+
+.L_not_aligned_and_len_less_16bytes:
+ cmplti r4, 4
+ jbf .L2
+ rsubi r6, 4 /* r6 = 4 - (src & 3) */
+ subu r3, r6 /* rewind src to the true byte position for the tail */
+ ldw r11, (sp, 0)
+ ldw r12, (sp, 4)
+ ldw r13, (sp, 8)
+ addi sp, 16
+ jbr .L_copy_by_byte
+.L2:
+ ldw r5, (r3, 0)
+ GET_FRONT_BITS r1 r12
+ mov r11, r1
+ mov r1, r5
+ GET_AFTER_BITS r5 r13
+ or r5, r11
+ stw r5, (r7, 0)
+ subi r4, 4
+ addi r3, 4
+ addi r7, 4
+ jbr .L_not_aligned_and_len_less_16bytes
+
+.size Wmemcpy, .-Wmemcpy
+
+libc_hidden_def(Wmemcpy)
+.weak Wmemcpy
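
For reference, the merge performed by the misaligned-source path can be sketched in C as follows (little-endian case only; memcpy_shift_merge and its variable names are illustrative and not part of this patch). Like the assembly, the sketch reads whole aligned words from src, so it may read a few bytes past the last byte it copies within the final word.

#include <stddef.h>
#include <stdint.h>

/* Sketch: dest is word-aligned, src is misaligned (src & 3 in 1..3), and
   n >= 4.  "front" and "after" play the roles of r12 and r13 above. */
static void *memcpy_shift_merge(void *dest, const void *src, size_t n)
{
    uint32_t *d = dest;
    uintptr_t mis = (uintptr_t)src & 3;                   /* 1..3 */
    const uint32_t *s = (const uint32_t *)((uintptr_t)src - mis);
    unsigned front = 8 * mis;                             /* r12 in the asm */
    unsigned after = 32 - front;                          /* r13 in the asm */
    uint32_t prev = *s++;               /* word holding the first src bytes */

    while (n >= 4) {
        uint32_t next = *s++;
        /* low bytes from the tail of prev, high bytes from next (LE) */
        *d++ = (prev >> front) | (next << after);
        prev = next;
        n -= 4;
    }

    /* rewind to the true byte position for the tail, as the assembly does
       with "rsubi r6, 4; subu r3, r6" */
    unsigned char *db = (unsigned char *)d;
    const unsigned char *sb = (const unsigned char *)s - 4 + mis;
    while (n--)
        *db++ = *sb++;
    return dest;
}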