summaryrefslogtreecommitdiff
path: root/libc/string/csky/cskyv1
diff options
context:
space:
mode:
Diffstat (limited to 'libc/string/csky/cskyv1')
-rw-r--r--libc/string/csky/cskyv1/memcpy.S211
-rw-r--r--libc/string/csky/cskyv1/strcmp.S185
-rw-r--r--libc/string/csky/cskyv1/strcpy.S139
3 files changed, 535 insertions, 0 deletions
diff --git a/libc/string/csky/cskyv1/memcpy.S b/libc/string/csky/cskyv1/memcpy.S
new file mode 100644
index 000000000..dfa7f64a4
--- /dev/null
+++ b/libc/string/csky/cskyv1/memcpy.S
@@ -0,0 +1,211 @@
+.macro GET_FRONT_BITS rx ry
+#ifdef __cskyLE__
+ lsr \rx, \ry
+#else
+ lsl \rx, \ry
+#endif
+.endm
+
+.macro GET_AFTER_BITS rx ry
+#ifdef __cskyLE__
+ lsl \rx, \ry
+#else
+ lsr \rx, \ry
+#endif
+.endm
+
+
+#ifdef WANT_WIDE
+# define Wmemcpy wmemcpy
+#else
+# define Wmemcpy memcpy
+#endif
+
+/* void *memcpy(void *dest, const void *src, size_t n); */
+
+ .text
+ .align 2
+ .global Wmemcpy
+ .type Wmemcpy, @function
+Wmemcpy:
+ mov r7, r2
+ cmplti r4, 4 /* If len less than 4 bytes */
+ jbt .L_copy_by_byte
+
+ mov r6, r2
+ andi r6, 3
+ cmpnei r6, 0
+ jbt .L_dest_not_aligned /* If dest is not 4 bytes aligned */
+.L0:
+ mov r6, r3
+ andi r6, 3
+ cmpnei r6, 0
+ jbt .L_dest_aligned_but_src_not_aligned /* If dest is aligned, but src is not aligned */
+
+ cmplti r4, 16 /* dest and src are all aligned */
+ jbt .L_aligned_and_len_less_16bytes /* If len less than 16 bytes */
+
+ subi sp, 8
+ stw r8, (sp, 0)
+ stw r9, (sp, 4)
+.L_aligned_and_len_larger_16bytes: /* src and dst are all aligned, and len > 16 bytes */
+ ldw r1, (r3, 0)
+ ldw r5, (r3, 4)
+ ldw r8, (r3, 8)
+ ldw r9, (r3, 12)
+ stw r1, (r7, 0)
+ stw r5, (r7, 4)
+ stw r8, (r7, 8)
+ stw r9, (r7, 12)
+ subi r4, 16
+ addi r3, 16
+ addi r7, 16
+ cmplti r4, 16
+ jbf .L_aligned_and_len_larger_16bytes
+ ldw r8, (sp, 0)
+ ldw r9, (sp, 4)
+ addi sp, 8
+
+.L_aligned_and_len_less_16bytes:
+ cmplti r4, 4
+ jbt .L_copy_by_byte
+ ldw r1, (r3, 0)
+ stw r1, (r7, 0)
+ subi r4, 4
+ addi r3, 4
+ addi r7, 4
+ jbr .L_aligned_and_len_less_16bytes
+
+.L_copy_by_byte: /* len less than 4 bytes */
+ cmpnei r4, 0
+ jbf .L_return
+ ldb r1, (r3, 0)
+ stb r1, (r7, 0)
+ subi r4, 1
+ addi r3, 1
+ addi r7, 1
+ jbr .L_copy_by_byte
+
+.L_return:
+ rts
+
+/* If dest is not aligned, we copy some bytes to make dest align.
+ Then we should judge whether src is aligned. */
+
+.L_dest_not_aligned:
+ mov r5, r3 /* consider overlapped case */
+ rsub r5, r5, r7
+ abs r5, r5
+ cmplt r5, r4
+ jbt .L_copy_by_byte
+
+.L1:
+ ldb r1, (r3, 0) /* makes the dest align. */
+ stb r1, (r7, 0)
+ addi r6, 1
+ subi r4, 1
+ addi r3, 1
+ addi r7, 1
+ cmpnei r6, 4
+ jbt .L1
+ cmplti r4, 4
+ jbt .L_copy_by_byte
+ jbf .L0 /* judge whether the src is aligned. */
+
+.L_dest_aligned_but_src_not_aligned:
+ mov r5, r3 /* consider overlapped case*/
+ rsub r5, r5, r7
+ abs r5, r5
+ cmplt r5, r4
+ jbt .L_copy_by_byte
+
+ bclri r3, 0
+ bclri r3, 1
+ ldw r1, (r3, 0)
+ addi r3, 4
+
+ subi sp, 16
+ stw r11, (sp,0)
+ stw r12, (sp,4)
+ stw r13, (sp,8)
+ movi r5, 8
+ mult r5, r6 /* r6 is used to store tne misaligned bits */
+ mov r12, r5
+ rsubi r5, 31
+ addi r5, 1
+ mov r13, r5
+
+ cmplti r4, 16
+ jbt .L_not_aligned_and_len_less_16bytes
+
+ stw r8, (sp, 12)
+ subi sp, 8
+ stw r9, (sp, 0)
+ stw r10, (sp, 4)
+.L_not_aligned_and_len_larger_16bytes:
+ ldw r5, (r3, 0)
+ ldw r11, (r3, 4)
+ ldw r8, (r3, 8)
+ ldw r9, (r3, 12)
+
+ GET_FRONT_BITS r1 r12 /* little or big endian? */
+ mov r10, r5
+ GET_AFTER_BITS r5 r13
+ or r5, r1
+
+ GET_FRONT_BITS r10 r12
+ mov r1, r11
+ GET_AFTER_BITS r11 r13
+ or r11, r10
+
+ GET_FRONT_BITS r1 r12
+ mov r10, r8
+ GET_AFTER_BITS r8 r13
+ or r8, r1
+
+ GET_FRONT_BITS r10 r12
+ mov r1, r9
+ GET_AFTER_BITS r9 r13
+ or r9, r10
+
+ stw r5, (r7, 0)
+ stw r11, (r7, 4)
+ stw r8, (r7, 8)
+ stw r9, (r7, 12)
+ subi r4, 16
+ addi r3, 16
+ addi r7, 16
+ cmplti r4, 16
+ jbf .L_not_aligned_and_len_larger_16bytes
+ ldw r9, (sp, 0)
+ ldw r10, (sp, 4)
+ addi sp, 8
+ ldw r8, (sp,12)
+
+.L_not_aligned_and_len_less_16bytes:
+ cmplti r4, 4
+ jbf .L2
+ rsubi r6, 4 /* r6 is used to stored the misaligned bits */
+ subu r3, r6 /* initial the position */
+ ldw r11, (sp, 0)
+ ldw r12, (sp, 4)
+ ldw r13, (sp, 8)
+ addi sp, 16
+ jbr .L_copy_by_byte
+.L2:
+ ldw r5, (r3, 0)
+ GET_FRONT_BITS r1 r12
+ mov r11, r1
+ mov r1, r5
+ GET_AFTER_BITS r5 r13
+ or r5, r11
+ stw r5, (r7, 0)
+ subi r4, 4
+ addi r3, 4
+ addi r7, 4
+ jbr .L_not_aligned_and_len_less_16bytes
+
+.size Wmemcpy, .-Wmemcpy
+
+libc_hidden_def(Wmemcpy)
+.weak Wmemcpy
diff --git a/libc/string/csky/cskyv1/strcmp.S b/libc/string/csky/cskyv1/strcmp.S
new file mode 100644
index 000000000..e22f29ebd
--- /dev/null
+++ b/libc/string/csky/cskyv1/strcmp.S
@@ -0,0 +1,185 @@
+#include <features.h>
+#include <endian.h>
+
+#ifdef WANT_WIDE
+# define Wstrcmp wcscmp
+# define Wstrcoll wcscoll
+#else
+# define Wstrcmp strcmp
+# define Wstrcoll strcoll
+#endif
+
+/* FIXME attention!!! it may be a bug when WANT_WIDE define */
+/*libc_hidden_proto(Wstrcmp)*/
+ .align 2
+ .global Wstrcmp
+ .type Wstrcmp, @function
+Wstrcmp:
+ mov r6, r2
+
+ or r2, r3
+ andi r2, 0x3
+ cmpnei r2, 0x0 /* d or s is aligned ?*/
+ bt 4f /* if not aligned, goto 4f*/
+ 1: /* if aligned, load word each time.*/
+ ldw r2, (r6, 0)
+ ldw r7, (r3, 0)
+ cmpne r2, r7
+ bt 1f /* if d[i] != s[i], goto 1f */
+ tstnbz r2 /* if d[i] == s[i], check if d or s is at the end. */
+ bf 3f /* if at the end, goto 3f (finish comparing) */
+
+ ldw r2, (r6, 4)
+ ldw r7, (r3, 4)
+ cmpne r2, r7
+ bt 1f
+ tstnbz r2
+ bf 3f
+
+ ldw r2, (r6, 8)
+ ldw r7, (r3, 8)
+ cmpne r2, r7
+ bt 1f
+ tstnbz r2
+ bf 3f
+
+ ldw r2, (r6, 12)
+ ldw r7, (r3, 12)
+ cmpne r2, r7
+ bt 1f
+ tstnbz r2
+ bf 3f
+
+ ldw r2, (r6, 16)
+ ldw r7, (r3, 16)
+ cmpne r2, r7
+ bt 1f
+ tstnbz r2
+ bf 3f
+
+ ldw r2, (r6, 20)
+ ldw r7, (r3, 20)
+ cmpne r2, r7
+ bt 1f
+ tstnbz r2
+ bf 3f
+
+ ldw r2, (r6, 24)
+ ldw r7, (r3, 24)
+ cmpne r2, r7
+ bt 1f
+ tstnbz r2
+ bf 3f
+
+ ldw r2, (r6, 28)
+ ldw r7, (r3, 28)
+ cmpne r2, r7
+ bt 1f
+ tstnbz r2
+ bf 3f
+
+ addi r6, 32
+ addi r3, 32
+ br 1b
+
+#ifdef __CSKYBE__
+ /* d[i] != s[i] in word, so we check byte 0 ? */
+ 1:
+ xtrb0 r1, r2
+ mov r4, r1
+ xtrb0 r1, r7
+ cmpne r4, r1
+ bt 2f
+ cmpnei r4, 0
+ bf 2f
+
+ /* d[i] != s[i] in word, so we check byte 1 ? */
+ xtrb1 r1, r2
+ mov r4, r1
+ xtrb1 r1, r7
+ cmpne r4, r1
+ bt 2f
+ cmpnei r4, 0
+ bf 2f
+
+ /* d[i] != s[i] in word, so we check byte 1 ? */
+ xtrb2 r1, r2
+ mov r4, r1
+ xtrb2 r1, r7
+ cmpne r4, r1
+ bt 2f
+ cmpnei r4, 0
+ bf 2f
+
+ /* d[i] != s[i] in word, so we check byte 1 ? */
+ xtrb3 r1, r2
+ mov r4, r1
+ xtrb3 r1, r7
+
+#else /* little endian */
+ /* d[i] != s[i] in word, so we check byte 0 ? */
+1:
+ xtrb3 r1, r2
+ mov r4, r1
+ xtrb3 r1, r7
+ cmpne r4, r1
+ bt 2f
+ cmpnei r4, 0
+ bf 2f
+
+ /* d[i] != s[i] in word, so we check byte 1 ? */
+ xtrb2 r1, r2
+ mov r4, r1
+ xtrb2 r1, r7
+ cmpne r4, r1
+ bt 2f
+ cmpnei r4, 0
+ bf 2f
+
+ /* d[i] != s[i] in word, so we check byte 1 ? */
+ xtrb1 r1, r2
+ mov r4, r1
+ xtrb1 r1, r7
+ cmpne r4, r1
+ bt 2f
+ cmpnei r4, 0
+ bf 2f
+
+ /* d[i] != s[i] in word, so we check byte 1 ? */
+ xtrb0 r1, r2
+ mov r4, r1
+ xtrb0 r1, r7
+
+#endif
+ /* get the result when d[i] != s[i] */
+2:
+ subu r4, r1
+ mov r2, r4
+ jmp r15
+
+ /* return when d[i] == s[i] */
+3:
+ subu r2, r7
+ jmp r15
+
+ /* cmp when d or s is not aligned */
+4:
+ ldb r2, (r6,0)
+ ldb r7, (r3, 0)
+ cmpne r2, r7
+ bt 3b
+ addi r3, 1
+ addi r6, 1
+ cmpnei r2, 0
+ bt 4b
+ jmp r15
+
+ .size Wstrcmp, .-Wstrcmp
+
+libc_hidden_def(Wstrcmp)
+.weak Wstrcmp
+#ifndef __UCLIBC_HAS_LOCALE__
+/* libc_hidden_proto(Wstrcoll) */
+strong_alias(Wstrcmp,Wstrcoll)
+libc_hidden_def(Wstrcoll)
+#endif
diff --git a/libc/string/csky/cskyv1/strcpy.S b/libc/string/csky/cskyv1/strcpy.S
new file mode 100644
index 000000000..c2f1e7a0f
--- /dev/null
+++ b/libc/string/csky/cskyv1/strcpy.S
@@ -0,0 +1,139 @@
+#include <features.h>
+#include <endian.h>
+
+#ifdef WANT_WIDE
+# define Wstrcpy wcscpy
+#else
+# define Wstrcpy strcpy
+#endif
+
+ .align 2
+ .global Wstrcpy
+ .type Wstrcpy, @function
+Wstrcpy:
+
+ mov r6, r2
+ mov r7, r3
+ or r7, r6
+ andi r7, 3
+ cmpnei r7, 0
+ bf 2f
+ 1:
+ ldb r5, (r3)
+ stb r5, (r6)
+ addi r3, 1
+ addi r6, 1
+ cmpnei r5, 0
+ bt 1b
+ 1:
+ jmp r15
+
+ 2:
+ ldw r5, (r3)
+ tstnbz r5
+ bf 10f
+ stw r5, (r6)
+
+ ldw r5, (r3, 4)
+ tstnbz r5
+ bf 3f
+ stw r5, (r6, 4)
+
+ ldw r5, (r3, 8)
+ tstnbz r5
+ bf 4f
+ stw r5, (r6, 8)
+
+ ldw r5, (r3, 12)
+ tstnbz r5
+ bf 5f
+ stw r5, (r6, 12)
+
+ ldw r5, (r3, 16)
+ tstnbz r5
+ bf 6f
+ stw r5, (r6, 16)
+
+ ldw r5, (r3, 20)
+ tstnbz r5
+ bf 7f
+ stw r5, (r6, 20)
+
+ ldw r5, (r3, 24)
+ tstnbz r5
+ bf 8f
+ stw r5, (r6, 24)
+
+ ldw r5, (r3, 28)
+ tstnbz r5
+ bf 9f
+ stw r5, (r6, 28)
+
+ addi r6, 32
+ addi r3, 32
+ br 2b
+
+ 3:
+ addi r6, 4
+ br 10f
+
+ 4:
+ addi r6, 8
+ br 10f
+
+ 5:
+ addi r6, 12
+ br 10f
+
+ 6:
+ addi r6, 16
+ br 10f
+
+ 7:
+ addi r6, 20
+ br 10f
+
+ 8:
+ addi r6, 24
+ br 10f
+
+ 9:
+ addi r6, 28
+
+ 10:
+#ifdef __CSKYBE__
+ xtrb0 r1, r5
+ stb r1, (r6)
+ cmpnei r1, 0
+ bf 5f
+ xtrb1 r1, r5
+ stb r1, (r6, 1)
+ cmpnei r1, 0
+ bf 5f
+ xtrb2 r1, r5
+ stb r1, (r6, 2 )
+ cmpnei r1, 0
+ bf 5f
+ stw r5, (r6)
+
+#else
+ xtrb3 r1, r5
+ stb r1, (r6)
+ cmpnei r1, 0
+ bf 5f
+ xtrb2 r1, r5
+ stb r1, (r6, 1)
+ cmpnei r1, 0
+ bf 5f
+ xtrb1 r1, r5
+ stb r1, (r6, 2)
+ cmpnei r1, 0
+ bf 5f
+ stw r5, (r6)
+#endif
+ 5:
+ jmp r15
+
+
+libc_hidden_def(Wstrcpy)
+.weak Wstrcpy