summaryrefslogtreecommitdiff
path: root/libc/string
diff options
context:
space:
mode:
Diffstat (limited to 'libc/string')
-rw-r--r--libc/string/csky/Makefile6
-rw-r--r--libc/string/csky/cskyv1/memcpy.S211
-rw-r--r--libc/string/csky/cskyv1/strcmp.S185
-rw-r--r--libc/string/csky/cskyv1/strcpy.S139
-rw-r--r--libc/string/csky/cskyv2/abiv2_memcpy.S184
-rw-r--r--libc/string/csky/cskyv2/abiv2_strcmp.S168
-rw-r--r--libc/string/csky/cskyv2/abiv2_strcpy.S129
-rw-r--r--libc/string/csky/cskyv2/macro.S13
-rw-r--r--libc/string/csky/memcpy.S7
-rw-r--r--libc/string/csky/strcmp.S7
-rw-r--r--libc/string/csky/strcpy.S7
11 files changed, 1056 insertions, 0 deletions
diff --git a/libc/string/csky/Makefile b/libc/string/csky/Makefile
new file mode 100644
index 000000000..ce5add623
--- /dev/null
+++ b/libc/string/csky/Makefile
@@ -0,0 +1,6 @@
+top_srcdir:=../../../
+top_builddir:=../../../
+all: objs # default goal; 'objs' is not defined in this file - presumably provided by one of the includes below
+include $(top_builddir)Rules.mak
+include ../Makefile.in
+include $(top_srcdir)Makerules
diff --git a/libc/string/csky/cskyv1/memcpy.S b/libc/string/csky/cskyv1/memcpy.S
new file mode 100644
index 000000000..dfa7f64a4
--- /dev/null
+++ b/libc/string/csky/cskyv1/memcpy.S
@@ -0,0 +1,211 @@
+.macro GET_FRONT_BITS rx ry /* shift rx by ry bits: right when __cskyLE__ is defined, left otherwise */
+#ifdef __cskyLE__
+ lsr \rx, \ry
+#else /* !__cskyLE__ */
+ lsl \rx, \ry
+#endif /* __cskyLE__ */
+.endm
+
+.macro GET_AFTER_BITS rx ry /* opposite direction of GET_FRONT_BITS: left when __cskyLE__, right otherwise */
+#ifdef __cskyLE__
+ lsl \rx, \ry
+#else /* !__cskyLE__ */
+ lsr \rx, \ry
+#endif /* __cskyLE__ */
+.endm
+
+
+#ifdef WANT_WIDE
+# define Wmemcpy wmemcpy
+#else
+# define Wmemcpy memcpy
+#endif
+
+/* void *memcpy(void *dest, const void *src, size_t n);  In: r2 = dest, r3 = src, r4 = n.  r2 is never written, so dest is what the caller gets back. */
+
+ .text
+ .align 2
+ .global Wmemcpy
+ .type Wmemcpy, @function
+Wmemcpy:
+ mov r7, r2 /* r7 = running dest pointer */
+ cmplti r4, 4 /* If len less than 4 bytes */
+ jbt .L_copy_by_byte
+
+ mov r6, r2
+ andi r6, 3 /* r6 = dest & 3 */
+ cmpnei r6, 0
+ jbt .L_dest_not_aligned /* If dest is not 4 bytes aligned */
+.L0:
+ mov r6, r3
+ andi r6, 3 /* r6 = src & 3 */
+ cmpnei r6, 0
+ jbt .L_dest_aligned_but_src_not_aligned /* If dest is aligned, but src is not aligned */
+
+ cmplti r4, 16 /* dest and src are both aligned */
+ jbt .L_aligned_and_len_less_16bytes /* If len less than 16 bytes */
+
+ subi sp, 8 /* spill r8/r9: they serve as extra copy registers in the loop below */
+ stw r8, (sp, 0)
+ stw r9, (sp, 4)
+.L_aligned_and_len_larger_16bytes: /* src and dst are both aligned, and len >= 16 bytes: 4 words per pass */
+ ldw r1, (r3, 0)
+ ldw r5, (r3, 4)
+ ldw r8, (r3, 8)
+ ldw r9, (r3, 12)
+ stw r1, (r7, 0)
+ stw r5, (r7, 4)
+ stw r8, (r7, 8)
+ stw r9, (r7, 12)
+ subi r4, 16
+ addi r3, 16
+ addi r7, 16
+ cmplti r4, 16
+ jbf .L_aligned_and_len_larger_16bytes /* loop while at least 16 bytes remain */
+ ldw r8, (sp, 0) /* restore r8/r9 and release the spill area */
+ ldw r9, (sp, 4)
+ addi sp, 8
+
+.L_aligned_and_len_less_16bytes: /* aligned, fewer than 16 bytes left: one word per pass */
+ cmplti r4, 4
+ jbt .L_copy_by_byte
+ ldw r1, (r3, 0)
+ stw r1, (r7, 0)
+ subi r4, 4
+ addi r3, 4
+ addi r7, 4
+ jbr .L_aligned_and_len_less_16bytes
+
+.L_copy_by_byte: /* byte loop: short copies, tails of the word loops, and the overlap fallback */
+ cmpnei r4, 0
+ jbf .L_return /* nothing left to copy */
+ ldb r1, (r3, 0)
+ stb r1, (r7, 0)
+ subi r4, 1
+ addi r3, 1
+ addi r7, 1
+ jbr .L_copy_by_byte
+
+.L_return:
+ rts
+
+/* If dest is not aligned, copy single bytes until it is (r6 counts from dest & 3 up to 4).
+ Then go back to .L0 to check whether src is aligned. */
+
+.L_dest_not_aligned:
+ mov r5, r3 /* consider overlapped case */
+ rsub r5, r5, r7
+ abs r5, r5 /* r5 = distance between the src and dest cursors */
+ cmplt r5, r4
+ jbt .L_copy_by_byte /* distance < len: buffers overlap, use the plain byte loop */
+
+.L1:
+ ldb r1, (r3, 0) /* makes the dest align. */
+ stb r1, (r7, 0)
+ addi r6, 1
+ subi r4, 1
+ addi r3, 1
+ addi r7, 1
+ cmpnei r6, 4
+ jbt .L1 /* repeat until r6 reaches 4, i.e. dest is word aligned */
+ cmplti r4, 4
+ jbt .L_copy_by_byte
+ jbf .L0 /* judge whether the src is aligned. */
+
+.L_dest_aligned_but_src_not_aligned:
+ mov r5, r3 /* consider overlapped case*/
+ rsub r5, r5, r7
+ abs r5, r5 /* r5 = distance between the src and dest cursors */
+ cmplt r5, r4
+ jbt .L_copy_by_byte /* distance < len: buffers overlap, use the plain byte loop */
+
+ bclri r3, 0 /* round src down to a word boundary (clear bits 0 and 1) */
+ bclri r3, 1
+ ldw r1, (r3, 0) /* r1 = word holding the first source bytes; it stays the "previous word" below */
+ addi r3, 4
+
+ subi sp, 16 /* 16-byte frame: r11, r12, r13 now, r8 at offset 12 only on the >= 16 byte path */
+ stw r11, (sp,0)
+ stw r12, (sp,4)
+ stw r13, (sp,8)
+ movi r5, 8
+ mult r5, r6 /* r6 holds the misaligned byte count (src & 3); r5 = 8 * r6 bits */
+ mov r12, r5 /* r12 = shift count for GET_FRONT_BITS */
+ rsubi r5, 31
+ addi r5, 1 /* r5 = 32 - r12 */
+ mov r13, r5 /* r13 = shift count for GET_AFTER_BITS */
+
+ cmplti r4, 16
+ jbt .L_not_aligned_and_len_less_16bytes
+
+ stw r8, (sp, 12) /* spill r8, then r9/r10 in a second 8-byte frame */
+ subi sp, 8
+ stw r9, (sp, 0)
+ stw r10, (sp, 4)
+.L_not_aligned_and_len_larger_16bytes: /* 16 bytes per pass; each output word merges two adjacent source words */
+ ldw r5, (r3, 0)
+ ldw r11, (r3, 4)
+ ldw r8, (r3, 8)
+ ldw r9, (r3, 12)
+
+ GET_FRONT_BITS r1 r12 /* shift direction depends on endianness, see the macros */
+ mov r10, r5 /* keep an unshifted copy of word 0 for the next step */
+ GET_AFTER_BITS r5 r13
+ or r5, r1 /* output word 0 */
+
+ GET_FRONT_BITS r10 r12
+ mov r1, r11 /* keep an unshifted copy of word 1 */
+ GET_AFTER_BITS r11 r13
+ or r11, r10 /* output word 1 */
+
+ GET_FRONT_BITS r1 r12
+ mov r10, r8 /* keep an unshifted copy of word 2 */
+ GET_AFTER_BITS r8 r13
+ or r8, r1 /* output word 2 */
+
+ GET_FRONT_BITS r10 r12
+ mov r1, r9 /* word 3 becomes the "previous word" of the next pass */
+ GET_AFTER_BITS r9 r13
+ or r9, r10 /* output word 3 */
+
+ stw r5, (r7, 0)
+ stw r11, (r7, 4)
+ stw r8, (r7, 8)
+ stw r9, (r7, 12)
+ subi r4, 16
+ addi r3, 16
+ addi r7, 16
+ cmplti r4, 16
+ jbf .L_not_aligned_and_len_larger_16bytes /* loop while at least 16 bytes remain */
+ ldw r9, (sp, 0) /* undo the second frame, then reload r8 from the first one */
+ ldw r10, (sp, 4)
+ addi sp, 8
+ ldw r8, (sp,12)
+
+.L_not_aligned_and_len_less_16bytes: /* misaligned src, fewer than 16 bytes left */
+ cmplti r4, 4
+ jbf .L2 /* at least one more whole word to produce */
+ rsubi r6, 4 /* r6 = 4 - misaligned byte count */
+ subu r3, r6 /* move r3 back from the next aligned word to the next uncopied source byte */
+ ldw r11, (sp, 0) /* restore r11-r13 and drop the 16-byte frame */
+ ldw r12, (sp, 4)
+ ldw r13, (sp, 8)
+ addi sp, 16
+ jbr .L_copy_by_byte
+.L2: /* produce one output word from the previous word (r1) and the next source word */
+ ldw r5, (r3, 0)
+ GET_FRONT_BITS r1 r12
+ mov r11, r1
+ mov r1, r5 /* the word just loaded becomes the "previous word" */
+ GET_AFTER_BITS r5 r13
+ or r5, r11
+ stw r5, (r7, 0)
+ subi r4, 4
+ addi r3, 4
+ addi r7, 4
+ jbr .L_not_aligned_and_len_less_16bytes
+
+.size Wmemcpy, .-Wmemcpy
+
+libc_hidden_def(Wmemcpy)
+.weak Wmemcpy
diff --git a/libc/string/csky/cskyv1/strcmp.S b/libc/string/csky/cskyv1/strcmp.S
new file mode 100644
index 000000000..e22f29ebd
--- /dev/null
+++ b/libc/string/csky/cskyv1/strcmp.S
@@ -0,0 +1,185 @@
+#include <features.h>
+#include <endian.h>
+
+#ifdef WANT_WIDE
+# define Wstrcmp wcscmp
+# define Wstrcoll wcscoll
+#else
+# define Wstrcmp strcmp
+# define Wstrcoll strcoll
+#endif
+
+/* FIXME attention!!! it may be a bug when WANT_WIDE define */
+/*libc_hidden_proto(Wstrcmp)*/
+ .align 2
+ .global Wstrcmp
+ .type Wstrcmp, @function
+Wstrcmp: /* In: r2 = d (first string), r3 = s (second string).  Out: r2 = difference of the deciding bytes, 0 if equal. */
+ mov r6, r2 /* r6 = cursor into d; r2 is reused as scratch and result */
+
+ or r2, r3
+ andi r2, 0x3
+ cmpnei r2, 0x0 /* are both d and s 4-byte aligned? */
+ bt 4f /* if either is not aligned, goto 4f (byte loop) */
+ 1: /* if aligned, load word each time (8 words per pass). */
+ ldw r2, (r6, 0)
+ ldw r7, (r3, 0)
+ cmpne r2, r7
+ bt 1f /* if d[i] != s[i], goto 1f */
+ tstnbz r2 /* if d[i] == s[i], check if d or s is at the end. */
+ bf 3f /* if at the end, goto 3f (finish comparing) */
+
+ ldw r2, (r6, 4)
+ ldw r7, (r3, 4)
+ cmpne r2, r7
+ bt 1f
+ tstnbz r2
+ bf 3f
+
+ ldw r2, (r6, 8)
+ ldw r7, (r3, 8)
+ cmpne r2, r7
+ bt 1f
+ tstnbz r2
+ bf 3f
+
+ ldw r2, (r6, 12)
+ ldw r7, (r3, 12)
+ cmpne r2, r7
+ bt 1f
+ tstnbz r2
+ bf 3f
+
+ ldw r2, (r6, 16)
+ ldw r7, (r3, 16)
+ cmpne r2, r7
+ bt 1f
+ tstnbz r2
+ bf 3f
+
+ ldw r2, (r6, 20)
+ ldw r7, (r3, 20)
+ cmpne r2, r7
+ bt 1f
+ tstnbz r2
+ bf 3f
+
+ ldw r2, (r6, 24)
+ ldw r7, (r3, 24)
+ cmpne r2, r7
+ bt 1f
+ tstnbz r2
+ bf 3f
+
+ ldw r2, (r6, 28)
+ ldw r7, (r3, 28)
+ cmpne r2, r7
+ bt 1f
+ tstnbz r2
+ bf 3f
+
+ addi r6, 32
+ addi r3, 32
+ br 1b
+
+#ifdef __CSKYBE__
+ /* the words differ: compare them byte by byte, starting with byte 0 */
+ 1:
+ xtrb0 r1, r2
+ mov r4, r1
+ xtrb0 r1, r7
+ cmpne r4, r1
+ bt 2f
+ cmpnei r4, 0
+ bf 2f
+
+ /* byte 0 equal and not NUL, so we check byte 1 */
+ xtrb1 r1, r2
+ mov r4, r1
+ xtrb1 r1, r7
+ cmpne r4, r1
+ bt 2f
+ cmpnei r4, 0
+ bf 2f
+
+ /* byte 1 equal and not NUL, so we check byte 2 */
+ xtrb2 r1, r2
+ mov r4, r1
+ xtrb2 r1, r7
+ cmpne r4, r1
+ bt 2f
+ cmpnei r4, 0
+ bf 2f
+
+ /* byte 2 equal and not NUL: byte 3 decides, fall through to 2: */
+ xtrb3 r1, r2
+ mov r4, r1
+ xtrb3 r1, r7
+
+#else /* little endian */
+ /* the words differ: compare them byte by byte, starting with byte 0 (xtrb3 here) */
+1:
+ xtrb3 r1, r2
+ mov r4, r1
+ xtrb3 r1, r7
+ cmpne r4, r1
+ bt 2f
+ cmpnei r4, 0
+ bf 2f
+
+ /* byte 0 equal and not NUL, so we check byte 1 */
+ xtrb2 r1, r2
+ mov r4, r1
+ xtrb2 r1, r7
+ cmpne r4, r1
+ bt 2f
+ cmpnei r4, 0
+ bf 2f
+
+ /* byte 1 equal and not NUL, so we check byte 2 */
+ xtrb1 r1, r2
+ mov r4, r1
+ xtrb1 r1, r7
+ cmpne r4, r1
+ bt 2f
+ cmpnei r4, 0
+ bf 2f
+
+ /* byte 2 equal and not NUL: byte 3 decides, fall through to 2: */
+ xtrb0 r1, r2
+ mov r4, r1
+ xtrb0 r1, r7
+
+#endif
+ /* result = byte of d (r4) minus byte of s (r1); 0 when both are NUL */
+2:
+ subu r4, r1
+ mov r2, r4
+ jmp r15
+
+ /* return r2 - r7: 0 when reached from the word loop (equal words), byte difference when reached from 4: */
+3:
+ subu r2, r7
+ jmp r15
+
+ /* cmp when d or s is not aligned: one byte per pass */
+4:
+ ldb r2, (r6,0)
+ ldb r7, (r3, 0)
+ cmpne r2, r7
+ bt 3b /* bytes differ: return their difference */
+ addi r3, 1
+ addi r6, 1
+ cmpnei r2, 0
+ bt 4b /* equal and not NUL: next byte */
+ jmp r15 /* both strings ended together: r2 is 0 here */
+
+ .size Wstrcmp, .-Wstrcmp
+
+libc_hidden_def(Wstrcmp)
+.weak Wstrcmp
+#ifndef __UCLIBC_HAS_LOCALE__
+/* libc_hidden_proto(Wstrcoll) */
+strong_alias(Wstrcmp,Wstrcoll)
+libc_hidden_def(Wstrcoll)
+#endif
diff --git a/libc/string/csky/cskyv1/strcpy.S b/libc/string/csky/cskyv1/strcpy.S
new file mode 100644
index 000000000..c2f1e7a0f
--- /dev/null
+++ b/libc/string/csky/cskyv1/strcpy.S
@@ -0,0 +1,139 @@
+#include <features.h>
+#include <endian.h>
+
+#ifdef WANT_WIDE
+# define Wstrcpy wcscpy
+#else
+# define Wstrcpy strcpy
+#endif
+
+ .align 2
+ .global Wstrcpy
+ .type Wstrcpy, @function
+Wstrcpy: /* char *strcpy(char *dest, const char *src);  In: r2 = dest, r3 = src.  r2 is never written, so dest is returned. */
+
+ mov r6, r2 /* r6 = running dest pointer */
+ mov r7, r3
+ or r7, r6
+ andi r7, 3
+ cmpnei r7, 0
+ bf 2f /* both pointers 4-byte aligned: copy word by word */
+ 1: /* otherwise copy byte by byte, terminating NUL included */
+ ldb r5, (r3)
+ stb r5, (r6)
+ addi r3, 1
+ addi r6, 1
+ cmpnei r5, 0
+ bt 1b
+ /* the NUL has been stored: done (an unreferenced "1:" label used to sit here) */
+ jmp r15
+
+ 2: /* aligned path, 8 words per pass; a word is stored whole only if tstnbz finds no zero byte in it */
+ ldw r5, (r3)
+ tstnbz r5
+ bf 10f
+ stw r5, (r6)
+
+ ldw r5, (r3, 4)
+ tstnbz r5
+ bf 3f
+ stw r5, (r6, 4)
+
+ ldw r5, (r3, 8)
+ tstnbz r5
+ bf 4f
+ stw r5, (r6, 8)
+
+ ldw r5, (r3, 12)
+ tstnbz r5
+ bf 5f
+ stw r5, (r6, 12)
+
+ ldw r5, (r3, 16)
+ tstnbz r5
+ bf 6f
+ stw r5, (r6, 16)
+
+ ldw r5, (r3, 20)
+ tstnbz r5
+ bf 7f
+ stw r5, (r6, 20)
+
+ ldw r5, (r3, 24)
+ tstnbz r5
+ bf 8f
+ stw r5, (r6, 24)
+
+ ldw r5, (r3, 28)
+ tstnbz r5
+ bf 9f
+ stw r5, (r6, 28)
+
+ addi r6, 32
+ addi r3, 32
+ br 2b
+
+ 3: /* labels 3-9: advance r6 to the slot of the word that holds the NUL */
+ addi r6, 4
+ br 10f
+
+ 4:
+ addi r6, 8
+ br 10f
+
+ 5:
+ addi r6, 12
+ br 10f
+
+ 6:
+ addi r6, 16
+ br 10f
+
+ 7:
+ addi r6, 20
+ br 10f
+
+ 8:
+ addi r6, 24
+ br 10f
+
+ 9:
+ addi r6, 28
+
+ 10: /* r5 = last word: store its bytes one at a time and stop right after the NUL */
+#ifdef __CSKYBE__
+ xtrb0 r1, r5
+ stb r1, (r6)
+ cmpnei r1, 0
+ bf 5f
+ xtrb1 r1, r5
+ stb r1, (r6, 1)
+ cmpnei r1, 0
+ bf 5f
+ xtrb2 r1, r5
+ stb r1, (r6, 2 )
+ cmpnei r1, 0
+ bf 5f
+ stw r5, (r6) /* first three bytes are non-zero, so the NUL is the fourth: store the whole word */
+
+#else
+ xtrb3 r1, r5
+ stb r1, (r6)
+ cmpnei r1, 0
+ bf 5f
+ xtrb2 r1, r5
+ stb r1, (r6, 1)
+ cmpnei r1, 0
+ bf 5f
+ xtrb1 r1, r5
+ stb r1, (r6, 2)
+ cmpnei r1, 0
+ bf 5f
+ stw r5, (r6) /* first three bytes are non-zero, so the NUL is the fourth: store the whole word */
+#endif
+ 5:
+ jmp r15
+ .size Wstrcpy, .-Wstrcpy
+
+libc_hidden_def(Wstrcpy)
+.weak Wstrcpy
diff --git a/libc/string/csky/cskyv2/abiv2_memcpy.S b/libc/string/csky/cskyv2/abiv2_memcpy.S
new file mode 100644
index 000000000..c112ec01b
--- /dev/null
+++ b/libc/string/csky/cskyv2/abiv2_memcpy.S
@@ -0,0 +1,184 @@
+.macro GET_FRONT_BITS rx ry /* shift rx by ry bits: right when __cskyLE__ is defined, left otherwise */
+#ifdef __cskyLE__
+ lsr \rx, \ry
+#else /* !__cskyLE__ */
+ lsl \rx, \ry
+#endif /* __cskyLE__ */
+.endm
+
+.macro GET_AFTER_BITS rx ry /* opposite direction of GET_FRONT_BITS: left when __cskyLE__, right otherwise */
+#ifdef __cskyLE__
+ lsl \rx, \ry
+#else /* !__cskyLE__ */
+ lsr \rx, \ry
+#endif /* __cskyLE__ */
+.endm
+
+
+#ifdef WANT_WIDE
+# define Wmemcpy wmemcpy
+#else
+# define Wmemcpy memcpy
+#endif
+
+/* void *memcpy(void *dest, const void *src, size_t n);  In: r0 = dest, r1 = src, r2 = n.  r0 is never written, so dest is what the caller gets back. */
+
+ .text
+ .align 2
+ .global Wmemcpy
+ .type Wmemcpy, @function
+Wmemcpy:
+ mov r3, r0 /* r3 = running dest pointer */
+ cmplti r2, 4 /* If len less than 4 bytes */
+ jbt .L_copy_by_byte
+
+ mov r12, r0
+ andi r12, 3 /* r12 = dest & 3 */
+ bnez r12, .L_dest_not_aligned /* If dest is not 4 bytes aligned */
+.L0:
+ mov r12, r1
+ andi r12, 3 /* r12 = src & 3 */
+ bnez r12, .L_dest_aligned_but_src_not_aligned /* If dest is aligned, but src is not aligned */
+
+ cmplti r2, 16 /* dest and src are both aligned */
+ jbt .L_aligned_and_len_less_16bytes /* If len less than 16 bytes */
+
+.L_aligned_and_len_larger_16bytes: /* src and dst are both aligned, and len >= 16 bytes: 4 words per pass */
+ ldw r18, (r1, 0)
+ ldw r19, (r1, 4)
+ ldw r20, (r1, 8)
+ ldw r21, (r1, 12)
+ stw r18, (r3, 0)
+ stw r19, (r3, 4)
+ stw r20, (r3, 8)
+ stw r21, (r3, 12)
+ subi r2, 16
+ addi r1, 16
+ addi r3, 16
+ cmplti r2, 16
+ jbf .L_aligned_and_len_larger_16bytes /* loop while at least 16 bytes remain */
+
+.L_aligned_and_len_less_16bytes: /* aligned, fewer than 16 bytes left: one word per pass */
+ cmplti r2, 4
+ jbt .L_copy_by_byte
+ ldw r18, (r1, 0)
+ stw r18, (r3, 0)
+ subi r2, 4
+ addi r1, 4
+ addi r3, 4
+ jbr .L_aligned_and_len_less_16bytes
+
+.L_copy_by_byte: /* byte loop: short copies, tails of the word loops, and the overlap fallback */
+ cmpnei r2, 0
+ jbf .L_return /* nothing left to copy */
+ ldb r18, (r1, 0)
+ stb r18, (r3, 0)
+ subi r2, 1
+ addi r1, 1
+ addi r3, 1
+ jbr .L_copy_by_byte
+
+.L_return:
+ rts
+
+/* If dest is not aligned, copy single bytes until it is (r12 counts from dest & 3 up to 4).
+ After that, go back to .L0 to check whether src is aligned. */
+
+.L_dest_not_aligned:
+ rsub r13, r1, r3 /* consider overlapped case */
+ abs r13, r13 /* r13 = distance between the src and dest cursors */
+ cmplt r13, r2
+ jbt .L_copy_by_byte /* distance < len: buffers overlap, use the plain byte loop */
+
+.L1:
+ ldb r18, (r1, 0) /* makes the dest align. */
+ stb r18, (r3, 0)
+ addi r12, 1
+ subi r2, 1
+ addi r1, 1
+ addi r3, 1
+ cmpnei r12, 4
+ jbt .L1 /* repeat until r12 reaches 4, i.e. dest is word aligned */
+ cmplti r2, 4
+ jbt .L_copy_by_byte
+ jbf .L0 /* judge whether the src is aligned. */
+
+.L_dest_aligned_but_src_not_aligned:
+ rsub r13, r1, r3 /* consider overlapped case */
+ abs r13, r13 /* r13 = distance between the src and dest cursors */
+ cmplt r13, r2
+ jbt .L_copy_by_byte /* distance < len: buffers overlap, use the plain byte loop */
+
+ bclri r1, 0 /* round src down to a word boundary (clear bits 0 and 1) */
+ bclri r1, 1
+ ldw r18, (r1, 0) /* r18 = word holding the first source bytes; it starts as the "previous word" */
+ addi r1, 4
+
+ movi r13, 8
+ mult r13, r12 /* r12 holds the misaligned byte count (src & 3); r13 = 8 * r12 bits */
+ mov r24, r13 /* r24 = shift count for GET_FRONT_BITS */
+ rsubi r13, 32
+ mov r25, r13 /* r25 = 32 - r24 = shift count for GET_AFTER_BITS; r13 is plain scratch from here on */
+
+ cmplti r2, 16
+ jbt .L_not_aligned_and_len_less_16bytes
+
+.L_not_aligned_and_len_larger_16bytes: /* 16 bytes per pass; each output word merges two adjacent source words */
+ ldw r20, (r1, 0)
+ ldw r21, (r1, 4)
+ ldw r22, (r1, 8)
+ ldw r23, (r1, 12)
+
+ GET_FRONT_BITS r18 r24 /* shift direction depends on endianness, see the macros */
+ mov r19, r20 /* keep an unshifted copy of word 0 for the next step */
+ GET_AFTER_BITS r20 r25
+ or r20, r18 /* output word 0 */
+
+ GET_FRONT_BITS r19 r24
+ mov r18, r21 /* keep an unshifted copy of word 1 */
+ GET_AFTER_BITS r21 r25 /* was r13: same value, but r25 is the register every other step uses */
+ or r21, r19 /* output word 1 */
+
+ GET_FRONT_BITS r18 r24
+ mov r19, r22 /* keep an unshifted copy of word 2 */
+ GET_AFTER_BITS r22 r25
+ or r22, r18 /* output word 2 */
+
+ GET_FRONT_BITS r19 r24
+ mov r18, r23 /* word 3 becomes the "previous word" of the next pass */
+ GET_AFTER_BITS r23 r25
+ or r23, r19 /* output word 3 */
+
+ stw r20, (r3, 0)
+ stw r21, (r3, 4)
+ stw r22, (r3, 8)
+ stw r23, (r3, 12)
+ subi r2, 16
+ addi r1, 16
+ addi r3, 16
+ cmplti r2, 16
+ jbf .L_not_aligned_and_len_larger_16bytes /* loop while at least 16 bytes remain */
+
+.L_not_aligned_and_len_less_16bytes: /* misaligned src, fewer than 16 bytes left */
+ cmplti r2, 4
+ jbf .L2 /* at least one more whole word to produce */
+ rsubi r12, 4 /* r12 = 4 - misaligned byte count */
+ subu r1, r12 /* move r1 back from the next aligned word to the next uncopied source byte */
+ jbr .L_copy_by_byte
+.L2: /* produce one output word from the previous word (r18) and the next source word */
+ ldw r21, (r1, 0)
+ GET_FRONT_BITS r18 r24
+ mov r19, r18
+ mov r18, r21 /* the word just loaded becomes the "previous word" */
+ GET_AFTER_BITS r21 r25
+ or r21, r19
+ stw r21, (r3, 0)
+ subi r2, 4
+ addi r1, 4
+ addi r3, 4
+ jbr .L_not_aligned_and_len_less_16bytes
+
+.size Wmemcpy, .-Wmemcpy
+
+libc_hidden_def(Wmemcpy)
+.weak Wmemcpy
diff --git a/libc/string/csky/cskyv2/abiv2_strcmp.S b/libc/string/csky/cskyv2/abiv2_strcmp.S
new file mode 100644
index 000000000..202da7c8a
--- /dev/null
+++ b/libc/string/csky/cskyv2/abiv2_strcmp.S
@@ -0,0 +1,168 @@
+#include <endian.h>
+#include "macro.S"
+
+#ifdef WANT_WIDE
+# define Wstrcmp wcscmp
+# define Wstrcoll wcscoll
+#else
+# define Wstrcmp strcmp
+# define Wstrcoll strcoll
+#endif
+
+/* FIXME attention!!! it may be a bug when WANT_WIDE define */
+/*libc_hidden_proto(Wstrcmp)*/
+ .align 2
+ .global Wstrcmp
+ .type Wstrcmp, @function
+Wstrcmp: /* In: a0 = d (first string), a1 = s (second string).  Out: a0 = difference of the deciding bytes, 0 if equal. */
+ mov a3, a0 /* a3 = cursor into d; a0 is reused as scratch and result */
+
+ or a0, a1
+ andi a0, 0x3
+ M_BNEZ a0, 4f /* if d or s is not 4-byte aligned, goto 4f (byte loop) */
+ 1: // if aligned, load word each time (8 words per pass).
+
+ ldw a0, (a3, 0)
+ ldw t0, (a1, 0)
+ M_BNE a0, t0, 1f // if d[i] != s[i], goto 1f
+ tstnbz a0 // if d[i] == s[i], check if d or s is at the end.
+ bf 3f // if at the end, goto 3f (finish comparing)
+ ldw a0, (a3, 4)
+ ldw t0, (a1, 4)
+ M_BNE a0, t0, 1f
+ tstnbz a0
+ bf 3f
+
+ ldw a0, (a3, 8)
+ ldw t0, (a1, 8)
+ M_BNE a0, t0, 1f
+ tstnbz a0
+ bf 3f
+
+ ldw a0, (a3, 12)
+ ldw t0, (a1, 12)
+ M_BNE a0, t0, 1f
+ tstnbz a0
+ bf 3f
+
+ ldw a0, (a3, 16)
+ ldw t0, (a1, 16)
+ M_BNE a0, t0, 1f
+ tstnbz a0
+ bf 3f
+
+ ldw a0, (a3, 20)
+ ldw t0, (a1, 20)
+ M_BNE a0, t0, 1f
+ tstnbz a0
+ bf 3f
+
+ ldw a0, (a3, 24)
+ ldw t0, (a1, 24)
+ M_BNE a0, t0, 1f
+ tstnbz a0
+ bf 3f
+
+ ldw a0, (a3, 28)
+ ldw t0, (a1, 28)
+ M_BNE a0, t0, 1f
+ tstnbz a0
+ bf 3f
+
+ addi a3, 32
+ addi a1, 32
+ br 1b
+
+#ifdef __CSKYBE__
+ /* the words differ: compare them byte by byte, starting with byte 0 */
+ 1:
+ xtrb0 t1, a0
+ mov a2, t1
+ xtrb0 t1, t0
+ M_BNE a2, t1, 2f
+ cmpnei a2, 0
+ bf 2f
+
+ /* byte 0 equal and not NUL, so we check byte 1 */
+ xtrb1 t1, a0
+ mov a2, t1
+ xtrb1 t1, t0
+ M_BNE a2, t1, 2f
+ cmpnei a2, 0
+ bf 2f
+
+ /* byte 1 equal and not NUL, so we check byte 2 */
+ xtrb2 t1, a0
+ mov a2, t1
+ xtrb2 t1, t0
+ M_BNE a2, t1, 2f
+ cmpnei a2, 0
+ bf 2f
+
+ /* byte 2 equal and not NUL: byte 3 decides, fall through to 2: */
+ xtrb3 t1, a0
+ mov a2, t1
+ xtrb3 t1, t0
+
+#else /* little endian */
+ /* the words differ: compare them byte by byte, starting with byte 0 (xtrb3 here) */
+ 1:
+ xtrb3 t1, a0
+ mov a2, t1
+ xtrb3 t1, t0
+ M_BNE a2, t1, 2f
+ cmpnei a2, 0
+ bf 2f
+
+ /* byte 0 equal and not NUL, so we check byte 1 */
+ xtrb2 t1, a0
+ mov a2, t1
+ xtrb2 t1, t0
+ M_BNE a2, t1, 2f
+ cmpnei a2, 0
+ bf 2f
+
+ /* byte 1 equal and not NUL, so we check byte 2 */
+ xtrb1 t1, a0
+ mov a2, t1
+ xtrb1 t1, t0
+ M_BNE a2, t1, 2f
+ cmpnei a2, 0
+ bf 2f
+
+ /* byte 2 equal and not NUL: byte 3 decides, fall through to 2: */
+ xtrb0 t1, a0
+ mov a2, t1
+ xtrb0 t1, t0
+
+#endif
+ /* result = byte of d (a2) minus byte of s (t1); 0 when both are NUL */
+ 2:
+ subu a2, t1
+ mov a0, a2
+ jmp r15
+
+ /* return a0 - t0: 0 when reached from the word loop (equal words), byte difference when reached from 4: */
+ 3:
+ subu a0, t0
+ jmp r15
+
+ /* cmp when d or s is not aligned: one byte per pass */
+ 4:
+ ldb a0, (a3,0)
+ ldb t0, (a1, 0)
+ M_BNE a0, t0, 3b /* bytes differ: return their difference */
+ addi a1, 1
+ addi a3, 1
+ M_BNEZ a0, 4b /* equal and not NUL: next byte */
+ jmp r15 /* both strings ended together: a0 is 0 here */
+
+ .size Wstrcmp, .-Wstrcmp
+
+libc_hidden_def(Wstrcmp)
+.weak Wstrcmp
+#ifndef __UCLIBC_HAS_LOCALE__
+/* libc_hidden_proto(Wstrcoll) */
+strong_alias(Wstrcmp,Wstrcoll)
+libc_hidden_def(Wstrcoll)
+#endif
diff --git a/libc/string/csky/cskyv2/abiv2_strcpy.S b/libc/string/csky/cskyv2/abiv2_strcpy.S
new file mode 100644
index 000000000..20262feae
--- /dev/null
+++ b/libc/string/csky/cskyv2/abiv2_strcpy.S
@@ -0,0 +1,129 @@
+#include <endian.h>
+#include "macro.S"
+
+#ifdef WANT_WIDE
+# define Wstrcpy wcscpy
+#else
+# define Wstrcpy strcpy
+#endif
+
+ .align 2
+ .global Wstrcpy
+ .type Wstrcpy, @function
+Wstrcpy: /* char *strcpy(char *dest, const char *src);  In: a0 = dest, a1 = src.  a0 is never written, so dest is returned. */
+
+ mov a3, a0 /* a3 = running dest pointer */
+ or a2, a1, a3
+ andi t0, a2, 3
+ M_BEZ t0, 2f /* both pointers 4-byte aligned: copy word by word */
+ mov t0, a1 /* otherwise copy byte by byte with t0 as the src cursor */
+ 1:
+ ld.b a2, (t0)
+ stb a2, (a3)
+ addi t0, t0, 1
+ addi a3, a3, 1
+ M_BNEZ a2, 1b /* loop until the NUL has been stored */
+
+ jmp r15
+
+ 2: /* aligned path, 8 words per pass; a word is stored whole only if tstnbz finds no zero byte in it */
+ ldw a2, (a1)
+ tstnbz a2
+ bf 11f
+ stw a2, (a3)
+
+ ldw a2, (a1, 4)
+ tstnbz a2
+ bf 4f
+ stw a2, (a3, 4)
+
+ ldw a2, (a1, 8)
+ tstnbz a2
+ bf 5f
+ stw a2, (a3, 8)
+
+ ldw a2, (a1, 12)
+ tstnbz a2
+ bf 6f
+ stw a2, (a3, 12)
+
+ ldw a2, (a1, 16)
+ tstnbz a2
+ bf 7f
+ stw a2, (a3, 16)
+
+ ldw a2, (a1, 20)
+ tstnbz a2
+ bf 8f
+ stw a2, (a3, 20)
+
+ ldw a2, (a1, 24)
+ tstnbz a2
+ bf 9f
+ stw a2, (a3, 24)
+
+ ldw a2, (a1, 28)
+ tstnbz a2
+ bf 10f
+ stw a2, (a3, 28)
+
+ addi a3, 32
+ addi a1, 32
+ br 2b
+
+ 4: /* labels 4-10: advance a3 to the slot of the word that holds the NUL */
+ addi a3, 4
+ br 11f
+
+ 5:
+ addi a3, 8
+ br 11f
+
+ 6:
+ addi a3, 12
+ br 11f
+
+ 7:
+ addi a3, 16
+ br 11f
+
+ 8:
+ addi a3, 20
+ br 11f
+
+ 9:
+ addi a3, 24
+ br 11f
+
+ 10:
+ addi a3, 28
+ 11: /* a2 = last word: store its bytes one at a time and stop right after the NUL */
+#ifdef __CSKYBE__
+ xtrb0 t0, a2
+ st.b t0, (a3)
+ M_BEZ t0, 5f
+ xtrb1 t0, a2
+ st.b t0, (a3, 1)
+ M_BEZ t0, 5f
+ xtrb2 t0, a2
+ st.b t0, (a3, 2 )
+ M_BEZ t0, 5f
+ stw a2, (a3) /* first three bytes are non-zero, so the NUL is the fourth: store the whole word */
+#else
+ xtrb3 t0, a2
+ st.b t0, (a3)
+ M_BEZ t0, 5f
+ xtrb2 t0, a2
+ st.b t0, (a3, 1)
+ M_BEZ t0, 5f
+ xtrb1 t0, a2
+ st.b t0, (a3, 2)
+ M_BEZ t0, 5f
+ stw a2, (a3) /* first three bytes are non-zero, so the NUL is the fourth: store the whole word */
+#endif
+ 5:
+ jmp r15
+ .size Wstrcpy, .-Wstrcpy
+
+libc_hidden_def(Wstrcpy)
+.weak Wstrcpy
diff --git a/libc/string/csky/cskyv2/macro.S b/libc/string/csky/cskyv2/macro.S
new file mode 100644
index 000000000..047645c21
--- /dev/null
+++ b/libc/string/csky/cskyv2/macro.S
@@ -0,0 +1,13 @@
+.macro M_BEZ rx, label /* wrapper around the bez instruction on rx with target label */
+ bez \rx, \label
+.endm
+
+.macro M_BNEZ rx, label /* wrapper around the bnez instruction on rx with target label */
+ bnez \rx, \label
+.endm
+
+.macro M_BNE rx, ry, label /* compare rx with ry (cmpne), then branch to label if the condition bit is set */
+ cmpne \rx, \ry
+ bt \label
+.endm
+
diff --git a/libc/string/csky/memcpy.S b/libc/string/csky/memcpy.S
new file mode 100644
index 000000000..51d258a11
--- /dev/null
+++ b/libc/string/csky/memcpy.S
@@ -0,0 +1,7 @@
+#include <features.h>
+
+#ifdef __CSKYABIV2__ /* pick the memcpy implementation that matches the ABI macro */
+#include "cskyv2/abiv2_memcpy.S"
+#else /* !__CSKYABIV2__: use the cskyv1 implementation */
+#include "cskyv1/memcpy.S"
+#endif /* __CSKYABIV2__ */
diff --git a/libc/string/csky/strcmp.S b/libc/string/csky/strcmp.S
new file mode 100644
index 000000000..05a88c912
--- /dev/null
+++ b/libc/string/csky/strcmp.S
@@ -0,0 +1,7 @@
+#include <features.h>
+
+#ifdef __CSKYABIV2__ /* pick the strcmp implementation that matches the ABI macro */
+#include "cskyv2/abiv2_strcmp.S"
+#else /* !__CSKYABIV2__: use the cskyv1 implementation */
+#include "cskyv1/strcmp.S"
+#endif /* __CSKYABIV2__ */
diff --git a/libc/string/csky/strcpy.S b/libc/string/csky/strcpy.S
new file mode 100644
index 000000000..dd3be04b5
--- /dev/null
+++ b/libc/string/csky/strcpy.S
@@ -0,0 +1,7 @@
+#include <features.h>
+
+#ifdef __CSKYABIV2__ /* pick the strcpy implementation that matches the ABI macro */
+#include "cskyv2/abiv2_strcpy.S"
+#else /* !__CSKYABIV2__: use the cskyv1 implementation */
+#include "cskyv1/strcpy.S"
+#endif /* __CSKYABIV2__ */