diff options
Diffstat (limited to 'libc/string')
| -rw-r--r-- | libc/string/csky/Makefile | 6 | ||||
| -rw-r--r-- | libc/string/csky/cskyv1/memcpy.S | 211 | ||||
| -rw-r--r-- | libc/string/csky/cskyv1/strcmp.S | 185 | ||||
| -rw-r--r-- | libc/string/csky/cskyv1/strcpy.S | 139 | ||||
| -rw-r--r-- | libc/string/csky/cskyv2/abiv2_memcpy.S | 184 | ||||
| -rw-r--r-- | libc/string/csky/cskyv2/abiv2_strcmp.S | 168 | ||||
| -rw-r--r-- | libc/string/csky/cskyv2/abiv2_strcpy.S | 129 | ||||
| -rw-r--r-- | libc/string/csky/cskyv2/macro.S | 13 | ||||
| -rw-r--r-- | libc/string/csky/memcpy.S | 7 | ||||
| -rw-r--r-- | libc/string/csky/strcmp.S | 7 | ||||
| -rw-r--r-- | libc/string/csky/strcpy.S | 7 | 
11 files changed, 1056 insertions, 0 deletions
| diff --git a/libc/string/csky/Makefile b/libc/string/csky/Makefile new file mode 100644 index 000000000..ce5add623 --- /dev/null +++ b/libc/string/csky/Makefile @@ -0,0 +1,6 @@ +top_srcdir:=../../../ +top_builddir:=../../../ +all: objs +include $(top_builddir)Rules.mak +include ../Makefile.in +include $(top_srcdir)Makerules diff --git a/libc/string/csky/cskyv1/memcpy.S b/libc/string/csky/cskyv1/memcpy.S new file mode 100644 index 000000000..dfa7f64a4 --- /dev/null +++ b/libc/string/csky/cskyv1/memcpy.S @@ -0,0 +1,211 @@ +.macro      GET_FRONT_BITS rx ry +#ifdef      __cskyLE__ +    lsr     \rx, \ry +#else +    lsl     \rx, \ry +#endif +.endm + +.macro      GET_AFTER_BITS rx ry +#ifdef      __cskyLE__ +    lsl     \rx, \ry +#else +    lsr     \rx, \ry +#endif +.endm + + +#ifdef WANT_WIDE +# define Wmemcpy wmemcpy +#else +# define Wmemcpy memcpy +#endif + +/* void *memcpy(void *dest, const void *src, size_t n); dest is taken from r2, src from r3 and n from r4; r7 is the working dest pointer and r2 is never written. */ + +	.text +	.align 2 +	.global Wmemcpy +	.type   Wmemcpy, @function +Wmemcpy: +    mov     r7, r2 +    cmplti  r4, 4                                   /* If len is less than 4 bytes */ +    jbt     .L_copy_by_byte + +    mov     r6, r2 +    andi    r6, 3 +    cmpnei  r6, 0 +    jbt     .L_dest_not_aligned                     /* If dest is not 4 bytes aligned */ +.L0: +    mov     r6, r3 +    andi    r6, 3 +    cmpnei  r6, 0 +    jbt     .L_dest_aligned_but_src_not_aligned     /* If dest is aligned, but src is not aligned */ + +    cmplti  r4, 16                                  /* dest and src are both aligned */ +    jbt     .L_aligned_and_len_less_16bytes         /* If len is less than 16 bytes */ + +    subi    sp, 8 +    stw     r8, (sp, 0) +    stw     r9, (sp, 4)                             /* r8 and r9 serve as copy temporaries in the loop and are reloaded after it */ +.L_aligned_and_len_larger_16bytes:                  /* src and dest are both aligned, and len >= 16 bytes */ +    ldw     r1, (r3, 0) +    ldw     r5, (r3, 4) +    ldw     r8, (r3, 8) +    ldw     r9, (r3, 12) +    stw     r1, (r7, 0) +    stw     r5, (r7, 4) +    stw     r8, (r7, 8) +    stw     
r9, (r7, 12) +    subi    r4, 16 +    addi    r3, 16 +    addi    r7, 16 +    cmplti  r4, 16 +    jbf     .L_aligned_and_len_larger_16bytes +    ldw     r8, (sp, 0) +    ldw     r9, (sp, 4) +    addi    sp, 8 + +.L_aligned_and_len_less_16bytes: +    cmplti  r4, 4 +    jbt     .L_copy_by_byte +    ldw     r1, (r3, 0) +    stw     r1, (r7, 0) +    subi    r4, 4 +    addi    r3, 4 +    addi    r7, 4 +    jbr     .L_aligned_and_len_less_16bytes + +.L_copy_by_byte:                                    /* copy the remaining r4 bytes one at a time */ +    cmpnei  r4, 0 +    jbf     .L_return +    ldb     r1, (r3, 0) +    stb     r1, (r7, 0) +    subi    r4, 1 +    addi    r3, 1 +    addi    r7, 1 +    jbr     .L_copy_by_byte + +.L_return: +    rts + +/* If dest is not aligned, we copy some bytes to make dest align. +   Then we should judge whether src is aligned. */ + +.L_dest_not_aligned: +    mov     r5, r3                                  /* if |dest - src| < len, fall back to the byte copy */ +    rsub    r5, r5, r7 +    abs     r5, r5 +    cmplt   r5, r4 +    jbt     .L_copy_by_byte + +.L1: +    ldb     r1, (r3, 0)                             /* copy single bytes until dest is 4-byte aligned */ +    stb     r1, (r7, 0) +    addi    r6, 1 +    subi    r4, 1 +    addi    r3, 1 +    addi    r7, 1 +    cmpnei  r6, 4 +    jbt     .L1 +    cmplti  r4, 4 +    jbt     .L_copy_by_byte +    jbf     .L0                                     /* judge whether the src is aligned. 
*/ + +.L_dest_aligned_but_src_not_aligned: +    mov     r5, r3                                  /* if |dest - src| < len, fall back to the byte copy */ +    rsub    r5, r5, r7 +    abs     r5, r5 +    cmplt   r5, r4 +    jbt     .L_copy_by_byte + +    bclri   r3, 0 +    bclri   r3, 1 +    ldw     r1, (r3, 0) +    addi    r3, 4 + +    subi    sp, 16 +    stw     r11, (sp,0) +    stw     r12, (sp,4) +    stw     r13, (sp,8) +    movi    r5, 8 +    mult    r5, r6                                  /* r5 = 8 * (src & 3): src misalignment in bits */ +    mov     r12, r5 +    rsubi   r5, 31 +    addi    r5, 1 +    mov     r13, r5                                 /* r13 = 32 - r12 */ + +    cmplti  r4, 16 +    jbt     .L_not_aligned_and_len_less_16bytes + +    stw     r8, (sp, 12) +    subi    sp, 8 +    stw     r9, (sp, 0) +    stw     r10, (sp, 4) +.L_not_aligned_and_len_larger_16bytes: +    ldw     r5, (r3, 0) +    ldw     r11, (r3, 4) +    ldw     r8, (r3, 8) +    ldw     r9, (r3, 12) + +    GET_FRONT_BITS r1 r12                          /* shift direction depends on endianness, see the macros */ +    mov     r10, r5 +    GET_AFTER_BITS r5 r13 +    or      r5, r1 + +    GET_FRONT_BITS r10 r12 +    mov     r1, r11 +    GET_AFTER_BITS r11 r13 +    or      r11, r10 + +    GET_FRONT_BITS r1 r12 +    mov     r10, r8 +    GET_AFTER_BITS r8 r13 +    or      r8, r1 + +    GET_FRONT_BITS r10 r12 +    mov     r1, r9 +    GET_AFTER_BITS r9 r13 +    or      r9, r10 + +    stw     r5, (r7, 0) +    stw     r11, (r7, 4) +    stw     r8, (r7, 8) +    stw     r9, (r7, 12) +    subi    r4, 16 +    addi    r3, 16 +    addi    r7, 16 +    cmplti  r4, 16 +    jbf     .L_not_aligned_and_len_larger_16bytes +    ldw     r9, (sp, 0) +    ldw     r10, (sp, 4) +    addi    sp, 8 +    ldw     r8, (sp,12) + +.L_not_aligned_and_len_less_16bytes: +    cmplti  r4, 4 +    jbf     .L2 +    rsubi   r6, 4                                   /* r6 = 4 - (src & 3) */ +    subu    r3, r6                                 /* move src back to the first byte not yet copied */ +    ldw     r11, (sp, 0) +    ldw     r12, (sp, 4) +    
ldw     r13, (sp, 8) +    addi    sp, 16 +    jbr     .L_copy_by_byte +.L2: +    ldw     r5, (r3, 0) +    GET_FRONT_BITS r1 r12 +    mov     r11, r1 +    mov     r1, r5 +    GET_AFTER_BITS r5 r13 +    or      r5, r11 +    stw     r5, (r7, 0) +    subi    r4, 4 +    addi    r3, 4 +    addi    r7, 4 +    jbr     .L_not_aligned_and_len_less_16bytes + +.size   Wmemcpy, .-Wmemcpy + +libc_hidden_def(Wmemcpy) +.weak Wmemcpy diff --git a/libc/string/csky/cskyv1/strcmp.S b/libc/string/csky/cskyv1/strcmp.S new file mode 100644 index 000000000..e22f29ebd --- /dev/null +++ b/libc/string/csky/cskyv1/strcmp.S @@ -0,0 +1,185 @@ +#include <features.h> +#include <endian.h> + +#ifdef WANT_WIDE +# define Wstrcmp wcscmp +# define Wstrcoll wcscoll +#else +# define Wstrcmp strcmp +# define Wstrcoll strcoll +#endif + +/* FIXME: the code below compares 8-bit characters, so it is probably wrong when WANT_WIDE is defined */ +/*libc_hidden_proto(Wstrcmp)*/ +	.align 2 +	.global Wstrcmp +	.type   Wstrcmp, @function +Wstrcmp: +    mov        r6, r2 + +    or         r2, r3 +    andi       r2, 0x3 +    cmpnei     r2, 0x0     /* are d and s both 4-byte aligned? */ +    bt         4f          /* if either is unaligned, goto 4f */ +    1:                   /* if aligned, compare a word at a time. */ +    ldw        r2, (r6, 0) +    ldw        r7, (r3, 0) +    cmpne      r2, r7 +    bt         1f       /* if d[i] != s[i], goto 1f */ +    tstnbz     r2       /* if d[i] == s[i], check if d or s is at the end. 
*/ +    bf         3f       /* if at the end, goto 3f (finish comparing) */ + +    ldw        r2, (r6, 4) +    ldw        r7, (r3, 4) +    cmpne      r2, r7 +    bt         1f +    tstnbz     r2 +    bf         3f + +    ldw        r2, (r6, 8) +    ldw        r7, (r3, 8) +    cmpne      r2, r7 +    bt         1f +    tstnbz     r2 +    bf         3f + +    ldw        r2, (r6, 12) +    ldw        r7, (r3, 12) +    cmpne      r2, r7 +    bt         1f +    tstnbz     r2 +    bf         3f + +    ldw        r2, (r6, 16) +    ldw        r7, (r3, 16) +    cmpne      r2, r7 +    bt         1f +    tstnbz     r2 +    bf         3f + +    ldw        r2, (r6, 20) +    ldw        r7, (r3, 20) +    cmpne      r2, r7 +    bt         1f +    tstnbz     r2 +    bf         3f + +    ldw        r2, (r6, 24) +    ldw        r7, (r3, 24) +    cmpne      r2, r7 +    bt         1f +    tstnbz     r2 +    bf         3f + +    ldw        r2, (r6, 28) +    ldw        r7, (r3, 28) +    cmpne      r2, r7 +    bt         1f +    tstnbz     r2 +    bf         3f + +    addi       r6, 32 +    addi       r3, 32 +    br         1b + +#ifdef __CSKYBE__ +    /* d[i] != s[i] in word, so we check byte 0 ? */ +    1: +    xtrb0      r1, r2 +    mov        r4, r1 +    xtrb0      r1, r7 +    cmpne      r4, r1 +    bt         2f +    cmpnei     r4, 0 +    bf         2f + +    /* d[i] != s[i] in word, so we check byte 1 ? */ +    xtrb1      r1, r2 +    mov        r4, r1 +    xtrb1      r1, r7 +    cmpne      r4, r1 +    bt         2f +    cmpnei     r4, 0 +    bf         2f + +    /* d[i] != s[i] in word, so we check byte 2 ? */ +    xtrb2      r1, r2 +    mov        r4, r1 +    xtrb2      r1, r7 +    cmpne      r4, r1 +    bt         2f +    cmpnei     r4, 0 +    bf         2f + +    /* d[i] != s[i] in word, so we check byte 3 ? */ +    xtrb3      r1, r2 +    mov        r4, r1 +    xtrb3      r1, r7 + +#else /* little endian */ +    /* d[i] != s[i] in word, so we check byte 0 ? 
*/ +1: +    xtrb3      r1, r2 +    mov        r4, r1 +    xtrb3      r1, r7 +    cmpne      r4, r1 +    bt         2f +    cmpnei     r4, 0 +    bf         2f + +    /* d[i] != s[i] in word, so we check byte 1 ? */ +    xtrb2      r1, r2 +    mov        r4, r1 +    xtrb2      r1, r7 +    cmpne      r4, r1 +    bt         2f +    cmpnei     r4, 0 +    bf         2f + +    /* d[i] != s[i] in word, so we check byte 2 ? */ +    xtrb1      r1, r2 +    mov        r4, r1 +    xtrb1      r1, r7 +    cmpne      r4, r1 +    bt         2f +    cmpnei     r4, 0 +    bf         2f + +    /* d[i] != s[i] in word, so we check byte 3 ? */ +    xtrb0      r1, r2 +    mov        r4, r1 +    xtrb0      r1, r7 + +#endif +    /* get the result when d[i] != s[i] */ +2: +    subu       r4, r1 +    mov        r2, r4 +    jmp        r15 + +    /* return d[i] - s[i]: 0 when reached from the word loop, the byte difference when reached from 4: */ +3: +    subu       r2, r7 +    jmp        r15 + +     /* byte-by-byte compare when d or s is not aligned */ +4: +     ldb       r2, (r6,0) +     ldb       r7, (r3, 0) +     cmpne     r2, r7 +     bt        3b +     addi      r3, 1 +     addi      r6, 1 +     cmpnei    r2, 0 +     bt        4b +     jmp       r15 + +     .size   Wstrcmp, .-Wstrcmp + +libc_hidden_def(Wstrcmp) +.weak Wstrcmp +#ifndef __UCLIBC_HAS_LOCALE__ +/* libc_hidden_proto(Wstrcoll) */ +strong_alias(Wstrcmp,Wstrcoll) +libc_hidden_def(Wstrcoll) +#endif diff --git a/libc/string/csky/cskyv1/strcpy.S b/libc/string/csky/cskyv1/strcpy.S new file mode 100644 index 000000000..c2f1e7a0f --- /dev/null +++ b/libc/string/csky/cskyv1/strcpy.S @@ -0,0 +1,139 @@ +#include <features.h> +#include <endian.h> + +#ifdef WANT_WIDE +# define Wstrcpy wcscpy +#else +# define Wstrcpy strcpy +#endif + +	.align 2 +	.global Wstrcpy +	.type   Wstrcpy, @function +Wstrcpy: + +		mov     r6, r2 +        mov     r7, r3 +        or      r7, r6 +        andi    r7, 3 +        cmpnei  r7, 0 +        bf      2f        /* both pointers 4-byte aligned: copy a word at a time from 2: */ +       1: +        ldb     r5, (r3) +        stb     r5, (r6) +        addi    r3, 1 +        
addi    r6, 1 +        cmpnei  r5, 0 +        bt      1b +       1: +        jmp     r15 + +       2: +        ldw     r5, (r3) +        tstnbz  r5 +        bf      10f +        stw     r5, (r6) + +        ldw     r5, (r3, 4) +        tstnbz  r5 +        bf      3f +        stw     r5, (r6, 4) + +        ldw     r5, (r3, 8) +        tstnbz  r5 +        bf      4f +        stw     r5, (r6, 8) + +        ldw     r5, (r3, 12) +        tstnbz  r5 +        bf      5f +        stw     r5, (r6, 12) + +        ldw     r5, (r3, 16) +        tstnbz  r5 +        bf      6f +        stw     r5, (r6, 16) + +        ldw     r5, (r3, 20) +        tstnbz  r5 +        bf      7f +        stw     r5, (r6, 20) + +        ldw     r5, (r3, 24) +        tstnbz  r5 +        bf      8f +        stw     r5, (r6, 24) + +        ldw     r5, (r3, 28) +        tstnbz  r5 +        bf      9f +        stw     r5, (r6, 28) + +        addi    r6, 32 +        addi    r3, 32 +        br      2b + +       3: +        addi    r6, 4 +        br      10f + +       4: +        addi    r6, 8 +        br      10f + +       5: +        addi    r6, 12 +        br      10f + +       6: +        addi    r6, 16 +        br      10f + +       7: +        addi    r6, 20 +        br      10f + +       8: +        addi    r6, 24 +        br      10f + +       9: +        addi    r6, 28 + +       10: /* r5 = the word that failed tstnbz; r6 = where it goes. Store it byte by byte up to the NUL. */ +#ifdef __CSKYBE__ +        xtrb0   r1, r5 +        stb     r1, (r6) +        cmpnei  r1, 0 +        bf      5f +        xtrb1   r1, r5 +        stb     r1, (r6, 1) +        cmpnei  r1, 0 +        bf      5f +        xtrb2   r1, r5 +        stb     r1, (r6, 2 ) +        cmpnei  r1, 0 +        bf      5f +        stw     r5, (r6)          /* first three bytes were non-zero: store the whole word */ + +#else +        xtrb3   r1, r5 +        stb     r1, (r6) +        cmpnei  r1, 0 +        bf      5f +        xtrb2   r1, r5 +        stb     r1, (r6, 1) +        cmpnei  r1, 0 +        bf      5f +        xtrb1   r1, r5 +        stb     r1, (r6, 2) +        cmpnei  r1, 0 +        bf      5f +  
      stw     r5, (r6)          /* first three bytes were non-zero: store the whole word */ +#endif +       5: +        jmp     r15 + +/* NOTE(review): cskyv1 memcpy.S and strcmp.S emit a .size directive for their symbol; none is emitted for Wstrcpy here. */ +libc_hidden_def(Wstrcpy) +.weak Wstrcpy diff --git a/libc/string/csky/cskyv2/abiv2_memcpy.S b/libc/string/csky/cskyv2/abiv2_memcpy.S new file mode 100644 index 000000000..c112ec01b --- /dev/null +++ b/libc/string/csky/cskyv2/abiv2_memcpy.S @@ -0,0 +1,184 @@ +.macro      GET_FRONT_BITS rx ry +#ifdef      __cskyLE__ +    lsr     \rx, \ry +#else +    lsl     \rx, \ry +#endif +.endm + +.macro      GET_AFTER_BITS rx ry +#ifdef      __cskyLE__ +    lsl     \rx, \ry +#else +    lsr     \rx, \ry +#endif +.endm + + +#ifdef WANT_WIDE +# define Wmemcpy wmemcpy +#else +# define Wmemcpy memcpy +#endif + +/* void *memcpy(void *dest, const void *src, size_t n); dest is taken from r0, src from r1 and n from r2; r3 is the working dest pointer and r0 is never written. */ + +    .text +	.align 2 +	.global Wmemcpy +	.type   Wmemcpy, @function +Wmemcpy: +    mov     r3, r0 +    cmplti  r2, 4                                            /* If len is less than 4 bytes */ +    jbt     .L_copy_by_byte + +    mov     r12, r0 +    andi    r12, 3 +    bnez    r12, .L_dest_not_aligned                         /* If dest is not 4 bytes aligned */ +.L0: +    mov     r12, r1 +    andi    r12, 3 +    bnez    r12, .L_dest_aligned_but_src_not_aligned         /* If dest is aligned, but src is not aligned */ + +    cmplti  r2, 16                                           /* dest and src are both aligned */ +    jbt     .L_aligned_and_len_less_16bytes                  /* If len is less than 16 bytes */ + +.L_aligned_and_len_larger_16bytes:                           /* src and dest are both aligned, and len >= 16 bytes */ +    ldw     r18, (r1, 0) +    ldw     r19, (r1, 4) +    ldw     r20, (r1, 8) +    ldw     r21, (r1, 12) +    stw     r18, (r3, 0) +    stw     r19, (r3, 4) +    stw     r20, (r3, 8) +    stw     r21, (r3, 12) +    subi    r2, 16 +    addi    r1, 16 +    addi    r3, 16 +    cmplti  r2, 16 +    jbf     .L_aligned_and_len_larger_16bytes + +.L_aligned_and_len_less_16bytes: +    cmplti  r2, 4 +    jbt     .L_copy_by_byte +    ldw     r18, 
(r1, 0) +    stw     r18, (r3, 0) +    subi    r2, 4 +    addi    r1, 4 +    addi    r3, 4 +    jbr     .L_aligned_and_len_less_16bytes + +.L_copy_by_byte:                                    /* copy the remaining r2 bytes one at a time */ +    cmpnei  r2, 0 +    jbf     .L_return +    ldb     r18, (r1, 0) +    stb     r18, (r3, 0) +    subi    r2, 1 +    addi    r1, 1 +    addi    r3, 1 +    jbr     .L_copy_by_byte + +.L_return: +    rts + +/* If dest is not aligned, just copying some bytes makes the dest align. +   After that, we judge whether the src is aligned. */ + +.L_dest_not_aligned: +    rsub    r13, r1, r3                              /* if |dest - src| < len, fall back to the byte copy */ +    abs     r13, r13 +    cmplt   r13, r2 +    jbt     .L_copy_by_byte + +.L1: +    ldb     r18, (r1, 0)                             /* copy single bytes until dest is 4-byte aligned */ +    stb     r18, (r3, 0) +    addi    r12, 1 +    subi    r2, 1 +    addi    r1, 1 +    addi    r3, 1 +    cmpnei  r12, 4 +    jbt     .L1 +    cmplti  r2, 4 +    jbt     .L_copy_by_byte +    jbf     .L0                                     /* judge whether the src is aligned. */ + +.L_dest_aligned_but_src_not_aligned: +    rsub    r13, r1, r3                             /* if |dest - src| < len, fall back to the byte copy */ +    abs     r13, r13 +    cmplt   r13, r2 +    jbt     .L_copy_by_byte + +    bclri   r1, 0 +    bclri   r1, 1 +    ldw     r18, (r1, 0) +    addi    r1, 4 + +    movi    r13, 8 +    mult    r13, r12 +    mov     r24, r13                                /* r24 = 8 * (src & 3): src misalignment in bits */ +    rsubi   r13, 32 +    mov     r25, r13                                /* r25 = 32 - r24 */ + +    cmplti  r2, 16 +    jbt     .L_not_aligned_and_len_less_16bytes + +.L_not_aligned_and_len_larger_16bytes: +    ldw     r20, (r1, 0) +    ldw     r21, (r1, 4) +    ldw     r22, (r1, 8) +    ldw     r23, (r1, 12) + +    GET_FRONT_BITS r18 r24                          /* little or big endian? 
*/ +    mov     r19, r20 +    GET_AFTER_BITS r20 r25 +    or      r20, r18 + +    GET_FRONT_BITS r19 r24 +    mov     r18, r21 +    GET_AFTER_BITS r21 r13                          /* NOTE(review): r13 still holds the same value as r25 here; the other three steps use r25 */ +    or      r21, r19 + +    GET_FRONT_BITS r18 r24 +    mov     r19, r22 +    GET_AFTER_BITS r22 r25 +    or      r22, r18 + +    GET_FRONT_BITS r19 r24 +    mov     r18, r23 +    GET_AFTER_BITS r23 r25 +    or      r23, r19 + +    stw     r20, (r3, 0) +    stw     r21, (r3, 4) +    stw     r22, (r3, 8) +    stw     r23, (r3, 12) +    subi    r2, 16 +    addi    r1, 16 +    addi    r3, 16 +    cmplti  r2, 16 +    jbf     .L_not_aligned_and_len_larger_16bytes + +.L_not_aligned_and_len_less_16bytes: +    cmplti  r2, 4 +    jbf     .L2 +    rsubi   r12, 4                                   /* r12 = 4 - (src & 3) */ +    subu    r1, r12                                  /* move src back to the first byte not yet copied */ +    jbr     .L_copy_by_byte +.L2: +    ldw     r21, (r1, 0) +    GET_FRONT_BITS r18 r24 +    mov     r19, r18 +    mov     r18, r21 +    GET_AFTER_BITS r21 r25 +    or      r21, r19 +    stw     r21, (r3, 0) +    subi    r2, 4 +    addi    r1, 4 +    addi    r3, 4 +    jbr     .L_not_aligned_and_len_less_16bytes + +.size   Wmemcpy, .-Wmemcpy + +libc_hidden_def(Wmemcpy) +.weak Wmemcpy diff --git a/libc/string/csky/cskyv2/abiv2_strcmp.S b/libc/string/csky/cskyv2/abiv2_strcmp.S new file mode 100644 index 000000000..202da7c8a --- /dev/null +++ b/libc/string/csky/cskyv2/abiv2_strcmp.S @@ -0,0 +1,168 @@ +#include <endian.h> +#include "macro.S" + +#ifdef WANT_WIDE +# define Wstrcmp wcscmp +# define Wstrcoll wcscoll +#else +# define Wstrcmp strcmp +# define Wstrcoll strcoll +#endif + +/* FIXME: the code below compares 8-bit characters, so it is probably wrong when WANT_WIDE is defined */ +/*libc_hidden_proto(Wstrcmp)*/ +	.align 2 +	.global Wstrcmp +	.type   Wstrcmp, @function +Wstrcmp: +    mov        a3, a0 + +    or         a0, a1 +    andi       a0, 0x3 +    M_BNEZ     a0, 4f +    1:                   // if aligned, load word each time. 
+ +    ldw        a0, (a3, 0) +    ldw        t0, (a1, 0) +    M_BNE      a0, t0, 1f // if d[i] != s[i], goto 1f +    tstnbz     a0       // if d[i] == s[i], check if d or s is at the end. +    bf         3f       // if at the end, goto 3f (finish comparing) +    ldw        a0, (a3, 4) +    ldw        t0, (a1, 4) +    M_BNE      a0, t0, 1f +    tstnbz     a0 +    bf         3f + +    ldw        a0, (a3, 8) +    ldw        t0, (a1, 8) +    M_BNE      a0, t0, 1f +    tstnbz     a0 +    bf         3f + +    ldw        a0, (a3, 12) +    ldw        t0, (a1, 12) +    M_BNE      a0, t0, 1f +    tstnbz     a0 +    bf         3f + +    ldw        a0, (a3, 16) +    ldw        t0, (a1, 16) +    M_BNE      a0, t0, 1f +    tstnbz     a0 +    bf         3f + +    ldw        a0, (a3, 20) +    ldw        t0, (a1, 20) +    M_BNE      a0, t0, 1f +    tstnbz     a0 +    bf         3f + +    ldw        a0, (a3, 24) +    ldw        t0, (a1, 24) +    M_BNE      a0, t0, 1f +    tstnbz     a0 +    bf         3f + +    ldw        a0, (a3, 28) +    ldw        t0, (a1, 28) +    M_BNE      a0, t0, 1f +    tstnbz     a0 +    bf         3f + +    addi       a3, 32 +    addi       a1, 32 +    br         1b + +#ifdef __CSKYBE__ +    /* d[i] != s[i] in word, so we check byte 0 ? */ +    1: +    xtrb0      t1, a0 +    mov        a2, t1 +    xtrb0      t1, t0 +    M_BNE      a2, t1, 2f +    cmpnei     a2, 0 +    bf         2f + +    /* d[i] != s[i] in word, so we check byte 1 ? */ +    xtrb1      t1, a0 +    mov        a2, t1 +    xtrb1      t1, t0 +    M_BNE      a2, t1, 2f +    cmpnei     a2, 0 +    bf         2f + +    /* d[i] != s[i] in word, so we check byte 2 ? */ +    xtrb2      t1, a0 +    mov        a2, t1 +    xtrb2      t1, t0 +    M_BNE      a2, t1, 2f +    cmpnei     a2, 0 +    bf         2f + +    /* d[i] != s[i] in word, so we check byte 3 ? 
*/ +    xtrb3      t1, a0 +    mov        a2, t1 +    xtrb3      t1, t0 + +#else /* little endian */ +    /* d[i] != s[i] in word, so we check byte 0 ? */ +    1: +    xtrb3      t1, a0 +    mov        a2, t1 +    xtrb3      t1, t0 +    M_BNE      a2, t1, 2f +    cmpnei     a2, 0 +    bf         2f + +    /* d[i] != s[i] in word, so we check byte 1 ? */ +    xtrb2      t1, a0 +    mov        a2, t1 +    xtrb2      t1, t0 +    M_BNE      a2, t1, 2f +    cmpnei     a2, 0 +    bf         2f + +    /* d[i] != s[i] in word, so we check byte 2 ? */ +    xtrb1      t1, a0 +    mov        a2, t1 +    xtrb1      t1, t0 +    M_BNE      a2, t1, 2f +    cmpnei     a2, 0 +    bf         2f + +    /* d[i] != s[i] in word, so we check byte 3 ? */ +    xtrb0      t1, a0 +    mov        a2, t1 +    xtrb0      t1, t0 + +#endif +    /* get the result when d[i] != s[i] */ +    2: +    subu       a2, t1 +    mov        a0, a2 +    jmp        r15 + +    /* return d[i] - s[i]: 0 when reached from the word loop, the byte difference when reached from 4: */ +    3: +    subu       a0, t0 +    jmp        r15 + +     /* byte-by-byte compare when d or s is not aligned */ +    4: +    ldb       a0, (a3,0) +    ldb       t0, (a1, 0) +    M_BNE     a0, t0, 3b +    addi      a1, 1 +    addi      a3, 1 +    M_BNEZ    a0, 4b +    jmp        r15 + +    .size   Wstrcmp, .-Wstrcmp + +libc_hidden_def(Wstrcmp) +.weak Wstrcmp +#ifndef __UCLIBC_HAS_LOCALE__ +/* libc_hidden_proto(Wstrcoll) */ +strong_alias(Wstrcmp,Wstrcoll) +libc_hidden_def(Wstrcoll) +#endif diff --git a/libc/string/csky/cskyv2/abiv2_strcpy.S b/libc/string/csky/cskyv2/abiv2_strcpy.S new file mode 100644 index 000000000..20262feae --- /dev/null +++ b/libc/string/csky/cskyv2/abiv2_strcpy.S @@ -0,0 +1,129 @@ +#include <endian.h> +#include "macro.S" + +#ifdef WANT_WIDE +# define Wstrcpy wcscpy +#else +# define Wstrcpy strcpy +#endif + +	.align 2 +	.global Wstrcpy +	.type   Wstrcpy, @function +Wstrcpy: + +        mov     a3, a0 +        or      a2, a1, a3 +        andi    t0, a2, 3 +        M_BEZ   t0, 2f        /* both pointers 4-byte aligned: copy a word at a time from 2: */ +        mov     t0, a1 +  
     1: +        ld.b    a2, (t0) +        stb     a2, (a3) +        addi    t0, t0, 1 +        addi    a3, a3, 1 +        M_BNEZ  a2, 1b + +        jmp     r15 + +       2: +        ldw     a2, (a1) +        tstnbz  a2 +        bf      11f +        stw     a2, (a3) + +        ldw     a2, (a1, 4) +        tstnbz  a2 +        bf      4f +        stw     a2, (a3, 4) + +        ldw     a2, (a1, 8) +        tstnbz  a2 +        bf      5f +        stw     a2, (a3, 8) + +        ldw     a2, (a1, 12) +        tstnbz  a2 +        bf      6f +        stw     a2, (a3, 12) + +        ldw     a2, (a1, 16) +        tstnbz  a2 +        bf      7f +        stw     a2, (a3, 16) + +        ldw     a2, (a1, 20) +        tstnbz  a2 +        bf      8f +        stw     a2, (a3, 20) + +        ldw     a2, (a1, 24) +        tstnbz  a2 +        bf      9f +        stw     a2, (a3, 24) + +        ldw     a2, (a1, 28) +        tstnbz  a2 +        bf      10f +        stw     a2, (a3, 28) + +        addi     a3, 32 +        addi     a1, 32 +        br       2b + +       4: +        addi    a3, 4 +        br      11f + +       5: +        addi    a3, 8 +        br      11f + +       6: +        addi    a3, 12 +        br      11f + +       7: +        addi    a3, 16 +        br      11f + +       8: +        addi    a3, 20 +        br      11f + +       9: +        addi    a3, 24 +        br      11f + +       10: +        addi    a3, 28 +       11: /* a2 = the word that failed tstnbz; a3 = where it goes. Store it byte by byte up to the NUL. */ +#ifdef __CSKYBE__ +        xtrb0   t0, a2 +        st.b    t0, (a3) +        M_BEZ   t0, 5f +        xtrb1   t0, a2 +        st.b    t0, (a3, 1) +        M_BEZ   t0, 5f +        xtrb2   t0, a2 +        st.b    t0, (a3, 2 ) +        M_BEZ   t0, 5f +        stw     a2, (a3)          /* first three bytes were non-zero: store the whole word */ +#else +        xtrb3   t0, a2 +        st.b    t0, (a3) +        M_BEZ   t0, 5f +        xtrb2   t0, a2 +        st.b    t0, (a3, 1) +        M_BEZ   t0, 5f +        xtrb1   t0, a2 +        st.b    t0, (a3, 2) +        M_BEZ   t0, 5f +        stw     a2, (a3)          /* first three bytes were non-zero: store the whole word */ +#endif +       
5: +	jmp     r15 + +/* NOTE(review): abiv2_memcpy.S and abiv2_strcmp.S emit a .size directive for their symbol; none is emitted for Wstrcpy here. */ +libc_hidden_def(Wstrcpy) +.weak Wstrcpy diff --git a/libc/string/csky/cskyv2/macro.S b/libc/string/csky/cskyv2/macro.S new file mode 100644 index 000000000..047645c21 --- /dev/null +++ b/libc/string/csky/cskyv2/macro.S @@ -0,0 +1,13 @@ +.macro M_BEZ rx, label +	bez   \rx, \label +.endm + +.macro M_BNEZ rx, label +	bnez  \rx, \label +.endm + +.macro M_BNE rx, ry, label +	cmpne \rx, \ry +	bt    \label +.endm + diff --git a/libc/string/csky/memcpy.S b/libc/string/csky/memcpy.S new file mode 100644 index 000000000..51d258a11 --- /dev/null +++ b/libc/string/csky/memcpy.S @@ -0,0 +1,7 @@ +#include <features.h> + +#ifdef __CSKYABIV2__ +#include "cskyv2/abiv2_memcpy.S" +#else +#include "cskyv1/memcpy.S" +#endif diff --git a/libc/string/csky/strcmp.S b/libc/string/csky/strcmp.S new file mode 100644 index 000000000..05a88c912 --- /dev/null +++ b/libc/string/csky/strcmp.S @@ -0,0 +1,7 @@ +#include <features.h> + +#ifdef __CSKYABIV2__ +#include "cskyv2/abiv2_strcmp.S" +#else +#include "cskyv1/strcmp.S" +#endif diff --git a/libc/string/csky/strcpy.S b/libc/string/csky/strcpy.S new file mode 100644 index 000000000..dd3be04b5 --- /dev/null +++ b/libc/string/csky/strcpy.S @@ -0,0 +1,7 @@ +#include <features.h> + +#ifdef __CSKYABIV2__ +#include "cskyv2/abiv2_strcpy.S" +#else +#include "cskyv1/strcpy.S" +#endif | 
