diff options
author | Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> | 2008-10-03 13:59:52 +0000 |
---|---|---|
committer | Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> | 2008-10-03 13:59:52 +0000 |
commit | 2ba017a2d5af01cc3ef0dc554252a521e8d7c4f8 (patch) | |
tree | 0e0db7e3fbb4fbe1be3c56ad6c80bb7d63effb93 /libc/string/ia64 | |
parent | 94bbeb72728193288f2bf071cf0e40293499045b (diff) |
- use c89-style comments
Closes issue #5194
Diffstat (limited to 'libc/string/ia64')
-rw-r--r-- | libc/string/ia64/bzero.S | 136 | ||||
-rw-r--r-- | libc/string/ia64/memccpy.S | 102 | ||||
-rw-r--r-- | libc/string/ia64/memchr.S | 32 | ||||
-rw-r--r-- | libc/string/ia64/memcmp.S | 94 | ||||
-rw-r--r-- | libc/string/ia64/memcpy.S | 160 | ||||
-rw-r--r-- | libc/string/ia64/memmove.S | 170 | ||||
-rw-r--r-- | libc/string/ia64/memset.S | 178 | ||||
-rw-r--r-- | libc/string/ia64/strchr.S | 32 | ||||
-rw-r--r-- | libc/string/ia64/strcmp.S | 2 | ||||
-rw-r--r-- | libc/string/ia64/strcpy.S | 66 | ||||
-rw-r--r-- | libc/string/ia64/strlen.S | 18 | ||||
-rw-r--r-- | libc/string/ia64/strncmp.S | 10 | ||||
-rw-r--r-- | libc/string/ia64/strncpy.S | 90 |
13 files changed, 545 insertions, 545 deletions
diff --git a/libc/string/ia64/bzero.S b/libc/string/ia64/bzero.S index d390838a6..1f0f8b7ac 100644 --- a/libc/string/ia64/bzero.S +++ b/libc/string/ia64/bzero.S @@ -47,13 +47,13 @@ #define ptr1 r28 #define ptr2 r27 #define ptr3 r26 -#define ptr9 r24 +#define ptr9 r24 #define loopcnt r23 #define linecnt r22 #define bytecnt r21 -// This routine uses only scratch predicate registers (p6 - p15) -#define p_scr p6 // default register for same-cycle branches +/* This routine uses only scratch predicate registers (p6 - p15) */ +#define p_scr p6 /* default register for same-cycle branches */ #define p_unalgn p9 #define p_y p11 #define p_n p12 @@ -65,7 +65,7 @@ #define MIN1 15 #define MIN1P1HALF 8 #define LINE_SIZE 128 -#define LSIZE_SH 7 // shift amount +#define LSIZE_SH 7 /* shift amount */ #define PREF_AHEAD 8 #define USE_FLP @@ -87,49 +87,49 @@ ENTRY(bzero) movi0 save_lc = ar.lc } { .mmi .body - mov ret0 = dest // return value + mov ret0 = dest /* return value */ nop.m 0 cmp.eq p_scr, p0 = cnt, r0 ;; } { .mmi - and ptr2 = -(MIN1+1), dest // aligned address - and tmp = MIN1, dest // prepare to check for alignment - tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U) + and ptr2 = -(MIN1+1), dest /* aligned address */ + and tmp = MIN1, dest /* prepare to check for alignment */ + tbit.nz p_y, p_n = dest, 0 /* Do we have an odd address? (M_B_U) */ } { .mib mov ptr1 = dest nop.i 0 -(p_scr) br.ret.dpnt.many rp // return immediately if count = 0 +(p_scr) br.ret.dpnt.many rp /* return immediately if count = 0 */ ;; } { .mib cmp.ne p_unalgn, p0 = tmp, r0 -} { .mib // NB: # of bytes to move is 1 - sub bytecnt = (MIN1+1), tmp // higher than loopcnt - cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task? -(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U) +} { .mib /* NB: # of bytes to move is 1 */ + sub bytecnt = (MIN1+1), tmp /* higher than loopcnt */ + cmp.gt p_scr, p0 = 16, cnt /* is it a minimalistic task? */ +(p_scr) br.cond.dptk.many .move_bytes_unaligned /* go move just a few (M_B_U) */ ;; } { .mmi -(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment -(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ? +(p_unalgn) add ptr1 = (MIN1+1), ptr2 /* after alignment */ +(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 /* after alignment */ +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 /* should we do a st8 ? */ ;; } { .mib (p_y) add cnt = -8, cnt -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ? +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 /* should we do a st4 ? */ } { .mib (p_y) st8 [ptr2] = r0,-4 (p_n) add ptr2 = 4, ptr2 ;; } { .mib (p_yy) add cnt = -4, cnt -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ? +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 /* should we do a st2 ? */ } { .mib (p_yy) st4 [ptr2] = r0,-2 (p_nn) add ptr2 = 2, ptr2 ;; } { .mmi - mov tmp = LINE_SIZE+1 // for compare + mov tmp = LINE_SIZE+1 /* for compare */ (p_y) add cnt = -2, cnt -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ? +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 /* should we do a st1 ? */ } { .mmi nop.m 0 (p_y) st2 [ptr2] = r0,-1 @@ -138,44 +138,44 @@ ENTRY(bzero) { .mmi (p_yy) st1 [ptr2] = r0 - cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task? + cmp.gt p_scr, p0 = tmp, cnt /* is it a minimalistic task? */ } { .mbb (p_yy) add cnt = -1, cnt -(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few +(p_scr) br.cond.dpnt.many .fraction_of_line /* go move just a few */ ;; } { .mib - nop.m 0 + nop.m 0 shr.u linecnt = cnt, LSIZE_SH nop.b 0 ;; } .align 32 -.l1b: // ------------------// L1B: store ahead into cache lines; fill later +.l1b: /* ------------------ L1B: store ahead into cache lines; fill later */ { .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder + and tmp = -(LINE_SIZE), cnt /* compute end of range */ + mov ptr9 = ptr1 /* used for prefetching */ + and cnt = (LINE_SIZE-1), cnt /* remainder */ } { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value + mov loopcnt = PREF_AHEAD-1 /* default prefetch loop */ + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt /* check against actual value */ ;; } { .mmi (p_scr) add loopcnt = -1, linecnt - add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total range + add ptr2 = 16, ptr1 /* start of stores (beyond prefetch stores) */ + add ptr1 = tmp, ptr1 /* first address beyond total range */ ;; } { .mmi - add tmp = -1, linecnt // next loop count + add tmp = -1, linecnt /* next loop count */ movi0 ar.lc = loopcnt ;; } .pref_l1b: { .mib - stf.spill [ptr9] = f0, 128 // Do stores one cache line apart + stf.spill [ptr9] = f0, 128 /* Do stores one cache line apart */ nop.i 0 br.cloop.dptk.few .pref_l1b ;; } { .mmi - add ptr0 = 16, ptr2 // Two stores in parallel + add ptr0 = 16, ptr2 /* Two stores in parallel */ movi0 ar.lc = tmp ;; } .l1bx: @@ -190,7 +190,7 @@ ENTRY(bzero) { .mmi stf.spill [ptr2] = f0, 32 stf.spill [ptr0] = f0, 64 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + cmp.lt p_scr, p0 = ptr9, ptr1 /* do we need more prefetching? */ ;; } { .mmb stf.spill [ptr2] = f0, 32 @@ -198,14 +198,14 @@ ENTRY(bzero) br.cloop.dptk.few .l1bx ;; } { .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? + cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? */ (p_scr) br.cond.dpnt.many .move_bytes_from_alignment ;; } .fraction_of_line: { .mib add ptr2 = 16, ptr1 - shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32 + shr.u loopcnt = cnt, 5 /* loopcnt = cnt / 32 */ ;; } { .mib cmp.eq p_scr, p0 = loopcnt, r0 @@ -213,11 +213,11 @@ ENTRY(bzero) (p_scr) br.cond.dpnt.many .store_words ;; } { .mib - and cnt = 0x1f, cnt // compute the remaining cnt + and cnt = 0x1f, cnt /* compute the remaining cnt */ movi0 ar.lc = loopcnt ;; } .align 32 -.l2: // -----------------------------// L2A: store 32B in 2 cycles +.l2: /* ----------------------------- L2A: store 32B in 2 cycles */ { .mmb store [ptr1] = myval, 8 store [ptr2] = myval, 8 @@ -228,38 +228,38 @@ ENTRY(bzero) ;; } .store_words: { .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch + cmp.gt p_scr, p0 = 8, cnt /* just a few bytes left ? */ +(p_scr) br.cond.dpnt.many .move_bytes_from_alignment /* Branch */ ;; } { .mmi - store [ptr1] = myval, 8 // store - cmp.le p_y, p_n = 16, cnt // - add cnt = -8, cnt // subtract + store [ptr1] = myval, 8 /* store */ + cmp.le p_y, p_n = 16, cnt /* */ + add cnt = -8, cnt /* subtract */ ;; } { .mmi -(p_y) store [ptr1] = myval, 8 // store +(p_y) store [ptr1] = myval, 8 /* store */ (p_y) cmp.le.unc p_yy, p_nn = 16, cnt -(p_y) add cnt = -8, cnt // subtract +(p_y) add cnt = -8, cnt /* subtract */ ;; } -{ .mmi // store +{ .mmi /* store */ (p_yy) store [ptr1] = myval, 8 -(p_yy) add cnt = -8, cnt // subtract +(p_yy) add cnt = -8, cnt /* subtract */ ;; } .move_bytes_from_alignment: { .mib cmp.eq p_scr, p0 = cnt, r0 - tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ? + tbit.nz.unc p_y, p0 = cnt, 2 /* should we terminate with a st4 ? */ (p_scr) br.cond.dpnt.few .restore_and_exit ;; } { .mib (p_y) st4 [ptr1] = r0,4 - tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ? + tbit.nz.unc p_yy, p0 = cnt, 1 /* should we terminate with a st2 ? */ ;; } { .mib (p_yy) st2 [ptr1] = r0,2 - tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ? + tbit.nz.unc p_y, p0 = cnt, 0 /* should we terminate with a st1 ? */ ;; } { .mib @@ -281,38 +281,38 @@ ENTRY(bzero) (p_n) add ptr2 = 2, ptr1 } { .mmi (p_y) add ptr2 = 3, ptr1 -(p_y) st1 [ptr1] = r0, 1 // fill 1 (odd-aligned) byte -(p_y) add cnt = -1, cnt // [15, 14 (or less) left] +(p_y) st1 [ptr1] = r0, 1 /* fill 1 (odd-aligned) byte */ +(p_y) add cnt = -1, cnt /* [15, 14 (or less) left] */ ;; } { .mmi (p_yy) cmp.le.unc p_y, p0 = 8, cnt - add ptr3 = ptr1, cnt // prepare last store + add ptr3 = ptr1, cnt /* prepare last store */ movi0 ar.lc = save_lc } { .mmi -(p_yy) st2 [ptr1] = r0, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = r0, 4 // fill 2 (aligned) bytes -(p_yy) add cnt = -4, cnt // [11, 10 (o less) left] +(p_yy) st2 [ptr1] = r0, 4 /* fill 2 (aligned) bytes */ +(p_yy) st2 [ptr2] = r0, 4 /* fill 2 (aligned) bytes */ +(p_yy) add cnt = -4, cnt /* [11, 10 (o less) left] */ ;; } { .mmi (p_y) cmp.le.unc p_yy, p0 = 8, cnt - add ptr3 = -1, ptr3 // last store - tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ? + add ptr3 = -1, ptr3 /* last store */ + tbit.nz p_scr, p0 = cnt, 1 /* will there be a st2 at the end ? */ } { .mmi -(p_y) st2 [ptr1] = r0, 4 // fill 2 (aligned) bytes -(p_y) st2 [ptr2] = r0, 4 // fill 2 (aligned) bytes -(p_y) add cnt = -4, cnt // [7, 6 (or less) left] +(p_y) st2 [ptr1] = r0, 4 /* fill 2 (aligned) bytes */ +(p_y) st2 [ptr2] = r0, 4 /* fill 2 (aligned) bytes */ +(p_y) add cnt = -4, cnt /* [7, 6 (or less) left] */ ;; } { .mmi -(p_yy) st2 [ptr1] = r0, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = r0, 4 // fill 2 (aligned) bytes - // [3, 2 (or less) left] - tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ? +(p_yy) st2 [ptr1] = r0, 4 /* fill 2 (aligned) bytes */ +(p_yy) st2 [ptr2] = r0, 4 /* fill 2 (aligned) bytes */ + /* [3, 2 (or less) left] */ + tbit.nz p_y, p0 = cnt, 0 /* will there be a st1 at the end ? */ } { .mmi (p_yy) add cnt = -4, cnt ;; } { .mmb -(p_scr) st2 [ptr1] = r0 // fill 2 (aligned) bytes -(p_y) st1 [ptr3] = r0 // fill last byte (using ptr3) +(p_scr) st2 [ptr1] = r0 /* fill 2 (aligned) bytes */ +(p_y) st1 [ptr3] = r0 /* fill last byte (using ptr3) */ br.ret.sptk.many rp ;; } END(bzero) diff --git a/libc/string/ia64/memccpy.S b/libc/string/ia64/memccpy.S index 1afba3637..259d680bc 100644 --- a/libc/string/ia64/memccpy.S +++ b/libc/string/ia64/memccpy.S @@ -23,7 +23,7 @@ Inputs: in0: dest in1: src - in2: char + in2: char in3: byte count This implementation assumes little endian mode (UM.be = 0). @@ -69,75 +69,75 @@ ENTRY(memccpy) .rotr r[MEMLAT + 7], tmp1[4], tmp2[4], val[4], tmp3[2], pos0[2] .rotp p[MEMLAT + 6 + 1] - mov ret0 = r0 // return NULL if no match + mov ret0 = r0 /* return NULL if no match */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers - mov dest = in0 // dest + mov saved_pr = pr /* save the predicate registers */ + mov dest = in0 /* dest */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter - mov saved_ec = ar.ec // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ + mov saved_ec = ar.ec /* save the loop counter */ .body - mov src = in1 // src - extr.u char = in2, 0, 8 // char - mov len = in3 // len - sub tmp = r0, in0 // tmp = -dest - cmp.ne p7, p0 = r0, r0 // clear p7 + mov src = in1 /* src */ + extr.u char = in2, 0, 8 /* char */ + mov len = in3 /* len */ + sub tmp = r0, in0 /* tmp = -dest */ + cmp.ne p7, p0 = r0, r0 /* clear p7 */ ;; - and loopcnt = 7, tmp // loopcnt = -dest % 8 - cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES - mov ar.ec = 0 // ec not guaranteed zero on entry -(p6) br.cond.spnt .cpyfew // copy byte by byte + and loopcnt = 7, tmp /* loopcnt = -dest % 8 */ + cmp.ge p6, p0 = OP_T_THRES, len /* is len <= OP_T_THRES */ + mov ar.ec = 0 /* ec not guaranteed zero on entry */ +(p6) br.cond.spnt .cpyfew /* copy byte by byte */ ;; cmp.eq p6, p0 = loopcnt, r0 mux1 charx8 = char, @brcst (p6) br.cond.sptk .dest_aligned - sub len = len, loopcnt // len -= -dest % 8 - adds loopcnt = -1, loopcnt // --loopcnt + sub len = len, loopcnt /* len -= -dest % 8 */ + adds loopcnt = -1, loopcnt /* --loopcnt */ ;; mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes - ld1 value = [src], 1 // value = *src++ +.l1: /* copy -dest % 8 bytes */ + ld1 value = [src], 1 /* value = *src++ */ ;; - st1 [dest] = value, 1 // *dest++ = value + st1 [dest] = value, 1 /* *dest++ = value */ cmp.eq p6, p0 = value, char (p6) br.cond.spnt .foundit br.cloop.dptk .l1 .dest_aligned: - and sh1 = 7, src // sh1 = src % 8 - and tmp = -8, len // tmp = len & -OPSIZ - and asrc = -8, src // asrc = src & -OPSIZ -- align src - shr.u loopcnt = len, 3 // loopcnt = len / 8 - and len = 7, len ;; // len = len % 8 - shl sh1 = sh1, 3 // sh1 = 8 * (src % 8) - adds loopcnt = -1, loopcnt // --loopcnt - mov pr.rot = 1 << 16 ;; // set rotating predicates - sub sh2 = 64, sh1 // sh2 = 64 - sh1 - mov ar.lc = loopcnt // set LC - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + and sh1 = 7, src /* sh1 = src % 8 */ + and tmp = -8, len /* tmp = len & -OPSIZ */ + and asrc = -8, src /* asrc = src & -OPSIZ -- align src */ + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + and len = 7, len ;; /* len = len % 8 */ + shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */ + adds loopcnt = -1, loopcnt /* --loopcnt */ + mov pr.rot = 1 << 16 ;; /* set rotating predicates */ + sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */ + mov ar.lc = loopcnt /* set LC */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned ;; - add src = src, tmp // src += len & -OPSIZ - mov ar.ec = MEMLAT + 6 + 1 // six more passes needed - ld8 r[1] = [asrc], 8 // r[1] = w0 - cmp.ne p6, p0 = r0, r0 ;; // clear p6 + add src = src, tmp /* src += len & -OPSIZ */ + mov ar.ec = MEMLAT + 6 + 1 /* six more passes needed */ + ld8 r[1] = [asrc], 8 /* r[1] = w0 */ + cmp.ne p6, p0 = r0, r0 ;; /* clear p6 */ ALIGN(32) .l2: -(p[0]) ld8.s r[0] = [asrc], 8 // r[0] = w1 -(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 >> sh1 -(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 << sh2 +(p[0]) ld8.s r[0] = [asrc], 8 /* r[0] = w1 */ +(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 /* tmp1 = w0 >> sh1 */ +(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 /* tmp2 = w1 << sh2 */ (p[MEMLAT+4]) xor tmp3[0] = val[1], charx8 (p[MEMLAT+5]) czx1.r pos0[0] = tmp3[1] -(p[MEMLAT+6]) chk.s r[6 + MEMLAT], .recovery1 // our data isn't - // valid - rollback! +(p[MEMLAT+6]) chk.s r[6 + MEMLAT], .recovery1 /* our data isn't */ + /* valid - rollback! */ (p[MEMLAT+6]) cmp.ne p6, p0 = 8, pos0[1] (p6) br.cond.spnt .gotit -(p[MEMLAT+6]) st8 [dest] = val[3], 8 // store val to dest -(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] // val = tmp1 | tmp2 +(p[MEMLAT+6]) st8 [dest] = val[3], 8 /* store val to dest */ +(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] /* val = tmp1 | tmp2 */ br.ctop.sptk .l2 br.cond.sptk .cpyfew .src_aligned: - cmp.ne p6, p0 = r0, r0 // clear p6 - mov ar.ec = MEMLAT + 2 + 1 ;; // set EC + cmp.ne p6, p0 = r0, r0 /* clear p6 */ + mov ar.ec = MEMLAT + 2 + 1 ;; /* set EC */ .l3: (p[0]) ld8.s r[0] = [src], 8 (p[MEMLAT]) xor tmp3[0] = r[MEMLAT], charx8 @@ -149,8 +149,8 @@ ENTRY(memccpy) (p[MEMLAT+2]) st8 [dest] = r[MEMLAT+2], 8 br.ctop.dptk .l3 .cpyfew: - cmp.eq p6, p0 = len, r0 // is len == 0 ? - adds len = -1, len // --len; + cmp.eq p6, p0 = len, r0 /* is len == 0 ? */ + adds len = -1, len /* --len; */ (p6) br.cond.spnt .restore_and_exit ;; mov ar.lc = len .l4: @@ -163,14 +163,14 @@ ENTRY(memccpy) .foundit: (p6) mov ret0 = dest .restore_and_exit: - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter - mov ar.ec = saved_ec ;; // restore the epilog counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ + mov ar.ec = saved_ec ;; /* restore the epilog counter */ br.ret.sptk.many b0 .gotit: .pred.rel "mutex" p6, p7 -(p6) mov value = val[3] // if coming from l2 -(p7) mov value = r[MEMLAT+2] // if coming from l3 +(p6) mov value = val[3] /* if coming from l2 */ +(p7) mov value = r[MEMLAT+2] /* if coming from l3 */ mov ar.lc = pos0[1] ;; .l5: extr.u tmp = value, 0, 8 ;; diff --git a/libc/string/ia64/memchr.S b/libc/string/ia64/memchr.S index 2bf078fe6..0246b5997 100644 --- a/libc/string/ia64/memchr.S +++ b/libc/string/ia64/memchr.S @@ -62,18 +62,18 @@ ENTRY(__memchr) .rotr value[MEMLAT+1], addr[MEMLAT+3], aux[2], poschr[2] .rotp p[MEMLAT+3] .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .save pr, saved_pr - mov saved_pr = pr // save the predicates + mov saved_pr = pr /* save the predicates */ .body mov ret0 = str - and tmp = 7, str // tmp = str % 8 - cmp.ne p7, p0 = r0, r0 // clear p7 - extr.u chr = in1, 0, 8 // chr = (unsigned char) in1 + and tmp = 7, str /* tmp = str % 8 */ + cmp.ne p7, p0 = r0, r0 /* clear p7 */ + extr.u chr = in1, 0, 8 /* chr = (unsigned char) in1 */ mov len = in2 - cmp.gtu p6, p0 = 16, in2 // use a simple loop for short -(p6) br.cond.spnt .srchfew ;; // searches - sub loopcnt = 8, tmp // loopcnt = 8 - tmp + cmp.gtu p6, p0 = 16, in2 /* use a simple loop for short */ +(p6) br.cond.spnt .srchfew ;; /* searches */ + sub loopcnt = 8, tmp /* loopcnt = 8 - tmp */ cmp.eq p6, p0 = tmp, r0 (p6) br.cond.sptk .str_aligned;; sub len = len, loopcnt @@ -86,12 +86,12 @@ ENTRY(__memchr) (p6) br.cond.spnt .foundit br.cloop.sptk .l1 ;; .str_aligned: - cmp.ne p6, p0 = r0, r0 // clear p6 - shr.u loopcnt = len, 3 // loopcnt = len / 8 - and len = 7, len ;; // remaining len = len & 7 + cmp.ne p6, p0 = r0, r0 /* clear p6 */ + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + and len = 7, len ;; /* remaining len = len & 7 */ adds loopcnt = -1, loopcnt mov ar.ec = MEMLAT + 3 - mux1 chrx8 = chr, @brcst ;; // get a word full of chr + mux1 chrx8 = chr, @brcst ;; /* get a word full of chr */ mov ar.lc = loopcnt mov pr.rot = 1 << 16 ;; .l2: @@ -114,12 +114,12 @@ ENTRY(__memchr) (p6) br.cond.dpnt .foundit br.cloop.sptk .l3 ;; .notfound: - cmp.ne p6, p0 = r0, r0 // clear p6 (p7 was already 0 when we got here) - mov ret0 = r0 ;; // return NULL + cmp.ne p6, p0 = r0, r0 /* clear p6 (p7 was already 0 when we got here) */ + mov ret0 = r0 ;; /* return NULL */ .foundit: .pred.rel "mutex" p6, p7 -(p6) adds ret0 = -1, ret0 // if we got here from l1 or l3 -(p7) add ret0 = addr[MEMLAT+2], poschr[1] // if we got here from l2 +(p6) adds ret0 = -1, ret0 /* if we got here from l1 or l3 */ +(p7) add ret0 = addr[MEMLAT+2], poschr[1] /* if we got here from l2 */ mov pr = saved_pr, -1 mov ar.lc = saved_lc br.ret.sptk.many b0 diff --git a/libc/string/ia64/memcmp.S b/libc/string/ia64/memcmp.S index 8b0c096ce..adb1a20de 100644 --- a/libc/string/ia64/memcmp.S +++ b/libc/string/ia64/memcmp.S @@ -28,7 +28,7 @@ In this form, it assumes little endian mode. For big endian mode, the the two shifts in .l2 must be inverted: - shl tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 << sh1 + shl tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 << sh1 shr.u tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 >> sh2 and all the mux1 instructions should be replaced by plain mov's. */ @@ -36,8 +36,8 @@ #include "sysdep.h" #undef ret -#define OP_T_THRES 16 -#define OPSIZ 8 +#define OP_T_THRES 16 +#define OPSIZ 8 #define MEMLAT 2 #define start r15 @@ -56,85 +56,85 @@ ENTRY(memcmp) .prologue - alloc r2 = ar.pfs, 3, 37, 0, 40 + alloc r2 = ar.pfs, 3, 37, 0, 40 .rotr r[MEMLAT + 2], q[MEMLAT + 5], tmp1[4], tmp2[4], val[2] .rotp p[MEMLAT + 4 + 1] - mov ret0 = r0 // by default return value = 0 + mov ret0 = r0 /* by default return value = 0 */ .save pr, saved_pr - mov saved_pr = pr // save the predicate registers + mov saved_pr = pr /* save the predicate registers */ .save ar.lc, saved_lc - mov saved_lc = ar.lc // save the loop counter + mov saved_lc = ar.lc /* save the loop counter */ .body - mov dest = in0 // dest - mov src = in1 // src - mov len = in2 // len - sub tmp = r0, in0 // tmp = -dest + mov dest = in0 /* dest */ + mov src = in1 /* src */ + mov len = in2 /* len */ + sub tmp = r0, in0 /* tmp = -dest */ ;; - and loopcnt = 7, tmp // loopcnt = -dest % 8 - cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES -(p6) br.cond.spnt .cmpfew // compare byte by byte + and loopcnt = 7, tmp /* loopcnt = -dest % 8 */ + cmp.ge p6, p0 = OP_T_THRES, len /* is len <= OP_T_THRES */ +(p6) br.cond.spnt .cmpfew /* compare byte by byte */ ;; cmp.eq p6, p0 = loopcnt, r0 (p6) br.cond.sptk .dest_aligned - sub len = len, loopcnt // len -= -dest % 8 - adds loopcnt = -1, loopcnt // --loopcnt + sub len = len, loopcnt /* len -= -dest % 8 */ + adds loopcnt = -1, loopcnt /* --loopcnt */ ;; mov ar.lc = loopcnt -.l1: // copy -dest % 8 bytes - ld1 value1 = [src], 1 // value = *src++ +.l1: /* copy -dest % 8 bytes */ + ld1 value1 = [src], 1 /* value = *src++ */ ld1 value2 = [dest], 1 ;; cmp.ne p6, p0 = value1, value2 (p6) br.cond.spnt .done br.cloop.dptk .l1 .dest_aligned: - and sh1 = 7, src // sh1 = src % 8 - and tmp = -8, len // tmp = len & -OPSIZ - and asrc = -8, src // asrc = src & -OPSIZ -- align src - shr.u loopcnt = len, 3 // loopcnt = len / 8 - and len = 7, len ;; // len = len % 8 - shl sh1 = sh1, 3 // sh1 = 8 * (src % 8) - adds loopcnt = -1, loopcnt // --loopcnt - mov pr.rot = 1 << 16 ;; // set rotating predicates - sub sh2 = 64, sh1 // sh2 = 64 - sh1 - mov ar.lc = loopcnt // set LC - cmp.eq p6, p0 = sh1, r0 // is the src aligned? + and sh1 = 7, src /* sh1 = src % 8 */ + and tmp = -8, len /* tmp = len & -OPSIZ */ + and asrc = -8, src /* asrc = src & -OPSIZ -- align src */ + shr.u loopcnt = len, 3 /* loopcnt = len / 8 */ + and len = 7, len ;; /* len = len % 8 */ + shl sh1 = sh1, 3 /* sh1 = 8 * (src % 8) */ + adds loopcnt = -1, loopcnt /* --loopcnt */ + mov pr.rot = 1 << 16 ;; /* set rotating predicates */ + sub sh2 = 64, sh1 /* sh2 = 64 - sh1 */ + mov ar.lc = loopcnt /* set LC */ + cmp.eq p6, p0 = sh1, r0 /* is the src aligned? */ (p6) br.cond.sptk .src_aligned - add src = src, tmp // src += len & -OPSIZ - mov ar.ec = MEMLAT + 4 + 1 // four more passes needed - ld8 r[1] = [asrc], 8 ;; // r[1] = w0 + add src = src, tmp /* src += len & -OPSIZ */ + mov ar.ec = MEMLAT + 4 + 1 /* four more passes needed */ + ld8 r[1] = [asrc], 8 ;; /* r[1] = w0 */ .align 32 -// We enter this loop with p6 cleared by the above comparison +/* We enter this loop with p6 cleared by the above comparison */ .l2: -(p[0]) ld8 r[0] = [asrc], 8 // r[0] = w1 +(p[0]) ld8 r[0] = [asrc], 8 /* r[0] = w1 */ (p[0]) ld8 q[0] = [dest], 8 -(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 >> sh1 -(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 << sh2 +(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 /* tmp1 = w0 >> sh1 */ +(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 /* tmp2 = w1 << sh2 */ (p[MEMLAT+4]) cmp.ne p6, p0 = q[MEMLAT + 4], val[1] -(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] // val = tmp1 | tmp2 +(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] /* val = tmp1 | tmp2 */ (p6) br.cond.spnt .l2exit br.ctop.sptk .l2 br.cond.sptk .cmpfew .l3exit: mux1 value1 = r[MEMLAT], @rev mux1 value2 = q[MEMLAT], @rev - cmp.ne p6, p0 = r0, r0 ;; // clear p6 + cmp.ne p6, p0 = r0, r0 ;; /* clear p6 */ .l2exit: (p6) mux1 value1 = val[1], @rev (p6) mux1 value2 = q[MEMLAT + 4], @rev ;; cmp.ltu p6, p7 = value2, value1 ;; (p6) mov ret0 = -1 (p7) mov ret0 = 1 - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 .src_aligned: - cmp.ne p6, p0 = r0, r0 // clear p6 - mov ar.ec = MEMLAT + 1 ;; // set EC + cmp.ne p6, p0 = r0, r0 /* clear p6 */ + mov ar.ec = MEMLAT + 1 ;; /* set EC */ .l3: (p[0]) ld8 r[0] = [src], 8 (p[0]) ld8 q[0] = [dest], 8 @@ -142,8 +142,8 @@ ENTRY(memcmp) (p6) br.cond.spnt .l3exit br.ctop.dptk .l3 ;; .cmpfew: - cmp.eq p6, p0 = len, r0 // is len == 0 ? - adds len = -1, len // --len; + cmp.eq p6, p0 = len, r0 /* is len == 0 ? */ + adds len = -1, len /* --len; */ (p6) br.cond.spnt .restore_and_exit ;; mov ar.lc = len .l4: @@ -154,10 +154,10 @@ ENTRY(memcmp) (p6) br.cond.spnt .done br.cloop.dptk .l4 ;; .done: -(p6) sub ret0 = value2, value1 // don't execute it if falling thru +(p6) sub ret0 = value2, value1 /* don't execute it if falling thru */ .restore_and_exit: - mov pr = saved_pr, -1 // restore the predicate registers - mov ar.lc = saved_lc // restore the loop counter + mov pr = saved_pr, -1 /* restore the predicate registers */ + mov ar.lc = saved_lc /* restore the loop counter */ br.ret.sptk.many b0 END(memcmp) libc_hidden_def (memcmp) diff --git a/libc/string/ia64/memcpy.S b/libc/string/ia64/memcpy.S index 810eb0c0e..6c48a72d9 100644 --- a/libc/string/ia64/memcpy.S +++ b/libc/string/ia64/memcpy.S @@ -42,8 +42,8 @@ #define LFETCH_DIST 500 -#define ALIGN_UNROLL_no 4 // no. of elements -#define ALIGN_UNROLL_sh 2 // (shift amount) +#define ALIGN_UNROLL_no 4 /* no. of elements */ +#define ALIGN_UNROLL_sh 2 /* (shift amount) */ #define MEMLAT 8 #define Nrot ((4*(MEMLAT+2) + 7) & ~7) @@ -168,76 +168,76 @@ ENTRY(memcpy) .rotr r[MEMLAT+1], s[MEMLAT+2], q[MEMLAT+1], t[MEMLAT+1] .rotp p[MEMLAT+2] .rotf fr[MEMLAT+1], fq[MEMLAT+1], fs[MEMLAT+1], ft[MEMLAT+1] - mov ret0 = in0 // return tmp2 = dest + mov ret0 = in0 /* return tmp2 = dest */ .save pr, saved_pr - movi0 saved_pr = pr // save the predicate registers + movi0 saved_pr = pr /* save the predicate registers */ } { .mmi - and tmp4 = 7, in0 // check if destination is aligned - mov dest = in0 // dest - mov src = in1 // src + and tmp4 = 7, in0 /* check if destination is aligned */ + mov dest = in0 /* dest */ + mov src = in1 /* src */ ;; } { .mii - cmp.eq p_scr, p0 = in2, r0 // if (len == 0) + cmp.eq p_scr, p0 = in2, r0 /* if (len == 0) */ .save ar.lc, saved_lc - movi0 saved_lc = ar.lc // save the loop counter + movi0 saved_lc = ar.lc /* save the loop counter */ .body - cmp.ge p_few, p0 = OP_T_THRES, in2 // is len <= OP_T_THRESH + cmp.ge p_few, p0 = OP_T_THRES, in2 /* is len <= OP_T_THRESH */ } { .mbb - mov len = in2 // len -(p_scr) br.cond.dpnt.few .restore_and_exit // Branch no. 1: return dest -(p_few) br.cond.dpnt.many .copy_bytes // Branch no. 2: copy byte by byte + mov len = in2 /* len */ +(p_scr) br.cond.dpnt.few .restore_and_exit /* Branch no. 1: return dest */ +(p_few) br.cond.dpnt.many .copy_bytes /* Branch no. 2: copy byte by byte */ ;; } { .mmi #if defined(USE_LFETCH) - lfetch.nt1 [dest] // - lfetch.nt1 [src] // + lfetch.nt1 [dest] /* */ + lfetch.nt1 [src] /* */ #endif - shr.u elemcnt = len, 3 // elemcnt = len / 8 + shr.u elemcnt = len, 3 /* elemcnt = len / 8 */ } { .mib - cmp.eq p_scr, p0 = tmp4, r0 // is destination aligned? - sub loopcnt = 7, tmp4 // + cmp.eq p_scr, p0 = tmp4, r0 /* is destination aligned? */ + sub loopcnt = 7, tmp4 /* */ (p_scr) br.cond.dptk.many .dest_aligned ;; } { .mmi - ld1 tmp2 = [src], 1 // - sub len = len, loopcnt, 1 // reduce len - movi0 ar.lc = loopcnt // + ld1 tmp2 = [src], 1 /* */ + sub len = len, loopcnt, 1 /* reduce len */ + movi0 ar.lc = loopcnt /* */ } { .mib - cmp.ne p_scr, p0 = 0, loopcnt // avoid loading beyond end-point + cmp.ne p_scr, p0 = 0, loopcnt /* avoid loading beyond end-point */ ;; } -.l0: // ---------------------------- // L0: Align src on 8-byte boundary +.l0: /* ---------------------------- L0: Align src on 8-byte boundary */ { .mmi - st1 [dest] = tmp2, 1 // -(p_scr) ld1 tmp2 = [src], 1 // + st1 [dest] = tmp2, 1 /* */ +(p_scr) ld1 tmp2 = [src], 1 /* */ } { .mib - cmp.lt p_scr, p0 = 1, loopcnt // avoid load beyond end-point + cmp.lt p_scr, p0 = 1, loopcnt /* avoid load beyond end-point */ add loopcnt = -1, loopcnt - br.cloop.dptk.few .l0 // + br.cloop.dptk.few .l0 /* */ ;; } .dest_aligned: { .mmi - and tmp4 = 7, src // ready for alignment check - shr.u elemcnt = len, 3 // elemcnt = len / 8 + and tmp4 = 7, src /* ready for alignment check */ + shr.u elemcnt = len, 3 /* elemcnt = len / 8 */ ;; } { .mib - cmp.ne p_scr, p0 = tmp4, r0 // is source also aligned - tbit.nz p_xtr, p_nxtr = src, 3 // prepare a separate move if src -} { .mib // is not 16B aligned - add ptr2 = LFETCH_DIST, dest // prefetch address + cmp.ne p_scr, p0 = tmp4, r0 /* is source also aligned */ + tbit.nz p_xtr, p_nxtr = src, 3 /* prepare a separate move if src */ +} { .mib /* is not 16B aligned */ + add ptr2 = LFETCH_DIST, dest /* prefetch address */ add ptr1 = LFETCH_DIST, src (p_scr) br.cond.dptk.many .src_not_aligned ;; } -// The optimal case, when dest, and src are aligned +/* The optimal case, when dest, and src are aligned */ .both_aligned: { .mmi .pred.rel "mutex",p_xtr,p_nxtr -(p_xtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no+1, elemcnt // Need N + 1 to qualify -(p_nxtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no, elemcnt // Need only N to qualify - movi0 pr.rot = 1 << 16 // set rotating predicates +(p_xtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no+1, elemcnt /* Need N + 1 to qualify */ +(p_nxtr) cmp.gt p_scr, p0 = ALIGN_UNROLL_no, elemcnt /* Need only N to qualify */ + movi0 pr.rot = 1 << 16 /* set rotating predicates */ } { .mib (p_scr) br.cond.dpnt.many .copy_full_words ;; } @@ -245,21 +245,21 @@ ENTRY(memcpy) { .mmi (p_xtr) load tempreg = [src], 8 (p_xtr) add elemcnt = -1, elemcnt - movi0 ar.ec = MEMLAT + 1 // set the epilog counter + movi0 ar.ec = MEMLAT + 1 /* set the epilog counter */ ;; } { .mmi -(p_xtr) add len = -8, len // - add asrc = 16, src // one bank apart (for USE_INT) - shr.u loopcnt = elemcnt, ALIGN_UNROLL_sh // cater for unrolling +(p_xtr) add len = -8, len /* */ + add asrc = 16, src /* one bank apart (for USE_INT) */ + shr.u loopcnt = elemcnt, ALIGN_UNROLL_sh /* cater for unrolling */ ;;} { .mmi add loopcnt = -1, loopcnt -(p_xtr) store [dest] = tempreg, 8 // copy the "extra" word +(p_xtr) store [dest] = tempreg, 8 /* copy the "extra" word */ nop.i 0 ;; } { .mib add adest = 16, dest - movi0 ar.lc = loopcnt // set the loop counter + movi0 ar.lc = loopcnt /* set the loop counter */ ;; } #ifdef GAS_ALIGN_BREAKS_UNWIND_INFO @@ -268,7 +268,7 @@ ENTRY(memcpy) .align 32 #endif #if defined(USE_FLP) -.l1: // ------------------------------- // L1: Everything a multiple of 8 +.l1: /* ------------------------------- L1: Everything a multiple of 8 */ { .mmi #if defined(USE_LFETCH) (p[0]) lfetch.nt1 [ptr2],32 @@ -290,7 +290,7 @@ ENTRY(memcpy) br.ctop.dptk.many .l1 ;; } #elif defined(USE_INT) -.l1: // ------------------------------- // L1: Everything a multiple of 8 +.l1: /* ------------------------------- L1: Everything a multiple of 8 */ { .mmi (p[0]) load the_r[0] = [src], 8 (p[0]) load the_q[0] = [asrc], 8 @@ -317,58 +317,58 @@ ENTRY(memcpy) .copy_full_words: { .mib - cmp.gt p_scr, p0 = 8, len // - shr.u elemcnt = len, 3 // + cmp.gt p_scr, p0 = 8, len /* */ + shr.u elemcnt = len, 3 /* */ (p_scr) br.cond.dpnt.many .copy_bytes ;; } { .mii load tempreg = [src], 8 - add loopcnt = -1, elemcnt // + add loopcnt = -1, elemcnt /* */ ;; } { .mii - cmp.ne p_scr, p0 = 0, loopcnt // - mov ar.lc = loopcnt // + cmp.ne p_scr, p0 = 0, loopcnt /* */ + mov ar.lc = loopcnt /* */ ;; } -.l2: // ------------------------------- // L2: Max 4 words copied separately +.l2: /* ------------------------------- L2: Max 4 words copied separately */ { .mmi store [dest] = tempreg, 8 -(p_scr) load tempreg = [src], 8 // +(p_scr) load tempreg = [src], 8 /* */ add len = -8, len } { .mib - cmp. |