summaryrefslogtreecommitdiff
path: root/libc/string/generic/_memcpy_fwd.c
diff options
context:
space:
mode:
authorCarmelo Amoroso <carmelo.amoroso@st.com>2008-09-09 16:55:27 +0000
committerCarmelo Amoroso <carmelo.amoroso@st.com>2008-09-09 16:55:27 +0000
commite4f55f33f69fce85099dd5936cc74856aa1b453d (patch)
treef35d6680197aa43098c99291e909150bf52f409a /libc/string/generic/_memcpy_fwd.c
parenta79016198c859a3388584ac7782d760f349e2d67 (diff)
Add optimized memcpy implementation for sh4 (from Stuart Menefy @STMicroelectronics).
This implementation is based on 'backward copying'. Signed-off-by: Carmelo Amoroso <carmelo.amoroso@st.com>
Diffstat (limited to 'libc/string/generic/_memcpy_fwd.c')
-rw-r--r--libc/string/generic/_memcpy_fwd.c185
1 files changed, 185 insertions, 0 deletions
diff --git a/libc/string/generic/_memcpy_fwd.c b/libc/string/generic/_memcpy_fwd.c
new file mode 100644
index 000000000..470165a57
--- /dev/null
+++ b/libc/string/generic/_memcpy_fwd.c
@@ -0,0 +1,185 @@
+/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
+
+static void _wordcopy_fwd_aligned (long int dstp, long int srcp, size_t len)
+{
+ op_t a0, a1;
+
+ switch (len % 8)
+ {
+ case 2:
+ a0 = ((op_t *) srcp)[0];
+ srcp -= 6 * OPSIZ;
+ dstp -= 7 * OPSIZ;
+ len += 6;
+ goto do1;
+ case 3:
+ a1 = ((op_t *) srcp)[0];
+ srcp -= 5 * OPSIZ;
+ dstp -= 6 * OPSIZ;
+ len += 5;
+ goto do2;
+ case 4:
+ a0 = ((op_t *) srcp)[0];
+ srcp -= 4 * OPSIZ;
+ dstp -= 5 * OPSIZ;
+ len += 4;
+ goto do3;
+ case 5:
+ a1 = ((op_t *) srcp)[0];
+ srcp -= 3 * OPSIZ;
+ dstp -= 4 * OPSIZ;
+ len += 3;
+ goto do4;
+ case 6:
+ a0 = ((op_t *) srcp)[0];
+ srcp -= 2 * OPSIZ;
+ dstp -= 3 * OPSIZ;
+ len += 2;
+ goto do5;
+ case 7:
+ a1 = ((op_t *) srcp)[0];
+ srcp -= 1 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+ len += 1;
+ goto do6;
+
+ case 0:
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ return;
+ a0 = ((op_t *) srcp)[0];
+ srcp -= 0 * OPSIZ;
+ dstp -= 1 * OPSIZ;
+ goto do7;
+ case 1:
+ a1 = ((op_t *) srcp)[0];
+ srcp -=-1 * OPSIZ;
+ dstp -= 0 * OPSIZ;
+ len -= 1;
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ goto do0;
+ goto do8; /* No-op. */
+ }
+
+ do
+ {
+ do8:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a1;
+ do7:
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[1] = a0;
+ do6:
+ a0 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[2] = a1;
+ do5:
+ a1 = ((op_t *) srcp)[3];
+ ((op_t *) dstp)[3] = a0;
+ do4:
+ a0 = ((op_t *) srcp)[4];
+ ((op_t *) dstp)[4] = a1;
+ do3:
+ a1 = ((op_t *) srcp)[5];
+ ((op_t *) dstp)[5] = a0;
+ do2:
+ a0 = ((op_t *) srcp)[6];
+ ((op_t *) dstp)[6] = a1;
+ do1:
+ a1 = ((op_t *) srcp)[7];
+ ((op_t *) dstp)[7] = a0;
+
+ srcp += 8 * OPSIZ;
+ dstp += 8 * OPSIZ;
+ len -= 8;
+ }
+ while (len != 0);
+
+ /* This is the right position for do0. Please don't move
+ it into the loop. */
+ do0:
+ ((op_t *) dstp)[0] = a1;
+}
+
+/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ DSTP should be aligned for memory operations on `op_t's, but SRCP must
+ *not* be aligned. */
+
+static void _wordcopy_fwd_dest_aligned (long int dstp, long int srcp, size_t len)
+{
+ op_t a0, a1, a2, a3;
+ int sh_1, sh_2;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+
+ switch (len % 4)
+ {
+ case 2:
+ a1 = ((op_t *) srcp)[0];
+ a2 = ((op_t *) srcp)[1];
+ srcp -= 1 * OPSIZ;
+ dstp -= 3 * OPSIZ;
+ len += 2;
+ goto do1;
+ case 3:
+ a0 = ((op_t *) srcp)[0];
+ a1 = ((op_t *) srcp)[1];
+ srcp -= 0 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+ len += 1;
+ goto do2;
+ case 0:
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ return;
+ a3 = ((op_t *) srcp)[0];
+ a0 = ((op_t *) srcp)[1];
+ srcp -=-1 * OPSIZ;
+ dstp -= 1 * OPSIZ;
+ len += 0;
+ goto do3;
+ case 1:
+ a2 = ((op_t *) srcp)[0];
+ a3 = ((op_t *) srcp)[1];
+ srcp -=-2 * OPSIZ;
+ dstp -= 0 * OPSIZ;
+ len -= 1;
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ goto do0;
+ goto do4; /* No-op. */
+ }
+
+ do
+ {
+ do4:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
+ do3:
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2);
+ do2:
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2);
+ do1:
+ a3 = ((op_t *) srcp)[3];
+ ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2);
+
+ srcp += 4 * OPSIZ;
+ dstp += 4 * OPSIZ;
+ len -= 4;
+ }
+ while (len != 0);
+
+ /* This is the right position for do0. Please don't move
+ it into the loop. */
+ do0:
+ ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
+}