/* memmove implementation for SH4
 *
 * Copyright (C) 2009 STMicroelectronics Ltd.
 *
 * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *
 * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
 */

#ifndef __SH_FPU_ANY__
#include "../../generic/memmove.c"
#else

#include <string.h>

/* FPSCR.SZ (bit 20): when set, fmov transfers a 64-bit register pair. */
#define FPSCR_SR	(1 << 20)

#define STORE_FPSCR(x)	__asm__ __volatile__("sts fpscr, %0" : "=r"(x))
#define LOAD_FPSCR(x)	__asm__ __volatile__("lds %0, fpscr" : : "r"(x))

static void fpu_optimised_copy_fwd(void *dest, const void *src, size_t len)
{
	char *d = (char *)dest;
	char *s = (char *)src;

	if (len >= 64) {
		unsigned long fpscr;
		int *s1;
		int *d1;

		/* Align dest to an 8-byte boundary. */
		while ((unsigned)d & 0x7) {
			*d++ = *s++;
			len--;
		}

		s1 = (int *)s;
		d1 = (int *)d;

		/* The FPU path also needs src to be 8-byte aligned. */
		if (!((unsigned)s1 & 0x7)) {
			/* Align dest to a 32-byte cache-line boundary. */
			while ((unsigned)d1 & 0x1c) {
				*d1++ = *s1++;
				len -= 4;
			}

			/* Switch to paired single-precision mode so each
			 * fmov transfers 64 bits. */
			STORE_FPSCR(fpscr);
			LOAD_FPSCR(FPSCR_SR);

			while (len >= 32) {
				__asm__ __volatile__ ("fmov @%0+,dr0":"+r" (s1));
				__asm__ __volatile__ ("fmov @%0+,dr2":"+r" (s1));
				__asm__ __volatile__ ("fmov @%0+,dr4":"+r" (s1));
				__asm__ __volatile__ ("fmov @%0+,dr6":"+r" (s1));
				__asm__ __volatile__ ("fmov dr0,@%0"::"r" (d1):"memory");
				d1 += 2;
				__asm__ __volatile__ ("fmov dr2,@%0"::"r" (d1):"memory");
				d1 += 2;
				__asm__ __volatile__ ("fmov dr4,@%0"::"r" (d1):"memory");
				d1 += 2;
				__asm__ __volatile__ ("fmov dr6,@%0"::"r" (d1):"memory");
				d1 += 2;
				len -= 32;
			}

			/* Restore the caller's FPSCR. */
			LOAD_FPSCR(fpscr);
		}
		s = (char *)s1;
		d = (char *)d1;
		/* TODO: other alignment sub-cases could be handled here. */
	}

	/* Copy the remaining bytes one at a time. */
	while (len > 0) {
		*d++ = *s++;
		len--;
	}
	return;
}

void *memmove(void *dest, const void *src, size_t len)
{
	unsigned long int d = (unsigned long int)dest;
	unsigned long int s = (unsigned long int)src;
	unsigned long int res;

	if (d >= s)
		res = d - s;
	else
		res = s - d;
	/*
	 * 1) dest and src do not overlap      ==> memcpy (BWD/FWD)
	 * 2) dest and src overlap completely  ==> memcpy (BWD/FWD)
	 * 3) left-to-right overlap (d < s)    ==> copy from the beginning to the end
	 * 4) right-to-left overlap (d > s)    ==> copy from the end to the beginning
	 */
	if (res == 0)		/* 100% overlap */
		memcpy(dest, src, len);
	else if (res >= len)	/* no overlap */
		memcpy(dest, src, len);
	else {
		if (d > s)	/* right-to-left overlap: safe, the SH4 memcpy copies backwards */
			memcpy(dest, src, len);
		else		/* left-to-right overlap: cannot use the SH4 memcpy, copy forwards */
			fpu_optimised_copy_fwd(dest, src, len);
	}
	return (dest);
}
libc_hidden_def(memmove)

#endif /*__SH_FPU_ANY__ */
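
/*
 * A minimal self-test sketch, not part of the original file or of the uClibc
 * build: the MEMMOVE_SELFTEST guard and the test below are illustrative
 * assumptions, added only to show the overlap cases the code above has to
 * handle.  It is compiled only if MEMMOVE_SELFTEST is defined by hand.
 */
#ifdef MEMMOVE_SELFTEST
#include <assert.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[16];

	/* Overlap with dest < src (left-to-right): must copy forwards. */
	memcpy(buf, "abcdefghij", 11);
	memmove(buf, buf + 2, 9);	/* the 9 bytes include the '\0' */
	assert(strcmp(buf, "cdefghij") == 0);

	/* Overlap with dest > src (right-to-left): must copy backwards. */
	memcpy(buf, "abcdefghij", 11);
	memmove(buf + 2, buf, 9);
	assert(memcmp(buf, "ababcdefghi", 11) == 0);

	puts("memmove self-test passed");
	return 0;
}
#endif /* MEMMOVE_SELFTEST */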