From 59214098c8ddda66ec7e9954b440cc67a9dbea2f Mon Sep 17 00:00:00 2001 From: Austin Foxley Date: Sun, 29 Nov 2009 16:49:11 -0800 Subject: test/stat: fix compiling the memcmp-stat test when __USE_MISC is defined Signed-off-by: Austin Foxley --- test/stat/memcmp-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/stat/memcmp-stat.c b/test/stat/memcmp-stat.c index c38e3ff88..254c754c4 100644 --- a/test/stat/memcmp-stat.c +++ b/test/stat/memcmp-stat.c @@ -48,7 +48,7 @@ static void show_stat(struct stat *st) (long int)st->st_size, (long int)st->st_blksize, (long int)st->st_blocks, -#ifndef __UCLIBC__ +#if !defined(__UCLIBC__) || defined(__USE_MISC) (long int)st->st_atime, (long int)st->st_atim.tv_nsec, (long int)st->st_mtime, -- cgit v1.2.3 From 263d2b8c93c3447115e15944ffec34991077d0c3 Mon Sep 17 00:00:00 2001 From: Austin Foxley Date: Sun, 29 Nov 2009 17:00:38 -0800 Subject: test/pthread: fix build of cancellation-points test Signed-off-by: Austin Foxley --- test/pthread/cancellation-points.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/pthread/cancellation-points.c b/test/pthread/cancellation-points.c index af5df3aa1..c7e784e4f 100644 --- a/test/pthread/cancellation-points.c +++ b/test/pthread/cancellation-points.c @@ -29,12 +29,12 @@ /* take care of optional things ... 
*/ #define STUB(func, args) static void func args { sleep(0); } -#if !defined(__UCLIBC__) || defined(__UCLIBC_AIO__) +#if defined(__UCLIBC_AIO__) # include <aio.h> #else STUB(aio_suspend, (void *p, int n, const void *p2)) #endif -#if !defined(__UCLIBC__) || defined(__UCLIBC_STROPTS__) +#if defined(__UCLIBC_STROPTS__) # include <stropts.h> #else STUB(getmsg, (int f, void *p, void *p2, void *p3)) -- cgit v1.2.3 From 98f2724767551b9644f17e586356d2211172c8d7 Mon Sep 17 00:00:00 2001 From: Austin Foxley Date: Sun, 29 Nov 2009 17:02:43 -0800 Subject: test/dlopen: Match the function name that libtest actually sets Signed-off-by: Austin Foxley --- test/dlopen/dltest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/dlopen/dltest.c b/test/dlopen/dltest.c index 6bec6e00e..b5fa1cdd7 100644 --- a/test/dlopen/dltest.c +++ b/test/dlopen/dltest.c @@ -25,7 +25,7 @@ int main(int argc, char **argv) } mydltest(&value1, &value2); - printf("dltest: __pthread_once=%p\n", value1); + printf("dltest: pthread_once=%p\n", value1); printf("dltest: pthread_self=%p\n", value2); if (value1 == value2) { ret = EXIT_FAILURE; -- cgit v1.2.3 From b9a0b554737056b324ed154c08e37bf700353149 Mon Sep 17 00:00:00 2001 From: Austin Foxley Date: Sun, 29 Nov 2009 17:20:11 -0800 Subject: test/dlopen: fix build for libtest.so needs -lpthread, recent addition of -z,defs and -z,now exposed this Signed-off-by: Austin Foxley --- test/dlopen/Makefile.in | 1 + 1 file changed, 1 insertion(+) diff --git a/test/dlopen/Makefile.in b/test/dlopen/Makefile.in index 69f493724..5d4d2fb36 100644 --- a/test/dlopen/Makefile.in +++ b/test/dlopen/Makefile.in @@ -33,6 +33,7 @@ test1: libtest1.so test2: libtest1.so libtest2.so test3: libtest1.so libtest2.so libtest1.so: libtest2.so +LDFLAGS_libtest.so := -lpthread LDFLAGS_libtest1.so := ./libtest2.so -Wl,-rpath,. LDFLAGS_libtest2.so := -Wl,-rpath,. LDFLAGS_libtest3.so := -lpthread -Wl,-rpath,. 
-- cgit v1.2.3 From 9275f4baaa55113ee6c1c9ee8b0f6d6a053a9f3e Mon Sep 17 00:00:00 2001 From: Austin Foxley Date: Sun, 29 Nov 2009 20:54:10 -0800 Subject: test/.gitignore: Ignore a few more test binaries Signed-off-by: Austin Foxley --- test/.gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/.gitignore b/test/.gitignore index 4d9df1909..f4fc7c743 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -115,10 +115,13 @@ malloc/tst-mallocfork malloc/tst-mcheck malloc/tst-obstack math/basic-test +math/compile_test math/libm-test-ulps.h math/libm-test.c math/rint math/test-double +math/test-ildoubl +math/test-ldouble math/test-float math/test-fpucw math/test-idouble -- cgit v1.2.3 From 460cb81669214ece60a0104142ae38d9f3ae8fe5 Mon Sep 17 00:00:00 2001 From: Austin Foxley Date: Mon, 30 Nov 2009 20:43:56 -0800 Subject: uClibc_ctype.h: fix inverted check for susv4 macro Was causing build failures if ctype tables were disabled and susv4 was on Signed-off-by: Austin Foxley --- libc/sysdeps/linux/common/bits/uClibc_ctype.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/sysdeps/linux/common/bits/uClibc_ctype.h b/libc/sysdeps/linux/common/bits/uClibc_ctype.h index 22d2df03a..3bf4e1b28 100644 --- a/libc/sysdeps/linux/common/bits/uClibc_ctype.h +++ b/libc/sysdeps/linux/common/bits/uClibc_ctype.h @@ -103,7 +103,7 @@ __BEGIN_DECLS /* Now some non-ansi/iso c99 macros. */ -#ifndef __UCLIBC_SUSV4_LEGACY__ +#ifdef __UCLIBC_SUSV4_LEGACY__ #define __isascii(c) (((c) & ~0x7f) == 0) #define __toascii(c) ((c) & 0x7f) /* Works correctly *only* on lowercase letters! */ -- cgit v1.2.3 From c5c2cf0da5644c96a2ae3db315f51bf91f15723d Mon Sep 17 00:00:00 2001 From: Carmelo Amoroso Date: Thu, 3 Dec 2009 08:06:10 +0100 Subject: sh: Add a prototype for the gcc __set_fpscr internal function The sh port uses the gcc __set_fpscr internal function, but neither gcc nor glibc creates a prototype for it. 
This leads to a bunch of random warnings about implicit decls during the build. Signed-off-by: Mike Frysinger Signed-off-by: Carmelo Amoroso --- libc/sysdeps/linux/sh/fpu_control.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libc/sysdeps/linux/sh/fpu_control.h b/libc/sysdeps/linux/sh/fpu_control.h index db3cc4557..cbd889ece 100644 --- a/libc/sysdeps/linux/sh/fpu_control.h +++ b/libc/sysdeps/linux/sh/fpu_control.h @@ -1,5 +1,5 @@ /* FPU control word definitions. SH version. - Copyright (C) 1999, 2000 Free Software Foundation, Inc. + Copyright (C) 1999, 2000, 2009 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -47,6 +47,8 @@ typedef unsigned int fpu_control_t; #define _FPU_GETCW(cw) __asm__ ("sts fpscr,%0" : "=r" (cw)) #if defined __GNUC__ +/* GCC provides this function */ +extern void __set_fpscr (unsigned long); #define _FPU_SETCW(cw) __set_fpscr ((cw)) #else #define _FPU_SETCW(cw) __asm__ ("lds %0,fpscr" : : "r" (cw)) -- cgit v1.2.3 From d67f6eb3ff53bdced973fa4e8386edf092fec2e1 Mon Sep 17 00:00:00 2001 From: Filippo Arcidiacono Date: Wed, 9 Dec 2009 16:24:46 +0100 Subject: test_dlopen: Remove extra -lpthread from LD_FLAGS for dltest{2} dltest and dltest2 do not refer any symbols implemented in pthread library, so do not explicitly link them with pthread. It is required only for the shared objects that are loaded via dlopen by those tests. 
Signed-off-by: Filippo Arcidiacono Signed-off-by: Carmelo Amoroso --- test/dlopen/Makefile.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/dlopen/Makefile.in b/test/dlopen/Makefile.in index 5d4d2fb36..22190d9f9 100644 --- a/test/dlopen/Makefile.in +++ b/test/dlopen/Makefile.in @@ -10,8 +10,8 @@ CFLAGS_dltest := -DLIBNAME="\"./libtest.so\"" CFLAGS_dltest2 := -DLIBNAME="\"./libtest3.so\"" LDFLAGS_dlstatic := -ldl -LDFLAGS_dltest := -ldl -lpthread -LDFLAGS_dltest2 := -ldl -lpthread +LDFLAGS_dltest := -ldl +LDFLAGS_dltest2 := -ldl LDFLAGS_dlundef := -ldl LDFLAGS_dlafk := -ldl ./libafk.so -Wl,-rpath,. LDFLAGS_test1 := -ldl -- cgit v1.2.3 From a92f0c261364774d4f844300dd2c3b11e9deaf02 Mon Sep 17 00:00:00 2001 From: Khem Raj Date: Fri, 11 Dec 2009 09:40:05 -0800 Subject: Include bits/libc-lock.h to get __libc_once_define Signed-off-by: Khem Raj Acked-by: Austin Foxley --- libc/inet/rpc/rpc_thread.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libc/inet/rpc/rpc_thread.c b/libc/inet/rpc/rpc_thread.c index 54781d040..71303b2be 100644 --- a/libc/inet/rpc/rpc_thread.c +++ b/libc/inet/rpc/rpc_thread.c @@ -14,6 +14,7 @@ #ifdef __UCLIBC_HAS_THREADS__ #include +#include <bits/libc-lock.h> /* Variable used in non-threaded applications or for the first thread. */ static struct rpc_thread_variables __libc_tsd_RPC_VARS_mem; -- cgit v1.2.3 From 1eac4f3880f10a4a9702939b60d322b40db08972 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 13 Dec 2009 04:00:52 +0100 Subject: syslog: fix openlog(xx, LOG_KERN) and optimize a bit The fix: logfac == 0 in openlog(xx, logfac) is allowed now. Corresponding internal openlog() call in vsyslog() uses explicit LOG_USER in order to set it as a default facility. Optimizations: mylock is not recursive now, since a single internal call of openlog is converted to a call to openlog_intern which assumes that lock is already taken. No recursive locking is possible now. LogFacility is reduced to byte. 
cache static LogFile in auto variable fd (smaller code). vsyslog with bogus pri parameter wouldn't lock/unlock and mess with signals - it will just return at once. pass NULL as ident string in internal openlog call - same effect as passing LogTag but smaller code. comment out "if (LogTag)" checks - it is never NULL. use the same struct sigaction for setting new sigaction and for saving old one - saves ~32 bytes of stack. Signed-off-by: Denys Vlasenko --- libc/misc/syslog/syslog.c | 102 ++++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 45 deletions(-) diff --git a/libc/misc/syslog/syslog.c b/libc/misc/syslog/syslog.c index 794c0c17f..f66ba8faf 100644 --- a/libc/misc/syslog/syslog.c +++ b/libc/misc/syslog/syslog.c @@ -80,22 +80,21 @@ #include - #include -__UCLIBC_MUTEX_STATIC(mylock, PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP); + +__UCLIBC_MUTEX_STATIC(mylock, PTHREAD_MUTEX_INITIALIZER); +static const char *LogTag = "syslog"; /* string to tag the entry with */ static int LogFile = -1; /* fd for log */ static smalluint connected; /* have done connect */ -/* all bits in option argument for openlog() fit in 8 bits */ -static smalluint LogStat = 0; /* status bits, set by openlog() */ -static const char *LogTag = "syslog"; /* string to tag the entry with */ -/* this fits in 8 bits too (LOG_LOCAL7 = 23<<3 = 184), - * but NB: LOG_FACMASK is bigger (= 0x03f8 = 127<<3) for some strange reason. - * Oh well. 
*/ -static int LogFacility = LOG_USER;/* default facility code */ -/* bits mask of priorities (eight prios - 8 bits is enough) */ -static smalluint LogMask = 0xff; /* mask of priorities to be logged */ +/* all bits in option argument for openlog fit in 8 bits */ +static smalluint LogStat = 0; /* status bits, set by openlog */ +/* default facility code if openlog is not called */ +/* (this fits in 8 bits even without >> 3 shift, but playing extra safe) */ +static smalluint LogFacility = LOG_USER >> 3; +/* bits mask of priorities to be logged (eight prios - 8 bits is enough) */ +static smalluint LogMask = 0xff; /* AF_UNIX address of local logger (we use struct sockaddr * instead of struct sockaddr_un since "/dev/log" is small enough) */ static const struct sockaddr SyslogAddr = { @@ -115,45 +114,46 @@ closelog_intern(int sig) if (sig == 0) { /* called from closelog()? - reset to defaults */ LogStat = 0; LogTag = "syslog"; - LogFacility = LOG_USER; + LogFacility = LOG_USER >> 3; LogMask = 0xff; } } -/* - * OPENLOG -- open system log - */ -void -openlog(const char *ident, int logstat, int logfac) +static void +openlog_intern(const char *ident, int logstat, int logfac) { + int fd; int logType = SOCK_DGRAM; - __UCLIBC_MUTEX_LOCK(mylock); - if (ident != NULL) LogTag = ident; LogStat = logstat; - if (logfac != 0 && (logfac &~ LOG_FACMASK) == 0) - LogFacility = logfac; - if (LogFile == -1) { -retry: - if (LogStat & LOG_NDELAY) { - if ((LogFile = socket(AF_UNIX, logType, 0)) == -1) { - goto DONE; + /* (we were checking also for logfac != 0, but it breaks + * openlog(xx, LOG_KERN) since LOG_KERN == 0) */ + if ((logfac & ~LOG_FACMASK) == 0) /* if we don't have invalid bits */ + LogFacility = (unsigned)logfac >> 3; + + fd = LogFile; + if (fd == -1) { + retry: + if (logstat & LOG_NDELAY) { + LogFile = fd = socket(AF_UNIX, logType, 0); + if (fd == -1) { + return; } - fcntl(LogFile, F_SETFD, FD_CLOEXEC); + fcntl(fd, F_SETFD, FD_CLOEXEC); /* We don't want to block if e.g. 
syslogd is SIGSTOPed */ - fcntl(LogFile, F_SETFL, O_NONBLOCK | fcntl(LogFile, F_GETFL)); + fcntl(fd, F_SETFL, O_NONBLOCK | fcntl(fd, F_GETFL)); } } - if (LogFile != -1 && !connected) { - if (connect(LogFile, &SyslogAddr, sizeof(SyslogAddr)) != -1) { + if (fd != -1 && !connected) { + if (connect(fd, &SyslogAddr, sizeof(SyslogAddr)) != -1) { connected = 1; } else { - if (LogFile != -1) { - close(LogFile); - LogFile = -1; + if (fd != -1) { + close(fd); + LogFile = fd = -1; } if (logType == SOCK_DGRAM) { logType = SOCK_STREAM; @@ -161,8 +161,16 @@ retry: } } } +} -DONE: +/* + * OPENLOG -- open system log + */ +void +openlog(const char *ident, int logstat, int logfac) +{ + __UCLIBC_MUTEX_LOCK(mylock); + openlog_intern(ident, logstat, logfac); __UCLIBC_MUTEX_UNLOCK(mylock); } libc_hidden_def(openlog) @@ -180,25 +188,29 @@ vsyslog(int pri, const char *fmt, va_list ap) int fd, saved_errno; int rc; char tbuf[1024]; /* syslogd is unable to handle longer messages */ - struct sigaction action, oldaction; + struct sigaction action; + + /* Just throw out this message if pri has bad bits. */ + if ((pri & ~(LOG_PRIMASK|LOG_FACMASK)) != 0) + return; memset(&action, 0, sizeof(action)); action.sa_handler = closelog_intern; - sigaction(SIGPIPE, &action, &oldaction); + sigaction(SIGPIPE, &action, &action); saved_errno = errno; __UCLIBC_MUTEX_LOCK(mylock); - /* See if we should just throw out this message. */ - if (!(LogMask & LOG_MASK(LOG_PRI(pri))) || (pri &~ (LOG_PRIMASK|LOG_FACMASK))) + /* See if we should just throw out this message according to LogMask. */ + if ((LogMask & LOG_MASK(LOG_PRI(pri))) == 0) goto getout; if (LogFile < 0 || !connected) - openlog(LogTag, LogStat | LOG_NDELAY, 0); + openlog_intern(NULL, LogStat | LOG_NDELAY, LOG_USER); /* Set default facility if none specified. */ if ((pri & LOG_FACMASK) == 0) - pri |= LogFacility; + pri |= ((int)LogFacility << 3); /* Build the message. 
We know the starting part of the message can take * no longer than 64 characters plus length of the LogTag. So it's @@ -206,7 +218,7 @@ vsyslog(int pri, const char *fmt, va_list ap) */ (void)time(&now); stdp = p = tbuf + sprintf(tbuf, "<%d>%.15s ", pri, ctime(&now) + 4); - if (LogTag) { + /*if (LogTag) - always true */ { if (strlen(LogTag) < sizeof(tbuf) - 64) p += sprintf(p, "%s", LogTag); else @@ -214,7 +226,7 @@ vsyslog(int pri, const char *fmt, va_list ap) } if (LogStat & LOG_PID) p += sprintf(p, "[%d]", getpid()); - if (LogTag) { + /*if (LogTag) - always true */ { *p++ = ':'; *p++ = ' '; } @@ -253,7 +265,7 @@ vsyslog(int pri, const char *fmt, va_list ap) /* Output the message to the local logger using NUL as a message delimiter. */ p = tbuf; - *last_chr = 0; + *last_chr = '\0'; if (LogFile >= 0) { do { rc = write(LogFile, p, last_chr + 1 - p); @@ -288,9 +300,9 @@ vsyslog(int pri, const char *fmt, va_list ap) (void)close(fd); } -getout: + getout: __UCLIBC_MUTEX_UNLOCK(mylock); - sigaction(SIGPIPE, &oldaction, NULL); + sigaction(SIGPIPE, &action, NULL); } libc_hidden_def(vsyslog) -- cgit v1.2.3 From 6732cb1ae137d7af17eb911004ba904badba1b85 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 13 Dec 2009 05:47:19 +0100 Subject: syslog: use send(MSG_NOSIGNAL) instead of write, thus no need to handle SIGPIPE Size changes by this and previous change: text data bss dec hex filename 1151 13 2 1166 48e libc/misc/syslog/syslog.o 1093 10 2 1105 451 libc/misc/syslog/syslog.o 1047 10 2 1059 423 libc/misc/syslog/syslog.o Signed-off-by: Denys Vlasenko --- libc/misc/syslog/syslog.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/libc/misc/syslog/syslog.c b/libc/misc/syslog/syslog.c index f66ba8faf..b10a55615 100644 --- a/libc/misc/syslog/syslog.c +++ b/libc/misc/syslog/syslog.c @@ -85,6 +85,10 @@ __UCLIBC_MUTEX_STATIC(mylock, PTHREAD_MUTEX_INITIALIZER); +/* !glibc_compat: glibc uses argv[0] by default + * (default: if there was no openlog 
or if openlog passed NULL), + * not string "syslog" + */ static const char *LogTag = "syslog"; /* string to tag the entry with */ static int LogFile = -1; /* fd for log */ static smalluint connected; /* have done connect */ @@ -188,16 +192,11 @@ vsyslog(int pri, const char *fmt, va_list ap) int fd, saved_errno; int rc; char tbuf[1024]; /* syslogd is unable to handle longer messages */ - struct sigaction action; /* Just throw out this message if pri has bad bits. */ if ((pri & ~(LOG_PRIMASK|LOG_FACMASK)) != 0) return; - memset(&action, 0, sizeof(action)); - action.sa_handler = closelog_intern; - sigaction(SIGPIPE, &action, &action); - saved_errno = errno; __UCLIBC_MUTEX_LOCK(mylock); @@ -268,7 +267,8 @@ vsyslog(int pri, const char *fmt, va_list ap) *last_chr = '\0'; if (LogFile >= 0) { do { - rc = write(LogFile, p, last_chr + 1 - p); + /* can't just use write, it can result in SIGPIPE */ + rc = send(LogFile, p, last_chr + 1 - p, MSG_NOSIGNAL); if (rc < 0) { /* I don't think looping forever on EAGAIN is a good idea. * Imagine that syslogd is SIGSTOPed... */ @@ -302,7 +302,6 @@ vsyslog(int pri, const char *fmt, va_list ap) getout: __UCLIBC_MUTEX_UNLOCK(mylock); - sigaction(SIGPIPE, &action, NULL); } libc_hidden_def(vsyslog) -- cgit v1.2.3 From 8cd420c223f1a39c01835189669928ea8df9d221 Mon Sep 17 00:00:00 2001 From: Giuseppe Cavallaro Date: Mon, 14 Dec 2009 16:45:49 +0100 Subject: sh: fix endianess and optimise the SH4 memcpy This patch fixes the big-endian code and adds a new optimization only for little endian mode. This optimization is based on prefetching and 64bit data transfer via FPU. 
Tests show that ---------------------------------------- Memory bandwidth | Gain | sh4-300 | sh4-200 ---------------------------------------- 512 bytes to 16KiB | ~20% | ~25% from 32KiB to 16MiB | ~190% | ~5% ---------------------------------------- Signed-off-by: Giuseppe Cavallaro Signed-off-by: Carmelo Amoroso --- libc/string/sh/sh4/memcpy.S | 109 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 2 deletions(-) diff --git a/libc/string/sh/sh4/memcpy.S b/libc/string/sh/sh4/memcpy.S index 0954bce85..c03c18c73 100644 --- a/libc/string/sh/sh4/memcpy.S +++ b/libc/string/sh/sh4/memcpy.S @@ -6,6 +6,9 @@ * Modified from memcpy.S and micro-optimised for SH4 * Stuart Menefy (stuart.menefy@st.com) * + * Copyright (c) 2009 STMicroelectronics Ltd + * Optimised using prefetching and 64bit data transfer via FPU + * Author: Giuseppe Cavallaro */ /* @@ -17,6 +20,22 @@ #include +#ifdef __LITTLE_ENDIAN__ +#define MEMCPY_USES_FPU +/* Use paired single precision load or store mode for 64-bit transferring. + * FPSCR.SZ=1,FPSCR.PR=0 is well defined on both SH4-200 and SH4-300. + * Currently it has been only implemented and tested for little endian mode. */ +.macro FPU_SET_PAIRED_PREC + sts fpscr, r7 + mov #0x10, r6 ! PR=0 SZ=1 + shll16 r6 + lds r6, fpscr +.endm +.macro RESTORE_FPSCR + lds r7, fpscr +.endm +#endif + ! ! GHIJ KLMN OPQR --> ...G HIJK LMNO PQR. ! @@ -189,9 +208,7 @@ memcpy: mov r4, r0 ! 5 MT (0 cycle latency) add r6, r0 ! 49 EX - mov #16, r1 ! 6 EX bt/s .Lcase00 ! 111 BR (aligned) - sub r4, r5 ! 75 EX ! Arguments are not nicely long word aligned or zero len. @@ -207,6 +224,7 @@ memcpy: ! However the penalty for getting it 'wrong' is much higher for long word ! aligned data (and this is more common), so use a value of 16. + mov #16, r1 ! 6 EX cmp/gt r6,r1 ! 56 MT add #-1,r5 ! 50 EX @@ -447,6 +465,92 @@ memcpy: mov.l r7, @-r0 ! 30 LS +#ifdef MEMCPY_USES_FPU + ! Copy the cache line aligned blocks by using the FPU registers. + ! 
If src and dst are well aligned adopt 64-bit data transfer. + ! We also need r0 as a temporary (for movca), so 'undo' the invariant: + ! r5: src (was r0+r5) + ! r1: dest (was r0) +1: + add r0, r5 + mov r0, r1 + + add #-0x1c, r5 + mov r5, r0 + + tst #7, r0 ! src is 8byte aligned + mov r5, r3 + + add #-64, r3 ! To prefetch head + bt/s 3f + + pref @r3 + +2: fmov.s @r5+, fr0 + mov r1, r6 + fmov.s @r5+, fr1 + add #-32, r6 + fmov.s @r5+, fr2 + fmov.s @r5+, fr3 + fmov.s @r5+, fr4 + fmov.s @r5+, fr5 + fmov.s @r5+, fr6 + fmov.s @r5+, fr7 + add #-0x40, r5 + + movca.l r0, @r6 ! Cache allocate + store on dst-32. + + fmov.s fr7, @-r1 + fmov.s fr6, @-r1 + fmov.s fr5, @-r1 + fmov.s fr4, @-r1 + fmov.s fr3, @-r1 + fmov.s fr2, @-r1 + fmov.s fr1, @-r1 + fmov.s fr0, @-r1 + + add #-32, r3 + cmp/eq r2,r1 + + bf/s 2b + pref @r3 ! Prefetch the next cache line. + + bra 5f + +3: FPU_SET_PAIRED_PREC + +4: fmov @r5+, dr0 + mov r1, r6 + fmov @r5+, dr2 + add #-32, r6 + fmov @r5+, dr4 + fmov @r5+, dr6 + add #-0x40, r5 + + movca.l r0, @r6 + + fmov dr6, @-r1 + fmov dr4, @-r1 + fmov dr2, @-r1 + fmov dr0, @-r1 + add #-32, r3 + cmp/eq r2,r1 + + bf/s 4b + pref @r3 + + RESTORE_FPSCR + +5: mov r1, r0 + + cmp/eq r4, r0 ! 54 MT + bf/s 1f ! 109 BR + sub r1, r5 ! 75 EX + + rts + nop +1: +#else ! Copy the cache line aligned blocks ! ! In use: r0, r2, r4, r5 @@ -512,6 +616,7 @@ memcpy: rts 1: mov.l @r15+, r8 ! 15 LS +#endif sub r4, r1 ! 75 EX (len remaining) ! number of trailing bytes is non-zero -- cgit v1.2.3