90 files changed, 9169 insertions, 634 deletions
diff --git a/libc/inet/getnet.c b/libc/inet/getnet.c
index c604b63d3..9049f97af 100644
--- a/libc/inet/getnet.c
+++ b/libc/inet/getnet.c
@@ -27,9 +27,11 @@ aliases: case sensitive optional space or tab separated list of other names
 #include <bits/uClibc_mutex.h>
 __UCLIBC_MUTEX_STATIC(mylock, PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP);
 
-#define	MAXALIASES	35
-#define BUFSZ		(80) /* one line */
-#define SBUFSIZE	(BUFSZ + 1 + (sizeof(char *) * MAXALIASES))
+#define MINTOKENS	2
+#define	MAXALIASES	8
+#define MAXTOKENS	(MINTOKENS + MAXALIASES + 1)
+#define BUFSZ		(255) /* one line */
+#define SBUFSIZE	(BUFSZ + 1 + (sizeof(char *) * MAXTOKENS))
 
 static parser_t *netp = NULL;
 static struct netent nete;
@@ -65,10 +67,8 @@ int getnetent_r(struct netent *result_buf,
 				int *h_errnop
 				 )
 {
-	char **alias, *cp = NULL;
-	char **net_aliases;
 	char **tok = NULL;
-	const size_t aliaslen = sizeof(*net_aliases) * MAXALIASES;
+	const size_t aliaslen = sizeof(char *) * MAXTOKENS;
 	int ret = ERANGE;
 
 	*result = NULL;
@@ -86,7 +86,7 @@ int getnetent_r(struct netent *result_buf,
 	netp->data_len = aliaslen;
 	netp->line_len = buflen - aliaslen;
 	/* <name>[[:space:]]<netnumber>[[:space:]][<aliases>] */
-	if (!config_read(netp, &tok, 3, 2, "# \t/", PARSE_NORMAL)) {
+	if (!config_read(netp, &tok, MAXTOKENS-1, MINTOKENS, "# \t/", PARSE_NORMAL)) {
 		goto DONE;
 	}
 	result_buf->n_name = *(tok++);
@@ -110,16 +110,7 @@ int getnetent_r(struct netent *result_buf,
 			sa4_to_uint32(addri->ai_addr);
 		freeaddrinfo(addri);
 	}
-	result_buf->n_aliases = alias = net_aliases = tok;
-	cp = *alias;
-	while (cp && *cp) {
-		if (alias < &net_aliases[MAXALIASES - 1])
-			*alias++ = cp;
-		cp = strpbrk(cp, " \t");
-		if (cp != NULL)
-			*cp++ = '\0';
-	}
-	*alias = NULL;
+	result_buf->n_aliases = tok;
 	*result = result_buf;
 	ret = 0;
  DONE:
diff --git a/libc/inet/getproto.c b/libc/inet/getproto.c
index bcf507bda..c59da7e66 100644
--- a/libc/inet/getproto.c
+++ b/libc/inet/getproto.c
@@ -27,9 +27,11 @@ aliases: case sensitive optional space or tab separated list of other names
 #include <bits/uClibc_mutex.h>
 __UCLIBC_MUTEX_STATIC(mylock, PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP);
 
-#define	MAXALIASES	35
-#define BUFSZ		(80) /* one line */
-#define SBUFSIZE	(BUFSZ + 1 + (sizeof(char *) * MAXALIASES))
+#define MINTOKENS	2
+#define	MAXALIASES	8 /* will probably never be more than one */
+#define MAXTOKENS	(MINTOKENS + MAXALIASES + 1)
+#define BUFSZ		(255) /* one line */
+#define SBUFSIZE	(BUFSZ + 1 + (sizeof(char *) * MAXTOKENS))
 
 static parser_t *protop = NULL;
 static struct protoent protoe;
@@ -63,10 +65,8 @@ libc_hidden_def(endprotoent)
 int getprotoent_r(struct protoent *result_buf,
 				 char *buf, size_t buflen, struct protoent **result)
 {
-	char **alias, *cp = NULL;
-	char **proto_aliases;
 	char **tok = NULL;
-	const size_t aliaslen = sizeof(*proto_aliases) * MAXALIASES;
+	const size_t aliaslen = sizeof(char *) * MAXTOKENS;
 	int ret = ERANGE;
 
 	*result = NULL;
@@ -85,21 +85,12 @@ int getprotoent_r(struct protoent *result_buf,
 	protop->data_len = aliaslen;
 	protop->line_len = buflen - aliaslen;
 	/* <name>[[:space:]]<protonumber>[[:space:]][<aliases>] */
-	if (!config_read(protop, &tok, 3, 2, "# \t/", PARSE_NORMAL)) {
+	if (!config_read(protop, &tok, MAXTOKENS - 1, MINTOKENS, "# \t/", PARSE_NORMAL)) {
 		goto DONE;
 	}
 	result_buf->p_name = *(tok++);
 	result_buf->p_proto = atoi(*(tok++));
-	result_buf->p_aliases = alias = proto_aliases = tok;
-	cp = *alias;
-	while (cp && *cp) {
-		if (alias < &proto_aliases[MAXALIASES - 1])
-			*alias++ = cp;
-		cp = strpbrk(cp, " \t");
-		if (cp != NULL)
-			*cp++ = '\0';
-	}
-	*alias = NULL;
+	result_buf->p_aliases = tok;
 	*result = result_buf;
 	ret = 0;
  DONE:
diff --git a/libc/inet/getservice.c b/libc/inet/getservice.c
index 47d26a262..183099f5c 100644
--- a/libc/inet/getservice.c
+++ b/libc/inet/getservice.c
@@ -28,9 +28,11 @@ aliases: case sensitive optional space or tab separated list of other names
 #include <bits/uClibc_mutex.h>
 __UCLIBC_MUTEX_STATIC(mylock, PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP);
 
-#define	MAXALIASES	35
-#define BUFSZ		(80) /* one line */
-#define SBUFSIZE	(BUFSZ + 1 + (sizeof(char *) * MAXALIASES))
+#define MINTOKENS	3
+#define	MAXALIASES	8	/* we seldomly need more than 1 alias */
+#define MAXTOKENS	(MINTOKENS + MAXALIASES + 1)
+#define BUFSZ		(255)	/* one line */
+#define SBUFSIZE	(BUFSZ + 1 + (sizeof(char *) * MAXTOKENS))
 
 static parser_t *servp = NULL;
 static struct servent serve;
@@ -65,11 +67,9 @@ libc_hidden_def(endservent)
 int getservent_r(struct servent *result_buf,
 				 char *buf, size_t buflen, struct servent **result)
 {
-	char **alias;
-	char **serv_aliases;
 	char **tok = NULL;
-	const size_t aliaslen = sizeof(*serv_aliases) * MAXALIASES;
-	int ret = ENOENT;
+	const size_t aliaslen = sizeof(char *) * MAXTOKENS;
+	int ret = ERANGE;
 
 	*result = NULL;
 	if (buflen < aliaslen
@@ -77,7 +77,7 @@ int getservent_r(struct servent *result_buf,
 		goto DONE_NOUNLOCK;
 
 	__UCLIBC_MUTEX_LOCK(mylock);
-
+	ret = ENOENT;
 	if (servp == NULL)
 		setservent(serv_stayopen);
 	if (servp == NULL)
@@ -87,14 +87,13 @@ int getservent_r(struct servent *result_buf,
 	servp->data_len = aliaslen;
 	servp->line_len = buflen - aliaslen;
 	/* <name>[[:space:]]<port>/<proto>[[:space:]][<aliases>] */
-	if (!config_read(servp, &tok, MAXALIASES, 3, "# \t/", PARSE_NORMAL)) {
-		ret = ERANGE;
+	if (!config_read(servp, &tok, MAXTOKENS - 1, MINTOKENS, "# \t/", PARSE_NORMAL)) {
 		goto DONE;
 	}
 	result_buf->s_name = *(tok++);
 	result_buf->s_port = htons((u_short) atoi(*(tok++)));
 	result_buf->s_proto = *(tok++);
-	result_buf->s_aliases = alias = serv_aliases = tok;
+	result_buf->s_aliases = tok;
 	*result = result_buf;
 	ret = 0;
  DONE:
@@ -107,9 +106,8 @@ libc_hidden_def(getservent_r)
 
 static void __initbuf(void)
 {
-	if (servbuf)
-		servbuf_sz += BUFSZ;
-	servbuf = realloc(servbuf, servbuf_sz);
+	if (!servbuf)
+		servbuf = malloc(SBUFSIZE);
 	if (!servbuf)
 		abort();
 }
@@ -118,9 +116,8 @@ struct servent *getservent(void)
 {
 	struct servent *result;
 
-	do {
-		__initbuf();
-	} while (getservent_r(&serve, servbuf, servbuf_sz, &result) == ERANGE);
+	__initbuf();
+	getservent_r(&serve, servbuf, servbuf_sz, &result);
 	return result;
 }
 
@@ -155,10 +152,8 @@ struct servent *getservbyname(const char *name, const char *proto)
 {
 	struct servent *result;
 
-	do {
-		__initbuf();
-	} while (getservbyname_r(name, proto, &serve, servbuf, servbuf_sz, &result)
-			 == ERANGE);
+	__initbuf();
+	getservbyname_r(name, proto, &serve, servbuf, servbuf_sz, &result);
 	return result;
 }
 
@@ -188,10 +183,8 @@ struct servent *getservbyport(int port, const char *proto)
 {
 	struct servent *result;
 
-	do {
-		__initbuf();
-	} while (getservbyport_r(port, proto, &serve, servbuf, servbuf_sz, &result)
-			 == ERANGE);
+	__initbuf();
+	getservbyport_r(port, proto, &serve, servbuf, servbuf_sz, &result);
 	return result;
 }
 libc_hidden_def(getservbyport)
diff --git a/libc/inet/resolv.c b/libc/inet/resolv.c
index 364a4b9f5..47bab7519 100644
--- a/libc/inet/resolv.c
+++ b/libc/inet/resolv.c
@@ -335,7 +335,7 @@ Domain name in a message can be represented as either:
 
 
 #define MAX_RECURSE    5
-#define MAXALIASES  (6)
+#define MAXALIASES  (4)
 #define BUFSZ       (80) /* one line */
 #define SBUFSIZE    (BUFSZ + 1 + (sizeof(char *) * MAXALIASES))
 
@@ -1587,6 +1587,11 @@ parser_t * __open_etc_hosts(void)
 	return parser;
 }
 
+#define MINTOKENS 2 //dotted ip address + canonical name
+#define MAXTOKENS (MINTOKENS + MAXALIASES)
+#define HALISTOFF (sizeof(char*) * MAXTOKENS)
+#define INADDROFF (HALISTOFF + 2 * sizeof(char*))
+
 int attribute_hidden __read_etc_hosts_r(
 		parser_t * parser,
 		const char *name,
@@ -1601,8 +1606,7 @@ int attribute_hidden __read_etc_hosts_r(
 	char **host_aliases;
 	char **tok = NULL;
 	struct in_addr *h_addr0 = NULL;
-#define ALIASOFF (sizeof(*host_aliases) * MAXALIASES + 2 * sizeof(char*))
-	const size_t aliaslen = ALIASOFF +
+	const size_t aliaslen = INADDROFF +
 #ifdef __UCLIBC_HAS_IPV6__
 							sizeof(struct in6_addr)
 #else
@@ -1622,8 +1626,8 @@ int attribute_hidden __read_etc_hosts_r(
 		return errno;
 	}
 	/* Layout in buf:
-	 * char **alias for MAXALIAS aliases
-	 * char **h_addr_list[1] = {*in[6]_addr, NULL}
+	 * char *alias[MAXTOKENS]  = {address, name, aliases...}
+	 * char **h_addr_list[1]   = {*in[6]_addr, NULL}
 	 * struct in[6]_addr
 	 * char line_buffer[BUFSZ+];
 	 */
@@ -1632,7 +1636,7 @@ int attribute_hidden __read_etc_hosts_r(
 	parser->line_len = buflen - aliaslen;
 	*h_errnop = HOST_NOT_FOUND;
 	/* <ip>[[:space:]][<aliases>] */
-	while (config_read(parser, &tok, MAXALIASES, 2, "# \t", PARSE_NORMAL)) {
+	while (config_read(parser, &tok, MAXTOKENS, MINTOKENS, "# \t", PARSE_NORMAL)) {
 		result_buf->h_aliases = alias = host_aliases = tok+1;
 		if (action == GETHOSTENT) {
 			/* Return whatever the next entry happens to be. */
@@ -1650,9 +1654,9 @@ int attribute_hidden __read_etc_hosts_r(
 		}
 found:
 		result_buf->h_name = *(result_buf->h_aliases++);
-		result_buf->h_addr_list = (char**)(buf + ALIASOFF);
+		result_buf->h_addr_list = (char**)(buf + HALISTOFF);
 		*(result_buf->h_addr_list + 1) = '\0';
-		h_addr0 = (struct in_addr*)(buf + ALIASOFF + 2 * sizeof (char*));
+		h_addr0 = (struct in_addr*)(buf + INADDROFF);
 		result_buf->h_addr = (char*)h_addr0;
 		if (0) /* nothing */;
 #ifdef __UCLIBC_HAS_IPV4__
diff --git a/libc/misc/internals/parse_config.c b/libc/misc/internals/parse_config.c
index 8fa324e10..c17d25553 100644
--- a/libc/misc/internals/parse_config.c
+++ b/libc/misc/internals/parse_config.c
@@ -78,6 +78,14 @@ static off_t bb_get_chunk_with_continuation(parser_t* parsr)
 			 parsr->line_len += PAGE_SIZE;
 			 parsr->data = realloc(parsr->data,
 								   parsr->data_len + parsr->line_len);
+			parsr->line = parsr->data + parsr->data_len;
+		} else {
+			/* discard rest of line if not enough space in buffer */
+			int c;
+			do {
+				c = fgetc(parsr->fp);
+			} while (c != EOF && c != '\n');
+			break;
 		}
 	}
 	return pos;
@@ -186,23 +194,20 @@ again:
 			parser->line_len = 81;
 		if (parser->data_len == 0)
 			parser->data_len += 1 + ntokens * sizeof(char *);
-		parser->data = realloc(parser->data,
-								parser->data_len + parser->line_len);
+		parser->data = malloc(parser->data_len + parser->line_len);
 		if (parser->data == NULL)
 			return 0;
 		parser->allocated |= 1;
 	} /* else { assert(parser->data_len > 0); } */
-	if (parser->line == NULL)
-		parser->line = parser->data + parser->data_len;
-	if (*tokens == NULL)
-		*tokens = (char **) parser->data;
-	memset(*tokens, 0, sizeof(*tokens[0]) * ntokens);
+	parser->line = parser->data + parser->data_len;
 	/*config_free_data(parser);*/
 
 	/* Read one line (handling continuations with backslash) */
 	len = bb_get_chunk_with_continuation(parser);
 	if (len == -1)
 		return 0;
+	*tokens = (char **) parser->data;
+	memset(*tokens, 0, sizeof(*tokens[0]) * ntokens);
 	line = parser->line;
 
 	/* Skip multiple token-delimiters in the start of line? */
diff --git a/libc/misc/utmp/utxent.c b/libc/misc/utmp/utxent.c
index 3c59f1c48..a0e80a662 100644
--- a/libc/misc/utmp/utxent.c
+++ b/libc/misc/utmp/utxent.c
@@ -71,7 +71,8 @@ void getutmp (const struct utmpx *utmpx, struct utmp *utmp)
 	memcpy (utmp->ut_host, utmpx->ut_host, sizeof (utmp->ut_host));
 #endif
 #if _HAVE_UT_TV - 0
-	utmp->ut_tv = utmpx->ut_tv;
+	utmp->ut_tv.tv_sec = utmpx->ut_tv.tv_sec;
+	utmp->ut_tv.tv_usec = utmpx->ut_tv.tv_usec;
 #else
 	utmp->ut_time = utmpx->ut_time;
 #endif
@@ -97,7 +98,8 @@ void getutmpx (const struct utmp *utmp, struct utmpx *utmpx)
 	memcpy (utmpx->ut_host, utmp->ut_host, sizeof (utmp->ut_host));
 #endif
 #if _HAVE_UT_TV - 0
-	utmpx->ut_tv = utmp->ut_tv;
+	utmpx->ut_tv.tv_sec = utmp->ut_tv.tv_sec;
+	utmpx->ut_tv.tv_usec = utmp->ut_tv.tv_usec;
 #else
 	utmpx->ut_time = utmp->ut_time;
 #endif
diff --git a/libc/misc/wchar/wchar.c b/libc/misc/wchar/wchar.c
index 7380ac9ae..ab6c617ed 100644
--- a/libc/misc/wchar/wchar.c
+++ b/libc/misc/wchar/wchar.c
@@ -286,6 +286,8 @@ size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
 		s = empty_string;
 		n = 1;
 	} else if (*s == '\0') {
+		if (pwc)
+			*pwc = '\0';
 	/* According to the ISO C 89 standard this is the expected behaviour.  */
 		return 0;
 	} else if (!n) {
diff --git a/libc/signal/sigsetops.c b/libc/signal/sigsetops.c
index c47a87b0e..fa5fe6acf 100644
--- a/libc/signal/sigsetops.c
+++ b/libc/signal/sigsetops.c
@@ -3,7 +3,7 @@
 
 #include <features.h>
 
-#define _EXTERN_INLINE
+#define __PROVIDE_OUT_OF_LINE_SIGSETFN
 #ifndef __USE_EXTERN_INLINES
 # define __USE_EXTERN_INLINES	1
 #endif
@@ -12,6 +12,9 @@
 
 /* Since we massaged signal.h into emitting non-inline function
  * definitions, we need to finish PLT avoidance trick: */
+#undef __sigismember
+#undef __sigaddset
+#undef __sigdelset
 libc_hidden_def(__sigismember)
 libc_hidden_def(__sigaddset)
 libc_hidden_def(__sigdelset)
diff --git a/libc/stdio/_vfprintf.c b/libc/stdio/_vfprintf.c
index 6fa8ecb8d..3b007084d 100644
--- a/libc/stdio/_vfprintf.c
+++ b/libc/stdio/_vfprintf.c
@@ -104,13 +104,13 @@
 #include <printf.h>
 
 #ifdef __UCLIBC_HAS_THREADS__
-#include <stdio_ext.h>
-#include <pthread.h>
-#endif /* __UCLIBC_HAS_THREADS__ */
+# include <stdio_ext.h>
+# include <pthread.h>
+#endif
 
 #ifdef __UCLIBC_HAS_WCHAR__
-#include <wchar.h>
-#endif /* __UCLIBC_HAS_WCHAR__ */
+# include <wchar.h>
+#endif
 
 #include <bits/uClibc_uintmaxtostr.h>
 #include <bits/uClibc_va_copy.h>
@@ -136,24 +136,24 @@
 /**********************************************************************/
 
 #if defined(__UCLIBC__) && !defined(__UCLIBC_HAS_FLOATS__)
-#undef __STDIO_PRINTF_FLOAT
+# undef __STDIO_PRINTF_FLOAT
 #endif
 
 #ifdef __BCC__
-#undef __STDIO_PRINTF_FLOAT
+# undef __STDIO_PRINTF_FLOAT
 #endif
 
 #ifdef __STDIO_PRINTF_FLOAT
-#include <float.h>
-#include <bits/uClibc_fpmax.h>
-#else  /* __STDIO_PRINTF_FLOAT */
-#undef L__fpmaxtostr
-#endif /* __STDIO_PRINTF_FLOAT */
+# include <float.h>
+# include <bits/uClibc_fpmax.h>
+#else
+# undef L__fpmaxtostr
+#endif
 
 
 #undef __STDIO_HAS_VSNPRINTF
 #if defined(__STDIO_BUFFERS) || defined(__USE_OLD_VFPRINTF__) || defined(__UCLIBC_HAS_GLIBC_CUSTOM_STREAMS__)
-#define __STDIO_HAS_VSNPRINTF 1
+# define __STDIO_HAS_VSNPRINTF 1
 #endif
 
 /**********************************************************************/
@@ -162,40 +162,36 @@
 /* #define __UCLIBC_HAS_GLIBC_CUSTOM_PRINTF__ */
 
 #ifdef __UCLIBC_MJN3_ONLY__
-#ifdef L_register_printf_function
+# ifdef L_register_printf_function
 /* emit only once */
-#warning WISHLIST: Make MAX_USER_SPEC configurable?
-#warning WISHLIST: Make MAX_ARGS_PER_SPEC configurable?
+#  warning WISHLIST: Make MAX_USER_SPEC configurable?
+#  warning WISHLIST: Make MAX_ARGS_PER_SPEC configurable?
+# endif
 #endif
-#endif /* __UCLIBC_MJN3_ONLY__ */
 
 #ifdef __UCLIBC_HAS_GLIBC_CUSTOM_PRINTF__
-
-#define MAX_USER_SPEC       10
-#define MAX_ARGS_PER_SPEC    5
-
-#else  /* __UCLIBC_HAS_GLIBC_CUSTOM_PRINTF__ */
-
-#undef MAX_USER_SPEC
-#define MAX_ARGS_PER_SPEC    1
-
-#endif /* __UCLIBC_HAS_GLIBC_CUSTOM_PRINTF__ */
+# define MAX_USER_SPEC       10
+# define MAX_ARGS_PER_SPEC    5
+#else
+# undef MAX_USER_SPEC
+# define MAX_ARGS_PER_SPEC    1
+#endif
 
 #if MAX_ARGS_PER_SPEC < 1
-#error MAX_ARGS_PER_SPEC < 1!
-#undef MAX_ARGS_PER_SPEC
-#define MAX_ARGS_PER_SPEC    1
+# error MAX_ARGS_PER_SPEC < 1!
+# undef MAX_ARGS_PER_SPEC
+# define MAX_ARGS_PER_SPEC    1
 #endif
 
 #if defined(NL_ARGMAX) && (NL_ARGMAX < 9)
-#error NL_ARGMAX < 9!
+# error NL_ARGMAX < 9!
 #endif
 
 #if defined(NL_ARGMAX) && (NL_ARGMAX >= (MAX_ARGS_PER_SPEC + 2))
-#define MAX_ARGS        NL_ARGMAX
+# define MAX_ARGS        NL_ARGMAX
 #else
 /* N for spec itself, plus 1 each for width and precision */
-#define MAX_ARGS        (MAX_ARGS_PER_SPEC + 2)
+# define MAX_ARGS        (MAX_ARGS_PER_SPEC + 2)
 #endif
 
 /**********************************************************************/
@@ -207,20 +203,20 @@
 extern printf_function _custom_printf_handler[MAX_USER_SPEC] attribute_hidden;
 extern printf_arginfo_function *_custom_printf_arginfo[MAX_USER_SPEC] attribute_hidden;
 extern char *_custom_printf_spec attribute_hidden;
-#endif /* __UCLIBC_HAS_GLIBC_CUSTOM_PRINTF__ */
+#endif
 
 /**********************************************************************/
 
 #define SPEC_FLAGS		" +0-#'I"
 enum {
-	FLAG_SPACE		=	0x01,
-	FLAG_PLUS		=	0x02,	/* must be 2 * FLAG_SPACE */
-	FLAG_ZERO		=	0x04,
-	FLAG_MINUS		=	0x08,	/* must be 2 * FLAG_ZERO */
-	FLAG_HASH		=	0x10,
-	FLAG_THOUSANDS	=	0x20,
-	FLAG_I18N		=	0x40,	/* only works for d, i, u */
-	FLAG_WIDESTREAM =   0x80
+	FLAG_SPACE      = 0x01,
+	FLAG_PLUS       = 0x02,	/* must be 2 * FLAG_SPACE */
+	FLAG_ZERO       = 0x04,
+	FLAG_MINUS      = 0x08,	/* must be 2 * FLAG_ZERO */
+	FLAG_HASH       = 0x10,
+	FLAG_THOUSANDS  = 0x20,
+	FLAG_I18N       = 0x40,	/* only works for d, i, u */
+	FLAG_WIDESTREAM = 0x80
 };
 
 /**********************************************************************/
@@ -240,10 +236,10 @@ enum {
 };
 
 /*                         p   x   X  o   u   d   i */
-#define SPEC_BASE		{ 16, 16, 16, 8, 10, 10, 10 }
+#define SPEC_BASE       { 16, 16, 16, 8, 10, 10, 10 }
 
-#define SPEC_RANGES		{ CONV_n, CONV_p, CONV_i, CONV_A, \
-						  CONV_C, CONV_S, CONV_c, CONV_s, CONV_custom0 }
+#define SPEC_RANGES     { CONV_n, CONV_p, CONV_i, CONV_A, \
+                          CONV_C, CONV_S, CONV_c, CONV_s, CONV_custom0 }
 
 #define SPEC_OR_MASK		 { \
 	/* n */			(PA_FLAG_PTR|PA_INT), \
@@ -284,43 +280,43 @@ enum {
 /*  #endif */
 
 #ifdef PDS
-#error PDS already defined!
+# error PDS already defined!
 #endif
 #ifdef SS
-#error SS already defined!
+# error SS already defined!
 #endif
 #ifdef IMS
-#error IMS already defined!
+# error IMS already defined!
 #endif
 
 #if PTRDIFF_MAX == INT_MAX
-#define PDS		0
+# define PDS		0
 #elif PTRDIFF_MAX == LONG_MAX
-#define PDS		4
+# define PDS		4
 #elif defined(LLONG_MAX) && (PTRDIFF_MAX == LLONG_MAX)
-#define PDS		8
+# define PDS		8
 #else
-#error fix QUAL_CHARS ptrdiff_t entry 't'!
+# error fix QUAL_CHARS ptrdiff_t entry 't'!
 #endif
 
 #if SIZE_MAX == UINT_MAX
-#define SS		0
+# define SS		0
 #elif SIZE_MAX == ULONG_MAX
-#define SS		4
+# define SS		4
 #elif defined(LLONG_MAX) && (SIZE_MAX == ULLONG_MAX)
-#define SS		8
+# define SS		8
 #else
-#error fix QUAL_CHARS size_t entries 'z', 'Z'!
+# error fix QUAL_CHARS size_t entries 'z', 'Z'!
 #endif
 
 #if INTMAX_MAX == INT_MAX
-#define IMS		0
+# define IMS		0
 #elif INTMAX_MAX == LONG_MAX
-#define IMS		4
+# define IMS		4
 #elif defined(LLONG_MAX) && (INTMAX_MAX == LLONG_MAX)
-#define IMS		8
+# define IMS		8
 #else
-#error fix QUAL_CHARS intmax_t entry 'j'!
+# error fix QUAL_CHARS intmax_t entry 'j'!
 #endif
 
 #define QUAL_CHARS		{ \
@@ -328,51 +324,52 @@ enum {
 	/* q:long_long  Z:(s)size_t */ \
 	'h',   'l',  'L',  'j',  'z',  't',  'q', 'Z',  0, \
 	 2,     4,    8,  IMS,   SS,  PDS,    8,  SS,   0, /* TODO -- fix!!! */\
-     1,     8 \
+	 1,     8 \
 }
 
 /**********************************************************************/
 
 #ifdef __STDIO_VA_ARG_PTR
-#ifdef __BCC__
-#define __va_arg_ptr(ap,type)		(((type *)(ap += sizeof(type))) - 1)
-#endif
+# ifdef __BCC__
+#  define __va_arg_ptr(ap,type)		(((type *)(ap += sizeof(type))) - 1)
+# endif
 
-#if 1
-#ifdef __GNUC__
+# if 1
+#  ifdef __GNUC__
 /* TODO -- need other than for 386 as well! */
 
-#ifndef __va_rounded_size
-#define __va_rounded_size(TYPE)  \
-  (((sizeof (TYPE) + sizeof (int) - 1) / sizeof (int)) * sizeof (int))
-#endif
-#define __va_arg_ptr(AP, TYPE)						\
- (AP = (va_list) ((char *) (AP) + __va_rounded_size (TYPE)),	\
-  ((void *) ((char *) (AP) - __va_rounded_size (TYPE))))
-#endif
-#endif
+#   ifndef __va_rounded_size
+#    define __va_rounded_size(TYPE) \
+	(((sizeof (TYPE) + sizeof (int) - 1) / sizeof (int)) * sizeof (int))
+#   endif
+#   define __va_arg_ptr(AP, TYPE)  \
+	(AP = (va_list) ((char *) (AP) + __va_rounded_size (TYPE)),  \
+	 ((void *) ((char *) (AP) - __va_rounded_size (TYPE)))  \
+	)
+#  endif
+# endif
 #endif /* __STDIO_VA_ARG_PTR */
 
 #ifdef __va_arg_ptr
-#define GET_VA_ARG(AP,F,TYPE,ARGS)	(*(AP) = __va_arg_ptr(ARGS,TYPE))
-#define GET_ARG_VALUE(AP,F,TYPE)	(*((TYPE *)(*(AP))))
+# define GET_VA_ARG(AP,F,TYPE,ARGS)	(*(AP) = __va_arg_ptr(ARGS,TYPE))
+# define GET_ARG_VALUE(AP,F,TYPE)	(*((TYPE *)(*(AP))))
 #else
 typedef union {
 	wchar_t wc;
 	unsigned int u;
 	unsigned long ul;
-#ifdef ULLONG_MAX
+# ifdef ULLONG_MAX
 	unsigned long long ull;
-#endif
-#ifdef __STDIO_PRINTF_FLOAT
+# endif
+# ifdef __STDIO_PRINTF_FLOAT
 	double d;
 	long double ld;
-#endif /* __STDIO_PRINTF_FLOAT */
+# endif
 	void *p;
 } argvalue_t;
 
-#define GET_VA_ARG(AU,F,TYPE,ARGS)	(AU->F = va_arg(ARGS,TYPE))
-#define GET_ARG_VALUE(AU,F,TYPE)	((TYPE)((AU)->F))
+# define GET_VA_ARG(AU,F,TYPE,ARGS)	(AU->F = va_arg(ARGS,TYPE))
+# define GET_ARG_VALUE(AU,F,TYPE)	((TYPE)((AU)->F))
 #endif
 
 typedef struct {
@@ -380,7 +377,7 @@ typedef struct {
 	struct printf_info info;
 #ifdef NL_ARGMAX
 	int maxposarg;				/* > 0 if args are positional, 0 if not, -1 if unknown */
-#endif /* NL_ARGMAX */
+#endif
 	int num_data_args;			/* TODO: use sentinal??? */
 	unsigned int conv_num;
 	unsigned char argnumber[4]; /* width | prec | 1st data | unused */
@@ -436,7 +433,8 @@ size_t parse_printf_format(register const char *template,
 
 	if (_ppfs_init(&ppfs, template) >= 0) {
 #ifdef NL_ARGMAX
-		if (ppfs.maxposarg > 0)  { /* Using positional args. */
+		if (ppfs.maxposarg > 0)  {
+			/* Using positional args. */
 			count = ppfs.maxposarg;
 			if (n > count) {
 				n = count;
@@ -444,8 +442,10 @@ size_t parse_printf_format(register const char *template,
 			for (i = 0 ; i < n ; i++) {
 				*argtypes++ = ppfs.argtype[i];
 			}
-		} else {				/* Not using positional args. */
-#endif /* NL_ARGMAX */
+		} else
+#endif
+		{
+			/* Not using positional args. */
 			while (*template) {
 				if ((*template == '%') && (*++template != '%')) {
 					ppfs.fmtpos = template;
@@ -478,9 +478,7 @@ size_t parse_printf_format(register const char *template,
 					++template;
 				}
 			}
-#ifdef NL_ARGMAX
 		}
-#endif /* NL_ARGMAX */
 	}
 
 	return count;
@@ -498,10 +496,10 @@ int attribute_hidden _ppfs_init(register ppfs_t *ppfs, const char *fmt0)
 	memset(ppfs, 0, sizeof(ppfs_t)); /* TODO: nonportable???? */
 #ifdef NL_ARGMAX
 	--ppfs->maxposarg;			/* set to -1 */
-#endif /* NL_ARGMAX */
+#endif
 	ppfs->fmtpos = fmt0;
 #ifdef __UCLIBC_MJN3_ONLY__
-#warning TODO: Make checking of the format string in C locale an option.
+# warning TODO: Make checking of the format string in C locale an option.
 #endif
 #ifdef __UCLIBC_HAS_LOCALE__
 	/* To support old programs, don't check mb validity if in C locale. */
@@ -550,7 +548,8 @@ int attribute_hidden _ppfs_init(register ppfs_t *ppfs, const char *fmt0)
 		while (*fmt) {
 			if ((*fmt == '%') && (*++fmt != '%')) {
 				ppfs->fmtpos = fmt; /* back up to the '%' */
-				if ((r = _ppfs_parsespec(ppfs)) < 0) {
+				r = _ppfs_parsespec(ppfs);
+				if (r < 0) {
 					return -1;
 				}
 				fmt = ppfs->fmtpos;	/* update to one past end of spec */
@@ -587,13 +586,14 @@ void attribute_hidden _ppfs_prepargs(register ppfs_t *ppfs, va_list arg)
 	va_copy(ppfs->arg, arg);
 
 #ifdef NL_ARGMAX
-	if ((i = ppfs->maxposarg) > 0)  { /* init for positional args */
+	i = ppfs->maxposarg; /* init for positional args */
+	if (i > 0) {
 		ppfs->num_data_args = i;
 		ppfs->info.width = ppfs->info.prec = ppfs->maxposarg = 0;
 		_ppfs_setargs(ppfs);
 		ppfs->maxposarg = i;
 	}
-#endif /* NL_ARGMAX */
+#endif
 }
 #endif
 /**********************************************************************/
@@ -610,7 +610,7 @@ void attribute_hidden _ppfs_setargs(register ppfs_t *ppfs)
 
 #ifdef NL_ARGMAX
 	if (ppfs->maxposarg == 0) {	/* initing for or no pos args */
-#endif /* NL_ARGMAX */
+#endif
 		if (ppfs->info.width == INT_MIN) {
 			ppfs->info.width =
 #ifdef __va_arg_ptr
@@ -743,7 +743,7 @@ static const short int type_codes[] = {
 	/* PA_FLOAT, */
 	PA_DOUBLE,
 	PA_DOUBLE|PA_FLAG_LONG_DOUBLE,
-#endif /* __STDIO_PRINTF_FLOAT */
+#endif
 };
 
 static const unsigned char type_sizes[] = {
@@ -766,7 +766,7 @@ static const unsigned char type_sizes[] = {
 	/* PROMOTED_SIZE_OF(float), */
 	PROMOTED_SIZE_OF(double),
 	PROMOTED_SIZE_OF(long double),
-#endif /* __STDIO_PRINTF_FLOAT */
+#endif
 };
 
 static int _promoted_size(int argtype)
@@ -825,7 +825,7 @@ int attribute_hidden _ppfs_parsespec(ppfs_t *ppfs)
 	int dpoint;
 #ifdef NL_ARGMAX
 	int maxposarg;
-#endif /* NL_ARGMAX */
+#endif
 	int p_m_spec_chars;
 	int n;
 	int argtype[MAX_ARGS_PER_SPEC+2];
@@ -838,7 +838,7 @@ int attribute_hidden _ppfs_parsespec(ppfs_t *ppfs)
 	static const char qual_chars[] = QUAL_CHARS;
 #ifdef __UCLIBC_HAS_WCHAR__
 	char buf[32];
-#endif /* __UCLIBC_HAS_WCHAR__ */
+#endif
 
 	/* WIDE note: we can test against '%' here since we don't allow */
 	/* WIDE note: other mappings of '%' in the wide char set. */
@@ -849,7 +849,7 @@ int attribute_hidden _ppfs_parsespec(ppfs_t *ppfs)
 	argtype[1] = __PA_NOARG;
 #ifdef NL_ARGMAX
 	maxposarg = ppfs->maxposarg;
-#endif /* NL_ARGMAX */
+#endif
 
 #ifdef __UCLIBC_HAS_WCHAR__
 	/* This is somewhat lame, but saves a lot of code.  If we're dealing with
@@ -859,15 +859,15 @@ int attribute_hidden _ppfs_parsespec(ppfs_t *ppfs)
 	 * While there a legal specifiers that won't, the all involve duplicate
 	 * flags or outrageous field widths/precisions. */
 	width = dpoint = 0;
-	if ((flags = ppfs->info._flags & FLAG_WIDESTREAM) == 0) {
+	flags = ppfs->info._flags & FLAG_WIDESTREAM;
+	if (flags == 0) {
 		fmt = ppfs->fmtpos;
 	} else {
 		fmt = buf + 1;
 		i = 0;
 		do {
-			if ((buf[i] = (char) (((wchar_t *) ppfs->fmtpos)[i-1]))
-				!= (((wchar_t *) ppfs->fmtpos)[i-1])
-				) {
+			buf[i] = (char) (((wchar_t *) ppfs->fmtpos)[i-1]);
+			if (buf[i] != (((wchar_t *) ppfs->fmtpos)[i-1])) {
 				return -1;
 			}
 		} while (buf[i++] && (i < sizeof(buf)));
@@ -876,7 +876,7 @@ int attribute_hidden _ppfs_parsespec(ppfs_t *ppfs)
 #else  /* __UCLIBC_HAS_WCHAR__ */
 	width = flags = dpoint = 0;
 	fmt = ppfs->fmtpos;
-#endif /* __UCLIBC_HAS_WCHAR__ */
+#endif
 
 	assert(fmt[-1] == '%');
 	assert(fmt[0] != '%');
@@ -908,22 +908,23 @@ int attribute_hidden _ppfs_parsespec(ppfs_t *ppfs)
 			if (maxposarg == 0) {
 				return -1;
 			}
-			if ((argnumber[2] = i) > maxposarg) {
+			argnumber[2] = i;
+			if (argnumber[2] > maxposarg) {
 				maxposarg = i;
 			}
 			/* Now fall through to check flags. */
 		} else {
 			if (maxposarg > 0) {
-#ifdef __UCLIBC_HAS_PRINTF_M_SPEC__
-#ifdef __UCLIBC_MJN3_ONLY__
-#warning TODO: Support prec and width for %m when positional args used
+# ifdef __UCLIBC_HAS_PRINTF_M_SPEC__
+#  ifdef __UCLIBC_MJN3_ONLY__
+#   warning TODO: Support prec and width for %m when positional args used
 				/* Actually, positional arg processing will fail in general
 				 * for specifiers that don't require an arg. */
-#endif /* __UCLIBC_MJN3_ONLY__ */
+#  endif
 				if (*fmt == 'm') {
 					goto PREC_WIDTH;
 				}
-#endif /* __UCLIBC_HAS_PRINTF_M_SPEC__ */
+# endif /* __UCLIBC_HAS_PRINTF_M_SPEC__ */
 				return -1;
 			}
 			maxposarg = 0;		/* Possible redundant store, but cuts size. */
@@ -982,7 +983,7 @@ int attribute_hidden _ppfs_parsespec(ppfs_t *ppfs)
 			}
 			argnumber[-dpoint] = i;
 		} else
-#endif /* NL_ARGMAX */
+#endif
 		if (++p != fmt) {
 			 /* Not using pos args but digits followed *. */
 			return -1;
@@ -1054,33 +1055,30 @@ int attribute_hidden _ppfs_parsespec(ppfs_t *ppfs)
 		if (*fmt == 'm') {
 			ppfs->conv_num = CONV_m;
 			ppfs->num_data_args = 0;
-			goto DONE;
-		}
+		} else
 #endif
-#ifdef __UCLIBC_HAS_GLIBC_CUSTOM_PRINTF__
-
-		/* Handle custom arg -- WARNING -- overwrites p!!! */
-		ppfs->conv_num = CONV_custom0;
-		p = _custom_printf_spec;
-		do {
-			if (*p == *fmt) {
-				if ((ppfs->num_data_args
-					 = ((*_custom_printf_arginfo[(int)(p-_custom_printf_spec)])
-						(&(ppfs->info), MAX_ARGS_PER_SPEC, argtype+2)))
-					> MAX_ARGS_PER_SPEC) {
-					break;		/* Error -- too many args! */
+		{
+#ifndef __UCLIBC_HAS_GLIBC_CUSTOM_PRINTF__
+			return -1;  /* Error */
+#else
+			/* Handle custom arg -- WARNING -- overwrites p!!! */
+			ppfs->conv_num = CONV_custom0;
+			p = _custom_printf_spec;
+			while (1) {
+				if (*p == *fmt) {
+					printf_arginfo_function *fp = _custom_printf_arginfo[(int)(p - _custom_printf_spec)];
+					ppfs->num_data_args = fp(&(ppfs->info), MAX_ARGS_PER_SPEC, argtype + 2);
+					if (ppfs->num_data_args > MAX_ARGS_PER_SPEC) {
+						return -1;  /* Error -- too many args! */
+					}
+					break;
 				}
-				goto DONE;
+				if (++p >= (_custom_printf_spec + MAX_USER_SPEC))
+					return -1;  /* Error */
 			}
-		} while (++p < (_custom_printf_spec + MAX_USER_SPEC));
-#endif /* __UCLIBC_HAS_GLIBC_CUSTOM_PRINTF__ */
-		/* Otherwise error. */
-		return -1;
-	}
-
-#if defined(__UCLIBC_HAS_GLIBC_CUSTOM_PRINTF__) || defined(__UCLIBC_HAS_PRINTF_M_SPEC__)
- DONE:
 #endif
+		}
+	}
 
 #ifdef NL_ARGMAX
 	if (maxposarg > 0) {
@@ -1091,7 +1089,8 @@ int attribute_hidden _ppfs_parsespec(ppfs_t *ppfs)
 				 ? (ppfs->argnumber[i] = argnumber[i])
 				 : argnumber[2] + (i-2));
 			if (n > maxposarg) {
-				if ((maxposarg = n) > NL_ARGMAX) {
+				maxposarg = n;
+				if (maxposarg > NL_ARGMAX) {
 					return -1;
 				}
 			}
@@ -1101,18 +1100,20 @@ int attribute_hidden _ppfs_parsespec(ppfs_t *ppfs)
 				ppfs->argtype[n] = argtype[i];
 			}
 		} while (++i < ppfs->num_data_args + 2);
-	} else {
+	} else
 #endif /* NL_ARGMAX */
+	{
 		ppfs->argnumber[2] = 1;
 		memcpy(ppfs->argtype, argtype + 2, ppfs->num_data_args * sizeof(int));
-#ifdef NL_ARGMAX
 	}
 
+#ifdef NL_ARGMAX
 	ppfs->maxposarg = maxposarg;
-#endif /* NL_ARGMAX */
+#endif
 
 #ifdef __UCLIBC_HAS_WCHAR__
-	if ((flags = ppfs->info._flags & FLAG_WIDESTREAM) == 0) {
+	flags = ppfs->info._flags & FLAG_WIDESTREAM;
+	if (flags == 0) {
 		ppfs->fmtpos = ++fmt;
 	} else {
 		ppfs->fmtpos = (const char *) (((const wchar_t *)(ppfs->fmtpos))
@@ -1120,7 +1121,7 @@ int attribute_hidden _ppfs_parsespec(ppfs_t *ppfs)
 	}
 #else  /* __UCLIBC_HAS_WCHAR__ */
 	ppfs->fmtpos = ++fmt;
-#endif /* __UCLIBC_HAS_WCHAR__ */
+#endif
 
  	return ppfs->num_data_args + 2;
 }
@@ -1202,8 +1203,10 @@ static size_t _fp_out_narrow(FILE *fp, intptr_t type, intptr_t len, intptr_t buf
 
 	if (type & 0x80) {			/* Some type of padding needed. */
 		int buflen = strlen((const char *) buf);
-		if ((len -= buflen) > 0) {
-			if ((r = _charpad(fp, (type & 0x7f), len)) != len) {
+		len -= buflen;
+		if (len > 0) {
+			r = _charpad(fp, (type & 0x7f), len);
+			if (r != len) {
 				return r;
 			}
 		}
@@ -1274,8 +1277,10 @@ static size_t _fp_out_wide(FILE *fp, intptr_t type, intptr_t len, intptr_t buf)
 
 	if (type & 0x80) {			/* Some type of padding needed */
 		int buflen = strlen(s);
-		if ((len -= buflen) > 0) {
-			if ((r = _charpad(fp, (type & 0x7f), len)) != len) {
+		len -= buflen;
+		if (len > 0) {
+			r = _charpad(fp, (type & 0x7f), len);
+			if (r != len) {
 				return r;
 			}
 		}
@@ -1287,13 +1292,13 @@ static size_t _fp_out_wide(FILE *fp, intptr_t type, intptr_t len, intptr_t buf)
 		do {
 #ifdef __LOCALE_C_ONLY
 			wbuf[i] = s[i];
-#else  /* __LOCALE_C_ONLY */
+#else
 
-#ifdef __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__
+# ifdef __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__
 			if (s[i] == ',') {
 				wbuf[i] = __UCLIBC_CURLOCALE->thousands_sep_wc;
 			} else
-#endif /* __UCLIBC_HAS_GLIBC_DIGIT_GROUPING__ */
+# endif
 			if (s[i] == '.') {
 				wbuf[i] = __UCLIBC_CURLOCALE->decimal_point_wc;
 			} else {
@@ -1320,7 +1325,7 @@ static int _ppwfs_init(register ppfs_t *ppfs, const wchar_t *fmt0)
 	memset(ppfs, 0, sizeof(ppfs_t)); /* TODO: nonportable???? */
 #ifdef NL_ARGMAX
 	--ppfs->maxposarg;			/* set to -1 */
-#endif /* NL_ARGMAX */
+#endif
 	ppfs->fmtpos = (const char *) fmt0;
 	ppfs->info._flags = FLAG_WIDESTREAM;
 
@@ -1366,7 +1371,8 @@ static int _ppwfs_init(register ppfs_t *ppfs, const wchar_t *fmt0)
 		while (*fmt) {
 			if ((*fmt == '%') && (*++fmt != '%')) {
 				ppfs->fmtpos = (const char *) fmt; /* back up to the '%' */
-				if ((r = _ppfs_parsespec(ppfs)) < 0) {
+				r = _ppfs_parsespec(ppfs);
+				if (r < 0) {
 					return -1;
 				}
 				fmt = (const wchar_t *) ppfs->fmtpos; /* update to one past end of spec */
@@ -1419,10 +1425,10 @@ static int _do_one_spec(FILE * __restrict stream,
 	static const char spec_base[] = SPEC_BASE;
 #ifdef L__vfprintf_internal
 	static const char prefix[] = "+\0-\0 \0000x\0000X";
-	/*                            0  2  4  6   9 11*/
-#else  /* L__vfprintf_internal */
+	/*                            0  2  4    6     9 11*/
+#else
 	static const wchar_t prefix[] = L"+\0-\0 \0000x\0000X";
-#endif /* L__vfprintf_internal */
+#endif
 	enum {
 		PREFIX_PLUS = 0,
 		PREFIX_MINUS = 2,
@@ -1441,7 +1447,7 @@ static int _do_one_spec(FILE * __restrict stream,
 #ifdef __UCLIBC_HAS_WCHAR__
 	const wchar_t *ws = NULL;
 	mbstate_t mbstate;
-#endif /* __UCLIBC_HAS_WCHAR__ */
+#endif
 	size_t slen;
 #ifdef L__vfprintf_internal
 #define SLEN slen
@@ -1457,7 +1463,7 @@ static int _do_one_spec(FILE * __restrict stream,
 	char padchar = ' ';
 #ifdef __UCLIBC_MJN3_ONLY__
 #warning TODO: Determine appropriate buf size.
-#endif /* __UCLIBC_MJN3_ONLY__ */
+#endif
 	/* TODO: buf needs to be big enough for any possible error return strings
 	 * and also for any locale-grouped long long integer strings generated.
 	 * This should be large enough for any of the current archs/locales, but
@@ -1477,21 +1483,21 @@ static int _do_one_spec(FILE * __restrict stream,
 	/* Deal with the argptr vs argvalue issue. */
 #ifdef __va_arg_ptr
 	argptr = (const void * const *) ppfs->argptr;
-#ifdef NL_ARGMAX
+# ifdef NL_ARGMAX
 	if (ppfs->maxposarg > 0) {	/* Using positional args... */
 		argptr += ppfs->argnumber[2] - 1;
 	}
-#endif /* NL_ARGMAX */
+# endif
 #else
 	/* Need to build a local copy... */
 	{
 		register argvalue_t *p = ppfs->argvalue;
 		int i;
-#ifdef NL_ARGMAX
+# ifdef NL_ARGMAX
 		if (ppfs->maxposarg > 0) {	/* Using positional args... */
 			p += ppfs->argnumber[2] - 1;
 		}
-#endif /* NL_ARGMAX */
+# endif
 		for (i = 0 ; i < ppfs->num_data_args ; i++ ) {
 			argptr[i] = (void *) p++;
 		}
@@ -1513,8 +1519,9 @@ static int _do_one_spec(FILE * __restrict stream,
 #ifdef L__vfprintf_internal
 #warning CONSIDER: Should we ignore these flags if stub locale?  What about custom specs?
 #endif
-#endif /* __UCLIBC_MJN3_ONLY__ */
-			if ((base = spec_base[(int)(ppfs->conv_num - CONV_p)]) == 10) {
+#endif
+			base = spec_base[(int)(ppfs->conv_num - CONV_p)];
+			if (base == 10) {
 				if (PRINT_INFO_FLAG_VAL(&(ppfs->info),group)) {
 					alphacase = __UIM_GROUP;
 				}
@@ -1541,7 +1548,7 @@ static int _do_one_spec(FILE * __restrict stream,
 #ifdef L__vfprintf_internal
 #warning CONSIDER: If using outdigits and/or grouping, how should we interpret precision?
 #endif
-#endif /* __UCLIBC_MJN3_ONLY__ */
+#endif
 			s = _uintmaxtostr(buf + sizeof(buf) - 1,
 							  (uintmax_t)
 							  _load_inttype(ppfs->conv_num == CONV_p ? PA_FLAG_LONG : *argtype & __PA_INTMASK,
@@ -1613,14 +1620,15 @@ static int _do_one_spec(FILE * __restrict stream,
 			return 0;
 #else  /* __STDIO_PRINTF_FLOAT */
 			return -1;			/* TODO -- try to continue? */
-#endif /* __STDIO_PRINTF_FLOAT */
+#endif
 		} else if (ppfs->conv_num <= CONV_S) {	/* wide char or string */
 #ifdef L__vfprintf_internal
 
 #ifdef __UCLIBC_HAS_WCHAR__
 			mbstate.__mask = 0;	/* Initialize the mbstate. */
 			if (ppfs->conv_num == CONV_S) { /* wide string */
-				if (!(ws = *((const wchar_t **) *argptr))) {
+				ws = *((const wchar_t **) *argptr);
+				if (!ws) {
 					goto NULL_STRING;
 				}
 				/* We use an awful uClibc-specific hack here, passing
@@ -1628,12 +1636,12 @@ static int _do_one_spec(FILE * __restrict stream,
 				 * uClibc's wcsrtombs that we want a "restricted" length
 				 * such that the mbs fits in a buffer of the specified
 				 * size with no partial conversions. */
-				if ((slen = wcsrtombs((char *) &ws, &ws, /* Use awful hack! */
-									  ((ppfs->info.prec >= 0)
-									   ? ppfs->info.prec
-									   : SIZE_MAX), &mbstate))
-					== ((size_t)-1)
-					) {
+				slen = wcsrtombs((char *) &ws, &ws, /* Use awful hack! */
+							((ppfs->info.prec >= 0)
+							 ? ppfs->info.prec
+							 : SIZE_MAX),
+							&mbstate);
+				if (slen == ((size_t)-1)) {
 					return -1;	/* EILSEQ */
 				}
 			} else {			/* wide char */
@@ -1646,7 +1654,7 @@ static int _do_one_spec(FILE * __restrict stream,
 			}
 #else  /* __UCLIBC_HAS_WCHAR__ */
 			return -1;
-#endif /* __UCLIBC_HAS_WCHAR__ */
+#endif
 		} else if (ppfs->conv_num <= CONV_s) {	/* char or string */
 			if (ppfs->conv_num == CONV_s) { /* string */
 				s = *((char **) (*argptr));
@@ -1692,7 +1700,7 @@ static int _do_one_spec(FILE * __restrict stream,
 			if (ppfs->conv_num == CONV_s) { /* string */
 #ifdef __UCLIBC_MJN3_ONLY__
 #warning TODO: Fix %s for _vfwprintf_internal... output upto illegal sequence?
-#endif /* __UCLIBC_MJN3_ONLY__ */
+#endif
 				s = *((char **) (*argptr));
 				if (s) {
 #ifdef __UCLIBC_HAS_PRINTF_M_SPEC__
@@ -1759,7 +1767,7 @@ static int _do_one_spec(FILE * __restrict stream,
 #ifdef L__vfprintf_internal
 #warning CONSIDER: If using outdigits and/or grouping, how should we pad?
 #endif
-#endif /* __UCLIBC_MJN3_ONLY__ */
+#endif
 		{
 			size_t t;
 
@@ -1790,7 +1798,7 @@ static int _do_one_spec(FILE * __restrict stream,
 
 #ifdef L__vfprintf_internal
 
-#ifdef __UCLIBC_HAS_WCHAR__
+# ifdef __UCLIBC_HAS_WCHAR__
 		if (!ws) {
 			assert(s);
 			if (_outnstr(stream, s, slen) != slen) {
@@ -1802,18 +1810,18 @@ static int _do_one_spec(FILE * __restrict stream,
 			while (slen) {
 				t = (slen <= sizeof(buf)) ? slen : sizeof(buf);
 				t = wcsrtombs(buf, &ws, t, &mbstate);
-				assert (t != ((size_t)(-1)));
+				assert(t != ((size_t)(-1)));
 				if (_outnstr(stream, buf, t) != t) {
 					return -1;
 				}
 				slen -= t;
 			}
 		}
-#else  /* __UCLIBC_HAS_WCHAR__ */
+# else  /* __UCLIBC_HAS_WCHAR__ */
 		if (_outnstr(stream, (const unsigned char *) s, slen) != slen) {
 			return -1;
 		}
-#endif /* __UCLIBC_HAS_WCHAR__ */
+# endif
 
 #else  /* L__vfprintf_internal */
 
@@ -1881,7 +1889,8 @@ int VFPRINTF_internal (FILE * __restrict stream,
 				/* TODO: _do_one_spec needs to know what the output funcs are!!! */
 				ppfs.fmtpos = (const char *)(++format);
 				/* TODO: check -- should only fail on stream error */
-				if ( (r = _do_one_spec(stream, &ppfs, &count)) < 0) {
+				r = _do_one_spec(stream, &ppfs, &count);
+				if (r < 0) {
 					count = -1;
 					break;
 				}
@@ -1917,13 +1926,13 @@ int VFPRINTF_internal (FILE * __restrict stream,
  * is using __stdio_fwrite (TODO: do the same for wide functions).
  */
 #ifdef L_vfprintf
-#define VFPRINTF vfprintf
-#define VFPRINTF_internal _vfprintf_internal
-#define FMT_TYPE char
+# define VFPRINTF vfprintf
+# define VFPRINTF_internal _vfprintf_internal
+# define FMT_TYPE char
 #else
-#define VFPRINTF vfwprintf
-#define VFPRINTF_internal _vfwprintf_internal
-#define FMT_TYPE wchar_t
+# define VFPRINTF vfwprintf
+# define VFPRINTF_internal _vfwprintf_internal
+# define FMT_TYPE wchar_t
 #endif
 
 libc_hidden_proto(VFPRINTF)
diff --git a/libc/stdlib/system.c b/libc/stdlib/system.c
index acd86761d..a0ff726ad 100644
--- a/libc/stdlib/system.c
+++ b/libc/stdlib/system.c
@@ -5,6 +5,7 @@
  */
 
 #include <stdio.h>
+#include <string.h>
 #include <stddef.h>
 #include <signal.h>
 #include <unistd.h>
@@ -126,9 +127,10 @@ do_system (const char *line)
   struct sigaction sa;
   sigset_t omask;
 
+  memset(&sa, 0, sizeof(sa));
   sa.sa_handler = SIG_IGN;
-  sa.sa_flags = 0;
-  __sigemptyset (&sa.sa_mask);
+  /*sa.sa_flags = 0; - done by memset */
+  /*__sigemptyset (&sa.sa_mask); - done by memset */
 
   DO_LOCK ();
   if (ADD_REF () == 0)
diff --git a/libc/string/microblaze/Makefile b/libc/string/microblaze/Makefile
new file mode 100644
index 000000000..5bdfef2f7
--- /dev/null
+++ b/libc/string/microblaze/Makefile
@@ -0,0 +1,13 @@
+# Makefile for uClibc
+#
+# Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org>
+#
+# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+#
+
+top_srcdir	:= ../../../
+top_builddir	:= ../../../
+all: objs
+include $(top_builddir)Rules.mak
+include ../Makefile.in
+include $(top_srcdir)Makerules
diff --git a/libc/string/microblaze/memcpy.S b/libc/string/microblaze/memcpy.S
new file mode 100644
index 000000000..7cf081e87
--- /dev/null
+++ b/libc/string/microblaze/memcpy.S
@@ -0,0 +1,326 @@
+/*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
+ * Copyright (C) 2008 Jim Law - Iris LP  All rights reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License.  See the file COPYING in the main directory of this
+ * archive for more details.
+ *
+ * Written by Jim Law <jlaw@irispower.com>
+ *
+ * intended to replace:
+ *	memcpy in memcpy.c and
+ *	memmove in memmove.c
+ * ... in arch/microblaze/lib
+ *
+ *
+ * assly_fastcopy.S
+ *
+ * Attempt at quicker memcpy and memmove for MicroBlaze
+ *	Input :	Operand1 in Reg r5 - destination address
+ *		Operand2 in Reg r6 - source address
+ *		Operand3 in Reg r7 - number of bytes to transfer
+ *	Output: Result in Reg r3 - starting destinaition address
+ *
+ *
+ * Explanation:
+ *	Perform (possibly unaligned) copy of a block of memory
+ *	between mem locations with size of xfer spec'd in bytes
+ */
+
+	.text
+	.globl	memcpy
+	.type  memcpy, @function
+	.ent	memcpy
+
+memcpy:
+fast_memcpy_ascending:
+	/* move d to return register as value of function */
+	addi	r3, r5, 0
+
+	addi	r4, r0, 4	/* n = 4 */
+	cmpu	r4, r4, r7	/* n = c - n  (unsigned) */
+	blti	r4, a_xfer_end	/* if n < 0, less than one word to transfer */
+
+	/* transfer first 0~3 bytes to get aligned dest address */
+	andi	r4, r5, 3		/* n = d & 3 */
+	/* if zero, destination already aligned */
+	beqi	r4, a_dalign_done
+	/* n = 4 - n (yields 3, 2, 1 transfers for 1, 2, 3 addr offset) */
+	rsubi	r4, r4, 4
+	rsub	r7, r4, r7		/* c = c - n adjust c */
+
+a_xfer_first_loop:
+	/* if no bytes left to transfer, transfer the bulk */
+	beqi	r4, a_dalign_done
+	lbui	r11, r6, 0		/* h = *s */
+	sbi	r11, r5, 0		/* *d = h */
+	addi	r6, r6, 1		/* s++ */
+	addi	r5, r5, 1		/* d++ */
+	brid	a_xfer_first_loop	/* loop */
+	addi	r4, r4, -1		/* n-- (IN DELAY SLOT) */
+
+a_dalign_done:
+	addi	r4, r0, 32		/* n = 32 */
+	cmpu	r4, r4, r7		/* n = c - n  (unsigned) */
+	/* if n < 0, less than one block to transfer */
+	blti	r4, a_block_done
+
+a_block_xfer:
+	andi	r9, r6, 3		/* t1 = s & 3 */
+	/* if temp == 0, everything is word-aligned */
+	beqi	r9, a_word_xfer
+
+a_block_unaligned:
+	andi	r4, r7, 0xffffffe0	/* n = c & ~31 */
+	rsub	r7, r4, r7		/* c = c - n */
+	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
+	add	r6, r6, r4		/* s = s + n */
+	lwi	r11, r8, 0		/* h = *(as + 0) */
+
+	addi	r9, r9, -1
+	beqi	r9, a_block_u1		/* t1 was 1 => 1 byte offset */
+	addi	r9, r9, -1
+	beqi	r9, a_block_u2		/* t1 was 2 => 2 byte offset */
+
+a_block_u3:
+	bslli	r11, r11, 24	/* h = h << 24 */
+a_bu3_loop:
+	lwi	r12, r8, 4	/* v = *(as + 4) */
+	bsrli	r9, r12, 8	/* t1 = v >> 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 0	/* *(d + 0) = t1 */
+	bslli	r11, r12, 24	/* h = v << 24 */
+	lwi	r12, r8, 8	/* v = *(as + 8) */
+	bsrli	r9, r12, 8	/* t1 = v >> 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 4	/* *(d + 4) = t1 */
+	bslli	r11, r12, 24	/* h = v << 24 */
+	lwi	r12, r8, 12	/* v = *(as + 12) */
+	bsrli	r9, r12, 8	/* t1 = v >> 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 8	/* *(d + 8) = t1 */
+	bslli	r11, r12, 24	/* h = v << 24 */
+	lwi	r12, r8, 16	/* v = *(as + 16) */
+	bsrli	r9, r12, 8	/* t1 = v >> 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 12	/* *(d + 12) = t1 */
+	bslli	r11, r12, 24	/* h = v << 24 */
+	lwi	r12, r8, 20	/* v = *(as + 20) */
+	bsrli	r9, r12, 8	/* t1 = v >> 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 16	/* *(d + 16) = t1 */
+	bslli	r11, r12, 24	/* h = v << 24 */
+	lwi	r12, r8, 24	/* v = *(as + 24) */
+	bsrli	r9, r12, 8	/* t1 = v >> 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 20	/* *(d + 20) = t1 */
+	bslli	r11, r12, 24	/* h = v << 24 */
+	lwi	r12, r8, 28	/* v = *(as + 28) */
+	bsrli	r9, r12, 8	/* t1 = v >> 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 24	/* *(d + 24) = t1 */
+	bslli	r11, r12, 24	/* h = v << 24 */
+	lwi	r12, r8, 32	/* v = *(as + 32) */
+	bsrli	r9, r12, 8	/* t1 = v >> 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 28	/* *(d + 28) = t1 */
+	bslli	r11, r12, 24	/* h = v << 24 */
+	addi	r8, r8, 32	/* as = as + 32 */
+	addi	r4, r4, -32	/* n = n - 32 */
+	bneid	r4, a_bu3_loop	/* while (n) loop */
+	addi	r5, r5, 32	/* d = d + 32 (IN DELAY SLOT) */
+	bri	a_block_done
+
+a_block_u1:
+	bslli	r11, r11, 8	/* h = h << 8 */
+a_bu1_loop:
+	lwi	r12, r8, 4	/* v = *(as + 4) */
+	bsrli	r9, r12, 24	/* t1 = v >> 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 0	/* *(d + 0) = t1 */
+	bslli	r11, r12, 8	/* h = v << 8 */
+	lwi	r12, r8, 8	/* v = *(as + 8) */
+	bsrli	r9, r12, 24	/* t1 = v >> 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 4	/* *(d + 4) = t1 */
+	bslli	r11, r12, 8	/* h = v << 8 */
+	lwi	r12, r8, 12	/* v = *(as + 12) */
+	bsrli	r9, r12, 24	/* t1 = v >> 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 8	/* *(d + 8) = t1 */
+	bslli	r11, r12, 8	/* h = v << 8 */
+	lwi	r12, r8, 16	/* v = *(as + 16) */
+	bsrli	r9, r12, 24	/* t1 = v >> 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 12	/* *(d + 12) = t1 */
+	bslli	r11, r12, 8	/* h = v << 8 */
+	lwi	r12, r8, 20	/* v = *(as + 20) */
+	bsrli	r9, r12, 24	/* t1 = v >> 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 16	/* *(d + 16) = t1 */
+	bslli	r11, r12, 8	/* h = v << 8 */
+	lwi	r12, r8, 24	/* v = *(as + 24) */
+	bsrli	r9, r12, 24	/* t1 = v >> 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 20	/* *(d + 20) = t1 */
+	bslli	r11, r12, 8	/* h = v << 8 */
+	lwi	r12, r8, 28	/* v = *(as + 28) */
+	bsrli	r9, r12, 24	/* t1 = v >> 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 24	/* *(d + 24) = t1 */
+	bslli	r11, r12, 8	/* h = v << 8 */
+	lwi	r12, r8, 32	/* v = *(as + 32) */
+	bsrli	r9, r12, 24	/* t1 = v >> 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 28	/* *(d + 28) = t1 */
+	bslli	r11, r12, 8	/* h = v << 8 */
+	addi	r8, r8, 32	/* as = as + 32 */
+	addi	r4, r4, -32	/* n = n - 32 */
+	bneid	r4, a_bu1_loop	/* while (n) loop */
+	addi	r5, r5, 32	/* d = d + 32 (IN DELAY SLOT) */
+	bri	a_block_done
+
+a_block_u2:
+	bslli	r11, r11, 16	/* h = h << 16 */
+a_bu2_loop:
+	lwi	r12, r8, 4	/* v = *(as + 4) */
+	bsrli	r9, r12, 16	/* t1 = v >> 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 0	/* *(d + 0) = t1 */
+	bslli	r11, r12, 16	/* h = v << 16 */
+	lwi	r12, r8, 8	/* v = *(as + 8) */
+	bsrli	r9, r12, 16	/* t1 = v >> 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 4	/* *(d + 4) = t1 */
+	bslli	r11, r12, 16	/* h = v << 16 */
+	lwi	r12, r8, 12	/* v = *(as + 12) */
+	bsrli	r9, r12, 16	/* t1 = v >> 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 8	/* *(d + 8) = t1 */
+	bslli	r11, r12, 16	/* h = v << 16 */
+	lwi	r12, r8, 16	/* v = *(as + 16) */
+	bsrli	r9, r12, 16	/* t1 = v >> 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 12	/* *(d + 12) = t1 */
+	bslli	r11, r12, 16	/* h = v << 16 */
+	lwi	r12, r8, 20	/* v = *(as + 20) */
+	bsrli	r9, r12, 16	/* t1 = v >> 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 16	/* *(d + 16) = t1 */
+	bslli	r11, r12, 16	/* h = v << 16 */
+	lwi	r12, r8, 24	/* v = *(as + 24) */
+	bsrli	r9, r12, 16	/* t1 = v >> 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 20	/* *(d + 20) = t1 */
+	bslli	r11, r12, 16	/* h = v << 16 */
+	lwi	r12, r8, 28	/* v = *(as + 28) */
+	bsrli	r9, r12, 16	/* t1 = v >> 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 24	/* *(d + 24) = t1 */
+	bslli	r11, r12, 16	/* h = v << 16 */
+	lwi	r12, r8, 32	/* v = *(as + 32) */
+	bsrli	r9, r12, 16	/* t1 = v >> 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 28	/* *(d + 28) = t1 */
+	bslli	r11, r12, 16	/* h = v << 16 */
+	addi	r8, r8, 32	/* as = as + 32 */
+	addi	r4, r4, -32	/* n = n - 32 */
+	bneid	r4, a_bu2_loop	/* while (n) loop */
+	addi	r5, r5, 32	/* d = d + 32 (IN DELAY SLOT) */
+
+a_block_done:
+	addi	r4, r0, 4	/* n = 4 */
+	cmpu	r4, r4, r7	/* n = c - n  (unsigned) */
+	blti	r4, a_xfer_end	/* if n < 0, less than one word to transfer */
+
+a_word_xfer:
+	andi	r4, r7, 0xfffffffc	/* n = c & ~3 */
+	addi	r10, r0, 0		/* offset = 0 */
+
+	andi	r9, r6, 3		/* t1 = s & 3 */
+	/* if temp != 0, unaligned transfers needed */
+	bnei	r9, a_word_unaligned
+
+a_word_aligned:
+	lw	r9, r6, r10		/* t1 = *(s+offset) */
+	sw	r9, r5, r10		/* *(d+offset) = t1 */
+	addi	r4, r4,-4		/* n-- */
+	bneid	r4, a_word_aligned	/* loop */
+	addi	r10, r10, 4		/* offset++ (IN DELAY SLOT) */
+
+	bri	a_word_done
+
+a_word_unaligned:
+	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
+	lwi	r11, r8, 0		/* h = *(as + 0) */
+	addi	r8, r8, 4		/* as = as + 4 */
+
+	addi	r9, r9, -1
+	beqi	r9, a_word_u1		/* t1 was 1 => 1 byte offset */
+	addi	r9, r9, -1
+	beqi	r9, a_word_u2		/* t1 was 2 => 2 byte offset */
+
+a_word_u3:
+	bslli	r11, r11, 24	/* h = h << 24 */
+a_wu3_loop:
+	lw	r12, r8, r10	/* v = *(as + offset) */
+	bsrli	r9, r12, 8	/* t1 = v >> 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	sw	r9, r5, r10	/* *(d + offset) = t1 */
+	bslli	r11, r12, 24	/* h = v << 24 */
+	addi	r4, r4,-4	/* n = n - 4 */
+	bneid	r4, a_wu3_loop	/* while (n) loop */
+	addi	r10, r10, 4	/* offset = ofset + 4 (IN DELAY SLOT) */
+
+	bri	a_word_done
+
+a_word_u1:
+	bslli	r11, r11, 8	/* h = h << 8 */
+a_wu1_loop:
+	lw	r12, r8, r10	/* v = *(as + offset) */
+	bsrli	r9, r12, 24	/* t1 = v >> 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	sw	r9, r5, r10	/* *(d + offset) = t1 */
+	bslli	r11, r12, 8	/* h = v << 8 */
+	addi	r4, r4,-4	/* n = n - 4 */
+	bneid	r4, a_wu1_loop	/* while (n) loop */
+	addi	r10, r10, 4	/* offset = ofset + 4 (IN DELAY SLOT) */
+
+	bri	a_word_done
+
+a_word_u2:
+	bslli	r11, r11, 16	/* h = h << 16 */
+a_wu2_loop:
+	lw	r12, r8, r10	/* v = *(as + offset) */
+	bsrli	r9, r12, 16	/* t1 = v >> 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	sw	r9, r5, r10	/* *(d + offset) = t1 */
+	bslli	r11, r12, 16	/* h = v << 16 */
+	addi	r4, r4,-4	/* n = n - 4 */
+	bneid	r4, a_wu2_loop	/* while (n) loop */
+	addi	r10, r10, 4	/* offset = ofset + 4 (IN DELAY SLOT) */
+
+a_word_done:
+	add	r5, r5, r10	/* d = d + offset */
+	add	r6, r6, r10	/* s = s + offset */
+	rsub	r7, r10, r7	/* c = c - offset */
+
+a_xfer_end:
+a_xfer_end_loop:
+	beqi	r7, a_done		/* while (c) */
+	lbui	r9, r6, 0		/* t1 = *s */
+	addi	r6, r6, 1		/* s++ */
+	sbi	r9, r5, 0		/* *d = t1 */
+	addi	r7, r7, -1		/* c-- */
+	brid	a_xfer_end_loop		/* loop */
+	addi	r5, r5, 1		/* d++ (IN DELAY SLOT) */
+
+a_done:
+	rtsd	r15, 8
+	nop
+
+.size  memcpy, . - memcpy
+.end memcpy
+libc_hidden_def(memcpy)
diff --git a/libc/string/microblaze/memmove.S b/libc/string/microblaze/memmove.S
new file mode 100644
index 000000000..29233f566
--- /dev/null
+++ b/libc/string/microblaze/memmove.S
@@ -0,0 +1,348 @@
+/*
+ * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
+ * Copyright (C) 2008-2009 PetaLogix
+ * Copyright (C) 2008 Jim Law - Iris LP  All rights reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License.  See the file COPYING in the main directory of this
+ * archive for more details.
+ *
+ * Written by Jim Law <jlaw@irispower.com>
+ *
+ * intended to replace:
+ *	memcpy in memcpy.c and
+ *	memmove in memmove.c
+ * ... in arch/microblaze/lib
+ *
+ *
+ * assly_fastcopy.S
+ *
+ * Attempt at quicker memcpy and memmove for MicroBlaze
+ *	Input :	Operand1 in Reg r5 - destination address
+ *		Operand2 in Reg r6 - source address
+ *		Operand3 in Reg r7 - number of bytes to transfer
+ *	Output: Result in Reg r3 - starting destinaition address
+ *
+ *
+ * Explanation:
+ *	Perform (possibly unaligned) copy of a block of memory
+ *	between mem locations with size of xfer spec'd in bytes
+ */
+
+	.globl	memmove
+	.type  memmove, @function
+	.ent	memmove
+
+memmove:
+	cmpu	r4, r5, r6	/* n = s - d */
+	bgei	r4, HIDDEN_JUMPTARGET(memcpy)
+
+fast_memcpy_descending:
+	/* move d to return register as value of function */
+	addi	r3, r5, 0
+
+	add	r5, r5, r7	/* d = d + c */
+	add	r6, r6, r7	/* s = s + c */
+
+	addi	r4, r0, 4	/* n = 4 */
+	cmpu	r4, r4, r7	/* n = c - n  (unsigned) */
+	blti	r4,d_xfer_end	/* if n < 0, less than one word to transfer */
+
+	/* transfer first 0~3 bytes to get aligned dest address */
+	andi	r4, r5, 3		/* n = d & 3 */
+	/* if zero, destination already aligned */
+	beqi	r4,d_dalign_done
+	rsub	r7, r4, r7		/* c = c - n adjust c */
+
+d_xfer_first_loop:
+	/* if no bytes left to transfer, transfer the bulk */
+	beqi	r4,d_dalign_done
+	addi	r6, r6, -1		/* s-- */
+	addi	r5, r5, -1		/* d-- */
+	lbui	r11, r6, 0		/* h = *s */
+	sbi	r11, r5, 0		/* *d = h */
+	brid	d_xfer_first_loop	/* loop */
+	addi	r4, r4, -1		/* n-- (IN DELAY SLOT) */
+
+d_dalign_done:
+	addi	r4, r0, 32	/* n = 32 */
+	cmpu	r4, r4, r7	/* n = c - n  (unsigned) */
+	/* if n < 0, less than one block to transfer */
+	blti	r4, d_block_done
+
+d_block_xfer:
+	andi	r4, r7, 0xffffffe0	/* n = c & ~31 */
+	rsub	r7, r4, r7		/* c = c - n */
+
+	andi	r9, r6, 3		/* t1 = s & 3 */
+	/* if temp != 0, unaligned transfers needed */
+	bnei	r9, d_block_unaligned
+
+d_block_aligned:
+	addi	r6, r6, -32		/* s = s - 32 */
+	addi	r5, r5, -32		/* d = d - 32 */
+	lwi	r9, r6, 28		/* t1 = *(s + 28) */
+	lwi	r10, r6, 24		/* t2 = *(s + 24) */
+	lwi	r11, r6, 20		/* t3 = *(s + 20) */
+	lwi	r12, r6, 16		/* t4 = *(s + 16) */
+	swi	r9, r5, 28		/* *(d + 28) = t1 */
+	swi	r10, r5, 24		/* *(d + 24) = t2 */
+	swi	r11, r5, 20		/* *(d + 20) = t3 */
+	swi	r12, r5, 16		/* *(d + 16) = t4 */
+	lwi	r9, r6, 12		/* t1 = *(s + 12) */
+	lwi	r10, r6, 8		/* t2 = *(s + 8) */
+	lwi	r11, r6, 4		/* t3 = *(s + 4) */
+	lwi	r12, r6, 0		/* t4 = *(s + 0) */
+	swi	r9, r5, 12		/* *(d + 12) = t1 */
+	swi	r10, r5, 8		/* *(d + 8) = t2 */
+	swi	r11, r5, 4		/* *(d + 4) = t3 */
+	addi	r4, r4, -32		/* n = n - 32 */
+	bneid	r4, d_block_aligned	/* while (n) loop */
+	swi	r12, r5, 0		/* *(d + 0) = t4 (IN DELAY SLOT) */
+	bri	d_block_done
+
+d_block_unaligned:
+	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
+	rsub	r6, r4, r6		/* s = s - n */
+	lwi	r11, r8, 0		/* h = *(as + 0) */
+
+	addi	r9, r9, -1
+	beqi	r9,d_block_u1		/* t1 was 1 => 1 byte offset */
+	addi	r9, r9, -1
+	beqi	r9,d_block_u2		/* t1 was 2 => 2 byte offset */
+
+d_block_u3:
+	bsrli	r11, r11, 8	/* h = h >> 8 */
+d_bu3_loop:
+	addi	r8, r8, -32	/* as = as - 32 */
+	addi	r5, r5, -32	/* d = d - 32 */
+	lwi	r12, r8, 28	/* v = *(as + 28) */
+	bslli	r9, r12, 24	/* t1 = v << 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 28	/* *(d + 28) = t1 */
+	bsrli	r11, r12, 8	/* h = v >> 8 */
+	lwi	r12, r8, 24	/* v = *(as + 24) */
+	bslli	r9, r12, 24	/* t1 = v << 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 24	/* *(d + 24) = t1 */
+	bsrli	r11, r12, 8	/* h = v >> 8 */
+	lwi	r12, r8, 20	/* v = *(as + 20) */
+	bslli	r9, r12, 24	/* t1 = v << 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 20	/* *(d + 20) = t1 */
+	bsrli	r11, r12, 8	/* h = v >> 8 */
+	lwi	r12, r8, 16	/* v = *(as + 16) */
+	bslli	r9, r12, 24	/* t1 = v << 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 16	/* *(d + 16) = t1 */
+	bsrli	r11, r12, 8	/* h = v >> 8 */
+	lwi	r12, r8, 12	/* v = *(as + 12) */
+	bslli	r9, r12, 24	/* t1 = v << 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 12	/* *(d + 112) = t1 */
+	bsrli	r11, r12, 8	/* h = v >> 8 */
+	lwi	r12, r8, 8	/* v = *(as + 8) */
+	bslli	r9, r12, 24	/* t1 = v << 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 8	/* *(d + 8) = t1 */
+	bsrli	r11, r12, 8	/* h = v >> 8 */
+	lwi	r12, r8, 4	/* v = *(as + 4) */
+	bslli	r9, r12, 24	/* t1 = v << 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 4	/* *(d + 4) = t1 */
+	bsrli	r11, r12, 8	/* h = v >> 8 */
+	lwi	r12, r8, 0	/* v = *(as + 0) */
+	bslli	r9, r12, 24	/* t1 = v << 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 0	/* *(d + 0) = t1 */
+	addi	r4, r4, -32	/* n = n - 32 */
+	bneid	r4, d_bu3_loop	/* while (n) loop */
+	bsrli	r11, r12, 8	/* h = v >> 8 (IN DELAY SLOT) */
+	bri	d_block_done
+
+d_block_u1:
+	bsrli	r11, r11, 24	/* h = h >> 24 */
+d_bu1_loop:
+	addi	r8, r8, -32	/* as = as - 32 */
+	addi	r5, r5, -32	/* d = d - 32 */
+	lwi	r12, r8, 28	/* v = *(as + 28) */
+	bslli	r9, r12, 8	/* t1 = v << 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 28	/* *(d + 28) = t1 */
+	bsrli	r11, r12, 24	/* h = v >> 24 */
+	lwi	r12, r8, 24	/* v = *(as + 24) */
+	bslli	r9, r12, 8	/* t1 = v << 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 24	/* *(d + 24) = t1 */
+	bsrli	r11, r12, 24	/* h = v >> 24 */
+	lwi	r12, r8, 20	/* v = *(as + 20) */
+	bslli	r9, r12, 8	/* t1 = v << 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 20	/* *(d + 20) = t1 */
+	bsrli	r11, r12, 24	/* h = v >> 24 */
+	lwi	r12, r8, 16	/* v = *(as + 16) */
+	bslli	r9, r12, 8	/* t1 = v << 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 16	/* *(d + 16) = t1 */
+	bsrli	r11, r12, 24	/* h = v >> 24 */
+	lwi	r12, r8, 12	/* v = *(as + 12) */
+	bslli	r9, r12, 8	/* t1 = v << 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 12	/* *(d + 112) = t1 */
+	bsrli	r11, r12, 24	/* h = v >> 24 */
+	lwi	r12, r8, 8	/* v = *(as + 8) */
+	bslli	r9, r12, 8	/* t1 = v << 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 8	/* *(d + 8) = t1 */
+	bsrli	r11, r12, 24	/* h = v >> 24 */
+	lwi	r12, r8, 4	/* v = *(as + 4) */
+	bslli	r9, r12, 8	/* t1 = v << 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 4	/* *(d + 4) = t1 */
+	bsrli	r11, r12, 24	/* h = v >> 24 */
+	lwi	r12, r8, 0	/* v = *(as + 0) */
+	bslli	r9, r12, 8	/* t1 = v << 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 0	/* *(d + 0) = t1 */
+	addi	r4, r4, -32	/* n = n - 32 */
+	bneid	r4, d_bu1_loop	/* while (n) loop */
+	bsrli	r11, r12, 24	/* h = v >> 24 (IN DELAY SLOT) */
+	bri	d_block_done
+
+d_block_u2:
+	bsrli	r11, r11, 16	/* h = h >> 16 */
+d_bu2_loop:
+	addi	r8, r8, -32	/* as = as - 32 */
+	addi	r5, r5, -32	/* d = d - 32 */
+	lwi	r12, r8, 28	/* v = *(as + 28) */
+	bslli	r9, r12, 16	/* t1 = v << 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 28	/* *(d + 28) = t1 */
+	bsrli	r11, r12, 16	/* h = v >> 16 */
+	lwi	r12, r8, 24	/* v = *(as + 24) */
+	bslli	r9, r12, 16	/* t1 = v << 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 24	/* *(d + 24) = t1 */
+	bsrli	r11, r12, 16	/* h = v >> 16 */
+	lwi	r12, r8, 20	/* v = *(as + 20) */
+	bslli	r9, r12, 16	/* t1 = v << 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 20	/* *(d + 20) = t1 */
+	bsrli	r11, r12, 16	/* h = v >> 16 */
+	lwi	r12, r8, 16	/* v = *(as + 16) */
+	bslli	r9, r12, 16	/* t1 = v << 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 16	/* *(d + 16) = t1 */
+	bsrli	r11, r12, 16	/* h = v >> 16 */
+	lwi	r12, r8, 12	/* v = *(as + 12) */
+	bslli	r9, r12, 16	/* t1 = v << 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 12	/* *(d + 112) = t1 */
+	bsrli	r11, r12, 16	/* h = v >> 16 */
+	lwi	r12, r8, 8	/* v = *(as + 8) */
+	bslli	r9, r12, 16	/* t1 = v << 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 8	/* *(d + 8) = t1 */
+	bsrli	r11, r12, 16	/* h = v >> 16 */
+	lwi	r12, r8, 4	/* v = *(as + 4) */
+	bslli	r9, r12, 16	/* t1 = v << 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 4	/* *(d + 4) = t1 */
+	bsrli	r11, r12, 16	/* h = v >> 16 */
+	lwi	r12, r8, 0	/* v = *(as + 0) */
+	bslli	r9, r12, 16	/* t1 = v << 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	swi	r9, r5, 0	/* *(d + 0) = t1 */
+	addi	r4, r4, -32	/* n = n - 32 */
+	bneid	r4, d_bu2_loop	/* while (n) loop */
+	bsrli	r11, r12, 16	/* h = v >> 16 (IN DELAY SLOT) */
+
+d_block_done:
+	addi	r4, r0, 4	/* n = 4 */
+	cmpu	r4, r4, r7	/* n = c - n  (unsigned) */
+	blti	r4,d_xfer_end	/* if n < 0, less than one word to transfer */
+
+d_word_xfer:
+	andi	r4, r7, 0xfffffffc	/* n = c & ~3 */
+	rsub	r5, r4, r5		/* d = d - n */
+	rsub	r6, r4, r6		/* s = s - n */
+	rsub	r7, r4, r7		/* c = c - n */
+
+	andi	r9, r6, 3		/* t1 = s & 3 */
+	/* if temp != 0, unaligned transfers needed */
+	bnei	r9, d_word_unaligned
+
+d_word_aligned:
+	addi	r4, r4,-4		/* n-- */
+	lw	r9, r6, r4		/* t1 = *(s+n) */
+	bneid	r4, d_word_aligned	/* loop */
+	sw	r9, r5, r4		/* *(d+n) = t1 (IN DELAY SLOT) */
+
+	bri	d_word_done
+
+d_word_unaligned:
+	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
+	lw	r11, r8, r4		/* h = *(as + n) */
+
+	addi	r9, r9, -1
+	beqi	r9,d_word_u1		/* t1 was 1 => 1 byte offset */
+	addi	r9, r9, -1
+	beqi	r9,d_word_u2		/* t1 was 2 => 2 byte offset */
+
+d_word_u3:
+	bsrli	r11, r11, 8	/* h = h >> 8 */
+d_wu3_loop:
+	addi	r4, r4,-4	/* n = n - 4 */
+	lw	r12, r8, r4	/* v = *(as + n) */
+	bslli	r9, r12, 24	/* t1 = v << 24 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	sw	r9, r5, r4	/* *(d + n) = t1 */
+	bneid	r4, d_wu3_loop	/* while (n) loop */
+	bsrli	r11, r12, 8	/* h = v >> 8 (IN DELAY SLOT) */
+
+	bri	d_word_done
+
+d_word_u1:
+	bsrli	r11, r11, 24	/* h = h >> 24 */
+d_wu1_loop:
+	addi	r4, r4,-4	/* n = n - 4 */
+	lw	r12, r8, r4	/* v = *(as + n) */
+	bslli	r9, r12, 8	/* t1 = v << 8 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	sw	r9, r5, r4	/* *(d + n) = t1 */
+	bneid	r4, d_wu1_loop	/* while (n) loop */
+	bsrli	r11, r12, 24	/* h = v >> 24 (IN DELAY SLOT) */
+
+	bri	d_word_done
+
+d_word_u2:
+	bsrli	r11, r11, 16	/* h = h >> 16 */
+d_wu2_loop:
+	addi	r4, r4,-4	/* n = n - 4 */
+	lw	r12, r8, r4	/* v = *(as + n) */
+	bslli	r9, r12, 16	/* t1 = v << 16 */
+	or	r9, r11, r9	/* t1 = h | t1 */
+	sw	r9, r5, r4	/* *(d + n) = t1 */
+	bneid	r4, d_wu2_loop	/* while (n) loop */
+	bsrli	r11, r12, 16	/* h = v >> 16 (IN DELAY SLOT) */
+
+d_word_done:
+
+d_xfer_end:
+d_xfer_end_loop:
+	beqi	r7, a_done		/* while (c) */
+	addi	r6, r6, -1		/* s-- */
+	lbui	r9, r6, 0		/* t1 = *s */
+	addi	r5, r5, -1		/* d-- */
+	sbi	r9, r5, 0		/* *d = t1 */
+	brid	d_xfer_end_loop		/* loop */
+	addi	r7, r7, -1		/* c-- (IN DELAY SLOT) */
+
+a_done:
+d_done:
+	rtsd	r15, 8
+	nop
+
+.size  memmove, . - memmove
+.end memmove
+libc_hidden_def(memmove)
diff --git a/libc/sysdeps/linux/alpha/bits/mathdef.h b/libc/sysdeps/linux/alpha/bits/mathdef.h
index 2e5258230..615eb9db6 100644
--- a/libc/sysdeps/linux/alpha/bits/mathdef.h
+++ b/libc/sysdeps/linux/alpha/bits/mathdef.h
@@ -44,12 +44,12 @@ typedef double double_t;
 /* Due to an ABI change, we need to remap the complex float symbols.  */
 #  define _Mdouble_		float
 #  define __MATHCALL(function, args) \
-    __MATHDECL (_Complex float, function, args)
+	__MATHDECL(_Complex float, function, args)
 #  define __MATHDECL(type, function, args) \
-    __MATHDECL_1(type, function##f, args, __c1_##function##f); \
-    __MATHDECL_1(type, __##function##f, args, __c1_##function##f)
+	__MATHDECL_1(type, function##f, args, __c1_##function##f); \
+	__MATHDECL_1(type, __##function##f, args, __c1_##function##f)
 #  define __MATHDECL_1(type, function, args, alias) \
-    extern type function args __asm__(#alias) __THROW
+	extern type function args __asm__(#alias) __THROW
 
 #  include <bits/cmathcalls.h>
 
diff --git a/libc/sysdeps/linux/common/Makefile.in b/libc/sysdeps/linux/common/Makefile.in
index 4222912fc..8a4add813 100644
--- a/libc/sysdeps/linux/common/Makefile.in
+++ b/libc/sysdeps/linux/common/Makefile.in
@@ -49,6 +49,11 @@ CSRC := $(filter-out waitpid.c, $(CSRC))
 endif
 endif
 
+ifneq ($(ARCH_USE_MMU),y)
+# stubbed out in mman.h
+CSRC := $(filter-out msync.c, $(CSRC))
+endif
+
 ifneq ($(UCLIBC_BSD_SPECIFIC),y)
 # we need these internally: getdomainname.c
 CSRC := $(filter-out mincore.c setdomainname.c,$(CSRC))
diff --git a/libc/sysdeps/linux/common/__rt_sigtimedwait.c b/libc/sysdeps/linux/common/__rt_sigtimedwait.c
index 79b94adef..a7ab8fb61 100644
--- a/libc/sysdeps/linux/common/__rt_sigtimedwait.c
+++ b/libc/sysdeps/linux/common/__rt_sigtimedwait.c
@@ -12,11 +12,7 @@
 #include <signal.h>
 #include <string.h>
 
-libc_hidden_proto(memcpy)
-
 #ifdef __NR_rt_sigtimedwait
-#include <string.h>
-libc_hidden_proto(memcpy)
 
 # ifdef __UCLIBC_HAS_THREADS_NATIVE__
 #  include <sysdep-cancel.h>
diff --git a/libc/sysdeps/linux/common/__rt_sigwaitinfo.c b/libc/sysdeps/linux/common/__rt_sigwaitinfo.c
index c8953bfbc..92a11c9b6 100644
--- a/libc/sysdeps/linux/common/__rt_sigwaitinfo.c
+++ b/libc/sysdeps/linux/common/__rt_sigwaitinfo.c
@@ -12,12 +12,8 @@
 #include <signal.h>
 #include <string.h>
 
-libc_hidden_proto(memcpy)
-
 #ifdef __NR_rt_sigtimedwait
 
-#include <string.h>
-
 # ifdef __UCLIBC_HAS_THREADS_NATIVE__
 #  include <sysdep-cancel.h>
 
diff --git a/libc/sysdeps/linux/common/bits/mathcalls.h b/libc/sysdeps/linux/common/bits/mathcalls.h
index 3772b7d0d..1e92b527e 100644
--- a/libc/sysdeps/linux/common/bits/mathcalls.h
+++ b/libc/sysdeps/linux/common/bits/mathcalls.h
@@ -48,11 +48,16 @@
 #endif
 
 
-/* __MATHCALLX and __MATHCALLI include libm_hidden_def
+/* __MATHCALLX(type,function,[suffix],args,attrib) and
+ * __MATHCALLI(type,function,[suffix],args) include libm_hidden_def
  * (for "double" versions only, xxxf and xxxl do not get this treatment).
- * __MATHCALL does not.
- * __MATHDECL_PRIV includes libm_hidden_def (always)
- * and declares __foo, not foo.
+ *
+ * __MATHDECL(type,function,[suffix],args) does not.
+ * __MATHCALL(function,[suffix],args) also does not
+ * (it is just a shortcut to __MATHDECL(_Mdouble_,function,[suffix],args)).
+ *
+ * __MATHDECL_PRIV(type,function,[suffix],args,attrib)
+ * includes libm_hidden_def (always) and declares __foo, not foo.
  */
 
 
diff --git a/libc/sysdeps/linux/common/bits/sigset.h b/libc/sysdeps/linux/common/bits/sigset.h
index 2f67a4ebf..4ef22311a 100644
--- a/libc/sysdeps/linux/common/bits/sigset.h
+++ b/libc/sysdeps/linux/common/bits/sigset.h
@@ -53,10 +53,6 @@ typedef struct {
 #if !defined _SIGSET_H_fns && defined _SIGNAL_H
 # define _SIGSET_H_fns 1
 
-# ifndef _EXTERN_INLINE
-#  define _EXTERN_INLINE extern __inline
-# endif
-
 /* Return a mask that includes the bit for SIG only.  */
 /* Unsigned cast ensures shift/mask insns are used.  */
 # define __sigmask(sig) \
@@ -156,20 +152,30 @@ typedef struct {
 /* These functions needn't check for a bogus signal number -- error
    checking is done in the non __ versions.  */
 
+# if !defined __USE_EXTERN_INLINES || defined __PROVIDE_OUT_OF_LINE_SIGSETFN
 extern int __sigismember (__const __sigset_t *, int);
 libc_hidden_proto(__sigismember)
 extern int __sigaddset (__sigset_t *, int);
 libc_hidden_proto(__sigaddset)
 extern int __sigdelset (__sigset_t *, int);
 libc_hidden_proto(__sigdelset)
+# endif
 
 # ifdef __USE_EXTERN_INLINES
+#  undef _EXTERN_INLINE
+#  ifdef __PROVIDE_OUT_OF_LINE_SIGSETFN
+#   define _EXTERN_INLINE
+#  else /* normal case */
+    /* dropped extern below: otherwise every module with __USE_EXTERN_INLINES
+     * will have its own copy of out-of line function emitted. */
+#   define _EXTERN_INLINE /*extern*/ __always_inline
+#  endif
 #  define __SIGSETFN(NAME, BODY, CONST)					\
 _EXTERN_INLINE int							\
 NAME (CONST __sigset_t *__set, int __sig)				\
 {									\
 	unsigned long __mask = __sigmask (__sig);			\
-	unsigned long __word = __sigword (__sig);			\
+	unsigned __word = __sigword (__sig);				\
 	return BODY;							\
 }
 
@@ -180,5 +186,38 @@ __SIGSETFN (__sigdelset, ((__set->__val[__word] &= ~__mask), 0), )
 #  undef __SIGSETFN
 # endif
 
+# ifdef _LIBC
+/* It's far too much PITA to __USE_EXTERN_INLINES from within libc.
+ * Especially since we want to inline only calls with const sig,
+ * but __USE_EXTERN_INLINES will inline all calls!
+ */
+static __always_inline unsigned long
+const_sigismember(const __sigset_t *set, int sig)
+{
+	unsigned long mask = __sigmask(sig);
+	unsigned word = __sigword(sig);
+	return (set->__val[word] & mask);
+}
+#  define __sigismember(set, sig) \
+	(__builtin_constant_p(sig) ? (const_sigismember(set, sig) != 0) : __sigismember(set, sig))
+static __always_inline void
+const_sigaddset(__sigset_t *set, int sig)
+{
+	unsigned long mask = __sigmask(sig);
+	unsigned word = __sigword(sig);
+	set->__val[word] |= mask;
+}
+#  define __sigaddset(set, sig) \
+	(__builtin_constant_p(sig) ? (const_sigaddset(set, sig), 0)  : __sigaddset(set, sig))
+static __always_inline void
+const_sigdelset(__sigset_t *set, int sig)
+{
+	unsigned long mask = __sigmask(sig);
+	unsigned word = __sigword(sig);
+	set->__val[word] &= ~mask;
+}
+#  define __sigdelset(set, sig) \
+	(__builtin_constant_p(sig) ? (const_sigdelset(set, sig), 0) : __sigdelset(set, sig))
+# endif
 
 #endif /* ! _SIGSET_H_fns.  */
diff --git a/libc/sysdeps/linux/common/pause.c b/libc/sysdeps/linux/common/pause.c
index 33eb409c6..ab16fa73c 100644
--- a/libc/sysdeps/linux/common/pause.c
+++ b/libc/sysdeps/linux/common/pause.c
@@ -25,7 +25,7 @@ __libc_pause (void)
 {
   sigset_t set;
 
-  __sigemptyset (&set);
+  /*__sigemptyset (&set); - why? */
   sigprocmask (SIG_BLOCK, NULL, &set);
 
   /* pause is a cancellation point, but so is sigsuspend.
diff --git a/libc/sysdeps/linux/i386/bits/mathinline.h b/libc/sysdeps/linux/i386/bits/mathinline.h
index 53cbcb2b3..9e67182d5 100644
--- a/libc/sysdeps/linux/i386/bits/mathinline.h
+++ b/libc/sysdeps/linux/i386/bits/mathinline.h
@@ -206,7 +206,7 @@ __NTH (__signbitl (long double __x))
   __MATH_INLINE float_type __NTH (func (float_type __x))		      \
   {									      \
     register float_type __result;					      \
-    __asm__ __volatile__ (op : "=t" (__result) : params);			      \
+    __asm__ __volatile__ (op : "=t" (__result) : params);		      \
     return __result;							      \
   }
 
diff --git a/libc/sysdeps/linux/microblaze/Makefile b/libc/sysdeps/linux/microblaze/Makefile
index a7a832b24..338abc086 100644
--- a/libc/sysdeps/linux/microblaze/Makefile
+++ b/libc/sysdeps/linux/microblaze/Makefile
@@ -1,67 +1,25 @@
 # Makefile for uClibc
 #
-# Copyright (C) 2001,2002  NEC Corporation
-# Copyright (C) 2001,2002  Miles Bader <miles@gnu.org>
+# Copyright (C) 2000-2003 Erik Andersen <andersen@uclibc.org>
 #
-# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
-
-TOPDIR=../../../../
-include $(TOPDIR)Rules.mak
-
-#FIXME -- this arch should include its own crti.S and crtn.S
-UCLIBC_CTOR_DTOR=n
-
-CFLAGS += -I..
-ASFLAGS += -I.. -D__ASSEMBLER -DASM_GLOBAL_DIRECTIVE=.globl
-
-TARGET_MACHINE_TYPE=$(shell $(CC) -dumpmachine)
-
-CRT_SRC := crt0.S
-CRT_OBJ := crt0.o crt1.o
-CTOR_TARGETS := $(TOPDIR)lib/crti.o $(TOPDIR)lib/crtn.o
-
-SSRC := setjmp.S __longjmp.S vfork.S
-SOBJ := $(patsubst %.S,%.o, $(SSRC))
-
-CSRC := mmap.c syscall.c clone.c
-COBJ := $(patsubst %.c,%.o, $(CSRC))
-
-OBJS := $(SOBJ) $(COBJ)
-
-OBJ_LIST := ../../../obj.sysdeps.$(TARGET_ARCH)
-
-all: $(OBJ_LIST) $(CTOR_TARGETS)
-
-$(OBJ_LIST): $(OBJS) $(CRT_OBJ)
-	$(STRIPTOOL) -x -R .note -R .comment $^
-	$(INSTALL) -d $(TOPDIR)lib/
-	cp $(CRT_OBJ) $(TOPDIR)lib/
-	echo $(patsubst %, sysdeps/linux/$(TARGET_ARCH)/%, $(OBJS)) > $@
-
-$(CRT_OBJ): $(CRT_SRC)
-	$(CC) $(ASFLAGS) -DL_$* $< -c -o $*.o
-
-$(SOBJ): %.o : %.S
-	$(CC) $(ASFLAGS) -c $< -o $@
-
-$(COBJ): %.o : %.c
-	$(CC) $(CFLAGS) -c $< -o $@
-
-ifeq ($(UCLIBC_CTOR_DTOR),y)
-$(TOPDIR)lib/crti.o: crti.S
-	$(INSTALL) -d $(TOPDIR)lib/
-	$(CC) $(ASFLAGS) $(SSP_DISABLE_FLAGS) -c $< -o $@
-
-$(TOPDIR)lib/crtn.o: crtn.S
-	$(INSTALL) -d $(TOPDIR)lib/
-	$(CC) $(ASFLAGS) $(SSP_DISABLE_FLAGS) -c $< -o $@
-else
-$(CTOR_TARGETS):
-	$(INSTALL) -d $(TOPDIR)lib/
-	$(AR) $(ARFLAGS) $@
-endif
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU Library General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Library General Public License
+# along with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 
-headers:
+top_srcdir=../../../../
+top_builddir=../../../../
+all: objs
 
-clean:
-	$(RM) *.o *~ core
+include $(top_builddir)Rules.mak
+include Makefile.arch
+include $(top_srcdir)Makerules
diff --git a/libc/sysdeps/linux/microblaze/Makefile.arch b/libc/sysdeps/linux/microblaze/Makefile.arch
new file mode 100644
index 000000000..ecbd80141
--- /dev/null
+++ b/libc/sysdeps/linux/microblaze/Makefile.arch
@@ -0,0 +1,14 @@
+# Makefile for uClibc
+#
+# Copyright (C) 2001,2002  NEC Corporation
+# Copyright (C) 2001,2002  Miles Bader <miles@gnu.org>
+#
+# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+
+CSRC := mmap.c clone.c fixdfsi.c
+
+SSRC := setjmp.S __longjmp.S vfork.S
+
+ARCH_HEADERS := floatlib.h
+
+include $(top_srcdir)libc/sysdeps/linux/Makefile.commonarch
diff --git a/libc/sysdeps/linux/microblaze/__longjmp.S b/libc/sysdeps/linux/microblaze/__longjmp.S
index 2752f0bd3..c4423bec5 100644
--- a/libc/sysdeps/linux/microblaze/__longjmp.S
+++ b/libc/sysdeps/linux/microblaze/__longjmp.S
@@ -16,30 +16,34 @@
 #define _ASM
 #include <bits/setjmp.h>
 
-#include <clinkage.h>
+#include <libc-symbols.h>
 
 	.text
-C_ENTRY(__longjmp):
+	.globl C_SYMBOL_NAME(__longjmp)
+	.align 4
+C_SYMBOL_NAME(__longjmp):
 	/* load registers from memory to r5 (arg0) */
 	lwi	r1, r5, 0
 	lwi	r15, r5, 4
-	lwi	r18, r5, 8
-	lwi	r19, r5, 12
-	lwi	r20, r5, 16
-	lwi	r21, r5, 20
-	lwi	r22, r5, 24
-	lwi	r23, r5, 28
-	lwi	r24, r5, 32
-	lwi	r25, r5, 36
-	lwi	r26, r5, 40
-	lwi	r27, r5, 44
-	lwi	r28, r5, 48
-	lwi	r29, r5, 52
-	lwi	r30, r5, 56
+	lwi	r2,  r5, 8
+	lwi	r13, r5, 12
+	lwi	r18, r5, 16
+	lwi	r19, r5, 20
+	lwi	r20, r5, 24
+	lwi	r21, r5, 28
+	lwi	r22, r5, 32
+	lwi	r23, r5, 36
+	lwi	r24, r5, 40
+	lwi	r25, r5, 44
+	lwi	r26, r5, 48
+	lwi	r27, r5, 52
+	lwi	r28, r5, 56
+	lwi	r29, r5, 60
+	lwi	r30, r5, 64
+	lwi	r31, r5, 68
 
 	addi	r3, r0, 1		/* return val */
 	rtsd	r15, 8			/* normal return */
 	nop
 
-C_END(__longjmp)
 libc_hidden_def(__longjmp)
diff --git a/libc/sysdeps/linux/microblaze/bits/fcntl.h b/libc/sysdeps/linux/microblaze/bits/fcntl.h
index bb6727ad8..44e8f3f5b 100644
--- a/libc/sysdeps/linux/microblaze/bits/fcntl.h
+++ b/libc/sysdeps/linux/microblaze/bits/fcntl.h
@@ -23,6 +23,9 @@
 
 
 #include <sys/types.h>
+#ifdef __USE_GNU
+# include <bits/uio.h>
+#endif
 
 /* open/fcntl - O_SYNC is only implemented on blocks devices and on files
    located on an ext2 file system */
@@ -42,11 +45,11 @@
 #define O_ASYNC		 020000
 
 #ifdef __USE_GNU
-# define O_DIRECTORY	 040000	/* Must be a directory.	 */
-# define O_NOFOLLOW	0100000	/* Do not follow links.	 */
-# define O_DIRECT	0200000	/* Direct disk access.	*/
-# define O_NOATIME	01000000 /* Do not set atime.  */
-# define O_CLOEXEC	02000000 /* set close_on_exec */
+# define O_DIRECTORY	 0200000	/* Must be a directory.	 */
+# define O_NOFOLLOW	0400000	/* Do not follow links.	 */
+# define O_DIRECT	040000	/* Direct disk access.	*/
+# define O_NOATIME     01000000 /* Do not set atime.  */
+# define O_CLOEXEC     02000000 /* set close_on_exec */
 #endif
 
 /* For now Linux has synchronisity options for data and read operations.
@@ -58,7 +61,7 @@
 #endif
 
 #ifdef __USE_LARGEFILE64
-# define O_LARGEFILE	0400000
+# define O_LARGEFILE	0100000
 #endif
 
 /* Values for the second argument to `fcntl'.  */
@@ -96,6 +99,8 @@
 # define F_NOTIFY	1026	/* Request notfications on a directory.	 */
 # define F_DUPFD_CLOEXEC 1030	/* Duplicate file descriptor with
 				   close-on-exit set on new fd.  */
+# define F_SETPIPE_SZ	1031	/* Set of pipe page size array */
+# define F_GETPIPE_SZ	1032	/* Get of pipe page size array */
 #endif
 
 /* For F_[GET|SET]FL.  */
diff --git a/libc/sysdeps/linux/microblaze/bits/kernel_stat.h b/libc/sysdeps/linux/microblaze/bits/kernel_stat.h
index de35488bd..2c5eb28af 100644
--- a/libc/sysdeps/linux/microblaze/bits/kernel_stat.h
+++ b/libc/sysdeps/linux/microblaze/bits/kernel_stat.h
@@ -9,50 +9,44 @@
 
 struct kernel_stat
 {
-  __kernel_dev_t	st_dev;
-  __kernel_ino_t	st_ino;
-  __kernel_mode_t	st_mode;
-  __kernel_nlink_t 	st_nlink;
-  __kernel_uid_t 	st_uid;
-  __kernel_gid_t 	st_gid;
-  __kernel_dev_t	st_rdev;
-  __kernel_off_t	st_size;
-	unsigned long  st_blksize;
-	unsigned long  st_blocks;
-	struct timespec st_atim;
-	struct timespec st_mtim;
-	struct timespec st_ctim;
-	unsigned long  __unused4;
-	unsigned long  __unused5;
+	unsigned long	st_dev;		/* Device.  */
+	unsigned long	st_ino;		/* File serial number.  */
+	unsigned int	st_mode;	/* File mode.  */
+	unsigned int	st_nlink;	/* Link count.  */
+	unsigned int	st_uid;		/* User ID of the file's owner.  */
+	unsigned int	st_gid;		/* Group ID of the file's group. */
+	unsigned long	st_rdev;	/* Device number, if device.  */
+	unsigned long	__pad1;
+	long		st_size;	/* Size of file, in bytes.  */
+	int		st_blksize;	/* Optimal block size for I/O.  */
+	int		__pad2;
+	long		st_blocks;	/* Number 512-byte blocks allocated. */
+	struct timespec	st_atim;
+	struct timespec	st_mtim;
+	struct timespec	st_ctim;
+	unsigned int	__unused4;
+	unsigned int	__unused5;
 };
 
 struct kernel_stat64
 {
-  __kernel_dev_t	st_dev;
-  unsigned long		__unused0;
-  unsigned long		__unused1;
-
-  __kernel_ino64_t	st_ino;
-
-  __kernel_mode_t	st_mode;
-  __kernel_nlink_t 	st_nlink;
-
-  __kernel_uid_t	st_uid;
-  __kernel_gid_t	st_gid;
-
-  __kernel_dev_t	st_rdev;
-  unsigned long		__unused2;
-  unsigned long		__unused3;
-
-  __kernel_loff_t	st_size;
-	unsigned long	st_blksize;
-
-  unsigned long		__unused4; /* future possible st_blocks high bits */
-	unsigned long	st_blocks;	/* Number 512-byte blocks allocated. */
-
-  struct timespec	st_atim;
-  struct timespec	st_mtim;
-  struct timespec	st_ctim;
+	unsigned long long st_dev;	/* Device.  */
+	unsigned long long st_ino;	/* File serial number.  */
+	unsigned int	st_mode;	/* File mode.  */
+	unsigned int	st_nlink;	/* Link count.  */
+	unsigned int	st_uid;		/* User ID of the file's owner.  */
+	unsigned int	st_gid;		/* Group ID of the file's group. */
+	unsigned long long st_rdev;	/* Device number, if device.  */
+	unsigned long long __pad1;
+	long long	st_size;	/* Size of file, in bytes.  */
+	int		st_blksize;	/* Optimal block size for I/O.  */
+	int		__pad2;
+	long long	st_blocks;	/* Number 512-byte blocks allocated. */
+	struct timespec	st_atim;
+	struct timespec	st_mtim;
+	struct timespec	st_ctim;
+	unsigned int	__unused4;
+	unsigned int	__unused5;
 };
 
 #endif	/*  _BITS_STAT_STRUCT_H */
diff --git a/libc/sysdeps/linux/microblaze/bits/kernel_types.h b/libc/sysdeps/linux/microblaze/bits/kernel_types.h
index 1ca2dae30..2a7057502 100644
--- a/libc/sysdeps/linux/microblaze/bits/kernel_types.h
+++ b/libc/sysdeps/linux/microblaze/bits/kernel_types.h
@@ -13,18 +13,17 @@
  * Microblaze port by John Williams
  */
 
-#ifndef __MICROBLAZE_POSIX_TYPES_H__
-#define __MICROBLAZE_POSIX_TYPES_H__
+#ifndef _ASM_MICROBLAZE_POSIX_TYPES_H
+#define _ASM_MICROBLAZE_POSIX_TYPES_H
 
-typedef unsigned int	__kernel_dev_t;
+typedef unsigned long	__kernel_dev_t;
 typedef unsigned long	__kernel_ino_t;
-typedef unsigned long long __kernel_ino64_t;
-typedef unsigned int	__kernel_mode_t;
-typedef unsigned int	__kernel_nlink_t;
+//typedef unsigned long long __kernel_ino64_t;
+typedef unsigned short	__kernel_mode_t;
+typedef unsigned long	__kernel_nlink_t;
 typedef long		__kernel_off_t;
-typedef long long	__kernel_loff_t;
 typedef int		__kernel_pid_t;
-typedef unsigned short	__kernel_ipc_pid_t;
+typedef int		__kernel_ipc_pid_t;
 typedef unsigned int	__kernel_uid_t;
 typedef unsigned int	__kernel_gid_t;
 typedef unsigned int	__kernel_size_t;
@@ -33,6 +32,8 @@ typedef int		__kernel_ptrdiff_t;
 typedef long		__kernel_time_t;
 typedef long		__kernel_suseconds_t;
 typedef long		__kernel_clock_t;
+typedef int		__kernel_timer_t;
+typedef int		__kernel_clockid_t;
 typedef int		__kernel_daddr_t;
 typedef char *		__kernel_caddr_t;
 typedef unsigned short	__kernel_uid16_t;
@@ -40,9 +41,13 @@ typedef unsigned short	__kernel_gid16_t;
 typedef unsigned int	__kernel_uid32_t;
 typedef unsigned int	__kernel_gid32_t;
 
-typedef unsigned short	__kernel_old_uid_t;
-typedef unsigned short	__kernel_old_gid_t;
-typedef __kernel_dev_t	__kernel_old_dev_t;
+typedef unsigned int	__kernel_old_uid_t;
+typedef unsigned int	__kernel_old_gid_t;
+typedef unsigned int	__kernel_old_dev_t;
+
+#ifdef __GNUC__
+typedef long long	__kernel_loff_t;
+#endif
 
 typedef struct {
 #ifdef __USE_ALL
@@ -52,4 +57,4 @@ typedef struct {
 #endif
 } __kernel_fsid_t;
 
-#endif /* __MICROBLAZE_POSIX_TYPES_H__ */
+#endif /* _ASM_MICROBLAZE_POSIX_TYPES_H */
diff --git a/libc/sysdeps/linux/microblaze/bits/poll.h b/libc/sysdeps/linux/microblaze/bits/poll.h
index f7a739315..78ee877c0 100644
--- a/libc/sysdeps/linux/microblaze/bits/poll.h
+++ b/libc/sysdeps/linux/microblaze/bits/poll.h
@@ -29,10 +29,10 @@
 
 #ifdef __USE_XOPEN
 /* These values are defined in XPG4.2.  */
-# define POLLRDNORM	0x040		/* Normal data may be read.  */
-# define POLLRDBAND	0x080		/* Priority data may be read.  */
-# define POLLWRNORM	POLLOUT		/* Writing now will not block.  */
-# define POLLWRBAND	0x100		/* Priority data may be written.  */
+# define POLLRDNORM	0x0040		/* Normal data may be read.  */
+# define POLLRDBAND	0x0080		/* Priority data may be read.  */
+# define POLLWRNORM	0x0100		/* Writing now will not block.  */
+# define POLLWRBAND	0x0200		/* Priority data may be written.  */
 #endif
 
 /* Event types always implicitly polled for.  These bits need not be set in
diff --git a/libc/sysdeps/linux/microblaze/bits/setjmp.h b/libc/sysdeps/linux/microblaze/bits/setjmp.h
index d966efd2b..0c0573cd7 100644
--- a/libc/sysdeps/linux/microblaze/bits/setjmp.h
+++ b/libc/sysdeps/linux/microblaze/bits/setjmp.h
@@ -28,12 +28,16 @@ typedef struct
     /* Link pointer.  */
     void *__lp;
 
-    /* Callee-saved registers r18-r30.  */
-    int __regs[13];
+    /* SDA pointers */
+    void *__SDA;
+    void *__SDA2;
+
+    /* Callee-saved registers r18-r31.  */
+    int __regs[14];
   } __jmp_buf[1];
 #endif
 
-#define JB_SIZE		(4 * 15)
+#define JB_SIZE		(4 * 18)
 
 /* Test if longjmp to JMPBUF would unwind the frame
    containing a local variable at ADDRESS.  */
diff --git a/libc/sysdeps/linux/microblaze/bits/stackinfo.h b/libc/sysdeps/linux/microblaze/bits/stackinfo.h
new file mode 100644
index 000000000..819e81974
--- /dev/null
+++ b/libc/sysdeps/linux/microblaze/bits/stackinfo.h
@@ -0,0 +1,28 @@
+/* Copyright (C) 1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* This file contains a bit of information about the stack allocation
+   of the processor.  */
+
+#ifndef _STACKINFO_H
+#define _STACKINFO_H	1
+
+/* On microblaze the stack grows down.  */
+#define _STACK_GROWS_DOWN	1
+
+#endif	/* stackinfo.h */
diff --git a/libc/sysdeps/linux/microblaze/bits/syscalls.h b/libc/sysdeps/linux/microblaze/bits/syscalls.h
index b21851333..f4bdc4522 100644
--- a/libc/sysdeps/linux/microblaze/bits/syscalls.h
+++ b/libc/sysdeps/linux/microblaze/bits/syscalls.h
@@ -4,12 +4,54 @@
 # error "Never use <bits/syscalls.h> directly; include <sys/syscall.h> instead."
 #endif
 
-#include <features.h>
 
-/* Do something very evil for now.  Until we create our own syscall
- * macros, short circuit bits/sysnum.h  and use asm/unistd.h instead */
-#warning "fixme -- add arch specific syscall macros.h"
-#include <asm/unistd.h>
+#ifndef __ASSEMBLER__
 
-#endif /* _BITS_SYSCALLS_H */
+#include <errno.h>
+
+#define INTERNAL_SYSCALL_NCS(name, err, nr, args...)			\
+	({								\
+		register int __ret __asm__("r3");			\
+		register int _scno __asm__("r12") = name;		\
+		LOAD_ARGS_##nr (args);					\
+		__asm__ __volatile__("brki	r14, 0x8"		\
+			      : "=r" (__ret)				\
+			      :  "r"(_scno) ASM_ARGS_##nr		\
+			      : __SYSCALL_CLOBBERS );			\
+		__ret;							\
+	})
+
+#define INTERNAL_SYSCALL_ERROR_P(val, err)		\
+	((unsigned int)(val) >= 0xfffff001U)
 
+#define LOAD_ARGS_0() do { } while(0)
+#define ASM_ARGS_0
+#define LOAD_ARGS_1(a1)				\
+	register int _a1 __asm__("r5") = (int)(a1);	\
+	LOAD_ARGS_0()
+#define ASM_ARGS_1	ASM_ARGS_0, "r"(_a1)
+#define LOAD_ARGS_2(a1, a2)				\
+	register int _a2 __asm__("r6") = (int)(a2);	\
+	LOAD_ARGS_1(a1)
+#define ASM_ARGS_2	ASM_ARGS_1, "r"(_a2)
+#define LOAD_ARGS_3(a1, a2, a3)				\
+	register int _a3 __asm__("r7") = (int)(a3);	\
+	LOAD_ARGS_2(a1, a2)
+#define ASM_ARGS_3	ASM_ARGS_2, "r"(_a3)
+#define LOAD_ARGS_4(a1, a2, a3, a4)			\
+	register int _a4 __asm__("r8") = (int)(a4);	\
+	LOAD_ARGS_3(a1, a2, a3)
+#define ASM_ARGS_4	ASM_ARGS_3, "r"(_a4)
+#define LOAD_ARGS_5(a1, a2, a3, a4, a5)			\
+	register int _a5 __asm__("r9") = (int)(a5);	\
+	LOAD_ARGS_4(a1, a2, a3, a4)
+#define ASM_ARGS_5	ASM_ARGS_4, "r"(_a5)
+#define LOAD_ARGS_6(a1, a2, a3, a4, a5, a6)		\
+	register int _a6 __asm__("r10") = (int)(a6);	\
+	LOAD_ARGS_5(a1, a2, a3, a4, a5)
+#define ASM_ARGS_6	ASM_ARGS_5, "r"(_a6)
+
+#define __SYSCALL_CLOBBERS "r4", "r14", "cc", "memory"
+
+#endif /* __ASSEMBLER__ */
+#endif /* _BITS_SYSCALLS_H */
diff --git a/libc/sysdeps/linux/microblaze/bits/uClibc_arch_features.h b/libc/sysdeps/linux/microblaze/bits/uClibc_arch_features.h
index 2a9422e23..86d00084c 100644
--- a/libc/sysdeps/linux/microblaze/bits/uClibc_arch_features.h
+++ b/libc/sysdeps/linux/microblaze/bits/uClibc_arch_features.h
@@ -10,7 +10,7 @@
 #undef __UCLIBC_ABORT_INSTRUCTION__
 
 /* can your target use syscall6() for mmap ? */
-#undef __UCLIBC_MMAP_HAS_6_ARGS__
+#define __UCLIBC_MMAP_HAS_6_ARGS__
 
 /* does your target use syscall4() for truncate64 ? (32bit arches only) */
 #undef __UCLIBC_TRUNCATE64_HAS_4_ARGS__
@@ -22,7 +22,7 @@
 #undef __UCLIBC_HANDLE_OLDER_RLIMIT__
 
 /* does your target have an asm .set ? */
-#define __UCLIBC_HAVE_ASM_SET_DIRECTIVE__
+#undef __UCLIBC_HAVE_ASM_SET_DIRECTIVE__
 
 /* define if target doesn't like .global */
 #undef __UCLIBC_ASM_GLOBAL_DIRECTIVE__
diff --git a/libc/sysdeps/linux/microblaze/bits/uClibc_page.h b/libc/sysdeps/linux/microblaze/bits/uClibc_page.h
new file mode 100644
index 000000000..8fc81ae32
--- /dev/null
+++ b/libc/sysdeps/linux/microblaze/bits/uClibc_page.h
@@ -0,0 +1,41 @@
+/*  Copyright (C) 2004     Erik Andersen
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Library General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Library General Public
+ *  License along with this library; if not, write to the Free
+ *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supply an architecture specific value for PAGE_SIZE and friends.  */
+
+#ifndef _UCLIBC_PAGE_H
+#define _UCLIBC_PAGE_H
+
+#include <linux/autoconf.h>
+
+#if   defined(CONFIG_MICROBLAZE_32K_PAGES)
+#define PAGE_SHIFT		15
+#elif defined(CONFIG_MICROBLAZE_16K_PAGES)
+#define PAGE_SHIFT		14
+#elif defined(CONFIG_MICROBLAZE_8K_PAGES)
+#define PAGE_SHIFT		13
+#elif defined(CONFIG_MICROBLAZE_4K_PAGES)
+#define PAGE_SHIFT		12
+#else
+#warning Missing CONFIG_MICROBLAZE_nnK_PAGES, assuming 4K
+#define PAGE_SHIFT		12
+#endif
+
+#define PAGE_SIZE	(1UL << PAGE_SHIFT)
+#define PAGE_MASK	(~(PAGE_SIZE-1))
+
+#endif /* _UCLIBC_PAGE_H */
diff --git a/libc/sysdeps/linux/microblaze/clinkage.h b/libc/sysdeps/linux/microblaze/clinkage.h
deleted file mode 100644
index a9beffc96..000000000
--- a/libc/sysdeps/linux/microblaze/clinkage.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * libc/sysdeps/linux/microblaze/clinkage.h -- Macros for C symbols in assembler
- *
- *  Copyright (C) 2003  John Williams <jwilliams@itee.uq.edu.au>
- *  Copyright (C) 2001  NEC Corporation
- *  Copyright (C) 2001  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU Lesser
- * General Public License.  See the file COPYING.LIB in the main
- * directory of this archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <asm/clinkage.h>
diff --git a/libc/sysdeps/linux/microblaze/clone.c b/libc/sysdeps/linux/microblaze/clone.c
index a47ee8c68..d92100865 100644
--- a/libc/sysdeps/linux/microblaze/clone.c
+++ b/libc/sysdeps/linux/microblaze/clone.c
@@ -1,52 +1,47 @@
 /*
- * libc/sysdeps/linux/microblaze/clone.c -- `clone' syscall for linux/microblaze
+ * Copyright (C) 2004 Atmel Corporation
  *
- *  Copyright (C) 2003     John Williams <jwilliams@itee.uq.edu.au>
- *  Copyright (C) 2002,03  NEC Electronics Corporation
- *  Copyright (C) 2002,03  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU Lesser
- * General Public License.  See the file COPYING.LIB in the main
- * directory of this archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- * Microblaze port by John Williams
+ * This file is subject to the terms and conditions of the GNU Lesser General
+ * Public License.  See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
  */
-
+#include <sched.h>
 #include <errno.h>
 #include <sys/syscall.h>
+#include <unistd.h>
 
-int
-clone (int (*fn)(void *arg), void *child_stack, int flags, void *arg)
+int clone(int (*fn)(void *arg), void *child_stack, int flags, void *arg, ...)
 {
-  register unsigned long rval __asm__ (SYSCALL_RET) = -EINVAL;
+	int rval = -EINVAL;
+	if (fn && child_stack)
+		rval = INTERNAL_SYSCALL(clone, 0, 2, flags, child_stack);
+
+	if (rval == 0)
+	{
+		int exitCode = fn(arg);
+		rval = INTERNAL_SYSCALL(exit, 0, 1, exitCode);
+	}
 
-  if (fn && child_stack)
-    {
-      register unsigned long syscall __asm__ (SYSCALL_NUM);
-      register unsigned long arg0 __asm__ (SYSCALL_ARG0);
-      register unsigned long arg1 __asm__ (SYSCALL_ARG1);
+	return rval;
+}
 
-      /* Clone this thread.  */
-      arg0 = flags;
-      arg1 = (unsigned long)child_stack;
-      syscall = __NR_clone;
-      __asm__ __volatile__ ("bralid r17, trap;nop;"
-		    : "=r" (rval), "=r" (syscall)
-		    : "1" (syscall), "r" (arg0), "r" (arg1)
-		    : SYSCALL_CLOBBERS);
+#ifdef __NR_clone2
+int
+__clone2(int (*fn)(void *arg), void *child_stack, size_t stack_size,
+	 int flags, void *arg, ...)
+{
+	int rval = -EINVAL;
+	if (fn && child_stack)
+	{
+		rval = INTERNAL_SYSCALL(clone2, 0, 3, flags, child_stack, stack_size);
+	}
 
-      if (rval == 0)
-	/* In child thread, call FN and exit.  */
+	if (rval == 0)
 	{
-	  arg0 = (*fn) (arg);
-	  syscall = __NR_exit;
-	  __asm__ __volatile__ ("bralid r17, trap;nop;"
-			: "=r" (rval), "=r" (syscall)
-			: "1" (syscall), "r" (arg0)
-			: SYSCALL_CLOBBERS);
+		int exitCode = fn(arg);
+		rval = INTERNAL_SYSCALL(exit, 0, 1, exitCode);
 	}
-    }
 
-  __syscall_return (int, rval);
+	return rval;
 }
+#endif
diff --git a/libc/sysdeps/linux/microblaze/crt1.S b/libc/sysdeps/linux/microblaze/crt1.S
new file mode 100644
index 000000000..e9564cfba
--- /dev/null
+++ b/libc/sysdeps/linux/microblaze/crt1.S
@@ -0,0 +1,61 @@
+/*
+ * libc/sysdeps/linux/microblaze/crt1.S -- Initial program entry point for linux/microblaze
+ *
+ *  Copyright (C) 2009       Meyer Sound Laboratories
+ *  Copyright (C) 2003       John Williams <jwilliams@itee.uq.edu.au>
+ *  Copyright (C) 2001,2002  NEC Corporation
+ *  Copyright (C) 2001,2002  Miles Bader <miles@gnu.org>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License.  See the file COPYING.LIB in the main
+ * directory of this archive for more details.
+ *
+ * Written by Miles Bader <miles@gnu.org>
+ */
+
+#include <libc-symbols.h>
+
+/* Upon entry, the stack contains the following data:
+	argc, argv[0], ..., argv[argc-1], 0, envp[0], ..., 0
+*/
+
+	.text
+	.globl C_SYMBOL_NAME(_start)
+	.align 4
+C_SYMBOL_NAME(_start):
+
+	/* Load SDAs */
+	la	r2, r0, C_SYMBOL_NAME(_SDA2_BASE_) /* in the original source r2 was SDA, and r13 was SDA2, no idea why */
+	la	r13, r0, C_SYMBOL_NAME(_SDA_BASE_)
+
+	/*
+	Preparing arguments for uClibc's startup routine.
+	The routine has 6 arguments, so 5 of them are placed
+	into registers, one on the stack
+	*/
+
+	la      r5, r0, C_SYMBOL_NAME(main) /* Arg 1: main() */
+	lw	r6, r0, r1		    /* Arg 2: argc   */
+	addi	r7, r1, 4		    /* Arg 3: argv   */
+	la	r8, r0, _init               /* Arg 4: init   */
+	la	r9, r0, _fini               /* Arg 5: fini   */
+	addk	r10,r0,r0                   /* Arg 6: rtld_fini = NULL */
+
+
+	/* Reserve space for __uClibc_main to save parameters
+	   (Microblaze ABI stack calling convention)
+	   and for stack_end argument to __uClibc_main  */
+	add 	r3, r1, r0
+	addi    r1, r1, -32
+
+	/* tail-call uClibc's startup routine */
+	brid	C_SYMBOL_NAME(__uClibc_main)
+	swi 	r3, r1, 28	/* Arg 7: stack end [DELAY SLOT] */
+
+/* Define a symbol for the first piece of initialized data.  */
+	.data
+	.globl __data_start
+__data_start:
+	.long 0
+	.weak data_start
+	data_start = __data_start
diff --git a/libc/sysdeps/linux/microblaze/crti.S b/libc/sysdeps/linux/microblaze/crti.S
new file mode 100644
index 000000000..e00396897
--- /dev/null
+++ b/libc/sysdeps/linux/microblaze/crti.S
@@ -0,0 +1,41 @@
+/*
+ * libc/sysdeps/linux/microblaze/crti.S -- init/fini entry code for microblaze
+ *                                         (baselined with gcc 4.1.2)
+ *
+ *  Copyright (C) 2010       Digital Design Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License.  See the file COPYING.LIB in the main
+ * directory of this archive for more details.
+ */
+
+#define END_INIT
+#define END_FINI
+#define ALIGN
+#include <libc-symbols.h>
+
+/*@HEADER_ENDS*/
+
+	.section .init
+	.align	2
+	.globl	_init
+_init:
+	addik	r1, r1, -32
+	swi	r19, r1, 28
+	addk	r19, r1, r0
+	swi	r15, r1, 0
+
+	ALIGN
+	END_INIT
+
+	.section .fini
+	.align	2
+	.globl	_fini
+_fini:
+	addik	r1, r1, -32
+	swi	r19, r1, 28
+	addk	r19, r1, r0
+	swi	r15, r1, 0
+
+	ALIGN
+	END_FINI
diff --git a/libc/sysdeps/linux/microblaze/crtn.S b/libc/sysdeps/linux/microblaze/crtn.S
new file mode 100644
index 000000000..da8c920ef
--- /dev/null
+++ b/libc/sysdeps/linux/microblaze/crtn.S
@@ -0,0 +1,45 @@
+/*
+ * libc/sysdeps/linux/microblaze/crtn.S -- init/fini exit code for microblaze
+ *                                         (baselined with gcc 4.1.2)
+ *
+ *  Copyright (C) 2010       Digital Design Corporation
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License.  See the file COPYING.LIB in the main
+ * directory of this archive for more details.
+ */
+
+#define END_INIT
+#define END_FINI
+#define ALIGN
+#include <libc-symbols.h>
+
+	.section .init
+	.align	2
+	.globl	_init
+	.ent	_init
+
+	lwi	r15, r1, 0
+	lwi	r19, r1, 28
+	rtsd	r15, 8
+	addik	r1, r1, 32	# Delay slot
+
+	.end	_init
+$Lfe2:
+	.size	_init,$Lfe2-_init
+
+	.section .fini
+	.align	2
+	.globl	_fini
+	.ent	_fini
+
+	lwi	r15, r1, 0
+	lwi	r19, r1, 28
+	rtsd	r15, 8
+	addik	r1, r1, 32	# Delay slot
+
+	.end	_fini
+$Lfe3:
+	.size	_fini,$Lfe3-_fini
+
+/*@TRAILER_BEGINS*/
diff --git a/libc/sysdeps/linux/microblaze/fixdfsi.c b/libc/sysdeps/linux/microblaze/fixdfsi.c
new file mode 100644
index 000000000..1611176aa
--- /dev/null
+++ b/libc/sysdeps/linux/microblaze/fixdfsi.c
@@ -0,0 +1,85 @@
+/*
+** libgcc support for software floating point.
+** Copyright (C) 1991 by Pipeline Associates, Inc.  All rights reserved.
+** Permission is granted to do *anything* you want with this file,
+** commercial or otherwise, provided this message remains intact.  So there!
+** I would appreciate receiving any updates/patches/changes that anyone
+** makes, and am willing to be the repository for said changes (am I
+** making a big mistake?).
+
+Warning! Only single-precision is actually implemented.  This file
+won't really be much use until double-precision is supported.
+
+However, once that is done, this file might eventually become a
+replacement for libgcc1.c.  It might also make possible
+cross-compilation for an IEEE target machine from a non-IEEE
+host such as a VAX.
+
+If you'd like to work on completing this, please talk to rms@gnu.ai.mit.edu.
+
+--> Double precision floating support added by James Carlson on 20 April 1998.
+
+**
+** Pat Wood
+** Pipeline Associates, Inc.
+** pipeline!phw@motown.com or
+** sun!pipeline!phw or
+** uunet!motown!pipeline!phw
+**
+** 05/01/91 -- V1.0 -- first release to gcc mailing lists
+** 05/04/91 -- V1.1 -- added float and double prototypes and return values
+**                  -- fixed problems with adding and subtracting zero
+**                  -- fixed rounding in truncdfsf2
+**                  -- fixed SWAP define and tested on 386
+*/
+
+/*
+** The following are routines that replace the libgcc soft floating point
+** routines that are called automatically when -msoft-float is selected.
+** The support single and double precision IEEE format, with provisions
+** for byte-swapped machines (tested on 386).  Some of the double-precision
+** routines work at full precision, but most of the hard ones simply punt
+** and call the single precision routines, producing a loss of accuracy.
+** long long support is not assumed or included.
+** Overall accuracy is close to IEEE (actually 68882) for single-precision
+** arithmetic.  I think there may still be a 1 in 1000 chance of a bit
+** being rounded the wrong way during a multiply.  I'm not fussy enough to
+** bother with it, but if anyone is, knock yourself out.
+**
+** Efficiency has only been addressed where it was obvious that something
+** would make a big difference.  Anyone who wants to do this right for
+** best speed should go in and rewrite in assembler.
+**
+** I have tested this only on a 68030 workstation and 386/ix integrated
+** in with -msoft-float.
+*/
+
+#include "floatlib.h"
+
+/* convert double to int */
+long
+__fixdfsi (double a1)
+{
+  register union double_long dl1;
+  register int exp;
+  register long l;
+
+  dl1.d = a1;
+
+  if (!dl1.l.upper && !dl1.l.lower)
+    return (0);
+
+  exp = EXPD (dl1) - EXCESSD - 31;
+  l = MANTD (dl1);
+
+  if (exp > 0)
+      return SIGND(dl1) ? (1<<31) : ((1ul<<31)-1);
+
+  /* shift down until exp = 0 or l = 0 */
+  if (exp < 0 && exp > -32 && l)
+    l >>= -exp;
+  else
+    return (0);
+
+  return (SIGND (dl1) ? -l : l);
+}
diff --git a/libc/sysdeps/linux/microblaze/floatlib.h b/libc/sysdeps/linux/microblaze/floatlib.h
new file mode 100644
index 000000000..817ba7de0
--- /dev/null
+++ b/libc/sysdeps/linux/microblaze/floatlib.h
@@ -0,0 +1,140 @@
+/*
+** libgcc support for software floating point.
+** Copyright (C) 1991 by Pipeline Associates, Inc.  All rights reserved.
+** Permission is granted to do *anything* you want with this file,
+** commercial or otherwise, provided this message remains intact.  So there!
+** I would appreciate receiving any updates/patches/changes that anyone
+** makes, and am willing to be the repository for said changes (am I
+** making a big mistake?).
+
+Warning! Only single-precision is actually implemented.  This file
+won't really be much use until double-precision is supported.
+
+However, once that is done, this file might eventually become a
+replacement for libgcc1.c.  It might also make possible
+cross-compilation for an IEEE target machine from a non-IEEE
+host such as a VAX.
+
+If you'd like to work on completing this, please talk to rms@gnu.ai.mit.edu.
+
+--> Double precision floating support added by James Carlson on 20 April 1998.
+
+**
+** Pat Wood
+** Pipeline Associates, Inc.
+** pipeline!phw@motown.com or
+** sun!pipeline!phw or
+** uunet!motown!pipeline!phw
+**
+** 05/01/91 -- V1.0 -- first release to gcc mailing lists
+** 05/04/91 -- V1.1 -- added float and double prototypes and return values
+**                  -- fixed problems with adding and subtracting zero
+**                  -- fixed rounding in truncdfsf2
+**                  -- fixed SWAP define and tested on 386
+*/
+
+/*
+** The following are routines that replace the libgcc soft floating point
+** routines that are called automatically when -msoft-float is selected.
+** The support single and double precision IEEE format, with provisions
+** for byte-swapped machines (tested on 386).  Some of the double-precision
+** routines work at full precision, but most of the hard ones simply punt
+** and call the single precision routines, producing a loss of accuracy.
+** long long support is not assumed or included.
+** Overall accuracy is close to IEEE (actually 68882) for single-precision
+** arithmetic.  I think there may still be a 1 in 1000 chance of a bit
+** being rounded the wrong way during a multiply.  I'm not fussy enough to
+** bother with it, but if anyone is, knock yourself out.
+**
+** Efficiency has only been addressed where it was obvious that something
+** would make a big difference.  Anyone who wants to do this right for
+** best speed should go in and rewrite in assembler.
+**
+** I have tested this only on a 68030 workstation and 386/ix integrated
+** in with -msoft-float.
+*/
+
+#ifndef __FLOAT_LIB_H__
+#define __FLOAT_LIB_H__
+/* the following deal with IEEE single-precision numbers */
+#define EXCESS		126
+#define SIGNBIT		0x80000000
+#define HIDDEN		(1 << 23)
+#define SIGN(fp)	((fp) & SIGNBIT)
+#define EXP(fp)		(((fp) >> 23) & 0xFF)
+#define MANT(fp)	(((fp) & 0x7FFFFF) | HIDDEN)
+#define PACK(s,e,m)	((s) | ((e) << 23) | (m))
+
+/* the following deal with IEEE double-precision numbers */
+#define EXCESSD		1022
+#define HIDDEND		(1 << 20)
+#define EXPD(fp)	(((fp.l.upper) >> 20) & 0x7FF)
+#define SIGND(fp)	((fp.l.upper) & SIGNBIT)
+#define MANTD(fp)	(((((fp.l.upper) & 0xFFFFF) | HIDDEND) << 10) | \
+				(fp.l.lower >> 22))
+#define HIDDEND_LL	((long long)1 << 52)
+#define MANTD_LL(fp)	((fp.ll & (HIDDEND_LL-1)) | HIDDEND_LL)
+#define PACKD_LL(s,e,m)	(((long long)((s)+((e)<<20))<<32)|(m))
+
+/* define SWAP for 386/960 reverse-byte-order brain-damaged CPUs */
+union double_long {
+    double d;
+#ifdef SWAP
+    struct {
+      unsigned long lower;
+      long upper;
+    } l;
+#else
+    struct {
+      long upper;
+      unsigned long lower;
+    } l;
+#endif
+    long long ll;
+};
+
+union float_long
+  {
+    float f;
+    long l;
+  };
+
+#endif
+
+/* Functions defined in different files */
+
+float __addsf3 (float, float);
+float __subsf3 (float, float);
+long __cmpsf2 (float, float);
+float __mulsf3 (float, float);
+float __divsf3 (float, float);
+double __floatsidf (register long);
+double __floatdidf (register long long);
+float __floatsisf (register long );
+float __floatdisf (register long long );
+float __negsf2 (float);
+double __negdf2 (double);
+double __extendsfdf2 (float);
+float __truncdfsf2 (double);
+long __cmpdf2 (double, double);
+long __fixsfsi (float);
+long __fixdfsi (double);
+long long __fixdfdi (double);
+unsigned long __fixunsdfsi (double);
+unsigned long long __fixunsdfdi (double);
+double __adddf3 (double, double);
+double __subdf3 (double, double);
+double __muldf3 (double, double);
+double __divdf3 (double, double);
+int __gtdf2 (double, double);
+int __gedf2 (double, double);
+int __ltdf2 (double, double);
+int __ledf2 (double, double);
+int __eqdf2 (double, double);
+int __nedf2 (double, double);
+int __gtsf2 (float, float);
+int __gesf2 (float, float);
+int __ltsf2 (float, float);
+int __lesf2 (float, float);
+int __eqsf2 (float, float);
+int __nesf2 (float, float);
diff --git a/libc/sysdeps/linux/microblaze/setjmp.S b/libc/sysdeps/linux/microblaze/setjmp.S
index 7068d4b40..7acb9ea5d 100644
--- a/libc/sysdeps/linux/microblaze/setjmp.S
+++ b/libc/sysdeps/linux/microblaze/setjmp.S
@@ -8,7 +8,7 @@
  * This file is subject to the terms and conditions of the GNU Lesser
  * General Public License.  See the file COPYING.LIB in the main
  * directory of this archive for more details.
- * 
+ *
  * Written by Miles Bader <miles@gnu.org>
  */
 
@@ -16,38 +16,41 @@
 #define _ASM
 #include <bits/setjmp.h>
 
-#include <clinkage.h>
+#include <libc-symbols.h>
 
 	.text
-C_ENTRY(setjmp):
-	addi	r6, r0, 1			/* Save the signal mask.  */
+	.globl C_SYMBOL_NAME(setjmp)
+	.align 4
+C_SYMBOL_NAME(setjmp):
 	braid	C_SYMBOL_NAME(__sigsetjmp)
-	nop
+	addi	r6, r0, 1			/* Save the signal mask.  */
 
 	.globl C_SYMBOL_NAME(_setjmp)
 C_SYMBOL_NAME(_setjmp):
-	add	r6, r0, r0			/* Don't save the signal mask.  */
+	and	r6, r0, r0			/* Don't save the signal mask.  */
 
 	.globl C_SYMBOL_NAME(__sigsetjmp)
 C_SYMBOL_NAME(__sigsetjmp):
 	/* Save registers relative to r5 (arg0)*/
 	swi	r1, r5, 0			/* stack pointer */
 	swi	r15, r5, 4			/* link register */
-	swi	r18, r5, 8			/* assembler temp */
-	swi	r19, r5, 12			/* now call-preserved regs */
-	swi	r20, r5, 16
-	swi	r21, r5, 20
-	swi	r22, r5, 24
-	swi	r23, r5, 28
-	swi	r24, r5, 32
-	swi	r25, r5, 36
-	swi	r26, r5, 40
-	swi	r27, r5, 44
-	swi	r28, r5, 48
-	swi	r29, r5, 52
-	swi	r30, r5, 56
+	swi	r2,  r5, 8			/* SDA and SDA2 ptrs */
+	swi	r13, r5, 12
+	swi	r18, r5, 16			/* assembler temp */
+	swi	r19, r5, 20			/* now call-preserved regs */
+	swi	r20, r5, 24
+	swi	r21, r5, 28
+	swi	r22, r5, 32
+	swi	r23, r5, 36
+	swi	r24, r5, 40
+	swi	r25, r5, 44
+	swi	r26, r5, 48
+	swi	r27, r5, 52
+	swi	r28, r5, 56
+	swi	r29, r5, 60
+	swi	r30, r5, 64
+	swi	r31, r5, 68
 
 	/* Make a tail call to __sigjmp_save; it takes the same args.  */
 	braid	C_SYMBOL_NAME(__sigjmp_save)
 	nop
-C_END(setjmp)
diff --git a/libc/sysdeps/linux/microblaze/sys/procfs.h b/libc/sysdeps/linux/microblaze/sys/procfs.h
new file mode 100644
index 000000000..8fb87ef1e
--- /dev/null
+++ b/libc/sysdeps/linux/microblaze/sys/procfs.h
@@ -0,0 +1,145 @@
+/* Copyright (C) 1996, 1997, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _SYS_PROCFS_H
+#define _SYS_PROCFS_H	1
+
+/* This is somewhat modelled after the file of the same name on SVR4
+   systems.  It provides a definition of the core file format for ELF
+   used on Linux.  It doesn't have anything to do with the /proc file
+   system, even though Linux has one.
+
+   Anyway, the whole purpose of this file is for GDB and GDB only.
+   Don't read too much into it.  Don't use it for anything other than
+   GDB unless you know what you are doing.  */
+
+#include <features.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+
+/* Type for a general-purpose register.  */
+#ifndef ELF_GREG_T
+#define ELF_GREG_T
+typedef unsigned long elf_greg_t;
+#endif
+
+/* This is exactly the same as `struct pt_regs' in the kernel.  */
+struct elf_gregset
+{
+	elf_greg_t gpr[32];	/* General purpose registers.  */
+
+	elf_greg_t pc;		/* program counter */
+	elf_greg_t psw;		/* program status word */
+	elf_greg_t ear;		/* Exception address register */
+	elf_greg_t esr;		/* Excep[tion Status Register */
+	elf_greg_t fsr;		/* FPU Status register */
+
+	elf_greg_t kernel_mode;	/* 1 if in `kernel mode', 0 if user mode */
+	elf_greg_t single_step;	/* 1 if in single step mode */
+};
+
+#ifndef ELF_NGREG
+#define ELF_NGREG (sizeof (struct elf_gregset) / sizeof(elf_greg_t))
+#endif
+
+#ifndef ELF_GREGSET_T
+#define ELF_GREGSET_T
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+#endif
+
+/* Register set for the floating-point registers.  */
+#ifndef ELF_FPREGSET_T
+#define ELF_FPREGSET_T
+typedef elf_greg_t elf_fpregset_t;
+#endif
+
+struct elf_siginfo
+{
+	int	si_signo;			/* signal number */
+	int	si_code;			/* extra code */
+	int	si_errno;			/* errno */
+};
+
+
+/*
+ * Definitions to generate Intel SVR4-like core files.
+ * These mostly have the same names as the SVR4 types with "elf_"
+ * tacked on the front to prevent clashes with linux definitions,
+ * and the typedef forms have been avoided.  This is mostly like
+ * the SVR4 structure, but more Linuxy, with things that Linux does
+ * not support and which gdb doesn't really use excluded.
+ * Fields present but not used are marked with "XXX".
+ */
+struct elf_prstatus
+{
+	struct elf_siginfo pr_info;	/* Info associated with signal */
+	short	pr_cursig;		/* Current signal */
+	unsigned long pr_sigpend;	/* Set of pending signals */
+	unsigned long pr_sighold;	/* Set of held signals */
+	__kernel_pid_t	pr_pid;
+	__kernel_pid_t	pr_ppid;
+	__kernel_pid_t	pr_pgrp;
+	__kernel_pid_t	pr_sid;
+	struct timeval pr_utime;	/* User time */
+	struct timeval pr_stime;	/* System time */
+	struct timeval pr_cutime;	/* Cumulative user time */
+	struct timeval pr_cstime;	/* Cumulative system time */
+	elf_gregset_t pr_reg;	/* GP registers */
+	int pr_fpvalid;		/* True if math co-processor being used.  */
+};
+
+#define ELF_PRARGSZ	(80)	/* Number of chars for args */
+
+struct elf_prpsinfo
+{
+	char	pr_state;	/* numeric process state */
+	char	pr_sname;	/* char for pr_state */
+	char	pr_zomb;	/* zombie */
+	char	pr_nice;	/* nice val */
+	unsigned long pr_flag;	/* flags */
+	__kernel_uid_t	pr_uid;
+	__kernel_gid_t	pr_gid;
+	__kernel_pid_t	pr_pid, pr_ppid, pr_pgrp, pr_sid;
+	/* Lots missing */
+	char	pr_fname[16];	/* filename of executable */
+	char	pr_psargs[ELF_PRARGSZ];	/* initial part of arg list */
+};
+
+
+/* The rest of this file provides the types for emulation of the
+   Solaris <proc_service.h> interfaces that should be implemented by
+   users of libthread_db.  */
+
+/* Addresses.  */
+typedef void *psaddr_t;
+
+/* Register sets.  Linux has different names.  */
+typedef elf_gregset_t prgregset_t;
+typedef elf_fpregset_t prfpregset_t;
+
+/* We don't have any differences between processes and threads,
+   therefore have only one PID type.  */
+typedef __kernel_pid_t lwpid_t;
+
+/* Process status and info.  In the end we do provide typedefs for them.  */
+typedef struct elf_prstatus prstatus_t;
+typedef struct elf_prpsinfo prpsinfo_t;
+
+
+#endif	/* sys/procfs.h */
diff --git a/libc/sysdeps/linux/microblaze/sys/ptrace.h b/libc/sysdeps/linux/microblaze/sys/ptrace.h
index b7a9adf25..26d153ce1 100644
--- a/libc/sysdeps/linux/microblaze/sys/ptrace.h
+++ b/libc/sysdeps/linux/microblaze/sys/ptrace.h
@@ -65,12 +65,16 @@ enum __ptrace_request
   PTRACE_KILL = 8,
 #define PT_KILL PTRACE_KILL
 
+  /* Single step the process.  */
+  PTRACE_SINGLESTEP = 9,
+#define PT_STEP PTRACE_SINGLESTEP
+
   /* Attach to a process that is already running. */
-  PTRACE_ATTACH = 0x10,
+  PTRACE_ATTACH = 16,
 #define PT_ATTACH PTRACE_ATTACH
 
   /* Detach from a process attached to with PTRACE_ATTACH.  */
-  PTRACE_DETACH = 0x11,
+  PTRACE_DETACH = 17,
 #define PT_DETACH PTRACE_DETACH
 
   /* Continue and stop at the next (return from) syscall.  */
diff --git a/libc/sysdeps/linux/microblaze/syscall.c b/libc/sysdeps/linux/microblaze/syscall.c
deleted file mode 100644
index 0e07e4595..000000000
--- a/libc/sysdeps/linux/microblaze/syscall.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * libc/sysdeps/linux/microblaze/syscall.c -- generic syscall function for linux/microblaze
- *
- *  Copyright (C) 2003  John Williams <jwilliams@itee.uq.edu.au>
- *  Copyright (C) 2002  NEC Corporation
- *  Copyright (C) 2002  Miles Bader <miles@gnu.org>
- *
- * This file is subject to the terms and conditions of the GNU Lesser
- * General Public License.  See the file COPYING.LIB in the main
- * directory of this archive for more details.
- *
- * Written by Miles Bader <miles@gnu.org>
- */
-
-#include <errno.h>
-#include <sys/syscall.h>
-
-typedef unsigned long arg_t;
-
-/* Invoke `system call' NUM, passing it the remaining arguments.
-   This is completely system-dependent, and not often useful.  */
-long
-syscall (long num, arg_t a1, arg_t a2, arg_t a3, arg_t a4, arg_t a5, arg_t a6)
-{
-  /* We don't know how many arguments are valid, so A5 and A6 are fetched
-     off the stack even for (the majority of) system calls with fewer
-     arguments; hopefully this won't cause any problems.  A1-A4 are in
-     registers, so they're OK.  */
-  register arg_t a __asm__ (SYSCALL_ARG0) = a1;
-  register arg_t b __asm__ (SYSCALL_ARG1) = a2;
-  register arg_t c __asm__ (SYSCALL_ARG2) = a3;
-  register arg_t d __asm__ (SYSCALL_ARG3) = a4;
-  register arg_t e __asm__ (SYSCALL_ARG4) = a5;
-  register arg_t f __asm__ (SYSCALL_ARG5) = a6;
-  register unsigned long syscall __asm__ (SYSCALL_NUM) = num;
-  register unsigned long ret __asm__ (SYSCALL_RET);
-	unsigned long ret_sav;
-
-  *((unsigned long *)0xFFFF4004) = (unsigned int)('+');
-  __asm__ ("brlid r17, 08x; nop;"
-       : "=r" (ret)
-       : "r" (syscall), "r" (a), "r" (b), "r" (c), "r" (d), "r" (e), "r" (f)
-       : SYSCALL_CLOBBERS);
-
-  ret_sav=ret;
-  *((unsigned long *)0xFFFF4004) = (unsigned int)('-');
-
-
-
-  __syscall_return (long, ret);
-}
diff --git a/libc/sysdeps/linux/microblaze/vfork.S b/libc/sysdeps/linux/microblaze/vfork.S
index 42458308a..c4b4dbf2f 100644
--- a/libc/sysdeps/linux/microblaze/vfork.S
+++ b/libc/sysdeps/linux/microblaze/vfork.S
@@ -1,14 +1,14 @@
 /*
  * libc/sysdeps/linux/microblaze/vfork.S -- `vfork' syscall for linux/microblaze
  *
+ * Copyright (C) 2003  John Williams <jwilliams@itee.uq.edu.au>
  * Copyright (C) 2001  NEC Corporation
  * Copyright (C) 2001  Miles Bader <miles@gnu.org>
- * Copyright (C) 2003  John Williams <jwilliams@itee.uq.edu.au>
- * Copyright (C) 2000-2006 Erik Andersen <andersen@uclibc.org>
  *
- * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
- */
-/*
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License.  See the file COPYING.LIB in the main
+ * directory of this archive for more details.
+ *
  * Written by Miles Bader <miles@gnu.org>
  * Microblaze port by John Williams
  */
@@ -18,7 +18,7 @@
 #define _SYSCALL_H
 #include <bits/sysnum.h>
 
-#include <clinkage.h>
+#include <libc-symbols.h>
 
 /* Clone the calling process, but without copying the whole address space.
    The calling process is suspended until the new process exits or is
@@ -27,18 +27,20 @@
 
 .global C_SYMBOL_NAME(errno)
 
-C_ENTRY (__vfork):
+	.globl __vfork
+	.align 4
+__vfork:
 	addi	r12, r0, SYS_vfork
-	bralid	r17, 0x08;
-	nop
+	brki	r14, 0x08;
 	addi	r4, r3, 125		/* minimum err value */
 	blti	r4, 1f			/* is r3 < -125? */
-	rtsd	r15, 8			/* normal return */
-	nop
-1:	sub	r3, r3, r0		/* r3 = -r3 */
+	bri	2f			/* normal return */
+1:	sub 	r3, r3, r0		/* r3 = -r3 */
 	swi	r3, r0, C_SYMBOL_NAME(errno);
-	rtsd	r15, 8			/* error return */
+					/* state restore etc */
+2:	rtsd	r15, 8			/* error return */
 	nop
-C_END(__vfork)
+       .size   __vfork, .-__vfork
+
 weak_alias(__vfork,vfork)
 libc_hidden_weak(vfork)
diff --git a/libc/sysdeps/linux/mips/sysdep.h b/libc/sysdeps/linux/mips/sysdep.h
index 0860c1a48..9de4f1ace 100644
--- a/libc/sysdeps/linux/mips/sysdep.h
+++ b/libc/sysdeps/linux/mips/sysdep.h
@@ -98,7 +98,8 @@
 #ifdef __PIC__
 #define PSEUDO(name, syscall_name, args) 		\
   .align 2;						\
-  99: la t9,__syscall_error;				\
+  99: move a0, v0;					\
+  la t9,__syscall_error;				\
   jr t9;						\
   ENTRY(name)						\
   .set noreorder;					\
@@ -112,7 +113,8 @@ L(syse1):
 #define PSEUDO(name, syscall_name, args) 		\
   .set noreorder;					\
   .align 2;						\
-  99: j __syscall_error;				\
+  99: move a0, v0;					\
+  j __syscall_error;					\
   nop;							\
   ENTRY(name)						\
   .set noreorder;					\
diff --git a/libc/sysdeps/linux/sparc/Makefile.arch b/libc/sysdeps/linux/sparc/Makefile.arch
index 8a624205a..91c6e85e4 100644
--- a/libc/sysdeps/linux/sparc/Makefile.arch
+++ b/libc/sysdeps/linux/sparc/Makefile.arch
@@ -5,7 +5,7 @@
 # Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
 #
 
-CSRC := brk.c __syscall_error.c qp_ops.c
+CSRC := brk.c __syscall_error.c
 
 SSRC := \
 	__longjmp.S setjmp.S bsd-setjmp.S bsd-_setjmp.S \
@@ -16,4 +16,9 @@ CSRC += sigaction.c
 SSRC += fork.S vfork.S
 endif
 
-
+CSRC += $(foreach f, \
+	q_div.c   q_fle.c    q_mul.c   q_qtoll.c   q_stoq.c    \
+	mp_clz_tab.c  q_dtoq.c  q_flt.c    q_neg.c   q_qtos.c    q_sub.c    \
+	q_add.c      q_feq.c   q_fne.c    q_qtod.c  q_qtou.c    q_ulltoq.c  \
+	q_cmp.c      q_fge.c   q_itoq.c   q_qtoull.c  q_util.c    \
+	q_cmpe.c     q_fgt.c   q_lltoq.c  q_qtoi.c  q_sqrt.c    q_utoq.c, soft-fp/$(f))
diff --git a/libc/sysdeps/linux/sparc/pipe.S b/libc/sysdeps/linux/sparc/pipe.S
index 76a5b9617..94913d486 100644
--- a/libc/sysdeps/linux/sparc/pipe.S
+++ b/libc/sysdeps/linux/sparc/pipe.S
@@ -49,7 +49,7 @@ pipe:
     st %o0,[%i0]
     st %o1,[%i0+4]
 	ret
-	 restore %o0,%g0,%o0
+	 restore %g0,%g0,%o0
 
 .Lerror:
 	call	HIDDEN_JUMPTARGET(__errno_location)
diff --git a/libc/sysdeps/linux/sparc/soft-fp/double.h b/libc/sysdeps/linux/sparc/soft-fp/double.h
new file mode 100644
index 000000000..b012d9d51
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/double.h
@@ -0,0 +1,264 @@
+/* Software floating-point emulation.
+   Definitions for IEEE Double Precision
+   Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.  */
+
+#if _FP_W_TYPE_SIZE < 32
+#error "Here's a nickel kid.  Go buy yourself a real computer."
+#endif
+
+#if _FP_W_TYPE_SIZE < 64
+#define _FP_FRACTBITS_D		(2 * _FP_W_TYPE_SIZE)
+#else
+#define _FP_FRACTBITS_D		_FP_W_TYPE_SIZE
+#endif
+
+#define _FP_FRACBITS_D		53
+#define _FP_FRACXBITS_D		(_FP_FRACTBITS_D - _FP_FRACBITS_D)
+#define _FP_WFRACBITS_D		(_FP_WORKBITS + _FP_FRACBITS_D)
+#define _FP_WFRACXBITS_D	(_FP_FRACTBITS_D - _FP_WFRACBITS_D)
+#define _FP_EXPBITS_D		11
+#define _FP_EXPBIAS_D		1023
+#define _FP_EXPMAX_D		2047
+
+#define _FP_QNANBIT_D		\
+	((_FP_W_TYPE)1 << (_FP_FRACBITS_D-2) % _FP_W_TYPE_SIZE)
+#define _FP_QNANBIT_SH_D		\
+	((_FP_W_TYPE)1 << (_FP_FRACBITS_D-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
+#define _FP_IMPLBIT_D		\
+	((_FP_W_TYPE)1 << (_FP_FRACBITS_D-1) % _FP_W_TYPE_SIZE)
+#define _FP_IMPLBIT_SH_D		\
+	((_FP_W_TYPE)1 << (_FP_FRACBITS_D-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
+#define _FP_OVERFLOW_D		\
+	((_FP_W_TYPE)1 << _FP_WFRACBITS_D % _FP_W_TYPE_SIZE)
+
+typedef float DFtype __attribute__((mode(DF)));
+
+#if _FP_W_TYPE_SIZE < 64
+
+union _FP_UNION_D
+{
+  DFtype flt;
+  struct {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    unsigned sign  : 1;
+    unsigned exp   : _FP_EXPBITS_D;
+    unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE;
+    unsigned frac0 : _FP_W_TYPE_SIZE;
+#else
+    unsigned frac0 : _FP_W_TYPE_SIZE;
+    unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE;
+    unsigned exp   : _FP_EXPBITS_D;
+    unsigned sign  : 1;
+#endif
+  } bits __attribute__((packed));
+};
+
+#define FP_DECL_D(X)		_FP_DECL(2,X)
+#define FP_UNPACK_RAW_D(X,val)	_FP_UNPACK_RAW_2(D,X,val)
+#define FP_UNPACK_RAW_DP(X,val)	_FP_UNPACK_RAW_2_P(D,X,val)
+#define FP_PACK_RAW_D(val,X)	_FP_PACK_RAW_2(D,val,X)
+#define FP_PACK_RAW_DP(val,X)		\
+  do {					\
+    if (!FP_INHIBIT_RESULTS)		\
+      _FP_PACK_RAW_2_P(D,val,X);	\
+  } while (0)
+
+#define FP_UNPACK_D(X,val)		\
+  do {					\
+    _FP_UNPACK_RAW_2(D,X,val);		\
+    _FP_UNPACK_CANONICAL(D,2,X);	\
+  } while (0)
+
+#define FP_UNPACK_DP(X,val)		\
+  do {					\
+    _FP_UNPACK_RAW_2_P(D,X,val);	\
+    _FP_UNPACK_CANONICAL(D,2,X);	\
+  } while (0)
+
+#define FP_UNPACK_SEMIRAW_D(X,val)	\
+  do {					\
+    _FP_UNPACK_RAW_2(D,X,val);		\
+    _FP_UNPACK_SEMIRAW(D,2,X);		\
+  } while (0)
+
+#define FP_UNPACK_SEMIRAW_DP(X,val)	\
+  do {					\
+    _FP_UNPACK_RAW_2_P(D,X,val);	\
+    _FP_UNPACK_SEMIRAW(D,2,X);		\
+  } while (0)
+
+#define FP_PACK_D(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(D,2,X);		\
+    _FP_PACK_RAW_2(D,val,X);		\
+  } while (0)
+
+#define FP_PACK_DP(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(D,2,X);		\
+    if (!FP_INHIBIT_RESULTS)		\
+      _FP_PACK_RAW_2_P(D,val,X);	\
+  } while (0)
+
+#define FP_PACK_SEMIRAW_D(val,X)	\
+  do {					\
+    _FP_PACK_SEMIRAW(D,2,X);		\
+    _FP_PACK_RAW_2(D,val,X);		\
+  } while (0)
+
+#define FP_PACK_SEMIRAW_DP(val,X)	\
+  do {					\
+    _FP_PACK_SEMIRAW(D,2,X);		\
+    if (!FP_INHIBIT_RESULTS)		\
+      _FP_PACK_RAW_2_P(D,val,X);	\
+  } while (0)
+
+#define FP_ISSIGNAN_D(X)		_FP_ISSIGNAN(D,2,X)
+#define FP_NEG_D(R,X)			_FP_NEG(D,2,R,X)
+#define FP_ADD_D(R,X,Y)			_FP_ADD(D,2,R,X,Y)
+#define FP_SUB_D(R,X,Y)			_FP_SUB(D,2,R,X,Y)
+#define FP_MUL_D(R,X,Y)			_FP_MUL(D,2,R,X,Y)
+#define FP_DIV_D(R,X,Y)			_FP_DIV(D,2,R,X,Y)
+#define FP_SQRT_D(R,X)			_FP_SQRT(D,2,R,X)
+#define _FP_SQRT_MEAT_D(R,S,T,X,Q)	_FP_SQRT_MEAT_2(R,S,T,X,Q)
+
+#define FP_CMP_D(r,X,Y,un)	_FP_CMP(D,2,r,X,Y,un)
+#define FP_CMP_EQ_D(r,X,Y)	_FP_CMP_EQ(D,2,r,X,Y)
+#define FP_CMP_UNORD_D(r,X,Y)	_FP_CMP_UNORD(D,2,r,X,Y)
+
+#define FP_TO_INT_D(r,X,rsz,rsg)	_FP_TO_INT(D,2,r,X,rsz,rsg)
+#define FP_FROM_INT_D(X,r,rs,rt)	_FP_FROM_INT(D,2,X,r,rs,rt)
+
+#define _FP_FRAC_HIGH_D(X)	_FP_FRAC_HIGH_2(X)
+#define _FP_FRAC_HIGH_RAW_D(X)	_FP_FRAC_HIGH_2(X)
+
+#else
+
+union _FP_UNION_D
+{
+  DFtype flt;
+  struct {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    unsigned sign   : 1;
+    unsigned exp    : _FP_EXPBITS_D;
+    _FP_W_TYPE frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0);
+#else
+    _FP_W_TYPE frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0);
+    unsigned exp    : _FP_EXPBITS_D;
+    unsigned sign   : 1;
+#endif
+  } bits __attribute__((packed));
+};
+
+#define FP_DECL_D(X)		_FP_DECL(1,X)
+#define FP_UNPACK_RAW_D(X,val)	_FP_UNPACK_RAW_1(D,X,val)
+#define FP_UNPACK_RAW_DP(X,val)	_FP_UNPACK_RAW_1_P(D,X,val)
+#define FP_PACK_RAW_D(val,X)	_FP_PACK_RAW_1(D,val,X)
+#define FP_PACK_RAW_DP(val,X)		\
+  do {					\
+    if (!FP_INHIBIT_RESULTS)		\
+      _FP_PACK_RAW_1_P(D,val,X);	\
+  } while (0)
+
+#define FP_UNPACK_D(X,val)		\
+  do {					\
+    _FP_UNPACK_RAW_1(D,X,val);		\
+    _FP_UNPACK_CANONICAL(D,1,X);	\
+  } while (0)
+
+#define FP_UNPACK_DP(X,val)		\
+  do {					\
+    _FP_UNPACK_RAW_1_P(D,X,val);	\
+    _FP_UNPACK_CANONICAL(D,1,X);	\
+  } while (0)
+
+#define FP_UNPACK_SEMIRAW_D(X,val)	\
+  do {					\
+    _FP_UNPACK_RAW_2(1,X,val);		\
+    _FP_UNPACK_SEMIRAW(D,1,X);		\
+  } while (0)
+
+#define FP_UNPACK_SEMIRAW_DP(X,val)	\
+  do {					\
+    _FP_UNPACK_RAW_2_P(1,X,val);	\
+    _FP_UNPACK_SEMIRAW(D,1,X);		\
+  } while (0)
+
+#define FP_PACK_D(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(D,1,X);		\
+    _FP_PACK_RAW_1(D,val,X);		\
+  } while (0)
+
+#define FP_PACK_DP(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(D,1,X);		\
+    if (!FP_INHIBIT_RESULTS)		\
+      _FP_PACK_RAW_1_P(D,val,X);	\
+  } while (0)
+
+#define FP_PACK_SEMIRAW_D(val,X)	\
+  do {					\
+    _FP_PACK_SEMIRAW(D,1,X);		\
+    _FP_PACK_RAW_1(D,val,X);		\
+  } while (0)
+
+#define FP_PACK_SEMIRAW_DP(val,X)	\
+  do {					\
+    _FP_PACK_SEMIRAW(D,1,X);		\
+    if (!FP_INHIBIT_RESULTS)		\
+      _FP_PACK_RAW_1_P(D,val,X);	\
+  } while (0)
+
+#define FP_ISSIGNAN_D(X)		_FP_ISSIGNAN(D,1,X)
+#define FP_NEG_D(R,X)			_FP_NEG(D,1,R,X)
+#define FP_ADD_D(R,X,Y)			_FP_ADD(D,1,R,X,Y)
+#define FP_SUB_D(R,X,Y)			_FP_SUB(D,1,R,X,Y)
+#define FP_MUL_D(R,X,Y)			_FP_MUL(D,1,R,X,Y)
+#define FP_DIV_D(R,X,Y)			_FP_DIV(D,1,R,X,Y)
+#define FP_SQRT_D(R,X)			_FP_SQRT(D,1,R,X)
+#define _FP_SQRT_MEAT_D(R,S,T,X,Q)	_FP_SQRT_MEAT_1(R,S,T,X,Q)
+
+/* The implementation of _FP_MUL_D and _FP_DIV_D should be chosen by
+   the target machine.  */
+
+#define FP_CMP_D(r,X,Y,un)	_FP_CMP(D,1,r,X,Y,un)
+#define FP_CMP_EQ_D(r,X,Y)	_FP_CMP_EQ(D,1,r,X,Y)
+#define FP_CMP_UNORD_D(r,X,Y)	_FP_CMP_UNORD(D,1,r,X,Y)
+
+#define FP_TO_INT_D(r,X,rsz,rsg)	_FP_TO_INT(D,1,r,X,rsz,rsg)
+#define FP_FROM_INT_D(X,r,rs,rt)	_FP_FROM_INT(D,1,X,r,rs,rt)
+
+#define _FP_FRAC_HIGH_D(X)	_FP_FRAC_HIGH_1(X)
+#define _FP_FRAC_HIGH_RAW_D(X)	_FP_FRAC_HIGH_1(X)
+
+#endif /* W_TYPE_SIZE < 64 */
diff --git a/libc/sysdeps/linux/sparc/soft-fp/extended.h b/libc/sysdeps/linux/sparc/soft-fp/extended.h
new file mode 100644
index 000000000..e5f16debe
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/extended.h
@@ -0,0 +1,431 @@
+/* Software floating-point emulation.
+   Definitions for IEEE Extended Precision.
+   Copyright (C) 1999,2006,2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.  */
+
+#if _FP_W_TYPE_SIZE < 32
+#error "Here's a nickel, kid. Go buy yourself a real computer."
+#endif
+
+#if _FP_W_TYPE_SIZE < 64
+#define _FP_FRACTBITS_E         (4*_FP_W_TYPE_SIZE)
+#else
+#define _FP_FRACTBITS_E		(2*_FP_W_TYPE_SIZE)
+#endif
+
+#define _FP_FRACBITS_E		64
+#define _FP_FRACXBITS_E		(_FP_FRACTBITS_E - _FP_FRACBITS_E)
+#define _FP_WFRACBITS_E		(_FP_WORKBITS + _FP_FRACBITS_E)
+#define _FP_WFRACXBITS_E	(_FP_FRACTBITS_E - _FP_WFRACBITS_E)
+#define _FP_EXPBITS_E		15
+#define _FP_EXPBIAS_E		16383
+#define _FP_EXPMAX_E		32767
+
+#define _FP_QNANBIT_E		\
+	((_FP_W_TYPE)1 << (_FP_FRACBITS_E-2) % _FP_W_TYPE_SIZE)
+#define _FP_QNANBIT_SH_E		\
+	((_FP_W_TYPE)1 << (_FP_FRACBITS_E-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
+#define _FP_IMPLBIT_E		\
+	((_FP_W_TYPE)1 << (_FP_FRACBITS_E-1) % _FP_W_TYPE_SIZE)
+#define _FP_IMPLBIT_SH_E		\
+	((_FP_W_TYPE)1 << (_FP_FRACBITS_E-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
+#define _FP_OVERFLOW_E		\
+	((_FP_W_TYPE)1 << (_FP_WFRACBITS_E % _FP_W_TYPE_SIZE))
+
+typedef float XFtype __attribute__((mode(XF)));
+
+#if _FP_W_TYPE_SIZE < 64
+
+union _FP_UNION_E
+{
+   XFtype flt;
+   struct 
+   {
+#if __BYTE_ORDER == __BIG_ENDIAN
+      unsigned long pad1 : _FP_W_TYPE_SIZE;
+      unsigned long pad2 : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
+      unsigned long sign : 1;
+      unsigned long exp : _FP_EXPBITS_E;
+      unsigned long frac1 : _FP_W_TYPE_SIZE;
+      unsigned long frac0 : _FP_W_TYPE_SIZE;
+#else
+      unsigned long frac0 : _FP_W_TYPE_SIZE;
+      unsigned long frac1 : _FP_W_TYPE_SIZE;
+      unsigned exp : _FP_EXPBITS_E;
+      unsigned sign : 1;
+#endif /* not bigendian */
+   } bits __attribute__((packed));
+};
+
+
+#define FP_DECL_E(X)		_FP_DECL(4,X)
+
+#define FP_UNPACK_RAW_E(X, val)				\
+  do {							\
+    union _FP_UNION_E _flo; _flo.flt = (val);		\
+							\
+    X##_f[2] = 0; X##_f[3] = 0;				\
+    X##_f[0] = _flo.bits.frac0;				\
+    X##_f[1] = _flo.bits.frac1;				\
+    X##_e  = _flo.bits.exp;				\
+    X##_s  = _flo.bits.sign;				\
+  } while (0)
+
+#define FP_UNPACK_RAW_EP(X, val)			\
+  do {							\
+    union _FP_UNION_E *_flo =				\
+    (union _FP_UNION_E *)(val);				\
+							\
+    X##_f[2] = 0; X##_f[3] = 0;				\
+    X##_f[0] = _flo->bits.frac0;			\
+    X##_f[1] = _flo->bits.frac1;			\
+    X##_e  = _flo->bits.exp;				\
+    X##_s  = _flo->bits.sign;				\
+  } while (0)
+
+#define FP_PACK_RAW_E(val, X)				\
+  do {							\
+    union _FP_UNION_E _flo;				\
+							\
+    if (X##_e) X##_f[1] |= _FP_IMPLBIT_E;		\
+    else X##_f[1] &= ~(_FP_IMPLBIT_E);			\
+    _flo.bits.frac0 = X##_f[0];				\
+    _flo.bits.frac1 = X##_f[1];				\
+    _flo.bits.exp   = X##_e;				\
+    _flo.bits.sign  = X##_s;				\
+							\
+    (val) = _flo.flt;					\
+  } while (0)
+
+#define FP_PACK_RAW_EP(val, X)				\
+  do {							\
+    if (!FP_INHIBIT_RESULTS)				\
+      {							\
+	union _FP_UNION_E *_flo =			\
+	  (union _FP_UNION_E *)(val);			\
+							\
+	if (X##_e) X##_f[1] |= _FP_IMPLBIT_E;		\
+	else X##_f[1] &= ~(_FP_IMPLBIT_E);		\
+	_flo->bits.frac0 = X##_f[0];			\
+	_flo->bits.frac1 = X##_f[1];			\
+	_flo->bits.exp   = X##_e;			\
+	_flo->bits.sign  = X##_s;			\
+      }							\
+  } while (0)
+
+#define FP_UNPACK_E(X,val)		\
+  do {					\
+    FP_UNPACK_RAW_E(X,val);		\
+    _FP_UNPACK_CANONICAL(E,4,X);	\
+  } while (0)
+
+#define FP_UNPACK_EP(X,val)		\
+  do {					\
+    FP_UNPACK_RAW_EP(X,val);		\
+    _FP_UNPACK_CANONICAL(E,4,X);	\
+  } while (0)
+
+#define FP_UNPACK_SEMIRAW_E(X,val)	\
+  do {					\
+    FP_UNPACK_RAW_E(X,val);		\
+    _FP_UNPACK_SEMIRAW(E,4,X);		\
+  } while (0)
+
+#define FP_UNPACK_SEMIRAW_EP(X,val)	\
+  do {					\
+    FP_UNPACK_RAW_EP(X,val);		\
+    _FP_UNPACK_SEMIRAW(E,4,X);		\
+  } while (0)
+
+#define FP_PACK_E(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(E,4,X);		\
+    FP_PACK_RAW_E(val,X);		\
+  } while (0)
+
+#define FP_PACK_EP(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(E,4,X);		\
+    FP_PACK_RAW_EP(val,X);		\
+  } while (0)
+
+#define FP_PACK_SEMIRAW_E(val,X)	\
+  do {					\
+    _FP_PACK_SEMIRAW(E,4,X);		\
+    FP_PACK_RAW_E(val,X);		\
+  } while (0)
+
+#define FP_PACK_SEMIRAW_EP(val,X)	\
+  do {					\
+    _FP_PACK_SEMIRAW(E,4,X);		\
+    FP_PACK_RAW_EP(val,X);		\
+  } while (0)
+
+#define FP_ISSIGNAN_E(X)	_FP_ISSIGNAN(E,4,X)
+#define FP_NEG_E(R,X)		_FP_NEG(E,4,R,X)
+#define FP_ADD_E(R,X,Y)		_FP_ADD(E,4,R,X,Y)
+#define FP_SUB_E(R,X,Y)		_FP_SUB(E,4,R,X,Y)
+#define FP_MUL_E(R,X,Y)		_FP_MUL(E,4,R,X,Y)
+#define FP_DIV_E(R,X,Y)		_FP_DIV(E,4,R,X,Y)
+#define FP_SQRT_E(R,X)		_FP_SQRT(E,4,R,X)
+
+/*
+ * Square root algorithms:
+ * We have just one right now, maybe Newton approximation
+ * should be added for those machines where division is fast.
+ * This has special _E version because standard _4 square
+ * root would not work (it has to start normally with the
+ * second word and not the first), but as we have to do it
+ * anyway, we optimize it by doing most of the calculations
+ * in two UWtype registers instead of four.
+ */
+ 
+#define _FP_SQRT_MEAT_E(R, S, T, X, q)			\
+  do {							\
+    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
+    _FP_FRAC_SRL_4(X, (_FP_WORKBITS));			\
+    while (q)						\
+      {							\
+	T##_f[1] = S##_f[1] + q;			\
+	if (T##_f[1] <= X##_f[1])			\
+	  {						\
+	    S##_f[1] = T##_f[1] + q;			\
+	    X##_f[1] -= T##_f[1];			\
+	    R##_f[1] += q;				\
+	  }						\
+	_FP_FRAC_SLL_2(X, 1);				\
+	q >>= 1;					\
+      }							\
+    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
+    while (q)						\
+      {							\
+	T##_f[0] = S##_f[0] + q;			\
+	T##_f[1] = S##_f[1];				\
+	if (T##_f[1] < X##_f[1] || 			\
+	    (T##_f[1] == X##_f[1] &&			\
+	     T##_f[0] <= X##_f[0]))			\
+	  {						\
+	    S##_f[0] = T##_f[0] + q;			\
+	    S##_f[1] += (T##_f[0] > S##_f[0]);		\
+	    _FP_FRAC_DEC_2(X, T);			\
+	    R##_f[0] += q;				\
+	  }						\
+	_FP_FRAC_SLL_2(X, 1);				\
+	q >>= 1;					\
+      }							\
+    _FP_FRAC_SLL_4(R, (_FP_WORKBITS));			\
+    if (X##_f[0] | X##_f[1])				\
+      {							\
+	if (S##_f[1] < X##_f[1] || 			\
+	    (S##_f[1] == X##_f[1] &&			\
+	     S##_f[0] < X##_f[0]))			\
+	  R##_f[0] |= _FP_WORK_ROUND;			\
+	R##_f[0] |= _FP_WORK_STICKY;			\
+      }							\
+  } while (0)
+
+#define FP_CMP_E(r,X,Y,un)	_FP_CMP(E,4,r,X,Y,un)
+#define FP_CMP_EQ_E(r,X,Y)	_FP_CMP_EQ(E,4,r,X,Y)
+#define FP_CMP_UNORD_E(r,X,Y)	_FP_CMP_UNORD(E,4,r,X,Y)
+
+#define FP_TO_INT_E(r,X,rsz,rsg)	_FP_TO_INT(E,4,r,X,rsz,rsg)
+#define FP_FROM_INT_E(X,r,rs,rt)	_FP_FROM_INT(E,4,X,r,rs,rt)
+
+#define _FP_FRAC_HIGH_E(X)	(X##_f[2])
+#define _FP_FRAC_HIGH_RAW_E(X)	(X##_f[1])
+
+#else   /* not _FP_W_TYPE_SIZE < 64 */
+union _FP_UNION_E
+{
+  XFtype flt;
+  struct {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    _FP_W_TYPE pad  : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
+    unsigned sign   : 1;
+    unsigned exp    : _FP_EXPBITS_E;
+    _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
+#else
+    _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
+    unsigned exp    : _FP_EXPBITS_E;
+    unsigned sign   : 1;
+#endif
+  } bits;
+};
+
+#define FP_DECL_E(X)		_FP_DECL(2,X)
+
+#define FP_UNPACK_RAW_E(X, val)					\
+  do {								\
+    union _FP_UNION_E _flo; _flo.flt = (val);			\
+								\
+    X##_f0 = _flo.bits.frac;					\
+    X##_f1 = 0;							\
+    X##_e = _flo.bits.exp;					\
+    X##_s = _flo.bits.sign;					\
+  } while (0)
+
+#define FP_UNPACK_RAW_EP(X, val)				\
+  do {								\
+    union _FP_UNION_E *_flo =					\
+      (union _FP_UNION_E *)(val);				\
+								\
+    X##_f0 = _flo->bits.frac;					\
+    X##_f1 = 0;							\
+    X##_e = _flo->bits.exp;					\
+    X##_s = _flo->bits.sign;					\
+  } while (0)
+
+#define FP_PACK_RAW_E(val, X)					\
+  do {								\
+    union _FP_UNION_E _flo;					\
+								\
+    if (X##_e) X##_f0 |= _FP_IMPLBIT_E;				\
+    else X##_f0 &= ~(_FP_IMPLBIT_E);				\
+    _flo.bits.frac = X##_f0;					\
+    _flo.bits.exp  = X##_e;					\
+    _flo.bits.sign = X##_s;					\
+								\
+    (val) = _flo.flt;						\
+  } while (0)
+
+#define FP_PACK_RAW_EP(fs, val, X)				\
+  do {								\
+    if (!FP_INHIBIT_RESULTS)					\
+      {								\
+	union _FP_UNION_E *_flo =				\
+	  (union _FP_UNION_E *)(val);				\
+								\
+	if (X##_e) X##_f0 |= _FP_IMPLBIT_E;			\
+	else X##_f0 &= ~(_FP_IMPLBIT_E);			\
+	_flo->bits.frac = X##_f0;				\
+	_flo->bits.exp  = X##_e;				\
+	_flo->bits.sign = X##_s;				\
+      }								\
+  } while (0)
+
+
+#define FP_UNPACK_E(X,val)		\
+  do {					\
+    FP_UNPACK_RAW_E(X,val);		\
+    _FP_UNPACK_CANONICAL(E,2,X);	\
+  } while (0)
+
+#define FP_UNPACK_EP(X,val)		\
+  do {					\
+    FP_UNPACK_RAW_EP(X,val);		\
+    _FP_UNPACK_CANONICAL(E,2,X);	\
+  } while (0)
+
+#define FP_UNPACK_SEMIRAW_E(X,val)	\
+  do {					\
+    FP_UNPACK_RAW_E(X,val);		\
+    _FP_UNPACK_SEMIRAW(E,2,X);		\
+  } while (0)
+
+#define FP_UNPACK_SEMIRAW_EP(X,val)	\
+  do {					\
+    FP_UNPACK_RAW_EP(X,val);		\
+    _FP_UNPACK_SEMIRAW(E,2,X);		\
+  } while (0)
+
+#define FP_PACK_E(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(E,2,X);		\
+    FP_PACK_RAW_E(val,X);		\
+  } while (0)
+
+#define FP_PACK_EP(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(E,2,X);		\
+    FP_PACK_RAW_EP(val,X);		\
+  } while (0)
+
+#define FP_PACK_SEMIRAW_E(val,X)	\
+  do {					\
+    _FP_PACK_SEMIRAW(E,2,X);		\
+    FP_PACK_RAW_E(val,X);		\
+  } while (0)
+
+#define FP_PACK_SEMIRAW_EP(val,X)	\
+  do {					\
+    _FP_PACK_SEMIRAW(E,2,X);		\
+    FP_PACK_RAW_EP(val,X);		\
+  } while (0)
+
+#define FP_ISSIGNAN_E(X)	_FP_ISSIGNAN(E,2,X)
+#define FP_NEG_E(R,X)		_FP_NEG(E,2,R,X)
+#define FP_ADD_E(R,X,Y)		_FP_ADD(E,2,R,X,Y)
+#define FP_SUB_E(R,X,Y)		_FP_SUB(E,2,R,X,Y)
+#define FP_MUL_E(R,X,Y)		_FP_MUL(E,2,R,X,Y)
+#define FP_DIV_E(R,X,Y)		_FP_DIV(E,2,R,X,Y)
+#define FP_SQRT_E(R,X)		_FP_SQRT(E,2,R,X)
+
+/*
+ * Square root algorithms:
+ * We have just one right now, maybe Newton approximation
+ * should be added for those machines where division is fast.
+ * We optimize it by doing most of the calculations
+ * in one UWtype registers instead of two, although we don't
+ * have to.
+ */
+#define _FP_SQRT_MEAT_E(R, S, T, X, q)			\
+  do {							\
+    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
+    _FP_FRAC_SRL_2(X, (_FP_WORKBITS));			\
+    while (q)						\
+      {							\
+        T##_f0 = S##_f0 + q;				\
+        if (T##_f0 <= X##_f0)				\
+          {						\
+            S##_f0 = T##_f0 + q;			\
+            X##_f0 -= T##_f0;				\
+            R##_f0 += q;				\
+          }						\
+        _FP_FRAC_SLL_1(X, 1);				\
+        q >>= 1;					\
+      }							\
+    _FP_FRAC_SLL_2(R, (_FP_WORKBITS));			\
+    if (X##_f0)						\
+      {							\
+	if (S##_f0 < X##_f0)				\
+	  R##_f0 |= _FP_WORK_ROUND;			\
+	R##_f0 |= _FP_WORK_STICKY;			\
+      }							\
+  } while (0)
+ 
+#define FP_CMP_E(r,X,Y,un)	_FP_CMP(E,2,r,X,Y,un)
+#define FP_CMP_EQ_E(r,X,Y)	_FP_CMP_EQ(E,2,r,X,Y)
+#define FP_CMP_UNORD_E(r,X,Y)	_FP_CMP_UNORD(E,2,r,X,Y)
+
+#define FP_TO_INT_E(r,X,rsz,rsg)	_FP_TO_INT(E,2,r,X,rsz,rsg)
+#define FP_FROM_INT_E(X,r,rs,rt)	_FP_FROM_INT(E,2,X,r,rs,rt)
+
+#define _FP_FRAC_HIGH_E(X)	(X##_f1)
+#define _FP_FRAC_HIGH_RAW_E(X)	(X##_f0)
+
+#endif /* not _FP_W_TYPE_SIZE < 64 */
diff --git a/libc/sysdeps/linux/sparc/soft-fp/longlong.h b/libc/sysdeps/linux/sparc/soft-fp/longlong.h
new file mode 100644
index 000000000..a2f38ae2a
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/longlong.h
@@ -0,0 +1,1461 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+   Copyright (C) 1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+   2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* You have to define the following before including this file:
+
+   UWtype -- An unsigned type, default type for operations (typically a "word")
+   UHWtype -- An unsigned type, at least half the size of UWtype.
+   UDWtype -- An unsigned type, at least twice as large a UWtype
+   W_TYPE_SIZE -- size in bits of UWtype
+
+   UQItype -- Unsigned 8 bit type.
+   SItype, USItype -- Signed and unsigned 32 bit types.
+   DItype, UDItype -- Signed and unsigned 64 bit types.
+
+   On a 32 bit machine UWtype should typically be USItype;
+   on a 64 bit machine, UWtype should typically be UDItype.  */
+
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+#ifndef W_TYPE_SIZE
+#define W_TYPE_SIZE	32
+#define UWtype		USItype
+#define UHWtype		USItype
+#define UDWtype		UDItype
+#endif
+
+extern const UQItype __clz_tab[256] attribute_hidden;
+
+/* Define auxiliary asm macros.
+
+   1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
+   UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
+   word product in HIGH_PROD and LOW_PROD.
+
+   2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
+   UDWtype product.  This is just a variant of umul_ppmm.
+
+   3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+   denominator) divides a UDWtype, composed by the UWtype integers
+   HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+   in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
+   than DENOMINATOR for correct operation.  If, in addition, the most
+   significant bit of DENOMINATOR must be 1, then the pre-processor symbol
+   UDIV_NEEDS_NORMALIZATION is defined to 1.
+
+   4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+   denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
+   is rounded towards 0.
+
+   5) count_leading_zeros(count, x) counts the number of zero-bits from the
+   msb to the first nonzero bit in the UWtype X.  This is the number of
+   steps X needs to be shifted left to set the msb.  Undefined for X == 0,
+   unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
+
+   6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
+   from the least significant end.
+
+   7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+   high_addend_2, low_addend_2) adds two UWtype integers, composed by
+   HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+   respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
+   (i.e. carry out) is not stored anywhere, and is lost.
+
+   8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+   high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+   composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
+   LOW_SUBTRAHEND_2 respectively.  The result is placed in HIGH_DIFFERENCE
+   and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
+   and is lost.
+
+   If any of these macros are left undefined for a particular CPU,
+   C macros are used.  */
+
+/* The CPUs come in alphabetical order below.
+
+   Please add support for more CPUs here, or improve the current support
+   for the CPUs below!
+   (E.g. WE32100, IBM360.)  */
+
+#if defined (__GNUC__) && !defined (NO_ASM)
+
+/* We sometimes need to clobber "cc" with gcc2, but that would not be
+   understood by gcc1.  Use cpp to avoid major code duplication.  */
+#if __GNUC__ < 2
+#define __CLOBBER_CC
+#define __AND_CLOBBER_CC
+#else /* __GNUC__ >= 2 */
+#define __CLOBBER_CC : "cc"
+#define __AND_CLOBBER_CC , "cc"
+#endif /* __GNUC__ < 2 */
+
+#if defined (__alpha) && W_TYPE_SIZE == 64
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    UDItype __m0 = (m0), __m1 = (m1);					\
+    (ph) = __builtin_alpha_umulh (__m0, __m1);				\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#define UMUL_TIME 46
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do { UDItype __r;							\
+    (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));				\
+    (r) = __r;								\
+  } while (0)
+extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
+#define UDIV_TIME 220
+#endif /* LONGLONG_STANDALONE */
+#ifdef __alpha_cix__
+#define count_leading_zeros(COUNT,X)	((COUNT) = __builtin_clzl (X))
+#define count_trailing_zeros(COUNT,X)	((COUNT) = __builtin_ctzl (X))
+#define COUNT_LEADING_ZEROS_0 64
+#else
+#define count_leading_zeros(COUNT,X) \
+  do {									\
+    UDItype __xr = (X), __t, __a;					\
+    __t = __builtin_alpha_cmpbge (0, __xr);				\
+    __a = __clz_tab[__t ^ 0xff] - 1;					\
+    __t = __builtin_alpha_extbl (__xr, __a);				\
+    (COUNT) = 64 - (__clz_tab[__t] + __a*8);				\
+  } while (0)
+#define count_trailing_zeros(COUNT,X) \
+  do {									\
+    UDItype __xr = (X), __t, __a;					\
+    __t = __builtin_alpha_cmpbge (0, __xr);				\
+    __t = ~__t & -~__t;							\
+    __a = ((__t & 0xCC) != 0) * 2;					\
+    __a += ((__t & 0xF0) != 0) * 4;					\
+    __a += ((__t & 0xAA) != 0);						\
+    __t = __builtin_alpha_extbl (__xr, __a);				\
+    __a <<= 3;								\
+    __t &= -__t;							\
+    __a += ((__t & 0xCC) != 0) * 2;					\
+    __a += ((__t & 0xF0) != 0) * 4;					\
+    __a += ((__t & 0xAA) != 0);						\
+    (COUNT) = __a;							\
+  } while (0)
+#endif /* __alpha_cix__ */
+#endif /* __alpha */
+
+#if defined (__arc__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add.f	%1, %4, %5\n\tadc	%0, %2, %3"		\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%r" ((USItype) (ah)),					\
+	     "rIJ" ((USItype) (bh)),					\
+	     "%r" ((USItype) (al)),					\
+	     "rIJ" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub.f	%1, %4, %5\n\tsbc	%0, %2, %3"		\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "r" ((USItype) (ah)),					\
+	     "rIJ" ((USItype) (bh)),					\
+	     "r" ((USItype) (al)),					\
+	     "rIJ" ((USItype) (bl)))
+/* Call libgcc routine.  */
+#define umul_ppmm(w1, w0, u, v) \
+do {									\
+  DWunion __w;								\
+  __w.ll = __umulsidi3 (u, v);						\
+  w1 = __w.s.high;							\
+  w0 = __w.s.low;							\
+} while (0)
+#define __umulsidi3 __umulsidi3
+UDItype __umulsidi3 (USItype, USItype);
+#endif
+
+#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("adds	%1, %4, %5\n\tadc	%0, %2, %3"		\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%r" ((USItype) (ah)),					\
+	     "rI" ((USItype) (bh)),					\
+	     "%r" ((USItype) (al)),					\
+	     "rI" ((USItype) (bl)) __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subs	%1, %4, %5\n\tsbc	%0, %2, %3"		\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "r" ((USItype) (ah)),					\
+	     "rI" ((USItype) (bh)),					\
+	     "r" ((USItype) (al)),					\
+	     "rI" ((USItype) (bl)) __CLOBBER_CC)
+#define umul_ppmm(xh, xl, a, b) \
+{register USItype __t0, __t1, __t2;					\
+  __asm__ ("%@ Inlined umul_ppmm\n"					\
+	   "	mov	%2, %5, lsr #16\n"				\
+	   "	mov	%0, %6, lsr #16\n"				\
+	   "	bic	%3, %5, %2, lsl #16\n"				\
+	   "	bic	%4, %6, %0, lsl #16\n"				\
+	   "	mul	%1, %3, %4\n"					\
+	   "	mul	%4, %2, %4\n"					\
+	   "	mul	%3, %0, %3\n"					\
+	   "	mul	%0, %2, %0\n"					\
+	   "	adds	%3, %4, %3\n"					\
+	   "	addcs	%0, %0, #65536\n"				\
+	   "	adds	%1, %1, %3, lsl #16\n"				\
+	   "	adc	%0, %0, %3, lsr #16"				\
+	   : "=&r" ((USItype) (xh)),					\
+	     "=r" ((USItype) (xl)),					\
+	     "=&r" (__t0), "=&r" (__t1), "=r" (__t2)			\
+	   : "r" ((USItype) (a)),					\
+	     "r" ((USItype) (b)) __CLOBBER_CC );}
+#define UMUL_TIME 20
+#define UDIV_TIME 100
+#endif /* __arm__ */
+
+#if defined(__arm__)
+/* Let gcc decide how best to implement count_leading_zeros.  */
+#define count_leading_zeros(COUNT,X)	((COUNT) = __builtin_clz (X))
+#define COUNT_LEADING_ZEROS_0 32
+#endif
+
+#if defined (__CRIS__) && __CRIS_arch_version >= 3
+#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
+#if __CRIS_arch_version >= 8
+#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
+#endif
+#endif /* __CRIS__ */
+
+#if defined (__hppa) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"				\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%rM" ((USItype) (ah)),					\
+	     "rM" ((USItype) (bh)),					\
+	     "%rM" ((USItype) (al)),					\
+	     "rM" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"				\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "rM" ((USItype) (ah)),					\
+	     "rM" ((USItype) (bh)),					\
+	     "rM" ((USItype) (al)),					\
+	     "rM" ((USItype) (bl)))
+#if defined (_PA_RISC1_1)
+#define umul_ppmm(w1, w0, u, v) \
+  do {									\
+    union								\
+      {									\
+	UDItype __f;							\
+	struct {USItype __w1, __w0;} __w1w0;				\
+      } __t;								\
+    __asm__ ("xmpyu %1,%2,%0"						\
+	     : "=x" (__t.__f)						\
+	     : "x" ((USItype) (u)),					\
+	       "x" ((USItype) (v)));					\
+    (w1) = __t.__w1w0.__w1;						\
+    (w0) = __t.__w1w0.__w0;						\
+     } while (0)
+#define UMUL_TIME 8
+#else
+#define UMUL_TIME 30
+#endif
+#define UDIV_TIME 40
+#define count_leading_zeros(count, x) \
+  do {									\
+    USItype __tmp;							\
+    __asm__ (								\
+       "ldi		1,%0\n"						\
+"	extru,=		%1,15,16,%%r0		; Bits 31..16 zero?\n"	\
+"	extru,tr	%1,15,16,%1		; No.  Shift down, skip add.\n"\
+"	ldo		16(%0),%0		; Yes.  Perform add.\n"	\
+"	extru,=		%1,23,8,%%r0		; Bits 15..8 zero?\n"	\
+"	extru,tr	%1,23,8,%1		; No.  Shift down, skip add.\n"\
+"	ldo		8(%0),%0		; Yes.  Perform add.\n"	\
+"	extru,=		%1,27,4,%%r0		; Bits 7..4 zero?\n"	\
+"	extru,tr	%1,27,4,%1		; No.  Shift down, skip add.\n"\
+"	ldo		4(%0),%0		; Yes.  Perform add.\n"	\
+"	extru,=		%1,29,2,%%r0		; Bits 3..2 zero?\n"	\
+"	extru,tr	%1,29,2,%1		; No.  Shift down, skip add.\n"\
+"	ldo		2(%0),%0		; Yes.  Perform add.\n"	\
+"	extru		%1,30,1,%1		; Extract bit 1.\n"	\
+"	sub		%0,%1,%0		; Subtract it.\n"	\
+	: "=r" (count), "=r" (__tmp) : "1" (x));			\
+  } while (0)
+#endif
+
+#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
+#define smul_ppmm(xh, xl, m0, m1) \
+  do {									\
+    union {DItype __ll;							\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x;							\
+    __asm__ ("lr %N0,%1\n\tmr %0,%2"					\
+	     : "=&r" (__x.__ll)						\
+	     : "r" (m0), "r" (m1));					\
+    (xh) = __x.__i.__h; (xl) = __x.__i.__l;				\
+  } while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+  do {									\
+    union {DItype __ll;							\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x;							\
+    __x.__i.__h = n1; __x.__i.__l = n0;					\
+    __asm__ ("dr %0,%2"							\
+	     : "=r" (__x.__ll)						\
+	     : "0" (__x.__ll), "r" (d));				\
+    (q) = __x.__i.__l; (r) = __x.__i.__h;				\
+  } while (0)
+#endif
+
+#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"		\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%0" ((USItype) (ah)),					\
+	     "g" ((USItype) (bh)),					\
+	     "%1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"		\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "g" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mul{l} %3"							\
+	   : "=a" ((USItype) (w0)),					\
+	     "=d" ((USItype) (w1))					\
+	   : "%0" ((USItype) (u)),					\
+	     "rm" ((USItype) (v)))
+#define udiv_qrnnd(q, r, n1, n0, dv) \
+  __asm__ ("div{l} %4"							\
+	   : "=a" ((USItype) (q)),					\
+	     "=d" ((USItype) (r))					\
+	   : "0" ((USItype) (n0)),					\
+	     "1" ((USItype) (n1)),					\
+	     "rm" ((USItype) (dv)))
+#define count_leading_zeros(count, x)	((count) = __builtin_clz (x))
+#define count_trailing_zeros(count, x)	((count) = __builtin_ctz (x))
+#define UMUL_TIME 40
+#define UDIV_TIME 40
+#endif /* 80x86 */
+
+#if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"		\
+	   : "=r" ((UDItype) (sh)),					\
+	     "=&r" ((UDItype) (sl))					\
+	   : "%0" ((UDItype) (ah)),					\
+	     "rme" ((UDItype) (bh)),					\
+	     "%1" ((UDItype) (al)),					\
+	     "rme" ((UDItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"		\
+	   : "=r" ((UDItype) (sh)),					\
+	     "=&r" ((UDItype) (sl))					\
+	   : "0" ((UDItype) (ah)),					\
+	     "rme" ((UDItype) (bh)),					\
+	     "1" ((UDItype) (al)),					\
+	     "rme" ((UDItype) (bl)))
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mul{q} %3"							\
+	   : "=a" ((UDItype) (w0)),					\
+	     "=d" ((UDItype) (w1))					\
+	   : "%0" ((UDItype) (u)),					\
+	     "rm" ((UDItype) (v)))
+#define udiv_qrnnd(q, r, n1, n0, dv) \
+  __asm__ ("div{q} %4"							\
+	   : "=a" ((UDItype) (q)),					\
+	     "=d" ((UDItype) (r))					\
+	   : "0" ((UDItype) (n0)),					\
+	     "1" ((UDItype) (n1)),					\
+	     "rm" ((UDItype) (dv)))
+#define count_leading_zeros(count, x)	((count) = __builtin_clzl (x))
+#define count_trailing_zeros(count, x)	((count) = __builtin_ctzl (x))
+#define UMUL_TIME 40
+#define UDIV_TIME 40
+#endif /* x86_64 */
+
+#if defined (__i960__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+  ({union {UDItype __ll;						\
+	   struct {USItype __l, __h;} __i;				\
+	  } __xx;							\
+  __asm__ ("emul	%2,%1,%0"					\
+	   : "=d" (__xx.__ll)						\
+	   : "%dI" ((USItype) (u)),					\
+	     "dI" ((USItype) (v)));					\
+  (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
+#define __umulsidi3(u, v) \
+  ({UDItype __w;							\
+    __asm__ ("emul	%2,%1,%0"					\
+	     : "=d" (__w)						\
+	     : "%dI" ((USItype) (u)),					\
+	       "dI" ((USItype) (v)));					\
+    __w; })
+#endif /* __i960__ */
+
+#if defined (__M32R__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  /* The cmp clears the condition bit.  */ \
+  __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"			\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "r" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "r" ((USItype) (bl))					\
+	   : "cbit")
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  /* The cmp clears the condition bit.  */ \
+  __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"			\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "r" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "r" ((USItype) (bl))					\
+	   : "cbit")
+#endif /* __M32R__ */
+
+#if defined (__mc68000__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"				\
+	   : "=d" ((USItype) (sh)),					\
+	     "=&d" ((USItype) (sl))					\
+	   : "%0" ((USItype) (ah)),					\
+	     "d" ((USItype) (bh)),					\
+	     "%1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"				\
+	   : "=d" ((USItype) (sh)),					\
+	     "=&d" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "d" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+
+/* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
+#if (defined (__mc68020__) && !defined (__mc68060__))
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mulu%.l %3,%1:%0"						\
+	   : "=d" ((USItype) (w0)),					\
+	     "=d" ((USItype) (w1))					\
+	   : "%0" ((USItype) (u)),					\
+	     "dmi" ((USItype) (v)))
+#define UMUL_TIME 45
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("divu%.l %4,%1:%0"						\
+	   : "=d" ((USItype) (q)),					\
+	     "=d" ((USItype) (r))					\
+	   : "0" ((USItype) (n0)),					\
+	     "1" ((USItype) (n1)),					\
+	     "dmi" ((USItype) (d)))
+#define UDIV_TIME 90
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("divs%.l %4,%1:%0"						\
+	   : "=d" ((USItype) (q)),					\
+	     "=d" ((USItype) (r))					\
+	   : "0" ((USItype) (n0)),					\
+	     "1" ((USItype) (n1)),					\
+	     "dmi" ((USItype) (d)))
+
+#elif defined (__mcoldfire__) /* not mc68020 */
+
+#define umul_ppmm(xh, xl, a, b) \
+  __asm__ ("| Inlined umul_ppmm\n"					\
+	   "	move%.l	%2,%/d0\n"					\
+	   "	move%.l	%3,%/d1\n"					\
+	   "	move%.l	%/d0,%/d2\n"					\
+	   "	swap	%/d0\n"						\
+	   "	move%.l	%/d1,%/d3\n"					\
+	   "	swap	%/d1\n"						\
+	   "	move%.w	%/d2,%/d4\n"					\
+	   "	mulu	%/d3,%/d4\n"					\
+	   "	mulu	%/d1,%/d2\n"					\
+	   "	mulu	%/d0,%/d3\n"					\
+	   "	mulu	%/d0,%/d1\n"					\
+	   "	move%.l	%/d4,%/d0\n"					\
+	   "	clr%.w	%/d0\n"						\
+	   "	swap	%/d0\n"						\
+	   "	add%.l	%/d0,%/d2\n"					\
+	   "	add%.l	%/d3,%/d2\n"					\
+	   "	jcc	1f\n"						\
+	   "	add%.l	%#65536,%/d1\n"					\
+	   "1:	swap	%/d2\n"						\
+	   "	moveq	%#0,%/d0\n"					\
+	   "	move%.w	%/d2,%/d0\n"					\
+	   "	move%.w	%/d4,%/d2\n"					\
+	   "	move%.l	%/d2,%1\n"					\
+	   "	add%.l	%/d1,%/d0\n"					\
+	   "	move%.l	%/d0,%0"					\
+	   : "=g" ((USItype) (xh)),					\
+	     "=g" ((USItype) (xl))					\
+	   : "g" ((USItype) (a)),					\
+	     "g" ((USItype) (b))					\
+	   : "d0", "d1", "d2", "d3", "d4")
+#define UMUL_TIME 100
+#define UDIV_TIME 400
+#else /* not ColdFire */
+/* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
+#define umul_ppmm(xh, xl, a, b) \
+  __asm__ ("| Inlined umul_ppmm\n"					\
+	   "	move%.l	%2,%/d0\n"					\
+	   "	move%.l	%3,%/d1\n"					\
+	   "	move%.l	%/d0,%/d2\n"					\
+	   "	swap	%/d0\n"						\
+	   "	move%.l	%/d1,%/d3\n"					\
+	   "	swap	%/d1\n"						\
+	   "	move%.w	%/d2,%/d4\n"					\
+	   "	mulu	%/d3,%/d4\n"					\
+	   "	mulu	%/d1,%/d2\n"					\
+	   "	mulu	%/d0,%/d3\n"					\
+	   "	mulu	%/d0,%/d1\n"					\
+	   "	move%.l	%/d4,%/d0\n"					\
+	   "	eor%.w	%/d0,%/d0\n"					\
+	   "	swap	%/d0\n"						\
+	   "	add%.l	%/d0,%/d2\n"					\
+	   "	add%.l	%/d3,%/d2\n"					\
+	   "	jcc	1f\n"						\
+	   "	add%.l	%#65536,%/d1\n"					\
+	   "1:	swap	%/d2\n"						\
+	   "	moveq	%#0,%/d0\n"					\
+	   "	move%.w	%/d2,%/d0\n"					\
+	   "	move%.w	%/d4,%/d2\n"					\
+	   "	move%.l	%/d2,%1\n"					\
+	   "	add%.l	%/d1,%/d0\n"					\
+	   "	move%.l	%/d0,%0"					\
+	   : "=g" ((USItype) (xh)),					\
+	     "=g" ((USItype) (xl))					\
+	   : "g" ((USItype) (a)),					\
+	     "g" ((USItype) (b))					\
+	   : "d0", "d1", "d2", "d3", "d4")
+#define UMUL_TIME 100
+#define UDIV_TIME 400
+
+#endif /* not mc68020 */
+
+/* The '020, '030, '040 and '060 have bitfield insns.
+   cpu32 disguises as a 68020, but lacks them.  */
+#if defined (__mc68020__) && !defined (__mcpu32__)
+#define count_leading_zeros(count, x) \
+  __asm__ ("bfffo %1{%b2:%b2},%0"					\
+	   : "=d" ((USItype) (count))					\
+	   : "od" ((USItype) (x)), "n" (0))
+/* Some ColdFire architectures have a ff1 instruction supported via
+   __builtin_clz. */
+#elif defined (__mcfisaaplus__) || defined (__mcfisac__)
+#define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
+#define COUNT_LEADING_ZEROS_0 32
+#endif
+#endif /* mc68000 */
+
+#if defined (__m88000__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"			\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%rJ" ((USItype) (ah)),					\
+	     "rJ" ((USItype) (bh)),					\
+	     "%rJ" ((USItype) (al)),					\
+	     "rJ" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"			\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "rJ" ((USItype) (ah)),					\
+	     "rJ" ((USItype) (bh)),					\
+	     "rJ" ((USItype) (al)),					\
+	     "rJ" ((USItype) (bl)))
+#define count_leading_zeros(count, x) \
+  do {									\
+    USItype __cbtmp;							\
+    __asm__ ("ff1 %0,%1"						\
+	     : "=r" (__cbtmp)						\
+	     : "r" ((USItype) (x)));					\
+    (count) = __cbtmp ^ 31;						\
+  } while (0)
+#define COUNT_LEADING_ZEROS_0 63 /* sic */
+#if defined (__mc88110__)
+#define umul_ppmm(wh, wl, u, v) \
+  do {									\
+    union {UDItype __ll;						\
+	   struct {USItype __h, __l;} __i;				\
+	  } __xx;							\
+    __asm__ ("mulu.d	%0,%1,%2"					\
+	     : "=r" (__xx.__ll)						\
+	     : "r" ((USItype) (u)),					\
+	       "r" ((USItype) (v)));					\
+    (wh) = __xx.__i.__h;						\
+    (wl) = __xx.__i.__l;						\
+  } while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  ({union {UDItype __ll;						\
+	   struct {USItype __h, __l;} __i;				\
+	  } __xx;							\
+  USItype __q;								\
+  __xx.__i.__h = (n1); __xx.__i.__l = (n0);				\
+  __asm__ ("divu.d %0,%1,%2"						\
+	   : "=r" (__q)							\
+	   : "r" (__xx.__ll),						\
+	     "r" ((USItype) (d)));					\
+  (r) = (n0) - __q * (d); (q) = __q; })
+#define UMUL_TIME 5
+#define UDIV_TIME 25
+#else
+#define UMUL_TIME 17
+#define UDIV_TIME 150
+#endif /* __mc88110__ */
+#endif /* __m88000__ */
+
+#if defined (__mips__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v)						\
+  do {									\
+    UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);		\
+    (w1) = (USItype) (__x >> 32);					\
+    (w0) = (USItype) (__x);						\
+  } while (0)
+#define UMUL_TIME 10
+#define UDIV_TIME 100
+
+#if (__mips == 32 || __mips == 64) && ! __mips16
+#define count_leading_zeros(COUNT,X)	((COUNT) = __builtin_clz (X))
+#define COUNT_LEADING_ZEROS_0 32
+#endif
+#endif /* __mips__ */
+
+#if defined (__ns32000__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+  ({union {UDItype __ll;						\
+	   struct {USItype __l, __h;} __i;				\
+	  } __xx;							\
+  __asm__ ("meid %2,%0"							\
+	   : "=g" (__xx.__ll)						\
+	   : "%0" ((USItype) (u)),					\
+	     "g" ((USItype) (v)));					\
+  (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
+#define __umulsidi3(u, v) \
+  ({UDItype __w;							\
+    __asm__ ("meid %2,%0"						\
+	     : "=g" (__w)						\
+	     : "%0" ((USItype) (u)),					\
+	       "g" ((USItype) (v)));					\
+    __w; })
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  ({union {UDItype __ll;						\
+	   struct {USItype __l, __h;} __i;				\
+	  } __xx;							\
+  __xx.__i.__h = (n1); __xx.__i.__l = (n0);				\
+  __asm__ ("deid %2,%0"							\
+	   : "=g" (__xx.__ll)						\
+	   : "0" (__xx.__ll),						\
+	     "g" ((USItype) (d)));					\
+  (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
+#define count_trailing_zeros(count,x) \
+  do {									\
+    __asm__ ("ffsd     %2,%0"						\
+            : "=r" ((USItype) (count))					\
+            : "0" ((USItype) 0),					\
+              "r" ((USItype) (x)));					\
+  } while (0)
+#endif /* __ns32000__ */
+
+/* FIXME: We should test _IBMR2 here when we add assembly support for the
+   system vendor compilers.
+   FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
+   enough, since that hits ARM and m68k too.  */
+#if (defined (_ARCH_PPC)	/* AIX */				\
+     || defined (_ARCH_PWR)	/* AIX */				\
+     || defined (_ARCH_COM)	/* AIX */				\
+     || defined (__powerpc__)	/* gcc */				\
+     || defined (__POWERPC__)	/* BEOS */				\
+     || defined (__ppc__)	/* Darwin */				\
+     || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
+     || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
+         && CPU_FAMILY == PPC)                                                \
+     ) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (bh) && (bh) == 0)				\
+      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"		\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
+      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"		\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else								\
+      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"		\
+	     : "=r" (sh), "=&r" (sl)					\
+	     : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));		\
+  } while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (ah) && (ah) == 0)				\
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"	\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)		\
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"	\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == 0)			\
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"		\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"		\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else								\
+      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"	\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl));		\
+  } while (0)
+#define count_leading_zeros(count, x) \
+  __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 32
+#if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
+  || defined (__ppc__)                                                    \
+  || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
+  || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
+         && CPU_FAMILY == PPC)
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    USItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    SItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#define SMUL_TIME 14
+#define UDIV_TIME 120
+#elif defined (_ARCH_PWR)
+#define UMUL_TIME 8
+#define smul_ppmm(xh, xl, m0, m1) \
+  __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
+#define SMUL_TIME 4
+#define sdiv_qrnnd(q, r, nh, nl, d) \
+  __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
+#define UDIV_TIME 100
+#endif
+#endif /* 32-bit POWER architecture variants.  */
+
+/* We should test _IBMR2 here when we add assembly support for the system
+   vendor compilers.  */
+#if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (bh) && (bh) == 0)				\
+      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"		\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)		\
+      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"		\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else								\
+      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"		\
+	     : "=r" (sh), "=&r" (sl)					\
+	     : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));		\
+  } while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (ah) && (ah) == 0)				\
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"	\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)		\
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"	\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == 0)			\
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"		\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)		\
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"		\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else								\
+      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"	\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl));		\
+  } while (0)
+#define count_leading_zeros(count, x) \
+  __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 64
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    UDItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    DItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#define SMUL_TIME 14  /* ??? */
+#define UDIV_TIME 120 /* ??? */
+#endif /* 64-bit PowerPC.  */
+
+#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("a %1,%5\n\tae %0,%3"					\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%0" ((USItype) (ah)),					\
+	     "r" ((USItype) (bh)),					\
+	     "%1" ((USItype) (al)),					\
+	     "r" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("s %1,%5\n\tse %0,%3"					\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "r" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "r" ((USItype) (bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    USItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ (								\
+       "s	r2,r2\n"						\
+"	mts	r10,%2\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	cas	%0,r2,r0\n"						\
+"	mfs	r10,%1"							\
+	     : "=r" ((USItype) (ph)),					\
+	       "=r" ((USItype) (pl))					\
+	     : "%r" (__m0),						\
+		"r" (__m1)						\
+	     : "r2");							\
+    (ph) += ((((SItype) __m0 >> 31) & __m1)				\
+	     + (((SItype) __m1 >> 31) & __m0));				\
+  } while (0)
+#define UMUL_TIME 20
+#define UDIV_TIME 200
+#define count_leading_zeros(count, x) \
+  do {									\
+    if ((x) >= 0x10000)							\
+      __asm__ ("clz	%0,%1"						\
+	       : "=r" ((USItype) (count))				\
+	       : "r" ((USItype) (x) >> 16));				\
+    else								\
+      {									\
+	__asm__ ("clz	%0,%1"						\
+		 : "=r" ((USItype) (count))				\
+		 : "r" ((USItype) (x)));					\
+	(count) += 16;							\
+      }									\
+  } while (0)
+#endif
+
+#if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
+#ifndef __sh1__
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ (								\
+       "dmulu.l	%2,%3\n\tsts%M1	macl,%1\n\tsts%M0	mach,%0"	\
+	   : "=r<" ((USItype)(w1)),					\
+	     "=r<" ((USItype)(w0))					\
+	   : "r" ((USItype)(u)),					\
+	     "r" ((USItype)(v))						\
+	   : "macl", "mach")
+#define UMUL_TIME 5
+#endif
+
+/* This is the same algorithm as __udiv_qrnnd_c.  */
+#define UDIV_NEEDS_NORMALIZATION 1
+
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do {									\
+    extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)			\
+                        __attribute__ ((visibility ("hidden")));	\
+    /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */	\
+    __asm__ (								\
+	"mov%M4 %4,r5\n"						\
+"	swap.w %3,r4\n"							\
+"	swap.w r5,r6\n"							\
+"	jsr @%5\n"							\
+"	shll16 r6\n"							\
+"	swap.w r4,r4\n"							\
+"	jsr @%5\n"							\
+"	swap.w r1,%0\n"							\
+"	or r1,%0"							\
+	: "=r" (q), "=&z" (r)						\
+	: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)		\
+	: "r1", "r2", "r4", "r5", "r6", "pr");				\
+  } while (0)
+
+#define UDIV_TIME 80
+
+#define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
+  __asm__ ("clrt;subc %5,%1; subc %4,%0"				\
+	   : "=r" (sh), "=r" (sl)					\
+	   : "0" (ah), "1" (al), "r" (bh), "r" (bl))
+
+#endif /* __sh__ */
+
+#if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
+#define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v)
+#define count_leading_zeros(count, x) \
+  do									\
+    {									\
+      UDItype x_ = (USItype)(x);					\
+      SItype c_;							\
+									\
+      __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));			\
+      (count) = c_ - 31;						\
+    }									\
+  while (0)
+#define COUNT_LEADING_ZEROS_0 32
+#endif
+
+#if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
+    && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"				\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%rJ" ((USItype) (ah)),					\
+	     "rI" ((USItype) (bh)),					\
+	     "%rJ" ((USItype) (al)),					\
+	     "rI" ((USItype) (bl))					\
+	   __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"				\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "rJ" ((USItype) (ah)),					\
+	     "rI" ((USItype) (bh)),					\
+	     "rJ" ((USItype) (al)),					\
+	     "rI" ((USItype) (bl))					\
+	   __CLOBBER_CC)
+#if defined (__sparc_v8__)
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("umul %2,%3,%1;rd %%y,%0"					\
+	   : "=r" ((USItype) (w1)),					\
+	     "=r" ((USItype) (w0))					\
+	   : "r" ((USItype) (u)),					\
+	     "r" ((USItype) (v)))
+#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
+  __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
+	   : "=&r" ((USItype) (__q)),					\
+	     "=&r" ((USItype) (__r))					\
+	   : "r" ((USItype) (__n1)),					\
+	     "r" ((USItype) (__n0)),					\
+	     "r" ((USItype) (__d)))
+#else
+#if defined (__sparclite__)
+/* This has hardware multiply but not divide.  It also has two additional
+   instructions scan (ffs from high bit) and divscc.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("umul %2,%3,%1;rd %%y,%0"					\
+	   : "=r" ((USItype) (w1)),					\
+	     "=r" ((USItype) (w0))					\
+	   : "r" ((USItype) (u)),					\
+	     "r" ((USItype) (v)))
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("! Inlined udiv_qrnnd\n"					\
+"	wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n"	\
+"	tst	%%g0\n"							\
+"	divscc	%3,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%0\n"						\
+"	rd	%%y,%1\n"						\
+"	bl,a 1f\n"							\
+"	add	%1,%4,%1\n"						\
+"1:	! End of inline udiv_qrnnd"					\
+	   : "=r" ((USItype) (q)),					\
+	     "=r" ((USItype) (r))					\
+	   : "r" ((USItype) (n1)),					\
+	     "r" ((USItype) (n0)),					\
+	     "rI" ((USItype) (d))					\
+	   : "g1" __AND_CLOBBER_CC)
+#define UDIV_TIME 37
+#define count_leading_zeros(count, x) \
+  do {                                                                  \
+  __asm__ ("scan %1,1,%0"                                               \
+           : "=r" ((USItype) (count))                                   \
+           : "r" ((USItype) (x)));					\
+  } while (0)
+/* Early sparclites return 63 for an argument of 0, but they warn that future
+   implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
+   undefined.  */
+#else
+/* SPARC without integer multiplication and divide instructions.
+   (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("! Inlined umul_ppmm\n"					\
+"	wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n"\
+"	sra	%3,31,%%o5	! Don't move this insn\n"		\
+"	and	%2,%%o5,%%o5	! Don't move this insn\n"		\
+"	andcc	%%g0,0,%%g1	! Don't move this insn\n"		\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,0,%%g1\n"						\
+"	add	%%g1,%%o5,%0\n"						\
+"	rd	%%y,%1"							\
+	   : "=r" ((USItype) (w1)),					\
+	     "=r" ((USItype) (w0))					\
+	   : "%rI" ((USItype) (u)),					\
+	     "r" ((USItype) (v))						\
+	   : "g1", "o5" __AND_CLOBBER_CC)
+#define UMUL_TIME 39		/* 39 instructions */
+/* It's quite necessary to add this much assembler for the sparc.
+   The default udiv_qrnnd (in C) is more than 10 times slower!  */
+#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
+  __asm__ ("! Inlined udiv_qrnnd\n"					\
+"	mov	32,%%g1\n"						\
+"	subcc	%1,%2,%%g0\n"						\
+"1:	bcs	5f\n"							\
+"	 addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n"	\
+"	sub	%1,%2,%1	! this kills msb of n\n"		\
+"	addx	%1,%1,%1	! so this can't give carry\n"		\
+"	subcc	%%g1,1,%%g1\n"						\
+"2:	bne	1b\n"							\
+"	 subcc	%1,%2,%%g0\n"						\
+"	bcs	3f\n"							\
+"	 addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n"	\
+"	b	3f\n"							\
+"	 sub	%1,%2,%1	! this kills msb of n\n"		\
+"4:	sub	%1,%2,%1\n"						\
+"5:	addxcc	%1,%1,%1\n"						\
+"	bcc	2b\n"							\
+"	 subcc	%%g1,1,%%g1\n"						\
+"! Got carry from n.  Subtract next step to cancel this carry.\n"	\
+"	bne	4b\n"							\
+"	 addcc	%0,%0,%0	! shift n1n0 and a 0-bit in lsb\n"	\
+"	sub	%1,%2,%1\n"						\
+"3:	xnor	%0,0,%0\n"						\
+"	! End of inline udiv_qrnnd"					\
+	   : "=&r" ((USItype) (__q)),					\
+	     "=&r" ((USItype) (__r))					\
+	   : "r" ((USItype) (__d)),					\
+	     "1" ((USItype) (__n1)),					\
+	     "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
+#define UDIV_TIME (3+7*32)	/* 7 instructions/iteration. 32 iterations.  */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+#endif /* sparc32 */
+
+#if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
+    && W_TYPE_SIZE == 64
+#define add_ssaaaa(sh, sl, ah, al, bh, bl)				\
+  __asm__ ("addcc %r4,%5,%1\n\t"					\
+   	   "add %r2,%3,%0\n\t"						\
+   	   "bcs,a,pn %%xcc, 1f\n\t"					\
+   	   "add %0, 1, %0\n"						\
+	   "1:"								\
+	   : "=r" ((UDItype)(sh)),				      	\
+	     "=&r" ((UDItype)(sl))				      	\
+	   : "%rJ" ((UDItype)(ah)),				     	\
+	     "rI" ((UDItype)(bh)),				      	\
+	     "%rJ" ((UDItype)(al)),				     	\
+	     "rI" ((UDItype)(bl))				       	\
+	   __CLOBBER_CC)
+
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) 				\
+  __asm__ ("subcc %r4,%5,%1\n\t"					\
+   	   "sub %r2,%3,%0\n\t"						\
+   	   "bcs,a,pn %%xcc, 1f\n\t"					\
+   	   "sub %0, 1, %0\n\t"						\
+	   "1:"								\
+	   : "=r" ((UDItype)(sh)),				      	\
+	     "=&r" ((UDItype)(sl))				      	\
+	   : "rJ" ((UDItype)(ah)),				     	\
+	     "rI" ((UDItype)(bh)),				      	\
+	     "rJ" ((UDItype)(al)),				     	\
+	     "rI" ((UDItype)(bl))				       	\
+	   __CLOBBER_CC)
+
+#define umul_ppmm(wh, wl, u, v)						\
+  do {									\
+	  UDItype tmp1, tmp2, tmp3, tmp4;				\
+	  __asm__ __volatile__ (					\
+		   "srl %7,0,%3\n\t"					\
+		   "mulx %3,%6,%1\n\t"					\
+		   "srlx %6,32,%2\n\t"					\
+		   "mulx %2,%3,%4\n\t"					\
+		   "sllx %4,32,%5\n\t"					\
+		   "srl %6,0,%3\n\t"					\
+		   "sub %1,%5,%5\n\t"					\
+		   "srlx %5,32,%5\n\t"					\
+		   "addcc %4,%5,%4\n\t"					\
+		   "srlx %7,32,%5\n\t"					\
+		   "mulx %3,%5,%3\n\t"					\
+		   "mulx %2,%5,%5\n\t"					\
+		   "sethi %%hi(0x80000000),%2\n\t"			\
+		   "addcc %4,%3,%4\n\t"					\
+		   "srlx %4,32,%4\n\t"					\
+		   "add %2,%2,%2\n\t"					\
+		   "movcc %%xcc,%%g0,%2\n\t"				\
+		   "addcc %5,%4,%5\n\t"					\
+		   "sllx %3,32,%3\n\t"					\
+		   "add %1,%3,%1\n\t"					\
+		   "add %5,%2,%0"					\
+	   : "=r" ((UDItype)(wh)),					\
+	     "=&r" ((UDItype)(wl)),					\
+	     "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)	\
+	   : "r" ((UDItype)(u)),					\
+	     "r" ((UDItype)(v))						\
+	   __CLOBBER_CC);						\
+  } while (0)
+#define UMUL_TIME 96
+#define UDIV_TIME 230
+#endif /* sparc64 */
+
+#if defined (__vax__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addl2 %5,%1\n\tadwc %3,%0"					\
+	   : "=g" ((USItype) (sh)),					\
+	     "=&g" ((USItype) (sl))					\
+	   : "%0" ((USItype) (ah)),					\
+	     "g" ((USItype) (bh)),					\
+	     "%1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"					\
+	   : "=g" ((USItype) (sh)),					\
+	     "=&g" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "g" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+  do {									\
+    union {								\
+	UDItype __ll;							\
+	struct {USItype __l, __h;} __i;					\
+      } __xx;								\
+    USItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("emul %1,%2,$0,%0"						\
+	     : "=r" (__xx.__ll)						\
+	     : "g" (__m0),						\
+	       "g" (__m1));						\
+    (xh) = __xx.__i.__h;						\
+    (xl) = __xx.__i.__l;						\
+    (xh) += ((((SItype) __m0 >> 31) & __m1)				\
+	     + (((SItype) __m1 >> 31) & __m0));				\
+  } while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+  do {									\
+    union {DItype __ll;							\
+	   struct {SItype __l, __h;} __i;				\
+	  } __xx;							\
+    __xx.__i.__h = n1; __xx.__i.__l = n0;				\
+    __asm__ ("ediv %3,%2,%0,%1"						\
+	     : "=g" (q), "=g" (r)					\
+	     : "g" (__xx.__ll), "g" (d));				\
+  } while (0)
+#endif /* __vax__ */
+
+#if defined (__xtensa__) && W_TYPE_SIZE == 32
+/* This code is not Xtensa-configuration-specific, so rely on the compiler
+   to expand builtin functions depending on what configuration features
+   are available.  This avoids library calls when the operation can be
+   performed in-line.  */
+#define umul_ppmm(w1, w0, u, v)						\
+  do {									\
+    DWunion __w;							\
+    __w.ll = __builtin_umulsidi3 (u, v);				\
+    w1 = __w.s.high;							\
+    w0 = __w.s.low;							\
+  } while (0)
+#define __umulsidi3(u, v)		__builtin_umulsidi3 (u, v)
+#define count_leading_zeros(COUNT, X)	((COUNT) = __builtin_clz (X))
+#define count_trailing_zeros(COUNT, X)	((COUNT) = __builtin_ctz (X))
+#endif /* __xtensa__ */
+
+#if defined (__z8000__) && W_TYPE_SIZE == 16
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add	%H1,%H5\n\tadc	%H0,%H3"				\
+	   : "=r" ((unsigned int)(sh)),					\
+	     "=&r" ((unsigned int)(sl))					\
+	   : "%0" ((unsigned int)(ah)),					\
+	     "r" ((unsigned int)(bh)),					\
+	     "%1" ((unsigned int)(al)),					\
+	     "rQR" ((unsigned int)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub	%H1,%H5\n\tsbc	%H0,%H3"				\
+	   : "=r" ((unsigned int)(sh)),					\
+	     "=&r" ((unsigned int)(sl))					\
+	   : "0" ((unsigned int)(ah)),					\
+	     "r" ((unsigned int)(bh)),					\
+	     "1" ((unsigned int)(al)),					\
+	     "rQR" ((unsigned int)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+  do {									\
+    union {long int __ll;						\
+	   struct {unsigned int __h, __l;} __i;				\
+	  } __xx;							\
+    unsigned int __m0 = (m0), __m1 = (m1);				\
+    __asm__ ("mult	%S0,%H3"					\
+	     : "=r" (__xx.__i.__h),					\
+	       "=r" (__xx.__i.__l)					\
+	     : "%1" (__m0),						\
+	       "rQR" (__m1));						\
+    (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;				\
+    (xh) += ((((signed int) __m0 >> 15) & __m1)				\
+	     + (((signed int) __m1 >> 15) & __m0));			\
+  } while (0)
+#endif /* __z8000__ */
+
+#endif /* __GNUC__ */
+
+/* If this machine has no inline assembler, use C macros.  */
+
+#if !defined (add_ssaaaa)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {									\
+    UWtype __x;								\
+    __x = (al) + (bl);							\
+    (sh) = (ah) + (bh) + (__x < (al));					\
+    (sl) = __x;								\
+  } while (0)
+#endif
+
+#if !defined (sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    UWtype __x;								\
+    __x = (al) - (bl);							\
+    (sh) = (ah) - (bh) - (__x > (al));					\
+    (sl) = __x;								\
+  } while (0)
+#endif
+
+/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
+   smul_ppmm.  */
+#if !defined (umul_ppmm) && defined (smul_ppmm)
+#define umul_ppmm(w1, w0, u, v)						\
+  do {									\
+    UWtype __w1;							\
+    UWtype __xm0 = (u), __xm1 = (v);					\
+    smul_ppmm (__w1, w0, __xm0, __xm1);					\
+    (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)		\
+		+ (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);		\
+  } while (0)
+#endif
+
+/* If we still don't have umul_ppmm, define it using plain C.  */
+#if !defined (umul_ppmm)
+#define umul_ppmm(w1, w0, u, v)						\
+  do {									\
+    UWtype __x0, __x1, __x2, __x3;					\
+    UHWtype __ul, __vl, __uh, __vh;					\
+									\
+    __ul = __ll_lowpart (u);						\
+    __uh = __ll_highpart (u);						\
+    __vl = __ll_lowpart (v);						\
+    __vh = __ll_highpart (v);						\
+									\
+    __x0 = (UWtype) __ul * __vl;					\
+    __x1 = (UWtype) __ul * __vh;					\
+    __x2 = (UWtype) __uh * __vl;					\
+    __x3 = (UWtype) __uh * __vh;					\
+									\
+    __x1 += __ll_highpart (__x0);/* this can't give carry */		\
+    __x1 += __x2;		/* but this indeed can */		\
+    if (__x1 < __x2)		/* did we get it? */			\
+      __x3 += __ll_B;		/* yes, add it in the proper pos.  */	\
+									\
+    (w1) = __x3 + __ll_highpart (__x1);					\
+    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);		\
+  } while (0)
+#endif
+
+#if !defined (__umulsidi3)
+#define __umulsidi3(u, v) \
+  ({DWunion __w;							\
+    umul_ppmm (__w.s.high, __w.s.low, u, v);				\
+    __w.ll; })
+#endif
+
+/* Define this unconditionally, so it can be used for debugging.  */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+  do {									\
+    UWtype __d1, __d0, __q1, __q0;					\
+    UWtype __r1, __r0, __m;						\
+    __d1 = __ll_highpart (d);						\
+    __d0 = __ll_lowpart (d);						\
+									\
+    __r1 = (n1) % __d1;							\
+    __q1 = (n1) / __d1;							\
+    __m = (UWtype) __q1 * __d0;						\
+    __r1 = __r1 * __ll_B | __ll_highpart (n0);				\
+    if (__r1 < __m)							\
+      {									\
+	__q1--, __r1 += (d);						\
+	if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
+	  if (__r1 < __m)						\
+	    __q1--, __r1 += (d);					\
+      }									\
+    __r1 -= __m;							\
+									\
+    __r0 = __r1 % __d1;							\
+    __q0 = __r1 / __d1;							\
+    __m = (UWtype) __q0 * __d0;						\
+    __r0 = __r0 * __ll_B | __ll_lowpart (n0);				\
+    if (__r0 < __m)							\
+      {									\
+	__q0--, __r0 += (d);						\
+	if (__r0 >= (d))						\
+	  if (__r0 < __m)						\
+	    __q0--, __r0 += (d);					\
+      }									\
+    __r0 -= __m;							\
+									\
+    (q) = (UWtype) __q1 * __ll_B | __q0;				\
+    (r) = __r0;								\
+  } while (0)
+
+/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
+   __udiv_w_sdiv (defined in libgcc or elsewhere).  */
+#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
+#define udiv_qrnnd(q, r, nh, nl, d) \
+  do {									\
+    USItype __r;							\
+    (q) = __udiv_w_sdiv (&__r, nh, nl, d);				\
+    (r) = __r;								\
+  } while (0)
+#endif
+
+/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
+#if !defined (udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+#if !defined (count_leading_zeros)
+#define count_leading_zeros(count, x) \
+  do {									\
+    UWtype __xr = (x);							\
+    UWtype __a;								\
+									\
+    if (W_TYPE_SIZE <= 32)						\
+      {									\
+	__a = __xr < ((UWtype)1<<2*__BITS4)				\
+	  ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4)			\
+	  : (__xr < ((UWtype)1<<3*__BITS4) ?  2*__BITS4 : 3*__BITS4);	\
+      }									\
+    else								\
+      {									\
+	for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)			\
+	  if (((__xr >> __a) & 0xff) != 0)				\
+	    break;							\
+      }									\
+									\
+    (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a);		\
+  } while (0)
+#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#endif
+
+#if !defined (count_trailing_zeros)
+/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
+   defined in asm, but if it is not, the C version above is good enough.  */
+#define count_trailing_zeros(count, x) \
+  do {									\
+    UWtype __ctz_x = (x);						\
+    UWtype __ctz_c;							\
+    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);			\
+    (count) = W_TYPE_SIZE - 1 - __ctz_c;				\
+  } while (0)
+#endif
+
+#ifndef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0
+#endif
diff --git a/libc/sysdeps/linux/sparc/soft-fp/mp_clz_tab.c b/libc/sysdeps/linux/sparc/soft-fp/mp_clz_tab.c
new file mode 100644
index 000000000..2220299e7
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/mp_clz_tab.c
@@ -0,0 +1,37 @@
+/* __clz_tab -- support for longlong.h
+   Copyright (C) 1991, 1993, 1994, 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.  Its master source is NOT part of
+   the C library, however.  The master source lives in the GNU MP Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#if 0
+#include <gmp.h>
+#include "gmp-impl.h"
+#endif
+
+const
+unsigned char __clz_tab[] =
+{
+  0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+};
diff --git a/libc/sysdeps/linux/sparc/soft-fp/op-1.h b/libc/sysdeps/linux/sparc/soft-fp/op-1.h
new file mode 100644
index 000000000..35cd0ba7b
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/op-1.h
@@ -0,0 +1,302 @@
+/* Software floating-point emulation.
+   Basic one-word fraction declaration and manipulation.
+   Copyright (C) 1997,1998,1999,2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.  */
+
+#define _FP_FRAC_DECL_1(X)	_FP_W_TYPE X##_f
+#define _FP_FRAC_COPY_1(D,S)	(D##_f = S##_f)
+#define _FP_FRAC_SET_1(X,I)	(X##_f = I)
+#define _FP_FRAC_HIGH_1(X)	(X##_f)
+#define _FP_FRAC_LOW_1(X)	(X##_f)
+#define _FP_FRAC_WORD_1(X,w)	(X##_f)
+
+#define _FP_FRAC_ADDI_1(X,I)	(X##_f += I)
+#define _FP_FRAC_SLL_1(X,N)			\
+  do {						\
+    if (__builtin_constant_p(N) && (N) == 1)	\
+      X##_f += X##_f;				\
+    else					\
+      X##_f <<= (N);				\
+  } while (0)
+#define _FP_FRAC_SRL_1(X,N)	(X##_f >>= N)
+
+/* Right shift with sticky-lsb.  */
+#define _FP_FRAC_SRST_1(X,S,N,sz)	__FP_FRAC_SRST_1(X##_f, S, N, sz)
+#define _FP_FRAC_SRS_1(X,N,sz)	__FP_FRAC_SRS_1(X##_f, N, sz)
+
+#define __FP_FRAC_SRST_1(X,S,N,sz)			\
+do {							\
+  S = (__builtin_constant_p(N) && (N) == 1		\
+       ? X & 1 : (X << (_FP_W_TYPE_SIZE - (N))) != 0);	\
+  X = X >> (N);						\
+} while (0)
+
+#define __FP_FRAC_SRS_1(X,N,sz)						\
+   (X = (X >> (N) | (__builtin_constant_p(N) && (N) == 1		\
+		     ? X & 1 : (X << (_FP_W_TYPE_SIZE - (N))) != 0)))
+
+#define _FP_FRAC_ADD_1(R,X,Y)	(R##_f = X##_f + Y##_f)
+#define _FP_FRAC_SUB_1(R,X,Y)	(R##_f = X##_f - Y##_f)
+#define _FP_FRAC_DEC_1(X,Y)	(X##_f -= Y##_f)
+#define _FP_FRAC_CLZ_1(z, X)	__FP_CLZ(z, X##_f)
+
+/* Predicates */
+#define _FP_FRAC_NEGP_1(X)	((_FP_WS_TYPE)X##_f < 0)
+#define _FP_FRAC_ZEROP_1(X)	(X##_f == 0)
+#define _FP_FRAC_OVERP_1(fs,X)	(X##_f & _FP_OVERFLOW_##fs)
+#define _FP_FRAC_CLEAR_OVERP_1(fs,X)	(X##_f &= ~_FP_OVERFLOW_##fs)
+#define _FP_FRAC_EQ_1(X, Y)	(X##_f == Y##_f)
+#define _FP_FRAC_GE_1(X, Y)	(X##_f >= Y##_f)
+#define _FP_FRAC_GT_1(X, Y)	(X##_f > Y##_f)
+
+#define _FP_ZEROFRAC_1		0
+#define _FP_MINFRAC_1		1
+#define _FP_MAXFRAC_1		(~(_FP_WS_TYPE)0)
+
+/*
+ * Unpack the raw bits of a native fp value.  Do not classify or
+ * normalize the data.
+ */
+
+#define _FP_UNPACK_RAW_1(fs, X, val)				\
+  do {								\
+    union _FP_UNION_##fs _flo; _flo.flt = (val);		\
+								\
+    X##_f = _flo.bits.frac;					\
+    X##_e = _flo.bits.exp;					\
+    X##_s = _flo.bits.sign;					\
+  } while (0)
+
+#define _FP_UNPACK_RAW_1_P(fs, X, val)				\
+  do {								\
+    union _FP_UNION_##fs *_flo =				\
+      (union _FP_UNION_##fs *)(val);				\
+								\
+    X##_f = _flo->bits.frac;					\
+    X##_e = _flo->bits.exp;					\
+    X##_s = _flo->bits.sign;					\
+  } while (0)
+
+/*
+ * Repack the raw bits of a native fp value.
+ */
+
+#define _FP_PACK_RAW_1(fs, val, X)				\
+  do {								\
+    union _FP_UNION_##fs _flo;					\
+								\
+    _flo.bits.frac = X##_f;					\
+    _flo.bits.exp  = X##_e;					\
+    _flo.bits.sign = X##_s;					\
+								\
+    (val) = _flo.flt;						\
+  } while (0)
+
+#define _FP_PACK_RAW_1_P(fs, val, X)				\
+  do {								\
+    union _FP_UNION_##fs *_flo =				\
+      (union _FP_UNION_##fs *)(val);				\
+								\
+    _flo->bits.frac = X##_f;					\
+    _flo->bits.exp  = X##_e;					\
+    _flo->bits.sign = X##_s;					\
+  } while (0)
+
+
+/*
+ * Multiplication algorithms:
+ */
+
+/* Basic.  Assuming the host word size is >= 2*FRACBITS, we can do the
+   multiplication immediately.  */
+
+#define _FP_MUL_MEAT_1_imm(wfracbits, R, X, Y)				\
+  do {									\
+    R##_f = X##_f * Y##_f;						\
+    /* Normalize since we know where the msb of the multiplicands	\
+       were (bit B), we know that the msb of the of the product is	\
+       at either 2B or 2B-1.  */					\
+    _FP_FRAC_SRS_1(R, wfracbits-1, 2*wfracbits);			\
+  } while (0)
+
+/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
+
+#define _FP_MUL_MEAT_1_wide(wfracbits, R, X, Y, doit)			\
+  do {									\
+    _FP_W_TYPE _Z_f0, _Z_f1;						\
+    doit(_Z_f1, _Z_f0, X##_f, Y##_f);					\
+    /* Normalize since we know where the msb of the multiplicands	\
+       were (bit B), we know that the msb of the of the product is	\
+       at either 2B or 2B-1.  */					\
+    _FP_FRAC_SRS_2(_Z, wfracbits-1, 2*wfracbits);			\
+    R##_f = _Z_f0;							\
+  } while (0)
+
+/* Finally, a simple widening multiply algorithm.  What fun!  */
+
+#define _FP_MUL_MEAT_1_hard(wfracbits, R, X, Y)				\
+  do {									\
+    _FP_W_TYPE _xh, _xl, _yh, _yl, _z_f0, _z_f1, _a_f0, _a_f1;		\
+									\
+    /* split the words in half */					\
+    _xh = X##_f >> (_FP_W_TYPE_SIZE/2);					\
+    _xl = X##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1);		\
+    _yh = Y##_f >> (_FP_W_TYPE_SIZE/2);					\
+    _yl = Y##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1);		\
+									\
+    /* multiply the pieces */						\
+    _z_f0 = _xl * _yl;							\
+    _a_f0 = _xh * _yl;							\
+    _a_f1 = _xl * _yh;							\
+    _z_f1 = _xh * _yh;							\
+									\
+    /* reassemble into two full words */				\
+    if ((_a_f0 += _a_f1) < _a_f1)					\
+      _z_f1 += (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2);			\
+    _a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE/2);				\
+    _a_f0 = _a_f0 << (_FP_W_TYPE_SIZE/2);				\
+    _FP_FRAC_ADD_2(_z, _z, _a);						\
+									\
+    /* normalize */							\
+    _FP_FRAC_SRS_2(_z, wfracbits - 1, 2*wfracbits);			\
+    R##_f = _z_f0;							\
+  } while (0)
+
+
+/*
+ * Division algorithms:
+ */
+
+/* Basic.  Assuming the host word size is >= 2*FRACBITS, we can do the
+   division immediately.  Give this macro either _FP_DIV_HELP_imm for
+   C primitives or _FP_DIV_HELP_ldiv for the ISO function.  Which you
+   choose will depend on what the compiler does with divrem4.  */
+
+#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit)		\
+  do {							\
+    _FP_W_TYPE _q, _r;					\
+    X##_f <<= (X##_f < Y##_f				\
+	       ? R##_e--, _FP_WFRACBITS_##fs		\
+	       : _FP_WFRACBITS_##fs - 1);		\
+    doit(_q, _r, X##_f, Y##_f);				\
+    R##_f = _q | (_r != 0);				\
+  } while (0)
+
+/* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd
+   that may be useful in this situation.  This first is for a primitive
+   that requires normalization, the second for one that does not.  Look
+   for UDIV_NEEDS_NORMALIZATION to tell which your machine needs.  */
+
+#define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y)				\
+  do {									\
+    _FP_W_TYPE _nh, _nl, _q, _r, _y;					\
+									\
+    /* Normalize Y -- i.e. make the most significant bit set.  */	\
+    _y = Y##_f << _FP_WFRACXBITS_##fs;					\
+									\
+    /* Shift X op correspondingly high, that is, up one full word.  */	\
+    if (X##_f < Y##_f)							\
+      {									\
+	R##_e--;							\
+	_nl = 0;							\
+	_nh = X##_f;							\
+      }									\
+    else								\
+      {									\
+	_nl = X##_f << (_FP_W_TYPE_SIZE - 1);				\
+	_nh = X##_f >> 1;						\
+      }									\
+    									\
+    udiv_qrnnd(_q, _r, _nh, _nl, _y);					\
+    R##_f = _q | (_r != 0);						\
+  } while (0)
+
+#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y)		\
+  do {							\
+    _FP_W_TYPE _nh, _nl, _q, _r;			\
+    if (X##_f < Y##_f)					\
+      {							\
+	R##_e--;					\
+	_nl = X##_f << _FP_WFRACBITS_##fs;		\
+	_nh = X##_f >> _FP_WFRACXBITS_##fs;		\
+      }							\
+    else						\
+      {							\
+	_nl = X##_f << (_FP_WFRACBITS_##fs - 1);	\
+	_nh = X##_f >> (_FP_WFRACXBITS_##fs + 1);	\
+      }							\
+    udiv_qrnnd(_q, _r, _nh, _nl, Y##_f);		\
+    R##_f = _q | (_r != 0);				\
+  } while (0)
+  
+  
+/*
+ * Square root algorithms:
+ * We have just one right now, maybe Newton approximation
+ * should be added for those machines where division is fast.
+ */
+ 
+#define _FP_SQRT_MEAT_1(R, S, T, X, q)			\
+  do {							\
+    while (q != _FP_WORK_ROUND)				\
+      {							\
+        T##_f = S##_f + q;				\
+        if (T##_f <= X##_f)				\
+          {						\
+            S##_f = T##_f + q;				\
+            X##_f -= T##_f;				\
+            R##_f += q;					\
+          }						\
+        _FP_FRAC_SLL_1(X, 1);				\
+        q >>= 1;					\
+      }							\
+    if (X##_f)						\
+      {							\
+	if (S##_f < X##_f)				\
+	  R##_f |= _FP_WORK_ROUND;			\
+	R##_f |= _FP_WORK_STICKY;			\
+      }							\
+  } while (0)
+
+/*
+ * Assembly/disassembly for converting to/from integral types.  
+ * No shifting or overflow handled here.
+ */
+
+#define _FP_FRAC_ASSEMBLE_1(r, X, rsize)	(r = X##_f)
+#define _FP_FRAC_DISASSEMBLE_1(X, r, rsize)	(X##_f = r)
+
+
+/*
+ * Convert FP values between word sizes
+ */
+
+#define _FP_FRAC_COPY_1_1(D, S)		(D##_f = S##_f)
diff --git a/libc/sysdeps/linux/sparc/soft-fp/op-2.h b/libc/sysdeps/linux/sparc/soft-fp/op-2.h
new file mode 100644
index 000000000..3a3b3aa06
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/op-2.h
@@ -0,0 +1,617 @@
+/* Software floating-point emulation.
+   Basic two-word fraction declaration and manipulation.
+   Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.  */
+
+#define _FP_FRAC_DECL_2(X)	_FP_W_TYPE X##_f0, X##_f1
+#define _FP_FRAC_COPY_2(D,S)	(D##_f0 = S##_f0, D##_f1 = S##_f1)
+#define _FP_FRAC_SET_2(X,I)	__FP_FRAC_SET_2(X, I)
+#define _FP_FRAC_HIGH_2(X)	(X##_f1)
+#define _FP_FRAC_LOW_2(X)	(X##_f0)
+#define _FP_FRAC_WORD_2(X,w)	(X##_f##w)
+
+#define _FP_FRAC_SLL_2(X,N)						    \
+(void)(((N) < _FP_W_TYPE_SIZE)						    \
+       ? ({								    \
+	    if (__builtin_constant_p(N) && (N) == 1)			    \
+	      {								    \
+		X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0);   \
+		X##_f0 += X##_f0;					    \
+	      }								    \
+	    else							    \
+	      {								    \
+		X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
+		X##_f0 <<= (N);						    \
+	      }								    \
+	    0;								    \
+	  })								    \
+       : ({								    \
+	    X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);			    \
+	    X##_f0 = 0;							    \
+	  }))
+
+
+#define _FP_FRAC_SRL_2(X,N)						\
+(void)(((N) < _FP_W_TYPE_SIZE)						\
+       ? ({								\
+	    X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N));	\
+	    X##_f1 >>= (N);						\
+	  })								\
+       : ({								\
+	    X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);			\
+	    X##_f1 = 0;							\
+	  }))
+
+/* Right shift with sticky-lsb.  */
+#define _FP_FRAC_SRST_2(X,S, N,sz)					  \
+(void)(((N) < _FP_W_TYPE_SIZE)						  \
+       ? ({								  \
+	    S = (__builtin_constant_p(N) && (N) == 1			  \
+		 ? X##_f0 & 1						  \
+		 : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);		  \
+	    X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
+	    X##_f1 >>= (N);						  \
+	  })								  \
+       : ({								  \
+	    S = ((((N) == _FP_W_TYPE_SIZE				  \
+		   ? 0							  \
+		   : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))		  \
+		  | X##_f0) != 0);					  \
+	    X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));		  \
+	    X##_f1 = 0;							  \
+	  }))
+
+#define _FP_FRAC_SRS_2(X,N,sz)						  \
+(void)(((N) < _FP_W_TYPE_SIZE)						  \
+       ? ({								  \
+	    X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
+		      (__builtin_constant_p(N) && (N) == 1		  \
+		       ? X##_f0 & 1					  \
+		       : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));	  \
+	    X##_f1 >>= (N);						  \
+	  })								  \
+       : ({								  \
+	    X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) |		  \
+		      ((((N) == _FP_W_TYPE_SIZE				  \
+			 ? 0						  \
+			 : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))	  \
+			| X##_f0) != 0));				  \
+	    X##_f1 = 0;							  \
+	  }))
+
+#define _FP_FRAC_ADDI_2(X,I)	\
+  __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
+
+#define _FP_FRAC_ADD_2(R,X,Y)	\
+  __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
+
+#define _FP_FRAC_SUB_2(R,X,Y)	\
+  __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
+
+#define _FP_FRAC_DEC_2(X,Y)	\
+  __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
+
+#define _FP_FRAC_CLZ_2(R,X)	\
+  do {				\
+    if (X##_f1)			\
+      __FP_CLZ(R,X##_f1);	\
+    else 			\
+    {				\
+      __FP_CLZ(R,X##_f0);	\
+      R += _FP_W_TYPE_SIZE;	\
+    }				\
+  } while(0)
+
+/* Predicates */
+#define _FP_FRAC_NEGP_2(X)	((_FP_WS_TYPE)X##_f1 < 0)
+#define _FP_FRAC_ZEROP_2(X)	((X##_f1 | X##_f0) == 0)
+#define _FP_FRAC_OVERP_2(fs,X)	(_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
+#define _FP_FRAC_CLEAR_OVERP_2(fs,X)	(_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
+#define _FP_FRAC_EQ_2(X, Y)	(X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
+#define _FP_FRAC_GT_2(X, Y)	\
+  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
+#define _FP_FRAC_GE_2(X, Y)	\
+  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
+
+#define _FP_ZEROFRAC_2		0, 0
+#define _FP_MINFRAC_2		0, 1
+#define _FP_MAXFRAC_2		(~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
+
+/*
+ * Internals 
+ */
+
+#define __FP_FRAC_SET_2(X,I1,I0)	(X##_f0 = I0, X##_f1 = I1)
+
+#define __FP_CLZ_2(R, xh, xl)	\
+  do {				\
+    if (xh)			\
+      __FP_CLZ(R,xh);		\
+    else 			\
+    {				\
+      __FP_CLZ(R,xl);		\
+      R += _FP_W_TYPE_SIZE;	\
+    }				\
+  } while(0)
+
+#if 0
+
+#ifndef __FP_FRAC_ADDI_2
+#define __FP_FRAC_ADDI_2(xh, xl, i)	\
+  (xh += ((xl += i) < i))
+#endif
+#ifndef __FP_FRAC_ADD_2
+#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl)	\
+  (rh = xh + yh + ((rl = xl + yl) < xl))
+#endif
+#ifndef __FP_FRAC_SUB_2
+#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl)	\
+  (rh = xh - yh - ((rl = xl - yl) > xl))
+#endif
+#ifndef __FP_FRAC_DEC_2
+#define __FP_FRAC_DEC_2(xh, xl, yh, yl)	\
+  do {					\
+    UWtype _t = xl;			\
+    xh -= yh + ((xl -= yl) > _t);	\
+  } while (0)
+#endif
+
+#else
+
+#undef __FP_FRAC_ADDI_2
+#define __FP_FRAC_ADDI_2(xh, xl, i)	add_ssaaaa(xh, xl, xh, xl, 0, i)
+#undef __FP_FRAC_ADD_2
+#define __FP_FRAC_ADD_2			add_ssaaaa
+#undef __FP_FRAC_SUB_2
+#define __FP_FRAC_SUB_2			sub_ddmmss
+#undef __FP_FRAC_DEC_2
+#define __FP_FRAC_DEC_2(xh, xl, yh, yl)	sub_ddmmss(xh, xl, xh, xl, yh, yl)
+
+#endif
+
+/*
+ * Unpack the raw bits of a native fp value.  Do not classify or
+ * normalize the data.
+ */
+
+#define _FP_UNPACK_RAW_2(fs, X, val)			\
+  do {							\
+    union _FP_UNION_##fs _flo; _flo.flt = (val);	\
+							\
+    X##_f0 = _flo.bits.frac0;				\
+    X##_f1 = _flo.bits.frac1;				\
+    X##_e  = _flo.bits.exp;				\
+    X##_s  = _flo.bits.sign;				\
+  } while (0)
+
+#define _FP_UNPACK_RAW_2_P(fs, X, val)			\
+  do {							\
+    union _FP_UNION_##fs *_flo =			\
+      (union _FP_UNION_##fs *)(val);			\
+							\
+    X##_f0 = _flo->bits.frac0;				\
+    X##_f1 = _flo->bits.frac1;				\
+    X##_e  = _flo->bits.exp;				\
+    X##_s  = _flo->bits.sign;				\
+  } while (0)
+
+
+/*
+ * Repack the raw bits of a native fp value.
+ */
+
+#define _FP_PACK_RAW_2(fs, val, X)			\
+  do {							\
+    union _FP_UNION_##fs _flo;				\
+							\
+    _flo.bits.frac0 = X##_f0;				\
+    _flo.bits.frac1 = X##_f1;				\
+    _flo.bits.exp   = X##_e;				\
+    _flo.bits.sign  = X##_s;				\
+							\
+    (val) = _flo.flt;					\
+  } while (0)
+
+#define _FP_PACK_RAW_2_P(fs, val, X)			\
+  do {							\
+    union _FP_UNION_##fs *_flo =			\
+      (union _FP_UNION_##fs *)(val);			\
+							\
+    _flo->bits.frac0 = X##_f0;				\
+    _flo->bits.frac1 = X##_f1;				\
+    _flo->bits.exp   = X##_e;				\
+    _flo->bits.sign  = X##_s;				\
+  } while (0)
+
+
+/*
+ * Multiplication algorithms:
+ */
+
+/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
+
+#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)			\
+  do {									\
+    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);	\
+									\
+    doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);	\
+    doit(_b_f1, _b_f0, X##_f0, Y##_f1);					\
+    doit(_c_f1, _c_f0, X##_f1, Y##_f0);					\
+    doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1);	\
+									\
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
+		    _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0,		\
+		    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
+		    _FP_FRAC_WORD_4(_z,1));				\
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
+		    _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0,		\
+		    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
+		    _FP_FRAC_WORD_4(_z,1));				\
+									\
+    /* Normalize since we know where the msb of the multiplicands	\
+       were (bit B), we know that the msb of the of the product is	\
+       at either 2B or 2B-1.  */					\
+    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);			\
+    R##_f0 = _FP_FRAC_WORD_4(_z,0);					\
+    R##_f1 = _FP_FRAC_WORD_4(_z,1);					\
+  } while (0)
+
+/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
+   Do only 3 multiplications instead of four. This one is for machines
+   where multiplication is much more expensive than subtraction.  */
+
+#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)		\
+  do {									\
+    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);	\
+    _FP_W_TYPE _d;							\
+    int _c1, _c2;							\
+									\
+    _b_f0 = X##_f0 + X##_f1;						\
+    _c1 = _b_f0 < X##_f0;						\
+    _b_f1 = Y##_f0 + Y##_f1;						\
+    _c2 = _b_f1 < Y##_f0;						\
+    doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);			\
+    doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1);	\
+    doit(_c_f1, _c_f0, X##_f1, Y##_f1);					\
+									\
+    _b_f0 &= -_c2;							\
+    _b_f1 &= -_c1;							\
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
+		    _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d,		\
+		    0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1));	\
+    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
+		     _b_f0);						\
+    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
+		     _b_f1);						\
+    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
+		    _FP_FRAC_WORD_4(_z,1),				\
+		    0, _d, _FP_FRAC_WORD_4(_z,0));			\
+    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
+		    _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0);		\
+    __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2),	\
+		    _c_f1, _c_f0,					\
+		    _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2));	\
+									\
+    /* Normalize since we know where the msb of the multiplicands	\
+       were (bit B), we know that the msb of the of the product is	\
+       at either 2B or 2B-1.  */					\
+    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);			\
+    R##_f0 = _FP_FRAC_WORD_4(_z,0);					\
+    R##_f1 = _FP_FRAC_WORD_4(_z,1);					\
+  } while (0)
+
+#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)				\
+  do {									\
+    _FP_FRAC_DECL_4(_z);						\
+    _FP_W_TYPE _x[2], _y[2];						\
+    _x[0] = X##_f0; _x[1] = X##_f1;					\
+    _y[0] = Y##_f0; _y[1] = Y##_f1;					\
+									\
+    mpn_mul_n(_z_f, _x, _y, 2);						\
+									\
+    /* Normalize since we know where the msb of the multiplicands	\
+       were (bit B), we know that the msb of the of the product is	\
+       at either 2B or 2B-1.  */					\
+    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);			\
+    R##_f0 = _z_f[0];							\
+    R##_f1 = _z_f[1];							\
+  } while (0)
+
+/* Do at most 120x120=240 bits multiplication using double floating
+   point multiplication.  This is useful if floating point
+   multiplication has much bigger throughput than integer multiply.
+   It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
+   between 106 and 120 only.  
+   Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
+   SETFETZ is a macro which will disable all FPU exceptions and set rounding
+   towards zero,  RESETFE should optionally reset it back.  */
+
+#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe)	\
+  do {										\
+    static const double _const[] = {						\
+      /* 2^-24 */ 5.9604644775390625e-08,					\
+      /* 2^-48 */ 3.5527136788005009e-15,					\
+      /* 2^-72 */ 2.1175823681357508e-22,					\
+      /* 2^-96 */ 1.2621774483536189e-29,					\
+      /* 2^28 */ 2.68435456e+08,						\
+      /* 2^4 */ 1.600000e+01,							\
+      /* 2^-20 */ 9.5367431640625e-07,						\
+      /* 2^-44 */ 5.6843418860808015e-14,					\
+      /* 2^-68 */ 3.3881317890172014e-21,					\
+      /* 2^-92 */ 2.0194839173657902e-28,					\
+      /* 2^-116 */ 1.2037062152420224e-35};					\
+    double _a240, _b240, _c240, _d240, _e240, _f240, 				\
+	   _g240, _h240, _i240, _j240, _k240;					\
+    union { double d; UDItype i; } _l240, _m240, _n240, _o240,			\
+				   _p240, _q240, _r240, _s240;			\
+    UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;			\
+										\
+    if (wfracbits < 106 || wfracbits > 120)					\
+      abort();									\
+										\
+    setfetz;									\
+										\
+    _e240 = (double)(long)(X##_f0 & 0xffffff);					\
+    _j240 = (double)(long)(Y##_f0 & 0xffffff);					\
+    _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff);				\
+    _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff);				\
+    _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48));	\
+    _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48));	\
+    _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff);				\
+    _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff);				\
+    _a240 = (double)(long)(X##_f1 >> 32);					\
+    _f240 = (double)(long)(Y##_f1 >> 32);					\
+    _e240 *= _const[3];								\
+    _j240 *= _const[3];								\
+    _d240 *= _const[2];								\
+    _i240 *= _const[2];								\
+    _c240 *= _const[1];								\
+    _h240 *= _const[1];								\
+    _b240 *= _const[0];								\
+    _g240 *= _const[0];								\
+    _s240.d =							      _e240*_j240;\
+    _r240.d =						_d240*_j240 + _e240*_i240;\
+    _q240.d =				  _c240*_j240 + _d240*_i240 + _e240*_h240;\
+    _p240.d =		    _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
+    _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
+    _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;		\
+    _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;				\
+    _l240.d = _a240*_g240 + _b240*_f240;					\
+    _k240 =   _a240*_f240;							\
+    _r240.d += _s240.d;								\
+    _q240.d += _r240.d;								\
+    _p240.d += _q240.d;								\
+    _o240.d += _p240.d;								\
+    _n240.d += _o240.d;								\
+    _m240.d += _n240.d;								\
+    _l240.d += _m240.d;								\
+    _k240 += _l240.d;								\
+    _s240.d -= ((_const[10]+_s240.d)-_const[10]);				\
+    _r240.d -= ((_const[9]+_r240.d)-_const[9]);					\
+    _q240.d -= ((_const[8]+_q240.d)-_const[8]);					\
+    _p240.d -= ((_const[7]+_p240.d)-_const[7]);					\
+    _o240.d += _const[7];							\
+    _n240.d += _const[6];							\
+    _m240.d += _const[5];							\
+    _l240.d += _const[4];							\
+    if (_s240.d != 0.0) _y240 = 1;						\
+    if (_r240.d != 0.0) _y240 = 1;						\
+    if (_q240.d != 0.0) _y240 = 1;						\
+    if (_p240.d != 0.0) _y240 = 1;						\
+    _t240 = (DItype)_k240;							\
+    _u240 = _l240.i;								\
+    _v240 = _m240.i;								\
+    _w240 = _n240.i;								\
+    _x240 = _o240.i;								\
+    R##_f1 = (_t240 << (128 - (wfracbits - 1)))					\
+	     | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104));			\
+    R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1)))			\
+    	     | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))			\
+    	     | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))			\
+    	     | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))			\
+    	     | _y240;								\
+    resetfe;									\
+  } while (0)
+
+/*
+ * Division algorithms:
+ */
+
+#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)				\
+  do {									\
+    _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0;		\
+    if (_FP_FRAC_GT_2(X, Y))						\
+      {									\
+	_n_f2 = X##_f1 >> 1;						\
+	_n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;		\
+	_n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1);			\
+      }									\
+    else								\
+      {									\
+	R##_e--;							\
+	_n_f2 = X##_f1;							\
+	_n_f1 = X##_f0;							\
+	_n_f0 = 0;							\
+      }									\
+									\
+    /* Normalize, i.e. make the most significant bit of the 		\
+       denominator set. */						\
+    _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs);				\
+									\
+    udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1);			\
+    umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0);				\
+    _r_f0 = _n_f0;							\
+    if (_FP_FRAC_GT_2(_m, _r))						\
+      {									\
+	R##_f1--;							\
+	_FP_FRAC_ADD_2(_r, Y, _r);					\
+	if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))		\
+	  {								\
+	    R##_f1--;							\
+	    _FP_FRAC_ADD_2(_r, Y, _r);					\
+	  }								\
+      }									\
+    _FP_FRAC_DEC_2(_r, _m);						\
+									\
+    if (_r_f1 == Y##_f1)						\
+      {									\
+	/* This is a special case, not an optimization			\
+	   (_r/Y##_f1 would not fit into UWtype).			\
+	   As _r is guaranteed to be < Y,  R##_f0 can be either		\
+	   (UWtype)-1 or (UWtype)-2.  But as we know what kind		\
+	   of bits it is (sticky, guard, round),  we don't care.	\
+	   We also don't care what the reminder is,  because the	\
+	   guard bit will be set anyway.  -jj */			\
+	R##_f0 = -1;							\
+      }									\
+    else								\
+      {									\
+	udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1);		\
+	umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0);			\
+	_r_f0 = 0;							\
+	if (_FP_FRAC_GT_2(_m, _r))					\
+	  {								\
+	    R##_f0--;							\
+	    _FP_FRAC_ADD_2(_r, Y, _r);					\
+	    if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))		\
+	      {								\
+		R##_f0--;						\
+		_FP_FRAC_ADD_2(_r, Y, _r);				\
+	      }								\
+	  }								\
+	if (!_FP_FRAC_EQ_2(_r, _m))					\
+	  R##_f0 |= _FP_WORK_STICKY;					\
+      }									\
+  } while (0)
+
+
+#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y)					\
+  do {									\
+    _FP_W_TYPE _x[4], _y[2], _z[4];					\
+    _y[0] = Y##_f0; _y[1] = Y##_f1;					\
+    _x[0] = _x[3] = 0;							\
+    if (_FP_FRAC_GT_2(X, Y))						\
+      {									\
+	R##_e++;							\
+	_x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) |	\
+		 X##_f1 >> (_FP_W_TYPE_SIZE -				\
+			    (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE)));	\
+	_x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE);	\
+      }									\
+    else								\
+      {									\
+	_x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) |	\
+		 X##_f1 >> (_FP_W_TYPE_SIZE -				\
+			    (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE)));	\
+	_x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE);	\
+      }									\
+									\
+    (void) mpn_divrem (_z, 0, _x, 4, _y, 2);				\
+    R##_f1 = _z[1];							\
+    R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0);				\
+  } while (0)
+
+
+/*
+ * Square root algorithms:
+ * We have just one right now, maybe Newton approximation
+ * should be added for those machines where division is fast.
+ */
+ 
+#define _FP_SQRT_MEAT_2(R, S, T, X, q)			\
+  do {							\
+    while (q)						\
+      {							\
+	T##_f1 = S##_f1 + q;				\
+	if (T##_f1 <= X##_f1)				\
+	  {						\
+	    S##_f1 = T##_f1 + q;			\
+	    X##_f1 -= T##_f1;				\
+	    R##_f1 += q;				\
+	  }						\
+	_FP_FRAC_SLL_2(X, 1);				\
+	q >>= 1;					\
+      }							\
+    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
+    while (q != _FP_WORK_ROUND)				\
+      {							\
+	T##_f0 = S##_f0 + q;				\
+	T##_f1 = S##_f1;				\
+	if (T##_f1 < X##_f1 || 				\
+	    (T##_f1 == X##_f1 && T##_f0 <= X##_f0))	\
+	  {						\
+	    S##_f0 = T##_f0 + q;			\
+	    S##_f1 += (T##_f0 > S##_f0);		\
+	    _FP_FRAC_DEC_2(X, T);			\
+	    R##_f0 += q;				\
+	  }						\
+	_FP_FRAC_SLL_2(X, 1);				\
+	q >>= 1;					\
+      }							\
+    if (X##_f0 | X##_f1)				\
+      {							\
+	if (S##_f1 < X##_f1 || 				\
+	    (S##_f1 == X##_f1 && S##_f0 < X##_f0))	\
+	  R##_f0 |= _FP_WORK_ROUND;			\
+	R##_f0 |= _FP_WORK_STICKY;			\
+      }							\
+  } while (0)
+
+
+/*
+ * Assembly/disassembly for converting to/from integral types.  
+ * No shifting or overflow handled here.
+ */
+
+#define _FP_FRAC_ASSEMBLE_2(r, X, rsize)	\
+(void)((rsize <= _FP_W_TYPE_SIZE)		\
+       ? ({ r = X##_f0; })			\
+       : ({					\
+	    r = X##_f1;				\
+	    r <<= _FP_W_TYPE_SIZE;		\
+	    r += X##_f0;			\
+	  }))
+
+#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)				\
+  do {									\
+    X##_f0 = r;								\
+    X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);	\
+  } while (0)
+
+/*
+ * Convert FP values between word sizes
+ */
+
+#define _FP_FRAC_COPY_1_2(D, S)		(D##_f = S##_f0)
+
+#define _FP_FRAC_COPY_2_1(D, S)		((D##_f0 = S##_f), (D##_f1 = 0))
+
+#define _FP_FRAC_COPY_2_2(D,S)		_FP_FRAC_COPY_2(D,S)
diff --git a/libc/sysdeps/linux/sparc/soft-fp/op-4.h b/libc/sysdeps/linux/sparc/soft-fp/op-4.h
new file mode 100644
index 000000000..70b9fafbe
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/op-4.h
@@ -0,0 +1,688 @@
+/* Software floating-point emulation.
+   Basic four-word fraction declaration and manipulation.
+   Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.  */
+
+#define _FP_FRAC_DECL_4(X)	_FP_W_TYPE X##_f[4]
+#define _FP_FRAC_COPY_4(D,S)			\
+  (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1],	\
+   D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
+#define _FP_FRAC_SET_4(X,I)	__FP_FRAC_SET_4(X, I)
+#define _FP_FRAC_HIGH_4(X)	(X##_f[3])
+#define _FP_FRAC_LOW_4(X)	(X##_f[0])
+#define _FP_FRAC_WORD_4(X,w)	(X##_f[w])
+
+#define _FP_FRAC_SLL_4(X,N)						\
+  do {									\
+    _FP_I_TYPE _up, _down, _skip, _i;					\
+    _skip = (N) / _FP_W_TYPE_SIZE;					\
+    _up = (N) % _FP_W_TYPE_SIZE;					\
+    _down = _FP_W_TYPE_SIZE - _up;					\
+    if (!_up)								\
+      for (_i = 3; _i >= _skip; --_i)					\
+	X##_f[_i] = X##_f[_i-_skip];					\
+    else								\
+      {									\
+	for (_i = 3; _i > _skip; --_i)					\
+	  X##_f[_i] = X##_f[_i-_skip] << _up				\
+		      | X##_f[_i-_skip-1] >> _down;			\
+	X##_f[_i--] = X##_f[0] << _up; 					\
+      }									\
+    for (; _i >= 0; --_i)						\
+      X##_f[_i] = 0;							\
+  } while (0)
+
+/* This one was broken too */
+#define _FP_FRAC_SRL_4(X,N)						\
+  do {									\
+    _FP_I_TYPE _up, _down, _skip, _i;					\
+    _skip = (N) / _FP_W_TYPE_SIZE;					\
+    _down = (N) % _FP_W_TYPE_SIZE;					\
+    _up = _FP_W_TYPE_SIZE - _down;					\
+    if (!_down)								\
+      for (_i = 0; _i <= 3-_skip; ++_i)					\
+	X##_f[_i] = X##_f[_i+_skip];					\
+    else								\
+      {									\
+	for (_i = 0; _i < 3-_skip; ++_i)				\
+	  X##_f[_i] = X##_f[_i+_skip] >> _down				\
+		      | X##_f[_i+_skip+1] << _up;			\
+	X##_f[_i++] = X##_f[3] >> _down;				\
+      }									\
+    for (; _i < 4; ++_i)						\
+      X##_f[_i] = 0;							\
+  } while (0)
+
+
+/* Right shift with sticky-lsb. 
+ * What this actually means is that we do a standard right-shift,
+ * but that if any of the bits that fall off the right hand side
+ * were one then we always set the LSbit.
+ */
+#define _FP_FRAC_SRST_4(X,S,N,size)			\
+  do {							\
+    _FP_I_TYPE _up, _down, _skip, _i;			\
+    _FP_W_TYPE _s;					\
+    _skip = (N) / _FP_W_TYPE_SIZE;			\
+    _down = (N) % _FP_W_TYPE_SIZE;			\
+    _up = _FP_W_TYPE_SIZE - _down;			\
+    for (_s = _i = 0; _i < _skip; ++_i)			\
+      _s |= X##_f[_i];					\
+    if (!_down)						\
+      for (_i = 0; _i <= 3-_skip; ++_i)			\
+	X##_f[_i] = X##_f[_i+_skip];			\
+    else						\
+      {							\
+	_s |= X##_f[_i] << _up;				\
+	for (_i = 0; _i < 3-_skip; ++_i)		\
+	  X##_f[_i] = X##_f[_i+_skip] >> _down		\
+		      | X##_f[_i+_skip+1] << _up;	\
+	X##_f[_i++] = X##_f[3] >> _down;		\
+      }							\
+    for (; _i < 4; ++_i)				\
+      X##_f[_i] = 0;					\
+    S = (_s != 0);					\
+  } while (0)
+
+#define _FP_FRAC_SRS_4(X,N,size)		\
+  do {						\
+    int _sticky;				\
+    _FP_FRAC_SRST_4(X, _sticky, N, size);	\
+    X##_f[0] |= _sticky;			\
+  } while (0)
+
+#define _FP_FRAC_ADD_4(R,X,Y)						\
+  __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],		\
+		  X##_f[3], X##_f[2], X##_f[1], X##_f[0],		\
+		  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
+
+#define _FP_FRAC_SUB_4(R,X,Y)						\
+  __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],		\
+		  X##_f[3], X##_f[2], X##_f[1], X##_f[0],		\
+		  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
+
+#define _FP_FRAC_DEC_4(X,Y)						\
+  __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],		\
+		  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
+
+#define _FP_FRAC_ADDI_4(X,I)						\
+  __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
+
+#define _FP_ZEROFRAC_4  0,0,0,0
+#define _FP_MINFRAC_4   0,0,0,1
+#define _FP_MAXFRAC_4	(~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
+
+#define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
+#define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE)X##_f[3] < 0)
+#define _FP_FRAC_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
+#define _FP_FRAC_CLEAR_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
+
+#define _FP_FRAC_EQ_4(X,Y)				\
+ (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1]		\
+  && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
+
+#define _FP_FRAC_GT_4(X,Y)				\
+ (X##_f[3] > Y##_f[3] ||				\
+  (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||	\
+   (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||	\
+    (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0])	\
+   ))							\
+  ))							\
+ )
+
+#define _FP_FRAC_GE_4(X,Y)				\
+ (X##_f[3] > Y##_f[3] ||				\
+  (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||	\
+   (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||	\
+    (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0])	\
+   ))							\
+  ))							\
+ )
+
+
+#define _FP_FRAC_CLZ_4(R,X)		\
+  do {					\
+    if (X##_f[3])			\
+    {					\
+	__FP_CLZ(R,X##_f[3]);		\
+    }					\
+    else if (X##_f[2])			\
+    {					\
+	__FP_CLZ(R,X##_f[2]);		\
+	R += _FP_W_TYPE_SIZE;		\
+    }					\
+    else if (X##_f[1])			\
+    {					\
+	__FP_CLZ(R,X##_f[1]);		\
+	R += _FP_W_TYPE_SIZE*2;		\
+    }					\
+    else				\
+    {					\
+	__FP_CLZ(R,X##_f[0]);		\
+	R += _FP_W_TYPE_SIZE*3;		\
+    }					\
+  } while(0)
+
+
+#define _FP_UNPACK_RAW_4(fs, X, val)				\
+  do {								\
+    union _FP_UNION_##fs _flo; _flo.flt = (val);		\
+    X##_f[0] = _flo.bits.frac0;					\
+    X##_f[1] = _flo.bits.frac1;					\
+    X##_f[2] = _flo.bits.frac2;					\
+    X##_f[3] = _flo.bits.frac3;					\
+    X##_e  = _flo.bits.exp;					\
+    X##_s  = _flo.bits.sign;					\
+  } while (0)
+
+#define _FP_UNPACK_RAW_4_P(fs, X, val)				\
+  do {								\
+    union _FP_UNION_##fs *_flo =				\
+      (union _FP_UNION_##fs *)(val);				\
+								\
+    X##_f[0] = _flo->bits.frac0;				\
+    X##_f[1] = _flo->bits.frac1;				\
+    X##_f[2] = _flo->bits.frac2;				\
+    X##_f[3] = _flo->bits.frac3;				\
+    X##_e  = _flo->bits.exp;					\
+    X##_s  = _flo->bits.sign;					\
+  } while (0)
+
+#define _FP_PACK_RAW_4(fs, val, X)				\
+  do {								\
+    union _FP_UNION_##fs _flo;					\
+    _flo.bits.frac0 = X##_f[0];					\
+    _flo.bits.frac1 = X##_f[1];					\
+    _flo.bits.frac2 = X##_f[2];					\
+    _flo.bits.frac3 = X##_f[3];					\
+    _flo.bits.exp   = X##_e;					\
+    _flo.bits.sign  = X##_s;					\
+    (val) = _flo.flt;				   		\
+  } while (0)
+
+#define _FP_PACK_RAW_4_P(fs, val, X)				\
+  do {								\
+    union _FP_UNION_##fs *_flo =				\
+      (union _FP_UNION_##fs *)(val);				\
+								\
+    _flo->bits.frac0 = X##_f[0];				\
+    _flo->bits.frac1 = X##_f[1];				\
+    _flo->bits.frac2 = X##_f[2];				\
+    _flo->bits.frac3 = X##_f[3];				\
+    _flo->bits.exp   = X##_e;					\
+    _flo->bits.sign  = X##_s;					\
+  } while (0)
+
+/*
+ * Multiplication algorithms:
+ */
+
+/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
+
+#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit)			    \
+  do {									    \
+    _FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);	    \
+    _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f);	    \
+									    \
+    doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \
+    doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]);				    \
+    doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]);				    \
+    doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]);				    \
+    doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]);				    \
+    doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]);				    \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),	    \
+		    _FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0,		    \
+		    0,0,_FP_FRAC_WORD_8(_z,1));				    \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),	    \
+		    _FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0,		    \
+		    _FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),	    \
+		    _FP_FRAC_WORD_8(_z,1));				    \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
+		    _FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0,		    \
+		    0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2));	    \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
+		    _FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0,		    \
+		    _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
+		    _FP_FRAC_WORD_8(_z,2));				    \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
+		    _FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0,		    \
+		    _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
+		    _FP_FRAC_WORD_8(_z,2));				    \
+    doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]);				    \
+    doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]);				    \
+    doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]);				    \
+    doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]);				    \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
+		    _FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0,		    \
+		    0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3));	    \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
+		    _FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0,		    \
+		    _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
+		    _FP_FRAC_WORD_8(_z,3));				    \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
+		    _FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0,		    \
+		    _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
+		    _FP_FRAC_WORD_8(_z,3));				    \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
+		    _FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0,		    \
+		    _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
+		    _FP_FRAC_WORD_8(_z,3));				    \
+    doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]);				    \
+    doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]);				    \
+    doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]);				    \
+    doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]);				    \
+    doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]);				    \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
+		    _FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0,		    \
+		    0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4));	    \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
+		    _FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0,		    \
+		    _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
+		    _FP_FRAC_WORD_8(_z,4));				    \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
+		    _FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0,		    \
+		    _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
+		    _FP_FRAC_WORD_8(_z,4));				    \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),	    \
+		    _FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0,		    \
+		    0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5));	    \
+    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),	    \
+		    _FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0,		    \
+		    _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),	    \
+		    _FP_FRAC_WORD_8(_z,5));				    \
+    doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]);				    \
+    __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),	    \
+		    _b_f1,_b_f0,					    \
+		    _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6));	    \
+									    \
+    /* Normalize since we know where the msb of the multiplicands	    \
+       were (bit B), we know that the msb of the of the product is	    \
+       at either 2B or 2B-1.  */					    \
+    _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);			    \
+    __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),	    \
+		    _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));	    \
+  } while (0)
+
+#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y)				    \
+  do {									    \
+    _FP_FRAC_DECL_8(_z);						    \
+									    \
+    mpn_mul_n(_z_f, _x_f, _y_f, 4);					    \
+									    \
+    /* Normalize since we know where the msb of the multiplicands	    \
+       were (bit B), we know that the msb of the of the product is	    \
+       at either 2B or 2B-1.  */					    \
+    _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);	 		    \
+    __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),	    \
+		    _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));	    \
+  } while (0)
+
+/*
+ * Helper utility for _FP_DIV_MEAT_4_udiv:
+ * pppp = m * nnn
+ */
+#define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0)				    \
+  do {									    \
+    UWtype _t;								    \
+    umul_ppmm(p1,p0,m,n0);						    \
+    umul_ppmm(p2,_t,m,n1);						    \
+    __FP_FRAC_ADDI_2(p2,p1,_t);						    \
+    umul_ppmm(p3,_t,m,n2);						    \
+    __FP_FRAC_ADDI_2(p3,p2,_t);						    \
+  } while (0)
+
+/*
+ * Division algorithms:
+ */
+
+#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y)				    \
+  do {									    \
+    int _i;								    \
+    _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m);				    \
+    _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4);					    \
+    if (_FP_FRAC_GT_4(X, Y))						    \
+      {									    \
+	_n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1);			    \
+	_FP_FRAC_SRL_4(X, 1);						    \
+      }									    \
+    else								    \
+      R##_e--;								    \
+									    \
+    /* Normalize, i.e. make the most significant bit of the 		    \
+       denominator set. */						    \
+    _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs);				    \
+									    \
+    for (_i = 3; ; _i--)						    \
+      {									    \
+        if (X##_f[3] == Y##_f[3])					    \
+          {								    \
+            /* This is a special case, not an optimization		    \
+               (X##_f[3]/Y##_f[3] would not fit into UWtype).		    \
+               As X## is guaranteed to be < Y,  R##_f[_i] can be either	    \
+               (UWtype)-1 or (UWtype)-2.  */				    \
+            R##_f[_i] = -1;						    \
+            if (!_i)							    \
+	      break;							    \
+            __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],	    \
+			    Y##_f[2], Y##_f[1], Y##_f[0], 0,		    \
+			    X##_f[2], X##_f[1], X##_f[0], _n_f[_i]);	    \
+            _FP_FRAC_SUB_4(X, Y, X);					    \
+            if (X##_f[3] > Y##_f[3])					    \
+              {								    \
+                R##_f[_i] = -2;						    \
+                _FP_FRAC_ADD_4(X, Y, X);				    \
+              }								    \
+          }								    \
+        else								    \
+          {								    \
+            udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]);  \
+            umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0],		    \
+			  R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]);	    \
+            X##_f[2] = X##_f[1];					    \
+            X##_f[1] = X##_f[0];					    \
+            X##_f[0] = _n_f[_i];					    \
+            if (_FP_FRAC_GT_4(_m, X))					    \
+              {								    \
+                R##_f[_i]--;						    \
+                _FP_FRAC_ADD_4(X, Y, X);				    \
+                if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X))	    \
+                  {							    \
+		    R##_f[_i]--;					    \
+		    _FP_FRAC_ADD_4(X, Y, X);				    \
+                  }							    \
+              }								    \
+            _FP_FRAC_DEC_4(X, _m);					    \
+            if (!_i)							    \
+	      {								    \
+		if (!_FP_FRAC_EQ_4(X, _m))				    \
+		  R##_f[0] |= _FP_WORK_STICKY;				    \
+		break;							    \
+	      }								    \
+          }								    \
+      }									    \
+  } while (0)
+
+
+/*
+ * Square root algorithms:
+ * We have just one right now, maybe Newton approximation
+ * should be added for those machines where division is fast.
+ */
+ 
+#define _FP_SQRT_MEAT_4(R, S, T, X, q)				\
+  do {								\
+    while (q)							\
+      {								\
+	T##_f[3] = S##_f[3] + q;				\
+	if (T##_f[3] <= X##_f[3])				\
+	  {							\
+	    S##_f[3] = T##_f[3] + q;				\
+	    X##_f[3] -= T##_f[3];				\
+	    R##_f[3] += q;					\
+	  }							\
+	_FP_FRAC_SLL_4(X, 1);					\
+	q >>= 1;						\
+      }								\
+    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);			\
+    while (q)							\
+      {								\
+	T##_f[2] = S##_f[2] + q;				\
+	T##_f[3] = S##_f[3];					\
+	if (T##_f[3] < X##_f[3] || 				\
+	    (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2]))	\
+	  {							\
+	    S##_f[2] = T##_f[2] + q;				\
+	    S##_f[3] += (T##_f[2] > S##_f[2]);			\
+	    __FP_FRAC_DEC_2(X##_f[3], X##_f[2],			\
+			    T##_f[3], T##_f[2]);		\
+	    R##_f[2] += q;					\
+	  }							\
+	_FP_FRAC_SLL_4(X, 1);					\
+	q >>= 1;						\
+      }								\
+    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);			\
+    while (q)							\
+      {								\
+	T##_f[1] = S##_f[1] + q;				\
+	T##_f[2] = S##_f[2];					\
+	T##_f[3] = S##_f[3];					\
+	if (T##_f[3] < X##_f[3] || 				\
+	    (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] ||	\
+	     (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1]))))	\
+	  {							\
+	    S##_f[1] = T##_f[1] + q;				\
+	    S##_f[2] += (T##_f[1] > S##_f[1]);			\
+	    S##_f[3] += (T##_f[2] > S##_f[2]);			\
+	    __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1],	\
+	    		    T##_f[3], T##_f[2], T##_f[1]);	\
+	    R##_f[1] += q;					\
+	  }							\
+	_FP_FRAC_SLL_4(X, 1);					\
+	q >>= 1;						\
+      }								\
+    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);			\
+    while (q != _FP_WORK_ROUND)					\
+      {								\
+	T##_f[0] = S##_f[0] + q;				\
+	T##_f[1] = S##_f[1];					\
+	T##_f[2] = S##_f[2];					\
+	T##_f[3] = S##_f[3];					\
+	if (_FP_FRAC_GE_4(X,T))					\
+	  {							\
+	    S##_f[0] = T##_f[0] + q;				\
+	    S##_f[1] += (T##_f[0] > S##_f[0]);			\
+	    S##_f[2] += (T##_f[1] > S##_f[1]);			\
+	    S##_f[3] += (T##_f[2] > S##_f[2]);			\
+	    _FP_FRAC_DEC_4(X, T);				\
+	    R##_f[0] += q;					\
+	  }							\
+	_FP_FRAC_SLL_4(X, 1);					\
+	q >>= 1;						\
+      }								\
+    if (!_FP_FRAC_ZEROP_4(X))					\
+      {								\
+	if (_FP_FRAC_GT_4(X,S))					\
+	  R##_f[0] |= _FP_WORK_ROUND;				\
+	R##_f[0] |= _FP_WORK_STICKY;				\
+      }								\
+  } while (0)
+
+
+/*
+ * Internals 
+ */
+
+#define __FP_FRAC_SET_4(X,I3,I2,I1,I0)					\
+  (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
+
+#ifndef __FP_FRAC_ADD_3
+#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)		\
+  do {								\
+    _FP_W_TYPE _c1, _c2;					\
+    r0 = x0 + y0;						\
+    _c1 = r0 < x0;						\
+    r1 = x1 + y1;						\
+    _c2 = r1 < x1;						\
+    r1 += _c1;							\
+    _c2 |= r1 < _c1;						\
+    r2 = x2 + y2 + _c2;						\
+  } while (0)
+#endif
+
+#ifndef __FP_FRAC_ADD_4
+#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)	\
+  do {								\
+    _FP_W_TYPE _c1, _c2, _c3;					\
+    r0 = x0 + y0;						\
+    _c1 = r0 < x0;						\
+    r1 = x1 + y1;						\
+    _c2 = r1 < x1;						\
+    r1 += _c1;							\
+    _c2 |= r1 < _c1;						\
+    r2 = x2 + y2;						\
+    _c3 = r2 < x2;						\
+    r2 += _c2;							\
+    _c3 |= r2 < _c2;						\
+    r3 = x3 + y3 + _c3;						\
+  } while (0)
+#endif
+
+#ifndef __FP_FRAC_SUB_3
+#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)		\
+  do {								\
+    _FP_W_TYPE _c1, _c2;					\
+    r0 = x0 - y0;						\
+    _c1 = r0 > x0;						\
+    r1 = x1 - y1;						\
+    _c2 = r1 > x1;						\
+    r1 -= _c1;							\
+    _c2 |= _c1 && (y1 == x1);					\
+    r2 = x2 - y2 - _c2;						\
+  } while (0)
+#endif
+
+#ifndef __FP_FRAC_SUB_4
+#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)	\
+  do {								\
+    _FP_W_TYPE _c1, _c2, _c3;					\
+    r0 = x0 - y0;						\
+    _c1 = r0 > x0;						\
+    r1 = x1 - y1;						\
+    _c2 = r1 > x1;						\
+    r1 -= _c1;							\
+    _c2 |= _c1 && (y1 == x1);					\
+    r2 = x2 - y2;						\
+    _c3 = r2 > x2;						\
+    r2 -= _c2;							\
+    _c3 |= _c2 && (y2 == x2);					\
+    r3 = x3 - y3 - _c3;						\
+  } while (0)
+#endif
+
+#ifndef __FP_FRAC_DEC_3
+#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0)				\
+  do {									\
+    UWtype _t0, _t1, _t2;						\
+    _t0 = x0, _t1 = x1, _t2 = x2;					\
+    __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0);		\
+  } while (0)
+#endif
+
+#ifndef __FP_FRAC_DEC_4
+#define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0)			\
+  do {									\
+    UWtype _t0, _t1, _t2, _t3;						\
+    _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3;				\
+    __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0);		\
+  } while (0)
+#endif
+
+#ifndef __FP_FRAC_ADDI_4
+#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)					\
+  do {									\
+    UWtype _t;								\
+    _t = ((x0 += i) < i);						\
+    x1 += _t; _t = (x1 < _t);						\
+    x2 += _t; _t = (x2 < _t);						\
+    x3 += _t;								\
+  } while (0)
+#endif
+
+/* Convert FP values between word sizes. This appears to be more
+ * complicated than I'd have expected it to be, so these might be
+ * wrong... These macros are in any case somewhat bogus because they
+ * use information about what various FRAC_n variables look like 
+ * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
+ * the ones in op-2.h and op-1.h. 
+ */
+#define _FP_FRAC_COPY_1_4(D, S)		(D##_f = S##_f[0])
+
+#define _FP_FRAC_COPY_2_4(D, S)			\
+do {						\
+  D##_f0 = S##_f[0];				\
+  D##_f1 = S##_f[1];				\
+} while (0)
+
+/* Assembly/disassembly for converting to/from integral types.  
+ * No shifting or overflow handled here.
+ */
+/* Put the FP value X into r, which is an integer of size rsize. */
+#define _FP_FRAC_ASSEMBLE_4(r, X, rsize)				\
+  do {									\
+    if (rsize <= _FP_W_TYPE_SIZE)					\
+      r = X##_f[0];							\
+    else if (rsize <= 2*_FP_W_TYPE_SIZE)				\
+    {									\
+      r = X##_f[1];							\
+      r <<= _FP_W_TYPE_SIZE;						\
+      r += X##_f[0];							\
+    }									\
+    else								\
+    {									\
+      /* I'm feeling lazy so we deal with int == 3words (implausible)*/	\
+      /* and int == 4words as a single case.			 */	\
+      r = X##_f[3];							\
+      r <<= _FP_W_TYPE_SIZE;						\
+      r += X##_f[2];							\
+      r <<= _FP_W_TYPE_SIZE;						\
+      r += X##_f[1];							\
+      r <<= _FP_W_TYPE_SIZE;						\
+      r += X##_f[0];							\
+    }									\
+  } while (0)
+
+/* "No disassemble Number Five!" */
+/* move an integer of size rsize into X's fractional part. We rely on
+ * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
+ * having to mask the values we store into it.
+ */
+#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)				\
+  do {									\
+    X##_f[0] = r;							\
+    X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);	\
+    X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
+    X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
+  } while (0);
+
+#define _FP_FRAC_COPY_4_1(D, S)			\
+do {						\
+  D##_f[0] = S##_f;				\
+  D##_f[1] = D##_f[2] = D##_f[3] = 0;		\
+} while (0)
+
+#define _FP_FRAC_COPY_4_2(D, S)			\
+do {						\
+  D##_f[0] = S##_f0;				\
+  D##_f[1] = S##_f1;				\
+  D##_f[2] = D##_f[3] = 0;			\
+} while (0)
+
+#define _FP_FRAC_COPY_4_4(D,S)	_FP_FRAC_COPY_4(D,S)
diff --git a/libc/sysdeps/linux/sparc/soft-fp/op-8.h b/libc/sysdeps/linux/sparc/soft-fp/op-8.h
new file mode 100644
index 000000000..e0612a5e6
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/op-8.h
@@ -0,0 +1,111 @@
+/* Software floating-point emulation.
+   Basic eight-word fraction declaration and manipulation.
+   Copyright (C) 1997,1998,1999,2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.  */
+
+/* We need just a few things from here for op-4, if we ever need some
+   other macros, they can be added. */
+#define _FP_FRAC_DECL_8(X)	_FP_W_TYPE X##_f[8]
+#define _FP_FRAC_HIGH_8(X)	(X##_f[7])
+#define _FP_FRAC_LOW_8(X)	(X##_f[0])
+#define _FP_FRAC_WORD_8(X,w)	(X##_f[w])
+
+#define _FP_FRAC_SLL_8(X,N)						\
+  do {									\
+    _FP_I_TYPE _up, _down, _skip, _i;					\
+    _skip = (N) / _FP_W_TYPE_SIZE;					\
+    _up = (N) % _FP_W_TYPE_SIZE;					\
+    _down = _FP_W_TYPE_SIZE - _up;					\
+    if (!_up)								\
+      for (_i = 7; _i >= _skip; --_i)					\
+	X##_f[_i] = X##_f[_i-_skip];					\
+    else								\
+      {									\
+	for (_i = 7; _i > _skip; --_i)					\
+	  X##_f[_i] = X##_f[_i-_skip] << _up				\
+		      | X##_f[_i-_skip-1] >> _down;			\
+	X##_f[_i--] = X##_f[0] << _up; 					\
+      }									\
+    for (; _i >= 0; --_i)						\
+      X##_f[_i] = 0;							\
+  } while (0)
+
+#define _FP_FRAC_SRL_8(X,N)						\
+  do {									\
+    _FP_I_TYPE _up, _down, _skip, _i;					\
+    _skip = (N) / _FP_W_TYPE_SIZE;					\
+    _down = (N) % _FP_W_TYPE_SIZE;					\
+    _up = _FP_W_TYPE_SIZE - _down;					\
+    if (!_down)								\
+      for (_i = 0; _i <= 7-_skip; ++_i)					\
+	X##_f[_i] = X##_f[_i+_skip];					\
+    else								\
+      {									\
+	for (_i = 0; _i < 7-_skip; ++_i)				\
+	  X##_f[_i] = X##_f[_i+_skip] >> _down				\
+		      | X##_f[_i+_skip+1] << _up;			\
+	X##_f[_i++] = X##_f[7] >> _down;				\
+      }									\
+    for (; _i < 8; ++_i)						\
+      X##_f[_i] = 0;							\
+  } while (0)
+
+
+/* Right shift with sticky-lsb. 
+ * What this actually means is that we do a standard right-shift,
+ * but that if any of the bits that fall off the right hand side
+ * were one then we always set the LSbit.
+ */
+#define _FP_FRAC_SRS_8(X,N,size)					\
+  do {									\
+    _FP_I_TYPE _up, _down, _skip, _i;					\
+    _FP_W_TYPE _s;							\
+    _skip = (N) / _FP_W_TYPE_SIZE;					\
+    _down = (N) % _FP_W_TYPE_SIZE;					\
+    _up = _FP_W_TYPE_SIZE - _down;					\
+    for (_s = _i = 0; _i < _skip; ++_i)					\
+      _s |= X##_f[_i];							\
+    if (!_down)								\
+      for (_i = 0; _i <= 7-_skip; ++_i)					\
+	X##_f[_i] = X##_f[_i+_skip];					\
+    else								\
+      {									\
+	_s |= X##_f[_i] << _up;						\
+	for (_i = 0; _i < 7-_skip; ++_i)				\
+	  X##_f[_i] = X##_f[_i+_skip] >> _down				\
+		      | X##_f[_i+_skip+1] << _up;			\
+	X##_f[_i++] = X##_f[7] >> _down;				\
+      }									\
+    for (; _i < 8; ++_i)						\
+      X##_f[_i] = 0;							\
+    /* don't fix the LSB until the very end when we're sure f[0] is stable */	\
+    X##_f[0] |= (_s != 0);						\
+  } while (0)
+
diff --git a/libc/sysdeps/linux/sparc/soft-fp/op-common.h b/libc/sysdeps/linux/sparc/soft-fp/op-common.h
new file mode 100644
index 000000000..ef11b527b
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/op-common.h
@@ -0,0 +1,1359 @@
+/* Software floating-point emulation. Common operations.
+   Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.  */
+
+#define _FP_DECL(wc, X)						\
+  _FP_I_TYPE X##_c __attribute__((unused)), X##_s, X##_e;	\
+  _FP_FRAC_DECL_##wc(X)
+
+/*
+ * Finish truely unpacking a native fp value by classifying the kind
+ * of fp value and normalizing both the exponent and the fraction.
+ */
+
+#define _FP_UNPACK_CANONICAL(fs, wc, X)					\
+do {									\
+  switch (X##_e)							\
+  {									\
+  default:								\
+    _FP_FRAC_HIGH_RAW_##fs(X) |= _FP_IMPLBIT_##fs;			\
+    _FP_FRAC_SLL_##wc(X, _FP_WORKBITS);					\
+    X##_e -= _FP_EXPBIAS_##fs;						\
+    X##_c = FP_CLS_NORMAL;						\
+    break;								\
+									\
+  case 0:								\
+    if (_FP_FRAC_ZEROP_##wc(X))						\
+      X##_c = FP_CLS_ZERO;						\
+    else								\
+      {									\
+	/* a denormalized number */					\
+	_FP_I_TYPE _shift;						\
+	_FP_FRAC_CLZ_##wc(_shift, X);					\
+	_shift -= _FP_FRACXBITS_##fs;					\
+	_FP_FRAC_SLL_##wc(X, (_shift+_FP_WORKBITS));			\
+	X##_e -= _FP_EXPBIAS_##fs - 1 + _shift;				\
+	X##_c = FP_CLS_NORMAL;						\
+	FP_SET_EXCEPTION(FP_EX_DENORM);					\
+      }									\
+    break;								\
+									\
+  case _FP_EXPMAX_##fs:							\
+    if (_FP_FRAC_ZEROP_##wc(X))						\
+      X##_c = FP_CLS_INF;						\
+    else								\
+      {									\
+	X##_c = FP_CLS_NAN;						\
+	/* Check for signaling NaN */					\
+	if (!(_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs))		\
+	  FP_SET_EXCEPTION(FP_EX_INVALID);				\
+      }									\
+    break;								\
+  }									\
+} while (0)
+
+/* Finish unpacking an fp value in semi-raw mode: the mantissa is
+   shifted by _FP_WORKBITS but the implicit MSB is not inserted and
+   other classification is not done.  */
+#define _FP_UNPACK_SEMIRAW(fs, wc, X)	_FP_FRAC_SLL_##wc(X, _FP_WORKBITS)
+
+/* A semi-raw value has overflowed to infinity.  Adjust the mantissa
+   and exponent appropriately.  */
+#define _FP_OVERFLOW_SEMIRAW(fs, wc, X)			\
+do {							\
+  if (FP_ROUNDMODE == FP_RND_NEAREST			\
+      || (FP_ROUNDMODE == FP_RND_PINF && !X##_s)	\
+      || (FP_ROUNDMODE == FP_RND_MINF && X##_s))	\
+    {							\
+      X##_e = _FP_EXPMAX_##fs;				\
+      _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);		\
+    }							\
+  else							\
+    {							\
+      X##_e = _FP_EXPMAX_##fs - 1;			\
+      _FP_FRAC_SET_##wc(X, _FP_MAXFRAC_##wc);		\
+    }							\
+    FP_SET_EXCEPTION(FP_EX_INEXACT);			\
+    FP_SET_EXCEPTION(FP_EX_OVERFLOW);			\
+} while (0)
+
+/* Check for a semi-raw value being a signaling NaN and raise the
+   invalid exception if so.  */
+#define _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X)			\
+do {								\
+  if (X##_e == _FP_EXPMAX_##fs					\
+      && !_FP_FRAC_ZEROP_##wc(X)				\
+      && !(_FP_FRAC_HIGH_##fs(X) & _FP_QNANBIT_SH_##fs))	\
+    FP_SET_EXCEPTION(FP_EX_INVALID);				\
+} while (0)
+
+/* Choose a NaN result from an operation on two semi-raw NaN
+   values.  */
+#define _FP_CHOOSENAN_SEMIRAW(fs, wc, R, X, Y, OP)			\
+do {									\
+  /* _FP_CHOOSENAN expects raw values, so shift as required.  */	\
+  _FP_FRAC_SRL_##wc(X, _FP_WORKBITS);					\
+  _FP_FRAC_SRL_##wc(Y, _FP_WORKBITS);					\
+  _FP_CHOOSENAN(fs, wc, R, X, Y, OP);					\
+  _FP_FRAC_SLL_##wc(R, _FP_WORKBITS);					\
+} while (0)
+
+/* Test whether a biased exponent is normal (not zero or maximum).  */
+#define _FP_EXP_NORMAL(fs, wc, X)	(((X##_e + 1) & _FP_EXPMAX_##fs) > 1)
+
+/* Prepare to pack an fp value in semi-raw mode: the mantissa is
+   rounded and shifted right, with the rounding possibly increasing
+   the exponent (including changing a finite value to infinity).  */
+#define _FP_PACK_SEMIRAW(fs, wc, X)				\
+do {								\
+  _FP_ROUND(wc, X);						\
+  if (_FP_FRAC_HIGH_##fs(X)					\
+      & (_FP_OVERFLOW_##fs >> 1))				\
+    {								\
+      _FP_FRAC_HIGH_##fs(X) &= ~(_FP_OVERFLOW_##fs >> 1);	\
+      X##_e++;							\
+      if (X##_e == _FP_EXPMAX_##fs)				\
+	_FP_OVERFLOW_SEMIRAW(fs, wc, X);			\
+    }								\
+  _FP_FRAC_SRL_##wc(X, _FP_WORKBITS);				\
+  if (!_FP_EXP_NORMAL(fs, wc, X) && !_FP_FRAC_ZEROP_##wc(X))	\
+    {								\
+      if (X##_e == 0)						\
+	FP_SET_EXCEPTION(FP_EX_UNDERFLOW);			\
+      else							\
+	{							\
+	  if (!_FP_KEEPNANFRACP)				\
+	    {							\
+	      _FP_FRAC_SET_##wc(X, _FP_NANFRAC_##fs);		\
+	      X##_s = _FP_NANSIGN_##fs;				\
+	    }							\
+	  else							\
+	    _FP_FRAC_HIGH_RAW_##fs(X) |= _FP_QNANBIT_##fs;	\
+	}							\
+    }								\
+} while (0)
+
+/*
+ * Before packing the bits back into the native fp result, take care
+ * of such mundane things as rounding and overflow.  Also, for some
+ * kinds of fp values, the original parts may not have been fully
+ * extracted -- but that is ok, we can regenerate them now.
+ */
+
+#define _FP_PACK_CANONICAL(fs, wc, X)				\
+do {								\
+  switch (X##_c)						\
+  {								\
+  case FP_CLS_NORMAL:						\
+    X##_e += _FP_EXPBIAS_##fs;					\
+    if (X##_e > 0)						\
+      {								\
+	_FP_ROUND(wc, X);					\
+	if (_FP_FRAC_OVERP_##wc(fs, X))				\
+	  {							\
+	    _FP_FRAC_CLEAR_OVERP_##wc(fs, X);			\
+	    X##_e++;						\
+	  }							\
+	_FP_FRAC_SRL_##wc(X, _FP_WORKBITS);			\
+	if (X##_e >= _FP_EXPMAX_##fs)				\
+	  {							\
+	    /* overflow */					\
+	    switch (FP_ROUNDMODE)				\
+	      {							\
+	      case FP_RND_NEAREST:				\
+		X##_c = FP_CLS_INF;				\
+		break;						\
+	      case FP_RND_PINF:					\
+		if (!X##_s) X##_c = FP_CLS_INF;			\
+		break;						\
+	      case FP_RND_MINF:					\
+		if (X##_s) X##_c = FP_CLS_INF;			\
+		break;						\
+	      }							\
+	    if (X##_c == FP_CLS_INF)				\
+	      {							\
+		/* Overflow to infinity */			\
+		X##_e = _FP_EXPMAX_##fs;			\
+		_FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);	\
+	      }							\
+	    else						\
+	      {							\
+		/* Overflow to maximum normal */		\
+		X##_e = _FP_EXPMAX_##fs - 1;			\
+		_FP_FRAC_SET_##wc(X, _FP_MAXFRAC_##wc);		\
+	      }							\
+	    FP_SET_EXCEPTION(FP_EX_OVERFLOW);			\
+            FP_SET_EXCEPTION(FP_EX_INEXACT);			\
+	  }							\
+      }								\
+    else							\
+      {								\
+	/* we've got a denormalized number */			\
+	X##_e = -X##_e + 1;					\
+	if (X##_e <= _FP_WFRACBITS_##fs)			\
+	  {							\
+	    _FP_FRAC_SRS_##wc(X, X##_e, _FP_WFRACBITS_##fs);	\
+	    _FP_ROUND(wc, X);					\
+	    if (_FP_FRAC_HIGH_##fs(X)				\
+		& (_FP_OVERFLOW_##fs >> 1))			\
+	      {							\
+	        X##_e = 1;					\
+	        _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);	\
+	      }							\
+	    else						\
+	      {							\
+		X##_e = 0;					\
+		_FP_FRAC_SRL_##wc(X, _FP_WORKBITS);		\
+		FP_SET_EXCEPTION(FP_EX_UNDERFLOW);		\
+	      }							\
+	  }							\
+	else							\
+	  {							\
+	    /* underflow to zero */				\
+	    X##_e = 0;						\
+	    if (!_FP_FRAC_ZEROP_##wc(X))			\
+	      {							\
+	        _FP_FRAC_SET_##wc(X, _FP_MINFRAC_##wc);		\
+	        _FP_ROUND(wc, X);				\
+	        _FP_FRAC_LOW_##wc(X) >>= (_FP_WORKBITS);	\
+	      }							\
+	    FP_SET_EXCEPTION(FP_EX_UNDERFLOW);			\
+	  }							\
+      }								\
+    break;							\
+								\
+  case FP_CLS_ZERO:						\
+    X##_e = 0;							\
+    _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);			\
+    break;							\
+								\
+  case FP_CLS_INF:						\
+    X##_e = _FP_EXPMAX_##fs;					\
+    _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);			\
+    break;							\
+								\
+  case FP_CLS_NAN:						\
+    X##_e = _FP_EXPMAX_##fs;					\
+    if (!_FP_KEEPNANFRACP)					\
+      {								\
+	_FP_FRAC_SET_##wc(X, _FP_NANFRAC_##fs);			\
+	X##_s = _FP_NANSIGN_##fs;				\
+      }								\
+    else							\
+      _FP_FRAC_HIGH_RAW_##fs(X) |= _FP_QNANBIT_##fs;		\
+    break;							\
+  }								\
+} while (0)
+
+/* This one accepts raw argument and not cooked,  returns
+ * 1 if X is a signaling NaN.
+ */
+#define _FP_ISSIGNAN(fs, wc, X)					\
+({								\
+  int __ret = 0;						\
+  if (X##_e == _FP_EXPMAX_##fs)					\
+    {								\
+      if (!_FP_FRAC_ZEROP_##wc(X)				\
+	  && !(_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs))	\
+	__ret = 1;						\
+    }								\
+  __ret;							\
+})
+
+
+
+
+
+/* Addition on semi-raw values.  */
+#define _FP_ADD_INTERNAL(fs, wc, R, X, Y, OP)				 \
+do {									 \
+  if (X##_s == Y##_s)							 \
+    {									 \
+      /* Addition.  */							 \
+      R##_s = X##_s;							 \
+      int ediff = X##_e - Y##_e;					 \
+      if (ediff > 0)							 \
+	{								 \
+	  R##_e = X##_e;						 \
+	  if (Y##_e == 0)						 \
+	    {								 \
+	      /* Y is zero or denormalized.  */				 \
+	      if (_FP_FRAC_ZEROP_##wc(Y))				 \
+		{							 \
+		  _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X);			 \
+		  _FP_FRAC_COPY_##wc(R, X);				 \
+		  goto add_done;					 \
+		}							 \
+	      else							 \
+		{							 \
+		  FP_SET_EXCEPTION(FP_EX_DENORM);			 \
+		  ediff--;						 \
+		  if (ediff == 0)					 \
+		    {							 \
+		      _FP_FRAC_ADD_##wc(R, X, Y);			 \
+		      goto add3;					 \
+		    }							 \
+		  if (X##_e == _FP_EXPMAX_##fs)				 \
+		    {							 \
+		      _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X);		 \
+		      _FP_FRAC_COPY_##wc(R, X);				 \
+		      goto add_done;					 \
+		    }							 \
+		  goto add1;						 \
+		}							 \
+	    }								 \
+	  else if (X##_e == _FP_EXPMAX_##fs)				 \
+	    {								 \
+	      /* X is NaN or Inf, Y is normal.  */			 \
+	      _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X);			 \
+	      _FP_FRAC_COPY_##wc(R, X);					 \
+	      goto add_done;						 \
+	    }								 \
+									 \
+	  /* Insert implicit MSB of Y.  */				 \
+	  _FP_FRAC_HIGH_##fs(Y) |= _FP_IMPLBIT_SH_##fs;			 \
+									 \
+	add1:								 \
+	  /* Shift the mantissa of Y to the right EDIFF steps;		 \
+	     remember to account later for the implicit MSB of X.  */	 \
+	  if (ediff <= _FP_WFRACBITS_##fs)				 \
+	    _FP_FRAC_SRS_##wc(Y, ediff, _FP_WFRACBITS_##fs);		 \
+	  else if (!_FP_FRAC_ZEROP_##wc(Y))				 \
+	    _FP_FRAC_SET_##wc(Y, _FP_MINFRAC_##wc);			 \
+	  _FP_FRAC_ADD_##wc(R, X, Y);					 \
+	}								 \
+      else if (ediff < 0)						 \
+	{								 \
+	  ediff = -ediff;						 \
+	  R##_e = Y##_e;						 \
+	  if (X##_e == 0)						 \
+	    {								 \
+	      /* X is zero or denormalized.  */				 \
+	      if (_FP_FRAC_ZEROP_##wc(X))				 \
+		{							 \
+		  _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, Y);			 \
+		  _FP_FRAC_COPY_##wc(R, Y);				 \
+		  goto add_done;					 \
+		}							 \
+	      else							 \
+		{							 \
+		  FP_SET_EXCEPTION(FP_EX_DENORM);			 \
+		  ediff--;						 \
+		  if (ediff == 0)					 \
+		    {							 \
+		      _FP_FRAC_ADD_##wc(R, Y, X);			 \
+		      goto add3;					 \
+		    }							 \
+		  if (Y##_e == _FP_EXPMAX_##fs)				 \
+		    {							 \
+		      _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, Y);		 \
+		      _FP_FRAC_COPY_##wc(R, Y);				 \
+		      goto add_done;					 \
+		    }							 \
+		  goto add2;						 \
+		}							 \
+	    }								 \
+	  else if (Y##_e == _FP_EXPMAX_##fs)				 \
+	    {								 \
+	      /* Y is NaN or Inf, X is normal.  */			 \
+	      _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, Y);			 \
+	      _FP_FRAC_COPY_##wc(R, Y);					 \
+	      goto add_done;						 \
+	    }								 \
+									 \
+	  /* Insert implicit MSB of X.  */				 \
+	  _FP_FRAC_HIGH_##fs(X) |= _FP_IMPLBIT_SH_##fs;			 \
+									 \
+	add2:								 \
+	  /* Shift the mantissa of X to the right EDIFF steps;		 \
+	     remember to account later for the implicit MSB of Y.  */	 \
+	  if (ediff <= _FP_WFRACBITS_##fs)				 \
+	    _FP_FRAC_SRS_##wc(X, ediff, _FP_WFRACBITS_##fs);		 \
+	  else if (!_FP_FRAC_ZEROP_##wc(X))				 \
+	    _FP_FRAC_SET_##wc(X, _FP_MINFRAC_##wc);			 \
+	  _FP_FRAC_ADD_##wc(R, Y, X);					 \
+	}								 \
+      else								 \
+	{								 \
+	  /* ediff == 0.  */						 \
+	  if (!_FP_EXP_NORMAL(fs, wc, X))				 \
+	    {								 \
+	      if (X##_e == 0)						 \
+		{							 \
+		  /* X and Y are zero or denormalized.  */		 \
+		  R##_e = 0;						 \
+		  if (_FP_FRAC_ZEROP_##wc(X))				 \
+		    {							 \
+		      if (!_FP_FRAC_ZEROP_##wc(Y))			 \
+			FP_SET_EXCEPTION(FP_EX_DENORM);			 \
+		      _FP_FRAC_COPY_##wc(R, Y);				 \
+		      goto add_done;					 \
+		    }							 \
+		  else if (_FP_FRAC_ZEROP_##wc(Y))			 \
+		    {							 \
+		      FP_SET_EXCEPTION(FP_EX_DENORM);			 \
+		      _FP_FRAC_COPY_##wc(R, X);				 \
+		      goto add_done;					 \
+		    }							 \
+		  else							 \
+		    {							 \
+		      FP_SET_EXCEPTION(FP_EX_DENORM);			 \
+		      _FP_FRAC_ADD_##wc(R, X, Y);			 \
+		      if (_FP_FRAC_HIGH_##fs(R) & _FP_IMPLBIT_SH_##fs)	 \
+			{						 \
+			  /* Normalized result.  */			 \
+			  _FP_FRAC_HIGH_##fs(R)				 \
+			    &= ~(_FP_W_TYPE)_FP_IMPLBIT_SH_##fs;	 \
+			  R##_e = 1;					 \
+			}						 \
+		      goto add_done;					 \
+		    }							 \
+		}							 \
+	      else							 \
+		{							 \
+		  /* X and Y are NaN or Inf.  */			 \
+		  _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X);			 \
+		  _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, Y);			 \
+		  R##_e = _FP_EXPMAX_##fs;				 \
+		  if (_FP_FRAC_ZEROP_##wc(X))				 \
+		    _FP_FRAC_COPY_##wc(R, Y);				 \
+		  else if (_FP_FRAC_ZEROP_##wc(Y))			 \
+		    _FP_FRAC_COPY_##wc(R, X);				 \
+		  else							 \
+		    _FP_CHOOSENAN_SEMIRAW(fs, wc, R, X, Y, OP);		 \
+		  goto add_done;					 \
+		}							 \
+	    }								 \
+	  /* The exponents of X and Y, both normal, are equal.  The	 \
+	     implicit MSBs will always add to increase the		 \
+	     exponent.  */						 \
+	  _FP_FRAC_ADD_##wc(R, X, Y);					 \
+	  R##_e = X##_e + 1;						 \
+	  _FP_FRAC_SRS_##wc(R, 1, _FP_WFRACBITS_##fs);			 \
+	  if (R##_e == _FP_EXPMAX_##fs)					 \
+	    /* Overflow to infinity (depending on rounding mode).  */	 \
+	    _FP_OVERFLOW_SEMIRAW(fs, wc, R);				 \
+	  goto add_done;						 \
+	}								 \
+    add3:								 \
+      if (_FP_FRAC_HIGH_##fs(R) & _FP_IMPLBIT_SH_##fs)			 \
+	{								 \
+	  /* Overflow.  */						 \
+	  _FP_FRAC_HIGH_##fs(R) &= ~(_FP_W_TYPE)_FP_IMPLBIT_SH_##fs;	 \
+	  R##_e++;							 \
+	  _FP_FRAC_SRS_##wc(R, 1, _FP_WFRACBITS_##fs);			 \
+	  if (R##_e == _FP_EXPMAX_##fs)					 \
+	    /* Overflow to infinity (depending on rounding mode).  */	 \
+	    _FP_OVERFLOW_SEMIRAW(fs, wc, R);				 \
+	}								 \
+    add_done: ;								 \
+    }									 \
+  else									 \
+    {									 \
+      /* Subtraction.  */						 \
+      int ediff = X##_e - Y##_e;					 \
+      if (ediff > 0)							 \
+	{								 \
+	  R##_e = X##_e;						 \
+	  R##_s = X##_s;						 \
+	  if (Y##_e == 0)						 \
+	    {								 \
+	      /* Y is zero or denormalized.  */				 \
+	      if (_FP_FRAC_ZEROP_##wc(Y))				 \
+		{							 \
+		  _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X);			 \
+		  _FP_FRAC_COPY_##wc(R, X);				 \
+		  goto sub_done;					 \
+		}							 \
+	      else							 \
+		{							 \
+		  FP_SET_EXCEPTION(FP_EX_DENORM);			 \
+		  ediff--;						 \
+		  if (ediff == 0)					 \
+		    {							 \
+		      _FP_FRAC_SUB_##wc(R, X, Y);			 \
+		      goto sub3;					 \
+		    }							 \
+		  if (X##_e == _FP_EXPMAX_##fs)				 \
+		    {							 \
+		      _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X);		 \
+		      _FP_FRAC_COPY_##wc(R, X);				 \
+		      goto sub_done;					 \
+		    }							 \
+		  goto sub1;						 \
+		}							 \
+	    }								 \
+	  else if (X##_e == _FP_EXPMAX_##fs)				 \
+	    {								 \
+	      /* X is NaN or Inf, Y is normal.  */			 \
+	      _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X);			 \
+	      _FP_FRAC_COPY_##wc(R, X);					 \
+	      goto sub_done;						 \
+	    }								 \
+									 \
+	  /* Insert implicit MSB of Y.  */				 \
+	  _FP_FRAC_HIGH_##fs(Y) |= _FP_IMPLBIT_SH_##fs;			 \
+									 \
+	sub1:								 \
+	  /* Shift the mantissa of Y to the right EDIFF steps;		 \
+	     remember to account later for the implicit MSB of X.  */	 \
+	  if (ediff <= _FP_WFRACBITS_##fs)				 \
+	    _FP_FRAC_SRS_##wc(Y, ediff, _FP_WFRACBITS_##fs);		 \
+	  else if (!_FP_FRAC_ZEROP_##wc(Y))				 \
+	    _FP_FRAC_SET_##wc(Y, _FP_MINFRAC_##wc);			 \
+	  _FP_FRAC_SUB_##wc(R, X, Y);					 \
+	}								 \
+      else if (ediff < 0)						 \
+	{								 \
+	  ediff = -ediff;						 \
+	  R##_e = Y##_e;						 \
+	  R##_s = Y##_s;						 \
+	  if (X##_e == 0)						 \
+	    {								 \
+	      /* X is zero or denormalized.  */				 \
+	      if (_FP_FRAC_ZEROP_##wc(X))				 \
+		{							 \
+		  _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, Y);			 \
+		  _FP_FRAC_COPY_##wc(R, Y);				 \
+		  goto sub_done;					 \
+		}							 \
+	      else							 \
+		{							 \
+		  FP_SET_EXCEPTION(FP_EX_DENORM);			 \
+		  ediff--;						 \
+		  if (ediff == 0)					 \
+		    {							 \
+		      _FP_FRAC_SUB_##wc(R, Y, X);			 \
+		      goto sub3;					 \
+		    }							 \
+		  if (Y##_e == _FP_EXPMAX_##fs)				 \
+		    {							 \
+		      _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, Y);		 \
+		      _FP_FRAC_COPY_##wc(R, Y);				 \
+		      goto sub_done;					 \
+		    }							 \
+		  goto sub2;						 \
+		}							 \
+	    }								 \
+	  else if (Y##_e == _FP_EXPMAX_##fs)				 \
+	    {								 \
+	      /* Y is NaN or Inf, X is normal.  */			 \
+	      _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, Y);			 \
+	      _FP_FRAC_COPY_##wc(R, Y);					 \
+	      goto sub_done;						 \
+	    }								 \
+									 \
+	  /* Insert implicit MSB of X.  */				 \
+	  _FP_FRAC_HIGH_##fs(X) |= _FP_IMPLBIT_SH_##fs;			 \
+									 \
+	sub2:								 \
+	  /* Shift the mantissa of X to the right EDIFF steps;		 \
+	     remember to account later for the implicit MSB of Y.  */	 \
+	  if (ediff <= _FP_WFRACBITS_##fs)				 \
+	    _FP_FRAC_SRS_##wc(X, ediff, _FP_WFRACBITS_##fs);		 \
+	  else if (!_FP_FRAC_ZEROP_##wc(X))				 \
+	    _FP_FRAC_SET_##wc(X, _FP_MINFRAC_##wc);			 \
+	  _FP_FRAC_SUB_##wc(R, Y, X);					 \
+	}								 \
+      else								 \
+	{								 \
+	  /* ediff == 0.  */						 \
+	  if (!_FP_EXP_NORMAL(fs, wc, X))				 \
+	    {								 \
+	      if (X##_e == 0)						 \
+		{							 \
+		  /* X and Y are zero or denormalized.  */		 \
+		  R##_e = 0;						 \
+		  if (_FP_FRAC_ZEROP_##wc(X))				 \
+		    {							 \
+		      _FP_FRAC_COPY_##wc(R, Y);				 \
+		      if (_FP_FRAC_ZEROP_##wc(Y))			 \
+			R##_s = (FP_ROUNDMODE == FP_RND_MINF);		 \
+		      else						 \
+			{						 \
+			  FP_SET_EXCEPTION(FP_EX_DENORM);		 \
+			  R##_s = Y##_s;				 \
+			}						 \
+		      goto sub_done;					 \
+		    }							 \
+		  else if (_FP_FRAC_ZEROP_##wc(Y))			 \
+		    {							 \
+		      FP_SET_EXCEPTION(FP_EX_DENORM);			 \
+		      _FP_FRAC_COPY_##wc(R, X);				 \
+		      R##_s = X##_s;					 \
+		      goto sub_done;					 \
+		    }							 \
+		  else							 \
+		    {							 \
+		      FP_SET_EXCEPTION(FP_EX_DENORM);			 \
+		      _FP_FRAC_SUB_##wc(R, X, Y);			 \
+		      R##_s = X##_s;					 \
+		      if (_FP_FRAC_HIGH_##fs(R) & _FP_IMPLBIT_SH_##fs)	 \
+			{						 \
+			  /* |X| < |Y|, negate result.  */		 \
+			  _FP_FRAC_SUB_##wc(R, Y, X);			 \
+			  R##_s = Y##_s;				 \
+			}						 \
+		      else if (_FP_FRAC_ZEROP_##wc(R))			 \
+			R##_s = (FP_ROUNDMODE == FP_RND_MINF);		 \
+		      goto sub_done;					 \
+		    }							 \
+		}							 \
+	      else							 \
+		{							 \
+		  /* X and Y are NaN or Inf, of opposite signs.  */	 \
+		  _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X);			 \
+		  _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, Y);			 \
+		  R##_e = _FP_EXPMAX_##fs;				 \
+		  if (_FP_FRAC_ZEROP_##wc(X))				 \
+		    {							 \
+		      if (_FP_FRAC_ZEROP_##wc(Y))			 \
+			{						 \
+			  /* Inf - Inf.  */				 \
+			  R##_s = _FP_NANSIGN_##fs;			 \
+			  _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs);	 \
+			  _FP_FRAC_SLL_##wc(R, _FP_WORKBITS);		 \
+			  FP_SET_EXCEPTION(FP_EX_INVALID);		 \
+			}						 \
+		      else						 \
+			{						 \
+			  /* Inf - NaN.  */				 \
+			  R##_s = Y##_s;				 \
+			  _FP_FRAC_COPY_##wc(R, Y);			 \
+			}						 \
+		    }							 \
+		  else							 \
+		    {							 \
+		      if (_FP_FRAC_ZEROP_##wc(Y))			 \
+			{						 \
+			  /* NaN - Inf.  */				 \
+			  R##_s = X##_s;				 \
+			  _FP_FRAC_COPY_##wc(R, X);			 \
+			}						 \
+		      else						 \
+			{						 \
+			  /* NaN - NaN.  */				 \
+			  _FP_CHOOSENAN_SEMIRAW(fs, wc, R, X, Y, OP);	 \
+			}						 \
+		    }							 \
+		  goto sub_done;					 \
+		}							 \
+	    }								 \
+	  /* The exponents of X and Y, both normal, are equal.  The	 \
+	     implicit MSBs cancel.  */					 \
+	  R##_e = X##_e;						 \
+	  _FP_FRAC_SUB_##wc(R, X, Y);					 \
+	  R##_s = X##_s;						 \
+	  if (_FP_FRAC_HIGH_##fs(R) & _FP_IMPLBIT_SH_##fs)		 \
+	    {								 \
+	      /* |X| < |Y|, negate result.  */				 \
+	      _FP_FRAC_SUB_##wc(R, Y, X);				 \
+	      R##_s = Y##_s;						 \
+	    }								 \
+	  else if (_FP_FRAC_ZEROP_##wc(R))				 \
+	    {								 \
+	      R##_e = 0;						 \
+	      R##_s = (FP_ROUNDMODE == FP_RND_MINF);			 \
+	      goto sub_done;						 \
+	    }								 \
+	  goto norm;							 \
+	}								 \
+    sub3:								 \
+      if (_FP_FRAC_HIGH_##fs(R) & _FP_IMPLBIT_SH_##fs)			 \
+	{								 \
+	  int diff;							 \
+	  /* Carry into most significant bit of larger one of X and Y,	 \
+	     canceling it; renormalize.  */				 \
+	  _FP_FRAC_HIGH_##fs(R) &= _FP_IMPLBIT_SH_##fs - 1;		 \
+	norm:								 \
+	  _FP_FRAC_CLZ_##wc(diff, R);					 \
+	  diff -= _FP_WFRACXBITS_##fs;					 \
+	  _FP_FRAC_SLL_##wc(R, diff);					 \
+	  if (R##_e <= diff)						 \
+	    {								 \
+	      /* R is denormalized.  */					 \
+	      diff = diff - R##_e + 1;					 \
+	      _FP_FRAC_SRS_##wc(R, diff, _FP_WFRACBITS_##fs);		 \
+	      R##_e = 0;						 \
+	    }								 \
+	  else								 \
+	    {								 \
+	      R##_e -= diff;						 \
+	      _FP_FRAC_HIGH_##fs(R) &= ~(_FP_W_TYPE)_FP_IMPLBIT_SH_##fs; \
+	    }								 \
+	}								 \
+    sub_done: ;								 \
+    }									 \
+} while (0)
+
+#define _FP_ADD(fs, wc, R, X, Y) _FP_ADD_INTERNAL(fs, wc, R, X, Y, '+')
+#define _FP_SUB(fs, wc, R, X, Y)					    \
+  do {									    \
+    if (!(Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(Y))) Y##_s ^= 1; \
+    _FP_ADD_INTERNAL(fs, wc, R, X, Y, '-');				    \
+  } while (0)
+
+
+/*
+ * Main negation routine.  FIXME -- when we care about setting exception
+ * bits reliably, this will not do.  We should examine all of the fp classes.
+ */
+
+#define _FP_NEG(fs, wc, R, X)		\
+  do {					\
+    _FP_FRAC_COPY_##wc(R, X);		\
+    R##_c = X##_c;			\
+    R##_e = X##_e;			\
+    R##_s = 1 ^ X##_s;			\
+  } while (0)
+
+
+/*
+ * Main multiplication routine.  The input values should be cooked.
+ */
+
+#define _FP_MUL(fs, wc, R, X, Y)			\
+do {							\
+  R##_s = X##_s ^ Y##_s;				\
+  switch (_FP_CLS_COMBINE(X##_c, Y##_c))		\
+  {							\
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL):	\
+    R##_c = FP_CLS_NORMAL;				\
+    R##_e = X##_e + Y##_e + 1;				\
+							\
+    _FP_MUL_MEAT_##fs(R,X,Y);				\
+							\
+    if (_FP_FRAC_OVERP_##wc(fs, R))			\
+      _FP_FRAC_SRS_##wc(R, 1, _FP_WFRACBITS_##fs);	\
+    else						\
+      R##_e--;						\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN):		\
+    _FP_CHOOSENAN(fs, wc, R, X, Y, '*');		\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL):	\
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF):		\
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO):		\
+    R##_s = X##_s;					\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF):		\
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL):	\
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL):	\
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO):	\
+    _FP_FRAC_COPY_##wc(R, X);				\
+    R##_c = X##_c;					\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN):	\
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN):		\
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN):		\
+    R##_s = Y##_s;					\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF):	\
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO):	\
+    _FP_FRAC_COPY_##wc(R, Y);				\
+    R##_c = Y##_c;					\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO):		\
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF):		\
+    R##_s = _FP_NANSIGN_##fs;				\
+    R##_c = FP_CLS_NAN;					\
+    _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs);		\
+    FP_SET_EXCEPTION(FP_EX_INVALID);			\
+    break;						\
+							\
+  default:						\
+    abort();						\
+  }							\
+} while (0)
+
+
+/*
+ * Main division routine.  The input values should be cooked.
+ */
+
+#define _FP_DIV(fs, wc, R, X, Y)			\
+do {							\
+  R##_s = X##_s ^ Y##_s;				\
+  switch (_FP_CLS_COMBINE(X##_c, Y##_c))		\
+  {							\
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL):	\
+    R##_c = FP_CLS_NORMAL;				\
+    R##_e = X##_e - Y##_e;				\
+							\
+    _FP_DIV_MEAT_##fs(R,X,Y);				\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN):		\
+    _FP_CHOOSENAN(fs, wc, R, X, Y, '/');		\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL):	\
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF):		\
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO):		\
+    R##_s = X##_s;					\
+    _FP_FRAC_COPY_##wc(R, X);				\
+    R##_c = X##_c;					\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN):	\
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN):		\
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN):		\
+    R##_s = Y##_s;					\
+    _FP_FRAC_COPY_##wc(R, Y);				\
+    R##_c = Y##_c;					\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF):	\
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF):		\
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL):	\
+    R##_c = FP_CLS_ZERO;				\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO):	\
+    FP_SET_EXCEPTION(FP_EX_DIVZERO);			\
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO):		\
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL):	\
+    R##_c = FP_CLS_INF;					\
+    break;						\
+							\
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF):		\
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO):	\
+    R##_s = _FP_NANSIGN_##fs;				\
+    R##_c = FP_CLS_NAN;					\
+    _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs);		\
+    FP_SET_EXCEPTION(FP_EX_INVALID);			\
+    break;						\
+							\
+  default:						\
+    abort();						\
+  }							\
+} while (0)
+
+
+/*
+ * Main differential comparison routine.  The inputs should be raw not
+ * cooked.  The return is -1,0,1 for normal values, 2 otherwise.
+ */
+
+#define _FP_CMP(fs, wc, ret, X, Y, un)					\
+  do {									\
+    /* NANs are unordered */						\
+    if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(X))		\
+	|| (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(Y)))	\
+      {									\
+	ret = un;							\
+      }									\
+    else								\
+      {									\
+	int __is_zero_x;						\
+	int __is_zero_y;						\
+									\
+	__is_zero_x = (!X##_e && _FP_FRAC_ZEROP_##wc(X)) ? 1 : 0;	\
+	__is_zero_y = (!Y##_e && _FP_FRAC_ZEROP_##wc(Y)) ? 1 : 0;	\
+									\
+	if (__is_zero_x && __is_zero_y)					\
+		ret = 0;						\
+	else if (__is_zero_x)						\
+		ret = Y##_s ? 1 : -1;					\
+	else if (__is_zero_y)						\
+		ret = X##_s ? -1 : 1;					\
+	else if (X##_s != Y##_s)					\
+	  ret = X##_s ? -1 : 1;						\
+	else if (X##_e > Y##_e)						\
+	  ret = X##_s ? -1 : 1;						\
+	else if (X##_e < Y##_e)						\
+	  ret = X##_s ? 1 : -1;						\
+	else if (_FP_FRAC_GT_##wc(X, Y))				\
+	  ret = X##_s ? -1 : 1;						\
+	else if (_FP_FRAC_GT_##wc(Y, X))				\
+	  ret = X##_s ? 1 : -1;						\
+	else								\
+	  ret = 0;							\
+      }									\
+  } while (0)
+
+
+/* Simplification for strict equality.  */
+
+#define _FP_CMP_EQ(fs, wc, ret, X, Y)					    \
+  do {									    \
+    /* NANs are unordered */						    \
+    if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(X))		    \
+	|| (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(Y)))	    \
+      {									    \
+	ret = 1;							    \
+      }									    \
+    else								    \
+      {									    \
+	ret = !(X##_e == Y##_e						    \
+		&& _FP_FRAC_EQ_##wc(X, Y)				    \
+		&& (X##_s == Y##_s || (!X##_e && _FP_FRAC_ZEROP_##wc(X)))); \
+      }									    \
+  } while (0)
+
+/* Version to test unordered.  */
+
+#define _FP_CMP_UNORD(fs, wc, ret, X, Y)				\
+  do {									\
+    ret = ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(X))	\
+	   || (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(Y)));	\
+  } while (0)
+
+/*
+ * Main square root routine.  The input value should be cooked.
+ */
+
+#define _FP_SQRT(fs, wc, R, X)						\
+do {									\
+    _FP_FRAC_DECL_##wc(T); _FP_FRAC_DECL_##wc(S);			\
+    _FP_W_TYPE q;							\
+    switch (X##_c)							\
+    {									\
+    case FP_CLS_NAN:							\
+	_FP_FRAC_COPY_##wc(R, X);					\
+	R##_s = X##_s;							\
+    	R##_c = FP_CLS_NAN;						\
+    	break;								\
+    case FP_CLS_INF:							\
+    	if (X##_s)							\
+    	  {								\
+    	    R##_s = _FP_NANSIGN_##fs;					\
+	    R##_c = FP_CLS_NAN; /* NAN */				\
+	    _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs);			\
+	    FP_SET_EXCEPTION(FP_EX_INVALID);				\
+    	  }								\
+    	else								\
+    	  {								\
+    	    R##_s = 0;							\
+    	    R##_c = FP_CLS_INF; /* sqrt(+inf) = +inf */			\
+    	  }								\
+    	break;								\
+    case FP_CLS_ZERO:							\
+	R##_s = X##_s;							\
+	R##_c = FP_CLS_ZERO; /* sqrt(+-0) = +-0 */			\
+	break;								\
+    case FP_CLS_NORMAL:							\
+    	R##_s = 0;							\
+        if (X##_s)							\
+          {								\
+	    R##_c = FP_CLS_NAN; /* sNAN */				\
+	    R##_s = _FP_NANSIGN_##fs;					\
+	    _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs);			\
+	    FP_SET_EXCEPTION(FP_EX_INVALID);				\
+	    break;							\
+          }								\
+    	R##_c = FP_CLS_NORMAL;						\
+        if (X##_e & 1)							\
+          _FP_FRAC_SLL_##wc(X, 1);					\
+        R##_e = X##_e >> 1;						\
+        _FP_FRAC_SET_##wc(S, _FP_ZEROFRAC_##wc);			\
+        _FP_FRAC_SET_##wc(R, _FP_ZEROFRAC_##wc);			\
+        q = _FP_OVERFLOW_##fs >> 1;					\
+        _FP_SQRT_MEAT_##wc(R, S, T, X, q);				\
+    }									\
+  } while (0)
+
+/*
+ * Convert from FP to integer.  Input is raw.
+ */
+
+/* RSIGNED can have following values:
+ * 0:  the number is required to be 0..(2^rsize)-1, if not, NV is set plus
+ *     the result is either 0 or (2^rsize)-1 depending on the sign in such
+ *     case.
+ * 1:  the number is required to be -(2^(rsize-1))..(2^(rsize-1))-1, if not,
+ *     NV is set plus the result is either -(2^(rsize-1)) or (2^(rsize-1))-1
+ *     depending on the sign in such case.
+ * -1: the number is required to be -(2^(rsize-1))..(2^rsize)-1, if not, NV is
+ *     set plus the result is either -(2^(rsize-1)) or (2^(rsize-1))-1
+ *     depending on the sign in such case.
+ */
+#define _FP_TO_INT(fs, wc, r, X, rsize, rsigned)			\
+do {									\
+  if (X##_e < _FP_EXPBIAS_##fs)						\
+    {									\
+      r = 0;								\
+      if (X##_e == 0)							\
+	{								\
+	  if (!_FP_FRAC_ZEROP_##wc(X))					\
+	    {								\
+	      FP_SET_EXCEPTION(FP_EX_INEXACT);				\
+	      FP_SET_EXCEPTION(FP_EX_DENORM);				\
+	    }								\
+	}								\
+      else								\
+	FP_SET_EXCEPTION(FP_EX_INEXACT);				\
+    }									\
+  else if (X##_e >= _FP_EXPBIAS_##fs + rsize - (rsigned > 0 || X##_s)	\
+	   || (!rsigned && X##_s))					\
+    {									\
+      /* Overflow or converting to the most negative integer.  */	\
+      if (rsigned)							\
+	{								\
+	  r = 1;							\
+	  r <<= rsize - 1;						\
+	  r -= 1 - X##_s;						\
+	} else {							\
+	  r = 0;							\
+	  if (X##_s)							\
+	    r = ~r;							\
+	}								\
+									\
+      if (rsigned && X##_s && X##_e == _FP_EXPBIAS_##fs + rsize - 1)	\
+	{								\
+	  /* Possibly converting to most negative integer; check the	\
+	     mantissa.  */						\
+	  int inexact = 0;						\
+	  (void)((_FP_FRACBITS_##fs > rsize)				\
+		 ? ({ _FP_FRAC_SRST_##wc(X, inexact,			\
+					 _FP_FRACBITS_##fs - rsize,	\
+					 _FP_FRACBITS_##fs); 0; })	\
+		 : 0);							\
+	  if (!_FP_FRAC_ZEROP_##wc(X))					\
+	    FP_SET_EXCEPTION(FP_EX_INVALID);				\
+	  else if (inexact)						\
+	    FP_SET_EXCEPTION(FP_EX_INEXACT);				\
+	}								\
+      else								\
+	FP_SET_EXCEPTION(FP_EX_INVALID);				\
+    }									\
+  else									\
+    {									\
+      _FP_FRAC_HIGH_RAW_##fs(X) |= _FP_IMPLBIT_##fs;			\
+      if (X##_e >= _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs - 1)		\
+	{								\
+	  _FP_FRAC_ASSEMBLE_##wc(r, X, rsize);				\
+	  r <<= X##_e - _FP_EXPBIAS_##fs - _FP_FRACBITS_##fs + 1;	\
+	}								\
+      else								\
+	{								\
+	  int inexact;							\
+	  _FP_FRAC_SRST_##wc(X, inexact,				\
+			    (_FP_FRACBITS_##fs + _FP_EXPBIAS_##fs - 1	\
+			     - X##_e),					\
+			    _FP_FRACBITS_##fs);				\
+	  if (inexact)							\
+	    FP_SET_EXCEPTION(FP_EX_INEXACT);				\
+	  _FP_FRAC_ASSEMBLE_##wc(r, X, rsize);				\
+	}								\
+      if (rsigned && X##_s)						\
+	r = -r;								\
+    }									\
+} while (0)
+
+/* Convert integer to fp.  Output is raw.  RTYPE is unsigned even if
+   input is signed.  */
+#define _FP_FROM_INT(fs, wc, X, r, rsize, rtype)			     \
+  do {									     \
+    if (r)								     \
+      {									     \
+	rtype ur_;							     \
+									     \
+	if ((X##_s = (r < 0)))						     \
+	  r = -(rtype)r;						     \
+									     \
+	ur_ = (rtype) r;						     \
+	(void)((rsize <= _FP_W_TYPE_SIZE)				     \
+	       ? ({							     \
+		    int lz_;						     \
+		    __FP_CLZ(lz_, (_FP_W_TYPE)ur_);			     \
+		    X##_e = _FP_EXPBIAS_##fs + _FP_W_TYPE_SIZE - 1 - lz_;    \
+		  })							     \
+	       : ((rsize <= 2 * _FP_W_TYPE_SIZE)			     \
+		  ? ({							     \
+		       int lz_;						     \
+		       __FP_CLZ_2(lz_, (_FP_W_TYPE)(ur_ >> _FP_W_TYPE_SIZE), \
+				  (_FP_W_TYPE)ur_);			     \
+		       X##_e = (_FP_EXPBIAS_##fs + 2 * _FP_W_TYPE_SIZE - 1   \
+				- lz_);					     \
+		     })							     \
+		  : (abort(), 0)));					     \
+									     \
+	if (rsize - 1 + _FP_EXPBIAS_##fs >= _FP_EXPMAX_##fs		     \
+	    && X##_e >= _FP_EXPMAX_##fs)				     \
+	  {								     \
+	    /* Exponent too big; overflow to infinity.  (May also	     \
+	       happen after rounding below.)  */			     \
+	    _FP_OVERFLOW_SEMIRAW(fs, wc, X);				     \
+	    goto pack_semiraw;						     \
+	  }								     \
+									     \
+	if (rsize <= _FP_FRACBITS_##fs					     \
+	    || X##_e < _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs)		     \
+	  {								     \
+	    /* Exactly representable; shift left.  */			     \
+	    _FP_FRAC_DISASSEMBLE_##wc(X, ur_, rsize);			     \
+	    _FP_FRAC_SLL_##wc(X, (_FP_EXPBIAS_##fs			     \
+				  + _FP_FRACBITS_##fs - 1 - X##_e));	     \
+	  }								     \
+	else								     \
+	  {								     \
+	    /* More bits in integer than in floating type; need to	     \
+	       round.  */						     \
+	    if (_FP_EXPBIAS_##fs + _FP_WFRACBITS_##fs - 1 < X##_e)	     \
+	      ur_ = ((ur_ >> (X##_e - _FP_EXPBIAS_##fs			     \
+			      - _FP_WFRACBITS_##fs + 1))		     \
+		     | ((ur_ << (rsize - (X##_e - _FP_EXPBIAS_##fs	     \
+					  - _FP_WFRACBITS_##fs + 1)))	     \
+			!= 0));						     \
+	    _FP_FRAC_DISASSEMBLE_##wc(X, ur_, rsize);			     \
+	    if ((_FP_EXPBIAS_##fs + _FP_WFRACBITS_##fs - 1 - X##_e) > 0)     \
+	      _FP_FRAC_SLL_##wc(X, (_FP_EXPBIAS_##fs			     \
+				    + _FP_WFRACBITS_##fs - 1 - X##_e));	     \
+	    _FP_FRAC_HIGH_##fs(X) &= ~(_FP_W_TYPE)_FP_IMPLBIT_SH_##fs;	     \
+	  pack_semiraw:							     \
+	    _FP_PACK_SEMIRAW(fs, wc, X);				     \
+	  }								     \
+      }									     \
+    else								     \
+      {									     \
+	X##_s = 0;							     \
+	X##_e = 0;							     \
+	_FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);			     \
+      }									     \
+  } while (0)
+
+
+/* Extend from a narrower floating-point format to a wider one.  Input
+   and output are raw.  */
+#define FP_EXTEND(dfs,sfs,dwc,swc,D,S)					 \
+do {									 \
+  if (_FP_FRACBITS_##dfs < _FP_FRACBITS_##sfs				 \
+      || (_FP_EXPMAX_##dfs - _FP_EXPBIAS_##dfs				 \
+	  < _FP_EXPMAX_##sfs - _FP_EXPBIAS_##sfs)			 \
+      || (_FP_EXPBIAS_##dfs < _FP_EXPBIAS_##sfs + _FP_FRACBITS_##sfs - 1 \
+	  && _FP_EXPBIAS_##dfs != _FP_EXPBIAS_##sfs))			 \
+    abort();								 \
+  D##_s = S##_s;							 \
+  _FP_FRAC_COPY_##dwc##_##swc(D, S);					 \
+  if (_FP_EXP_NORMAL(sfs, swc, S))					 \
+    {									 \
+      D##_e = S##_e + _FP_EXPBIAS_##dfs - _FP_EXPBIAS_##sfs;		 \
+      _FP_FRAC_SLL_##dwc(D, (_FP_FRACBITS_##dfs - _FP_FRACBITS_##sfs));	 \
+    }									 \
+  else									 \
+    {									 \
+      if (S##_e == 0)							 \
+	{								 \
+	  if (_FP_FRAC_ZEROP_##swc(S))					 \
+	    D##_e = 0;							 \
+	  else if (_FP_EXPBIAS_##dfs					 \
+		   < _FP_EXPBIAS_##sfs + _FP_FRACBITS_##sfs - 1)	 \
+	    {								 \
+	      FP_SET_EXCEPTION(FP_EX_DENORM);				 \
+	      _FP_FRAC_SLL_##dwc(D, (_FP_FRACBITS_##dfs			 \
+				     - _FP_FRACBITS_##sfs));		 \
+	      D##_e = 0;						 \
+	    }								 \
+	  else								 \
+	    {								 \
+	      int _lz;							 \
+	      FP_SET_EXCEPTION(FP_EX_DENORM);				 \
+	      _FP_FRAC_CLZ_##swc(_lz, S);				 \
+	      _FP_FRAC_SLL_##dwc(D,					 \
+				 _lz + _FP_FRACBITS_##dfs		 \
+				 - _FP_FRACTBITS_##sfs);		 \
+	      D##_e = (_FP_EXPBIAS_##dfs - _FP_EXPBIAS_##sfs + 1	 \
+		       + _FP_FRACXBITS_##sfs - _lz);			 \
+	    }								 \
+	}								 \
+      else								 \
+	{								 \
+	  D##_e = _FP_EXPMAX_##dfs;					 \
+	  if (!_FP_FRAC_ZEROP_##swc(S))					 \
+	    {								 \
+	      if (!(_FP_FRAC_HIGH_RAW_##sfs(S) & _FP_QNANBIT_##sfs))	 \
+		FP_SET_EXCEPTION(FP_EX_INVALID);			 \
+	      _FP_FRAC_SLL_##dwc(D, (_FP_FRACBITS_##dfs			 \
+				     - _FP_FRACBITS_##sfs));		 \
+	    }								 \
+	}								 \
+    }									 \
+} while (0)
+
+/* Truncate from a wider floating-point format to a narrower one.
+   Input and output are semi-raw.  */
+#define FP_TRUNC(dfs,sfs,dwc,swc,D,S)					     \
+do {									     \
+  if (_FP_FRACBITS_##sfs < _FP_FRACBITS_##dfs				     \
+      || (_FP_EXPBIAS_##sfs < _FP_EXPBIAS_##dfs + _FP_FRACBITS_##dfs - 1     \
+	  && _FP_EXPBIAS_##sfs != _FP_EXPBIAS_##dfs))			     \
+    abort();								     \
+  D##_s = S##_s;							     \
+  if (_FP_EXP_NORMAL(sfs, swc, S))					     \
+    {									     \
+      D##_e = S##_e + _FP_EXPBIAS_##dfs - _FP_EXPBIAS_##sfs;		     \
+      if (D##_e >= _FP_EXPMAX_##dfs)					     \
+	_FP_OVERFLOW_SEMIRAW(dfs, dwc, D);				     \
+      else								     \
+	{								     \
+	  if (D##_e <= 0)						     \
+	    {								     \
+	      if (D##_e < 1 - _FP_FRACBITS_##dfs)			     \
+		{							     \
+		  _FP_FRAC_SET_##swc(S, _FP_ZEROFRAC_##swc);		     \
+		  _FP_FRAC_LOW_##swc(S) |= 1;				     \
+		}							     \
+	      else							     \
+		{							     \
+		  _FP_FRAC_HIGH_##sfs(S) |= _FP_IMPLBIT_SH_##sfs;	     \
+		  _FP_FRAC_SRS_##swc(S, (_FP_WFRACBITS_##sfs		     \
+					 - _FP_WFRACBITS_##dfs + 1 - D##_e), \
+				     _FP_WFRACBITS_##sfs);		     \
+		}							     \
+	      D##_e = 0;						     \
+	    }								     \
+	  else								     \
+	    _FP_FRAC_SRS_##swc(S, (_FP_WFRACBITS_##sfs			     \
+				   - _FP_WFRACBITS_##dfs),		     \
+			       _FP_WFRACBITS_##sfs);			     \
+	  _FP_FRAC_COPY_##dwc##_##swc(D, S);				     \
+	}								     \
+    }									     \
+  else									     \
+    {									     \
+      if (S##_e == 0)							     \
+	{								     \
+	  D##_e = 0;							     \
+	  if (_FP_FRAC_ZEROP_##swc(S))					     \
+	    _FP_FRAC_SET_##dwc(D, _FP_ZEROFRAC_##dwc);			     \
+	  else								     \
+	    {								     \
+	      FP_SET_EXCEPTION(FP_EX_DENORM);				     \
+	      if (_FP_EXPBIAS_##sfs					     \
+		  < _FP_EXPBIAS_##dfs + _FP_FRACBITS_##dfs - 1)		     \
+		{							     \
+		  _FP_FRAC_SRS_##swc(S, (_FP_WFRACBITS_##sfs		     \
+					 - _FP_WFRACBITS_##dfs),	     \
+				     _FP_WFRACBITS_##sfs);		     \
+		  _FP_FRAC_COPY_##dwc##_##swc(D, S);			     \
+		}							     \
+	      else							     \
+		{							     \
+		  _FP_FRAC_SET_##dwc(D, _FP_ZEROFRAC_##dwc);		     \
+		  _FP_FRAC_LOW_##dwc(D) |= 1;				     \
+		}							     \
+	    }								     \
+	}								     \
+      else								     \
+	{								     \
+	  D##_e = _FP_EXPMAX_##dfs;					     \
+	  if (_FP_FRAC_ZEROP_##swc(S))					     \
+	    _FP_FRAC_SET_##dwc(D, _FP_ZEROFRAC_##dwc);			     \
+	  else								     \
+	    {								     \
+	      _FP_CHECK_SIGNAN_SEMIRAW(sfs, swc, S);			     \
+	      _FP_FRAC_SRL_##swc(S, (_FP_WFRACBITS_##sfs		     \
+				     - _FP_WFRACBITS_##dfs));		     \
+	      _FP_FRAC_COPY_##dwc##_##swc(D, S);			     \
+	      /* Semi-raw NaN must have all workbits cleared.  */	     \
+	      _FP_FRAC_LOW_##dwc(D)					     \
+		&= ~(_FP_W_TYPE) ((1 << _FP_WORKBITS) - 1);		     \
+	      _FP_FRAC_HIGH_##dfs(D) |= _FP_QNANBIT_SH_##dfs;		     \
+	    }								     \
+	}								     \
+    }									     \
+} while (0)
+
+/*
+ * Helper primitives.
+ */
+
+/* Count leading zeros in a word.  */
+
+#ifndef __FP_CLZ
+/* GCC 3.4 and later provide the builtins for us.  */
+#define __FP_CLZ(r, x)							      \
+  do {									      \
+    if (sizeof (_FP_W_TYPE) == sizeof (unsigned int))			      \
+      r = __builtin_clz (x);						      \
+    else if (sizeof (_FP_W_TYPE) == sizeof (unsigned long))		      \
+      r = __builtin_clzl (x);						      \
+    else if (sizeof (_FP_W_TYPE) == sizeof (unsigned long long))	      \
+      r = __builtin_clzll (x);						      \
+    else								      \
+      abort ();								      \
+  } while (0)
+#endif /* ndef __FP_CLZ */
+
+#define _FP_DIV_HELP_imm(q, r, n, d)		\
+  do {						\
+    q = n / d, r = n % d;			\
+  } while (0)
+
+
+/* A restoring bit-by-bit division primitive.  */
+
+#define _FP_DIV_MEAT_N_loop(fs, wc, R, X, Y)				\
+  do {									\
+    int count = _FP_WFRACBITS_##fs;					\
+    _FP_FRAC_DECL_##wc (u);						\
+    _FP_FRAC_DECL_##wc (v);						\
+    _FP_FRAC_COPY_##wc (u, X);						\
+    _FP_FRAC_COPY_##wc (v, Y);						\
+    _FP_FRAC_SET_##wc (R, _FP_ZEROFRAC_##wc);				\
+    /* Normalize U and V.  */						\
+    _FP_FRAC_SLL_##wc (u, _FP_WFRACXBITS_##fs);				\
+    _FP_FRAC_SLL_##wc (v, _FP_WFRACXBITS_##fs);				\
+    /* First round.  Since the operands are normalized, either the	\
+       first or second bit will be set in the fraction.  Produce a	\
+       normalized result by checking which and adjusting the loop	\
+       count and exponent accordingly.  */				\
+    if (_FP_FRAC_GE_1 (u, v))						\
+      {									\
+	_FP_FRAC_SUB_##wc (u, u, v);					\
+	_FP_FRAC_LOW_##wc (R) |= 1;					\
+	count--;							\
+      }									\
+    else								\
+      R##_e--;								\
+    /* Subsequent rounds.  */						\
+    do {								\
+      int msb = (_FP_WS_TYPE) _FP_FRAC_HIGH_##wc (u) < 0;		\
+      _FP_FRAC_SLL_##wc (u, 1);						\
+      _FP_FRAC_SLL_##wc (R, 1);						\
+      if (msb || _FP_FRAC_GE_1 (u, v))					\
+	{								\
+	  _FP_FRAC_SUB_##wc (u, u, v);					\
+	  _FP_FRAC_LOW_##wc (R) |= 1;					\
+	}								\
+    } while (--count > 0);						\
+    /* If there's anything left in U, the result is inexact.  */	\
+    _FP_FRAC_LOW_##wc (R) |= !_FP_FRAC_ZEROP_##wc (u);			\
+  } while (0)
+
+#define _FP_DIV_MEAT_1_loop(fs, R, X, Y)  _FP_DIV_MEAT_N_loop (fs, 1, R, X, Y)
+#define _FP_DIV_MEAT_2_loop(fs, R, X, Y)  _FP_DIV_MEAT_N_loop (fs, 2, R, X, Y)
+#define _FP_DIV_MEAT_4_loop(fs, R, X, Y)  _FP_DIV_MEAT_N_loop (fs, 4, R, X, Y)
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_add.c b/libc/sysdeps/linux/sparc/soft-fp/q_add.c
new file mode 100644
index 000000000..987c72558
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_add.c
@@ -0,0 +1,39 @@
+/* Software floating-point emulation.
+   Return a + b
+   Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+long double _Q_add(const long double a, const long double b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C);
+  long double c;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_SEMIRAW_Q(A, a);
+  FP_UNPACK_SEMIRAW_Q(B, b);
+  FP_ADD_Q(C, A, B);
+  FP_PACK_SEMIRAW_Q(c, C);
+  FP_HANDLE_EXCEPTIONS;
+  return c;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_cmp.c b/libc/sysdeps/linux/sparc/soft-fp/q_cmp.c
new file mode 100644
index 000000000..a93792ba6
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_cmp.c
@@ -0,0 +1,41 @@
+/* Software floating-point emulation.
+   Compare a and b, return float condition code.
+   Copyright (C) 1997,1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Q_cmp(const long double a, const long double b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B);
+  int r;
+
+  FP_UNPACK_RAW_Q(A, a);
+  FP_UNPACK_RAW_Q(B, b);
+  FP_CMP_Q(r, B, A, 3);
+  if (r == -1) r = 2;
+  if (r == 3 && (FP_ISSIGNAN_Q(A) || FP_ISSIGNAN_Q(B)))
+    FP_SET_EXCEPTION(FP_EX_INVALID);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_cmpe.c b/libc/sysdeps/linux/sparc/soft-fp/q_cmpe.c
new file mode 100644
index 000000000..135d63cc6
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_cmpe.c
@@ -0,0 +1,42 @@
+/* Software floating-point emulation.
+   Compare a and b, return float condition code.
+   Signal exception (unless masked) if unordered.
+   Copyright (C) 1997,1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Q_cmpe(const long double a, const long double b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B);
+  int r;
+
+  FP_UNPACK_RAW_Q(A, a);
+  FP_UNPACK_RAW_Q(B, b);
+  FP_CMP_Q(r, B, A, 3);
+  if (r == -1) r = 2;
+  if (r == 3)
+    FP_SET_EXCEPTION(FP_EX_INVALID);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_div.c b/libc/sysdeps/linux/sparc/soft-fp/q_div.c
new file mode 100644
index 000000000..86db5ed9e
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_div.c
@@ -0,0 +1,39 @@
+/* Software floating-point emulation.
+   Return a / b
+   Copyright (C) 1997, 1999, 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+long double _Q_div(const long double a, const long double b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C);
+  long double c;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_Q(A, a);
+  FP_UNPACK_Q(B, b);
+  FP_DIV_Q(C, A, B);
+  FP_PACK_Q(c, C);
+  FP_HANDLE_EXCEPTIONS;
+  return c;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_dtoq.c b/libc/sysdeps/linux/sparc/soft-fp/q_dtoq.c
new file mode 100644
index 000000000..6b119f450
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_dtoq.c
@@ -0,0 +1,44 @@
+/* Software floating-point emulation.
+   Return (long double)(a)
+   Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "double.h"
+#include "quad.h"
+
+long double _Q_dtoq(const double a)
+{
+  FP_DECL_EX;
+  FP_DECL_D(A);
+  FP_DECL_Q(C);
+  long double c;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_D(A, a);
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+  FP_EXTEND(Q,D,4,2,C,A);
+#else
+  FP_EXTEND(Q,D,2,1,C,A);
+#endif
+  FP_PACK_RAW_Q(c, C);
+  FP_HANDLE_EXCEPTIONS;
+  return c;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_feq.c b/libc/sysdeps/linux/sparc/soft-fp/q_feq.c
new file mode 100644
index 000000000..12cd7972d
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_feq.c
@@ -0,0 +1,40 @@
+/* Software floating-point emulation.
+   Return 1 if a == b
+   Copyright (C) 1997,1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Q_feq(const long double a, const long double b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B);
+  int r;
+
+  FP_UNPACK_RAW_Q(A, a);
+  FP_UNPACK_RAW_Q(B, b);
+  FP_CMP_EQ_Q(r, A, B);
+  if (r && (FP_ISSIGNAN_Q(A) || FP_ISSIGNAN_Q(B)))
+    FP_SET_EXCEPTION(FP_EX_INVALID);
+  FP_HANDLE_EXCEPTIONS;
+
+  return !r;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_fge.c b/libc/sysdeps/linux/sparc/soft-fp/q_fge.c
new file mode 100644
index 000000000..db1fdbe82
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_fge.c
@@ -0,0 +1,40 @@
+/* Software floating-point emulation.
+   Return 1 if a >= b
+   Copyright (C) 1997,1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Q_fge(const long double a, const long double b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B);
+  int r;
+
+  FP_UNPACK_RAW_Q(A, a);
+  FP_UNPACK_RAW_Q(B, b);
+  FP_CMP_Q(r, B, A, 3);
+  if (r == 3)
+    FP_SET_EXCEPTION(FP_EX_INVALID);
+  FP_HANDLE_EXCEPTIONS;
+
+  return (r <= 0);
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_fgt.c b/libc/sysdeps/linux/sparc/soft-fp/q_fgt.c
new file mode 100644
index 000000000..a9f8cd0bb
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_fgt.c
@@ -0,0 +1,40 @@
+/* Software floating-point emulation.
+   Return 1 if a > b
+   Copyright (C) 1997,1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Q_fgt(const long double a, const long double b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B);
+  int r;
+
+  FP_UNPACK_RAW_Q(A, a);
+  FP_UNPACK_RAW_Q(B, b);
+  FP_CMP_Q(r, B, A, 3);
+  if (r == 3)
+    FP_SET_EXCEPTION(FP_EX_INVALID);
+  FP_HANDLE_EXCEPTIONS;
+
+  return (r == -1);
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_fle.c b/libc/sysdeps/linux/sparc/soft-fp/q_fle.c
new file mode 100644
index 000000000..a4b97e6c6
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_fle.c
@@ -0,0 +1,40 @@
+/* Software floating-point emulation.
+   Return 1 if a <= b
+   Copyright (C) 1997,1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Q_fle(const long double a, const long double b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B);
+  int r;
+
+  FP_UNPACK_RAW_Q(A, a);
+  FP_UNPACK_RAW_Q(B, b);
+  FP_CMP_Q(r, B, A, -2);
+  if (r == -2)
+    FP_SET_EXCEPTION(FP_EX_INVALID);
+  FP_HANDLE_EXCEPTIONS;
+
+  return (r >= 0);
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_flt.c b/libc/sysdeps/linux/sparc/soft-fp/q_flt.c
new file mode 100644
index 000000000..3979d6576
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_flt.c
@@ -0,0 +1,40 @@
+/* Software floating-point emulation.
+   Return 1 if a < b
+   Copyright (C) 1997,1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Q_flt(const long double a, const long double b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B);
+  int r;
+
+  FP_UNPACK_RAW_Q(A, a);
+  FP_UNPACK_RAW_Q(B, b);
+  FP_CMP_Q(r, B, A, 3);
+  if (r == 3)
+    FP_SET_EXCEPTION(FP_EX_INVALID);
+  FP_HANDLE_EXCEPTIONS;
+
+  return (r == 1);
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_fne.c b/libc/sysdeps/linux/sparc/soft-fp/q_fne.c
new file mode 100644
index 000000000..a38059bca
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_fne.c
@@ -0,0 +1,40 @@
+/* Software floating-point emulation.
+   Return 1 if a != b
+   Copyright (C) 1997,1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Q_fne(const long double a, const long double b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B);
+  int r;
+
+  FP_UNPACK_RAW_Q(A, a);
+  FP_UNPACK_RAW_Q(B, b);
+  FP_CMP_EQ_Q(r, A, B);
+  if (r && (FP_ISSIGNAN_Q(A) || FP_ISSIGNAN_Q(B)))
+    FP_SET_EXCEPTION(FP_EX_INVALID);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_itoq.c b/libc/sysdeps/linux/sparc/soft-fp/q_itoq.c
new file mode 100644
index 000000000..b50942f75
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_itoq.c
@@ -0,0 +1,38 @@
+/* Software floating-point emulation.
+   Return (long double)(a)
+   Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+long double _Q_itoq(const int a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(C);
+  int b = a;
+  long double c;
+
+  FP_FROM_INT_Q(C, b, 32, unsigned int);
+  FP_PACK_RAW_Q(c, C);
+  FP_CLEAR_EXCEPTIONS;
+  FP_HANDLE_EXCEPTIONS;
+  return c;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_lltoq.c b/libc/sysdeps/linux/sparc/soft-fp/q_lltoq.c
new file mode 100644
index 000000000..f977585e4
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_lltoq.c
@@ -0,0 +1,38 @@
+/* Software floating-point emulation.
+   Return (long double)a
+   Copyright (C) 1997, 1999, 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+long double _Q_lltoq(const long long a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(C);
+  long double c;
+  long long b = a;
+
+  FP_FROM_INT_Q(C, b, 64, unsigned long long);
+  FP_PACK_RAW_Q(c, C);
+  FP_CLEAR_EXCEPTIONS;
+  FP_HANDLE_EXCEPTIONS;
+  return c;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_mul.c b/libc/sysdeps/linux/sparc/soft-fp/q_mul.c
new file mode 100644
index 000000000..61c3a8fac
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_mul.c
@@ -0,0 +1,39 @@
+/* Software floating-point emulation.
+   Return a * b
+   Copyright (C) 1997,1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+long double _Q_mul(const long double a, const long double b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C);
+  long double c;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_Q(A, a);
+  FP_UNPACK_Q(B, b);
+  FP_MUL_Q(C, A, B);
+  FP_PACK_Q(c, C);
+  FP_HANDLE_EXCEPTIONS;
+  return c;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_neg.c b/libc/sysdeps/linux/sparc/soft-fp/q_neg.c
new file mode 100644
index 000000000..14f39e820
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_neg.c
@@ -0,0 +1,47 @@
+/* Software floating-point emulation.
+   Return !a
+   Copyright (C) 1997,1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+long double _Q_neg(const long double a)
+{
+  FP_DECL_EX;
+  long double c = a;
+  
+#if (__BYTE_ORDER == __BIG_ENDIAN)
+  ((UWtype *)&c)[0] ^= (((UWtype)1) << (W_TYPE_SIZE - 1));
+#elif (__BYTE_ORDER == __LITTLE_ENDIAN) && (W_TYPE_SIZE == 64)
+  ((UWtype *)&c)[1] ^= (((UWtype)1) << (W_TYPE_SIZE - 1));
+#elif (__BYTE_ORDER == __LITTLE_ENDIAN) && (W_TYPE_SIZE == 32)
+  ((UWtype *)&c)[3] ^= (((UWtype)1) << (W_TYPE_SIZE - 1));
+#else
+  FP_DECL_Q(A); FP_DECL_Q(C);
+
+  FP_UNPACK_Q(A, a);
+  FP_NEG_Q(C, A);
+  FP_PACK_Q(c, C);
+#endif
+  FP_CLEAR_EXCEPTIONS;
+  FP_HANDLE_EXCEPTIONS;
+  return c;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_qtod.c b/libc/sysdeps/linux/sparc/soft-fp/q_qtod.c
new file mode 100644
index 000000000..82b01ec8a
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_qtod.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+   Return (double)a
+   Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "double.h"
+#include "quad.h"
+
+double _Q_qtod(const long double a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A);
+  FP_DECL_D(R);
+  double r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_SEMIRAW_Q(A, a);
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+  FP_TRUNC(D,Q,2,4,R,A);
+#else
+  FP_TRUNC(D,Q,1,2,R,A);
+#endif
+  FP_PACK_SEMIRAW_D(r, R);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_qtoi.c b/libc/sysdeps/linux/sparc/soft-fp/q_qtoi.c
new file mode 100644
index 000000000..270ba9f67
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_qtoi.c
@@ -0,0 +1,38 @@
+/* Software floating-point emulation.
+   Return (int)a
+   Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#define FP_ROUNDMODE FP_RND_ZERO
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Q_qtoi(const long double a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A);
+  unsigned int r;
+
+  FP_UNPACK_RAW_Q(A, a);
+  FP_TO_INT_Q(r, A, 32, 1);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_qtoll.c b/libc/sysdeps/linux/sparc/soft-fp/q_qtoll.c
new file mode 100644
index 000000000..e0d29019b
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_qtoll.c
@@ -0,0 +1,38 @@
+/* Software floating-point emulation.
+   Return (long long)a
+   Copyright (C) 1997, 1999, 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#define FP_ROUNDMODE FP_RND_ZERO
+#include "soft-fp.h"
+#include "quad.h"
+
+long long _Q_qtoll(const long double a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A);
+  unsigned long long r;
+
+  FP_UNPACK_RAW_Q(A, a);
+  FP_TO_INT_Q(r, A, 64, 1);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_qtos.c b/libc/sysdeps/linux/sparc/soft-fp/q_qtos.c
new file mode 100644
index 000000000..93daa23ca
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_qtos.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+   Return (float)a
+   Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "single.h"
+#include "quad.h"
+
+float _Q_qtos(const long double a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A);
+  FP_DECL_S(R);
+  float r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_SEMIRAW_Q(A, a);
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+  FP_TRUNC(S,Q,1,4,R,A);
+#else
+  FP_TRUNC(S,Q,1,2,R,A);
+#endif
+  FP_PACK_SEMIRAW_S(r, R);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_qtou.c b/libc/sysdeps/linux/sparc/soft-fp/q_qtou.c
new file mode 100644
index 000000000..812b4e06e
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_qtou.c
@@ -0,0 +1,38 @@
+/* Software floating-point emulation.
+   Return (unsigned int)a
+   Copyright (C) 1997, 1999, 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#define FP_ROUNDMODE FP_RND_ZERO
+#include "soft-fp.h"
+#include "quad.h"
+
+unsigned int _Q_qtou(const long double a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A);
+  unsigned int r;
+
+  FP_UNPACK_RAW_Q(A, a);
+  FP_TO_INT_Q(r, A, 32, -1);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_qtoull.c b/libc/sysdeps/linux/sparc/soft-fp/q_qtoull.c
new file mode 100644
index 000000000..7a88c9f38
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_qtoull.c
@@ -0,0 +1,38 @@
+/* Software floating-point emulation.
+   Return (unsigned long long)a
+   Copyright (C) 1997, 1999, 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#define FP_ROUNDMODE FP_RND_ZERO
+#include "soft-fp.h"
+#include "quad.h"
+
+unsigned long long _Q_qtoull(const long double a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A);
+  unsigned long long r;
+
+  FP_UNPACK_RAW_Q(A, a);
+  FP_TO_INT_Q(r, A, 64, -1);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_sqrt.c b/libc/sysdeps/linux/sparc/soft-fp/q_sqrt.c
new file mode 100644
index 000000000..373d02a63
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_sqrt.c
@@ -0,0 +1,39 @@
+/* Software floating-point emulation.
+   Return sqrtl(a)
+   Copyright (C) 1997, 1999, 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+long double _Q_sqrt(const long double a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(C);
+  long double c;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_Q(A, a);
+  FP_SQRT_Q(C, A);
+  FP_PACK_Q(c, C);
+  FP_HANDLE_EXCEPTIONS;
+  return c;
+}
+strong_alias (_Q_sqrt, __ieee754_sqrtl);
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_stoq.c b/libc/sysdeps/linux/sparc/soft-fp/q_stoq.c
new file mode 100644
index 000000000..714d880dd
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_stoq.c
@@ -0,0 +1,43 @@
+/* Software floating-point emulation.
+   c = (long double)(a)
+   Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "single.h"
+#include "quad.h"
+
+long double _Q_stoq(const float a)
+{
+  FP_DECL_EX;
+  FP_DECL_S(A);
+  FP_DECL_Q(C);
+  long double c;
+
+  FP_UNPACK_RAW_S(A, a);
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+  FP_EXTEND(Q,S,4,1,C,A);
+#else
+  FP_EXTEND(Q,S,2,1,C,A);
+#endif
+  FP_PACK_RAW_Q(c, C);
+  FP_HANDLE_EXCEPTIONS;
+  return c;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_sub.c b/libc/sysdeps/linux/sparc/soft-fp/q_sub.c
new file mode 100644
index 000000000..8616727bb
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_sub.c
@@ -0,0 +1,39 @@
+/* Software floating-point emulation.
+   c = a - b
+   Copyright (C) 1997,1999,2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+long double _Q_sub(const long double a, const long double b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C);
+  long double c;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_SEMIRAW_Q(A, a);
+  FP_UNPACK_SEMIRAW_Q(B, b);
+  FP_SUB_Q(C, A, B);
+  FP_PACK_SEMIRAW_Q(c, C);
+  FP_HANDLE_EXCEPTIONS;
+  return c;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_ulltoq.c b/libc/sysdeps/linux/sparc/soft-fp/q_ulltoq.c
new file mode 100644
index 000000000..53c0add2d
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_ulltoq.c
@@ -0,0 +1,38 @@
+/* Software floating-point emulation.
+   Return (long double)(a)
+   Copyright (C) 1997, 1999, 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+long double _Q_ulltoq(const unsigned long long a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(C);
+  long double c;
+  unsigned long long b = a;
+
+  FP_FROM_INT_Q(C, b, 64, unsigned long long);
+  FP_PACK_RAW_Q(c, C);
+  FP_CLEAR_EXCEPTIONS;
+  FP_HANDLE_EXCEPTIONS;
+  return c;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_util.c b/libc/sysdeps/linux/sparc/soft-fp/q_util.c
new file mode 100644
index 000000000..41ecd685d
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_util.c
@@ -0,0 +1,57 @@
+/* Software floating-point emulation.
+   Helper routine for _Q_* routines.
+   Simulate exceptions using double arithmetics.
+   Copyright (C) 1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+
+unsigned long long ___Q_numbers [] = {
+0x0000000000000000ULL, /* Zero */
+0x0010100000000000ULL, /* Very tiny number */
+0x0010000000000000ULL, /* Minimum normalized number */
+0x7fef000000000000ULL, /* A huge double number */
+};
+
+double ___Q_simulate_exceptions(int exceptions)
+{
+  double d, *p = (double *)___Q_numbers;
+  if (exceptions & FP_EX_INVALID)
+    d = p[0]/p[0];
+  if (exceptions & FP_EX_OVERFLOW)
+    {
+      d = p[3] + p[3];
+      exceptions &= ~FP_EX_INEXACT;
+    }
+  if (exceptions & FP_EX_UNDERFLOW)
+    {
+      if (exceptions & FP_EX_INEXACT)
+        {
+	  d = p[2] * p[2];
+	  exceptions &= ~FP_EX_INEXACT;
+	}
+      else
+	d = p[1] - p[2];
+    }
+  if (exceptions & FP_EX_DIVZERO)
+    d = 1.0/p[0];
+  if (exceptions & FP_EX_INEXACT)
+    d = p[3] - p[2];
+  return d;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/q_utoq.c b/libc/sysdeps/linux/sparc/soft-fp/q_utoq.c
new file mode 100644
index 000000000..f902bf839
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/q_utoq.c
@@ -0,0 +1,38 @@
+/* Software floating-point emulation.
+   c = (long double)(a)
+   Copyright (C) 1997, 1999, 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+long double _Q_utoq(const unsigned int a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(C);
+  long double c;
+  unsigned int b = a;
+
+  FP_FROM_INT_Q(C, b, 32, unsigned int);
+  FP_PACK_RAW_Q(c, C);
+  FP_CLEAR_EXCEPTIONS;
+  FP_HANDLE_EXCEPTIONS;
+  return c;
+}
diff --git a/libc/sysdeps/linux/sparc/soft-fp/quad.h b/libc/sysdeps/linux/sparc/soft-fp/quad.h
new file mode 100644
index 000000000..c22e94402
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/quad.h
@@ -0,0 +1,271 @@
+/* Software floating-point emulation.
+   Definitions for IEEE Quad Precision.
+   Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.  */
+
+#if _FP_W_TYPE_SIZE < 32
+#error "Here's a nickel, kid. Go buy yourself a real computer."
+#endif
+
+#if _FP_W_TYPE_SIZE < 64
+#define _FP_FRACTBITS_Q         (4*_FP_W_TYPE_SIZE)
+#else
+#define _FP_FRACTBITS_Q		(2*_FP_W_TYPE_SIZE)
+#endif
+
+#define _FP_FRACBITS_Q		113
+#define _FP_FRACXBITS_Q		(_FP_FRACTBITS_Q - _FP_FRACBITS_Q)
+#define _FP_WFRACBITS_Q		(_FP_WORKBITS + _FP_FRACBITS_Q)
+#define _FP_WFRACXBITS_Q	(_FP_FRACTBITS_Q - _FP_WFRACBITS_Q)
+#define _FP_EXPBITS_Q		15
+#define _FP_EXPBIAS_Q		16383
+#define _FP_EXPMAX_Q		32767
+
+#define _FP_QNANBIT_Q		\
+	((_FP_W_TYPE)1 << (_FP_FRACBITS_Q-2) % _FP_W_TYPE_SIZE)
+#define _FP_QNANBIT_SH_Q		\
+	((_FP_W_TYPE)1 << (_FP_FRACBITS_Q-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
+#define _FP_IMPLBIT_Q		\
+	((_FP_W_TYPE)1 << (_FP_FRACBITS_Q-1) % _FP_W_TYPE_SIZE)
+#define _FP_IMPLBIT_SH_Q		\
+	((_FP_W_TYPE)1 << (_FP_FRACBITS_Q-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
+#define _FP_OVERFLOW_Q		\
+	((_FP_W_TYPE)1 << (_FP_WFRACBITS_Q % _FP_W_TYPE_SIZE))
+
+typedef float TFtype __attribute__((mode(TF)));
+
+#if _FP_W_TYPE_SIZE < 64
+
+union _FP_UNION_Q
+{
+   TFtype flt;
+   struct 
+   {
+#if __BYTE_ORDER == __BIG_ENDIAN
+      unsigned sign : 1;
+      unsigned exp : _FP_EXPBITS_Q;
+      unsigned long frac3 : _FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0)-(_FP_W_TYPE_SIZE * 3);
+      unsigned long frac2 : _FP_W_TYPE_SIZE;
+      unsigned long frac1 : _FP_W_TYPE_SIZE;
+      unsigned long frac0 : _FP_W_TYPE_SIZE;
+#else
+      unsigned long frac0 : _FP_W_TYPE_SIZE;
+      unsigned long frac1 : _FP_W_TYPE_SIZE;
+      unsigned long frac2 : _FP_W_TYPE_SIZE;
+      unsigned long frac3 : _FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0)-(_FP_W_TYPE_SIZE * 3);
+      unsigned exp : _FP_EXPBITS_Q;
+      unsigned sign : 1;
+#endif /* not bigendian */
+   } bits __attribute__((packed));
+};
+
+
+#define FP_DECL_Q(X)		_FP_DECL(4,X)
+#define FP_UNPACK_RAW_Q(X,val)	_FP_UNPACK_RAW_4(Q,X,val)
+#define FP_UNPACK_RAW_QP(X,val)	_FP_UNPACK_RAW_4_P(Q,X,val)
+#define FP_PACK_RAW_Q(val,X)	_FP_PACK_RAW_4(Q,val,X)
+#define FP_PACK_RAW_QP(val,X)		\
+  do {					\
+    if (!FP_INHIBIT_RESULTS)		\
+      _FP_PACK_RAW_4_P(Q,val,X);	\
+  } while (0)
+
+#define FP_UNPACK_Q(X,val)		\
+  do {					\
+    _FP_UNPACK_RAW_4(Q,X,val);		\
+    _FP_UNPACK_CANONICAL(Q,4,X);	\
+  } while (0)
+
+#define FP_UNPACK_QP(X,val)		\
+  do {					\
+    _FP_UNPACK_RAW_4_P(Q,X,val);	\
+    _FP_UNPACK_CANONICAL(Q,4,X);	\
+  } while (0)
+
+#define FP_UNPACK_SEMIRAW_Q(X,val)	\
+  do {					\
+    _FP_UNPACK_RAW_4(Q,X,val);		\
+    _FP_UNPACK_SEMIRAW(Q,4,X);		\
+  } while (0)
+
+#define FP_UNPACK_SEMIRAW_QP(X,val)	\
+  do {					\
+    _FP_UNPACK_RAW_4_P(Q,X,val);	\
+    _FP_UNPACK_SEMIRAW(Q,4,X);		\
+  } while (0)
+
+#define FP_PACK_Q(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(Q,4,X);		\
+    _FP_PACK_RAW_4(Q,val,X);		\
+  } while (0)
+
+#define FP_PACK_QP(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(Q,4,X);		\
+    if (!FP_INHIBIT_RESULTS)		\
+      _FP_PACK_RAW_4_P(Q,val,X);	\
+  } while (0)
+
+#define FP_PACK_SEMIRAW_Q(val,X)	\
+  do {					\
+    _FP_PACK_SEMIRAW(Q,4,X);		\
+    _FP_PACK_RAW_4(Q,val,X);		\
+  } while (0)
+
+#define FP_PACK_SEMIRAW_QP(val,X)	\
+  do {					\
+    _FP_PACK_SEMIRAW(Q,4,X);		\
+    if (!FP_INHIBIT_RESULTS)		\
+      _FP_PACK_RAW_4_P(Q,val,X);	\
+  } while (0)
+
+#define FP_ISSIGNAN_Q(X)		_FP_ISSIGNAN(Q,4,X)
+#define FP_NEG_Q(R,X)			_FP_NEG(Q,4,R,X)
+#define FP_ADD_Q(R,X,Y)			_FP_ADD(Q,4,R,X,Y)
+#define FP_SUB_Q(R,X,Y)			_FP_SUB(Q,4,R,X,Y)
+#define FP_MUL_Q(R,X,Y)			_FP_MUL(Q,4,R,X,Y)
+#define FP_DIV_Q(R,X,Y)			_FP_DIV(Q,4,R,X,Y)
+#define FP_SQRT_Q(R,X)			_FP_SQRT(Q,4,R,X)
+#define _FP_SQRT_MEAT_Q(R,S,T,X,Q)	_FP_SQRT_MEAT_4(R,S,T,X,Q)
+
+#define FP_CMP_Q(r,X,Y,un)	_FP_CMP(Q,4,r,X,Y,un)
+#define FP_CMP_EQ_Q(r,X,Y)	_FP_CMP_EQ(Q,4,r,X,Y)
+#define FP_CMP_UNORD_Q(r,X,Y)	_FP_CMP_UNORD(Q,4,r,X,Y)
+
+#define FP_TO_INT_Q(r,X,rsz,rsg)	_FP_TO_INT(Q,4,r,X,rsz,rsg)
+#define FP_FROM_INT_Q(X,r,rs,rt)	_FP_FROM_INT(Q,4,X,r,rs,rt)
+
+#define _FP_FRAC_HIGH_Q(X)	_FP_FRAC_HIGH_4(X)
+#define _FP_FRAC_HIGH_RAW_Q(X)	_FP_FRAC_HIGH_4(X)
+
+#else   /* not _FP_W_TYPE_SIZE < 64 */
+union _FP_UNION_Q
+{
+  TFtype flt /* __attribute__((mode(TF))) */ ;
+  struct {
+    _FP_W_TYPE a, b;
+  } longs;
+  struct {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    unsigned sign    : 1;
+    unsigned exp     : _FP_EXPBITS_Q;
+    _FP_W_TYPE frac1 : _FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0) - _FP_W_TYPE_SIZE;
+    _FP_W_TYPE frac0 : _FP_W_TYPE_SIZE;
+#else
+    _FP_W_TYPE frac0 : _FP_W_TYPE_SIZE;
+    _FP_W_TYPE frac1 : _FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0) - _FP_W_TYPE_SIZE;
+    unsigned exp     : _FP_EXPBITS_Q;
+    unsigned sign    : 1;
+#endif
+  } bits;
+};
+
+#define FP_DECL_Q(X)		_FP_DECL(2,X)
+#define FP_UNPACK_RAW_Q(X,val)	_FP_UNPACK_RAW_2(Q,X,val)
+#define FP_UNPACK_RAW_QP(X,val)	_FP_UNPACK_RAW_2_P(Q,X,val)
+#define FP_PACK_RAW_Q(val,X)	_FP_PACK_RAW_2(Q,val,X)
+#define FP_PACK_RAW_QP(val,X)		\
+  do {					\
+    if (!FP_INHIBIT_RESULTS)		\
+      _FP_PACK_RAW_2_P(Q,val,X);	\
+  } while (0)
+
+#define FP_UNPACK_Q(X,val)		\
+  do {					\
+    _FP_UNPACK_RAW_2(Q,X,val);		\
+    _FP_UNPACK_CANONICAL(Q,2,X);	\
+  } while (0)
+
+#define FP_UNPACK_QP(X,val)		\
+  do {					\
+    _FP_UNPACK_RAW_2_P(Q,X,val);	\
+    _FP_UNPACK_CANONICAL(Q,2,X);	\
+  } while (0)
+
+#define FP_UNPACK_SEMIRAW_Q(X,val)	\
+  do {					\
+    _FP_UNPACK_RAW_2(Q,X,val);		\
+    _FP_UNPACK_SEMIRAW(Q,2,X);		\
+  } while (0)
+
+#define FP_UNPACK_SEMIRAW_QP(X,val)	\
+  do {					\
+    _FP_UNPACK_RAW_2_P(Q,X,val);	\
+    _FP_UNPACK_SEMIRAW(Q,2,X);		\
+  } while (0)
+
+#define FP_PACK_Q(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(Q,2,X);		\
+    _FP_PACK_RAW_2(Q,val,X);		\
+  } while (0)
+
+#define FP_PACK_QP(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(Q,2,X);		\
+    if (!FP_INHIBIT_RESULTS)		\
+      _FP_PACK_RAW_2_P(Q,val,X);	\
+  } while (0)
+
+#define FP_PACK_SEMIRAW_Q(val,X)	\
+  do {					\
+    _FP_PACK_SEMIRAW(Q,2,X);		\
+    _FP_PACK_RAW_2(Q,val,X);		\
+  } while (0)
+
+#define FP_PACK_SEMIRAW_QP(val,X)	\
+  do {					\
+    _FP_PACK_SEMIRAW(Q,2,X);		\
+    if (!FP_INHIBIT_RESULTS)		\
+      _FP_PACK_RAW_2_P(Q,val,X);	\
+  } while (0)
+
+#define FP_ISSIGNAN_Q(X)		_FP_ISSIGNAN(Q,2,X)
+#define FP_NEG_Q(R,X)			_FP_NEG(Q,2,R,X)
+#define FP_ADD_Q(R,X,Y)			_FP_ADD(Q,2,R,X,Y)
+#define FP_SUB_Q(R,X,Y)			_FP_SUB(Q,2,R,X,Y)
+#define FP_MUL_Q(R,X,Y)			_FP_MUL(Q,2,R,X,Y)
+#define FP_DIV_Q(R,X,Y)			_FP_DIV(Q,2,R,X,Y)
+#define FP_SQRT_Q(R,X)			_FP_SQRT(Q,2,R,X)
+#define _FP_SQRT_MEAT_Q(R,S,T,X,Q)	_FP_SQRT_MEAT_2(R,S,T,X,Q)
+
+#define FP_CMP_Q(r,X,Y,un)	_FP_CMP(Q,2,r,X,Y,un)
+#define FP_CMP_EQ_Q(r,X,Y)	_FP_CMP_EQ(Q,2,r,X,Y)
+#define FP_CMP_UNORD_Q(r,X,Y)	_FP_CMP_UNORD(Q,2,r,X,Y)
+
+#define FP_TO_INT_Q(r,X,rsz,rsg)	_FP_TO_INT(Q,2,r,X,rsz,rsg)
+#define FP_FROM_INT_Q(X,r,rs,rt)	_FP_FROM_INT(Q,2,X,r,rs,rt)
+
+#define _FP_FRAC_HIGH_Q(X)	_FP_FRAC_HIGH_2(X)
+#define _FP_FRAC_HIGH_RAW_Q(X)	_FP_FRAC_HIGH_2(X)
+
+#endif /* not _FP_W_TYPE_SIZE < 64 */
diff --git a/libc/sysdeps/linux/sparc/soft-fp/sfp-machine.h b/libc/sysdeps/linux/sparc/soft-fp/sfp-machine.h
new file mode 100644
index 000000000..f1211705e
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/sfp-machine.h
@@ -0,0 +1,213 @@
+/* Machine-dependent software floating-point definitions.
+   Sparc userland (_Q_*) version.
+   Copyright (C) 1997,1998,1999, 2002, 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz) and
+		  David S. Miller (davem@redhat.com).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <fpu_control.h>
+#include <stdlib.h>
+
+#define _FP_W_TYPE_SIZE		32
+#define _FP_W_TYPE		unsigned long
+#define _FP_WS_TYPE		signed long
+#define _FP_I_TYPE		long
+
+#define _FP_MUL_MEAT_S(R,X,Y)				\
+  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y)				\
+  _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y)				\
+  _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y)	_FP_DIV_MEAT_1_udiv(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S		((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D		((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q		((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S		0
+#define _FP_NANSIGN_D		0
+#define _FP_NANSIGN_Q		0
+
+#define _FP_KEEPNANFRACP 1
+
+/* If one NaN is signaling and the other is not,
+ * we choose that one, otherwise we choose X.
+ */
+/* For _Qp_* and _Q_*, this should prefer X, for
+ * CPU instruction emulation this should prefer Y.
+ * (see SPAMv9 B.2.2 section).
+ */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)			\
+  do {								\
+    if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs)		\
+	&& !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs))	\
+      {								\
+	R##_s = Y##_s;						\
+	_FP_FRAC_COPY_##wc(R,Y);				\
+      }								\
+    else							\
+      {								\
+	R##_s = X##_s;						\
+	_FP_FRAC_COPY_##wc(R,X);				\
+      }								\
+    R##_c = FP_CLS_NAN;						\
+  } while (0)
+
+/* Some assembly to speed things up. */
+#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)			\
+  __asm__ ("addcc %r7,%8,%2\n\
+	    addxcc %r5,%6,%1\n\
+	    addx %r3,%4,%0"						\
+	   : "=r" ((USItype)(r2)),					\
+	     "=&r" ((USItype)(r1)),					\
+	     "=&r" ((USItype)(r0))					\
+	   : "%rJ" ((USItype)(x2)),					\
+	     "rI" ((USItype)(y2)),					\
+	     "%rJ" ((USItype)(x1)),					\
+	     "rI" ((USItype)(y1)),					\
+	     "%rJ" ((USItype)(x0)),					\
+	     "rI" ((USItype)(y0))					\
+	   : "cc")
+
+#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)			\
+  __asm__ ("subcc %r7,%8,%2\n\
+	    subxcc %r5,%6,%1\n\
+	    subx %r3,%4,%0"						\
+	   : "=r" ((USItype)(r2)),					\
+	     "=&r" ((USItype)(r1)),					\
+	     "=&r" ((USItype)(r0))					\
+	   : "%rJ" ((USItype)(x2)),					\
+	     "rI" ((USItype)(y2)),					\
+	     "%rJ" ((USItype)(x1)),					\
+	     "rI" ((USItype)(y1)),					\
+	     "%rJ" ((USItype)(x0)),					\
+	     "rI" ((USItype)(y0))					\
+	   : "cc")
+
+#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)		\
+  do {									\
+    /* We need to fool gcc,  as we need to pass more than 10		\
+       input/outputs.  */						\
+    register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2");		\
+    __asm__ __volatile__ ("\
+	    addcc %r8,%9,%1\n\
+	    addxcc %r6,%7,%0\n\
+	    addxcc %r4,%5,%%g2\n\
+	    addx %r2,%3,%%g1"						\
+	   : "=&r" ((USItype)(r1)),					\
+	     "=&r" ((USItype)(r0))					\
+	   : "%rJ" ((USItype)(x3)),					\
+	     "rI" ((USItype)(y3)),					\
+	     "%rJ" ((USItype)(x2)),					\
+	     "rI" ((USItype)(y2)),					\
+	     "%rJ" ((USItype)(x1)),					\
+	     "rI" ((USItype)(y1)),					\
+	     "%rJ" ((USItype)(x0)),					\
+	     "rI" ((USItype)(y0))					\
+	   : "cc", "g1", "g2");						\
+    __asm__ __volatile__ ("" : "=r" (_t1), "=r" (_t2));			\
+    r3 = _t1; r2 = _t2;							\
+  } while (0)
+
+#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)		\
+  do {									\
+    /* We need to fool gcc,  as we need to pass more than 10		\
+       input/outputs.  */						\
+    register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2");		\
+    __asm__ __volatile__ ("\
+	    subcc %r8,%9,%1\n\
+	    subxcc %r6,%7,%0\n\
+	    subxcc %r4,%5,%%g2\n\
+	    subx %r2,%3,%%g1"						\
+	   : "=&r" ((USItype)(r1)),					\
+	     "=&r" ((USItype)(r0))					\
+	   : "%rJ" ((USItype)(x3)),					\
+	     "rI" ((USItype)(y3)),					\
+	     "%rJ" ((USItype)(x2)),					\
+	     "rI" ((USItype)(y2)),					\
+	     "%rJ" ((USItype)(x1)),					\
+	     "rI" ((USItype)(y1)),					\
+	     "%rJ" ((USItype)(x0)),					\
+	     "rI" ((USItype)(y0))					\
+	   : "cc", "g1", "g2");						\
+    __asm__ __volatile__ ("" : "=r" (_t1), "=r" (_t2));			\
+    r3 = _t1; r2 = _t2;							\
+  } while (0)
+
+#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) __FP_FRAC_SUB_3(x2,x1,x0,x2,x1,x0,y2,y1,y0)
+
+#define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) __FP_FRAC_SUB_4(x3,x2,x1,x0,x3,x2,x1,x0,y3,y2,y1,y0)
+
+#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)					\
+  __asm__ ("addcc %3,%4,%3\n\
+	    addxcc %2,%%g0,%2\n\
+	    addxcc %1,%%g0,%1\n\
+	    addx %0,%%g0,%0"						\
+	   : "=&r" ((USItype)(x3)),					\
+	     "=&r" ((USItype)(x2)),					\
+	     "=&r" ((USItype)(x1)),					\
+	     "=&r" ((USItype)(x0))					\
+	   : "rI" ((USItype)(i)),					\
+	     "0" ((USItype)(x3)),					\
+	     "1" ((USItype)(x2)),					\
+	     "2" ((USItype)(x1)),					\
+	     "3" ((USItype)(x0))					\
+	   : "cc")
+
+/* Obtain the current rounding mode. */
+#ifndef FP_ROUNDMODE
+#define FP_ROUNDMODE	((_fcw >> 30) & 0x3)
+#endif
+
+/* Exception flags. */
+#define FP_EX_INVALID		(1 << 4)
+#define FP_EX_OVERFLOW		(1 << 3)
+#define FP_EX_UNDERFLOW		(1 << 2)
+#define FP_EX_DIVZERO		(1 << 1)
+#define FP_EX_INEXACT		(1 << 0)
+
+#define _FP_DECL_EX	fpu_control_t _fcw
+
+#define FP_INIT_ROUNDMODE					\
+do {								\
+  _FPU_GETCW(_fcw);						\
+} while (0)
+
+/* Simulate exceptions using double arithmetics. */
+extern double ___Q_simulate_exceptions(int exc);
+
+#define FP_HANDLE_EXCEPTIONS					\
+do {								\
+  if (!_fex)							\
+    {								\
+      /* This is the common case, so we do it inline.		\
+       * We need to clear cexc bits if any.			\
+       */							\
+      extern unsigned long long ___Q_numbers[];			\
+      __asm__ __volatile__("\
+      	ldd [%0], %%f30\n\
+      	faddd %%f30, %%f30, %%f30\
+      	" : : "r" (___Q_numbers) : "f30");			\
+    }								\
+  else								\
+    ___Q_simulate_exceptions (_fex);			        \
+} while (0)
diff --git a/libc/sysdeps/linux/sparc/soft-fp/single.h b/libc/sysdeps/linux/sparc/soft-fp/single.h
new file mode 100644
index 000000000..9c3734adf
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/single.h
@@ -0,0 +1,151 @@
+/* Software floating-point emulation.
+   Definitions for IEEE Single Precision.
+   Copyright (C) 1997,1998,1999,2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.  */
+
+#if _FP_W_TYPE_SIZE < 32
+#error "Here's a nickel kid.  Go buy yourself a real computer."
+#endif
+
+#define _FP_FRACTBITS_S		_FP_W_TYPE_SIZE
+
+#define _FP_FRACBITS_S		24
+#define _FP_FRACXBITS_S		(_FP_FRACTBITS_S - _FP_FRACBITS_S)
+#define _FP_WFRACBITS_S		(_FP_WORKBITS + _FP_FRACBITS_S)
+#define _FP_WFRACXBITS_S	(_FP_FRACTBITS_S - _FP_WFRACBITS_S)
+#define _FP_EXPBITS_S		8
+#define _FP_EXPBIAS_S		127
+#define _FP_EXPMAX_S		255
+#define _FP_QNANBIT_S		((_FP_W_TYPE)1 << (_FP_FRACBITS_S-2))
+#define _FP_QNANBIT_SH_S	((_FP_W_TYPE)1 << (_FP_FRACBITS_S-2+_FP_WORKBITS))
+#define _FP_IMPLBIT_S		((_FP_W_TYPE)1 << (_FP_FRACBITS_S-1))
+#define _FP_IMPLBIT_SH_S	((_FP_W_TYPE)1 << (_FP_FRACBITS_S-1+_FP_WORKBITS))
+#define _FP_OVERFLOW_S		((_FP_W_TYPE)1 << (_FP_WFRACBITS_S))
+
+/* The implementation of _FP_MUL_MEAT_S and _FP_DIV_MEAT_S should be
+   chosen by the target machine.  */
+
+typedef float SFtype __attribute__((mode(SF)));
+
+union _FP_UNION_S
+{
+  SFtype flt;
+  struct {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    unsigned sign : 1;
+    unsigned exp  : _FP_EXPBITS_S;
+    unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0);
+#else
+    unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0);
+    unsigned exp  : _FP_EXPBITS_S;
+    unsigned sign : 1;
+#endif
+  } bits __attribute__((packed));
+};
+
+#define FP_DECL_S(X)		_FP_DECL(1,X)
+#define FP_UNPACK_RAW_S(X,val)	_FP_UNPACK_RAW_1(S,X,val)
+#define FP_UNPACK_RAW_SP(X,val)	_FP_UNPACK_RAW_1_P(S,X,val)
+#define FP_PACK_RAW_S(val,X)	_FP_PACK_RAW_1(S,val,X)
+#define FP_PACK_RAW_SP(val,X)		\
+  do {					\
+    if (!FP_INHIBIT_RESULTS)		\
+      _FP_PACK_RAW_1_P(S,val,X);	\
+  } while (0)
+
+#define FP_UNPACK_S(X,val)		\
+  do {					\
+    _FP_UNPACK_RAW_1(S,X,val);		\
+    _FP_UNPACK_CANONICAL(S,1,X);	\
+  } while (0)
+
+#define FP_UNPACK_SP(X,val)		\
+  do {					\
+    _FP_UNPACK_RAW_1_P(S,X,val);	\
+    _FP_UNPACK_CANONICAL(S,1,X);	\
+  } while (0)
+
+#define FP_UNPACK_SEMIRAW_S(X,val)	\
+  do {					\
+    _FP_UNPACK_RAW_1(S,X,val);		\
+    _FP_UNPACK_SEMIRAW(S,1,X);		\
+  } while (0)
+
+#define FP_UNPACK_SEMIRAW_SP(X,val)	\
+  do {					\
+    _FP_UNPACK_RAW_1_P(S,X,val);	\
+    _FP_UNPACK_SEMIRAW(S,1,X);		\
+  } while (0)
+
+#define FP_PACK_S(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(S,1,X);		\
+    _FP_PACK_RAW_1(S,val,X);		\
+  } while (0)
+
+#define FP_PACK_SP(val,X)		\
+  do {					\
+    _FP_PACK_CANONICAL(S,1,X);		\
+    if (!FP_INHIBIT_RESULTS)		\
+      _FP_PACK_RAW_1_P(S,val,X);	\
+  } while (0)
+
+#define FP_PACK_SEMIRAW_S(val,X)	\
+  do {					\
+    _FP_PACK_SEMIRAW(S,1,X);		\
+    _FP_PACK_RAW_1(S,val,X);		\
+  } while (0)
+
+#define FP_PACK_SEMIRAW_SP(val,X)	\
+  do {					\
+    _FP_PACK_SEMIRAW(S,1,X);		\
+    if (!FP_INHIBIT_RESULTS)		\
+      _FP_PACK_RAW_1_P(S,val,X);	\
+  } while (0)
+
+#define FP_ISSIGNAN_S(X)		_FP_ISSIGNAN(S,1,X)
+#define FP_NEG_S(R,X)			_FP_NEG(S,1,R,X)
+#define FP_ADD_S(R,X,Y)			_FP_ADD(S,1,R,X,Y)
+#define FP_SUB_S(R,X,Y)			_FP_SUB(S,1,R,X,Y)
+#define FP_MUL_S(R,X,Y)			_FP_MUL(S,1,R,X,Y)
+#define FP_DIV_S(R,X,Y)			_FP_DIV(S,1,R,X,Y)
+#define FP_SQRT_S(R,X)			_FP_SQRT(S,1,R,X)
+#define _FP_SQRT_MEAT_S(R,S,T,X,Q)	_FP_SQRT_MEAT_1(R,S,T,X,Q)
+
+#define FP_CMP_S(r,X,Y,un)	_FP_CMP(S,1,r,X,Y,un)
+#define FP_CMP_EQ_S(r,X,Y)	_FP_CMP_EQ(S,1,r,X,Y)
+#define FP_CMP_UNORD_S(r,X,Y)	_FP_CMP_UNORD(S,1,r,X,Y)
+
+#define FP_TO_INT_S(r,X,rsz,rsg)	_FP_TO_INT(S,1,r,X,rsz,rsg)
+#define FP_FROM_INT_S(X,r,rs,rt)	_FP_FROM_INT(S,1,X,r,rs,rt)
+
+#define _FP_FRAC_HIGH_S(X)	_FP_FRAC_HIGH_1(X)
+#define _FP_FRAC_HIGH_RAW_S(X)	_FP_FRAC_HIGH_1(X)
diff --git a/libc/sysdeps/linux/sparc/soft-fp/soft-fp.h b/libc/sysdeps/linux/sparc/soft-fp/soft-fp.h
new file mode 100644
index 000000000..40f0dbd54
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/soft-fp/soft-fp.h
@@ -0,0 +1,205 @@
+/* Software floating-point emulation.
+   Copyright (C) 1997,1998,1999,2000,2002,2003,2005,2006,2007
+	Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.  */
+
+#ifndef SOFT_FP_H
+#define SOFT_FP_H
+
+#include "sfp-machine.h"
+
+/* Allow sfp-machine to have its own byte order definitions. */
+#ifndef __BYTE_ORDER
+#ifdef _LIBC
+#include <endian.h>
+#else
+#error "endianness not defined by sfp-machine.h"
+#endif
+#endif
+
+#define _FP_WORKBITS		3
+#define _FP_WORK_LSB		((_FP_W_TYPE)1 << 3)
+#define _FP_WORK_ROUND		((_FP_W_TYPE)1 << 2)
+#define _FP_WORK_GUARD		((_FP_W_TYPE)1 << 1)
+#define _FP_WORK_STICKY		((_FP_W_TYPE)1 << 0)
+
+#ifndef FP_RND_NEAREST
+# define FP_RND_NEAREST		0
+# define FP_RND_ZERO		1
+# define FP_RND_PINF		2
+# define FP_RND_MINF		3
+#endif
+#ifndef FP_ROUNDMODE
+# define FP_ROUNDMODE		FP_RND_NEAREST
+#endif
+
+/* By default don't care about exceptions. */
+#ifndef FP_EX_INVALID
+#define FP_EX_INVALID		0
+#endif
+#ifndef FP_EX_OVERFLOW
+#define FP_EX_OVERFLOW		0
+#endif
+#ifndef FP_EX_UNDERFLOW
+#define FP_EX_UNDERFLOW		0
+#endif
+#ifndef FP_EX_DIVZERO
+#define FP_EX_DIVZERO		0
+#endif
+#ifndef FP_EX_INEXACT
+#define FP_EX_INEXACT		0
+#endif
+#ifndef FP_EX_DENORM
+#define FP_EX_DENORM		0
+#endif
+
+#ifdef _FP_DECL_EX
+#define FP_DECL_EX					\
+  int _fex = 0;						\
+  _FP_DECL_EX
+#else
+#define FP_DECL_EX int _fex = 0
+#endif
+
+#ifndef FP_INIT_ROUNDMODE
+#define FP_INIT_ROUNDMODE do {} while (0)
+#endif
+
+#ifndef FP_HANDLE_EXCEPTIONS
+#define FP_HANDLE_EXCEPTIONS do {} while (0)
+#endif
+
+#ifndef FP_INHIBIT_RESULTS
+/* By default we write the results always.
+ * sfp-machine may override this and e.g.
+ * check if some exceptions are unmasked
+ * and inhibit it in such a case.
+ */
+#define FP_INHIBIT_RESULTS 0
+#endif
+
+#define FP_SET_EXCEPTION(ex)				\
+  _fex |= (ex)
+
+#define FP_UNSET_EXCEPTION(ex)				\
+  _fex &= ~(ex)
+
+#define FP_CLEAR_EXCEPTIONS				\
+  _fex = 0
+
+#define _FP_ROUND_NEAREST(wc, X)			\
+do {							\
+    if ((_FP_FRAC_LOW_##wc(X) & 15) != _FP_WORK_ROUND)	\
+      _FP_FRAC_ADDI_##wc(X, _FP_WORK_ROUND);		\
+} while (0)
+
+#define _FP_ROUND_ZERO(wc, X)		(void)0
+
+#define _FP_ROUND_PINF(wc, X)				\
+do {							\
+    if (!X##_s && (_FP_FRAC_LOW_##wc(X) & 7))		\
+      _FP_FRAC_ADDI_##wc(X, _FP_WORK_LSB);		\
+} while (0)
+
+#define _FP_ROUND_MINF(wc, X)				\
+do {							\
+    if (X##_s && (_FP_FRAC_LOW_##wc(X) & 7))		\
+      _FP_FRAC_ADDI_##wc(X, _FP_WORK_LSB);		\
+} while (0)
+
+#define _FP_ROUND(wc, X)			\
+do {						\
+	if (_FP_FRAC_LOW_##wc(X) & 7)		\
+	  FP_SET_EXCEPTION(FP_EX_INEXACT);	\
+	switch (FP_ROUNDMODE)			\
+	{					\
+	  case FP_RND_NEAREST:			\
+	    _FP_ROUND_NEAREST(wc,X);		\
+	    break;				\
+	  case FP_RND_ZERO:			\
+	    _FP_ROUND_ZERO(wc,X);		\
+	    break;				\
+	  case FP_RND_PINF:			\
+	    _FP_ROUND_PINF(wc,X);		\
+	    break;				\
+	  case FP_RND_MINF:			\
+	    _FP_ROUND_MINF(wc,X);		\
+	    break;				\
+	}					\
+} while (0)
+
+#define FP_CLS_NORMAL		0
+#define FP_CLS_ZERO		1
+#define FP_CLS_INF		2
+#define FP_CLS_NAN		3
+
+#define _FP_CLS_COMBINE(x,y)	(((x) << 2) | (y))
+
+#include "op-1.h"
+#include "op-2.h"
+#include "op-4.h"
+#include "op-8.h"
+#include "op-common.h"
+
+/* Sigh.  Silly things longlong.h needs.  */
+#define UWtype		_FP_W_TYPE
+#define W_TYPE_SIZE	_FP_W_TYPE_SIZE
+
+typedef int QItype __attribute__((mode(QI)));
+typedef int SItype __attribute__((mode(SI)));
+typedef int DItype __attribute__((mode(DI)));
+typedef unsigned int UQItype __attribute__((mode(QI)));
+typedef unsigned int USItype __attribute__((mode(SI)));
+typedef unsigned int UDItype __attribute__((mode(DI)));
+#if _FP_W_TYPE_SIZE == 32
+typedef unsigned int UHWtype __attribute__((mode(HI)));
+#elif _FP_W_TYPE_SIZE == 64
+typedef USItype UHWtype;
+#endif
+
+#ifndef CMPtype
+#define CMPtype		int
+#endif
+
+#define SI_BITS		(__CHAR_BIT__ * (int)sizeof(SItype))
+#define DI_BITS		(__CHAR_BIT__ * (int)sizeof(DItype))
+
+#ifndef umul_ppmm
+#include "longlong.h"
+#endif
+
+#ifdef _LIBC
+#include <stdlib.h>
+#else
+extern void abort (void);
+#endif
+
+#endif
diff --git a/libc/unistd/sleep.c b/libc/unistd/sleep.c
index b0031f022..9db115a29 100644
--- a/libc/unistd/sleep.c
+++ b/libc/unistd/sleep.c
@@ -24,11 +24,54 @@
 #include <unistd.h>
 
 
-
 /* version perusing nanosleep */
 #if defined __UCLIBC_HAS_REALTIME__
 
-#if 0
+/* I am unable to reproduce alleged "Linux quirk".
+ * I used the following test program:
+#include <unistd.h>
+#include <time.h>
+#include <signal.h>
+static void dummy(int sig) {}
+int main() {
+    struct timespec t = { 2, 0 };
+    if (fork() == 0) {
+	sleep(1);
+	return 0;
+    }
+    signal(SIGCHLD, SIG_DFL); //
+    signal(SIGCHLD, dummy);   // Pick one
+    signal(SIGCHLD, SIG_IGN); //
+    nanosleep(&t, &t);
+    return 0;
+}
+ * Testing on 2.4.20 and on 2.6.35-rc4:
+ * With SIG_DFL, nanosleep is not interrupted by SIGCHLD. Ok.
+ * With dummy handler, nanosleep is interrupted by SIGCHLD. Ok.
+ * With SIG_IGN, nanosleep is NOT interrupted by SIGCHLD.
+ * It looks like sleep's workaround for SIG_IGN is no longer needed?
+ * The only emails I can find are from 1998 (!):
+ * ----------
+ *  Subject: Re: sleep ignore sigchld
+ *  From: Linus Torvalds <torvalds@transmeta.com>
+ *  Date: Mon, 16 Nov 1998 11:02:15 -0800 (PST)
+ *
+ *  On Mon, 16 Nov 1998, H. J. Lu wrote:
+ *  > That is a kernel bug. SIGCHLD is a special one. Usually it cannot
+ *  > be ignored. [snip...]
+ *
+ *  No can do.
+ *
+ *  "nanosleep()" is implemented in a bad way that makes it impossible to
+ *  restart it cleanly. It was done that way because glibc wanted it that way,
+ *  not because it's a good idea. [snip...]
+ * ----------
+ * I assume that in the passed twelve+ years, nanosleep got fixed,
+ * but the hack in sleep to work around broken nanosleep was never removed.
+ */
+
+# if 0
+
 /* This is a quick and dirty, but not 100% compliant with
  * the stupid SysV SIGCHLD vs. SIG_IGN behaviour.  It is
  * fine unless you are messing with SIGCHLD...  */
@@ -41,7 +84,7 @@ unsigned int sleep (unsigned int sec)
 	return res;
 }
 
-#else
+# else
 
 /* We are going to use the `nanosleep' syscall of the kernel.  But the
    kernel does not implement the sstupid SysV SIGCHLD vs. SIG_IGN
@@ -49,64 +92,54 @@ unsigned int sleep (unsigned int sec)
 unsigned int sleep (unsigned int seconds)
 {
     struct timespec ts = { .tv_sec = (long int) seconds, .tv_nsec = 0 };
-    sigset_t set, oset;
+    sigset_t set;
+    struct sigaction oact;
     unsigned int result;
 
     /* This is not necessary but some buggy programs depend on this.  */
-    if (seconds == 0)
-	{
-# ifdef CANCELLATION_P
-		CANCELLATION_P (THREAD_SELF);
-# endif
-		return 0;
-	}
+    if (seconds == 0) {
+#  ifdef CANCELLATION_P
+	CANCELLATION_P (THREAD_SELF);
+#  endif
+	return 0;
+    }
 
     /* Linux will wake up the system call, nanosleep, when SIGCHLD
        arrives even if SIGCHLD is ignored.  We have to deal with it
-       in libc.  We block SIGCHLD first.  */
+       in libc.  */
+
     __sigemptyset (&set);
     __sigaddset (&set, SIGCHLD);
-    sigprocmask (SIG_BLOCK, &set, &oset); /* can't fail */
 
-    /* If SIGCHLD is already blocked, we don't have to do anything.  */
-    if (!__sigismember (&oset, SIGCHLD))
-    {
-	int saved_errno;
-	struct sigaction oact;
-
-	__sigemptyset (&set);
-	__sigaddset (&set, SIGCHLD);
-
-	sigaction (SIGCHLD, NULL, &oact); /* never fails */
-
-	if (oact.sa_handler == SIG_IGN)
-	{
-	    /* We should leave SIGCHLD blocked.  */
-	    result = nanosleep (&ts, &ts);
-
-	    saved_errno = errno;
-	    /* Restore the original signal mask.  */
-	    sigprocmask (SIG_SETMASK, &oset, NULL);
-	    __set_errno (saved_errno);
-	}
-	else
-	{
-	    /* We should unblock SIGCHLD.  Restore the original signal mask.  */
-	    sigprocmask (SIG_SETMASK, &oset, NULL);
-	    result = nanosleep (&ts, &ts);
-	}
+    /* Is SIGCHLD set to SIG_IGN? */
+    sigaction (SIGCHLD, NULL, &oact); /* never fails */
+    if (oact.sa_handler == SIG_IGN) {
+	/* Yes.  Block SIGCHLD, save old mask.  */
+	sigprocmask (SIG_BLOCK, &set, &set); /* never fails */
     }
-    else
-	result = nanosleep (&ts, &ts);
 
-    if (result != 0)
-	/* Round remaining time.  */
+    /* Run nanosleep, with SIGCHLD blocked if SIGCHLD is SIG_IGNed.  */
+    result = nanosleep (&ts, &ts);
+    if (result != 0) {
+	/* Got EINTR. Return remaining time.  */
 	result = (unsigned int) ts.tv_sec + (ts.tv_nsec >= 500000000L);
+    }
+
+    if (!__sigismember (&set, SIGCHLD)) {
+	/* We did block SIGCHLD, and old mask had no SIGCHLD bit.
+	   IOW: we need to unblock SIGCHLD now. Do it.  */
+	/* this sigprocmask call never fails, thus never updates errno,
+	   and therefore we don't need to save/restore it.  */
+	sigprocmask (SIG_SETMASK, &set, NULL); /* never fails */
+    }
 
     return result;
 }
-#endif
+
+# endif
+
 #else /* __UCLIBC_HAS_REALTIME__ */
+
 /* no nanosleep, use signals and alarm() */
 static void sleep_alarm_handler(int attribute_unused sig)
 {
@@ -154,5 +187,7 @@ unsigned int sleep (unsigned int seconds)
 
     return result > seconds ? 0 : seconds - result;
 }
+
 #endif /* __UCLIBC_HAS_REALTIME__ */
+
 libc_hidden_def(sleep)
diff --git a/libc/unistd/usleep.c b/libc/unistd/usleep.c
index 61ddb900a..8d01703ec 100644
--- a/libc/unistd/usleep.c
+++ b/libc/unistd/usleep.c
@@ -14,19 +14,19 @@
 
 int usleep (__useconds_t usec)
 {
-    const struct timespec ts = {
-	.tv_sec = (long int) (usec / 1000000),
-	.tv_nsec = (long int) (usec % 1000000) * 1000ul
-    };
-    return(nanosleep(&ts, NULL));
+	const struct timespec ts = {
+		.tv_sec = (long int) (usec / 1000000),
+		.tv_nsec = (long int) (usec % 1000000) * 1000ul
+	};
+	return nanosleep(&ts, NULL);
 }
 #else /* __UCLIBC_HAS_REALTIME__ */
 int usleep (__useconds_t usec)
 {
 	struct timeval tv;
 
-	tv.tv_sec = 0;
-	tv.tv_usec = usec;
+	tv.tv_sec = (long int) (usec / 1000000);
+	tv.tv_usec = (long int) (usec % 1000000);
 	return select(0, NULL, NULL, NULL, &tv);
 }
 #endif /* __UCLIBC_HAS_REALTIME__ */