sparc64: add basic support

No NPTL, no LDSO support. Bootup with Busybox Ash in Qemu working. Testuite shows only two failures, but mksh continue/break support doesn't work.
author: Waldemar Brodkorb <wbx@openadk.org> 2017-05-24 20:49:02 +0200
committer: Waldemar Brodkorb <wbx@openadk.org> 2017-06-23 23:46:04 +0200
commit: 041cdc2769407c4d3869b218ad7ee7638e1c306e (patch)
tree: 1ea25250d74dcb230cf5feee99226fc080dbd678 /libc/sysdeps/linux/sparc64/soft-fp
parent: 58a5ba12bffad5916d9897c2870fc483f1db8282 (diff)
44 files changed, 8827 insertions, 0 deletions
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/Makefile b/libc/sysdeps/linux/sparc64/soft-fp/Makefile
new file mode 100644
index 000000000..b145df283
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/Makefile
@@ -0,0 +1,33 @@
+#  Software floating-point emulation.
+#  Makefile for SPARC v9 ABI mandated long double utility
+#  functions (_Qp_*).
+#  Copyright (C) 1999-2017 Free Software Foundation, Inc.
+#  This file is part of the GNU C Library.
+#  Contributed by Jakub Jelinek (jj@ultra.linux.cz).
+#
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+ifeq ($(subdir),soft-fp)
+sparc64-quad-routines := qp_add qp_cmp qp_cmpe qp_div qp_dtoq qp_feq qp_fge \
+	qp_fgt qp_fle qp_flt qp_fne qp_itoq qp_mul qp_neg qp_qtod qp_qtoi   \
+	qp_qtos qp_qtoui qp_qtoux qp_qtox qp_sqrt qp_stoq qp_sub qp_uitoq   \
+	qp_uxtoq qp_xtoq qp_util
+sysdep_routines += $(sparc64-quad-routines)
+endif
+
+ifeq ($(subdir),math)
+CPPFLAGS += -I../soft-fp/
+endif
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/Versions b/libc/sysdeps/linux/sparc64/soft-fp/Versions
new file mode 100644
index 000000000..9e89c3c3e
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/Versions
@@ -0,0 +1,8 @@
+libc {
+  GLIBC_2.2 {
+    _Qp_add; _Qp_cmp; _Qp_cmpe; _Qp_div; _Qp_dtoq; _Qp_feq; _Qp_fge; _Qp_fgt;
+    _Qp_fle; _Qp_flt; _Qp_fne; _Qp_itoq; _Qp_mul; _Qp_neg; _Qp_qtod; _Qp_qtoi;
+    _Qp_qtos; _Qp_qtoui; _Qp_qtoux; _Qp_qtox; _Qp_sqrt; _Qp_stoq; _Qp_sub;
+    _Qp_uitoq; _Qp_uxtoq; _Qp_xtoq;
+  }
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/double.h b/libc/sysdeps/linux/sparc64/soft-fp/double.h
new file mode 100644
index 000000000..055d9da6b
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/double.h
@@ -0,0 +1,323 @@
+/* Software floating-point emulation.
+   Definitions for IEEE Double Precision
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef SOFT_FP_DOUBLE_H
+#define SOFT_FP_DOUBLE_H	1
+
+#if _FP_W_TYPE_SIZE < 32
+# error "Here's a nickel kid.  Go buy yourself a real computer."
+#endif
+
+#if _FP_W_TYPE_SIZE < 64
+# define _FP_FRACTBITS_D	(2 * _FP_W_TYPE_SIZE)
+# define _FP_FRACTBITS_DW_D	(4 * _FP_W_TYPE_SIZE)
+#else
+# define _FP_FRACTBITS_D	_FP_W_TYPE_SIZE
+# define _FP_FRACTBITS_DW_D	(2 * _FP_W_TYPE_SIZE)
+#endif
+
+#define _FP_FRACBITS_D		53
+#define _FP_FRACXBITS_D		(_FP_FRACTBITS_D - _FP_FRACBITS_D)
+#define _FP_WFRACBITS_D		(_FP_WORKBITS + _FP_FRACBITS_D)
+#define _FP_WFRACXBITS_D	(_FP_FRACTBITS_D - _FP_WFRACBITS_D)
+#define _FP_EXPBITS_D		11
+#define _FP_EXPBIAS_D		1023
+#define _FP_EXPMAX_D		2047
+
+#define _FP_QNANBIT_D		\
+	((_FP_W_TYPE) 1 << (_FP_FRACBITS_D-2) % _FP_W_TYPE_SIZE)
+#define _FP_QNANBIT_SH_D		\
+	((_FP_W_TYPE) 1 << (_FP_FRACBITS_D-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
+#define _FP_IMPLBIT_D		\
+	((_FP_W_TYPE) 1 << (_FP_FRACBITS_D-1) % _FP_W_TYPE_SIZE)
+#define _FP_IMPLBIT_SH_D		\
+	((_FP_W_TYPE) 1 << (_FP_FRACBITS_D-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
+#define _FP_OVERFLOW_D		\
+	((_FP_W_TYPE) 1 << _FP_WFRACBITS_D % _FP_W_TYPE_SIZE)
+
+#define _FP_WFRACBITS_DW_D	(2 * _FP_WFRACBITS_D)
+#define _FP_WFRACXBITS_DW_D	(_FP_FRACTBITS_DW_D - _FP_WFRACBITS_DW_D)
+#define _FP_HIGHBIT_DW_D	\
+  ((_FP_W_TYPE) 1 << (_FP_WFRACBITS_DW_D - 1) % _FP_W_TYPE_SIZE)
+
+typedef float DFtype __attribute__ ((mode (DF)));
+
+#if _FP_W_TYPE_SIZE < 64
+
+union _FP_UNION_D
+{
+  DFtype flt;
+  struct _FP_STRUCT_LAYOUT
+  {
+# if __BYTE_ORDER == __BIG_ENDIAN
+    unsigned sign  : 1;
+    unsigned exp   : _FP_EXPBITS_D;
+    unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE;
+    unsigned frac0 : _FP_W_TYPE_SIZE;
+# else
+    unsigned frac0 : _FP_W_TYPE_SIZE;
+    unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE;
+    unsigned exp   : _FP_EXPBITS_D;
+    unsigned sign  : 1;
+# endif
+  } bits __attribute__ ((packed));
+};
+
+# define FP_DECL_D(X)		_FP_DECL (2, X)
+# define FP_UNPACK_RAW_D(X, val)	_FP_UNPACK_RAW_2 (D, X, (val))
+# define FP_UNPACK_RAW_DP(X, val)	_FP_UNPACK_RAW_2_P (D, X, (val))
+# define FP_PACK_RAW_D(val, X)	_FP_PACK_RAW_2 (D, (val), X)
+# define FP_PACK_RAW_DP(val, X)			\
+  do						\
+    {						\
+      if (!FP_INHIBIT_RESULTS)			\
+	_FP_PACK_RAW_2_P (D, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_UNPACK_D(X, val)			\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_2 (D, X, (val));		\
+      _FP_UNPACK_CANONICAL (D, 2, X);		\
+    }						\
+  while (0)
+
+# define FP_UNPACK_DP(X, val)			\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_2_P (D, X, (val));		\
+      _FP_UNPACK_CANONICAL (D, 2, X);		\
+    }						\
+  while (0)
+
+# define FP_UNPACK_SEMIRAW_D(X, val)		\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_2 (D, X, (val));		\
+      _FP_UNPACK_SEMIRAW (D, 2, X);		\
+    }						\
+  while (0)
+
+# define FP_UNPACK_SEMIRAW_DP(X, val)		\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_2_P (D, X, (val));		\
+      _FP_UNPACK_SEMIRAW (D, 2, X);		\
+    }						\
+  while (0)
+
+# define FP_PACK_D(val, X)			\
+  do						\
+    {						\
+      _FP_PACK_CANONICAL (D, 2, X);		\
+      _FP_PACK_RAW_2 (D, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_PACK_DP(val, X)			\
+  do						\
+    {						\
+      _FP_PACK_CANONICAL (D, 2, X);		\
+      if (!FP_INHIBIT_RESULTS)			\
+	_FP_PACK_RAW_2_P (D, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_PACK_SEMIRAW_D(val, X)		\
+  do						\
+    {						\
+      _FP_PACK_SEMIRAW (D, 2, X);		\
+      _FP_PACK_RAW_2 (D, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_PACK_SEMIRAW_DP(val, X)		\
+  do						\
+    {						\
+      _FP_PACK_SEMIRAW (D, 2, X);		\
+      if (!FP_INHIBIT_RESULTS)			\
+	_FP_PACK_RAW_2_P (D, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_ISSIGNAN_D(X)		_FP_ISSIGNAN (D, 2, X)
+# define FP_NEG_D(R, X)			_FP_NEG (D, 2, R, X)
+# define FP_ADD_D(R, X, Y)		_FP_ADD (D, 2, R, X, Y)
+# define FP_SUB_D(R, X, Y)		_FP_SUB (D, 2, R, X, Y)
+# define FP_MUL_D(R, X, Y)		_FP_MUL (D, 2, R, X, Y)
+# define FP_DIV_D(R, X, Y)		_FP_DIV (D, 2, R, X, Y)
+# define FP_SQRT_D(R, X)		_FP_SQRT (D, 2, R, X)
+# define _FP_SQRT_MEAT_D(R, S, T, X, Q)	_FP_SQRT_MEAT_2 (R, S, T, X, (Q))
+# define FP_FMA_D(R, X, Y, Z)		_FP_FMA (D, 2, 4, R, X, Y, Z)
+
+# define FP_CMP_D(r, X, Y, un, ex)	_FP_CMP (D, 2, (r), X, Y, (un), (ex))
+# define FP_CMP_EQ_D(r, X, Y, ex)	_FP_CMP_EQ (D, 2, (r), X, Y, (ex))
+# define FP_CMP_UNORD_D(r, X, Y, ex)	_FP_CMP_UNORD (D, 2, (r), X, Y, (ex))
+
+# define FP_TO_INT_D(r, X, rsz, rsg)	_FP_TO_INT (D, 2, (r), X, (rsz), (rsg))
+# define FP_TO_INT_ROUND_D(r, X, rsz, rsg)	\
+  _FP_TO_INT_ROUND (D, 2, (r), X, (rsz), (rsg))
+# define FP_FROM_INT_D(X, r, rs, rt)	_FP_FROM_INT (D, 2, X, (r), (rs), rt)
+
+# define _FP_FRAC_HIGH_D(X)	_FP_FRAC_HIGH_2 (X)
+# define _FP_FRAC_HIGH_RAW_D(X)	_FP_FRAC_HIGH_2 (X)
+
+# define _FP_FRAC_HIGH_DW_D(X)	_FP_FRAC_HIGH_4 (X)
+
+#else
+
+union _FP_UNION_D
+{
+  DFtype flt;
+  struct _FP_STRUCT_LAYOUT
+  {
+# if __BYTE_ORDER == __BIG_ENDIAN
+    unsigned sign   : 1;
+    unsigned exp    : _FP_EXPBITS_D;
+    _FP_W_TYPE frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0);
+# else
+    _FP_W_TYPE frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0);
+    unsigned exp    : _FP_EXPBITS_D;
+    unsigned sign   : 1;
+# endif
+  } bits __attribute__ ((packed));
+};
+
+# define FP_DECL_D(X)		_FP_DECL (1, X)
+# define FP_UNPACK_RAW_D(X, val)	_FP_UNPACK_RAW_1 (D, X, (val))
+# define FP_UNPACK_RAW_DP(X, val)	_FP_UNPACK_RAW_1_P (D, X, (val))
+# define FP_PACK_RAW_D(val, X)	_FP_PACK_RAW_1 (D, (val), X)
+# define FP_PACK_RAW_DP(val, X)			\
+  do						\
+    {						\
+      if (!FP_INHIBIT_RESULTS)			\
+	_FP_PACK_RAW_1_P (D, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_UNPACK_D(X, val)			\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_1 (D, X, (val));		\
+      _FP_UNPACK_CANONICAL (D, 1, X);		\
+    }						\
+  while (0)
+
+# define FP_UNPACK_DP(X, val)			\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_1_P (D, X, (val));		\
+      _FP_UNPACK_CANONICAL (D, 1, X);		\
+    }						\
+  while (0)
+
+# define FP_UNPACK_SEMIRAW_D(X, val)		\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_1 (D, X, (val));		\
+      _FP_UNPACK_SEMIRAW (D, 1, X);		\
+    }						\
+  while (0)
+
+# define FP_UNPACK_SEMIRAW_DP(X, val)		\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_1_P (D, X, (val));		\
+      _FP_UNPACK_SEMIRAW (D, 1, X);		\
+    }						\
+  while (0)
+
+# define FP_PACK_D(val, X)			\
+  do						\
+    {						\
+      _FP_PACK_CANONICAL (D, 1, X);		\
+      _FP_PACK_RAW_1 (D, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_PACK_DP(val, X)			\
+  do						\
+    {						\
+      _FP_PACK_CANONICAL (D, 1, X);		\
+      if (!FP_INHIBIT_RESULTS)			\
+	_FP_PACK_RAW_1_P (D, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_PACK_SEMIRAW_D(val, X)		\
+  do						\
+    {						\
+      _FP_PACK_SEMIRAW (D, 1, X);		\
+      _FP_PACK_RAW_1 (D, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_PACK_SEMIRAW_DP(val, X)		\
+  do						\
+    {						\
+      _FP_PACK_SEMIRAW (D, 1, X);		\
+      if (!FP_INHIBIT_RESULTS)			\
+	_FP_PACK_RAW_1_P (D, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_ISSIGNAN_D(X)		_FP_ISSIGNAN (D, 1, X)
+# define FP_NEG_D(R, X)			_FP_NEG (D, 1, R, X)
+# define FP_ADD_D(R, X, Y)		_FP_ADD (D, 1, R, X, Y)
+# define FP_SUB_D(R, X, Y)		_FP_SUB (D, 1, R, X, Y)
+# define FP_MUL_D(R, X, Y)		_FP_MUL (D, 1, R, X, Y)
+# define FP_DIV_D(R, X, Y)		_FP_DIV (D, 1, R, X, Y)
+# define FP_SQRT_D(R, X)		_FP_SQRT (D, 1, R, X)
+# define _FP_SQRT_MEAT_D(R, S, T, X, Q)	_FP_SQRT_MEAT_1 (R, S, T, X, (Q))
+# define FP_FMA_D(R, X, Y, Z)		_FP_FMA (D, 1, 2, R, X, Y, Z)
+
+/* The implementation of _FP_MUL_D and _FP_DIV_D should be chosen by
+   the target machine.  */
+
+# define FP_CMP_D(r, X, Y, un, ex)	_FP_CMP (D, 1, (r), X, Y, (un), (ex))
+# define FP_CMP_EQ_D(r, X, Y, ex)	_FP_CMP_EQ (D, 1, (r), X, Y, (ex))
+# define FP_CMP_UNORD_D(r, X, Y, ex)	_FP_CMP_UNORD (D, 1, (r), X, Y, (ex))
+
+# define FP_TO_INT_D(r, X, rsz, rsg)	_FP_TO_INT (D, 1, (r), X, (rsz), (rsg))
+# define FP_TO_INT_ROUND_D(r, X, rsz, rsg)	\
+  _FP_TO_INT_ROUND (D, 1, (r), X, (rsz), (rsg))
+# define FP_FROM_INT_D(X, r, rs, rt)	_FP_FROM_INT (D, 1, X, (r), (rs), rt)
+
+# define _FP_FRAC_HIGH_D(X)	_FP_FRAC_HIGH_1 (X)
+# define _FP_FRAC_HIGH_RAW_D(X)	_FP_FRAC_HIGH_1 (X)
+
+# define _FP_FRAC_HIGH_DW_D(X)	_FP_FRAC_HIGH_2 (X)
+
+#endif /* W_TYPE_SIZE < 64 */
+
+#endif /* !SOFT_FP_DOUBLE_H */
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/e_ilogbl.c b/libc/sysdeps/linux/sparc64/soft-fp/e_ilogbl.c
new file mode 100644
index 000000000..5b19d12a4
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/e_ilogbl.c
@@ -0,0 +1,79 @@
+/* Software floating-point emulation.
+   ilogbl(x, exp)
+   Copyright (C) 1999-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* ilogbl(long double x)
+ * return the binary exponent of non-zero x
+ * ilogbl(0) = 0x80000001
+ * ilogbl(inf/NaN) = 0x7fffffff (no signal is raised)
+ */
+
+#include "soft-fp.h"
+#include "quad.h"
+#include <math.h>
+
+int __ieee754_ilogbl (long double x)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(X);
+
+/*
+  FP_UNPACK_Q(X, x);
+  switch (X_c)
+    {
+    case FP_CLS_ZERO:
+      return FP_ILOGB0;
+    case FP_CLS_NAN:
+    case FP_CLS_INF:
+      return FP_ILOGBNAN;
+    default:
+      return X_e;
+    }
+ */
+  FP_UNPACK_RAW_Q(X, x);
+  switch (X_e)
+    {
+    default:
+      return X_e - _FP_EXPBIAS_Q;
+    case 0:
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+      if (_FP_FRAC_ZEROP_4(X))
+	return FP_ILOGB0;
+      else
+	{
+	  _FP_I_TYPE shift;
+	  _FP_FRAC_CLZ_4(shift, X);
+	  shift -= _FP_FRACXBITS_Q;
+	  return X_e - _FP_EXPBIAS_Q - 1 + shift;
+	}
+#else
+      if (_FP_FRAC_ZEROP_2(X))
+	return FP_ILOGB0;
+      else
+	{
+	  _FP_I_TYPE shift;
+	  _FP_FRAC_CLZ_2(shift, X);
+	  shift -= _FP_FRACXBITS_Q;
+	  return X_e - _FP_EXPBIAS_Q - 1 + shift;
+	}
+#endif
+    case _FP_EXPBIAS_Q:
+      return FP_ILOGBNAN;
+    }
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/longlong.h b/libc/sysdeps/linux/sparc64/soft-fp/longlong.h
new file mode 100644
index 000000000..0ec11c505
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/longlong.h
@@ -0,0 +1,1773 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+   Copyright (C) 1991-2017 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* You have to define the following before including this file:
+
+   UWtype -- An unsigned type, default type for operations (typically a "word")
+   UHWtype -- An unsigned type, at least half the size of UWtype.
+   UDWtype -- An unsigned type, at least twice as large a UWtype
+   W_TYPE_SIZE -- size in bits of UWtype
+
+   UQItype -- Unsigned 8 bit type.
+   SItype, USItype -- Signed and unsigned 32 bit types.
+   DItype, UDItype -- Signed and unsigned 64 bit types.
+
+   On a 32 bit machine UWtype should typically be USItype;
+   on a 64 bit machine, UWtype should typically be UDItype.  */
+
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+#ifndef W_TYPE_SIZE
+#define W_TYPE_SIZE	32
+#define UWtype		USItype
+#define UHWtype		USItype
+#define UDWtype		UDItype
+#endif
+
+/* Used in glibc only.  */
+#ifndef attribute_hidden
+#define attribute_hidden
+#endif
+
+extern const UQItype __clz_tab[256] attribute_hidden;
+
+/* Define auxiliary asm macros.
+
+   1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
+   UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
+   word product in HIGH_PROD and LOW_PROD.
+
+   2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
+   UDWtype product.  This is just a variant of umul_ppmm.
+
+   3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+   denominator) divides a UDWtype, composed by the UWtype integers
+   HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+   in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
+   than DENOMINATOR for correct operation.  If, in addition, the most
+   significant bit of DENOMINATOR must be 1, then the pre-processor symbol
+   UDIV_NEEDS_NORMALIZATION is defined to 1.
+
+   4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+   denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
+   is rounded towards 0.
+
+   5) count_leading_zeros(count, x) counts the number of zero-bits from the
+   msb to the first nonzero bit in the UWtype X.  This is the number of
+   steps X needs to be shifted left to set the msb.  Undefined for X == 0,
+   unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
+
+   6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
+   from the least significant end.
+
+   7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+   high_addend_2, low_addend_2) adds two UWtype integers, composed by
+   HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+   respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
+   (i.e. carry out) is not stored anywhere, and is lost.
+
+   8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+   high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+   composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
+   LOW_SUBTRAHEND_2 respectively.  The result is placed in HIGH_DIFFERENCE
+   and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
+   and is lost.
+
+   If any of these macros are left undefined for a particular CPU,
+   C macros are used.  */
+
+/* The CPUs come in alphabetical order below.
+
+   Please add support for more CPUs here, or improve the current support
+   for the CPUs below!
+   (E.g. WE32100, IBM360.)  */
+
+#if defined (__GNUC__) && !defined (NO_ASM)
+
+/* We sometimes need to clobber "cc" with gcc2, but that would not be
+   understood by gcc1.  Use cpp to avoid major code duplication.  */
+#if __GNUC__ < 2
+#define __CLOBBER_CC
+#define __AND_CLOBBER_CC
+#else /* __GNUC__ >= 2 */
+#define __CLOBBER_CC : "cc"
+#define __AND_CLOBBER_CC , "cc"
+#endif /* __GNUC__ < 2 */
+
+#if defined (__aarch64__)
+
+#if W_TYPE_SIZE == 32
+#define count_leading_zeros(COUNT, X)	((COUNT) = __builtin_clz (X))
+#define count_trailing_zeros(COUNT, X)   ((COUNT) = __builtin_ctz (X))
+#define COUNT_LEADING_ZEROS_0 32
+#endif /* W_TYPE_SIZE == 32 */
+
+#if W_TYPE_SIZE == 64
+#define count_leading_zeros(COUNT, X)	((COUNT) = __builtin_clzll (X))
+#define count_trailing_zeros(COUNT, X)   ((COUNT) = __builtin_ctzll (X))
+#define COUNT_LEADING_ZEROS_0 64
+#endif /* W_TYPE_SIZE == 64 */
+
+#endif /* __aarch64__ */
+
+#if defined (__alpha) && W_TYPE_SIZE == 64
+/* There is a bug in g++ before version 5 that
+   errors on __builtin_alpha_umulh.  */
+#if !defined(__cplusplus) || __GNUC__ >= 5
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    UDItype __m0 = (m0), __m1 = (m1);					\
+    (ph) = __builtin_alpha_umulh (__m0, __m1);				\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#define UMUL_TIME 46
+#endif /* !c++ */
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do { UDItype __r;							\
+    (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));				\
+    (r) = __r;								\
+  } while (0)
+extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
+#define UDIV_TIME 220
+#endif /* LONGLONG_STANDALONE */
+#ifdef __alpha_cix__
+#define count_leading_zeros(COUNT,X)	((COUNT) = __builtin_clzl (X))
+#define count_trailing_zeros(COUNT,X)	((COUNT) = __builtin_ctzl (X))
+#define COUNT_LEADING_ZEROS_0 64
+#else
+#define count_leading_zeros(COUNT,X) \
+  do {									\
+    UDItype __xr = (X), __t, __a;					\
+    __t = __builtin_alpha_cmpbge (0, __xr);				\
+    __a = __clz_tab[__t ^ 0xff] - 1;					\
+    __t = __builtin_alpha_extbl (__xr, __a);				\
+    (COUNT) = 64 - (__clz_tab[__t] + __a*8);				\
+  } while (0)
+#define count_trailing_zeros(COUNT,X) \
+  do {									\
+    UDItype __xr = (X), __t, __a;					\
+    __t = __builtin_alpha_cmpbge (0, __xr);				\
+    __t = ~__t & -~__t;							\
+    __a = ((__t & 0xCC) != 0) * 2;					\
+    __a += ((__t & 0xF0) != 0) * 4;					\
+    __a += ((__t & 0xAA) != 0);						\
+    __t = __builtin_alpha_extbl (__xr, __a);				\
+    __a <<= 3;								\
+    __t &= -__t;							\
+    __a += ((__t & 0xCC) != 0) * 2;					\
+    __a += ((__t & 0xF0) != 0) * 4;					\
+    __a += ((__t & 0xAA) != 0);						\
+    (COUNT) = __a;							\
+  } while (0)
+#endif /* __alpha_cix__ */
+#endif /* __alpha */
+
+#if defined (__arc__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add.f	%1, %4, %5\n\tadc	%0, %2, %3"		\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%r" ((USItype) (ah)),					\
+	     "rIJ" ((USItype) (bh)),					\
+	     "%r" ((USItype) (al)),					\
+	     "rIJ" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub.f	%1, %4, %5\n\tsbc	%0, %2, %3"		\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "r" ((USItype) (ah)),					\
+	     "rIJ" ((USItype) (bh)),					\
+	     "r" ((USItype) (al)),					\
+	     "rIJ" ((USItype) (bl)))
+
+#define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v)
+#ifdef __ARC_NORM__
+#define count_leading_zeros(count, x) \
+  do									\
+    {									\
+      SItype c_;							\
+									\
+      __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\
+      (count) = c_ + 1;							\
+    }									\
+  while (0)
+#define COUNT_LEADING_ZEROS_0 32
+#endif
+#endif
+
+#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
+ && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("adds	%1, %4, %5\n\tadc	%0, %2, %3"		\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%r" ((USItype) (ah)),					\
+	     "rI" ((USItype) (bh)),					\
+	     "%r" ((USItype) (al)),					\
+	     "rI" ((USItype) (bl)) __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subs	%1, %4, %5\n\tsbc	%0, %2, %3"		\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "r" ((USItype) (ah)),					\
+	     "rI" ((USItype) (bh)),					\
+	     "r" ((USItype) (al)),					\
+	     "rI" ((USItype) (bl)) __CLOBBER_CC)
+# if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
+     || defined(__ARM_ARCH_3__)
+#  define umul_ppmm(xh, xl, a, b)					\
+  do {									\
+    register USItype __t0, __t1, __t2;					\
+    __asm__ ("%@ Inlined umul_ppmm\n"					\
+	   "	mov	%2, %5, lsr #16\n"				\
+	   "	mov	%0, %6, lsr #16\n"				\
+	   "	bic	%3, %5, %2, lsl #16\n"				\
+	   "	bic	%4, %6, %0, lsl #16\n"				\
+	   "	mul	%1, %3, %4\n"					\
+	   "	mul	%4, %2, %4\n"					\
+	   "	mul	%3, %0, %3\n"					\
+	   "	mul	%0, %2, %0\n"					\
+	   "	adds	%3, %4, %3\n"					\
+	   "	addcs	%0, %0, #65536\n"				\
+	   "	adds	%1, %1, %3, lsl #16\n"				\
+	   "	adc	%0, %0, %3, lsr #16"				\
+	   : "=&r" ((USItype) (xh)),					\
+	     "=r" ((USItype) (xl)),					\
+	     "=&r" (__t0), "=&r" (__t1), "=r" (__t2)			\
+	   : "r" ((USItype) (a)),					\
+	     "r" ((USItype) (b)) __CLOBBER_CC );			\
+  } while (0)
+#  define UMUL_TIME 20
+# else
+#  define umul_ppmm(xh, xl, a, b)					\
+  do {									\
+    /* Generate umull, under compiler control.  */			\
+    register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b);	\
+    (xl) = (USItype)__t0;						\
+    (xh) = (USItype)(__t0 >> 32);					\
+  } while (0)
+#  define UMUL_TIME 3
+# endif
+# define UDIV_TIME 100
+#endif /* __arm__ */
+
+#if defined(__arm__)
+/* Let gcc decide how best to implement count_leading_zeros.  */
+#define count_leading_zeros(COUNT,X)	((COUNT) = __builtin_clz (X))
+#define count_trailing_zeros(COUNT,X)   ((COUNT) = __builtin_ctz (X))
+#define COUNT_LEADING_ZEROS_0 32
+#endif
+
+#if defined (__AVR__)
+
+#if W_TYPE_SIZE == 16
+#define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
+#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
+#define COUNT_LEADING_ZEROS_0 16
+#endif /* W_TYPE_SIZE == 16 */
+
+#if W_TYPE_SIZE == 32
+#define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzl (X))
+#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
+#define COUNT_LEADING_ZEROS_0 32
+#endif /* W_TYPE_SIZE == 32 */
+
+#if W_TYPE_SIZE == 64
+#define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clzll (X))
+#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
+#define COUNT_LEADING_ZEROS_0 64
+#endif /* W_TYPE_SIZE == 64 */
+
+#endif /* defined (__AVR__) */
+
+#if defined (__CRIS__)
+
+#if __CRIS_arch_version >= 3
+#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
+#define COUNT_LEADING_ZEROS_0 32
+#endif /* __CRIS_arch_version >= 3 */
+
+#if __CRIS_arch_version >= 8
+#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
+#endif /* __CRIS_arch_version >= 8 */
+
+#if __CRIS_arch_version >= 10
+#define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v))
+#else
+#define __umulsidi3 __umulsidi3
+extern UDItype __umulsidi3 (USItype, USItype);
+#endif /* __CRIS_arch_version >= 10 */
+
+#define umul_ppmm(w1, w0, u, v)		\
+  do {					\
+    UDItype __x = __umulsidi3 (u, v);	\
+    (w0) = (USItype) (__x);		\
+    (w1) = (USItype) (__x >> 32);	\
+  } while (0)
+
+/* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for
+   DFmode ("double" intrinsics, avoiding two of the three insns handling
+   carry), but defining them as open-code C composing and doing the
+   operation in DImode (UDImode) shows that the DImode needs work:
+   register pressure from requiring neighboring registers and the
+   traffic to and from them come to dominate, in the 4.7 series.  */
+
+#endif /* defined (__CRIS__) */
+
+#if defined (__hppa) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0"				\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%rM" ((USItype) (ah)),					\
+	     "rM" ((USItype) (bh)),					\
+	     "%rM" ((USItype) (al)),					\
+	     "rM" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0"				\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "rM" ((USItype) (ah)),					\
+	     "rM" ((USItype) (bh)),					\
+	     "rM" ((USItype) (al)),					\
+	     "rM" ((USItype) (bl)))
+#if defined (_PA_RISC1_1)
+#define umul_ppmm(w1, w0, u, v) \
+  do {									\
+    union								\
+      {									\
+	UDItype __f;							\
+	struct {USItype __w1, __w0;} __w1w0;				\
+      } __t;								\
+    __asm__ ("xmpyu %1,%2,%0"						\
+	     : "=x" (__t.__f)						\
+	     : "x" ((USItype) (u)),					\
+	       "x" ((USItype) (v)));					\
+    (w1) = __t.__w1w0.__w1;						\
+    (w0) = __t.__w1w0.__w0;						\
+     } while (0)
+#define UMUL_TIME 8
+#else
+#define UMUL_TIME 30
+#endif
+#define UDIV_TIME 40
+#define count_leading_zeros(count, x) \
+  do {									\
+    USItype __tmp;							\
+    __asm__ (								\
+       "ldi		1,%0\n"						\
+"	extru,=		%1,15,16,%%r0		; Bits 31..16 zero?\n"	\
+"	extru,tr	%1,15,16,%1		; No.  Shift down, skip add.\n"\
+"	ldo		16(%0),%0		; Yes.  Perform add.\n"	\
+"	extru,=		%1,23,8,%%r0		; Bits 15..8 zero?\n"	\
+"	extru,tr	%1,23,8,%1		; No.  Shift down, skip add.\n"\
+"	ldo		8(%0),%0		; Yes.  Perform add.\n"	\
+"	extru,=		%1,27,4,%%r0		; Bits 7..4 zero?\n"	\
+"	extru,tr	%1,27,4,%1		; No.  Shift down, skip add.\n"\
+"	ldo		4(%0),%0		; Yes.  Perform add.\n"	\
+"	extru,=		%1,29,2,%%r0		; Bits 3..2 zero?\n"	\
+"	extru,tr	%1,29,2,%1		; No.  Shift down, skip add.\n"\
+"	ldo		2(%0),%0		; Yes.  Perform add.\n"	\
+"	extru		%1,30,1,%1		; Extract bit 1.\n"	\
+"	sub		%0,%1,%0		; Subtract it.\n"	\
+	: "=r" (count), "=r" (__tmp) : "1" (x));			\
+  } while (0)
+#endif
+
+#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
+#if !defined (__zarch__)
+#define smul_ppmm(xh, xl, m0, m1) \
+  do {									\
+    union {DItype __ll;							\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x;							\
+    __asm__ ("lr %N0,%1\n\tmr %0,%2"					\
+	     : "=&r" (__x.__ll)						\
+	     : "r" (m0), "r" (m1));					\
+    (xh) = __x.__i.__h; (xl) = __x.__i.__l;				\
+  } while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+  do {									\
+    union {DItype __ll;							\
+	   struct {USItype __h, __l;} __i;				\
+	  } __x;							\
+    __x.__i.__h = n1; __x.__i.__l = n0;					\
+    __asm__ ("dr %0,%2"							\
+	     : "=r" (__x.__ll)						\
+	     : "0" (__x.__ll), "r" (d));				\
+    (q) = __x.__i.__l; (r) = __x.__i.__h;				\
+  } while (0)
+#else
+#define smul_ppmm(xh, xl, m0, m1) \
+  do {                                                                  \
+    register SItype __r0 __asm__ ("0");					\
+    register SItype __r1 __asm__ ("1") = (m0);				\
+									\
+    __asm__ ("mr\t%%r0,%3"                                              \
+	     : "=r" (__r0), "=r" (__r1)					\
+	     : "r"  (__r1),  "r" (m1));					\
+    (xh) = __r0; (xl) = __r1;						\
+  } while (0)
+
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+  do {									\
+    register SItype __r0 __asm__ ("0") = (n1);				\
+    register SItype __r1 __asm__ ("1") = (n0);				\
+									\
+    __asm__ ("dr\t%%r0,%4"                                              \
+	     : "=r" (__r0), "=r" (__r1)					\
+	     : "r" (__r0), "r" (__r1), "r" (d));			\
+    (q) = __r1; (r) = __r0;						\
+  } while (0)
+#endif /* __zarch__ */
+#endif
+
+#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}"		\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%0" ((USItype) (ah)),					\
+	     "g" ((USItype) (bh)),					\
+	     "%1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}"		\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "g" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mul{l} %3"							\
+	   : "=a" ((USItype) (w0)),					\
+	     "=d" ((USItype) (w1))					\
+	   : "%0" ((USItype) (u)),					\
+	     "rm" ((USItype) (v)))
+#define udiv_qrnnd(q, r, n1, n0, dv) \
+  __asm__ ("div{l} %4"							\
+	   : "=a" ((USItype) (q)),					\
+	     "=d" ((USItype) (r))					\
+	   : "0" ((USItype) (n0)),					\
+	     "1" ((USItype) (n1)),					\
+	     "rm" ((USItype) (dv)))
+#define count_leading_zeros(count, x)	((count) = __builtin_clz (x))
+#define count_trailing_zeros(count, x)	((count) = __builtin_ctz (x))
+#define UMUL_TIME 40
+#define UDIV_TIME 40
+#endif /* 80x86 */
+
+#if defined (__x86_64__) && W_TYPE_SIZE == 64
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"		\
+	   : "=r" ((UDItype) (sh)),					\
+	     "=&r" ((UDItype) (sl))					\
+	   : "%0" ((UDItype) (ah)),					\
+	     "rme" ((UDItype) (bh)),					\
+	     "%1" ((UDItype) (al)),					\
+	     "rme" ((UDItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"		\
+	   : "=r" ((UDItype) (sh)),					\
+	     "=&r" ((UDItype) (sl))					\
+	   : "0" ((UDItype) (ah)),					\
+	     "rme" ((UDItype) (bh)),					\
+	     "1" ((UDItype) (al)),					\
+	     "rme" ((UDItype) (bl)))
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mul{q} %3"							\
+	   : "=a" ((UDItype) (w0)),					\
+	     "=d" ((UDItype) (w1))					\
+	   : "%0" ((UDItype) (u)),					\
+	     "rm" ((UDItype) (v)))
+#define udiv_qrnnd(q, r, n1, n0, dv) \
+  __asm__ ("div{q} %4"							\
+	   : "=a" ((UDItype) (q)),					\
+	     "=d" ((UDItype) (r))					\
+	   : "0" ((UDItype) (n0)),					\
+	     "1" ((UDItype) (n1)),					\
+	     "rm" ((UDItype) (dv)))
+#define count_leading_zeros(count, x)	((count) = __builtin_clzll (x))
+#define count_trailing_zeros(count, x)	((count) = __builtin_ctzll (x))
+#define UMUL_TIME 40
+#define UDIV_TIME 40
+#endif /* x86_64 */
+
+#if defined (__i960__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+  ({union {UDItype __ll;						\
+	   struct {USItype __l, __h;} __i;				\
+	  } __xx;							\
+  __asm__ ("emul	%2,%1,%0"					\
+	   : "=d" (__xx.__ll)						\
+	   : "%dI" ((USItype) (u)),					\
+	     "dI" ((USItype) (v)));					\
+  (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
+#define __umulsidi3(u, v) \
+  ({UDItype __w;							\
+    __asm__ ("emul	%2,%1,%0"					\
+	     : "=d" (__w)						\
+	     : "%dI" ((USItype) (u)),					\
+	       "dI" ((USItype) (v)));					\
+    __w; })
+#endif /* __i960__ */
+
+#if defined (__ia64) && W_TYPE_SIZE == 64
+/* This form encourages gcc (pre-release 3.4 at least) to emit predicated
+   "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
+   code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
+   register, which takes an extra cycle.  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
+  do {									\
+    UWtype __x;								\
+    __x = (al) - (bl);							\
+    if ((al) < (bl))							\
+      (sh) = (ah) - (bh) - 1;						\
+    else								\
+      (sh) = (ah) - (bh);						\
+    (sl) = __x;								\
+  } while (0)
+
+/* Do both product parts in assembly, since that gives better code with
+   all gcc versions.  Some callers will just use the upper part, and in
+   that situation we waste an instruction, but not any cycles.  */
+#define umul_ppmm(ph, pl, m0, m1)					\
+  __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0"		\
+	   : "=&f" (ph), "=f" (pl)					\
+	   : "f" (m0), "f" (m1))
+#define count_leading_zeros(count, x)					\
+  do {									\
+    UWtype _x = (x), _y, _a, _c;					\
+    __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x));		\
+    __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y));		\
+    _c = (_a - 1) << 3;							\
+    _x >>= _c;								\
+    if (_x >= 1 << 4)							\
+      _x >>= 4, _c += 4;						\
+    if (_x >= 1 << 2)							\
+      _x >>= 2, _c += 2;						\
+    _c += _x >> 1;							\
+    (count) =  W_TYPE_SIZE - 1 - _c;					\
+  } while (0)
+/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
+   based, and we don't need a special case for x==0 here */
+#define count_trailing_zeros(count, x)					\
+  do {									\
+    UWtype __ctz_x = (x);						\
+    __asm__ ("popcnt %0 = %1"						\
+	     : "=r" (count)						\
+	     : "r" ((__ctz_x-1) & ~__ctz_x));				\
+  } while (0)
+#define UMUL_TIME 14
+#endif
+
+#if defined (__M32R__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  /* The cmp clears the condition bit.  */ \
+  __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3"			\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "r" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "r" ((USItype) (bl))					\
+	   : "cbit")
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  /* The cmp clears the condition bit.  */ \
+  __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3"			\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "r" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "r" ((USItype) (bl))					\
+	   : "cbit")
+#endif /* __M32R__ */
+
+#if defined (__mc68000__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0"				\
+	   : "=d" ((USItype) (sh)),					\
+	     "=&d" ((USItype) (sl))					\
+	   : "%0" ((USItype) (ah)),					\
+	     "d" ((USItype) (bh)),					\
+	     "%1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0"				\
+	   : "=d" ((USItype) (sh)),					\
+	     "=&d" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "d" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+
+/* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
+#if (defined (__mc68020__) && !defined (__mc68060__))
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mulu%.l %3,%1:%0"						\
+	   : "=d" ((USItype) (w0)),					\
+	     "=d" ((USItype) (w1))					\
+	   : "%0" ((USItype) (u)),					\
+	     "dmi" ((USItype) (v)))
+#define UMUL_TIME 45
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("divu%.l %4,%1:%0"						\
+	   : "=d" ((USItype) (q)),					\
+	     "=d" ((USItype) (r))					\
+	   : "0" ((USItype) (n0)),					\
+	     "1" ((USItype) (n1)),					\
+	     "dmi" ((USItype) (d)))
+#define UDIV_TIME 90
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("divs%.l %4,%1:%0"						\
+	   : "=d" ((USItype) (q)),					\
+	     "=d" ((USItype) (r))					\
+	   : "0" ((USItype) (n0)),					\
+	     "1" ((USItype) (n1)),					\
+	     "dmi" ((USItype) (d)))
+
+#elif defined (__mcoldfire__) /* not mc68020 */
+
+#define umul_ppmm(xh, xl, a, b) \
+  __asm__ ("| Inlined umul_ppmm\n"					\
+	   "	move%.l	%2,%/d0\n"					\
+	   "	move%.l	%3,%/d1\n"					\
+	   "	move%.l	%/d0,%/d2\n"					\
+	   "	swap	%/d0\n"						\
+	   "	move%.l	%/d1,%/d3\n"					\
+	   "	swap	%/d1\n"						\
+	   "	move%.w	%/d2,%/d4\n"					\
+	   "	mulu	%/d3,%/d4\n"					\
+	   "	mulu	%/d1,%/d2\n"					\
+	   "	mulu	%/d0,%/d3\n"					\
+	   "	mulu	%/d0,%/d1\n"					\
+	   "	move%.l	%/d4,%/d0\n"					\
+	   "	clr%.w	%/d0\n"						\
+	   "	swap	%/d0\n"						\
+	   "	add%.l	%/d0,%/d2\n"					\
+	   "	add%.l	%/d3,%/d2\n"					\
+	   "	jcc	1f\n"						\
+	   "	add%.l	%#65536,%/d1\n"					\
+	   "1:	swap	%/d2\n"						\
+	   "	moveq	%#0,%/d0\n"					\
+	   "	move%.w	%/d2,%/d0\n"					\
+	   "	move%.w	%/d4,%/d2\n"					\
+	   "	move%.l	%/d2,%1\n"					\
+	   "	add%.l	%/d1,%/d0\n"					\
+	   "	move%.l	%/d0,%0"					\
+	   : "=g" ((USItype) (xh)),					\
+	     "=g" ((USItype) (xl))					\
+	   : "g" ((USItype) (a)),					\
+	     "g" ((USItype) (b))					\
+	   : "d0", "d1", "d2", "d3", "d4")
+#define UMUL_TIME 100
+#define UDIV_TIME 400
+#else /* not ColdFire */
+/* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
+#define umul_ppmm(xh, xl, a, b) \
+  __asm__ ("| Inlined umul_ppmm\n"					\
+	   "	move%.l	%2,%/d0\n"					\
+	   "	move%.l	%3,%/d1\n"					\
+	   "	move%.l	%/d0,%/d2\n"					\
+	   "	swap	%/d0\n"						\
+	   "	move%.l	%/d1,%/d3\n"					\
+	   "	swap	%/d1\n"						\
+	   "	move%.w	%/d2,%/d4\n"					\
+	   "	mulu	%/d3,%/d4\n"					\
+	   "	mulu	%/d1,%/d2\n"					\
+	   "	mulu	%/d0,%/d3\n"					\
+	   "	mulu	%/d0,%/d1\n"					\
+	   "	move%.l	%/d4,%/d0\n"					\
+	   "	eor%.w	%/d0,%/d0\n"					\
+	   "	swap	%/d0\n"						\
+	   "	add%.l	%/d0,%/d2\n"					\
+	   "	add%.l	%/d3,%/d2\n"					\
+	   "	jcc	1f\n"						\
+	   "	add%.l	%#65536,%/d1\n"					\
+	   "1:	swap	%/d2\n"						\
+	   "	moveq	%#0,%/d0\n"					\
+	   "	move%.w	%/d2,%/d0\n"					\
+	   "	move%.w	%/d4,%/d2\n"					\
+	   "	move%.l	%/d2,%1\n"					\
+	   "	add%.l	%/d1,%/d0\n"					\
+	   "	move%.l	%/d0,%0"					\
+	   : "=g" ((USItype) (xh)),					\
+	     "=g" ((USItype) (xl))					\
+	   : "g" ((USItype) (a)),					\
+	     "g" ((USItype) (b))					\
+	   : "d0", "d1", "d2", "d3", "d4")
+#define UMUL_TIME 100
+#define UDIV_TIME 400
+
+#endif /* not mc68020 */
+
+/* The '020, '030, '040 and '060 have bitfield insns.
+   cpu32 disguises as a 68020, but lacks them.  */
+#if defined (__mc68020__) && !defined (__mcpu32__)
+#define count_leading_zeros(count, x) \
+  __asm__ ("bfffo %1{%b2:%b2},%0"					\
+	   : "=d" ((USItype) (count))					\
+	   : "od" ((USItype) (x)), "n" (0))
+/* Some ColdFire architectures have a ff1 instruction supported via
+   __builtin_clz. */
+#elif defined (__mcfisaaplus__) || defined (__mcfisac__)
+#define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
+#define COUNT_LEADING_ZEROS_0 32
+#endif
+#endif /* mc68000 */
+
+#if defined (__m88000__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3"			\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%rJ" ((USItype) (ah)),					\
+	     "rJ" ((USItype) (bh)),					\
+	     "%rJ" ((USItype) (al)),					\
+	     "rJ" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3"			\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "rJ" ((USItype) (ah)),					\
+	     "rJ" ((USItype) (bh)),					\
+	     "rJ" ((USItype) (al)),					\
+	     "rJ" ((USItype) (bl)))
+#define count_leading_zeros(count, x) \
+  do {									\
+    USItype __cbtmp;							\
+    __asm__ ("ff1 %0,%1"						\
+	     : "=r" (__cbtmp)						\
+	     : "r" ((USItype) (x)));					\
+    (count) = __cbtmp ^ 31;						\
+  } while (0)
+#define COUNT_LEADING_ZEROS_0 63 /* sic */
+#if defined (__mc88110__)
+#define umul_ppmm(wh, wl, u, v) \
+  do {									\
+    union {UDItype __ll;						\
+	   struct {USItype __h, __l;} __i;				\
+	  } __xx;							\
+    __asm__ ("mulu.d	%0,%1,%2"					\
+	     : "=r" (__xx.__ll)						\
+	     : "r" ((USItype) (u)),					\
+	       "r" ((USItype) (v)));					\
+    (wh) = __xx.__i.__h;						\
+    (wl) = __xx.__i.__l;						\
+  } while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  ({union {UDItype __ll;						\
+	   struct {USItype __h, __l;} __i;				\
+	  } __xx;							\
+  USItype __q;								\
+  __xx.__i.__h = (n1); __xx.__i.__l = (n0);				\
+  __asm__ ("divu.d %0,%1,%2"						\
+	   : "=r" (__q)							\
+	   : "r" (__xx.__ll),						\
+	     "r" ((USItype) (d)));					\
+  (r) = (n0) - __q * (d); (q) = __q; })
+#define UMUL_TIME 5
+#define UDIV_TIME 25
+#else
+#define UMUL_TIME 17
+#define UDIV_TIME 150
+#endif /* __mc88110__ */
+#endif /* __m88000__ */
+
+#if defined (__mn10300__)
+# if defined (__AM33__)
+#  define count_leading_zeros(COUNT,X)	((COUNT) = __builtin_clz (X))
+#  define umul_ppmm(w1, w0, u, v)		\
+    asm("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
+#  define smul_ppmm(w1, w0, u, v)		\
+    asm("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
+# else
+#  define umul_ppmm(w1, w0, u, v)		\
+    asm("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
+#  define smul_ppmm(w1, w0, u, v)		\
+    asm("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
+# endif
+# define add_ssaaaa(sh, sl, ah, al, bh, bl)	\
+  do {						\
+    DWunion __s, __a, __b;			\
+    __a.s.low = (al); __a.s.high = (ah);	\
+    __b.s.low = (bl); __b.s.high = (bh);	\
+    __s.ll = __a.ll + __b.ll;			\
+    (sl) = __s.s.low; (sh) = __s.s.high;	\
+  } while (0)
+# define sub_ddmmss(sh, sl, ah, al, bh, bl)	\
+  do {						\
+    DWunion __s, __a, __b;			\
+    __a.s.low = (al); __a.s.high = (ah);	\
+    __b.s.low = (bl); __b.s.high = (bh);	\
+    __s.ll = __a.ll - __b.ll;			\
+    (sl) = __s.s.low; (sh) = __s.s.high;	\
+  } while (0)
+# define udiv_qrnnd(q, r, nh, nl, d)		\
+  asm("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
+# define sdiv_qrnnd(q, r, nh, nl, d)		\
+  asm("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
+# define UMUL_TIME 3
+# define UDIV_TIME 38
+#endif
+
+#if defined (__mips__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v)						\
+  do {									\
+    UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);		\
+    (w1) = (USItype) (__x >> 32);					\
+    (w0) = (USItype) (__x);						\
+  } while (0)
+#define UMUL_TIME 10
+#define UDIV_TIME 100
+
+#if (__mips == 32 || __mips == 64) && ! defined (__mips16)
+#define count_leading_zeros(COUNT,X)	((COUNT) = __builtin_clz (X))
+#define COUNT_LEADING_ZEROS_0 32
+#endif
+#endif /* __mips__ */
+
+#if defined (__ns32000__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+  ({union {UDItype __ll;						\
+	   struct {USItype __l, __h;} __i;				\
+	  } __xx;							\
+  __asm__ ("meid %2,%0"							\
+	   : "=g" (__xx.__ll)						\
+	   : "%0" ((USItype) (u)),					\
+	     "g" ((USItype) (v)));					\
+  (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
+#define __umulsidi3(u, v) \
+  ({UDItype __w;							\
+    __asm__ ("meid %2,%0"						\
+	     : "=g" (__w)						\
+	     : "%0" ((USItype) (u)),					\
+	       "g" ((USItype) (v)));					\
+    __w; })
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  ({union {UDItype __ll;						\
+	   struct {USItype __l, __h;} __i;				\
+	  } __xx;							\
+  __xx.__i.__h = (n1); __xx.__i.__l = (n0);				\
+  __asm__ ("deid %2,%0"							\
+	   : "=g" (__xx.__ll)						\
+	   : "0" (__xx.__ll),						\
+	     "g" ((USItype) (d)));					\
+  (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
+#define count_trailing_zeros(count,x) \
+  do {									\
+    __asm__ ("ffsd     %2,%0"						\
+	    : "=r" ((USItype) (count))					\
+	    : "0" ((USItype) 0),					\
+	      "r" ((USItype) (x)));					\
+  } while (0)
+#endif /* __ns32000__ */
+
+/* FIXME: We should test _IBMR2 here when we add assembly support for the
+   system vendor compilers.
+   FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
+   enough, since that hits ARM and m68k too.  */
+#if (defined (_ARCH_PPC)	/* AIX */				\
+     || defined (__powerpc__)	/* gcc */				\
+     || defined (__POWERPC__)	/* BEOS */				\
+     || defined (__ppc__)	/* Darwin */				\
+     || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */    \
+     || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */               \
+	 && CPU_FAMILY == PPC)                                                \
+     ) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (bh) && (bh) == 0)				\
+      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"		\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
+      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"		\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else								\
+      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"		\
+	     : "=r" (sh), "=&r" (sl)					\
+	     : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));		\
+  } while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (ah) && (ah) == 0)				\
+      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"	\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)		\
+      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"	\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == 0)			\
+      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"		\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
+      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"		\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else								\
+      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"	\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl));		\
+  } while (0)
+#define count_leading_zeros(count, x) \
+  __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 32
+#if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
+  || defined (__ppc__)                                                    \
+  || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */       \
+  || (defined (PPC) && defined (CPU_FAMILY)    /* VxWorks */                  \
+	 && CPU_FAMILY == PPC)
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    USItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    SItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#define SMUL_TIME 14
+#define UDIV_TIME 120
+#endif
+#endif /* 32-bit POWER architecture variants.  */
+
+/* We should test _IBMR2 here when we add assembly support for the system
+   vendor compilers.  */
+#if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (bh) && (bh) == 0)				\
+      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"		\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)		\
+      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"		\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+    else								\
+      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"		\
+	     : "=r" (sh), "=&r" (sl)					\
+	     : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl));		\
+  } while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    if (__builtin_constant_p (ah) && (ah) == 0)				\
+      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"	\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)		\
+      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"	\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == 0)			\
+      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"		\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)		\
+      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"		\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else								\
+      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"	\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl));		\
+  } while (0)
+#define count_leading_zeros(count, x) \
+  __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 64
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    UDItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    DItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
+    (pl) = __m0 * __m1;							\
+  } while (0)
+#define SMUL_TIME 14  /* ??? */
+#define UDIV_TIME 120 /* ??? */
+#endif /* 64-bit PowerPC.  */
+
+#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("a %1,%5\n\tae %0,%3"					\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%0" ((USItype) (ah)),					\
+	     "r" ((USItype) (bh)),					\
+	     "%1" ((USItype) (al)),					\
+	     "r" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("s %1,%5\n\tse %0,%3"					\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "r" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "r" ((USItype) (bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {									\
+    USItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ (								\
+       "s	r2,r2\n"						\
+"	mts	r10,%2\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	m	r2,%3\n"						\
+"	cas	%0,r2,r0\n"						\
+"	mfs	r10,%1"							\
+	     : "=r" ((USItype) (ph)),					\
+	       "=r" ((USItype) (pl))					\
+	     : "%r" (__m0),						\
+		"r" (__m1)						\
+	     : "r2");							\
+    (ph) += ((((SItype) __m0 >> 31) & __m1)				\
+	     + (((SItype) __m1 >> 31) & __m0));				\
+  } while (0)
+#define UMUL_TIME 20
+#define UDIV_TIME 200
+#define count_leading_zeros(count, x) \
+  do {									\
+    if ((x) >= 0x10000)							\
+      __asm__ ("clz	%0,%1"						\
+	       : "=r" ((USItype) (count))				\
+	       : "r" ((USItype) (x) >> 16));				\
+    else								\
+      {									\
+	__asm__ ("clz	%0,%1"						\
+		 : "=r" ((USItype) (count))				\
+		 : "r" ((USItype) (x)));					\
+	(count) += 16;							\
+      }									\
+  } while (0)
+#endif
+
+#if defined(__sh__) && (!defined (__SHMEDIA__) || !__SHMEDIA__) && W_TYPE_SIZE == 32
+#ifndef __sh1__
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ (								\
+       "dmulu.l	%2,%3\n\tsts%M1	macl,%1\n\tsts%M0	mach,%0"	\
+	   : "=r<" ((USItype)(w1)),					\
+	     "=r<" ((USItype)(w0))					\
+	   : "r" ((USItype)(u)),					\
+	     "r" ((USItype)(v))						\
+	   : "macl", "mach")
+#define UMUL_TIME 5
+#endif
+
+/* This is the same algorithm as __udiv_qrnnd_c.  */
+#define UDIV_NEEDS_NORMALIZATION 1
+
+#ifdef __FDPIC__
+/* FDPIC needs a special version of the asm fragment to extract the
+   code address from the function descriptor. __udiv_qrnnd_16 is
+   assumed to be local and not to use the GOT, so loading r12 is
+   not needed. */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do {									\
+    extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)			\
+			__attribute__ ((visibility ("hidden")));	\
+    /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */	\
+    __asm__ (								\
+	"mov%M4	%4,r5\n"						\
+"	swap.w	%3,r4\n"						\
+"	swap.w	r5,r6\n"						\
+"	mov.l	@%5,r2\n"						\
+"	jsr	@r2\n"							\
+"	shll16	r6\n"							\
+"	swap.w	r4,r4\n"						\
+"	mov.l	@%5,r2\n"						\
+"	jsr	@r2\n"							\
+"	swap.w	r1,%0\n"						\
+"	or	r1,%0"							\
+	: "=r" (q), "=&z" (r)						\
+	: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)		\
+	: "r1", "r2", "r4", "r5", "r6", "pr", "t");			\
+  } while (0)
+#else
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  do {									\
+    extern UWtype __udiv_qrnnd_16 (UWtype, UWtype)			\
+			__attribute__ ((visibility ("hidden")));	\
+    /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */	\
+    __asm__ (								\
+	"mov%M4 %4,r5\n"						\
+"	swap.w %3,r4\n"							\
+"	swap.w r5,r6\n"							\
+"	jsr @%5\n"							\
+"	shll16 r6\n"							\
+"	swap.w r4,r4\n"							\
+"	jsr @%5\n"							\
+"	swap.w r1,%0\n"							\
+"	or r1,%0"							\
+	: "=r" (q), "=&z" (r)						\
+	: "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16)		\
+	: "r1", "r2", "r4", "r5", "r6", "pr", "t");			\
+  } while (0)
+#endif /* __FDPIC__  */
+
+#define UDIV_TIME 80
+
+#define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
+  __asm__ ("clrt;subc %5,%1; subc %4,%0"				\
+	   : "=r" (sh), "=r" (sl)					\
+	   : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
+
+#endif /* __sh__ */
+
+#if defined (__SH5__) && defined (__SHMEDIA__) && __SHMEDIA__ && W_TYPE_SIZE == 32
+#define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v)
+#define count_leading_zeros(count, x) \
+  do									\
+    {									\
+      UDItype x_ = (USItype)(x);					\
+      SItype c_;							\
+									\
+      __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_));			\
+      (count) = c_ - 31;						\
+    }									\
+  while (0)
+#define COUNT_LEADING_ZEROS_0 32
+#endif
+
+#if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
+    && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0"				\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "%rJ" ((USItype) (ah)),					\
+	     "rI" ((USItype) (bh)),					\
+	     "%rJ" ((USItype) (al)),					\
+	     "rI" ((USItype) (bl))					\
+	   __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0"				\
+	   : "=r" ((USItype) (sh)),					\
+	     "=&r" ((USItype) (sl))					\
+	   : "rJ" ((USItype) (ah)),					\
+	     "rI" ((USItype) (bh)),					\
+	     "rJ" ((USItype) (al)),					\
+	     "rI" ((USItype) (bl))					\
+	   __CLOBBER_CC)
+#if defined (__sparc_v9__)
+#define umul_ppmm(w1, w0, u, v) \
+  do {									\
+    register USItype __g1 asm ("g1");					\
+    __asm__ ("umul\t%2,%3,%1\n\t"					\
+	     "srlx\t%1, 32, %0"						\
+	     : "=r" ((USItype) (w1)),					\
+	       "=r" (__g1)						\
+	     : "r" ((USItype) (u)),					\
+	       "r" ((USItype) (v)));					\
+    (w0) = __g1;							\
+  } while (0)
+#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
+  __asm__ ("mov\t%2,%%y\n\t"						\
+	   "udiv\t%3,%4,%0\n\t"						\
+	   "umul\t%0,%4,%1\n\t"						\
+	   "sub\t%3,%1,%1"						\
+	   : "=&r" ((USItype) (__q)),					\
+	     "=&r" ((USItype) (__r))					\
+	   : "r" ((USItype) (__n1)),					\
+	     "r" ((USItype) (__n0)),					\
+	     "r" ((USItype) (__d)))
+#else
+#if defined (__sparc_v8__)
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("umul %2,%3,%1;rd %%y,%0"					\
+	   : "=r" ((USItype) (w1)),					\
+	     "=r" ((USItype) (w0))					\
+	   : "r" ((USItype) (u)),					\
+	     "r" ((USItype) (v)))
+#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
+  __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
+	   : "=&r" ((USItype) (__q)),					\
+	     "=&r" ((USItype) (__r))					\
+	   : "r" ((USItype) (__n1)),					\
+	     "r" ((USItype) (__n0)),					\
+	     "r" ((USItype) (__d)))
+#else
+#if defined (__sparclite__)
+/* This has hardware multiply but not divide.  It also has two additional
+   instructions scan (ffs from high bit) and divscc.  */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("umul %2,%3,%1;rd %%y,%0"					\
+	   : "=r" ((USItype) (w1)),					\
+	     "=r" ((USItype) (w0))					\
+	   : "r" ((USItype) (u)),					\
+	     "r" ((USItype) (v)))
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("! Inlined udiv_qrnnd\n"					\
+"	wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n"	\
+"	tst	%%g0\n"							\
+"	divscc	%3,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%%g1\n"						\
+"	divscc	%%g1,%4,%0\n"						\
+"	rd	%%y,%1\n"						\
+"	bl,a 1f\n"							\
+"	add	%1,%4,%1\n"						\
+"1:	! End of inline udiv_qrnnd"					\
+	   : "=r" ((USItype) (q)),					\
+	     "=r" ((USItype) (r))					\
+	   : "r" ((USItype) (n1)),					\
+	     "r" ((USItype) (n0)),					\
+	     "rI" ((USItype) (d))					\
+	   : "g1" __AND_CLOBBER_CC)
+#define UDIV_TIME 37
+#define count_leading_zeros(count, x) \
+  do {                                                                  \
+  __asm__ ("scan %1,1,%0"                                               \
+	   : "=r" ((USItype) (count))                                   \
+	   : "r" ((USItype) (x)));					\
+  } while (0)
+/* Early sparclites return 63 for an argument of 0, but they warn that future
+   implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
+   undefined.  */
+#else
+/* SPARC without integer multiplication and divide instructions.
+   (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("! Inlined umul_ppmm\n"					\
+"	wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n"\
+"	sra	%3,31,%%o5	! Don't move this insn\n"		\
+"	and	%2,%%o5,%%o5	! Don't move this insn\n"		\
+"	andcc	%%g0,0,%%g1	! Don't move this insn\n"		\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,%3,%%g1\n"						\
+"	mulscc	%%g1,0,%%g1\n"						\
+"	add	%%g1,%%o5,%0\n"						\
+"	rd	%%y,%1"							\
+	   : "=r" ((USItype) (w1)),					\
+	     "=r" ((USItype) (w0))					\
+	   : "%rI" ((USItype) (u)),					\
+	     "r" ((USItype) (v))						\
+	   : "g1", "o5" __AND_CLOBBER_CC)
+#define UMUL_TIME 39		/* 39 instructions */
+/* It's quite necessary to add this much assembler for the sparc.
+   The default udiv_qrnnd (in C) is more than 10 times slower!  */
+#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
+  __asm__ ("! Inlined udiv_qrnnd\n"					\
+"	mov	32,%%g1\n"						\
+"	subcc	%1,%2,%%g0\n"						\
+"1:	bcs	5f\n"							\
+"	 addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n"	\
+"	sub	%1,%2,%1	! this kills msb of n\n"		\
+"	addx	%1,%1,%1	! so this can't give carry\n"		\
+"	subcc	%%g1,1,%%g1\n"						\
+"2:	bne	1b\n"							\
+"	 subcc	%1,%2,%%g0\n"						\
+"	bcs	3f\n"							\
+"	 addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n"	\
+"	b	3f\n"							\
+"	 sub	%1,%2,%1	! this kills msb of n\n"		\
+"4:	sub	%1,%2,%1\n"						\
+"5:	addxcc	%1,%1,%1\n"						\
+"	bcc	2b\n"							\
+"	 subcc	%%g1,1,%%g1\n"						\
+"! Got carry from n.  Subtract next step to cancel this carry.\n"	\
+"	bne	4b\n"							\
+"	 addcc	%0,%0,%0	! shift n1n0 and a 0-bit in lsb\n"	\
+"	sub	%1,%2,%1\n"						\
+"3:	xnor	%0,0,%0\n"						\
+"	! End of inline udiv_qrnnd"					\
+	   : "=&r" ((USItype) (__q)),					\
+	     "=&r" ((USItype) (__r))					\
+	   : "r" ((USItype) (__d)),					\
+	     "1" ((USItype) (__n1)),					\
+	     "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
+#define UDIV_TIME (3+7*32)	/* 7 instructions/iteration. 32 iterations.  */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+#endif /* __sparc_v9__ */
+#endif /* sparc32 */
+
+#if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
+    && W_TYPE_SIZE == 64
+#define add_ssaaaa(sh, sl, ah, al, bh, bl)				\
+  do {									\
+    UDItype __carry = 0;						\
+    __asm__ ("addcc\t%r5,%6,%1\n\t"					\
+	     "add\t%r3,%4,%0\n\t"					\
+	     "movcs\t%%xcc, 1, %2\n\t"					\
+	     "add\t%0, %2, %0"						\
+	     : "=r" ((UDItype)(sh)),				      	\
+	       "=&r" ((UDItype)(sl)),				      	\
+	       "+r" (__carry)				      		\
+	     : "%rJ" ((UDItype)(ah)),				     	\
+	       "rI" ((UDItype)(bh)),				      	\
+	       "%rJ" ((UDItype)(al)),				     	\
+	       "rI" ((UDItype)(bl))				       	\
+	     __CLOBBER_CC);						\
+  } while (0)
+
+#define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
+  do {									\
+    UDItype __carry = 0;						\
+    __asm__ ("subcc\t%r5,%6,%1\n\t"					\
+	     "sub\t%r3,%4,%0\n\t"					\
+	     "movcs\t%%xcc, 1, %2\n\t"					\
+	     "sub\t%0, %2, %0"						\
+	     : "=r" ((UDItype)(sh)),				      	\
+	       "=&r" ((UDItype)(sl)),				      	\
+	       "+r" (__carry)				      		\
+	     : "%rJ" ((UDItype)(ah)),				     	\
+	       "rI" ((UDItype)(bh)),				      	\
+	       "%rJ" ((UDItype)(al)),				     	\
+	       "rI" ((UDItype)(bl))				       	\
+	     __CLOBBER_CC);						\
+  } while (0)
+
+#define umul_ppmm(wh, wl, u, v)						\
+  do {									\
+	  UDItype tmp1, tmp2, tmp3, tmp4;				\
+	  __asm__ __volatile__ (					\
+		   "srl %7,0,%3\n\t"					\
+		   "mulx %3,%6,%1\n\t"					\
+		   "srlx %6,32,%2\n\t"					\
+		   "mulx %2,%3,%4\n\t"					\
+		   "sllx %4,32,%5\n\t"					\
+		   "srl %6,0,%3\n\t"					\
+		   "sub %1,%5,%5\n\t"					\
+		   "srlx %5,32,%5\n\t"					\
+		   "addcc %4,%5,%4\n\t"					\
+		   "srlx %7,32,%5\n\t"					\
+		   "mulx %3,%5,%3\n\t"					\
+		   "mulx %2,%5,%5\n\t"					\
+		   "sethi %%hi(0x80000000),%2\n\t"			\
+		   "addcc %4,%3,%4\n\t"					\
+		   "srlx %4,32,%4\n\t"					\
+		   "add %2,%2,%2\n\t"					\
+		   "movcc %%xcc,%%g0,%2\n\t"				\
+		   "addcc %5,%4,%5\n\t"					\
+		   "sllx %3,32,%3\n\t"					\
+		   "add %1,%3,%1\n\t"					\
+		   "add %5,%2,%0"					\
+	   : "=r" ((UDItype)(wh)),					\
+	     "=&r" ((UDItype)(wl)),					\
+	     "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4)	\
+	   : "r" ((UDItype)(u)),					\
+	     "r" ((UDItype)(v))						\
+	   __CLOBBER_CC);						\
+  } while (0)
+#define UMUL_TIME 96
+#define UDIV_TIME 230
+#endif /* sparc64 */
+
+#if defined (__vax__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addl2 %5,%1\n\tadwc %3,%0"					\
+	   : "=g" ((USItype) (sh)),					\
+	     "=&g" ((USItype) (sl))					\
+	   : "%0" ((USItype) (ah)),					\
+	     "g" ((USItype) (bh)),					\
+	     "%1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subl2 %5,%1\n\tsbwc %3,%0"					\
+	   : "=g" ((USItype) (sh)),					\
+	     "=&g" ((USItype) (sl))					\
+	   : "0" ((USItype) (ah)),					\
+	     "g" ((USItype) (bh)),					\
+	     "1" ((USItype) (al)),					\
+	     "g" ((USItype) (bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+  do {									\
+    union {								\
+	UDItype __ll;							\
+	struct {USItype __l, __h;} __i;					\
+      } __xx;								\
+    USItype __m0 = (m0), __m1 = (m1);					\
+    __asm__ ("emul %1,%2,$0,%0"						\
+	     : "=r" (__xx.__ll)						\
+	     : "g" (__m0),						\
+	       "g" (__m1));						\
+    (xh) = __xx.__i.__h;						\
+    (xl) = __xx.__i.__l;						\
+    (xh) += ((((SItype) __m0 >> 31) & __m1)				\
+	     + (((SItype) __m1 >> 31) & __m0));				\
+  } while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+  do {									\
+    union {DItype __ll;							\
+	   struct {SItype __l, __h;} __i;				\
+	  } __xx;							\
+    __xx.__i.__h = n1; __xx.__i.__l = n0;				\
+    __asm__ ("ediv %3,%2,%0,%1"						\
+	     : "=g" (q), "=g" (r)					\
+	     : "g" (__xx.__ll), "g" (d));				\
+  } while (0)
+#endif /* __vax__ */
+
+#ifdef _TMS320C6X
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do									\
+    {									\
+      UDItype __ll;							\
+      __asm__ ("addu .l1 %1, %2, %0"					\
+	       : "=a" (__ll) : "a" (al), "a" (bl));			\
+      (sl) = (USItype)__ll;						\
+      (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh);			\
+    }									\
+  while (0)
+
+#ifdef _TMS320C6400_PLUS
+#define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v)
+#define umul_ppmm(w1, w0, u, v)						\
+  do {									\
+    UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);		\
+    (w1) = (USItype) (__x >> 32);					\
+    (w0) = (USItype) (__x);						\
+  } while (0)
+#endif  /* _TMS320C6400_PLUS */
+
+#define count_leading_zeros(count, x)	((count) = __builtin_clz (x))
+#ifdef _TMS320C6400
+#define count_trailing_zeros(count, x)	((count) = __builtin_ctz (x))
+#endif
+#define UMUL_TIME 4
+#define UDIV_TIME 40
+#endif /* _TMS320C6X */
+
+#if defined (__xtensa__) && W_TYPE_SIZE == 32
+/* This code is not Xtensa-configuration-specific, so rely on the compiler
+   to expand builtin functions depending on what configuration features
+   are available.  This avoids library calls when the operation can be
+   performed in-line.  */
+#define umul_ppmm(w1, w0, u, v)						\
+  do {									\
+    DWunion __w;							\
+    __w.ll = __builtin_umulsidi3 (u, v);				\
+    w1 = __w.s.high;							\
+    w0 = __w.s.low;							\
+  } while (0)
+#define __umulsidi3(u, v)		__builtin_umulsidi3 (u, v)
+#define count_leading_zeros(COUNT, X)	((COUNT) = __builtin_clz (X))
+#define count_trailing_zeros(COUNT, X)	((COUNT) = __builtin_ctz (X))
+#endif /* __xtensa__ */
+
+#if defined xstormy16
+extern UHItype __stormy16_count_leading_zeros (UHItype);
+#define count_leading_zeros(count, x)					\
+  do									\
+    {									\
+      UHItype size;							\
+									\
+      /* We assume that W_TYPE_SIZE is a multiple of 16...  */		\
+      for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16)		\
+	{								\
+	  UHItype c;							\
+									\
+	  c = __clzhi2 ((x) >> (size - 16));				\
+	  (count) += c;							\
+	  if (c != 16)							\
+	    break;							\
+	}								\
+    }									\
+  while (0)
+#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#endif
+
+#if defined (__z8000__) && W_TYPE_SIZE == 16
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("add	%H1,%H5\n\tadc	%H0,%H3"				\
+	   : "=r" ((unsigned int)(sh)),					\
+	     "=&r" ((unsigned int)(sl))					\
+	   : "%0" ((unsigned int)(ah)),					\
+	     "r" ((unsigned int)(bh)),					\
+	     "%1" ((unsigned int)(al)),					\
+	     "rQR" ((unsigned int)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("sub	%H1,%H5\n\tsbc	%H0,%H3"				\
+	   : "=r" ((unsigned int)(sh)),					\
+	     "=&r" ((unsigned int)(sl))					\
+	   : "0" ((unsigned int)(ah)),					\
+	     "r" ((unsigned int)(bh)),					\
+	     "1" ((unsigned int)(al)),					\
+	     "rQR" ((unsigned int)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+  do {									\
+    union {long int __ll;						\
+	   struct {unsigned int __h, __l;} __i;				\
+	  } __xx;							\
+    unsigned int __m0 = (m0), __m1 = (m1);				\
+    __asm__ ("mult	%S0,%H3"					\
+	     : "=r" (__xx.__i.__h),					\
+	       "=r" (__xx.__i.__l)					\
+	     : "%1" (__m0),						\
+	       "rQR" (__m1));						\
+    (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;				\
+    (xh) += ((((signed int) __m0 >> 15) & __m1)				\
+	     + (((signed int) __m1 >> 15) & __m0));			\
+  } while (0)
+#endif /* __z8000__ */
+
+#endif /* __GNUC__ */
+
+/* If this machine has no inline assembler, use C macros.  */
+
+#if !defined (add_ssaaaa)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do {									\
+    UWtype __x;								\
+    __x = (al) + (bl);							\
+    (sh) = (ah) + (bh) + (__x < (al));					\
+    (sl) = __x;								\
+  } while (0)
+#endif
+
+#if !defined (sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do {									\
+    UWtype __x;								\
+    __x = (al) - (bl);							\
+    (sh) = (ah) - (bh) - (__x > (al));					\
+    (sl) = __x;								\
+  } while (0)
+#endif
+
+/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
+   smul_ppmm.  */
+#if !defined (umul_ppmm) && defined (smul_ppmm)
+#define umul_ppmm(w1, w0, u, v)						\
+  do {									\
+    UWtype __w1;							\
+    UWtype __xm0 = (u), __xm1 = (v);					\
+    smul_ppmm (__w1, w0, __xm0, __xm1);					\
+    (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)		\
+		+ (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);		\
+  } while (0)
+#endif
+
+/* If we still don't have umul_ppmm, define it using plain C.  */
+#if !defined (umul_ppmm)
+#define umul_ppmm(w1, w0, u, v)						\
+  do {									\
+    UWtype __x0, __x1, __x2, __x3;					\
+    UHWtype __ul, __vl, __uh, __vh;					\
+									\
+    __ul = __ll_lowpart (u);						\
+    __uh = __ll_highpart (u);						\
+    __vl = __ll_lowpart (v);						\
+    __vh = __ll_highpart (v);						\
+									\
+    __x0 = (UWtype) __ul * __vl;					\
+    __x1 = (UWtype) __ul * __vh;					\
+    __x2 = (UWtype) __uh * __vl;					\
+    __x3 = (UWtype) __uh * __vh;					\
+									\
+    __x1 += __ll_highpart (__x0);/* this can't give carry */		\
+    __x1 += __x2;		/* but this indeed can */		\
+    if (__x1 < __x2)		/* did we get it? */			\
+      __x3 += __ll_B;		/* yes, add it in the proper pos.  */	\
+									\
+    (w1) = __x3 + __ll_highpart (__x1);					\
+    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);		\
+  } while (0)
+#endif
+
+#if !defined (__umulsidi3)
+#define __umulsidi3(u, v) \
+  ({DWunion __w;							\
+    umul_ppmm (__w.s.high, __w.s.low, u, v);				\
+    __w.ll; })
+#endif
+
+/* Define this unconditionally, so it can be used for debugging.  */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+  do {									\
+    UWtype __d1, __d0, __q1, __q0;					\
+    UWtype __r1, __r0, __m;						\
+    __d1 = __ll_highpart (d);						\
+    __d0 = __ll_lowpart (d);						\
+									\
+    __r1 = (n1) % __d1;							\
+    __q1 = (n1) / __d1;							\
+    __m = (UWtype) __q1 * __d0;						\
+    __r1 = __r1 * __ll_B | __ll_highpart (n0);				\
+    if (__r1 < __m)							\
+      {									\
+	__q1--, __r1 += (d);						\
+	if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
+	  if (__r1 < __m)						\
+	    __q1--, __r1 += (d);					\
+      }									\
+    __r1 -= __m;							\
+									\
+    __r0 = __r1 % __d1;							\
+    __q0 = __r1 / __d1;							\
+    __m = (UWtype) __q0 * __d0;						\
+    __r0 = __r0 * __ll_B | __ll_lowpart (n0);				\
+    if (__r0 < __m)							\
+      {									\
+	__q0--, __r0 += (d);						\
+	if (__r0 >= (d))						\
+	  if (__r0 < __m)						\
+	    __q0--, __r0 += (d);					\
+      }									\
+    __r0 -= __m;							\
+									\
+    (q) = (UWtype) __q1 * __ll_B | __q0;				\
+    (r) = __r0;								\
+  } while (0)
+
+/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
+   __udiv_w_sdiv (defined in libgcc or elsewhere).  */
+#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
+#define udiv_qrnnd(q, r, nh, nl, d) \
+  do {									\
+    extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype);	\
+    UWtype __r;								\
+    (q) = __udiv_w_sdiv (&__r, nh, nl, d);				\
+    (r) = __r;								\
+  } while (0)
+#endif
+
+/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
+#if !defined (udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+#if !defined (count_leading_zeros)
+#define count_leading_zeros(count, x) \
+  do {									\
+    UWtype __xr = (x);							\
+    UWtype __a;								\
+									\
+    if (W_TYPE_SIZE <= 32)						\
+      {									\
+	__a = __xr < ((UWtype)1<<2*__BITS4)				\
+	  ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4)			\
+	  : (__xr < ((UWtype)1<<3*__BITS4) ?  2*__BITS4 : 3*__BITS4);	\
+      }									\
+    else								\
+      {									\
+	for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)			\
+	  if (((__xr >> __a) & 0xff) != 0)				\
+	    break;							\
+      }									\
+									\
+    (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a);		\
+  } while (0)
+#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#endif
+
+#if !defined (count_trailing_zeros)
+/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
+   defined in asm, but if it is not, the C version above is good enough.  */
+#define count_trailing_zeros(count, x) \
+  do {									\
+    UWtype __ctz_x = (x);						\
+    UWtype __ctz_c;							\
+    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);			\
+    (count) = W_TYPE_SIZE - 1 - __ctz_c;				\
+  } while (0)
+#endif
+
+#ifndef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0
+#endif
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/op-1.h b/libc/sysdeps/linux/sparc64/soft-fp/op-1.h
new file mode 100644
index 000000000..e1e39664d
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/op-1.h
@@ -0,0 +1,369 @@
+/* Software floating-point emulation.
+   Basic one-word fraction declaration and manipulation.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef SOFT_FP_OP_1_H
+#define SOFT_FP_OP_1_H	1
+
+#define _FP_FRAC_DECL_1(X)	_FP_W_TYPE X##_f _FP_ZERO_INIT
+#define _FP_FRAC_COPY_1(D, S)	(D##_f = S##_f)
+#define _FP_FRAC_SET_1(X, I)	(X##_f = I)
+#define _FP_FRAC_HIGH_1(X)	(X##_f)
+#define _FP_FRAC_LOW_1(X)	(X##_f)
+#define _FP_FRAC_WORD_1(X, w)	(X##_f)
+
+#define _FP_FRAC_ADDI_1(X, I)	(X##_f += I)
+#define _FP_FRAC_SLL_1(X, N)			\
+  do						\
+    {						\
+      if (__builtin_constant_p (N) && (N) == 1)	\
+	X##_f += X##_f;				\
+      else					\
+	X##_f <<= (N);				\
+    }						\
+  while (0)
+#define _FP_FRAC_SRL_1(X, N)	(X##_f >>= N)
+
+/* Right shift with sticky-lsb.  */
+#define _FP_FRAC_SRST_1(X, S, N, sz)	__FP_FRAC_SRST_1 (X##_f, S, (N), (sz))
+#define _FP_FRAC_SRS_1(X, N, sz)	__FP_FRAC_SRS_1 (X##_f, (N), (sz))
+
+#define __FP_FRAC_SRST_1(X, S, N, sz)			\
+  do							\
+    {							\
+      S = (__builtin_constant_p (N) && (N) == 1		\
+	   ? X & 1					\
+	   : (X << (_FP_W_TYPE_SIZE - (N))) != 0);	\
+      X = X >> (N);					\
+    }							\
+  while (0)
+
+#define __FP_FRAC_SRS_1(X, N, sz)				\
+  (X = (X >> (N) | (__builtin_constant_p (N) && (N) == 1	\
+		    ? X & 1					\
+		    : (X << (_FP_W_TYPE_SIZE - (N))) != 0)))
+
+#define _FP_FRAC_ADD_1(R, X, Y)	(R##_f = X##_f + Y##_f)
+#define _FP_FRAC_SUB_1(R, X, Y)	(R##_f = X##_f - Y##_f)
+#define _FP_FRAC_DEC_1(X, Y)	(X##_f -= Y##_f)
+#define _FP_FRAC_CLZ_1(z, X)	__FP_CLZ ((z), X##_f)
+
+/* Predicates.  */
+#define _FP_FRAC_NEGP_1(X)	((_FP_WS_TYPE) X##_f < 0)
+#define _FP_FRAC_ZEROP_1(X)	(X##_f == 0)
+#define _FP_FRAC_OVERP_1(fs, X)	(X##_f & _FP_OVERFLOW_##fs)
+#define _FP_FRAC_CLEAR_OVERP_1(fs, X)	(X##_f &= ~_FP_OVERFLOW_##fs)
+#define _FP_FRAC_HIGHBIT_DW_1(fs, X)	(X##_f & _FP_HIGHBIT_DW_##fs)
+#define _FP_FRAC_EQ_1(X, Y)	(X##_f == Y##_f)
+#define _FP_FRAC_GE_1(X, Y)	(X##_f >= Y##_f)
+#define _FP_FRAC_GT_1(X, Y)	(X##_f > Y##_f)
+
+#define _FP_ZEROFRAC_1		0
+#define _FP_MINFRAC_1		1
+#define _FP_MAXFRAC_1		(~(_FP_WS_TYPE) 0)
+
+/* Unpack the raw bits of a native fp value.  Do not classify or
+   normalize the data.  */
+
+#define _FP_UNPACK_RAW_1(fs, X, val)			\
+  do							\
+    {							\
+      union _FP_UNION_##fs _FP_UNPACK_RAW_1_flo;	\
+      _FP_UNPACK_RAW_1_flo.flt = (val);			\
+							\
+      X##_f = _FP_UNPACK_RAW_1_flo.bits.frac;		\
+      X##_e = _FP_UNPACK_RAW_1_flo.bits.exp;		\
+      X##_s = _FP_UNPACK_RAW_1_flo.bits.sign;		\
+    }							\
+  while (0)
+
+#define _FP_UNPACK_RAW_1_P(fs, X, val)			\
+  do							\
+    {							\
+      union _FP_UNION_##fs *_FP_UNPACK_RAW_1_P_flo	\
+	= (union _FP_UNION_##fs *) (val);		\
+							\
+      X##_f = _FP_UNPACK_RAW_1_P_flo->bits.frac;	\
+      X##_e = _FP_UNPACK_RAW_1_P_flo->bits.exp;		\
+      X##_s = _FP_UNPACK_RAW_1_P_flo->bits.sign;	\
+    }							\
+  while (0)
+
+/* Repack the raw bits of a native fp value.  */
+
+#define _FP_PACK_RAW_1(fs, val, X)		\
+  do						\
+    {						\
+      union _FP_UNION_##fs _FP_PACK_RAW_1_flo;	\
+						\
+      _FP_PACK_RAW_1_flo.bits.frac = X##_f;	\
+      _FP_PACK_RAW_1_flo.bits.exp  = X##_e;	\
+      _FP_PACK_RAW_1_flo.bits.sign = X##_s;	\
+						\
+      (val) = _FP_PACK_RAW_1_flo.flt;		\
+    }						\
+  while (0)
+
+#define _FP_PACK_RAW_1_P(fs, val, X)			\
+  do							\
+    {							\
+      union _FP_UNION_##fs *_FP_PACK_RAW_1_P_flo	\
+	= (union _FP_UNION_##fs *) (val);		\
+							\
+      _FP_PACK_RAW_1_P_flo->bits.frac = X##_f;		\
+      _FP_PACK_RAW_1_P_flo->bits.exp  = X##_e;		\
+      _FP_PACK_RAW_1_P_flo->bits.sign = X##_s;		\
+    }							\
+  while (0)
+
+
+/* Multiplication algorithms: */
+
+/* Basic.  Assuming the host word size is >= 2*FRACBITS, we can do the
+   multiplication immediately.  */
+
+#define _FP_MUL_MEAT_DW_1_imm(wfracbits, R, X, Y)	\
+  do							\
+    {							\
+      R##_f = X##_f * Y##_f;				\
+    }							\
+  while (0)
+
+#define _FP_MUL_MEAT_1_imm(wfracbits, R, X, Y)				\
+  do									\
+    {									\
+      _FP_MUL_MEAT_DW_1_imm ((wfracbits), R, X, Y);			\
+      /* Normalize since we know where the msb of the multiplicands	\
+	 were (bit B), we know that the msb of the of the product is	\
+	 at either 2B or 2B-1.  */					\
+      _FP_FRAC_SRS_1 (R, (wfracbits)-1, 2*(wfracbits));			\
+    }									\
+  while (0)
+
+/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
+
+#define _FP_MUL_MEAT_DW_1_wide(wfracbits, R, X, Y, doit)	\
+  do								\
+    {								\
+      doit (R##_f1, R##_f0, X##_f, Y##_f);			\
+    }								\
+  while (0)
+
+#define _FP_MUL_MEAT_1_wide(wfracbits, R, X, Y, doit)			\
+  do									\
+    {									\
+      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_1_wide_Z);				\
+      _FP_MUL_MEAT_DW_1_wide ((wfracbits), _FP_MUL_MEAT_1_wide_Z,	\
+			      X, Y, doit);				\
+      /* Normalize since we know where the msb of the multiplicands	\
+	 were (bit B), we know that the msb of the of the product is	\
+	 at either 2B or 2B-1.  */					\
+      _FP_FRAC_SRS_2 (_FP_MUL_MEAT_1_wide_Z, (wfracbits)-1,		\
+		      2*(wfracbits));					\
+      R##_f = _FP_MUL_MEAT_1_wide_Z_f0;					\
+    }									\
+  while (0)
+
+/* Finally, a simple widening multiply algorithm.  What fun!  */
+
+#define _FP_MUL_MEAT_DW_1_hard(wfracbits, R, X, Y)			\
+  do									\
+    {									\
+      _FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_xh, _FP_MUL_MEAT_DW_1_hard_xl;	\
+      _FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_yh, _FP_MUL_MEAT_DW_1_hard_yl;	\
+      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_1_hard_a);			\
+									\
+      /* Split the words in half.  */					\
+      _FP_MUL_MEAT_DW_1_hard_xh = X##_f >> (_FP_W_TYPE_SIZE/2);		\
+      _FP_MUL_MEAT_DW_1_hard_xl						\
+	= X##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1);	\
+      _FP_MUL_MEAT_DW_1_hard_yh = Y##_f >> (_FP_W_TYPE_SIZE/2);		\
+      _FP_MUL_MEAT_DW_1_hard_yl						\
+	= Y##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1);	\
+									\
+      /* Multiply the pieces.  */					\
+      R##_f0 = _FP_MUL_MEAT_DW_1_hard_xl * _FP_MUL_MEAT_DW_1_hard_yl;	\
+      _FP_MUL_MEAT_DW_1_hard_a_f0					\
+	= _FP_MUL_MEAT_DW_1_hard_xh * _FP_MUL_MEAT_DW_1_hard_yl;	\
+      _FP_MUL_MEAT_DW_1_hard_a_f1					\
+	= _FP_MUL_MEAT_DW_1_hard_xl * _FP_MUL_MEAT_DW_1_hard_yh;	\
+      R##_f1 = _FP_MUL_MEAT_DW_1_hard_xh * _FP_MUL_MEAT_DW_1_hard_yh;	\
+									\
+      /* Reassemble into two full words.  */				\
+      if ((_FP_MUL_MEAT_DW_1_hard_a_f0 += _FP_MUL_MEAT_DW_1_hard_a_f1)	\
+	  < _FP_MUL_MEAT_DW_1_hard_a_f1)				\
+	R##_f1 += (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2);		\
+      _FP_MUL_MEAT_DW_1_hard_a_f1					\
+	= _FP_MUL_MEAT_DW_1_hard_a_f0 >> (_FP_W_TYPE_SIZE/2);		\
+      _FP_MUL_MEAT_DW_1_hard_a_f0					\
+	= _FP_MUL_MEAT_DW_1_hard_a_f0 << (_FP_W_TYPE_SIZE/2);		\
+      _FP_FRAC_ADD_2 (R, R, _FP_MUL_MEAT_DW_1_hard_a);			\
+    }									\
+  while (0)
+
+#define _FP_MUL_MEAT_1_hard(wfracbits, R, X, Y)			\
+  do								\
+    {								\
+      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_1_hard_z);			\
+      _FP_MUL_MEAT_DW_1_hard ((wfracbits),			\
+			      _FP_MUL_MEAT_1_hard_z, X, Y);	\
+								\
+      /* Normalize.  */						\
+      _FP_FRAC_SRS_2 (_FP_MUL_MEAT_1_hard_z,			\
+		      (wfracbits) - 1, 2*(wfracbits));		\
+      R##_f = _FP_MUL_MEAT_1_hard_z_f0;				\
+    }								\
+  while (0)
+
+
+/* Division algorithms: */
+
+/* Basic.  Assuming the host word size is >= 2*FRACBITS, we can do the
+   division immediately.  Give this macro either _FP_DIV_HELP_imm for
+   C primitives or _FP_DIV_HELP_ldiv for the ISO function.  Which you
+   choose will depend on what the compiler does with divrem4.  */
+
+#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit)				\
+  do									\
+    {									\
+      _FP_W_TYPE _FP_DIV_MEAT_1_imm_q, _FP_DIV_MEAT_1_imm_r;		\
+      X##_f <<= (X##_f < Y##_f						\
+		 ? R##_e--, _FP_WFRACBITS_##fs				\
+		 : _FP_WFRACBITS_##fs - 1);				\
+      doit (_FP_DIV_MEAT_1_imm_q, _FP_DIV_MEAT_1_imm_r, X##_f, Y##_f);	\
+      R##_f = _FP_DIV_MEAT_1_imm_q | (_FP_DIV_MEAT_1_imm_r != 0);	\
+    }									\
+  while (0)
+
+/* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd
+   that may be useful in this situation.  This first is for a primitive
+   that requires normalization, the second for one that does not.  Look
+   for UDIV_NEEDS_NORMALIZATION to tell which your machine needs.  */
+
+#define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y)				\
+  do									\
+    {									\
+      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_nh;				\
+      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_nl;				\
+      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_q;				\
+      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_r;				\
+      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_y;				\
+									\
+      /* Normalize Y -- i.e. make the most significant bit set.  */	\
+      _FP_DIV_MEAT_1_udiv_norm_y = Y##_f << _FP_WFRACXBITS_##fs;	\
+									\
+      /* Shift X op correspondingly high, that is, up one full word.  */ \
+      if (X##_f < Y##_f)						\
+	{								\
+	  R##_e--;							\
+	  _FP_DIV_MEAT_1_udiv_norm_nl = 0;				\
+	  _FP_DIV_MEAT_1_udiv_norm_nh = X##_f;				\
+	}								\
+      else								\
+	{								\
+	  _FP_DIV_MEAT_1_udiv_norm_nl = X##_f << (_FP_W_TYPE_SIZE - 1);	\
+	  _FP_DIV_MEAT_1_udiv_norm_nh = X##_f >> 1;			\
+	}								\
+									\
+      udiv_qrnnd (_FP_DIV_MEAT_1_udiv_norm_q,				\
+		  _FP_DIV_MEAT_1_udiv_norm_r,				\
+		  _FP_DIV_MEAT_1_udiv_norm_nh,				\
+		  _FP_DIV_MEAT_1_udiv_norm_nl,				\
+		  _FP_DIV_MEAT_1_udiv_norm_y);				\
+      R##_f = (_FP_DIV_MEAT_1_udiv_norm_q				\
+	       | (_FP_DIV_MEAT_1_udiv_norm_r != 0));			\
+    }									\
+  while (0)
+
+#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y)				\
+  do									\
+    {									\
+      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_nh, _FP_DIV_MEAT_1_udiv_nl;	\
+      _FP_W_TYPE _FP_DIV_MEAT_1_udiv_q, _FP_DIV_MEAT_1_udiv_r;		\
+      if (X##_f < Y##_f)						\
+	{								\
+	  R##_e--;							\
+	  _FP_DIV_MEAT_1_udiv_nl = X##_f << _FP_WFRACBITS_##fs;		\
+	  _FP_DIV_MEAT_1_udiv_nh = X##_f >> _FP_WFRACXBITS_##fs;	\
+	}								\
+      else								\
+	{								\
+	  _FP_DIV_MEAT_1_udiv_nl = X##_f << (_FP_WFRACBITS_##fs - 1);	\
+	  _FP_DIV_MEAT_1_udiv_nh = X##_f >> (_FP_WFRACXBITS_##fs + 1);	\
+	}								\
+      udiv_qrnnd (_FP_DIV_MEAT_1_udiv_q, _FP_DIV_MEAT_1_udiv_r,		\
+		  _FP_DIV_MEAT_1_udiv_nh, _FP_DIV_MEAT_1_udiv_nl,	\
+		  Y##_f);						\
+      R##_f = _FP_DIV_MEAT_1_udiv_q | (_FP_DIV_MEAT_1_udiv_r != 0);	\
+    }									\
+  while (0)
+
+
+/* Square root algorithms:
+   We have just one right now, maybe Newton approximation
+   should be added for those machines where division is fast.  */
+
+#define _FP_SQRT_MEAT_1(R, S, T, X, q)		\
+  do						\
+    {						\
+      while ((q) != _FP_WORK_ROUND)		\
+	{					\
+	  T##_f = S##_f + (q);			\
+	  if (T##_f <= X##_f)			\
+	    {					\
+	      S##_f = T##_f + (q);		\
+	      X##_f -= T##_f;			\
+	      R##_f += (q);			\
+	    }					\
+	  _FP_FRAC_SLL_1 (X, 1);		\
+	  (q) >>= 1;				\
+	}					\
+      if (X##_f)				\
+	{					\
+	  if (S##_f < X##_f)			\
+	    R##_f |= _FP_WORK_ROUND;		\
+	  R##_f |= _FP_WORK_STICKY;		\
+	}					\
+    }						\
+  while (0)
+
+/* Assembly/disassembly for converting to/from integral types.
+   No shifting or overflow handled here.  */
+
+#define _FP_FRAC_ASSEMBLE_1(r, X, rsize)	((r) = X##_f)
+#define _FP_FRAC_DISASSEMBLE_1(X, r, rsize)	(X##_f = (r))
+
+
+/* Convert FP values between word sizes.  */
+
+#define _FP_FRAC_COPY_1_1(D, S)		(D##_f = S##_f)
+
+#endif /* !SOFT_FP_OP_1_H */
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/op-2.h b/libc/sysdeps/linux/sparc64/soft-fp/op-2.h
new file mode 100644
index 000000000..c010afa3e
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/op-2.h
@@ -0,0 +1,705 @@
+/* Software floating-point emulation.
+   Basic two-word fraction declaration and manipulation.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef SOFT_FP_OP_2_H
+#define SOFT_FP_OP_2_H	1
+
+#define _FP_FRAC_DECL_2(X)				\
+  _FP_W_TYPE X##_f0 _FP_ZERO_INIT, X##_f1 _FP_ZERO_INIT
+#define _FP_FRAC_COPY_2(D, S)	(D##_f0 = S##_f0, D##_f1 = S##_f1)
+#define _FP_FRAC_SET_2(X, I)	__FP_FRAC_SET_2 (X, I)
+#define _FP_FRAC_HIGH_2(X)	(X##_f1)
+#define _FP_FRAC_LOW_2(X)	(X##_f0)
+#define _FP_FRAC_WORD_2(X, w)	(X##_f##w)
+
+#define _FP_FRAC_SLL_2(X, N)						\
+  (void) (((N) < _FP_W_TYPE_SIZE)					\
+	  ? ({								\
+	      if (__builtin_constant_p (N) && (N) == 1)			\
+		{							\
+		  X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
+		  X##_f0 += X##_f0;					\
+		}							\
+	      else							\
+		{							\
+		  X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
+		  X##_f0 <<= (N);					\
+		}							\
+	      0;							\
+	    })								\
+	  : ({								\
+	      X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);		\
+	      X##_f0 = 0;						\
+	    }))
+
+
+#define _FP_FRAC_SRL_2(X, N)						\
+  (void) (((N) < _FP_W_TYPE_SIZE)					\
+	  ? ({								\
+	      X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
+	      X##_f1 >>= (N);						\
+	    })								\
+	  : ({								\
+	      X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);		\
+	      X##_f1 = 0;						\
+	    }))
+
+/* Right shift with sticky-lsb.  */
+#define _FP_FRAC_SRST_2(X, S, N, sz)					\
+  (void) (((N) < _FP_W_TYPE_SIZE)					\
+	  ? ({								\
+	      S = (__builtin_constant_p (N) && (N) == 1			\
+		   ? X##_f0 & 1						\
+		   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0);		\
+	      X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
+	      X##_f1 >>= (N);						\
+	    })								\
+	  : ({								\
+	      S = ((((N) == _FP_W_TYPE_SIZE				\
+		     ? 0						\
+		     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))		\
+		    | X##_f0) != 0);					\
+	      X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE));		\
+	      X##_f1 = 0;						\
+	    }))
+
+#define _FP_FRAC_SRS_2(X, N, sz)					\
+  (void) (((N) < _FP_W_TYPE_SIZE)					\
+	  ? ({								\
+	      X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
+			| (__builtin_constant_p (N) && (N) == 1		\
+			   ? X##_f0 & 1					\
+			   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
+	      X##_f1 >>= (N);						\
+	    })								\
+	  : ({								\
+	      X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)		\
+			| ((((N) == _FP_W_TYPE_SIZE			\
+			     ? 0					\
+			     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))	\
+			    | X##_f0) != 0));				\
+	      X##_f1 = 0;						\
+	    }))
+
+#define _FP_FRAC_ADDI_2(X, I)	\
+  __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)
+
+#define _FP_FRAC_ADD_2(R, X, Y)	\
+  __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
+
+#define _FP_FRAC_SUB_2(R, X, Y)	\
+  __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
+
+#define _FP_FRAC_DEC_2(X, Y)	\
+  __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
+
+#define _FP_FRAC_CLZ_2(R, X)			\
+  do						\
+    {						\
+      if (X##_f1)				\
+	__FP_CLZ ((R), X##_f1);			\
+      else					\
+	{					\
+	  __FP_CLZ ((R), X##_f0);		\
+	  (R) += _FP_W_TYPE_SIZE;		\
+	}					\
+    }						\
+  while (0)
+
+/* Predicates.  */
+#define _FP_FRAC_NEGP_2(X)	((_FP_WS_TYPE) X##_f1 < 0)
+#define _FP_FRAC_ZEROP_2(X)	((X##_f1 | X##_f0) == 0)
+#define _FP_FRAC_OVERP_2(fs, X)	(_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
+#define _FP_FRAC_CLEAR_OVERP_2(fs, X)	(_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
+#define _FP_FRAC_HIGHBIT_DW_2(fs, X)	\
+  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
+#define _FP_FRAC_EQ_2(X, Y)	(X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
+#define _FP_FRAC_GT_2(X, Y)	\
+  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
+#define _FP_FRAC_GE_2(X, Y)	\
+  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
+
+#define _FP_ZEROFRAC_2		0, 0
+#define _FP_MINFRAC_2		0, 1
+#define _FP_MAXFRAC_2		(~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
+
+/* Internals.  */
+
+#define __FP_FRAC_SET_2(X, I1, I0)	(X##_f0 = I0, X##_f1 = I1)
+
+#define __FP_CLZ_2(R, xh, xl)			\
+  do						\
+    {						\
+      if (xh)					\
+	__FP_CLZ ((R), xh);			\
+      else					\
+	{					\
+	  __FP_CLZ ((R), xl);			\
+	  (R) += _FP_W_TYPE_SIZE;		\
+	}					\
+    }						\
+  while (0)
+
+#if 0
+
+# ifndef __FP_FRAC_ADDI_2
+#  define __FP_FRAC_ADDI_2(xh, xl, i)	\
+  (xh += ((xl += i) < i))
+# endif
+# ifndef __FP_FRAC_ADD_2
+#  define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl)	\
+  (rh = xh + yh + ((rl = xl + yl) < xl))
+# endif
+# ifndef __FP_FRAC_SUB_2
+#  define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl)	\
+  (rh = xh - yh - ((rl = xl - yl) > xl))
+# endif
+# ifndef __FP_FRAC_DEC_2
+#  define __FP_FRAC_DEC_2(xh, xl, yh, yl)		\
+  do							\
+    {							\
+      UWtype __FP_FRAC_DEC_2_t = xl;			\
+      xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t);	\
+    }							\
+  while (0)
+# endif
+
+#else
+
+# undef __FP_FRAC_ADDI_2
+# define __FP_FRAC_ADDI_2(xh, xl, i)	add_ssaaaa (xh, xl, xh, xl, 0, i)
+# undef __FP_FRAC_ADD_2
+# define __FP_FRAC_ADD_2		add_ssaaaa
+# undef __FP_FRAC_SUB_2
+# define __FP_FRAC_SUB_2		sub_ddmmss
+# undef __FP_FRAC_DEC_2
+# define __FP_FRAC_DEC_2(xh, xl, yh, yl)	\
+  sub_ddmmss (xh, xl, xh, xl, yh, yl)
+
+#endif
+
+/* Unpack the raw bits of a native fp value.  Do not classify or
+   normalize the data.  */
+
+#define _FP_UNPACK_RAW_2(fs, X, val)			\
+  do							\
+    {							\
+      union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo;	\
+      _FP_UNPACK_RAW_2_flo.flt = (val);			\
+							\
+      X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0;		\
+      X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1;		\
+      X##_e  = _FP_UNPACK_RAW_2_flo.bits.exp;		\
+      X##_s  = _FP_UNPACK_RAW_2_flo.bits.sign;		\
+    }							\
+  while (0)
+
+#define _FP_UNPACK_RAW_2_P(fs, X, val)			\
+  do							\
+    {							\
+      union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo	\
+	= (union _FP_UNION_##fs *) (val);		\
+							\
+      X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0;	\
+      X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1;	\
+      X##_e  = _FP_UNPACK_RAW_2_P_flo->bits.exp;	\
+      X##_s  = _FP_UNPACK_RAW_2_P_flo->bits.sign;	\
+    }							\
+  while (0)
+
+
+/* Repack the raw bits of a native fp value.  */
+
+#define _FP_PACK_RAW_2(fs, val, X)		\
+  do						\
+    {						\
+      union _FP_UNION_##fs _FP_PACK_RAW_2_flo;	\
+						\
+      _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0;	\
+      _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1;	\
+      _FP_PACK_RAW_2_flo.bits.exp   = X##_e;	\
+      _FP_PACK_RAW_2_flo.bits.sign  = X##_s;	\
+						\
+      (val) = _FP_PACK_RAW_2_flo.flt;		\
+    }						\
+  while (0)
+
+#define _FP_PACK_RAW_2_P(fs, val, X)			\
+  do							\
+    {							\
+      union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo	\
+	= (union _FP_UNION_##fs *) (val);		\
+							\
+      _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0;	\
+      _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1;	\
+      _FP_PACK_RAW_2_P_flo->bits.exp   = X##_e;		\
+      _FP_PACK_RAW_2_P_flo->bits.sign  = X##_s;		\
+    }							\
+  while (0)
+
+
+/* Multiplication algorithms: */
+
+/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
+
+#define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit)		\
+  do									\
+    {									\
+      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b);			\
+      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c);			\
+									\
+      doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0),		\
+	    X##_f0, Y##_f0);						\
+      doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0,	\
+	    X##_f0, Y##_f1);						\
+      doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0,	\
+	    X##_f1, Y##_f0);						\
+      doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),		\
+	    X##_f1, Y##_f1);						\
+									\
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
+		       _FP_FRAC_WORD_4 (R, 1), 0,			\
+		       _FP_MUL_MEAT_DW_2_wide_b_f1,			\
+		       _FP_MUL_MEAT_DW_2_wide_b_f0,			\
+		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
+		       _FP_FRAC_WORD_4 (R, 1));				\
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
+		       _FP_FRAC_WORD_4 (R, 1), 0,			\
+		       _FP_MUL_MEAT_DW_2_wide_c_f1,			\
+		       _FP_MUL_MEAT_DW_2_wide_c_f0,			\
+		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
+		       _FP_FRAC_WORD_4 (R, 1));				\
+    }									\
+  while (0)
+
+#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)			\
+  do									\
+    {									\
+      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z);				\
+									\
+      _FP_MUL_MEAT_DW_2_wide ((wfracbits), _FP_MUL_MEAT_2_wide_z,	\
+			      X, Y, doit);				\
+									\
+      /* Normalize since we know where the msb of the multiplicands	\
+	 were (bit B), we know that the msb of the of the product is	\
+	 at either 2B or 2B-1.  */					\
+      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, (wfracbits)-1,		\
+		      2*(wfracbits));					\
+      R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0);		\
+      R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1);		\
+    }									\
+  while (0)
+
+/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
+   Do only 3 multiplications instead of four. This one is for machines
+   where multiplication is much more expensive than subtraction.  */
+
+#define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit)		\
+  do									\
+    {									\
+      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b);			\
+      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c);			\
+      _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d;				\
+      int _FP_MUL_MEAT_DW_2_wide_3mul_c1;				\
+      int _FP_MUL_MEAT_DW_2_wide_3mul_c2;				\
+									\
+      _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1;		\
+      _FP_MUL_MEAT_DW_2_wide_3mul_c1					\
+	= _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0;			\
+      _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1;		\
+      _FP_MUL_MEAT_DW_2_wide_3mul_c2					\
+	= _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0;			\
+      doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0),	\
+	    X##_f0, Y##_f0);						\
+      doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1),		\
+	    _FP_MUL_MEAT_DW_2_wide_3mul_b_f0,				\
+	    _FP_MUL_MEAT_DW_2_wide_3mul_b_f1);				\
+      doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1,				\
+	    _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1);		\
+									\
+      _FP_MUL_MEAT_DW_2_wide_3mul_b_f0					\
+	&= -_FP_MUL_MEAT_DW_2_wide_3mul_c2;				\
+      _FP_MUL_MEAT_DW_2_wide_3mul_b_f1					\
+	&= -_FP_MUL_MEAT_DW_2_wide_3mul_c1;				\
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
+		       _FP_FRAC_WORD_4 (R, 1),				\
+		       (_FP_MUL_MEAT_DW_2_wide_3mul_c1			\
+			& _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0,		\
+		       _FP_MUL_MEAT_DW_2_wide_3mul_d,			\
+		       0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
+      __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
+			_FP_MUL_MEAT_DW_2_wide_3mul_b_f0);		\
+      __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
+			_FP_MUL_MEAT_DW_2_wide_3mul_b_f1);		\
+      __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
+		       _FP_FRAC_WORD_4 (R, 1),				\
+		       0, _FP_MUL_MEAT_DW_2_wide_3mul_d,		\
+		       _FP_FRAC_WORD_4 (R, 0));				\
+      __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
+		       _FP_FRAC_WORD_4 (R, 1), 0,			\
+		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f1,		\
+		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f0);		\
+      __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2),	\
+		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f1,		\
+		       _FP_MUL_MEAT_DW_2_wide_3mul_c_f0,		\
+		       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2));	\
+    }									\
+  while (0)
+
+#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)		\
+  do									\
+    {									\
+      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z);			\
+									\
+      _FP_MUL_MEAT_DW_2_wide_3mul ((wfracbits),				\
+				   _FP_MUL_MEAT_2_wide_3mul_z,		\
+				   X, Y, doit);				\
+									\
+      /* Normalize since we know where the msb of the multiplicands	\
+	 were (bit B), we know that the msb of the of the product is	\
+	 at either 2B or 2B-1.  */					\
+      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z,			\
+		      (wfracbits)-1, 2*(wfracbits));			\
+      R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0);		\
+      R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1);		\
+    }									\
+  while (0)
+
+#define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y)	\
+  do							\
+    {							\
+      _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2];		\
+      _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2];		\
+      _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0;		\
+      _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1;		\
+      _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0;		\
+      _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1;		\
+							\
+      mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x,	\
+		 _FP_MUL_MEAT_DW_2_gmp_y, 2);		\
+    }							\
+  while (0)
+
+#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)				\
+  do									\
+    {									\
+      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z);				\
+									\
+      _FP_MUL_MEAT_DW_2_gmp ((wfracbits), _FP_MUL_MEAT_2_gmp_z, X, Y);	\
+									\
+      /* Normalize since we know where the msb of the multiplicands	\
+	 were (bit B), we know that the msb of the of the product is	\
+	 at either 2B or 2B-1.  */					\
+      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, (wfracbits)-1,		\
+		      2*(wfracbits));					\
+      R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0];				\
+      R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1];				\
+    }									\
+  while (0)
+
+/* Do at most 120x120=240 bits multiplication using double floating
+   point multiplication.  This is useful if floating point
+   multiplication has much bigger throughput than integer multiply.
+   It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
+   between 106 and 120 only.
+   Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
+   SETFETZ is a macro which will disable all FPU exceptions and set rounding
+   towards zero,  RESETFE should optionally reset it back.  */
+
+#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
+  do									\
+    {									\
+      static const double _const[] =					\
+	{								\
+	  /* 2^-24 */ 5.9604644775390625e-08,				\
+	  /* 2^-48 */ 3.5527136788005009e-15,				\
+	  /* 2^-72 */ 2.1175823681357508e-22,				\
+	  /* 2^-96 */ 1.2621774483536189e-29,				\
+	  /* 2^28 */ 2.68435456e+08,					\
+	  /* 2^4 */ 1.600000e+01,					\
+	  /* 2^-20 */ 9.5367431640625e-07,				\
+	  /* 2^-44 */ 5.6843418860808015e-14,				\
+	  /* 2^-68 */ 3.3881317890172014e-21,				\
+	  /* 2^-92 */ 2.0194839173657902e-28,				\
+	  /* 2^-116 */ 1.2037062152420224e-35				\
+	};								\
+      double _a240, _b240, _c240, _d240, _e240, _f240,			\
+	_g240, _h240, _i240, _j240, _k240;				\
+      union { double d; UDItype i; } _l240, _m240, _n240, _o240,	\
+				       _p240, _q240, _r240, _s240;	\
+      UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;		\
+									\
+      _FP_STATIC_ASSERT ((wfracbits) >= 106 && (wfracbits) <= 120,	\
+			 "wfracbits out of range");			\
+									\
+      setfetz;								\
+									\
+      _e240 = (double) (long) (X##_f0 & 0xffffff);			\
+      _j240 = (double) (long) (Y##_f0 & 0xffffff);			\
+      _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff);		\
+      _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff);		\
+      _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
+      _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
+      _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff);		\
+      _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff);		\
+      _a240 = (double) (long) (X##_f1 >> 32);				\
+      _f240 = (double) (long) (Y##_f1 >> 32);				\
+      _e240 *= _const[3];						\
+      _j240 *= _const[3];						\
+      _d240 *= _const[2];						\
+      _i240 *= _const[2];						\
+      _c240 *= _const[1];						\
+      _h240 *= _const[1];						\
+      _b240 *= _const[0];						\
+      _g240 *= _const[0];						\
+      _s240.d =							      _e240*_j240; \
+      _r240.d =						_d240*_j240 + _e240*_i240; \
+      _q240.d =				  _c240*_j240 + _d240*_i240 + _e240*_h240; \
+      _p240.d =		    _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
+      _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
+      _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;	\
+      _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;		\
+      _l240.d = _a240*_g240 + _b240*_f240;				\
+      _k240 =   _a240*_f240;						\
+      _r240.d += _s240.d;						\
+      _q240.d += _r240.d;						\
+      _p240.d += _q240.d;						\
+      _o240.d += _p240.d;						\
+      _n240.d += _o240.d;						\
+      _m240.d += _n240.d;						\
+      _l240.d += _m240.d;						\
+      _k240 += _l240.d;							\
+      _s240.d -= ((_const[10]+_s240.d)-_const[10]);			\
+      _r240.d -= ((_const[9]+_r240.d)-_const[9]);			\
+      _q240.d -= ((_const[8]+_q240.d)-_const[8]);			\
+      _p240.d -= ((_const[7]+_p240.d)-_const[7]);			\
+      _o240.d += _const[7];						\
+      _n240.d += _const[6];						\
+      _m240.d += _const[5];						\
+      _l240.d += _const[4];						\
+      if (_s240.d != 0.0)						\
+	_y240 = 1;							\
+      if (_r240.d != 0.0)						\
+	_y240 = 1;							\
+      if (_q240.d != 0.0)						\
+	_y240 = 1;							\
+      if (_p240.d != 0.0)						\
+	_y240 = 1;							\
+      _t240 = (DItype) _k240;						\
+      _u240 = _l240.i;							\
+      _v240 = _m240.i;							\
+      _w240 = _n240.i;							\
+      _x240 = _o240.i;							\
+      R##_f1 = ((_t240 << (128 - (wfracbits - 1)))			\
+		| ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)));	\
+      R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1)))		\
+		| ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))	\
+		| ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))	\
+		| ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))	\
+		| _y240);						\
+      resetfe;								\
+    }									\
+  while (0)
+
+/* Division algorithms: */
+
+#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)				\
+  do									\
+    {									\
+      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2;				\
+      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1;				\
+      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0;				\
+      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1;				\
+      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0;				\
+      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1;				\
+      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0;				\
+      if (_FP_FRAC_GE_2 (X, Y))						\
+	{								\
+	  _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1;			\
+	  _FP_DIV_MEAT_2_udiv_n_f1					\
+	    = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;		\
+	  _FP_DIV_MEAT_2_udiv_n_f0					\
+	    = X##_f0 << (_FP_W_TYPE_SIZE - 1);				\
+	}								\
+      else								\
+	{								\
+	  R##_e--;							\
+	  _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1;				\
+	  _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0;				\
+	  _FP_DIV_MEAT_2_udiv_n_f0 = 0;					\
+	}								\
+									\
+      /* Normalize, i.e. make the most significant bit of the		\
+	 denominator set.  */						\
+      _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs);				\
+									\
+      udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1,			\
+		  _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1,	\
+		  Y##_f1);						\
+      umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0,	\
+		 R##_f1, Y##_f0);					\
+      _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0;		\
+      if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r))	\
+	{								\
+	  R##_f1--;							\
+	  _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,			\
+			  _FP_DIV_MEAT_2_udiv_r);			\
+	  if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y)			\
+	      && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,			\
+				_FP_DIV_MEAT_2_udiv_r))			\
+	    {								\
+	      R##_f1--;							\
+	      _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,			\
+			      _FP_DIV_MEAT_2_udiv_r);			\
+	    }								\
+	}								\
+      _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m);	\
+									\
+      if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1)				\
+	{								\
+	  /* This is a special case, not an optimization		\
+	     (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype).	\
+	     As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y,		\
+	     R##_f0 can be either (UWtype)-1 or (UWtype)-2.  But as we	\
+	     know what kind of bits it is (sticky, guard, round),	\
+	     we don't care.  We also don't care what the reminder is,	\
+	     because the guard bit will be set anyway.  -jj */		\
+	  R##_f0 = -1;							\
+	}								\
+      else								\
+	{								\
+	  udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1,			\
+		      _FP_DIV_MEAT_2_udiv_r_f1,				\
+		      _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1);		\
+	  umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1,				\
+		     _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0);		\
+	  _FP_DIV_MEAT_2_udiv_r_f0 = 0;					\
+	  if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,			\
+			     _FP_DIV_MEAT_2_udiv_r))			\
+	    {								\
+	      R##_f0--;							\
+	      _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,			\
+			      _FP_DIV_MEAT_2_udiv_r);			\
+	      if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y)		\
+		  && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m,		\
+				    _FP_DIV_MEAT_2_udiv_r))		\
+		{							\
+		  R##_f0--;						\
+		  _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y,		\
+				  _FP_DIV_MEAT_2_udiv_r);		\
+		}							\
+	    }								\
+	  if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r,			\
+			      _FP_DIV_MEAT_2_udiv_m))			\
+	    R##_f0 |= _FP_WORK_STICKY;					\
+	}								\
+    }									\
+  while (0)
+
+
+/* Square root algorithms:
+   We have just one right now, maybe Newton approximation
+   should be added for those machines where division is fast.  */
+
+#define _FP_SQRT_MEAT_2(R, S, T, X, q)				\
+  do								\
+    {								\
+      while (q)							\
+	{							\
+	  T##_f1 = S##_f1 + (q);				\
+	  if (T##_f1 <= X##_f1)					\
+	    {							\
+	      S##_f1 = T##_f1 + (q);				\
+	      X##_f1 -= T##_f1;					\
+	      R##_f1 += (q);					\
+	    }							\
+	  _FP_FRAC_SLL_2 (X, 1);				\
+	  (q) >>= 1;						\
+	}							\
+      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);		\
+      while ((q) != _FP_WORK_ROUND)				\
+	{							\
+	  T##_f0 = S##_f0 + (q);				\
+	  T##_f1 = S##_f1;					\
+	  if (T##_f1 < X##_f1					\
+	      || (T##_f1 == X##_f1 && T##_f0 <= X##_f0))	\
+	    {							\
+	      S##_f0 = T##_f0 + (q);				\
+	      S##_f1 += (T##_f0 > S##_f0);			\
+	      _FP_FRAC_DEC_2 (X, T);				\
+	      R##_f0 += (q);					\
+	    }							\
+	  _FP_FRAC_SLL_2 (X, 1);				\
+	  (q) >>= 1;						\
+	}							\
+      if (X##_f0 | X##_f1)					\
+	{							\
+	  if (S##_f1 < X##_f1					\
+	      || (S##_f1 == X##_f1 && S##_f0 < X##_f0))		\
+	    R##_f0 |= _FP_WORK_ROUND;				\
+	  R##_f0 |= _FP_WORK_STICKY;				\
+	}							\
+    }								\
+  while (0)
+
+
+/* Assembly/disassembly for converting to/from integral types.
+   No shifting or overflow handled here.  */
+
+#define _FP_FRAC_ASSEMBLE_2(r, X, rsize)	\
+  (void) (((rsize) <= _FP_W_TYPE_SIZE)		\
+	  ? ({ (r) = X##_f0; })			\
+	  : ({					\
+	      (r) = X##_f1;			\
+	      (r) <<= _FP_W_TYPE_SIZE;		\
+	      (r) += X##_f0;			\
+	    }))
+
+#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)	\
+  do						\
+    {						\
+      X##_f0 = (r);				\
+      X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE	\
+		? 0				\
+		: (r) >> _FP_W_TYPE_SIZE);	\
+    }						\
+  while (0)
+
+/* Convert FP values between word sizes.  */
+
+#define _FP_FRAC_COPY_1_2(D, S)		(D##_f = S##_f0)
+
+#define _FP_FRAC_COPY_2_1(D, S)		((D##_f0 = S##_f), (D##_f1 = 0))
+
+#define _FP_FRAC_COPY_2_2(D, S)		_FP_FRAC_COPY_2 (D, S)
+
+#endif /* !SOFT_FP_OP_2_H */
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/op-4.h b/libc/sysdeps/linux/sparc64/soft-fp/op-4.h
new file mode 100644
index 000000000..5034667e3
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/op-4.h
@@ -0,0 +1,875 @@
+/* Software floating-point emulation.
+   Basic four-word fraction declaration and manipulation.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef SOFT_FP_OP_4_H
+#define SOFT_FP_OP_4_H	1
+
+#define _FP_FRAC_DECL_4(X)	_FP_W_TYPE X##_f[4]
+#define _FP_FRAC_COPY_4(D, S)			\
+  (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1],	\
+   D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
+#define _FP_FRAC_SET_4(X, I)	__FP_FRAC_SET_4 (X, I)
+#define _FP_FRAC_HIGH_4(X)	(X##_f[3])
+#define _FP_FRAC_LOW_4(X)	(X##_f[0])
+#define _FP_FRAC_WORD_4(X, w)	(X##_f[w])
+
+#define _FP_FRAC_SLL_4(X, N)						\
+  do									\
+    {									\
+      _FP_I_TYPE _FP_FRAC_SLL_4_up, _FP_FRAC_SLL_4_down;		\
+      _FP_I_TYPE _FP_FRAC_SLL_4_skip, _FP_FRAC_SLL_4_i;			\
+      _FP_FRAC_SLL_4_skip = (N) / _FP_W_TYPE_SIZE;			\
+      _FP_FRAC_SLL_4_up = (N) % _FP_W_TYPE_SIZE;			\
+      _FP_FRAC_SLL_4_down = _FP_W_TYPE_SIZE - _FP_FRAC_SLL_4_up;	\
+      if (!_FP_FRAC_SLL_4_up)						\
+	for (_FP_FRAC_SLL_4_i = 3;					\
+	     _FP_FRAC_SLL_4_i >= _FP_FRAC_SLL_4_skip;			\
+	     --_FP_FRAC_SLL_4_i)					\
+	  X##_f[_FP_FRAC_SLL_4_i]					\
+	    = X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip];		\
+      else								\
+	{								\
+	  for (_FP_FRAC_SLL_4_i = 3;					\
+	       _FP_FRAC_SLL_4_i > _FP_FRAC_SLL_4_skip;			\
+	       --_FP_FRAC_SLL_4_i)					\
+	    X##_f[_FP_FRAC_SLL_4_i]					\
+	      = ((X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip]		\
+		  << _FP_FRAC_SLL_4_up)					\
+		 | (X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip-1]	\
+		    >> _FP_FRAC_SLL_4_down));				\
+	  X##_f[_FP_FRAC_SLL_4_i--] = X##_f[0] << _FP_FRAC_SLL_4_up;	\
+	}								\
+      for (; _FP_FRAC_SLL_4_i >= 0; --_FP_FRAC_SLL_4_i)			\
+	X##_f[_FP_FRAC_SLL_4_i] = 0;					\
+    }									\
+  while (0)
+
+/* This one was broken too.  */
+#define _FP_FRAC_SRL_4(X, N)						\
+  do									\
+    {									\
+      _FP_I_TYPE _FP_FRAC_SRL_4_up, _FP_FRAC_SRL_4_down;		\
+      _FP_I_TYPE _FP_FRAC_SRL_4_skip, _FP_FRAC_SRL_4_i;			\
+      _FP_FRAC_SRL_4_skip = (N) / _FP_W_TYPE_SIZE;			\
+      _FP_FRAC_SRL_4_down = (N) % _FP_W_TYPE_SIZE;			\
+      _FP_FRAC_SRL_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRL_4_down;	\
+      if (!_FP_FRAC_SRL_4_down)						\
+	for (_FP_FRAC_SRL_4_i = 0;					\
+	     _FP_FRAC_SRL_4_i <= 3-_FP_FRAC_SRL_4_skip;			\
+	     ++_FP_FRAC_SRL_4_i)					\
+	  X##_f[_FP_FRAC_SRL_4_i]					\
+	    = X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip];		\
+      else								\
+	{								\
+	  for (_FP_FRAC_SRL_4_i = 0;					\
+	       _FP_FRAC_SRL_4_i < 3-_FP_FRAC_SRL_4_skip;		\
+	       ++_FP_FRAC_SRL_4_i)					\
+	    X##_f[_FP_FRAC_SRL_4_i]					\
+	      = ((X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip]		\
+		  >> _FP_FRAC_SRL_4_down)				\
+		 | (X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip+1]	\
+		    << _FP_FRAC_SRL_4_up));				\
+	  X##_f[_FP_FRAC_SRL_4_i++] = X##_f[3] >> _FP_FRAC_SRL_4_down;	\
+	}								\
+      for (; _FP_FRAC_SRL_4_i < 4; ++_FP_FRAC_SRL_4_i)			\
+	X##_f[_FP_FRAC_SRL_4_i] = 0;					\
+    }									\
+  while (0)
+
+
+/* Right shift with sticky-lsb.
+   What this actually means is that we do a standard right-shift,
+   but that if any of the bits that fall off the right hand side
+   were one then we always set the LSbit.  */
+#define _FP_FRAC_SRST_4(X, S, N, size)					\
+  do									\
+    {									\
+      _FP_I_TYPE _FP_FRAC_SRST_4_up, _FP_FRAC_SRST_4_down;		\
+      _FP_I_TYPE _FP_FRAC_SRST_4_skip, _FP_FRAC_SRST_4_i;		\
+      _FP_W_TYPE _FP_FRAC_SRST_4_s;					\
+      _FP_FRAC_SRST_4_skip = (N) / _FP_W_TYPE_SIZE;			\
+      _FP_FRAC_SRST_4_down = (N) % _FP_W_TYPE_SIZE;			\
+      _FP_FRAC_SRST_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRST_4_down;	\
+      for (_FP_FRAC_SRST_4_s = _FP_FRAC_SRST_4_i = 0;			\
+	   _FP_FRAC_SRST_4_i < _FP_FRAC_SRST_4_skip;			\
+	   ++_FP_FRAC_SRST_4_i)						\
+	_FP_FRAC_SRST_4_s |= X##_f[_FP_FRAC_SRST_4_i];			\
+      if (!_FP_FRAC_SRST_4_down)					\
+	for (_FP_FRAC_SRST_4_i = 0;					\
+	     _FP_FRAC_SRST_4_i <= 3-_FP_FRAC_SRST_4_skip;		\
+	     ++_FP_FRAC_SRST_4_i)					\
+	  X##_f[_FP_FRAC_SRST_4_i]					\
+	    = X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip];		\
+      else								\
+	{								\
+	  _FP_FRAC_SRST_4_s						\
+	    |= X##_f[_FP_FRAC_SRST_4_i] << _FP_FRAC_SRST_4_up;		\
+	  for (_FP_FRAC_SRST_4_i = 0;					\
+	       _FP_FRAC_SRST_4_i < 3-_FP_FRAC_SRST_4_skip;		\
+	       ++_FP_FRAC_SRST_4_i)					\
+	    X##_f[_FP_FRAC_SRST_4_i]					\
+	      = ((X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip]		\
+		  >> _FP_FRAC_SRST_4_down)				\
+		 | (X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip+1]	\
+		    << _FP_FRAC_SRST_4_up));				\
+	  X##_f[_FP_FRAC_SRST_4_i++]					\
+	    = X##_f[3] >> _FP_FRAC_SRST_4_down;				\
+	}								\
+      for (; _FP_FRAC_SRST_4_i < 4; ++_FP_FRAC_SRST_4_i)		\
+	X##_f[_FP_FRAC_SRST_4_i] = 0;					\
+      S = (_FP_FRAC_SRST_4_s != 0);					\
+    }									\
+  while (0)
+
+#define _FP_FRAC_SRS_4(X, N, size)				\
+  do								\
+    {								\
+      int _FP_FRAC_SRS_4_sticky;				\
+      _FP_FRAC_SRST_4 (X, _FP_FRAC_SRS_4_sticky, (N), (size));	\
+      X##_f[0] |= _FP_FRAC_SRS_4_sticky;			\
+    }								\
+  while (0)
+
+#define _FP_FRAC_ADD_4(R, X, Y)					\
+  __FP_FRAC_ADD_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0],	\
+		   X##_f[3], X##_f[2], X##_f[1], X##_f[0],	\
+		   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
+
+#define _FP_FRAC_SUB_4(R, X, Y)					\
+  __FP_FRAC_SUB_4 (R##_f[3], R##_f[2], R##_f[1], R##_f[0],	\
+		   X##_f[3], X##_f[2], X##_f[1], X##_f[0],	\
+		   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
+
+#define _FP_FRAC_DEC_4(X, Y)					\
+  __FP_FRAC_DEC_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0],	\
+		   Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
+
+#define _FP_FRAC_ADDI_4(X, I)					\
+  __FP_FRAC_ADDI_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
+
+#define _FP_ZEROFRAC_4  0, 0, 0, 0
+#define _FP_MINFRAC_4   0, 0, 0, 1
+#define _FP_MAXFRAC_4	(~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
+
+#define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
+#define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE) X##_f[3] < 0)
+#define _FP_FRAC_OVERP_4(fs, X)  (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
+#define _FP_FRAC_HIGHBIT_DW_4(fs, X)	\
+  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)
+#define _FP_FRAC_CLEAR_OVERP_4(fs, X)  (_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)
+
+#define _FP_FRAC_EQ_4(X, Y)				\
+  (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1]		\
+   && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
+
+#define _FP_FRAC_GT_4(X, Y)				\
+  (X##_f[3] > Y##_f[3]					\
+   || (X##_f[3] == Y##_f[3]				\
+       && (X##_f[2] > Y##_f[2]				\
+	   || (X##_f[2] == Y##_f[2]			\
+	       && (X##_f[1] > Y##_f[1]			\
+		   || (X##_f[1] == Y##_f[1]		\
+		       && X##_f[0] > Y##_f[0]))))))
+
+#define _FP_FRAC_GE_4(X, Y)				\
+  (X##_f[3] > Y##_f[3]					\
+   || (X##_f[3] == Y##_f[3]				\
+       && (X##_f[2] > Y##_f[2]				\
+	   || (X##_f[2] == Y##_f[2]			\
+	       && (X##_f[1] > Y##_f[1]			\
+		   || (X##_f[1] == Y##_f[1]		\
+		       && X##_f[0] >= Y##_f[0]))))))
+
+
+#define _FP_FRAC_CLZ_4(R, X)			\
+  do						\
+    {						\
+      if (X##_f[3])				\
+	__FP_CLZ ((R), X##_f[3]);		\
+      else if (X##_f[2])			\
+	{					\
+	  __FP_CLZ ((R), X##_f[2]);		\
+	  (R) += _FP_W_TYPE_SIZE;		\
+	}					\
+      else if (X##_f[1])			\
+	{					\
+	  __FP_CLZ ((R), X##_f[1]);		\
+	  (R) += _FP_W_TYPE_SIZE*2;		\
+	}					\
+      else					\
+	{					\
+	  __FP_CLZ ((R), X##_f[0]);		\
+	  (R) += _FP_W_TYPE_SIZE*3;		\
+	}					\
+    }						\
+  while (0)
+
+
+#define _FP_UNPACK_RAW_4(fs, X, val)			\
+  do							\
+    {							\
+      union _FP_UNION_##fs _FP_UNPACK_RAW_4_flo;	\
+      _FP_UNPACK_RAW_4_flo.flt = (val);			\
+      X##_f[0] = _FP_UNPACK_RAW_4_flo.bits.frac0;	\
+      X##_f[1] = _FP_UNPACK_RAW_4_flo.bits.frac1;	\
+      X##_f[2] = _FP_UNPACK_RAW_4_flo.bits.frac2;	\
+      X##_f[3] = _FP_UNPACK_RAW_4_flo.bits.frac3;	\
+      X##_e  = _FP_UNPACK_RAW_4_flo.bits.exp;		\
+      X##_s  = _FP_UNPACK_RAW_4_flo.bits.sign;		\
+    }							\
+  while (0)
+
+#define _FP_UNPACK_RAW_4_P(fs, X, val)			\
+  do							\
+    {							\
+      union _FP_UNION_##fs *_FP_UNPACK_RAW_4_P_flo	\
+	= (union _FP_UNION_##fs *) (val);		\
+							\
+      X##_f[0] = _FP_UNPACK_RAW_4_P_flo->bits.frac0;	\
+      X##_f[1] = _FP_UNPACK_RAW_4_P_flo->bits.frac1;	\
+      X##_f[2] = _FP_UNPACK_RAW_4_P_flo->bits.frac2;	\
+      X##_f[3] = _FP_UNPACK_RAW_4_P_flo->bits.frac3;	\
+      X##_e  = _FP_UNPACK_RAW_4_P_flo->bits.exp;	\
+      X##_s  = _FP_UNPACK_RAW_4_P_flo->bits.sign;	\
+    }							\
+  while (0)
+
+#define _FP_PACK_RAW_4(fs, val, X)		\
+  do						\
+    {						\
+      union _FP_UNION_##fs _FP_PACK_RAW_4_flo;	\
+      _FP_PACK_RAW_4_flo.bits.frac0 = X##_f[0];	\
+      _FP_PACK_RAW_4_flo.bits.frac1 = X##_f[1];	\
+      _FP_PACK_RAW_4_flo.bits.frac2 = X##_f[2];	\
+      _FP_PACK_RAW_4_flo.bits.frac3 = X##_f[3];	\
+      _FP_PACK_RAW_4_flo.bits.exp   = X##_e;	\
+      _FP_PACK_RAW_4_flo.bits.sign  = X##_s;	\
+      (val) = _FP_PACK_RAW_4_flo.flt;		\
+    }						\
+  while (0)
+
+#define _FP_PACK_RAW_4_P(fs, val, X)			\
+  do							\
+    {							\
+      union _FP_UNION_##fs *_FP_PACK_RAW_4_P_flo	\
+	= (union _FP_UNION_##fs *) (val);		\
+							\
+      _FP_PACK_RAW_4_P_flo->bits.frac0 = X##_f[0];	\
+      _FP_PACK_RAW_4_P_flo->bits.frac1 = X##_f[1];	\
+      _FP_PACK_RAW_4_P_flo->bits.frac2 = X##_f[2];	\
+      _FP_PACK_RAW_4_P_flo->bits.frac3 = X##_f[3];	\
+      _FP_PACK_RAW_4_P_flo->bits.exp   = X##_e;		\
+      _FP_PACK_RAW_4_P_flo->bits.sign  = X##_s;		\
+    }							\
+  while (0)
+
+/* Multiplication algorithms: */
+
+/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
+
+#define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit)		\
+  do									\
+    {									\
+      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_b);			\
+      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_c);			\
+      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_d);			\
+      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_e);			\
+      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_f);			\
+									\
+      doit (_FP_FRAC_WORD_8 (R, 1), _FP_FRAC_WORD_8 (R, 0),		\
+	    X##_f[0], Y##_f[0]);					\
+      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0,	\
+	    X##_f[0], Y##_f[1]);					\
+      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0,	\
+	    X##_f[1], Y##_f[0]);					\
+      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0,	\
+	    X##_f[1], Y##_f[1]);					\
+      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0,	\
+	    X##_f[0], Y##_f[2]);					\
+      doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0,	\
+	    X##_f[2], Y##_f[0]);					\
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2),	\
+		       _FP_FRAC_WORD_8 (R, 1), 0,			\
+		       _FP_MUL_MEAT_DW_4_wide_b_f1,			\
+		       _FP_MUL_MEAT_DW_4_wide_b_f0,			\
+		       0, 0, _FP_FRAC_WORD_8 (R, 1));			\
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2),	\
+		       _FP_FRAC_WORD_8 (R, 1), 0,			\
+		       _FP_MUL_MEAT_DW_4_wide_c_f1,			\
+		       _FP_MUL_MEAT_DW_4_wide_c_f0,			\
+		       _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2),	\
+		       _FP_FRAC_WORD_8 (R, 1));				\
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3),	\
+		       _FP_FRAC_WORD_8 (R, 2), 0,			\
+		       _FP_MUL_MEAT_DW_4_wide_d_f1,			\
+		       _FP_MUL_MEAT_DW_4_wide_d_f0,			\
+		       0, _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2)); \
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3),	\
+		       _FP_FRAC_WORD_8 (R, 2), 0,			\
+		       _FP_MUL_MEAT_DW_4_wide_e_f1,			\
+		       _FP_MUL_MEAT_DW_4_wide_e_f0,			\
+		       _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3),	\
+		       _FP_FRAC_WORD_8 (R, 2));				\
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3),	\
+		       _FP_FRAC_WORD_8 (R, 2), 0,			\
+		       _FP_MUL_MEAT_DW_4_wide_f_f1,			\
+		       _FP_MUL_MEAT_DW_4_wide_f_f0,			\
+		       _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3),	\
+		       _FP_FRAC_WORD_8 (R, 2));				\
+      doit (_FP_MUL_MEAT_DW_4_wide_b_f1,				\
+	    _FP_MUL_MEAT_DW_4_wide_b_f0, X##_f[0], Y##_f[3]);		\
+      doit (_FP_MUL_MEAT_DW_4_wide_c_f1,				\
+	    _FP_MUL_MEAT_DW_4_wide_c_f0, X##_f[3], Y##_f[0]);		\
+      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0,	\
+	    X##_f[1], Y##_f[2]);					\
+      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0,	\
+	    X##_f[2], Y##_f[1]);					\
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4),	\
+		       _FP_FRAC_WORD_8 (R, 3), 0,			\
+		       _FP_MUL_MEAT_DW_4_wide_b_f1,			\
+		       _FP_MUL_MEAT_DW_4_wide_b_f0,			\
+		       0, _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3)); \
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4),	\
+		       _FP_FRAC_WORD_8 (R, 3), 0,			\
+		       _FP_MUL_MEAT_DW_4_wide_c_f1,			\
+		       _FP_MUL_MEAT_DW_4_wide_c_f0,			\
+		       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4),	\
+		       _FP_FRAC_WORD_8 (R, 3));				\
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4),	\
+		       _FP_FRAC_WORD_8 (R, 3), 0,			\
+		       _FP_MUL_MEAT_DW_4_wide_d_f1,			\
+		       _FP_MUL_MEAT_DW_4_wide_d_f0,			\
+		       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4),	\
+		       _FP_FRAC_WORD_8 (R, 3));				\
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4),	\
+		       _FP_FRAC_WORD_8 (R, 3), 0,			\
+		       _FP_MUL_MEAT_DW_4_wide_e_f1,			\
+		       _FP_MUL_MEAT_DW_4_wide_e_f0,			\
+		       _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4),	\
+		       _FP_FRAC_WORD_8 (R, 3));				\
+      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0,	\
+	    X##_f[2], Y##_f[2]);					\
+      doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0,	\
+	    X##_f[1], Y##_f[3]);					\
+      doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0,	\
+	    X##_f[3], Y##_f[1]);					\
+      doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0,	\
+	    X##_f[2], Y##_f[3]);					\
+      doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0,	\
+	    X##_f[3], Y##_f[2]);					\
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5),	\
+		       _FP_FRAC_WORD_8 (R, 4), 0,			\
+		       _FP_MUL_MEAT_DW_4_wide_b_f1,			\
+		       _FP_MUL_MEAT_DW_4_wide_b_f0,			\
+		       0, _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4)); \
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5),	\
+		       _FP_FRAC_WORD_8 (R, 4), 0,			\
+		       _FP_MUL_MEAT_DW_4_wide_c_f1,			\
+		       _FP_MUL_MEAT_DW_4_wide_c_f0,			\
+		       _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5),	\
+		       _FP_FRAC_WORD_8 (R, 4));				\
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5),	\
+		       _FP_FRAC_WORD_8 (R, 4), 0,			\
+		       _FP_MUL_MEAT_DW_4_wide_d_f1,			\
+		       _FP_MUL_MEAT_DW_4_wide_d_f0,			\
+		       _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5),	\
+		       _FP_FRAC_WORD_8 (R, 4));				\
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6),	\
+		       _FP_FRAC_WORD_8 (R, 5), 0,			\
+		       _FP_MUL_MEAT_DW_4_wide_e_f1,			\
+		       _FP_MUL_MEAT_DW_4_wide_e_f0,			\
+		       0, _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5)); \
+      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6),	\
+		       _FP_FRAC_WORD_8 (R, 5), 0,			\
+		       _FP_MUL_MEAT_DW_4_wide_f_f1,			\
+		       _FP_MUL_MEAT_DW_4_wide_f_f0,			\
+		       _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6),	\
+		       _FP_FRAC_WORD_8 (R, 5));				\
+      doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0,	\
+	    X##_f[3], Y##_f[3]);					\
+      __FP_FRAC_ADD_2 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6),	\
+		       _FP_MUL_MEAT_DW_4_wide_b_f1,			\
+		       _FP_MUL_MEAT_DW_4_wide_b_f0,			\
+		       _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6));	\
+    }									\
+  while (0)
+
+#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit)			\
+  do									\
+    {									\
+      _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_wide_z);				\
+									\
+      _FP_MUL_MEAT_DW_4_wide ((wfracbits), _FP_MUL_MEAT_4_wide_z,	\
+			      X, Y, doit);				\
+									\
+      /* Normalize since we know where the msb of the multiplicands	\
+	 were (bit B), we know that the msb of the of the product is	\
+	 at either 2B or 2B-1.  */					\
+      _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_wide_z, (wfracbits)-1,		\
+		      2*(wfracbits));					\
+      __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 3),	\
+		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 2),	\
+		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 1),	\
+		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 0));	\
+    }									\
+  while (0)
+
+#define _FP_MUL_MEAT_DW_4_gmp(wfracbits, R, X, Y)	\
+  do							\
+    {							\
+      mpn_mul_n (R##_f, _x_f, _y_f, 4);			\
+    }							\
+  while (0)
+
+#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y)				\
+  do									\
+    {									\
+      _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_gmp_z);				\
+									\
+      _FP_MUL_MEAT_DW_4_gmp ((wfracbits), _FP_MUL_MEAT_4_gmp_z, X, Y);	\
+									\
+      /* Normalize since we know where the msb of the multiplicands	\
+	 were (bit B), we know that the msb of the of the product is	\
+	 at either 2B or 2B-1.  */					\
+      _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_gmp_z, (wfracbits)-1,		\
+		      2*(wfracbits));					\
+      __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 3),	\
+		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 2),	\
+		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 1),	\
+		       _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 0));	\
+    }									\
+  while (0)
+
+/* Helper utility for _FP_DIV_MEAT_4_udiv:
+ * pppp = m * nnn.  */
+#define umul_ppppmnnn(p3, p2, p1, p0, m, n2, n1, n0)	\
+  do							\
+    {							\
+      UWtype umul_ppppmnnn_t;				\
+      umul_ppmm (p1, p0, m, n0);			\
+      umul_ppmm (p2, umul_ppppmnnn_t, m, n1);		\
+      __FP_FRAC_ADDI_2 (p2, p1, umul_ppppmnnn_t);	\
+      umul_ppmm (p3, umul_ppppmnnn_t, m, n2);		\
+      __FP_FRAC_ADDI_2 (p3, p2, umul_ppppmnnn_t);	\
+    }							\
+  while (0)
+
+/* Division algorithms: */
+
+#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y)				\
+  do									\
+    {									\
+      int _FP_DIV_MEAT_4_udiv_i;					\
+      _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_n);				\
+      _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_m);				\
+      _FP_FRAC_SET_4 (_FP_DIV_MEAT_4_udiv_n, _FP_ZEROFRAC_4);		\
+      if (_FP_FRAC_GE_4 (X, Y))						\
+	{								\
+	  _FP_DIV_MEAT_4_udiv_n_f[3]					\
+	    = X##_f[0] << (_FP_W_TYPE_SIZE - 1);			\
+	  _FP_FRAC_SRL_4 (X, 1);					\
+	}								\
+      else								\
+	R##_e--;							\
+									\
+      /* Normalize, i.e. make the most significant bit of the		\
+	 denominator set.  */						\
+      _FP_FRAC_SLL_4 (Y, _FP_WFRACXBITS_##fs);				\
+									\
+      for (_FP_DIV_MEAT_4_udiv_i = 3; ; _FP_DIV_MEAT_4_udiv_i--)	\
+	{								\
+	  if (X##_f[3] == Y##_f[3])					\
+	    {								\
+	      /* This is a special case, not an optimization		\
+		 (X##_f[3]/Y##_f[3] would not fit into UWtype).		\
+		 As X## is guaranteed to be < Y,			\
+		 R##_f[_FP_DIV_MEAT_4_udiv_i] can be either		\
+		 (UWtype)-1 or (UWtype)-2.  */				\
+	      R##_f[_FP_DIV_MEAT_4_udiv_i] = -1;			\
+	      if (!_FP_DIV_MEAT_4_udiv_i)				\
+		break;							\
+	      __FP_FRAC_SUB_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0],	\
+			       Y##_f[2], Y##_f[1], Y##_f[0], 0,		\
+			       X##_f[2], X##_f[1], X##_f[0],		\
+			       _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]); \
+	      _FP_FRAC_SUB_4 (X, Y, X);					\
+	      if (X##_f[3] > Y##_f[3])					\
+		{							\
+		  R##_f[_FP_DIV_MEAT_4_udiv_i] = -2;			\
+		  _FP_FRAC_ADD_4 (X, Y, X);				\
+		}							\
+	    }								\
+	  else								\
+	    {								\
+	      udiv_qrnnd (R##_f[_FP_DIV_MEAT_4_udiv_i],			\
+			  X##_f[3], X##_f[3], X##_f[2], Y##_f[3]);	\
+	      umul_ppppmnnn (_FP_DIV_MEAT_4_udiv_m_f[3],		\
+			     _FP_DIV_MEAT_4_udiv_m_f[2],		\
+			     _FP_DIV_MEAT_4_udiv_m_f[1],		\
+			     _FP_DIV_MEAT_4_udiv_m_f[0],		\
+			     R##_f[_FP_DIV_MEAT_4_udiv_i],		\
+			     Y##_f[2], Y##_f[1], Y##_f[0]);		\
+	      X##_f[2] = X##_f[1];					\
+	      X##_f[1] = X##_f[0];					\
+	      X##_f[0]							\
+		= _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i];	\
+	      if (_FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X))		\
+		{							\
+		  R##_f[_FP_DIV_MEAT_4_udiv_i]--;			\
+		  _FP_FRAC_ADD_4 (X, Y, X);				\
+		  if (_FP_FRAC_GE_4 (X, Y)				\
+		      && _FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X))	\
+		    {							\
+		      R##_f[_FP_DIV_MEAT_4_udiv_i]--;			\
+		      _FP_FRAC_ADD_4 (X, Y, X);				\
+		    }							\
+		}							\
+	      _FP_FRAC_DEC_4 (X, _FP_DIV_MEAT_4_udiv_m);		\
+	      if (!_FP_DIV_MEAT_4_udiv_i)				\
+		{							\
+		  if (!_FP_FRAC_EQ_4 (X, _FP_DIV_MEAT_4_udiv_m))	\
+		    R##_f[0] |= _FP_WORK_STICKY;			\
+		  break;						\
+		}							\
+	    }								\
+	}								\
+    }									\
+  while (0)
+
+
+/* Square root algorithms:
+   We have just one right now, maybe Newton approximation
+   should be added for those machines where division is fast.  */
+
+#define _FP_SQRT_MEAT_4(R, S, T, X, q)					\
+  do									\
+    {									\
+      while (q)								\
+	{								\
+	  T##_f[3] = S##_f[3] + (q);					\
+	  if (T##_f[3] <= X##_f[3])					\
+	    {								\
+	      S##_f[3] = T##_f[3] + (q);				\
+	      X##_f[3] -= T##_f[3];					\
+	      R##_f[3] += (q);						\
+	    }								\
+	  _FP_FRAC_SLL_4 (X, 1);					\
+	  (q) >>= 1;							\
+	}								\
+      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);			\
+      while (q)								\
+	{								\
+	  T##_f[2] = S##_f[2] + (q);					\
+	  T##_f[3] = S##_f[3];						\
+	  if (T##_f[3] < X##_f[3]					\
+	      || (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2]))	\
+	    {								\
+	      S##_f[2] = T##_f[2] + (q);				\
+	      S##_f[3] += (T##_f[2] > S##_f[2]);			\
+	      __FP_FRAC_DEC_2 (X##_f[3], X##_f[2],			\
+			       T##_f[3], T##_f[2]);			\
+	      R##_f[2] += (q);						\
+	    }								\
+	  _FP_FRAC_SLL_4 (X, 1);					\
+	  (q) >>= 1;							\
+	}								\
+      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);			\
+      while (q)								\
+	{								\
+	  T##_f[1] = S##_f[1] + (q);					\
+	  T##_f[2] = S##_f[2];						\
+	  T##_f[3] = S##_f[3];						\
+	  if (T##_f[3] < X##_f[3]					\
+	      || (T##_f[3] == X##_f[3]					\
+		  && (T##_f[2] < X##_f[2]				\
+		      || (T##_f[2] == X##_f[2]				\
+			  && T##_f[1] <= X##_f[1]))))			\
+	    {								\
+	      S##_f[1] = T##_f[1] + (q);				\
+	      S##_f[2] += (T##_f[1] > S##_f[1]);			\
+	      S##_f[3] += (T##_f[2] > S##_f[2]);			\
+	      __FP_FRAC_DEC_3 (X##_f[3], X##_f[2], X##_f[1],		\
+			       T##_f[3], T##_f[2], T##_f[1]);		\
+	      R##_f[1] += (q);						\
+	    }								\
+	  _FP_FRAC_SLL_4 (X, 1);					\
+	  (q) >>= 1;							\
+	}								\
+      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1);			\
+      while ((q) != _FP_WORK_ROUND)					\
+	{								\
+	  T##_f[0] = S##_f[0] + (q);					\
+	  T##_f[1] = S##_f[1];						\
+	  T##_f[2] = S##_f[2];						\
+	  T##_f[3] = S##_f[3];						\
+	  if (_FP_FRAC_GE_4 (X, T))					\
+	    {								\
+	      S##_f[0] = T##_f[0] + (q);				\
+	      S##_f[1] += (T##_f[0] > S##_f[0]);			\
+	      S##_f[2] += (T##_f[1] > S##_f[1]);			\
+	      S##_f[3] += (T##_f[2] > S##_f[2]);			\
+	      _FP_FRAC_DEC_4 (X, T);					\
+	      R##_f[0] += (q);						\
+	    }								\
+	  _FP_FRAC_SLL_4 (X, 1);					\
+	  (q) >>= 1;							\
+	}								\
+      if (!_FP_FRAC_ZEROP_4 (X))					\
+	{								\
+	  if (_FP_FRAC_GT_4 (X, S))					\
+	    R##_f[0] |= _FP_WORK_ROUND;					\
+	  R##_f[0] |= _FP_WORK_STICKY;					\
+	}								\
+    }									\
+  while (0)
+
+
+/* Internals.  */
+
+#define __FP_FRAC_SET_4(X, I3, I2, I1, I0)			\
+  (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
+
+#ifndef __FP_FRAC_ADD_3
+# define __FP_FRAC_ADD_3(r2, r1, r0, x2, x1, x0, y2, y1, y0)	\
+  do								\
+    {								\
+      _FP_W_TYPE __FP_FRAC_ADD_3_c1, __FP_FRAC_ADD_3_c2;	\
+      r0 = x0 + y0;						\
+      __FP_FRAC_ADD_3_c1 = r0 < x0;				\
+      r1 = x1 + y1;						\
+      __FP_FRAC_ADD_3_c2 = r1 < x1;				\
+      r1 += __FP_FRAC_ADD_3_c1;					\
+      __FP_FRAC_ADD_3_c2 |= r1 < __FP_FRAC_ADD_3_c1;		\
+      r2 = x2 + y2 + __FP_FRAC_ADD_3_c2;			\
+    }								\
+  while (0)
+#endif
+
+#ifndef __FP_FRAC_ADD_4
+# define __FP_FRAC_ADD_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
+  do									\
+    {									\
+      _FP_W_TYPE __FP_FRAC_ADD_4_c1, __FP_FRAC_ADD_4_c2;		\
+      _FP_W_TYPE __FP_FRAC_ADD_4_c3;					\
+      r0 = x0 + y0;							\
+      __FP_FRAC_ADD_4_c1 = r0 < x0;					\
+      r1 = x1 + y1;							\
+      __FP_FRAC_ADD_4_c2 = r1 < x1;					\
+      r1 += __FP_FRAC_ADD_4_c1;						\
+      __FP_FRAC_ADD_4_c2 |= r1 < __FP_FRAC_ADD_4_c1;			\
+      r2 = x2 + y2;							\
+      __FP_FRAC_ADD_4_c3 = r2 < x2;					\
+      r2 += __FP_FRAC_ADD_4_c2;						\
+      __FP_FRAC_ADD_4_c3 |= r2 < __FP_FRAC_ADD_4_c2;			\
+      r3 = x3 + y3 + __FP_FRAC_ADD_4_c3;				\
+    }									\
+  while (0)
+#endif
+
+#ifndef __FP_FRAC_SUB_3
+# define __FP_FRAC_SUB_3(r2, r1, r0, x2, x1, x0, y2, y1, y0)	\
+  do								\
+    {								\
+      _FP_W_TYPE __FP_FRAC_SUB_3_c1, __FP_FRAC_SUB_3_c2;	\
+      r0 = x0 - y0;						\
+      __FP_FRAC_SUB_3_c1 = r0 > x0;				\
+      r1 = x1 - y1;						\
+      __FP_FRAC_SUB_3_c2 = r1 > x1;				\
+      r1 -= __FP_FRAC_SUB_3_c1;					\
+      __FP_FRAC_SUB_3_c2 |= __FP_FRAC_SUB_3_c1 && (y1 == x1);	\
+      r2 = x2 - y2 - __FP_FRAC_SUB_3_c2;			\
+    }								\
+  while (0)
+#endif
+
+#ifndef __FP_FRAC_SUB_4
+# define __FP_FRAC_SUB_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
+  do									\
+    {									\
+      _FP_W_TYPE __FP_FRAC_SUB_4_c1, __FP_FRAC_SUB_4_c2;		\
+      _FP_W_TYPE __FP_FRAC_SUB_4_c3;					\
+      r0 = x0 - y0;							\
+      __FP_FRAC_SUB_4_c1 = r0 > x0;					\
+      r1 = x1 - y1;							\
+      __FP_FRAC_SUB_4_c2 = r1 > x1;					\
+      r1 -= __FP_FRAC_SUB_4_c1;						\
+      __FP_FRAC_SUB_4_c2 |= __FP_FRAC_SUB_4_c1 && (y1 == x1);		\
+      r2 = x2 - y2;							\
+      __FP_FRAC_SUB_4_c3 = r2 > x2;					\
+      r2 -= __FP_FRAC_SUB_4_c2;						\
+      __FP_FRAC_SUB_4_c3 |= __FP_FRAC_SUB_4_c2 && (y2 == x2);		\
+      r3 = x3 - y3 - __FP_FRAC_SUB_4_c3;				\
+    }									\
+  while (0)
+#endif
+
+#ifndef __FP_FRAC_DEC_3
+# define __FP_FRAC_DEC_3(x2, x1, x0, y2, y1, y0)		\
+  do								\
+    {								\
+      UWtype __FP_FRAC_DEC_3_t0, __FP_FRAC_DEC_3_t1;		\
+      UWtype __FP_FRAC_DEC_3_t2;				\
+      __FP_FRAC_DEC_3_t0 = x0;					\
+      __FP_FRAC_DEC_3_t1 = x1;					\
+      __FP_FRAC_DEC_3_t2 = x2;					\
+      __FP_FRAC_SUB_3 (x2, x1, x0, __FP_FRAC_DEC_3_t2,		\
+		       __FP_FRAC_DEC_3_t1, __FP_FRAC_DEC_3_t0,	\
+		       y2, y1, y0);				\
+    }								\
+  while (0)
+#endif
+
+#ifndef __FP_FRAC_DEC_4
+# define __FP_FRAC_DEC_4(x3, x2, x1, x0, y3, y2, y1, y0)	\
+  do								\
+    {								\
+      UWtype __FP_FRAC_DEC_4_t0, __FP_FRAC_DEC_4_t1;		\
+      UWtype __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t3;		\
+      __FP_FRAC_DEC_4_t0 = x0;					\
+      __FP_FRAC_DEC_4_t1 = x1;					\
+      __FP_FRAC_DEC_4_t2 = x2;					\
+      __FP_FRAC_DEC_4_t3 = x3;					\
+      __FP_FRAC_SUB_4 (x3, x2, x1, x0, __FP_FRAC_DEC_4_t3,	\
+		       __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t1,	\
+		       __FP_FRAC_DEC_4_t0, y3, y2, y1, y0);	\
+    }								\
+  while (0)
+#endif
+
+#ifndef __FP_FRAC_ADDI_4
+# define __FP_FRAC_ADDI_4(x3, x2, x1, x0, i)		\
+  do							\
+    {							\
+      UWtype __FP_FRAC_ADDI_4_t;			\
+      __FP_FRAC_ADDI_4_t = ((x0 += i) < i);		\
+      x1 += __FP_FRAC_ADDI_4_t;				\
+      __FP_FRAC_ADDI_4_t = (x1 < __FP_FRAC_ADDI_4_t);	\
+      x2 += __FP_FRAC_ADDI_4_t;				\
+      __FP_FRAC_ADDI_4_t = (x2 < __FP_FRAC_ADDI_4_t);	\
+      x3 += __FP_FRAC_ADDI_4_t;				\
+    }							\
+  while (0)
+#endif
+
+/* Convert FP values between word sizes. This appears to be more
+   complicated than I'd have expected it to be, so these might be
+   wrong... These macros are in any case somewhat bogus because they
+   use information about what various FRAC_n variables look like
+   internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
+   the ones in op-2.h and op-1.h.  */
+#define _FP_FRAC_COPY_1_4(D, S)		(D##_f = S##_f[0])
+
+#define _FP_FRAC_COPY_2_4(D, S)			\
+  do						\
+    {						\
+      D##_f0 = S##_f[0];			\
+      D##_f1 = S##_f[1];			\
+    }						\
+  while (0)
+
+/* Assembly/disassembly for converting to/from integral types.
+   No shifting or overflow handled here.  */
+/* Put the FP value X into r, which is an integer of size rsize.  */
+#define _FP_FRAC_ASSEMBLE_4(r, X, rsize)				\
+  do									\
+    {									\
+      if ((rsize) <= _FP_W_TYPE_SIZE)					\
+	(r) = X##_f[0];							\
+	else if ((rsize) <= 2*_FP_W_TYPE_SIZE)				\
+	{								\
+	  (r) = X##_f[1];						\
+	  (r) = ((rsize) <= _FP_W_TYPE_SIZE				\
+		 ? 0							\
+		 : (r) << _FP_W_TYPE_SIZE);				\
+	  (r) += X##_f[0];						\
+	}								\
+      else								\
+	{								\
+	  /* I'm feeling lazy so we deal with int == 3words		\
+	     (implausible) and int == 4words as a single case.  */	\
+	  (r) = X##_f[3];						\
+	  (r) = ((rsize) <= _FP_W_TYPE_SIZE				\
+		 ? 0							\
+		 : (r) << _FP_W_TYPE_SIZE);				\
+	  (r) += X##_f[2];						\
+	  (r) = ((rsize) <= _FP_W_TYPE_SIZE				\
+		 ? 0							\
+		 : (r) << _FP_W_TYPE_SIZE);				\
+	  (r) += X##_f[1];						\
+	  (r) = ((rsize) <= _FP_W_TYPE_SIZE				\
+		 ? 0							\
+		 : (r) << _FP_W_TYPE_SIZE);				\
+	  (r) += X##_f[0];						\
+	}								\
+    }									\
+  while (0)
+
+/* "No disassemble Number Five!" */
+/* Move an integer of size rsize into X's fractional part. We rely on
+   the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
+   having to mask the values we store into it.  */
+#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)	\
+  do						\
+    {						\
+      X##_f[0] = (r);				\
+      X##_f[1] = ((rsize) <= _FP_W_TYPE_SIZE	\
+		  ? 0				\
+		  : (r) >> _FP_W_TYPE_SIZE);	\
+      X##_f[2] = ((rsize) <= 2*_FP_W_TYPE_SIZE	\
+		  ? 0				\
+		  : (r) >> 2*_FP_W_TYPE_SIZE);	\
+      X##_f[3] = ((rsize) <= 3*_FP_W_TYPE_SIZE	\
+		  ? 0				\
+		  : (r) >> 3*_FP_W_TYPE_SIZE);	\
+    }						\
+  while (0)
+
+#define _FP_FRAC_COPY_4_1(D, S)			\
+  do						\
+    {						\
+      D##_f[0] = S##_f;				\
+      D##_f[1] = D##_f[2] = D##_f[3] = 0;	\
+    }						\
+  while (0)
+
+#define _FP_FRAC_COPY_4_2(D, S)			\
+  do						\
+    {						\
+      D##_f[0] = S##_f0;			\
+      D##_f[1] = S##_f1;			\
+      D##_f[2] = D##_f[3] = 0;			\
+    }						\
+  while (0)
+
+#define _FP_FRAC_COPY_4_4(D, S)	_FP_FRAC_COPY_4 (D, S)
+
+#endif /* !SOFT_FP_OP_4_H */
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/op-8.h b/libc/sysdeps/linux/sparc64/soft-fp/op-8.h
new file mode 100644
index 000000000..1f424e102
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/op-8.h
@@ -0,0 +1,150 @@
+/* Software floating-point emulation.
+   Basic eight-word fraction declaration and manipulation.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef SOFT_FP_OP_8_H
+#define SOFT_FP_OP_8_H	1
+
+/* We need just a few things from here for op-4, if we ever need some
+   other macros, they can be added.  */
+#define _FP_FRAC_DECL_8(X)	_FP_W_TYPE X##_f[8]
+#define _FP_FRAC_HIGH_8(X)	(X##_f[7])
+#define _FP_FRAC_LOW_8(X)	(X##_f[0])
+#define _FP_FRAC_WORD_8(X, w)	(X##_f[w])
+
+#define _FP_FRAC_SLL_8(X, N)						\
+  do									\
+    {									\
+      _FP_I_TYPE _FP_FRAC_SLL_8_up, _FP_FRAC_SLL_8_down;		\
+      _FP_I_TYPE _FP_FRAC_SLL_8_skip, _FP_FRAC_SLL_8_i;			\
+      _FP_FRAC_SLL_8_skip = (N) / _FP_W_TYPE_SIZE;			\
+      _FP_FRAC_SLL_8_up = (N) % _FP_W_TYPE_SIZE;			\
+      _FP_FRAC_SLL_8_down = _FP_W_TYPE_SIZE - _FP_FRAC_SLL_8_up;	\
+      if (!_FP_FRAC_SLL_8_up)						\
+	for (_FP_FRAC_SLL_8_i = 7;					\
+	     _FP_FRAC_SLL_8_i >= _FP_FRAC_SLL_8_skip;			\
+	     --_FP_FRAC_SLL_8_i)					\
+	  X##_f[_FP_FRAC_SLL_8_i]					\
+	    = X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip];		\
+      else								\
+	{								\
+	  for (_FP_FRAC_SLL_8_i = 7;					\
+	       _FP_FRAC_SLL_8_i > _FP_FRAC_SLL_8_skip;			\
+	       --_FP_FRAC_SLL_8_i)					\
+	    X##_f[_FP_FRAC_SLL_8_i]					\
+	      = ((X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip]		\
+		  << _FP_FRAC_SLL_8_up)					\
+		 | (X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip-1]	\
+		    >> _FP_FRAC_SLL_8_down));				\
+	  X##_f[_FP_FRAC_SLL_8_i--] = X##_f[0] << _FP_FRAC_SLL_8_up;	\
+	}								\
+      for (; _FP_FRAC_SLL_8_i >= 0; --_FP_FRAC_SLL_8_i)			\
+	X##_f[_FP_FRAC_SLL_8_i] = 0;					\
+    }									\
+  while (0)
+
+#define _FP_FRAC_SRL_8(X, N)						\
+  do									\
+    {									\
+      _FP_I_TYPE _FP_FRAC_SRL_8_up, _FP_FRAC_SRL_8_down;		\
+      _FP_I_TYPE _FP_FRAC_SRL_8_skip, _FP_FRAC_SRL_8_i;			\
+      _FP_FRAC_SRL_8_skip = (N) / _FP_W_TYPE_SIZE;			\
+      _FP_FRAC_SRL_8_down = (N) % _FP_W_TYPE_SIZE;			\
+      _FP_FRAC_SRL_8_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRL_8_down;	\
+      if (!_FP_FRAC_SRL_8_down)						\
+	for (_FP_FRAC_SRL_8_i = 0;					\
+	     _FP_FRAC_SRL_8_i <= 7-_FP_FRAC_SRL_8_skip;			\
+	     ++_FP_FRAC_SRL_8_i)					\
+	  X##_f[_FP_FRAC_SRL_8_i]					\
+	    = X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip];		\
+      else								\
+	{								\
+	  for (_FP_FRAC_SRL_8_i = 0;					\
+	       _FP_FRAC_SRL_8_i < 7-_FP_FRAC_SRL_8_skip;		\
+	       ++_FP_FRAC_SRL_8_i)					\
+	    X##_f[_FP_FRAC_SRL_8_i]					\
+	      = ((X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip]		\
+		  >> _FP_FRAC_SRL_8_down)				\
+		 | (X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip+1]	\
+		    << _FP_FRAC_SRL_8_up));				\
+	  X##_f[_FP_FRAC_SRL_8_i++] = X##_f[7] >> _FP_FRAC_SRL_8_down;	\
+	}								\
+      for (; _FP_FRAC_SRL_8_i < 8; ++_FP_FRAC_SRL_8_i)			\
+	X##_f[_FP_FRAC_SRL_8_i] = 0;					\
+    }									\
+  while (0)
+
+
+/* Right shift with sticky-lsb.
+   What this actually means is that we do a standard right-shift,
+   but that if any of the bits that fall off the right hand side
+   were one then we always set the LSbit.  */
+#define _FP_FRAC_SRS_8(X, N, size)					\
+  do									\
+    {									\
+      _FP_I_TYPE _FP_FRAC_SRS_8_up, _FP_FRAC_SRS_8_down;		\
+      _FP_I_TYPE _FP_FRAC_SRS_8_skip, _FP_FRAC_SRS_8_i;			\
+      _FP_W_TYPE _FP_FRAC_SRS_8_s;					\
+      _FP_FRAC_SRS_8_skip = (N) / _FP_W_TYPE_SIZE;			\
+      _FP_FRAC_SRS_8_down = (N) % _FP_W_TYPE_SIZE;			\
+      _FP_FRAC_SRS_8_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRS_8_down;	\
+      for (_FP_FRAC_SRS_8_s = _FP_FRAC_SRS_8_i = 0;			\
+	   _FP_FRAC_SRS_8_i < _FP_FRAC_SRS_8_skip;			\
+	   ++_FP_FRAC_SRS_8_i)						\
+	_FP_FRAC_SRS_8_s |= X##_f[_FP_FRAC_SRS_8_i];			\
+      if (!_FP_FRAC_SRS_8_down)						\
+	for (_FP_FRAC_SRS_8_i = 0;					\
+	     _FP_FRAC_SRS_8_i <= 7-_FP_FRAC_SRS_8_skip;			\
+	     ++_FP_FRAC_SRS_8_i)					\
+	  X##_f[_FP_FRAC_SRS_8_i]					\
+	    = X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip];		\
+      else								\
+	{								\
+	  _FP_FRAC_SRS_8_s						\
+	    |= X##_f[_FP_FRAC_SRS_8_i] << _FP_FRAC_SRS_8_up;		\
+	  for (_FP_FRAC_SRS_8_i = 0;					\
+	       _FP_FRAC_SRS_8_i < 7-_FP_FRAC_SRS_8_skip;		\
+	       ++_FP_FRAC_SRS_8_i)					\
+	    X##_f[_FP_FRAC_SRS_8_i]					\
+	      = ((X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip]		\
+		  >> _FP_FRAC_SRS_8_down)				\
+		 | (X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip+1]	\
+		    << _FP_FRAC_SRS_8_up));				\
+	  X##_f[_FP_FRAC_SRS_8_i++] = X##_f[7] >> _FP_FRAC_SRS_8_down;	\
+	}								\
+      for (; _FP_FRAC_SRS_8_i < 8; ++_FP_FRAC_SRS_8_i)			\
+	X##_f[_FP_FRAC_SRS_8_i] = 0;					\
+      /* Don't fix the LSB until the very end when we're sure f[0] is	\
+	 stable.  */							\
+      X##_f[0] |= (_FP_FRAC_SRS_8_s != 0);				\
+    }									\
+  while (0)
+
+#endif /* !SOFT_FP_OP_8_H */
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/op-common.h b/libc/sysdeps/linux/sparc64/soft-fp/op-common.h
new file mode 100644
index 000000000..82b1d09fc
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/op-common.h
@@ -0,0 +1,2134 @@
+/* Software floating-point emulation. Common operations.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef SOFT_FP_OP_COMMON_H
+#define SOFT_FP_OP_COMMON_H	1
+
+#define _FP_DECL(wc, X)						\
+  _FP_I_TYPE X##_c __attribute__ ((unused)) _FP_ZERO_INIT;	\
+  _FP_I_TYPE X##_s __attribute__ ((unused)) _FP_ZERO_INIT;	\
+  _FP_I_TYPE X##_e __attribute__ ((unused)) _FP_ZERO_INIT;	\
+  _FP_FRAC_DECL_##wc (X)
+
+/* Test whether the qNaN bit denotes a signaling NaN.  */
+#define _FP_FRAC_SNANP(fs, X)				\
+  ((_FP_QNANNEGATEDP)					\
+   ? (_FP_FRAC_HIGH_RAW_##fs (X) & _FP_QNANBIT_##fs)	\
+   : !(_FP_FRAC_HIGH_RAW_##fs (X) & _FP_QNANBIT_##fs))
+#define _FP_FRAC_SNANP_SEMIRAW(fs, X)			\
+  ((_FP_QNANNEGATEDP)					\
+   ? (_FP_FRAC_HIGH_##fs (X) & _FP_QNANBIT_SH_##fs)	\
+   : !(_FP_FRAC_HIGH_##fs (X) & _FP_QNANBIT_SH_##fs))
+
+/* Finish truly unpacking a native fp value by classifying the kind
+   of fp value and normalizing both the exponent and the fraction.  */
+
+#define _FP_UNPACK_CANONICAL(fs, wc, X)				\
+  do								\
+    {								\
+      switch (X##_e)						\
+	{							\
+	default:						\
+	  _FP_FRAC_HIGH_RAW_##fs (X) |= _FP_IMPLBIT_##fs;	\
+	  _FP_FRAC_SLL_##wc (X, _FP_WORKBITS);			\
+	  X##_e -= _FP_EXPBIAS_##fs;				\
+	  X##_c = FP_CLS_NORMAL;				\
+	  break;						\
+								\
+	case 0:							\
+	  if (_FP_FRAC_ZEROP_##wc (X))				\
+	    X##_c = FP_CLS_ZERO;				\
+	  else if (FP_DENORM_ZERO)				\
+	    {							\
+	      X##_c = FP_CLS_ZERO;				\
+	      _FP_FRAC_SET_##wc (X, _FP_ZEROFRAC_##wc);		\
+	      FP_SET_EXCEPTION (FP_EX_DENORM);			\
+	    }							\
+	  else							\
+	    {							\
+	      /* A denormalized number.  */			\
+	      _FP_I_TYPE _FP_UNPACK_CANONICAL_shift;		\
+	      _FP_FRAC_CLZ_##wc (_FP_UNPACK_CANONICAL_shift,	\
+				 X);				\
+	      _FP_UNPACK_CANONICAL_shift -= _FP_FRACXBITS_##fs;	\
+	      _FP_FRAC_SLL_##wc (X, (_FP_UNPACK_CANONICAL_shift \
+				     + _FP_WORKBITS));		\
+	      X##_e -= (_FP_EXPBIAS_##fs - 1			\
+			+ _FP_UNPACK_CANONICAL_shift);		\
+	      X##_c = FP_CLS_NORMAL;				\
+	      FP_SET_EXCEPTION (FP_EX_DENORM);			\
+	    }							\
+	  break;						\
+								\
+	case _FP_EXPMAX_##fs:					\
+	  if (_FP_FRAC_ZEROP_##wc (X))				\
+	    X##_c = FP_CLS_INF;					\
+	  else							\
+	    {							\
+	      X##_c = FP_CLS_NAN;				\
+	      /* Check for signaling NaN.  */			\
+	      if (_FP_FRAC_SNANP (fs, X))			\
+		FP_SET_EXCEPTION (FP_EX_INVALID			\
+				  | FP_EX_INVALID_SNAN);	\
+	    }							\
+	  break;						\
+	}							\
+    }								\
+  while (0)
+
+/* Finish unpacking an fp value in semi-raw mode: the mantissa is
+   shifted by _FP_WORKBITS but the implicit MSB is not inserted and
+   other classification is not done.  */
+#define _FP_UNPACK_SEMIRAW(fs, wc, X)	_FP_FRAC_SLL_##wc (X, _FP_WORKBITS)
+
+/* Check whether a raw or semi-raw input value should be flushed to
+   zero, and flush it to zero if so.  */
+#define _FP_CHECK_FLUSH_ZERO(fs, wc, X)			\
+  do							\
+    {							\
+      if (FP_DENORM_ZERO				\
+	  && X##_e == 0					\
+	  && !_FP_FRAC_ZEROP_##wc (X))			\
+	{						\
+	  _FP_FRAC_SET_##wc (X, _FP_ZEROFRAC_##wc);	\
+	  FP_SET_EXCEPTION (FP_EX_DENORM);		\
+	}						\
+    }							\
+  while (0)
+
+/* A semi-raw value has overflowed to infinity.  Adjust the mantissa
+   and exponent appropriately.  */
+#define _FP_OVERFLOW_SEMIRAW(fs, wc, X)			\
+  do							\
+    {							\
+      if (FP_ROUNDMODE == FP_RND_NEAREST		\
+	  || (FP_ROUNDMODE == FP_RND_PINF && !X##_s)	\
+	  || (FP_ROUNDMODE == FP_RND_MINF && X##_s))	\
+	{						\
+	  X##_e = _FP_EXPMAX_##fs;			\
+	  _FP_FRAC_SET_##wc (X, _FP_ZEROFRAC_##wc);	\
+	}						\
+      else						\
+	{						\
+	  X##_e = _FP_EXPMAX_##fs - 1;			\
+	  _FP_FRAC_SET_##wc (X, _FP_MAXFRAC_##wc);	\
+	}						\
+      FP_SET_EXCEPTION (FP_EX_INEXACT);			\
+      FP_SET_EXCEPTION (FP_EX_OVERFLOW);		\
+    }							\
+  while (0)
+
+/* Check for a semi-raw value being a signaling NaN and raise the
+   invalid exception if so.  */
+#define _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X)			\
+  do								\
+    {								\
+      if (X##_e == _FP_EXPMAX_##fs				\
+	  && !_FP_FRAC_ZEROP_##wc (X)				\
+	  && _FP_FRAC_SNANP_SEMIRAW (fs, X))			\
+	FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_SNAN);	\
+    }								\
+  while (0)
+
+/* Choose a NaN result from an operation on two semi-raw NaN
+   values.  */
+#define _FP_CHOOSENAN_SEMIRAW(fs, wc, R, X, Y, OP)			\
+  do									\
+    {									\
+      /* _FP_CHOOSENAN expects raw values, so shift as required.  */	\
+      _FP_FRAC_SRL_##wc (X, _FP_WORKBITS);				\
+      _FP_FRAC_SRL_##wc (Y, _FP_WORKBITS);				\
+      _FP_CHOOSENAN (fs, wc, R, X, Y, OP);				\
+      _FP_FRAC_SLL_##wc (R, _FP_WORKBITS);				\
+    }									\
+  while (0)
+
+/* Make the fractional part a quiet NaN, preserving the payload
+   if possible, otherwise make it the canonical quiet NaN and set
+   the sign bit accordingly.  */
+#define _FP_SETQNAN(fs, wc, X)					\
+  do								\
+    {								\
+      if (_FP_QNANNEGATEDP)					\
+	{							\
+	  _FP_FRAC_HIGH_RAW_##fs (X) &= _FP_QNANBIT_##fs - 1;	\
+	  if (_FP_FRAC_ZEROP_##wc (X))				\
+	    {							\
+	      X##_s = _FP_NANSIGN_##fs;				\
+	      _FP_FRAC_SET_##wc (X, _FP_NANFRAC_##fs);		\
+	    }							\
+	}							\
+      else							\
+	_FP_FRAC_HIGH_RAW_##fs (X) |= _FP_QNANBIT_##fs;		\
+    }								\
+  while (0)
+#define _FP_SETQNAN_SEMIRAW(fs, wc, X)				\
+  do								\
+    {								\
+      if (_FP_QNANNEGATEDP)					\
+	{							\
+	  _FP_FRAC_HIGH_##fs (X) &= _FP_QNANBIT_SH_##fs - 1;	\
+	  if (_FP_FRAC_ZEROP_##wc (X))				\
+	    {							\
+	      X##_s = _FP_NANSIGN_##fs;				\
+	      _FP_FRAC_SET_##wc (X, _FP_NANFRAC_##fs);		\
+	      _FP_FRAC_SLL_##wc (X, _FP_WORKBITS);		\
+	    }							\
+	}							\
+      else							\
+	_FP_FRAC_HIGH_##fs (X) |= _FP_QNANBIT_SH_##fs;		\
+    }								\
+  while (0)
+
+/* Test whether a biased exponent is normal (not zero or maximum).  */
+#define _FP_EXP_NORMAL(fs, wc, X)	(((X##_e + 1) & _FP_EXPMAX_##fs) > 1)
+
+/* Prepare to pack an fp value in semi-raw mode: the mantissa is
+   rounded and shifted right, with the rounding possibly increasing
+   the exponent (including changing a finite value to infinity).  */
+#define _FP_PACK_SEMIRAW(fs, wc, X)				\
+  do								\
+    {								\
+      int _FP_PACK_SEMIRAW_is_tiny				\
+	= X##_e == 0 && !_FP_FRAC_ZEROP_##wc (X);		\
+      if (_FP_TININESS_AFTER_ROUNDING				\
+	  && _FP_PACK_SEMIRAW_is_tiny)				\
+	{							\
+	  FP_DECL_##fs (_FP_PACK_SEMIRAW_T);			\
+	  _FP_FRAC_COPY_##wc (_FP_PACK_SEMIRAW_T, X);		\
+	  _FP_PACK_SEMIRAW_T##_s = X##_s;			\
+	  _FP_PACK_SEMIRAW_T##_e = X##_e;			\
+	  _FP_FRAC_SLL_##wc (_FP_PACK_SEMIRAW_T, 1);		\
+	  _FP_ROUND (wc, _FP_PACK_SEMIRAW_T);			\
+	  if (_FP_FRAC_OVERP_##wc (fs, _FP_PACK_SEMIRAW_T))	\
+	    _FP_PACK_SEMIRAW_is_tiny = 0;			\
+	}							\
+      _FP_ROUND (wc, X);					\
+      if (_FP_PACK_SEMIRAW_is_tiny)				\
+	{							\
+	  if ((FP_CUR_EXCEPTIONS & FP_EX_INEXACT)		\
+	      || (FP_TRAPPING_EXCEPTIONS & FP_EX_UNDERFLOW))	\
+	    FP_SET_EXCEPTION (FP_EX_UNDERFLOW);			\
+	}							\
+      if (_FP_FRAC_HIGH_##fs (X)				\
+	  & (_FP_OVERFLOW_##fs >> 1))				\
+	{							\
+	  _FP_FRAC_HIGH_##fs (X) &= ~(_FP_OVERFLOW_##fs >> 1);	\
+	  X##_e++;						\
+	  if (X##_e == _FP_EXPMAX_##fs)				\
+	    _FP_OVERFLOW_SEMIRAW (fs, wc, X);			\
+	}							\
+      _FP_FRAC_SRL_##wc (X, _FP_WORKBITS);			\
+      if (X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (X))	\
+	{							\
+	  if (!_FP_KEEPNANFRACP)				\
+	    {							\
+	      _FP_FRAC_SET_##wc (X, _FP_NANFRAC_##fs);		\
+	      X##_s = _FP_NANSIGN_##fs;				\
+	    }							\
+	  else							\
+	    _FP_SETQNAN (fs, wc, X);				\
+	}							\
+    }								\
+  while (0)
+
+/* Before packing the bits back into the native fp result, take care
+   of such mundane things as rounding and overflow.  Also, for some
+   kinds of fp values, the original parts may not have been fully
+   extracted -- but that is ok, we can regenerate them now.  */
+
+#define _FP_PACK_CANONICAL(fs, wc, X)					\
+  do									\
+    {									\
+      switch (X##_c)							\
+	{								\
+	case FP_CLS_NORMAL:						\
+	  X##_e += _FP_EXPBIAS_##fs;					\
+	  if (X##_e > 0)						\
+	    {								\
+	      _FP_ROUND (wc, X);					\
+	      if (_FP_FRAC_OVERP_##wc (fs, X))				\
+		{							\
+		  _FP_FRAC_CLEAR_OVERP_##wc (fs, X);			\
+		  X##_e++;						\
+		}							\
+	      _FP_FRAC_SRL_##wc (X, _FP_WORKBITS);			\
+	      if (X##_e >= _FP_EXPMAX_##fs)				\
+		{							\
+		  /* Overflow.  */					\
+		  switch (FP_ROUNDMODE)					\
+		    {							\
+		    case FP_RND_NEAREST:				\
+		      X##_c = FP_CLS_INF;				\
+		      break;						\
+		    case FP_RND_PINF:					\
+		      if (!X##_s)					\
+			X##_c = FP_CLS_INF;				\
+		      break;						\
+		    case FP_RND_MINF:					\
+		      if (X##_s)					\
+			X##_c = FP_CLS_INF;				\
+		      break;						\
+		    }							\
+		  if (X##_c == FP_CLS_INF)				\
+		    {							\
+		      /* Overflow to infinity.  */			\
+		      X##_e = _FP_EXPMAX_##fs;				\
+		      _FP_FRAC_SET_##wc (X, _FP_ZEROFRAC_##wc);		\
+		    }							\
+		  else							\
+		    {							\
+		      /* Overflow to maximum normal.  */		\
+		      X##_e = _FP_EXPMAX_##fs - 1;			\
+		      _FP_FRAC_SET_##wc (X, _FP_MAXFRAC_##wc);		\
+		    }							\
+		  FP_SET_EXCEPTION (FP_EX_OVERFLOW);			\
+		  FP_SET_EXCEPTION (FP_EX_INEXACT);			\
+		}							\
+	    }								\
+	  else								\
+	    {								\
+	      /* We've got a denormalized number.  */			\
+	      int _FP_PACK_CANONICAL_is_tiny = 1;			\
+	      if (_FP_TININESS_AFTER_ROUNDING && X##_e == 0)		\
+		{							\
+		  FP_DECL_##fs (_FP_PACK_CANONICAL_T);			\
+		  _FP_FRAC_COPY_##wc (_FP_PACK_CANONICAL_T, X);		\
+		  _FP_PACK_CANONICAL_T##_s = X##_s;			\
+		  _FP_PACK_CANONICAL_T##_e = X##_e;			\
+		  _FP_ROUND (wc, _FP_PACK_CANONICAL_T);			\
+		  if (_FP_FRAC_OVERP_##wc (fs, _FP_PACK_CANONICAL_T))	\
+		    _FP_PACK_CANONICAL_is_tiny = 0;			\
+		}							\
+	      X##_e = -X##_e + 1;					\
+	      if (X##_e <= _FP_WFRACBITS_##fs)				\
+		{							\
+		  _FP_FRAC_SRS_##wc (X, X##_e, _FP_WFRACBITS_##fs);	\
+		  _FP_ROUND (wc, X);					\
+		  if (_FP_FRAC_HIGH_##fs (X)				\
+		      & (_FP_OVERFLOW_##fs >> 1))			\
+		    {							\
+		      X##_e = 1;					\
+		      _FP_FRAC_SET_##wc (X, _FP_ZEROFRAC_##wc);		\
+		      FP_SET_EXCEPTION (FP_EX_INEXACT);			\
+		    }							\
+		  else							\
+		    {							\
+		      X##_e = 0;					\
+		      _FP_FRAC_SRL_##wc (X, _FP_WORKBITS);		\
+		    }							\
+		  if (_FP_PACK_CANONICAL_is_tiny			\
+		      && ((FP_CUR_EXCEPTIONS & FP_EX_INEXACT)		\
+			  || (FP_TRAPPING_EXCEPTIONS			\
+			      & FP_EX_UNDERFLOW)))			\
+		    FP_SET_EXCEPTION (FP_EX_UNDERFLOW);			\
+		}							\
+	      else							\
+		{							\
+		  /* Underflow to zero.  */				\
+		  X##_e = 0;						\
+		  if (!_FP_FRAC_ZEROP_##wc (X))				\
+		    {							\
+		      _FP_FRAC_SET_##wc (X, _FP_MINFRAC_##wc);		\
+		      _FP_ROUND (wc, X);				\
+		      _FP_FRAC_LOW_##wc (X) >>= (_FP_WORKBITS);		\
+		    }							\
+		  FP_SET_EXCEPTION (FP_EX_UNDERFLOW);			\
+		}							\
+	    }								\
+	  break;							\
+									\
+	case FP_CLS_ZERO:						\
+	  X##_e = 0;							\
+	  _FP_FRAC_SET_##wc (X, _FP_ZEROFRAC_##wc);			\
+	  break;							\
+									\
+	case FP_CLS_INF:						\
+	  X##_e = _FP_EXPMAX_##fs;					\
+	  _FP_FRAC_SET_##wc (X, _FP_ZEROFRAC_##wc);			\
+	  break;							\
+									\
+	case FP_CLS_NAN:						\
+	  X##_e = _FP_EXPMAX_##fs;					\
+	  if (!_FP_KEEPNANFRACP)					\
+	    {								\
+	      _FP_FRAC_SET_##wc (X, _FP_NANFRAC_##fs);			\
+	      X##_s = _FP_NANSIGN_##fs;					\
+	    }								\
+	  else								\
+	    _FP_SETQNAN (fs, wc, X);					\
+	  break;							\
+	}								\
+    }									\
+  while (0)
+
+/* This one accepts raw argument and not cooked,  returns
+   1 if X is a signaling NaN.  */
+#define _FP_ISSIGNAN(fs, wc, X)			\
+  ({						\
+    int _FP_ISSIGNAN_ret = 0;			\
+    if (X##_e == _FP_EXPMAX_##fs)		\
+      {						\
+	if (!_FP_FRAC_ZEROP_##wc (X)		\
+	    && _FP_FRAC_SNANP (fs, X))		\
+	  _FP_ISSIGNAN_ret = 1;			\
+      }						\
+    _FP_ISSIGNAN_ret;				\
+  })
+
+
+
+
+
+/* Addition on semi-raw values.  */
+#define _FP_ADD_INTERNAL(fs, wc, R, X, Y, OP)				\
+  do									\
+    {									\
+      _FP_CHECK_FLUSH_ZERO (fs, wc, X);					\
+      _FP_CHECK_FLUSH_ZERO (fs, wc, Y);					\
+      if (X##_s == Y##_s)						\
+	{								\
+	  /* Addition.  */						\
+	  __label__ add1, add2, add3, add_done;				\
+	  R##_s = X##_s;						\
+	  int _FP_ADD_INTERNAL_ediff = X##_e - Y##_e;			\
+	  if (_FP_ADD_INTERNAL_ediff > 0)				\
+	    {								\
+	      R##_e = X##_e;						\
+	      if (Y##_e == 0)						\
+		{							\
+		  /* Y is zero or denormalized.  */			\
+		  if (_FP_FRAC_ZEROP_##wc (Y))				\
+		    {							\
+		      _FP_CHECK_SIGNAN_SEMIRAW (fs, wc, X);		\
+		      _FP_FRAC_COPY_##wc (R, X);			\
+		      goto add_done;					\
+		    }							\
+		  else							\
+		    {							\
+		      FP_SET_EXCEPTION (FP_EX_DENORM);			\
+		      _FP_ADD_INTERNAL_ediff--;				\
+		      if (_FP_ADD_INTERNAL_ediff == 0)			\
+			{						\
+			  _FP_FRAC_ADD_##wc (R, X, Y);			\
+			  goto add3;					\
+			}						\
+		      if (X##_e == _FP_EXPMAX_##fs)			\
+			{						\
+			  _FP_CHECK_SIGNAN_SEMIRAW (fs, wc, X);		\
+			  _FP_FRAC_COPY_##wc (R, X);			\
+			  goto add_done;				\
+			}						\
+		      goto add1;					\
+		    }							\
+		}							\
+	      else if (X##_e == _FP_EXPMAX_##fs)			\
+		{							\
+		  /* X is NaN or Inf, Y is normal.  */			\
+		  _FP_CHECK_SIGNAN_SEMIRAW (fs, wc, X);			\
+		  _FP_FRAC_COPY_##wc (R, X);				\
+		  goto add_done;					\
+		}							\
+									\
+	      /* Insert implicit MSB of Y.  */				\
+	      _FP_FRAC_HIGH_##fs (Y) |= _FP_IMPLBIT_SH_##fs;		\
+									\
+	    add1:							\
+	      /* Shift the mantissa of Y to the right			\
+		 _FP_ADD_INTERNAL_EDIFF steps; remember to account	\
+		 later for the implicit MSB of X.  */			\
+	      if (_FP_ADD_INTERNAL_ediff <= _FP_WFRACBITS_##fs)		\
+		_FP_FRAC_SRS_##wc (Y, _FP_ADD_INTERNAL_ediff,		\
+				   _FP_WFRACBITS_##fs);			\
+	      else if (!_FP_FRAC_ZEROP_##wc (Y))			\
+		_FP_FRAC_SET_##wc (Y, _FP_MINFRAC_##wc);		\
+	      _FP_FRAC_ADD_##wc (R, X, Y);				\
+	    }								\
+	  else if (_FP_ADD_INTERNAL_ediff < 0)				\
+	    {								\
+	      _FP_ADD_INTERNAL_ediff = -_FP_ADD_INTERNAL_ediff;		\
+	      R##_e = Y##_e;						\
+	      if (X##_e == 0)						\
+		{							\
+		  /* X is zero or denormalized.  */			\
+		  if (_FP_FRAC_ZEROP_##wc (X))				\
+		    {							\
+		      _FP_CHECK_SIGNAN_SEMIRAW (fs, wc, Y);		\
+		      _FP_FRAC_COPY_##wc (R, Y);			\
+		      goto add_done;					\
+		    }							\
+		  else							\
+		    {							\
+		      FP_SET_EXCEPTION (FP_EX_DENORM);			\
+		      _FP_ADD_INTERNAL_ediff--;				\
+		      if (_FP_ADD_INTERNAL_ediff == 0)			\
+			{						\
+			  _FP_FRAC_ADD_##wc (R, Y, X);			\
+			  goto add3;					\
+			}						\
+		      if (Y##_e == _FP_EXPMAX_##fs)			\
+			{						\
+			  _FP_CHECK_SIGNAN_SEMIRAW (fs, wc, Y);		\
+			  _FP_FRAC_COPY_##wc (R, Y);			\
+			  goto add_done;				\
+			}						\
+		      goto add2;					\
+		    }							\
+		}							\
+	      else if (Y##_e == _FP_EXPMAX_##fs)			\
+		{							\
+		  /* Y is NaN or Inf, X is normal.  */			\
+		  _FP_CHECK_SIGNAN_SEMIRAW (fs, wc, Y);			\
+		  _FP_FRAC_COPY_##wc (R, Y);				\
+		  goto add_done;					\
+		}							\
+									\
+	      /* Insert implicit MSB of X.  */				\
+	      _FP_FRAC_HIGH_##fs (X) |= _FP_IMPLBIT_SH_##fs;		\
+									\
+	    add2:							\
+	      /* Shift the mantissa of X to the right			\
+		 _FP_ADD_INTERNAL_EDIFF steps; remember to account	\
+		 later for the implicit MSB of Y.  */			\
+	      if (_FP_ADD_INTERNAL_ediff <= _FP_WFRACBITS_##fs)		\
+		_FP_FRAC_SRS_##wc (X, _FP_ADD_INTERNAL_ediff,		\
+				   _FP_WFRACBITS_##fs);			\
+	      else if (!_FP_FRAC_ZEROP_##wc (X))			\
+		_FP_FRAC_SET_##wc (X, _FP_MINFRAC_##wc);		\
+	      _FP_FRAC_ADD_##wc (R, Y, X);				\
+	    }								\
+	  else								\
+	    {								\
+	      /* _FP_ADD_INTERNAL_ediff == 0.  */			\
+	      if (!_FP_EXP_NORMAL (fs, wc, X))				\
+		{							\
+		  if (X##_e == 0)					\
+		    {							\
+		      /* X and Y are zero or denormalized.  */		\
+		      R##_e = 0;					\
+		      if (_FP_FRAC_ZEROP_##wc (X))			\
+			{						\
+			  if (!_FP_FRAC_ZEROP_##wc (Y))			\
+			    FP_SET_EXCEPTION (FP_EX_DENORM);		\
+			  _FP_FRAC_COPY_##wc (R, Y);			\
+			  goto add_done;				\
+			}						\
+		      else if (_FP_FRAC_ZEROP_##wc (Y))			\
+			{						\
+			  FP_SET_EXCEPTION (FP_EX_DENORM);		\
+			  _FP_FRAC_COPY_##wc (R, X);			\
+			  goto add_done;				\
+			}						\
+		      else						\
+			{						\
+			  FP_SET_EXCEPTION (FP_EX_DENORM);		\
+			  _FP_FRAC_ADD_##wc (R, X, Y);			\
+			  if (_FP_FRAC_HIGH_##fs (R) & _FP_IMPLBIT_SH_##fs) \
+			    {						\
+			      /* Normalized result.  */			\
+			      _FP_FRAC_HIGH_##fs (R)			\
+				&= ~(_FP_W_TYPE) _FP_IMPLBIT_SH_##fs;	\
+			      R##_e = 1;				\
+			    }						\
+			  goto add_done;				\
+			}						\
+		    }							\
+		  else							\
+		    {							\
+		      /* X and Y are NaN or Inf.  */			\
+		      _FP_CHECK_SIGNAN_SEMIRAW (fs, wc, X);		\
+		      _FP_CHECK_SIGNAN_SEMIRAW (fs, wc, Y);		\
+		      R##_e = _FP_EXPMAX_##fs;				\
+		      if (_FP_FRAC_ZEROP_##wc (X))			\
+			_FP_FRAC_COPY_##wc (R, Y);			\
+		      else if (_FP_FRAC_ZEROP_##wc (Y))			\
+			_FP_FRAC_COPY_##wc (R, X);			\
+		      else						\
+			_FP_CHOOSENAN_SEMIRAW (fs, wc, R, X, Y, OP);	\
+		      goto add_done;					\
+		    }							\
+		}							\
+	      /* The exponents of X and Y, both normal, are equal.  The	\
+		 implicit MSBs will always add to increase the		\
+		 exponent.  */						\
+	      _FP_FRAC_ADD_##wc (R, X, Y);				\
+	      R##_e = X##_e + 1;					\
+	      _FP_FRAC_SRS_##wc (R, 1, _FP_WFRACBITS_##fs);		\
+	      if (R##_e == _FP_EXPMAX_##fs)				\
+		/* Overflow to infinity (depending on rounding mode).  */ \
+		_FP_OVERFLOW_SEMIRAW (fs, wc, R);			\
+	      goto add_done;						\
+	    }								\
+	add3:								\
+	  if (_FP_FRAC_HIGH_##fs (R) & _FP_IMPLBIT_SH_##fs)		\
+	    {								\
+	      /* Overflow.  */						\
+	      _FP_FRAC_HIGH_##fs (R) &= ~(_FP_W_TYPE) _FP_IMPLBIT_SH_##fs; \
+	      R##_e++;							\
+	      _FP_FRAC_SRS_##wc (R, 1, _FP_WFRACBITS_##fs);		\
+	      if (R##_e == _FP_EXPMAX_##fs)				\
+		/* Overflow to infinity (depending on rounding mode).  */ \
+		_FP_OVERFLOW_SEMIRAW (fs, wc, R);			\
+	    }								\
+	add_done: ;							\
+	}								\
+      else								\
+	{								\
+	  /* Subtraction.  */						\
+	  __label__ sub1, sub2, sub3, norm, sub_done;			\
+	  int _FP_ADD_INTERNAL_ediff = X##_e - Y##_e;			\
+	  if (_FP_ADD_INTERNAL_ediff > 0)				\
+	    {								\
+	      R##_e = X##_e;						\
+	      R##_s = X##_s;						\
+	      if (Y##_e == 0)						\
+		{							\
+		  /* Y is zero or denormalized.  */			\
+		  if (_FP_FRAC_ZEROP_##wc (Y))				\
+		    {							\
+		      _FP_CHECK_SIGNAN_SEMIRAW (fs, wc, X);		\
+		      _FP_FRAC_COPY_##wc (R, X);			\
+		      goto sub_done;					\
+		    }							\
+		  else							\
+		    {							\
+		      FP_SET_EXCEPTION (FP_EX_DENORM);			\
+		      _FP_ADD_INTERNAL_ediff--;				\
+		      if (_FP_ADD_INTERNAL_ediff == 0)			\
+			{						\
+			  _FP_FRAC_SUB_##wc (R, X, Y);			\
+			  goto sub3;					\
+			}						\
+		      if (X##_e == _FP_EXPMAX_##fs)			\
+			{						\
+			  _FP_CHECK_SIGNAN_SEMIRAW (fs, wc, X);		\
+			  _FP_FRAC_COPY_##wc (R, X);			\
+			  goto sub_done;				\
+			}						\
+		      goto sub1;					\
+		    }							\
+		}							\
+	      else if (X##_e == _FP_EXPMAX_##fs)			\
+		{							\
+		  /* X is NaN or Inf, Y is normal.  */			\
+		  _FP_CHECK_SIGNAN_SEMIRAW (fs, wc, X);			\
+		  _FP_FRAC_COPY_##wc (R, X);				\
+		  goto sub_done;					\
+		}							\
+									\
+	      /* Insert implicit MSB of Y.  */				\
+	      _FP_FRAC_HIGH_##fs (Y) |= _FP_IMPLBIT_SH_##fs;		\
+									\
+	    sub1:							\
+	      /* Shift the mantissa of Y to the right			\
+		 _FP_ADD_INTERNAL_EDIFF steps; remember to account	\
+		 later for the implicit MSB of X.  */			\
+	      if (_FP_ADD_INTERNAL_ediff <= _FP_WFRACBITS_##fs)		\
+		_FP_FRAC_SRS_##wc (Y, _FP_ADD_INTERNAL_ediff,		\
+				   _FP_WFRACBITS_##fs);			\
+	      else if (!_FP_FRAC_ZEROP_##wc (Y))			\
+		_FP_FRAC_SET_##wc (Y, _FP_MINFRAC_##wc);		\
+	      _FP_FRAC_SUB_##wc (R, X, Y);				\
+	    }								\
+	  else if (_FP_ADD_INTERNAL_ediff < 0)				\
+	    {								\
+	      _FP_ADD_INTERNAL_ediff = -_FP_ADD_INTERNAL_ediff;		\
+	      R##_e = Y##_e;						\
+	      R##_s = Y##_s;						\
+	      if (X##_e == 0)						\
+		{							\
+		  /* X is zero or denormalized.  */			\
+		  if (_FP_FRAC_ZEROP_##wc (X))				\
+		    {							\
+		      _FP_CHECK_SIGNAN_SEMIRAW (fs, wc, Y);		\
+		      _FP_FRAC_COPY_##wc (R, Y);			\
+		      goto sub_done;					\
+		    }							\
+		  else							\
+		    {							\
+		      FP_SET_EXCEPTION (FP_EX_DENORM);			\
+		      _FP_ADD_INTERNAL_ediff--;				\
+		      if (_FP_ADD_INTERNAL_ediff == 0)			\
+			{						\
+			  _FP_FRAC_SUB_##wc (R, Y, X);			\
+			  goto sub3;					\
+			}						\
+		      if (Y##_e == _FP_EXPMAX_##fs)			\
+			{						\
+			  _FP_CHECK_SIGNAN_SEMIRAW (fs, wc, Y);		\
+			  _FP_FRAC_COPY_##wc (R, Y);			\
+			  goto sub_done;				\
+			}						\
+		      goto sub2;					\
+		    }							\
+		}							\
+	      else if (Y##_e == _FP_EXPMAX_##fs)			\
+		{							\
+		  /* Y is NaN or Inf, X is normal.  */			\
+		  _FP_CHECK_SIGNAN_SEMIRAW (fs, wc, Y);			\
+		  _FP_FRAC_COPY_##wc (R, Y);				\
+		  goto sub_done;					\
+		}							\
+									\
+	      /* Insert implicit MSB of X.  */				\
+	      _FP_FRAC_HIGH_##fs (X) |= _FP_IMPLBIT_SH_##fs;		\
+									\
+	    sub2:							\
+	      /* Shift the mantissa of X to the right			\
+		 _FP_ADD_INTERNAL_EDIFF steps; remember to account	\
+		 later for the implicit MSB of Y.  */			\
+	      if (_FP_ADD_INTERNAL_ediff <= _FP_WFRACBITS_##fs)		\
+		_FP_FRAC_SRS_##wc (X, _FP_ADD_INTERNAL_ediff,		\
+				   _FP_WFRACBITS_##fs);			\
+	      else if (!_FP_FRAC_ZEROP_##wc (X))			\
+		_FP_FRAC_SET_##wc (X, _FP_MINFRAC_##wc);		\
+	      _FP_FRAC_SUB_##wc (R, Y, X);				\
+	    }								\
+	  else								\
+	    {								\
+	      /* ediff == 0.  */					\
+	      if (!_FP_EXP_NORMAL (fs, wc, X))				\
+		{							\
+		  if (X##_e == 0)					\
+		    {							\
+		      /* X and Y are zero or denormalized.  */		\
+		      R##_e = 0;					\
+		      if (_FP_FRAC_ZEROP_##wc (X))			\
+			{						\
+			  _FP_FRAC_COPY_##wc (R, Y);			\
+			  if (_FP_FRAC_ZEROP_##wc (Y))			\
+			    R##_s = (FP_ROUNDMODE == FP_RND_MINF);	\
+			  else						\
+			    {						\
+			      FP_SET_EXCEPTION (FP_EX_DENORM);		\
+			      R##_s = Y##_s;				\
+			    }						\
+			  goto sub_done;				\
+			}						\
+		      else if (_FP_FRAC_ZEROP_##wc (Y))			\
+			{						\
+			  FP_SET_EXCEPTION (FP_EX_DENORM);		\
+			  _FP_FRAC_COPY_##wc (R, X);			\
+			  R##_s = X##_s;				\
+			  goto sub_done;				\
+			}						\
+		      else						\
+			{						\
+			  FP_SET_EXCEPTION (FP_EX_DENORM);		\
+			  _FP_FRAC_SUB_##wc (R, X, Y);			\
+			  R##_s = X##_s;				\
+			  if (_FP_FRAC_HIGH_##fs (R) & _FP_IMPLBIT_SH_##fs) \
+			    {						\
+			      /* |X| < |Y|, negate result.  */		\
+			      _FP_FRAC_SUB_##wc (R, Y, X);		\
+			      R##_s = Y##_s;				\
+			    }						\
+			  else if (_FP_FRAC_ZEROP_##wc (R))		\
+			    R##_s = (FP_ROUNDMODE == FP_RND_MINF);	\
+			  goto sub_done;				\
+			}						\
+		    }							\
+		  else							\
+		    {							\
+		      /* X and Y are NaN or Inf, of opposite signs.  */	\
+		      _FP_CHECK_SIGNAN_SEMIRAW (fs, wc, X);		\
+		      _FP_CHECK_SIGNAN_SEMIRAW (fs, wc, Y);		\
+		      R##_e = _FP_EXPMAX_##fs;				\
+		      if (_FP_FRAC_ZEROP_##wc (X))			\
+			{						\
+			  if (_FP_FRAC_ZEROP_##wc (Y))			\
+			    {						\
+			      /* Inf - Inf.  */				\
+			      R##_s = _FP_NANSIGN_##fs;			\
+			      _FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs);	\
+			      _FP_FRAC_SLL_##wc (R, _FP_WORKBITS);	\
+			      FP_SET_EXCEPTION (FP_EX_INVALID		\
+						| FP_EX_INVALID_ISI);	\
+			    }						\
+			  else						\
+			    {						\
+			      /* Inf - NaN.  */				\
+			      R##_s = Y##_s;				\
+			      _FP_FRAC_COPY_##wc (R, Y);		\
+			    }						\
+			}						\
+		      else						\
+			{						\
+			  if (_FP_FRAC_ZEROP_##wc (Y))			\
+			    {						\
+			      /* NaN - Inf.  */				\
+			      R##_s = X##_s;				\
+			      _FP_FRAC_COPY_##wc (R, X);		\
+			    }						\
+			  else						\
+			    {						\
+			      /* NaN - NaN.  */				\
+			      _FP_CHOOSENAN_SEMIRAW (fs, wc, R, X, Y, OP); \
+			    }						\
+			}						\
+		      goto sub_done;					\
+		    }							\
+		}							\
+	      /* The exponents of X and Y, both normal, are equal.  The	\
+		 implicit MSBs cancel.  */				\
+	      R##_e = X##_e;						\
+	      _FP_FRAC_SUB_##wc (R, X, Y);				\
+	      R##_s = X##_s;						\
+	      if (_FP_FRAC_HIGH_##fs (R) & _FP_IMPLBIT_SH_##fs)		\
+		{							\
+		  /* |X| < |Y|, negate result.  */			\
+		  _FP_FRAC_SUB_##wc (R, Y, X);				\
+		  R##_s = Y##_s;					\
+		}							\
+	      else if (_FP_FRAC_ZEROP_##wc (R))				\
+		{							\
+		  R##_e = 0;						\
+		  R##_s = (FP_ROUNDMODE == FP_RND_MINF);		\
+		  goto sub_done;					\
+		}							\
+	      goto norm;						\
+	    }								\
+	sub3:								\
+	  if (_FP_FRAC_HIGH_##fs (R) & _FP_IMPLBIT_SH_##fs)		\
+	    {								\
+	      int _FP_ADD_INTERNAL_diff;				\
+	      /* Carry into most significant bit of larger one of X and Y, \
+		 canceling it; renormalize.  */				\
+	      _FP_FRAC_HIGH_##fs (R) &= _FP_IMPLBIT_SH_##fs - 1;	\
+	    norm:							\
+	      _FP_FRAC_CLZ_##wc (_FP_ADD_INTERNAL_diff, R);		\
+	      _FP_ADD_INTERNAL_diff -= _FP_WFRACXBITS_##fs;		\
+	      _FP_FRAC_SLL_##wc (R, _FP_ADD_INTERNAL_diff);		\
+	      if (R##_e <= _FP_ADD_INTERNAL_diff)			\
+		{							\
+		  /* R is denormalized.  */				\
+		  _FP_ADD_INTERNAL_diff					\
+		    = _FP_ADD_INTERNAL_diff - R##_e + 1;		\
+		  _FP_FRAC_SRS_##wc (R, _FP_ADD_INTERNAL_diff,		\
+				     _FP_WFRACBITS_##fs);		\
+		  R##_e = 0;						\
+		}							\
+	      else							\
+		{							\
+		  R##_e -= _FP_ADD_INTERNAL_diff;			\
+		  _FP_FRAC_HIGH_##fs (R) &= ~(_FP_W_TYPE) _FP_IMPLBIT_SH_##fs; \
+		}							\
+	    }								\
+	sub_done: ;							\
+	}								\
+    }									\
+  while (0)
+
+#define _FP_ADD(fs, wc, R, X, Y) _FP_ADD_INTERNAL (fs, wc, R, X, Y, '+')
+#define _FP_SUB(fs, wc, R, X, Y)					\
+  do									\
+    {									\
+      if (!(Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (Y)))	\
+	Y##_s ^= 1;							\
+      _FP_ADD_INTERNAL (fs, wc, R, X, Y, '-');				\
+    }									\
+  while (0)
+
+
+/* Main negation routine.  The input value is raw.  */
+
+#define _FP_NEG(fs, wc, R, X)			\
+  do						\
+    {						\
+      _FP_FRAC_COPY_##wc (R, X);		\
+      R##_e = X##_e;				\
+      R##_s = 1 ^ X##_s;			\
+    }						\
+  while (0)
+
+
+/* Main multiplication routine.  The input values should be cooked.  */
+
+#define _FP_MUL(fs, wc, R, X, Y)				\
+  do								\
+    {								\
+      R##_s = X##_s ^ Y##_s;					\
+      R##_e = X##_e + Y##_e + 1;				\
+      switch (_FP_CLS_COMBINE (X##_c, Y##_c))			\
+	{							\
+	case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_NORMAL):	\
+	  R##_c = FP_CLS_NORMAL;				\
+								\
+	  _FP_MUL_MEAT_##fs (R, X, Y);				\
+								\
+	  if (_FP_FRAC_OVERP_##wc (fs, R))			\
+	    _FP_FRAC_SRS_##wc (R, 1, _FP_WFRACBITS_##fs);	\
+	  else							\
+	    R##_e--;						\
+	  break;						\
+								\
+	case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_NAN):		\
+	  _FP_CHOOSENAN (fs, wc, R, X, Y, '*');			\
+	  break;						\
+								\
+	case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_NORMAL):	\
+	case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_INF):		\
+	case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_ZERO):		\
+	  R##_s = X##_s;					\
+	  /* FALLTHRU */					\
+								\
+	case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_INF):		\
+	case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_NORMAL):	\
+	case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_NORMAL):	\
+	case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_ZERO):	\
+	  _FP_FRAC_COPY_##wc (R, X);				\
+	  R##_c = X##_c;					\
+	  break;						\
+								\
+	case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_NAN):	\
+	case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_NAN):		\
+	case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_NAN):		\
+	  R##_s = Y##_s;					\
+	  /* FALLTHRU */					\
+								\
+	case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_INF):	\
+	case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_ZERO):	\
+	  _FP_FRAC_COPY_##wc (R, Y);				\
+	  R##_c = Y##_c;					\
+	  break;						\
+								\
+	case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_ZERO):		\
+	case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_INF):		\
+	  R##_s = _FP_NANSIGN_##fs;				\
+	  R##_c = FP_CLS_NAN;					\
+	  _FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs);		\
+	  FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_IMZ);	\
+	  break;						\
+								\
+	default:						\
+	  _FP_UNREACHABLE;					\
+	}							\
+    }								\
+  while (0)
+
+
+/* Fused multiply-add.  The input values should be cooked.  */
+
+#define _FP_FMA(fs, wc, dwc, R, X, Y, Z)				\
+  do									\
+    {									\
+      __label__ done_fma;						\
+      FP_DECL_##fs (_FP_FMA_T);						\
+      _FP_FMA_T##_s = X##_s ^ Y##_s;					\
+      _FP_FMA_T##_e = X##_e + Y##_e + 1;				\
+      switch (_FP_CLS_COMBINE (X##_c, Y##_c))				\
+	{								\
+	case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_NORMAL):		\
+	  switch (Z##_c)						\
+	    {								\
+	    case FP_CLS_INF:						\
+	    case FP_CLS_NAN:						\
+	      R##_s = Z##_s;						\
+	      _FP_FRAC_COPY_##wc (R, Z);				\
+	      R##_c = Z##_c;						\
+	      break;							\
+									\
+	    case FP_CLS_ZERO:						\
+	      R##_c = FP_CLS_NORMAL;					\
+	      R##_s = _FP_FMA_T##_s;					\
+	      R##_e = _FP_FMA_T##_e;					\
+									\
+	      _FP_MUL_MEAT_##fs (R, X, Y);				\
+									\
+	      if (_FP_FRAC_OVERP_##wc (fs, R))				\
+		_FP_FRAC_SRS_##wc (R, 1, _FP_WFRACBITS_##fs);		\
+	      else							\
+		R##_e--;						\
+	      break;							\
+									\
+	    case FP_CLS_NORMAL:;					\
+	      _FP_FRAC_DECL_##dwc (_FP_FMA_TD);				\
+	      _FP_FRAC_DECL_##dwc (_FP_FMA_ZD);				\
+	      _FP_FRAC_DECL_##dwc (_FP_FMA_RD);				\
+	      _FP_MUL_MEAT_DW_##fs (_FP_FMA_TD, X, Y);			\
+	      R##_e = _FP_FMA_T##_e;					\
+	      int _FP_FMA_tsh						\
+		= _FP_FRAC_HIGHBIT_DW_##dwc (fs, _FP_FMA_TD) == 0;	\
+	      _FP_FMA_T##_e -= _FP_FMA_tsh;				\
+	      int _FP_FMA_ediff = _FP_FMA_T##_e - Z##_e;		\
+	      if (_FP_FMA_ediff >= 0)					\
+		{							\
+		  int _FP_FMA_shift					\
+		    = _FP_WFRACBITS_##fs - _FP_FMA_tsh - _FP_FMA_ediff;	\
+		  if (_FP_FMA_shift <= -_FP_WFRACBITS_##fs)		\
+		    _FP_FRAC_SET_##dwc (_FP_FMA_ZD, _FP_MINFRAC_##dwc);	\
+		  else							\
+		    {							\
+		      _FP_FRAC_COPY_##dwc##_##wc (_FP_FMA_ZD, Z);	\
+		      if (_FP_FMA_shift < 0)				\
+			_FP_FRAC_SRS_##dwc (_FP_FMA_ZD, -_FP_FMA_shift,	\
+					    _FP_WFRACBITS_DW_##fs);	\
+		      else if (_FP_FMA_shift > 0)			\
+			_FP_FRAC_SLL_##dwc (_FP_FMA_ZD, _FP_FMA_shift);	\
+		    }							\
+		  R##_s = _FP_FMA_T##_s;				\
+		  if (_FP_FMA_T##_s == Z##_s)				\
+		    _FP_FRAC_ADD_##dwc (_FP_FMA_RD, _FP_FMA_TD,		\
+					_FP_FMA_ZD);			\
+		  else							\
+		    {							\
+		      _FP_FRAC_SUB_##dwc (_FP_FMA_RD, _FP_FMA_TD,	\
+					  _FP_FMA_ZD);			\
+		      if (_FP_FRAC_NEGP_##dwc (_FP_FMA_RD))		\
+			{						\
+			  R##_s = Z##_s;				\
+			  _FP_FRAC_SUB_##dwc (_FP_FMA_RD, _FP_FMA_ZD,	\
+					      _FP_FMA_TD);		\
+			}						\
+		    }							\
+		}							\
+	      else							\
+		{							\
+		  R##_e = Z##_e;					\
+		  R##_s = Z##_s;					\
+		  _FP_FRAC_COPY_##dwc##_##wc (_FP_FMA_ZD, Z);		\
+		  _FP_FRAC_SLL_##dwc (_FP_FMA_ZD, _FP_WFRACBITS_##fs);	\
+		  int _FP_FMA_shift = -_FP_FMA_ediff - _FP_FMA_tsh;	\
+		  if (_FP_FMA_shift >= _FP_WFRACBITS_DW_##fs)		\
+		    _FP_FRAC_SET_##dwc (_FP_FMA_TD, _FP_MINFRAC_##dwc);	\
+		  else if (_FP_FMA_shift > 0)				\
+		    _FP_FRAC_SRS_##dwc (_FP_FMA_TD, _FP_FMA_shift,	\
+					_FP_WFRACBITS_DW_##fs);		\
+		  if (Z##_s == _FP_FMA_T##_s)				\
+		    _FP_FRAC_ADD_##dwc (_FP_FMA_RD, _FP_FMA_ZD,		\
+					_FP_FMA_TD);			\
+		  else							\
+		    _FP_FRAC_SUB_##dwc (_FP_FMA_RD, _FP_FMA_ZD,		\
+					_FP_FMA_TD);			\
+		}							\
+	      if (_FP_FRAC_ZEROP_##dwc (_FP_FMA_RD))			\
+		{							\
+		  if (_FP_FMA_T##_s == Z##_s)				\
+		    R##_s = Z##_s;					\
+		  else							\
+		    R##_s = (FP_ROUNDMODE == FP_RND_MINF);		\
+		  _FP_FRAC_SET_##wc (R, _FP_ZEROFRAC_##wc);		\
+		  R##_c = FP_CLS_ZERO;					\
+		}							\
+	      else							\
+		{							\
+		  int _FP_FMA_rlz;					\
+		  _FP_FRAC_CLZ_##dwc (_FP_FMA_rlz, _FP_FMA_RD);		\
+		  _FP_FMA_rlz -= _FP_WFRACXBITS_DW_##fs;		\
+		  R##_e -= _FP_FMA_rlz;					\
+		  int _FP_FMA_shift = _FP_WFRACBITS_##fs - _FP_FMA_rlz;	\
+		  if (_FP_FMA_shift > 0)				\
+		    _FP_FRAC_SRS_##dwc (_FP_FMA_RD, _FP_FMA_shift,	\
+					_FP_WFRACBITS_DW_##fs);		\
+		  else if (_FP_FMA_shift < 0)				\
+		    _FP_FRAC_SLL_##dwc (_FP_FMA_RD, -_FP_FMA_shift);	\
+		  _FP_FRAC_COPY_##wc##_##dwc (R, _FP_FMA_RD);		\
+		  R##_c = FP_CLS_NORMAL;				\
+		}							\
+	      break;							\
+	    }								\
+	  goto done_fma;						\
+									\
+	case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_NAN):			\
+	  _FP_CHOOSENAN (fs, wc, _FP_FMA_T, X, Y, '*');			\
+	  break;							\
+									\
+	case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_NORMAL):		\
+	case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_INF):			\
+	case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_ZERO):			\
+	  _FP_FMA_T##_s = X##_s;					\
+	  /* FALLTHRU */						\
+									\
+	case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_INF):			\
+	case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_NORMAL):		\
+	case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_NORMAL):		\
+	case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_ZERO):		\
+	  _FP_FRAC_COPY_##wc (_FP_FMA_T, X);				\
+	  _FP_FMA_T##_c = X##_c;					\
+	  break;							\
+									\
+	case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_NAN):		\
+	case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_NAN):			\
+	case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_NAN):			\
+	  _FP_FMA_T##_s = Y##_s;					\
+	  /* FALLTHRU */						\
+									\
+	case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_INF):		\
+	case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_ZERO):		\
+	  _FP_FRAC_COPY_##wc (_FP_FMA_T, Y);				\
+	  _FP_FMA_T##_c = Y##_c;					\
+	  break;							\
+									\
+	case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_ZERO):			\
+	case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_INF):			\
+	  _FP_FMA_T##_s = _FP_NANSIGN_##fs;				\
+	  _FP_FMA_T##_c = FP_CLS_NAN;					\
+	  _FP_FRAC_SET_##wc (_FP_FMA_T, _FP_NANFRAC_##fs);		\
+	  FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_IMZ_FMA);	\
+	  break;							\
+									\
+	default:							\
+	  _FP_UNREACHABLE;						\
+	}								\
+									\
+      /* T = X * Y is zero, infinity or NaN.  */			\
+      switch (_FP_CLS_COMBINE (_FP_FMA_T##_c, Z##_c))			\
+	{								\
+	case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_NAN):			\
+	  _FP_CHOOSENAN (fs, wc, R, _FP_FMA_T, Z, '+');			\
+	  break;							\
+									\
+	case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_NORMAL):		\
+	case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_INF):			\
+	case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_ZERO):			\
+	case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_NORMAL):		\
+	case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_ZERO):			\
+	  R##_s = _FP_FMA_T##_s;					\
+	  _FP_FRAC_COPY_##wc (R, _FP_FMA_T);				\
+	  R##_c = _FP_FMA_T##_c;					\
+	  break;							\
+									\
+	case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_NAN):			\
+	case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_NAN):			\
+	case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_NORMAL):		\
+	case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_INF):			\
+	  R##_s = Z##_s;						\
+	  _FP_FRAC_COPY_##wc (R, Z);					\
+	  R##_c = Z##_c;						\
+	  R##_e = Z##_e;						\
+	  break;							\
+									\
+	case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_INF):			\
+	  if (_FP_FMA_T##_s == Z##_s)					\
+	    {								\
+	      R##_s = Z##_s;						\
+	      _FP_FRAC_COPY_##wc (R, Z);				\
+	      R##_c = Z##_c;						\
+	    }								\
+	  else								\
+	    {								\
+	      R##_s = _FP_NANSIGN_##fs;					\
+	      R##_c = FP_CLS_NAN;					\
+	      _FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs);			\
+	      FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_ISI);	\
+	    }								\
+	  break;							\
+									\
+	case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_ZERO):		\
+	  if (_FP_FMA_T##_s == Z##_s)					\
+	    R##_s = Z##_s;						\
+	  else								\
+	    R##_s = (FP_ROUNDMODE == FP_RND_MINF);			\
+	  _FP_FRAC_COPY_##wc (R, Z);					\
+	  R##_c = Z##_c;						\
+	  break;							\
+									\
+	default:							\
+	  _FP_UNREACHABLE;						\
+	}								\
+    done_fma: ;								\
+    }									\
+  while (0)
+
+
+/* Main division routine.  The input values should be cooked.  */
+
+#define _FP_DIV(fs, wc, R, X, Y)				\
+  do								\
+    {								\
+      R##_s = X##_s ^ Y##_s;					\
+      R##_e = X##_e - Y##_e;					\
+      switch (_FP_CLS_COMBINE (X##_c, Y##_c))			\
+	{							\
+	case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_NORMAL):	\
+	  R##_c = FP_CLS_NORMAL;				\
+								\
+	  _FP_DIV_MEAT_##fs (R, X, Y);				\
+	  break;						\
+								\
+	case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_NAN):		\
+	  _FP_CHOOSENAN (fs, wc, R, X, Y, '/');			\
+	  break;						\
+								\
+	case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_NORMAL):	\
+	case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_INF):		\
+	case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_ZERO):		\
+	  R##_s = X##_s;					\
+	  _FP_FRAC_COPY_##wc (R, X);				\
+	  R##_c = X##_c;					\
+	  break;						\
+								\
+	case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_NAN):	\
+	case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_NAN):		\
+	case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_NAN):		\
+	  R##_s = Y##_s;					\
+	  _FP_FRAC_COPY_##wc (R, Y);				\
+	  R##_c = Y##_c;					\
+	  break;						\
+								\
+	case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_INF):	\
+	case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_INF):		\
+	case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_NORMAL):	\
+	  R##_c = FP_CLS_ZERO;					\
+	  break;						\
+								\
+	case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_ZERO):	\
+	  FP_SET_EXCEPTION (FP_EX_DIVZERO);			\
+	  /* FALLTHRU */					\
+	case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_ZERO):		\
+	case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_NORMAL):	\
+	  R##_c = FP_CLS_INF;					\
+	  break;						\
+								\
+	case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_INF):		\
+	case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_ZERO):	\
+	  R##_s = _FP_NANSIGN_##fs;				\
+	  R##_c = FP_CLS_NAN;					\
+	  _FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs);		\
+	  FP_SET_EXCEPTION (FP_EX_INVALID			\
+			    | (X##_c == FP_CLS_INF		\
+			       ? FP_EX_INVALID_IDI		\
+			       : FP_EX_INVALID_ZDZ));		\
+	  break;						\
+								\
+	default:						\
+	  _FP_UNREACHABLE;					\
+	}							\
+    }								\
+  while (0)
+
+
+/* Helper for comparisons.  EX is 0 not to raise exceptions, 1 to
+   raise exceptions for signaling NaN operands, 2 to raise exceptions
+   for all NaN operands.  Conditionals are organized to allow the
+   compiler to optimize away code based on the value of EX.  */
+
+#define _FP_CMP_CHECK_NAN(fs, wc, X, Y, ex)				\
+  do									\
+    {									\
+      /* The arguments are unordered, which may or may not result in	\
+	 an exception.  */						\
+      if (ex)								\
+	{								\
+	  /* At least some cases of unordered arguments result in	\
+	     exceptions; check whether this is one.  */			\
+	  if (FP_EX_INVALID_SNAN || FP_EX_INVALID_VC)			\
+	    {								\
+	      /* Check separately for each case of "invalid"		\
+		 exceptions.  */					\
+	      if ((ex) == 2)						\
+		FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_VC);	\
+	      if (_FP_ISSIGNAN (fs, wc, X)				\
+		  || _FP_ISSIGNAN (fs, wc, Y))				\
+		FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_SNAN);	\
+	    }								\
+	  /* Otherwise, we only need to check whether to raise an	\
+	     exception, not which case or cases it is.  */		\
+	  else if ((ex) == 2						\
+		   || _FP_ISSIGNAN (fs, wc, X)				\
+		   || _FP_ISSIGNAN (fs, wc, Y))				\
+	    FP_SET_EXCEPTION (FP_EX_INVALID);				\
+	}								\
+    }									\
+  while (0)
+
+/* Helper for comparisons.  If denormal operands would raise an
+   exception, check for them, and flush to zero as appropriate
+   (otherwise, we need only check and flush to zero if it might affect
+   the result, which is done later with _FP_CMP_CHECK_FLUSH_ZERO).  */
+#define _FP_CMP_CHECK_DENORM(fs, wc, X, Y)				\
+  do									\
+    {									\
+      if (FP_EX_DENORM != 0)						\
+	{								\
+	  /* We must ensure the correct exceptions are raised for	\
+	     denormal operands, even though this may not affect the	\
+	     result of the comparison.  */				\
+	  if (FP_DENORM_ZERO)						\
+	    {								\
+	      _FP_CHECK_FLUSH_ZERO (fs, wc, X);				\
+	      _FP_CHECK_FLUSH_ZERO (fs, wc, Y);				\
+	    }								\
+	  else								\
+	    {								\
+	      if ((X##_e == 0 && !_FP_FRAC_ZEROP_##wc (X))		\
+		  || (Y##_e == 0 && !_FP_FRAC_ZEROP_##wc (Y)))		\
+		FP_SET_EXCEPTION (FP_EX_DENORM);			\
+	    }								\
+	}								\
+    }									\
+  while (0)
+
+/* Helper for comparisons.  Check for flushing denormals for zero if
+   we didn't need to check earlier for any denormal operands.  */
+#define _FP_CMP_CHECK_FLUSH_ZERO(fs, wc, X, Y)	\
+  do						\
+    {						\
+      if (FP_EX_DENORM == 0)			\
+	{					\
+	  _FP_CHECK_FLUSH_ZERO (fs, wc, X);	\
+	  _FP_CHECK_FLUSH_ZERO (fs, wc, Y);	\
+	}					\
+    }						\
+  while (0)
+
+/* Main differential comparison routine.  The inputs should be raw not
+   cooked.  The return is -1, 0, 1 for normal values, UN
+   otherwise.  */
+
+#define _FP_CMP(fs, wc, ret, X, Y, un, ex)				\
+  do									\
+    {									\
+      _FP_CMP_CHECK_DENORM (fs, wc, X, Y);				\
+      /* NANs are unordered.  */					\
+      if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (X))	\
+	  || (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (Y)))	\
+	{								\
+	  (ret) = (un);							\
+	  _FP_CMP_CHECK_NAN (fs, wc, X, Y, (ex));			\
+	}								\
+      else								\
+	{								\
+	  int _FP_CMP_is_zero_x;					\
+	  int _FP_CMP_is_zero_y;					\
+									\
+	  _FP_CMP_CHECK_FLUSH_ZERO (fs, wc, X, Y);			\
+									\
+	  _FP_CMP_is_zero_x						\
+	    = (!X##_e && _FP_FRAC_ZEROP_##wc (X)) ? 1 : 0;		\
+	  _FP_CMP_is_zero_y						\
+	    = (!Y##_e && _FP_FRAC_ZEROP_##wc (Y)) ? 1 : 0;		\
+									\
+	  if (_FP_CMP_is_zero_x && _FP_CMP_is_zero_y)			\
+	    (ret) = 0;							\
+	  else if (_FP_CMP_is_zero_x)					\
+	    (ret) = Y##_s ? 1 : -1;					\
+	  else if (_FP_CMP_is_zero_y)					\
+	    (ret) = X##_s ? -1 : 1;					\
+	  else if (X##_s != Y##_s)					\
+	    (ret) = X##_s ? -1 : 1;					\
+	  else if (X##_e > Y##_e)					\
+	    (ret) = X##_s ? -1 : 1;					\
+	  else if (X##_e < Y##_e)					\
+	    (ret) = X##_s ? 1 : -1;					\
+	  else if (_FP_FRAC_GT_##wc (X, Y))				\
+	    (ret) = X##_s ? -1 : 1;					\
+	  else if (_FP_FRAC_GT_##wc (Y, X))				\
+	    (ret) = X##_s ? 1 : -1;					\
+	  else								\
+	    (ret) = 0;							\
+	}								\
+    }									\
+  while (0)
+
+
+/* Simplification for strict equality.  */
+
+#define _FP_CMP_EQ(fs, wc, ret, X, Y, ex)				\
+  do									\
+    {									\
+      _FP_CMP_CHECK_DENORM (fs, wc, X, Y);				\
+      /* NANs are unordered.  */					\
+      if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (X))	\
+	  || (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (Y)))	\
+	{								\
+	  (ret) = 1;							\
+	  _FP_CMP_CHECK_NAN (fs, wc, X, Y, (ex));			\
+	}								\
+      else								\
+	{								\
+	  _FP_CMP_CHECK_FLUSH_ZERO (fs, wc, X, Y);			\
+									\
+	  (ret) = !(X##_e == Y##_e					\
+		    && _FP_FRAC_EQ_##wc (X, Y)				\
+		    && (X##_s == Y##_s					\
+			|| (!X##_e && _FP_FRAC_ZEROP_##wc (X))));	\
+	}								\
+    }									\
+  while (0)
+
+/* Version to test unordered.  */
+
+#define _FP_CMP_UNORD(fs, wc, ret, X, Y, ex)				\
+  do									\
+    {									\
+      _FP_CMP_CHECK_DENORM (fs, wc, X, Y);				\
+      (ret) = ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (X))	\
+	       || (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (Y))); \
+      if (ret)								\
+	_FP_CMP_CHECK_NAN (fs, wc, X, Y, (ex));				\
+    }									\
+  while (0)
+
+/* Main square root routine.  The input value should be cooked.  */
+
+#define _FP_SQRT(fs, wc, R, X)						\
+  do									\
+    {									\
+      _FP_FRAC_DECL_##wc (_FP_SQRT_T);					\
+      _FP_FRAC_DECL_##wc (_FP_SQRT_S);					\
+      _FP_W_TYPE _FP_SQRT_q;						\
+      switch (X##_c)							\
+	{								\
+	case FP_CLS_NAN:						\
+	  _FP_FRAC_COPY_##wc (R, X);					\
+	  R##_s = X##_s;						\
+	  R##_c = FP_CLS_NAN;						\
+	  break;							\
+	case FP_CLS_INF:						\
+	  if (X##_s)							\
+	    {								\
+	      R##_s = _FP_NANSIGN_##fs;					\
+	      R##_c = FP_CLS_NAN; /* NAN */				\
+	      _FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs);			\
+	      FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_SQRT);	\
+	    }								\
+	  else								\
+	    {								\
+	      R##_s = 0;						\
+	      R##_c = FP_CLS_INF; /* sqrt(+inf) = +inf */		\
+	    }								\
+	  break;							\
+	case FP_CLS_ZERO:						\
+	  R##_s = X##_s;						\
+	  R##_c = FP_CLS_ZERO; /* sqrt(+-0) = +-0 */			\
+	  break;							\
+	case FP_CLS_NORMAL:						\
+	  R##_s = 0;							\
+	  if (X##_s)							\
+	    {								\
+	      R##_c = FP_CLS_NAN; /* NAN */				\
+	      R##_s = _FP_NANSIGN_##fs;					\
+	      _FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs);			\
+	      FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_SQRT);	\
+	      break;							\
+	    }								\
+	  R##_c = FP_CLS_NORMAL;					\
+	  if (X##_e & 1)						\
+	    _FP_FRAC_SLL_##wc (X, 1);					\
+	  R##_e = X##_e >> 1;						\
+	  _FP_FRAC_SET_##wc (_FP_SQRT_S, _FP_ZEROFRAC_##wc);		\
+	  _FP_FRAC_SET_##wc (R, _FP_ZEROFRAC_##wc);			\
+	  _FP_SQRT_q = _FP_OVERFLOW_##fs >> 1;				\
+	  _FP_SQRT_MEAT_##wc (R, _FP_SQRT_S, _FP_SQRT_T, X,		\
+			      _FP_SQRT_q);				\
+	}								\
+    }									\
+  while (0)
+
+/* Convert from FP to integer.  Input is raw.  */
+
+/* RSIGNED can have following values:
+   0:  the number is required to be 0..(2^rsize)-1, if not, NV is set plus
+       the result is either 0 or (2^rsize)-1 depending on the sign in such
+       case.
+   1:  the number is required to be -(2^(rsize-1))..(2^(rsize-1))-1, if not,
+       NV is set plus the result is either -(2^(rsize-1)) or (2^(rsize-1))-1
+       depending on the sign in such case.
+   2:  the number is required to be -(2^(rsize-1))..(2^(rsize-1))-1, if not,
+       NV is set plus the result is reduced modulo 2^rsize.
+   -1: the number is required to be -(2^(rsize-1))..(2^rsize)-1, if not, NV is
+       set plus the result is either -(2^(rsize-1)) or (2^(rsize-1))-1
+       depending on the sign in such case.  */
+#define _FP_TO_INT(fs, wc, r, X, rsize, rsigned)			\
+  do									\
+    {									\
+      if (X##_e < _FP_EXPBIAS_##fs)					\
+	{								\
+	  (r) = 0;							\
+	  if (X##_e == 0)						\
+	    {								\
+	      if (!_FP_FRAC_ZEROP_##wc (X))				\
+		{							\
+		  if (!FP_DENORM_ZERO)					\
+		    FP_SET_EXCEPTION (FP_EX_INEXACT);			\
+		  FP_SET_EXCEPTION (FP_EX_DENORM);			\
+		}							\
+	    }								\
+	  else								\
+	    FP_SET_EXCEPTION (FP_EX_INEXACT);				\
+	}								\
+      else if ((rsigned) == 2						\
+	       && (X##_e						\
+		   >= ((_FP_EXPMAX_##fs					\
+			< _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs + (rsize) - 1) \
+		       ? _FP_EXPMAX_##fs				\
+		       : _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs + (rsize) - 1))) \
+	{								\
+	  /* Overflow resulting in 0.  */				\
+	  (r) = 0;							\
+	  FP_SET_EXCEPTION (FP_EX_INVALID				\
+			    | FP_EX_INVALID_CVI				\
+			    | ((FP_EX_INVALID_SNAN			\
+				&& _FP_ISSIGNAN (fs, wc, X))		\
+			       ? FP_EX_INVALID_SNAN			\
+			       : 0));					\
+	}								\
+      else if ((rsigned) != 2						\
+	       && (X##_e >= (_FP_EXPMAX_##fs < _FP_EXPBIAS_##fs + (rsize) \
+			     ? _FP_EXPMAX_##fs				\
+			     : (_FP_EXPBIAS_##fs + (rsize)		\
+				- ((rsigned) > 0 || X##_s)))		\
+		   || (!(rsigned) && X##_s)))				\
+	{								\
+	  /* Overflow or converting to the most negative integer.  */	\
+	  if (rsigned)							\
+	    {								\
+	      (r) = 1;							\
+	      (r) <<= (rsize) - 1;					\
+	      (r) -= 1 - X##_s;						\
+	    }								\
+	  else								\
+	    {								\
+	      (r) = 0;							\
+	      if (!X##_s)						\
+		(r) = ~(r);						\
+	    }								\
+									\
+	  if (_FP_EXPBIAS_##fs + (rsize) - 1 < _FP_EXPMAX_##fs		\
+	      && (rsigned)						\
+	      && X##_s							\
+	      && X##_e == _FP_EXPBIAS_##fs + (rsize) - 1)		\
+	    {								\
+	      /* Possibly converting to most negative integer; check the \
+		 mantissa.  */						\
+	      int _FP_TO_INT_inexact = 0;				\
+	      (void) ((_FP_FRACBITS_##fs > (rsize))			\
+		      ? ({						\
+			  _FP_FRAC_SRST_##wc (X, _FP_TO_INT_inexact,	\
+					      _FP_FRACBITS_##fs - (rsize), \
+					      _FP_FRACBITS_##fs);	\
+			  0;						\
+			})						\
+		      : 0);						\
+	      if (!_FP_FRAC_ZEROP_##wc (X))				\
+		FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_CVI);	\
+	      else if (_FP_TO_INT_inexact)				\
+		FP_SET_EXCEPTION (FP_EX_INEXACT);			\
+	    }								\
+	  else								\
+	    FP_SET_EXCEPTION (FP_EX_INVALID				\
+			      | FP_EX_INVALID_CVI			\
+			      | ((FP_EX_INVALID_SNAN			\
+				  && _FP_ISSIGNAN (fs, wc, X))		\
+				 ? FP_EX_INVALID_SNAN			\
+				 : 0));					\
+	}								\
+      else								\
+	{								\
+	  int _FP_TO_INT_inexact = 0;					\
+	  _FP_FRAC_HIGH_RAW_##fs (X) |= _FP_IMPLBIT_##fs;		\
+	  if (X##_e >= _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs - 1)	\
+	    {								\
+	      _FP_FRAC_ASSEMBLE_##wc ((r), X, (rsize));			\
+	      (r) <<= X##_e - _FP_EXPBIAS_##fs - _FP_FRACBITS_##fs + 1; \
+	    }								\
+	  else								\
+	    {								\
+	      _FP_FRAC_SRST_##wc (X, _FP_TO_INT_inexact,		\
+				  (_FP_FRACBITS_##fs + _FP_EXPBIAS_##fs - 1 \
+				   - X##_e),				\
+				  _FP_FRACBITS_##fs);			\
+	      _FP_FRAC_ASSEMBLE_##wc ((r), X, (rsize));			\
+	    }								\
+	  if ((rsigned) && X##_s)					\
+	    (r) = -(r);							\
+	  if ((rsigned) == 2 && X##_e >= _FP_EXPBIAS_##fs + (rsize) - 1) \
+	    {								\
+	      /* Overflow or converting to the most negative integer.  */ \
+	      if (X##_e > _FP_EXPBIAS_##fs + (rsize) - 1		\
+		  || !X##_s						\
+		  || (r) != (((__typeof__ (r)) 1) << ((rsize) - 1)))	\
+		{							\
+		  _FP_TO_INT_inexact = 0;				\
+		  FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_CVI);	\
+		}							\
+	    }								\
+	  if (_FP_TO_INT_inexact)					\
+	    FP_SET_EXCEPTION (FP_EX_INEXACT);				\
+	}								\
+    }									\
+  while (0)
+
+/* Convert from floating point to integer, rounding according to the
+   current rounding direction.  Input is raw.  RSIGNED is as for
+   _FP_TO_INT.  */
+#define _FP_TO_INT_ROUND(fs, wc, r, X, rsize, rsigned)			\
+  do									\
+    {									\
+      __label__ _FP_TO_INT_ROUND_done;					\
+      if (X##_e < _FP_EXPBIAS_##fs)					\
+	{								\
+	  int _FP_TO_INT_ROUND_rounds_away = 0;				\
+	  if (X##_e == 0)						\
+	    {								\
+	      if (_FP_FRAC_ZEROP_##wc (X))				\
+		{							\
+		  (r) = 0;						\
+		  goto _FP_TO_INT_ROUND_done;				\
+		}							\
+	      else							\
+		{							\
+		  FP_SET_EXCEPTION (FP_EX_DENORM);			\
+		  if (FP_DENORM_ZERO)					\
+		    {							\
+		      (r) = 0;						\
+		      goto _FP_TO_INT_ROUND_done;			\
+		    }							\
+		}							\
+	    }								\
+	  /* The result is 0, 1 or -1 depending on the rounding mode;	\
+	     -1 may cause overflow in the unsigned case.  */		\
+	  switch (FP_ROUNDMODE)						\
+	    {								\
+	    case FP_RND_NEAREST:					\
+	      _FP_TO_INT_ROUND_rounds_away				\
+		= (X##_e == _FP_EXPBIAS_##fs - 1			\
+		   && !_FP_FRAC_ZEROP_##wc (X));			\
+	      break;							\
+	    case FP_RND_ZERO:						\
+	      /* _FP_TO_INT_ROUND_rounds_away is already 0.  */		\
+	      break;							\
+	    case FP_RND_PINF:						\
+	      _FP_TO_INT_ROUND_rounds_away = !X##_s;			\
+	      break;							\
+	    case FP_RND_MINF:						\
+	      _FP_TO_INT_ROUND_rounds_away = X##_s;			\
+	      break;							\
+	    }								\
+	  if ((rsigned) == 0 && _FP_TO_INT_ROUND_rounds_away && X##_s)	\
+	    {								\
+	      /* Result of -1 for an unsigned conversion.  */		\
+	      (r) = 0;							\
+	      FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_CVI);	\
+	    }								\
+	  else if ((rsize) == 1 && (rsigned) > 0			\
+		   && _FP_TO_INT_ROUND_rounds_away && !X##_s)		\
+	    {								\
+	      /* Converting to a 1-bit signed bit-field, which cannot	\
+		 represent +1.  */					\
+	      (r) = ((rsigned) == 2 ? -1 : 0);				\
+	      FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_CVI);	\
+	    }								\
+	  else								\
+	    {								\
+	      (r) = (_FP_TO_INT_ROUND_rounds_away			\
+		     ? (X##_s ? -1 : 1)					\
+		     : 0);						\
+	      FP_SET_EXCEPTION (FP_EX_INEXACT);				\
+	    }								\
+	}								\
+      else if ((rsigned) == 2						\
+	       && (X##_e						\
+		   >= ((_FP_EXPMAX_##fs					\
+			< _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs + (rsize) - 1) \
+		       ? _FP_EXPMAX_##fs				\
+		       : _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs + (rsize) - 1))) \
+	{								\
+	  /* Overflow resulting in 0.  */				\
+	  (r) = 0;							\
+	  FP_SET_EXCEPTION (FP_EX_INVALID				\
+			    | FP_EX_INVALID_CVI				\
+			    | ((FP_EX_INVALID_SNAN			\
+				&& _FP_ISSIGNAN (fs, wc, X))		\
+			       ? FP_EX_INVALID_SNAN			\
+			       : 0));					\
+	}								\
+      else if ((rsigned) != 2						\
+	       && (X##_e >= (_FP_EXPMAX_##fs < _FP_EXPBIAS_##fs + (rsize) \
+			     ? _FP_EXPMAX_##fs				\
+			     : (_FP_EXPBIAS_##fs + (rsize)		\
+				- ((rsigned) > 0 && !X##_s)))		\
+		   || ((rsigned) == 0 && X##_s)))			\
+	{								\
+	  /* Definite overflow (does not require rounding to tell).  */	\
+	  if ((rsigned) != 0)						\
+	    {								\
+	      (r) = 1;							\
+	      (r) <<= (rsize) - 1;					\
+	      (r) -= 1 - X##_s;						\
+	    }								\
+	  else								\
+	    {								\
+	      (r) = 0;							\
+	      if (!X##_s)						\
+		(r) = ~(r);						\
+	    }								\
+									\
+	  FP_SET_EXCEPTION (FP_EX_INVALID				\
+			    | FP_EX_INVALID_CVI				\
+			    | ((FP_EX_INVALID_SNAN			\
+				&& _FP_ISSIGNAN (fs, wc, X))		\
+			       ? FP_EX_INVALID_SNAN			\
+			       : 0));					\
+	}								\
+      else								\
+	{								\
+	  /* The value is finite, with magnitude at least 1.  If	\
+	     the conversion is unsigned, the value is positive.		\
+	     If RSIGNED is not 2, the value does not definitely		\
+	     overflow by virtue of its exponent, but may still turn	\
+	     out to overflow after rounding; if RSIGNED is 2, the	\
+	     exponent may be such that the value definitely overflows,	\
+	     but at least one mantissa bit will not be shifted out.  */ \
+	  int _FP_TO_INT_ROUND_inexact = 0;				\
+	  _FP_FRAC_HIGH_RAW_##fs (X) |= _FP_IMPLBIT_##fs;		\
+	  if (X##_e >= _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs - 1)	\
+	    {								\
+	      /* The value is an integer, no rounding needed.  */	\
+	      _FP_FRAC_ASSEMBLE_##wc ((r), X, (rsize));			\
+	      (r) <<= X##_e - _FP_EXPBIAS_##fs - _FP_FRACBITS_##fs + 1; \
+	    }								\
+	  else								\
+	    {								\
+	      /* May need to shift in order to round (unless there	\
+		 are exactly _FP_WORKBITS fractional bits already).  */	\
+	      int _FP_TO_INT_ROUND_rshift				\
+		= (_FP_FRACBITS_##fs + _FP_EXPBIAS_##fs			\
+		   - 1 - _FP_WORKBITS - X##_e);				\
+	      if (_FP_TO_INT_ROUND_rshift > 0)				\
+		_FP_FRAC_SRS_##wc (X, _FP_TO_INT_ROUND_rshift,		\
+				   _FP_WFRACBITS_##fs);			\
+	      else if (_FP_TO_INT_ROUND_rshift < 0)			\
+		_FP_FRAC_SLL_##wc (X, -_FP_TO_INT_ROUND_rshift);	\
+	      /* Round like _FP_ROUND, but setting			\
+		 _FP_TO_INT_ROUND_inexact instead of directly setting	\
+		 the "inexact" exception, since it may turn out we	\
+		 should set "invalid" instead.  */			\
+	      if (_FP_FRAC_LOW_##wc (X) & 7)				\
+		{							\
+		  _FP_TO_INT_ROUND_inexact = 1;				\
+		  switch (FP_ROUNDMODE)					\
+		    {							\
+		    case FP_RND_NEAREST:				\
+		      _FP_ROUND_NEAREST (wc, X);			\
+		      break;						\
+		    case FP_RND_ZERO:					\
+		      _FP_ROUND_ZERO (wc, X);				\
+		      break;						\
+		    case FP_RND_PINF:					\
+		      _FP_ROUND_PINF (wc, X);				\
+		      break;						\
+		    case FP_RND_MINF:					\
+		      _FP_ROUND_MINF (wc, X);				\
+		      break;						\
+		    }							\
+		}							\
+	      _FP_FRAC_SRL_##wc (X, _FP_WORKBITS);			\
+	      _FP_FRAC_ASSEMBLE_##wc ((r), X, (rsize));			\
+	    }								\
+	  if ((rsigned) != 0 && X##_s)					\
+	    (r) = -(r);							\
+	  /* An exponent of RSIZE - 1 always needs testing for		\
+	     overflow (either directly overflowing, or overflowing	\
+	     when rounding up results in 2^RSIZE).  An exponent of	\
+	     RSIZE - 2 can overflow for positive values when rounding	\
+	     up to 2^(RSIZE-1), but cannot overflow for negative	\
+	     values.  Smaller exponents cannot overflow.  */		\
+	  if (X##_e >= (_FP_EXPBIAS_##fs + (rsize) - 1			\
+			- ((rsigned) > 0 && !X##_s)))			\
+	    {								\
+	      if (X##_e > _FP_EXPBIAS_##fs + (rsize) - 1		\
+		  || (X##_e == _FP_EXPBIAS_##fs + (rsize) - 1		\
+		      && (X##_s						\
+			  ? (r) != (((__typeof__ (r)) 1) << ((rsize) - 1))	\
+			  : ((rsigned) > 0 || (r) == 0)))		\
+		  || ((rsigned) > 0					\
+		      && !X##_s						\
+		      && X##_e == _FP_EXPBIAS_##fs + (rsize) - 2	\
+		      && (r) == (((__typeof__ (r)) 1) << ((rsize) - 1))))	\
+		{							\
+		  if ((rsigned) != 2)					\
+		    {							\
+		      if ((rsigned) != 0)				\
+			{						\
+			  (r) = 1;					\
+			  (r) <<= (rsize) - 1;				\
+			  (r) -= 1 - X##_s;				\
+			}						\
+		      else						\
+			{						\
+			  (r) = 0;					\
+			  (r) = ~(r);					\
+			}						\
+		    }							\
+		  _FP_TO_INT_ROUND_inexact = 0;				\
+		  FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_CVI);	\
+		}							\
+	    }								\
+	  if (_FP_TO_INT_ROUND_inexact)					\
+	    FP_SET_EXCEPTION (FP_EX_INEXACT);				\
+	}								\
+    _FP_TO_INT_ROUND_done: ;						\
+    }									\
+  while (0)
+
+/* Convert integer to fp.  Output is raw.  RTYPE is unsigned even if
+   input is signed.  */
+#define _FP_FROM_INT(fs, wc, X, r, rsize, rtype)			\
+  do									\
+    {									\
+      __label__ pack_semiraw;						\
+      if (r)								\
+	{								\
+	  rtype _FP_FROM_INT_ur = (r);					\
+									\
+	  if ((X##_s = ((r) < 0)))					\
+	    _FP_FROM_INT_ur = -_FP_FROM_INT_ur;				\
+									\
+	  _FP_STATIC_ASSERT ((rsize) <= 2 * _FP_W_TYPE_SIZE,		\
+			     "rsize too large");			\
+	  (void) (((rsize) <= _FP_W_TYPE_SIZE)				\
+		  ? ({							\
+		      int _FP_FROM_INT_lz;				\
+		      __FP_CLZ (_FP_FROM_INT_lz,			\
+				(_FP_W_TYPE) _FP_FROM_INT_ur);		\
+		      X##_e = (_FP_EXPBIAS_##fs + _FP_W_TYPE_SIZE - 1	\
+			       - _FP_FROM_INT_lz);			\
+		    })							\
+		  : ({						\
+		      int _FP_FROM_INT_lz;				\
+		      __FP_CLZ_2 (_FP_FROM_INT_lz,			\
+				  (_FP_W_TYPE) (_FP_FROM_INT_ur		\
+						>> _FP_W_TYPE_SIZE),	\
+				  (_FP_W_TYPE) _FP_FROM_INT_ur);	\
+		      X##_e = (_FP_EXPBIAS_##fs + 2 * _FP_W_TYPE_SIZE - 1 \
+			       - _FP_FROM_INT_lz);			\
+		    }));						\
+									\
+	  if ((rsize) - 1 + _FP_EXPBIAS_##fs >= _FP_EXPMAX_##fs		\
+	      && X##_e >= _FP_EXPMAX_##fs)				\
+	    {								\
+	      /* Exponent too big; overflow to infinity.  (May also	\
+		 happen after rounding below.)  */			\
+	      _FP_OVERFLOW_SEMIRAW (fs, wc, X);				\
+	      goto pack_semiraw;					\
+	    }								\
+									\
+	  if ((rsize) <= _FP_FRACBITS_##fs				\
+	      || X##_e < _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs)		\
+	    {								\
+	      /* Exactly representable; shift left.  */			\
+	      _FP_FRAC_DISASSEMBLE_##wc (X, _FP_FROM_INT_ur, (rsize));	\
+	      if (_FP_EXPBIAS_##fs + _FP_FRACBITS_##fs - 1 - X##_e > 0)	\
+		_FP_FRAC_SLL_##wc (X, (_FP_EXPBIAS_##fs			\
+				       + _FP_FRACBITS_##fs - 1 - X##_e)); \
+	    }								\
+	  else								\
+	    {								\
+	      /* More bits in integer than in floating type; need to	\
+		 round.  */						\
+	      if (_FP_EXPBIAS_##fs + _FP_WFRACBITS_##fs - 1 < X##_e)	\
+		_FP_FROM_INT_ur						\
+		  = ((_FP_FROM_INT_ur >> (X##_e - _FP_EXPBIAS_##fs	\
+					  - _FP_WFRACBITS_##fs + 1))	\
+		     | ((_FP_FROM_INT_ur				\
+			 << ((rsize) - (X##_e - _FP_EXPBIAS_##fs	\
+					- _FP_WFRACBITS_##fs + 1)))	\
+			!= 0));						\
+	      _FP_FRAC_DISASSEMBLE_##wc (X, _FP_FROM_INT_ur, (rsize));	\
+	      if ((_FP_EXPBIAS_##fs + _FP_WFRACBITS_##fs - 1 - X##_e) > 0) \
+		_FP_FRAC_SLL_##wc (X, (_FP_EXPBIAS_##fs			\
+				       + _FP_WFRACBITS_##fs - 1 - X##_e)); \
+	      _FP_FRAC_HIGH_##fs (X) &= ~(_FP_W_TYPE) _FP_IMPLBIT_SH_##fs; \
+	    pack_semiraw:						\
+	      _FP_PACK_SEMIRAW (fs, wc, X);				\
+	    }								\
+	}								\
+      else								\
+	{								\
+	  X##_s = 0;							\
+	  X##_e = 0;							\
+	  _FP_FRAC_SET_##wc (X, _FP_ZEROFRAC_##wc);			\
+	}								\
+    }									\
+  while (0)
+
+
+/* Extend from a narrower floating-point format to a wider one.  Input
+   and output are raw.  If CHECK_NAN, then signaling NaNs are
+   converted to quiet with the "invalid" exception raised; otherwise
+   signaling NaNs remain signaling with no exception.  */
+#define _FP_EXTEND_CNAN(dfs, sfs, dwc, swc, D, S, check_nan)		\
+  do									\
+    {									\
+      _FP_STATIC_ASSERT (_FP_FRACBITS_##dfs >= _FP_FRACBITS_##sfs,	\
+			 "destination mantissa narrower than source");	\
+      _FP_STATIC_ASSERT ((_FP_EXPMAX_##dfs - _FP_EXPBIAS_##dfs		\
+			  >= _FP_EXPMAX_##sfs - _FP_EXPBIAS_##sfs),	\
+			 "destination max exponent smaller"		\
+			 " than source");				\
+      _FP_STATIC_ASSERT (((_FP_EXPBIAS_##dfs				\
+			   >= (_FP_EXPBIAS_##sfs			\
+			       + _FP_FRACBITS_##sfs - 1))		\
+			  || (_FP_EXPBIAS_##dfs == _FP_EXPBIAS_##sfs)), \
+			 "source subnormals do not all become normal,"	\
+			 " but bias not the same");			\
+      D##_s = S##_s;							\
+      _FP_FRAC_COPY_##dwc##_##swc (D, S);				\
+      if (_FP_EXP_NORMAL (sfs, swc, S))					\
+	{								\
+	  D##_e = S##_e + _FP_EXPBIAS_##dfs - _FP_EXPBIAS_##sfs;	\
+	  _FP_FRAC_SLL_##dwc (D, (_FP_FRACBITS_##dfs - _FP_FRACBITS_##sfs)); \
+	}								\
+      else								\
+	{								\
+	  if (S##_e == 0)						\
+	    {								\
+	      _FP_CHECK_FLUSH_ZERO (sfs, swc, S);			\
+	      if (_FP_FRAC_ZEROP_##swc (S))				\
+		D##_e = 0;						\
+	      else if (_FP_EXPBIAS_##dfs				\
+		       < _FP_EXPBIAS_##sfs + _FP_FRACBITS_##sfs - 1)	\
+		{							\
+		  FP_SET_EXCEPTION (FP_EX_DENORM);			\
+		  _FP_FRAC_SLL_##dwc (D, (_FP_FRACBITS_##dfs		\
+					  - _FP_FRACBITS_##sfs));	\
+		  D##_e = 0;						\
+		  if (FP_TRAPPING_EXCEPTIONS & FP_EX_UNDERFLOW)		\
+		    FP_SET_EXCEPTION (FP_EX_UNDERFLOW);			\
+		}							\
+	      else							\
+		{							\
+		  int FP_EXTEND_lz;					\
+		  FP_SET_EXCEPTION (FP_EX_DENORM);			\
+		  _FP_FRAC_CLZ_##swc (FP_EXTEND_lz, S);			\
+		  _FP_FRAC_SLL_##dwc (D,				\
+				      FP_EXTEND_lz + _FP_FRACBITS_##dfs	\
+				      - _FP_FRACTBITS_##sfs);		\
+		  D##_e = (_FP_EXPBIAS_##dfs - _FP_EXPBIAS_##sfs + 1	\
+			   + _FP_FRACXBITS_##sfs - FP_EXTEND_lz);	\
+		}							\
+	    }								\
+	  else								\
+	    {								\
+	      D##_e = _FP_EXPMAX_##dfs;					\
+	      if (!_FP_FRAC_ZEROP_##swc (S))				\
+		{							\
+		  if (check_nan && _FP_FRAC_SNANP (sfs, S))		\
+		    FP_SET_EXCEPTION (FP_EX_INVALID			\
+				      | FP_EX_INVALID_SNAN);		\
+		  _FP_FRAC_SLL_##dwc (D, (_FP_FRACBITS_##dfs		\
+					  - _FP_FRACBITS_##sfs));	\
+		  if (check_nan)					\
+		    _FP_SETQNAN (dfs, dwc, D);				\
+		}							\
+	    }								\
+	}								\
+    }									\
+  while (0)
+
+#define FP_EXTEND(dfs, sfs, dwc, swc, D, S)		\
+    _FP_EXTEND_CNAN (dfs, sfs, dwc, swc, D, S, 1)
+
+/* Truncate from a wider floating-point format to a narrower one.
+   Input and output are semi-raw.  */
+#define FP_TRUNC(dfs, sfs, dwc, swc, D, S)				\
+  do									\
+    {									\
+      _FP_STATIC_ASSERT (_FP_FRACBITS_##sfs >= _FP_FRACBITS_##dfs,	\
+			 "destination mantissa wider than source");	\
+      _FP_STATIC_ASSERT (((_FP_EXPBIAS_##sfs				\
+			   >= (_FP_EXPBIAS_##dfs			\
+			       + _FP_FRACBITS_##dfs - 1))		\
+			  || _FP_EXPBIAS_##sfs == _FP_EXPBIAS_##dfs),	\
+			 "source subnormals do not all become same,"	\
+			 " but bias not the same");			\
+      D##_s = S##_s;							\
+      if (_FP_EXP_NORMAL (sfs, swc, S))					\
+	{								\
+	  D##_e = S##_e + _FP_EXPBIAS_##dfs - _FP_EXPBIAS_##sfs;	\
+	  if (D##_e >= _FP_EXPMAX_##dfs)				\
+	    _FP_OVERFLOW_SEMIRAW (dfs, dwc, D);				\
+	  else								\
+	    {								\
+	      if (D##_e <= 0)						\
+		{							\
+		  if (D##_e < 1 - _FP_FRACBITS_##dfs)			\
+		    {							\
+		      _FP_FRAC_SET_##swc (S, _FP_ZEROFRAC_##swc);	\
+		      _FP_FRAC_LOW_##swc (S) |= 1;			\
+		    }							\
+		  else							\
+		    {							\
+		      _FP_FRAC_HIGH_##sfs (S) |= _FP_IMPLBIT_SH_##sfs;	\
+		      _FP_FRAC_SRS_##swc (S, (_FP_WFRACBITS_##sfs	\
+					      - _FP_WFRACBITS_##dfs	\
+					      + 1 - D##_e),		\
+					  _FP_WFRACBITS_##sfs);		\
+		    }							\
+		  D##_e = 0;						\
+		}							\
+	      else							\
+		_FP_FRAC_SRS_##swc (S, (_FP_WFRACBITS_##sfs		\
+					- _FP_WFRACBITS_##dfs),		\
+				    _FP_WFRACBITS_##sfs);		\
+	      _FP_FRAC_COPY_##dwc##_##swc (D, S);			\
+	    }								\
+	}								\
+      else								\
+	{								\
+	  if (S##_e == 0)						\
+	    {								\
+	      _FP_CHECK_FLUSH_ZERO (sfs, swc, S);			\
+	      D##_e = 0;						\
+	      if (_FP_FRAC_ZEROP_##swc (S))				\
+		_FP_FRAC_SET_##dwc (D, _FP_ZEROFRAC_##dwc);		\
+	      else							\
+		{							\
+		  FP_SET_EXCEPTION (FP_EX_DENORM);			\
+		  if (_FP_EXPBIAS_##sfs					\
+		      < _FP_EXPBIAS_##dfs + _FP_FRACBITS_##dfs - 1)	\
+		    {							\
+		      _FP_FRAC_SRS_##swc (S, (_FP_WFRACBITS_##sfs	\
+					      - _FP_WFRACBITS_##dfs),	\
+					  _FP_WFRACBITS_##sfs);		\
+		      _FP_FRAC_COPY_##dwc##_##swc (D, S);		\
+		    }							\
+		  else							\
+		    {							\
+		      _FP_FRAC_SET_##dwc (D, _FP_ZEROFRAC_##dwc);	\
+		      _FP_FRAC_LOW_##dwc (D) |= 1;			\
+		    }							\
+		}							\
+	    }								\
+	  else								\
+	    {								\
+	      D##_e = _FP_EXPMAX_##dfs;					\
+	      if (_FP_FRAC_ZEROP_##swc (S))				\
+		_FP_FRAC_SET_##dwc (D, _FP_ZEROFRAC_##dwc);		\
+	      else							\
+		{							\
+		  _FP_CHECK_SIGNAN_SEMIRAW (sfs, swc, S);		\
+		  _FP_FRAC_SRL_##swc (S, (_FP_WFRACBITS_##sfs		\
+					  - _FP_WFRACBITS_##dfs));	\
+		  _FP_FRAC_COPY_##dwc##_##swc (D, S);			\
+		  /* Semi-raw NaN must have all workbits cleared.  */	\
+		  _FP_FRAC_LOW_##dwc (D)				\
+		    &= ~(_FP_W_TYPE) ((1 << _FP_WORKBITS) - 1);		\
+		  _FP_SETQNAN_SEMIRAW (dfs, dwc, D);			\
+		}							\
+	    }								\
+	}								\
+    }									\
+  while (0)
+
+/* Helper primitives.  */
+
+/* Count leading zeros in a word.  */
+
+#ifndef __FP_CLZ
+/* GCC 3.4 and later provide the builtins for us.  */
+# define __FP_CLZ(r, x)							\
+  do									\
+    {									\
+      _FP_STATIC_ASSERT ((sizeof (_FP_W_TYPE) == sizeof (unsigned int)	\
+			  || (sizeof (_FP_W_TYPE)			\
+			      == sizeof (unsigned long))		\
+			  || (sizeof (_FP_W_TYPE)			\
+			      == sizeof (unsigned long long))),		\
+			 "_FP_W_TYPE size unsupported for clz");	\
+      if (sizeof (_FP_W_TYPE) == sizeof (unsigned int))			\
+	(r) = __builtin_clz (x);					\
+      else if (sizeof (_FP_W_TYPE) == sizeof (unsigned long))		\
+	(r) = __builtin_clzl (x);					\
+      else /* sizeof (_FP_W_TYPE) == sizeof (unsigned long long).  */	\
+	(r) = __builtin_clzll (x);					\
+    }									\
+  while (0)
+#endif /* ndef __FP_CLZ */
+
+#define _FP_DIV_HELP_imm(q, r, n, d)		\
+  do						\
+    {						\
+      (q) = (n) / (d), (r) = (n) % (d);		\
+    }						\
+  while (0)
+
+
+/* A restoring bit-by-bit division primitive.  */
+
+#define _FP_DIV_MEAT_N_loop(fs, wc, R, X, Y)				\
+  do									\
+    {									\
+      int _FP_DIV_MEAT_N_loop_count = _FP_WFRACBITS_##fs;		\
+      _FP_FRAC_DECL_##wc (_FP_DIV_MEAT_N_loop_u);			\
+      _FP_FRAC_DECL_##wc (_FP_DIV_MEAT_N_loop_v);			\
+      _FP_FRAC_COPY_##wc (_FP_DIV_MEAT_N_loop_u, X);			\
+      _FP_FRAC_COPY_##wc (_FP_DIV_MEAT_N_loop_v, Y);			\
+      _FP_FRAC_SET_##wc (R, _FP_ZEROFRAC_##wc);				\
+      /* Normalize _FP_DIV_MEAT_N_LOOP_U and _FP_DIV_MEAT_N_LOOP_V.  */	\
+      _FP_FRAC_SLL_##wc (_FP_DIV_MEAT_N_loop_u, _FP_WFRACXBITS_##fs);	\
+      _FP_FRAC_SLL_##wc (_FP_DIV_MEAT_N_loop_v, _FP_WFRACXBITS_##fs);	\
+      /* First round.  Since the operands are normalized, either the	\
+	 first or second bit will be set in the fraction.  Produce a	\
+	 normalized result by checking which and adjusting the loop	\
+	 count and exponent accordingly.  */				\
+      if (_FP_FRAC_GE_1 (_FP_DIV_MEAT_N_loop_u, _FP_DIV_MEAT_N_loop_v))	\
+	{								\
+	  _FP_FRAC_SUB_##wc (_FP_DIV_MEAT_N_loop_u,			\
+			     _FP_DIV_MEAT_N_loop_u,			\
+			     _FP_DIV_MEAT_N_loop_v);			\
+	  _FP_FRAC_LOW_##wc (R) |= 1;					\
+	  _FP_DIV_MEAT_N_loop_count--;					\
+	}								\
+      else								\
+	R##_e--;							\
+      /* Subsequent rounds.  */						\
+      do								\
+	{								\
+	  int _FP_DIV_MEAT_N_loop_msb					\
+	    = (_FP_WS_TYPE) _FP_FRAC_HIGH_##wc (_FP_DIV_MEAT_N_loop_u) < 0; \
+	  _FP_FRAC_SLL_##wc (_FP_DIV_MEAT_N_loop_u, 1);			\
+	  _FP_FRAC_SLL_##wc (R, 1);					\
+	  if (_FP_DIV_MEAT_N_loop_msb					\
+	      || _FP_FRAC_GE_1 (_FP_DIV_MEAT_N_loop_u,			\
+				_FP_DIV_MEAT_N_loop_v))			\
+	    {								\
+	      _FP_FRAC_SUB_##wc (_FP_DIV_MEAT_N_loop_u,			\
+				 _FP_DIV_MEAT_N_loop_u,			\
+				 _FP_DIV_MEAT_N_loop_v);		\
+	      _FP_FRAC_LOW_##wc (R) |= 1;				\
+	    }								\
+	}								\
+      while (--_FP_DIV_MEAT_N_loop_count > 0);				\
+      /* If there's anything left in _FP_DIV_MEAT_N_LOOP_U, the result	\
+	 is inexact.  */						\
+      _FP_FRAC_LOW_##wc (R)						\
+	|= !_FP_FRAC_ZEROP_##wc (_FP_DIV_MEAT_N_loop_u);		\
+    }									\
+  while (0)
+
+#define _FP_DIV_MEAT_1_loop(fs, R, X, Y)  _FP_DIV_MEAT_N_loop (fs, 1, R, X, Y)
+#define _FP_DIV_MEAT_2_loop(fs, R, X, Y)  _FP_DIV_MEAT_N_loop (fs, 2, R, X, Y)
+#define _FP_DIV_MEAT_4_loop(fs, R, X, Y)  _FP_DIV_MEAT_N_loop (fs, 4, R, X, Y)
+
+#endif /* !SOFT_FP_OP_COMMON_H */
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_add.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_add.c
new file mode 100644
index 000000000..a67deab31
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_add.c
@@ -0,0 +1,44 @@
+/* Software floating-point emulation.
+   (*c) = (*a) + (*b)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+void _Qp_add(long double *c, const long double *a, const long double *b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C);
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_SEMIRAW_QP(A, a);
+  FP_UNPACK_SEMIRAW_QP(B, b);
+  FP_ADD_Q(C, A, B);
+  FP_PACK_SEMIRAW_QP(c, C);
+  QP_HANDLE_EXCEPTIONS(__asm (
+"	ldd [%1], %%f52\n"
+"	ldd [%1+8], %%f54\n"
+"	ldd [%2], %%f56\n"
+"	ldd [%2+8], %%f58\n"
+"	faddq %%f52, %%f56, %%f60\n"
+"	std %%f60, [%0]\n"
+"	std %%f62, [%0+8]\n"
+"	" : : "r" (c), "r" (a), "r" (b) : QP_CLOBBER));
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_cmp.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_cmp.c
new file mode 100644
index 000000000..5316157ec
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_cmp.c
@@ -0,0 +1,48 @@
+/* Software floating-point emulation.
+   Compare (*a) and (*b), return float condition code.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Qp_cmp(const long double *a, const long double *b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B);
+  int r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_QP(A, a);
+  FP_UNPACK_RAW_QP(B, b);
+  FP_CMP_Q(r, B, A, 3, 1);
+  if (r == -1) r = 2;
+  QP_HANDLE_EXCEPTIONS(
+	__asm (
+"	ldd [%0], %%f52\n"
+"	ldd [%0+8], %%f54\n"
+"	ldd [%1], %%f56\n"
+"	ldd [%1+8], %%f58\n"
+"	fcmpq %%fcc3, %%f52, %%f56\n"
+"	" : : "r" (a), "r" (b) : QP_CLOBBER_CC);
+	_FPU_GETCW(_fcw);
+	r = ((_fcw >> 36) & 3));
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_cmpe.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_cmpe.c
new file mode 100644
index 000000000..e0a834c72
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_cmpe.c
@@ -0,0 +1,49 @@
+/* Software floating-point emulation.
+   Compare (*a) and (*b), return float condition code.
+   Signal exception (unless masked) if unordered.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Qp_cmpe(const long double *a, const long double *b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B);
+  int r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_QP(A, a);
+  FP_UNPACK_RAW_QP(B, b);
+  FP_CMP_Q(r, B, A, 3, 2);
+  if (r == -1) r = 2;
+  QP_HANDLE_EXCEPTIONS(
+	__asm (
+"	ldd [%0], %%f52\n"
+"	ldd [%0+8], %%f54\n"
+"	ldd [%1], %%f56\n"
+"	ldd [%1+8], %%f58\n"
+"	fcmpeq %%fcc3, %%f52, %%f56\n"
+"	" : : "r" (a), "r" (b) : QP_CLOBBER_CC);
+	_FPU_GETCW(_fcw);
+	r = ((_fcw >> 36) & 3));
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_div.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_div.c
new file mode 100644
index 000000000..27d08f94d
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_div.c
@@ -0,0 +1,44 @@
+/* Software floating-point emulation.
+   (*c) = (*a) / (*b)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+void _Qp_div(long double *c, const long double *a, const long double *b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C);
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_QP(A, a);
+  FP_UNPACK_QP(B, b);
+  FP_DIV_Q(C, A, B);
+  FP_PACK_QP(c, C);
+  QP_HANDLE_EXCEPTIONS(__asm (
+"	ldd [%1], %%f52\n"
+"	ldd [%1+8], %%f54\n"
+"	ldd [%2], %%f56\n"
+"	ldd [%2+8], %%f58\n"
+"	fdivq %%f52, %%f56, %%f60\n"
+"	std %%f60, [%0]\n"
+"	std %%f62, [%0+8]\n"
+"	" : : "r" (c), "r" (a), "r" (b) : QP_CLOBBER));
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_dtoq.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_dtoq.c
new file mode 100644
index 000000000..5a5c43b13
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_dtoq.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+   (*c) = (long double)(a)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "double.h"
+#include "quad.h"
+
+void _Qp_dtoq(long double *c, const double a)
+{
+  FP_DECL_EX;
+  FP_DECL_D(A);
+  FP_DECL_Q(C);
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_D(A, a);
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+  FP_EXTEND(Q,D,4,2,C,A);
+#else
+  FP_EXTEND(Q,D,2,1,C,A);
+#endif
+  FP_PACK_RAW_QP(c, C);
+  QP_HANDLE_EXCEPTIONS(__asm (
+"	fdtoq %1, %%f60\n"
+"	std %%f60, [%0]\n"
+"	std %%f62, [%0+8]\n"
+"	" : : "r" (c), "e" (a) : QP_CLOBBER));
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_feq.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_feq.c
new file mode 100644
index 000000000..c7c626378
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_feq.c
@@ -0,0 +1,48 @@
+/* Software floating-point emulation.
+   Return 1 if (*a) == (*b)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Qp_feq(const long double *a, const long double *b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B);
+  int r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_QP(A, a);
+  FP_UNPACK_RAW_QP(B, b);
+  FP_CMP_EQ_Q(r, A, B, 1);
+
+  QP_HANDLE_EXCEPTIONS(
+	__asm (
+"	ldd [%0], %%f52\n"
+"	ldd [%0+8], %%f54\n"
+"	ldd [%1], %%f56\n"
+"	ldd [%1+8], %%f58\n"
+"	fcmpq %%fcc3, %%f52, %%f56\n"
+"	" : : "r" (a), "r" (b) : QP_CLOBBER_CC);
+	_FPU_GETCW(_fcw);
+	r = ((_fcw >> 36) & 3));
+
+  return !r;
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_fge.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_fge.c
new file mode 100644
index 000000000..19cacbb34
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_fge.c
@@ -0,0 +1,48 @@
+/* Software floating-point emulation.
+   Return 1 if (*a) >= (*b)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Qp_fge(const long double *a, const long double *b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B);
+  int r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_QP(A, a);
+  FP_UNPACK_RAW_QP(B, b);
+  FP_CMP_Q(r, B, A, 3, 2);
+
+  QP_HANDLE_EXCEPTIONS(
+	__asm (
+"	ldd [%0], %%f52\n"
+"	ldd [%0+8], %%f54\n"
+"	ldd [%1], %%f56\n"
+"	ldd [%1+8], %%f58\n"
+"	fcmpeq %%fcc3, %%f52, %%f56\n"
+"	" : : "r" (a), "r" (b) : QP_CLOBBER_CC);
+	_FPU_GETCW(_fcw);
+	r = ((_fcw >> 36) & 1));
+
+  return (r <= 0);
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_fgt.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_fgt.c
new file mode 100644
index 000000000..70645d1cb
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_fgt.c
@@ -0,0 +1,48 @@
+/* Software floating-point emulation.
+   Return 1 if (*a) > (*b)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Qp_fgt(const long double *a, const long double *b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B);
+  int r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_QP(A, a);
+  FP_UNPACK_RAW_QP(B, b);
+  FP_CMP_Q(r, B, A, 3, 2);
+
+  QP_HANDLE_EXCEPTIONS(
+	__asm (
+"	ldd [%0], %%f52\n"
+"	ldd [%0+8], %%f54\n"
+"	ldd [%1], %%f56\n"
+"	ldd [%1+8], %%f58\n"
+"	fcmpeq %%fcc3, %%f52, %%f56\n"
+"	" : : "r" (a), "r" (b) : QP_CLOBBER_CC);
+	_FPU_GETCW(_fcw);
+	r = ((_fcw >> 36) & 3) - 3);
+
+  return (r == -1);
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_fle.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_fle.c
new file mode 100644
index 000000000..6293fcbcd
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_fle.c
@@ -0,0 +1,48 @@
+/* Software floating-point emulation.
+   Return 1 if (*a) <= (*b)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Qp_fle(const long double *a, const long double *b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B);
+  int r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_QP(A, a);
+  FP_UNPACK_RAW_QP(B, b);
+  FP_CMP_Q(r, B, A, -2, 2);
+
+  QP_HANDLE_EXCEPTIONS(
+	__asm (
+"	ldd [%0], %%f52\n"
+"	ldd [%0+8], %%f54\n"
+"	ldd [%1], %%f56\n"
+"	ldd [%1+8], %%f58\n"
+"	fcmpeq %%fcc3, %%f52, %%f56\n"
+"	" : : "r" (a), "r" (b) : QP_CLOBBER_CC);
+	_FPU_GETCW(_fcw);
+	r = ((_fcw >> 36) & 2) ? -1 : 0);
+
+  return (r >= 0);
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_flt.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_flt.c
new file mode 100644
index 000000000..7aa054697
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_flt.c
@@ -0,0 +1,48 @@
+/* Software floating-point emulation.
+   Return 1 if (*a) < (*b)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Qp_flt(const long double *a, const long double *b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B);
+  int r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_QP(A, a);
+  FP_UNPACK_RAW_QP(B, b);
+  FP_CMP_Q(r, B, A, 3, 2);
+
+  QP_HANDLE_EXCEPTIONS(
+	__asm (
+"	ldd [%0], %%f52\n"
+"	ldd [%0+8], %%f54\n"
+"	ldd [%1], %%f56\n"
+"	ldd [%1+8], %%f58\n"
+"	fcmpeq %%fcc3, %%f52, %%f56\n"
+"	" : : "r" (a), "r" (b) : QP_CLOBBER_CC);
+	_FPU_GETCW(_fcw);
+	r = ((_fcw >> 36) & 3));
+
+  return (r == 1);
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_fne.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_fne.c
new file mode 100644
index 000000000..dd358eda0
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_fne.c
@@ -0,0 +1,49 @@
+/* Software floating-point emulation.
+   Return 1 if (*a) != (*b)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Qp_fne(const long double *a, const long double *b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B);
+  int r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_QP(A, a);
+  FP_UNPACK_RAW_QP(B, b);
+  FP_CMP_EQ_Q(r, A, B, 1);
+
+  QP_HANDLE_EXCEPTIONS(
+	__asm (
+"	ldd [%0], %%f52\n"
+"	ldd [%0+8], %%f54\n"
+"	ldd [%1], %%f56\n"
+"	ldd [%1+8], %%f58\n"
+"	fcmpq %%fcc3, %%f52, %%f56\n"
+"	" : : "r" (a), "r" (b) : QP_CLOBBER_CC);
+	_FPU_GETCW(_fcw);
+	r = ((_fcw >> 36) & 3) != 0);
+
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_itoq.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_itoq.c
new file mode 100644
index 000000000..230fde365
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_itoq.c
@@ -0,0 +1,34 @@
+/* Software floating-point emulation.
+   (*c) = (long double)(a)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+void _Qp_itoq(long double *c, const int a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(C);
+  int b = a;
+
+  FP_FROM_INT_Q(C, b, 32, unsigned int);
+  FP_PACK_RAW_QP(c, C);
+  QP_NO_EXCEPTIONS;
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_mul.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_mul.c
new file mode 100644
index 000000000..49a290af9
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_mul.c
@@ -0,0 +1,49 @@
+/* Software floating-point emulation.
+   (*c) = (*a) * (*b)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* As QP_HANDLE_EXCEPTIONS reloads FPU control word anyway,
+   avoid doing it twice.  */
+#define _FP_MUL_MEAT_RESET_FE do {} while (0)
+#include "soft-fp.h"
+#include "quad.h"
+
+void _Qp_mul(long double *c, const long double *a, const long double *b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C);
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_QP(A, a);
+  FP_UNPACK_QP(B, b);
+  FP_MUL_Q(C, A, B);
+  FP_PACK_QP(c, C);
+  QP_HANDLE_EXCEPTIONS(
+	_FPU_SETCW(_fcw);
+	__asm (
+"	ldd [%1], %%f52\n"
+"	ldd [%1+8], %%f54\n"
+"	ldd [%2], %%f56\n"
+"	ldd [%2+8], %%f58\n"
+"	fmulq %%f52, %%f56, %%f60\n"
+"	std %%f60, [%0]\n"
+"	std %%f62, [%0+8]\n"
+"	" : : "r" (c), "r" (a), "r" (b) : QP_CLOBBER));
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_neg.S b/libc/sysdeps/linux/sparc64/soft-fp/qp_neg.S
new file mode 100644
index 000000000..d2fd7f286
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_neg.S
@@ -0,0 +1,30 @@
+/* Quad floating-point emulation.
+   (*c) = !(*a)
+   Copyright (C) 1999-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+ENTRY(_Qp_neg)
+	ldd	[%o1], %f60
+	ldd	[%o1 + 8], %f62
+	fnegd	%f60, %f60
+	std	%f60, [%o0]
+	jmpl	%o7 + 8, %g0
+	 std	%f62, [%o0 + 8]
+END(_Qp_neg)
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_qtod.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_qtod.c
new file mode 100644
index 000000000..7c3889da9
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_qtod.c
@@ -0,0 +1,48 @@
+/* Software floating-point emulation.
+   Return (double)(*a)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "double.h"
+#include "quad.h"
+
+double _Qp_qtod(const long double *a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A);
+  FP_DECL_D(R);
+  double r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_SEMIRAW_QP(A, a);
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+  FP_TRUNC(D,Q,2,4,R,A);
+#else
+  FP_TRUNC(D,Q,1,2,R,A);
+#endif
+  FP_PACK_SEMIRAW_D(r, R);
+  QP_HANDLE_EXCEPTIONS(__asm (
+"	ldd [%1], %%f52\n"
+"	ldd [%1+8], %%f54\n"
+"	fqtod %%f52, %0\n"
+"	" : "=&e" (r) : "r" (a) : QP_CLOBBER));
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_qtoi.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_qtoi.c
new file mode 100644
index 000000000..99cd760ac
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_qtoi.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+   Return (int)(*a)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FP_ROUNDMODE FP_RND_ZERO
+#include "soft-fp.h"
+#include "quad.h"
+
+int _Qp_qtoi(const long double *a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A);
+  unsigned int r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_QP(A, a);
+  FP_TO_INT_Q(r, A, 32, 1);
+  QP_HANDLE_EXCEPTIONS(
+	int rx;
+	__asm (
+"	ldd [%1], %%f52\n"
+"	ldd [%1+8], %%f54\n"
+"	fqtoi %%f52, %%f31\n"
+"	st %%f31, [%0]\n"
+"	" : : "r" (&rx), "r" (a) : QP_CLOBBER, "f31");
+	r = rx);
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_qtos.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_qtos.c
new file mode 100644
index 000000000..dacd6c062
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_qtos.c
@@ -0,0 +1,49 @@
+/* Software floating-point emulation.
+   Return (float)(*a)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "single.h"
+#include "quad.h"
+
+float _Qp_qtos(const long double *a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A);
+  FP_DECL_S(R);
+  float r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_SEMIRAW_QP(A, a);
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+  FP_TRUNC(S,Q,1,4,R,A);
+#else
+  FP_TRUNC(S,Q,1,2,R,A);
+#endif
+  FP_PACK_SEMIRAW_S(r, R);
+
+  QP_HANDLE_EXCEPTIONS(__asm (
+"	ldd [%1], %%f52\n"
+"	ldd [%1+8], %%f54\n"
+"	fqtos %%f52, %0\n"
+"	" : "=&f" (r) : "r" (a) : QP_CLOBBER));
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_qtoui.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_qtoui.c
new file mode 100644
index 000000000..2d8fb5253
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_qtoui.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+   Return (unsigned int)(*a)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FP_ROUNDMODE FP_RND_ZERO
+#include "soft-fp.h"
+#include "quad.h"
+
+unsigned int _Qp_qtoui(const long double *a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A);
+  unsigned int r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_QP(A, a);
+  FP_TO_INT_Q(r, A, 32, -1);
+  QP_HANDLE_EXCEPTIONS(
+	int rx;
+	__asm (
+"	ldd [%1], %%f52\n"
+"	ldd [%1+8], %%f54\n"
+"	fqtoi %%f52, %%f31\n"
+"	st %%f31, [%0]\n"
+"	" : : "r" (&rx), "r" (a) : QP_CLOBBER, "f31");
+	r = rx);
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_qtoux.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_qtoux.c
new file mode 100644
index 000000000..2d74a6ee1
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_qtoux.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+   Return (unsigned long)(*a)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FP_ROUNDMODE FP_RND_ZERO
+#include "soft-fp.h"
+#include "quad.h"
+
+unsigned long _Qp_qtoux(const long double *a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A);
+  unsigned long r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_QP(A, a);
+  FP_TO_INT_Q(r, A, 64, -1);
+  QP_HANDLE_EXCEPTIONS(
+	unsigned long rx;
+	__asm (
+"	ldd [%1], %%f52\n"
+"	ldd [%1+8], %%f54\n"
+"	fqtox %%f52, %%f60\n"
+"	std %%f60, [%0]\n"
+"	" : : "r" (&rx), "r" (a) : QP_CLOBBER);
+	r = rx);
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_qtox.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_qtox.c
new file mode 100644
index 000000000..abfc666cd
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_qtox.c
@@ -0,0 +1,46 @@
+/* Software floating-point emulation.
+   Return (long)(*a)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FP_ROUNDMODE FP_RND_ZERO
+#include "soft-fp.h"
+#include "quad.h"
+
+long _Qp_qtox(const long double *a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A);
+  unsigned long r;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_QP(A, a);
+  FP_TO_INT_Q(r, A, 64, 1);
+  QP_HANDLE_EXCEPTIONS(
+	long rx;
+	__asm (
+"	ldd [%1], %%f52\n"
+"	ldd [%1+8], %%f54\n"
+"	fqtox %%f52, %%f60\n"
+"	std %%f60, [%0]\n"
+"	" : : "r" (&rx), "r" (a) : QP_CLOBBER);
+	r = rx);
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_sqrt.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_sqrt.c
new file mode 100644
index 000000000..3d78b1fdb
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_sqrt.c
@@ -0,0 +1,41 @@
+/* Software floating-point emulation.
+   (*c) = sqrtl(*a)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+void _Qp_sqrt(long double *c, const long double *a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(C);
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_QP(A, a);
+  FP_SQRT_Q(C, A);
+  FP_PACK_QP(c, C);
+  QP_HANDLE_EXCEPTIONS(__asm (
+"	ldd [%1], %%f52\n"
+"	ldd [%1+8], %%f54\n"
+"	fsqrtq %%f52, %%f60\n"
+"	std %%f60, [%0]\n"
+"	std %%f62, [%0+8]\n"
+"	" : : "r" (c), "r" (a) : QP_CLOBBER));
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_stoq.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_stoq.c
new file mode 100644
index 000000000..9202a7269
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_stoq.c
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+   (*c) = (long double)(a)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "single.h"
+#include "quad.h"
+
+void _Qp_stoq(long double *c, const float a)
+{
+  FP_DECL_EX;
+  FP_DECL_S(A);
+  FP_DECL_Q(C);
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_RAW_S(A, a);
+#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
+  FP_EXTEND(Q,S,4,1,C,A);
+#else
+  FP_EXTEND(Q,S,2,1,C,A);
+#endif
+  FP_PACK_RAW_QP(c, C);
+  QP_HANDLE_EXCEPTIONS(__asm (
+"	fstoq %1, %%f60\n"
+"	std %%f60, [%0]\n"
+"	std %%f62, [%0+8]\n"
+"	" : : "r" (c), "f" (a) : QP_CLOBBER));
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_sub.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_sub.c
new file mode 100644
index 000000000..71b989074
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_sub.c
@@ -0,0 +1,44 @@
+/* Software floating-point emulation.
+   (*c) = (*a) - (*b)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+void _Qp_sub(long double *c, const long double *a, const long double *b)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C);
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_SEMIRAW_QP(A, a);
+  FP_UNPACK_SEMIRAW_QP(B, b);
+  FP_SUB_Q(C, A, B);
+  FP_PACK_SEMIRAW_QP(c, C);
+  QP_HANDLE_EXCEPTIONS(__asm (
+"	ldd [%1], %%f52\n"
+"	ldd [%1+8], %%f54\n"
+"	ldd [%2], %%f56\n"
+"	ldd [%2+8], %%f58\n"
+"	fsubq %%f52, %%f56, %%f60\n"
+"	std %%f60, [%0]\n"
+"	std %%f62, [%0+8]\n"
+"	" : : "r" (c), "r" (a), "r" (b) : QP_CLOBBER));
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_uitoq.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_uitoq.c
new file mode 100644
index 000000000..cc8603ad4
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_uitoq.c
@@ -0,0 +1,34 @@
+/* Software floating-point emulation.
+   (*c) = (long double)(a)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+void _Qp_uitoq(long double *c, const unsigned int a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(C);
+  unsigned int b = a;
+
+  FP_FROM_INT_Q(C, b, 32, unsigned int);
+  FP_PACK_RAW_QP(c, C);
+  QP_NO_EXCEPTIONS;
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_util.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_util.c
new file mode 100644
index 000000000..7e1f2511e
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_util.c
@@ -0,0 +1,60 @@
+/* Software floating-point emulation.
+   Helper routine for _Qp_* routines.
+   Simulate exceptions using double arithmetics.
+   Copyright (C) 1999-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <float.h>
+#include <math.h>
+#include <assert.h>
+#include "soft-fp.h"
+
+void __Qp_handle_exceptions(int exceptions)
+{
+  if (exceptions & FP_EX_INVALID)
+    {
+      float f = 0.0;
+      __asm__ __volatile__ ("fdivs %0, %0, %0" : "+f" (f));
+    }
+  if (exceptions & FP_EX_DIVZERO)
+    {
+      float f = 1.0, g = 0.0;
+      __asm__ __volatile__ ("fdivs %0, %1, %0"
+			    : "+f" (f)
+			    : "f" (g));
+    }
+  if (exceptions & FP_EX_OVERFLOW)
+    {
+      float f = FLT_MAX;
+      __asm__ __volatile__("fmuls %0, %0, %0" : "+f" (f));
+      exceptions &= ~FP_EX_INEXACT;
+    }
+  if (exceptions & FP_EX_UNDERFLOW)
+    {
+      float f = FLT_MIN;
+      __asm__ __volatile__("fmuls %0, %0, %0" : "+f" (f));
+      exceptions &= ~FP_EX_INEXACT;
+    }
+  if (exceptions & FP_EX_INEXACT)
+    {
+      double d = 1.0, e = M_PI;
+      __asm__ __volatile__ ("fdivd %0, %1, %0"
+			    : "+f" (d)
+			    : "f" (e));
+    }
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_uxtoq.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_uxtoq.c
new file mode 100644
index 000000000..766ca7887
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_uxtoq.c
@@ -0,0 +1,34 @@
+/* Software floating-point emulation.
+   (*c) = (long double)(a)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+void _Qp_uxtoq(long double *c, const unsigned long a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(C);
+  unsigned long b = a;
+
+  FP_FROM_INT_Q(C, b, 64, unsigned long);
+  FP_PACK_RAW_QP(c, C);
+  QP_NO_EXCEPTIONS;
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/qp_xtoq.c b/libc/sysdeps/linux/sparc64/soft-fp/qp_xtoq.c
new file mode 100644
index 000000000..42c0bf104
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/qp_xtoq.c
@@ -0,0 +1,34 @@
+/* Software floating-point emulation.
+   (*c) = (long double)(*a)
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+void _Qp_xtoq(long double *c, const long a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(C);
+  long b = a;
+
+  FP_FROM_INT_Q(C, b, 64, unsigned long);
+  FP_PACK_RAW_QP(c, C);
+  QP_NO_EXCEPTIONS;
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/quad.h b/libc/sysdeps/linux/sparc64/soft-fp/quad.h
new file mode 100644
index 000000000..388601fd8
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/quad.h
@@ -0,0 +1,330 @@
+/* Software floating-point emulation.
+   Definitions for IEEE Quad Precision.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef SOFT_FP_QUAD_H
+#define SOFT_FP_QUAD_H	1
+
+#if _FP_W_TYPE_SIZE < 32
+# error "Here's a nickel, kid. Go buy yourself a real computer."
+#endif
+
+#if _FP_W_TYPE_SIZE < 64
+# define _FP_FRACTBITS_Q	(4*_FP_W_TYPE_SIZE)
+# define _FP_FRACTBITS_DW_Q	(8*_FP_W_TYPE_SIZE)
+#else
+# define _FP_FRACTBITS_Q		(2*_FP_W_TYPE_SIZE)
+# define _FP_FRACTBITS_DW_Q	(4*_FP_W_TYPE_SIZE)
+#endif
+
+#define _FP_FRACBITS_Q		113
+#define _FP_FRACXBITS_Q		(_FP_FRACTBITS_Q - _FP_FRACBITS_Q)
+#define _FP_WFRACBITS_Q		(_FP_WORKBITS + _FP_FRACBITS_Q)
+#define _FP_WFRACXBITS_Q	(_FP_FRACTBITS_Q - _FP_WFRACBITS_Q)
+#define _FP_EXPBITS_Q		15
+#define _FP_EXPBIAS_Q		16383
+#define _FP_EXPMAX_Q		32767
+
+#define _FP_QNANBIT_Q		\
+	((_FP_W_TYPE) 1 << (_FP_FRACBITS_Q-2) % _FP_W_TYPE_SIZE)
+#define _FP_QNANBIT_SH_Q		\
+	((_FP_W_TYPE) 1 << (_FP_FRACBITS_Q-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
+#define _FP_IMPLBIT_Q		\
+	((_FP_W_TYPE) 1 << (_FP_FRACBITS_Q-1) % _FP_W_TYPE_SIZE)
+#define _FP_IMPLBIT_SH_Q		\
+	((_FP_W_TYPE) 1 << (_FP_FRACBITS_Q-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
+#define _FP_OVERFLOW_Q		\
+	((_FP_W_TYPE) 1 << (_FP_WFRACBITS_Q % _FP_W_TYPE_SIZE))
+
+#define _FP_WFRACBITS_DW_Q	(2 * _FP_WFRACBITS_Q)
+#define _FP_WFRACXBITS_DW_Q	(_FP_FRACTBITS_DW_Q - _FP_WFRACBITS_DW_Q)
+#define _FP_HIGHBIT_DW_Q	\
+  ((_FP_W_TYPE) 1 << (_FP_WFRACBITS_DW_Q - 1) % _FP_W_TYPE_SIZE)
+
+typedef float TFtype __attribute__ ((mode (TF)));
+
+#if _FP_W_TYPE_SIZE < 64
+
+union _FP_UNION_Q
+{
+  TFtype flt;
+  struct _FP_STRUCT_LAYOUT
+  {
+# if __BYTE_ORDER == __BIG_ENDIAN
+    unsigned sign : 1;
+    unsigned exp : _FP_EXPBITS_Q;
+    unsigned long frac3 : _FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0)-(_FP_W_TYPE_SIZE * 3);
+    unsigned long frac2 : _FP_W_TYPE_SIZE;
+    unsigned long frac1 : _FP_W_TYPE_SIZE;
+    unsigned long frac0 : _FP_W_TYPE_SIZE;
+# else
+    unsigned long frac0 : _FP_W_TYPE_SIZE;
+    unsigned long frac1 : _FP_W_TYPE_SIZE;
+    unsigned long frac2 : _FP_W_TYPE_SIZE;
+    unsigned long frac3 : _FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0)-(_FP_W_TYPE_SIZE * 3);
+    unsigned exp : _FP_EXPBITS_Q;
+    unsigned sign : 1;
+# endif /* not bigendian */
+  } bits __attribute__ ((packed));
+};
+
+
+# define FP_DECL_Q(X)		_FP_DECL (4, X)
+# define FP_UNPACK_RAW_Q(X, val)	_FP_UNPACK_RAW_4 (Q, X, (val))
+# define FP_UNPACK_RAW_QP(X, val)	_FP_UNPACK_RAW_4_P (Q, X, (val))
+# define FP_PACK_RAW_Q(val, X)	_FP_PACK_RAW_4 (Q, (val), X)
+# define FP_PACK_RAW_QP(val, X)			\
+  do						\
+    {						\
+      if (!FP_INHIBIT_RESULTS)			\
+	_FP_PACK_RAW_4_P (Q, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_UNPACK_Q(X, val)			\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_4 (Q, X, (val));		\
+      _FP_UNPACK_CANONICAL (Q, 4, X);		\
+    }						\
+  while (0)
+
+# define FP_UNPACK_QP(X, val)			\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_4_P (Q, X, (val));		\
+      _FP_UNPACK_CANONICAL (Q, 4, X);		\
+    }						\
+  while (0)
+
+# define FP_UNPACK_SEMIRAW_Q(X, val)		\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_4 (Q, X, (val));		\
+      _FP_UNPACK_SEMIRAW (Q, 4, X);		\
+    }						\
+  while (0)
+
+# define FP_UNPACK_SEMIRAW_QP(X, val)		\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_4_P (Q, X, (val));		\
+      _FP_UNPACK_SEMIRAW (Q, 4, X);		\
+    }						\
+  while (0)
+
+# define FP_PACK_Q(val, X)			\
+  do						\
+    {						\
+      _FP_PACK_CANONICAL (Q, 4, X);		\
+      _FP_PACK_RAW_4 (Q, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_PACK_QP(val, X)			\
+  do						\
+    {						\
+      _FP_PACK_CANONICAL (Q, 4, X);		\
+      if (!FP_INHIBIT_RESULTS)			\
+	_FP_PACK_RAW_4_P (Q, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_PACK_SEMIRAW_Q(val, X)		\
+  do						\
+    {						\
+      _FP_PACK_SEMIRAW (Q, 4, X);		\
+      _FP_PACK_RAW_4 (Q, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_PACK_SEMIRAW_QP(val, X)		\
+  do						\
+    {						\
+      _FP_PACK_SEMIRAW (Q, 4, X);		\
+      if (!FP_INHIBIT_RESULTS)			\
+	_FP_PACK_RAW_4_P (Q, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_ISSIGNAN_Q(X)		_FP_ISSIGNAN (Q, 4, X)
+# define FP_NEG_Q(R, X)			_FP_NEG (Q, 4, R, X)
+# define FP_ADD_Q(R, X, Y)		_FP_ADD (Q, 4, R, X, Y)
+# define FP_SUB_Q(R, X, Y)		_FP_SUB (Q, 4, R, X, Y)
+# define FP_MUL_Q(R, X, Y)		_FP_MUL (Q, 4, R, X, Y)
+# define FP_DIV_Q(R, X, Y)		_FP_DIV (Q, 4, R, X, Y)
+# define FP_SQRT_Q(R, X)		_FP_SQRT (Q, 4, R, X)
+# define _FP_SQRT_MEAT_Q(R, S, T, X, Q)	_FP_SQRT_MEAT_4 (R, S, T, X, (Q))
+# define FP_FMA_Q(R, X, Y, Z)		_FP_FMA (Q, 4, 8, R, X, Y, Z)
+
+# define FP_CMP_Q(r, X, Y, un, ex)	_FP_CMP (Q, 4, (r), X, Y, (un), (ex))
+# define FP_CMP_EQ_Q(r, X, Y, ex)	_FP_CMP_EQ (Q, 4, (r), X, Y, (ex))
+# define FP_CMP_UNORD_Q(r, X, Y, ex)	_FP_CMP_UNORD (Q, 4, (r), X, Y, (ex))
+
+# define FP_TO_INT_Q(r, X, rsz, rsg)	_FP_TO_INT (Q, 4, (r), X, (rsz), (rsg))
+# define FP_TO_INT_ROUND_Q(r, X, rsz, rsg)	\
+  _FP_TO_INT_ROUND (Q, 4, (r), X, (rsz), (rsg))
+# define FP_FROM_INT_Q(X, r, rs, rt)	_FP_FROM_INT (Q, 4, X, (r), (rs), rt)
+
+# define _FP_FRAC_HIGH_Q(X)	_FP_FRAC_HIGH_4 (X)
+# define _FP_FRAC_HIGH_RAW_Q(X)	_FP_FRAC_HIGH_4 (X)
+
+# define _FP_FRAC_HIGH_DW_Q(X)	_FP_FRAC_HIGH_8 (X)
+
+#else   /* not _FP_W_TYPE_SIZE < 64 */
+union _FP_UNION_Q
+{
+  TFtype flt /* __attribute__ ((mode (TF))) */ ;
+  struct _FP_STRUCT_LAYOUT
+  {
+    _FP_W_TYPE a, b;
+  } longs;
+  struct _FP_STRUCT_LAYOUT
+  {
+# if __BYTE_ORDER == __BIG_ENDIAN
+    unsigned sign    : 1;
+    unsigned exp     : _FP_EXPBITS_Q;
+    _FP_W_TYPE frac1 : _FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0) - _FP_W_TYPE_SIZE;
+    _FP_W_TYPE frac0 : _FP_W_TYPE_SIZE;
+# else
+    _FP_W_TYPE frac0 : _FP_W_TYPE_SIZE;
+    _FP_W_TYPE frac1 : _FP_FRACBITS_Q - (_FP_IMPLBIT_Q != 0) - _FP_W_TYPE_SIZE;
+    unsigned exp     : _FP_EXPBITS_Q;
+    unsigned sign    : 1;
+# endif
+  } bits;
+};
+
+# define FP_DECL_Q(X)		_FP_DECL (2, X)
+# define FP_UNPACK_RAW_Q(X, val)	_FP_UNPACK_RAW_2 (Q, X, (val))
+# define FP_UNPACK_RAW_QP(X, val)	_FP_UNPACK_RAW_2_P (Q, X, (val))
+# define FP_PACK_RAW_Q(val, X)	_FP_PACK_RAW_2 (Q, (val), X)
+# define FP_PACK_RAW_QP(val, X)			\
+  do						\
+    {						\
+      if (!FP_INHIBIT_RESULTS)			\
+	_FP_PACK_RAW_2_P (Q, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_UNPACK_Q(X, val)			\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_2 (Q, X, (val));		\
+      _FP_UNPACK_CANONICAL (Q, 2, X);		\
+    }						\
+  while (0)
+
+# define FP_UNPACK_QP(X, val)			\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_2_P (Q, X, (val));		\
+      _FP_UNPACK_CANONICAL (Q, 2, X);		\
+    }						\
+  while (0)
+
+# define FP_UNPACK_SEMIRAW_Q(X, val)		\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_2 (Q, X, (val));		\
+      _FP_UNPACK_SEMIRAW (Q, 2, X);		\
+    }						\
+  while (0)
+
+# define FP_UNPACK_SEMIRAW_QP(X, val)		\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_2_P (Q, X, (val));		\
+      _FP_UNPACK_SEMIRAW (Q, 2, X);		\
+    }						\
+  while (0)
+
+# define FP_PACK_Q(val, X)			\
+  do						\
+    {						\
+      _FP_PACK_CANONICAL (Q, 2, X);		\
+      _FP_PACK_RAW_2 (Q, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_PACK_QP(val, X)			\
+  do						\
+    {						\
+      _FP_PACK_CANONICAL (Q, 2, X);		\
+      if (!FP_INHIBIT_RESULTS)			\
+	_FP_PACK_RAW_2_P (Q, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_PACK_SEMIRAW_Q(val, X)		\
+  do						\
+    {						\
+      _FP_PACK_SEMIRAW (Q, 2, X);		\
+      _FP_PACK_RAW_2 (Q, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_PACK_SEMIRAW_QP(val, X)		\
+  do						\
+    {						\
+      _FP_PACK_SEMIRAW (Q, 2, X);		\
+      if (!FP_INHIBIT_RESULTS)			\
+	_FP_PACK_RAW_2_P (Q, (val), X);		\
+    }						\
+  while (0)
+
+# define FP_ISSIGNAN_Q(X)		_FP_ISSIGNAN (Q, 2, X)
+# define FP_NEG_Q(R, X)			_FP_NEG (Q, 2, R, X)
+# define FP_ADD_Q(R, X, Y)		_FP_ADD (Q, 2, R, X, Y)
+# define FP_SUB_Q(R, X, Y)		_FP_SUB (Q, 2, R, X, Y)
+# define FP_MUL_Q(R, X, Y)		_FP_MUL (Q, 2, R, X, Y)
+# define FP_DIV_Q(R, X, Y)		_FP_DIV (Q, 2, R, X, Y)
+# define FP_SQRT_Q(R, X)		_FP_SQRT (Q, 2, R, X)
+# define _FP_SQRT_MEAT_Q(R, S, T, X, Q)	_FP_SQRT_MEAT_2 (R, S, T, X, (Q))
+# define FP_FMA_Q(R, X, Y, Z)		_FP_FMA (Q, 2, 4, R, X, Y, Z)
+
+# define FP_CMP_Q(r, X, Y, un, ex)	_FP_CMP (Q, 2, (r), X, Y, (un), (ex))
+# define FP_CMP_EQ_Q(r, X, Y, ex)	_FP_CMP_EQ (Q, 2, (r), X, Y, (ex))
+# define FP_CMP_UNORD_Q(r, X, Y, ex)	_FP_CMP_UNORD (Q, 2, (r), X, Y, (ex))
+
+# define FP_TO_INT_Q(r, X, rsz, rsg)	_FP_TO_INT (Q, 2, (r), X, (rsz), (rsg))
+# define FP_TO_INT_ROUND_Q(r, X, rsz, rsg)	\
+  _FP_TO_INT_ROUND (Q, 2, (r), X, (rsz), (rsg))
+# define FP_FROM_INT_Q(X, r, rs, rt)	_FP_FROM_INT (Q, 2, X, (r), (rs), rt)
+
+# define _FP_FRAC_HIGH_Q(X)	_FP_FRAC_HIGH_2 (X)
+# define _FP_FRAC_HIGH_RAW_Q(X)	_FP_FRAC_HIGH_2 (X)
+
+# define _FP_FRAC_HIGH_DW_Q(X)	_FP_FRAC_HIGH_4 (X)
+
+#endif /* not _FP_W_TYPE_SIZE < 64 */
+
+#endif /* !SOFT_FP_QUAD_H */
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/s_frexpl.c b/libc/sysdeps/linux/sparc64/soft-fp/s_frexpl.c
new file mode 100644
index 000000000..6f0baeb3f
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/s_frexpl.c
@@ -0,0 +1,51 @@
+/* Software floating-point emulation.
+   frexpl(x, exp)
+   Copyright (C) 1999-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/*
+ * for non-zero x
+ *      x = frexpl(arg,&exp);
+ * return a long double fp quantity x such that 0.5 <= |x| <1.0
+ * and the corresponding binary exponent "exp". That is
+ *      arg = x*2^exp.
+ * If arg is inf, 0.0, or NaN, then frexpl(arg,&exp) returns arg
+ * with *exp=0.
+ */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+long double __frexpl(long double arg, int *exp)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A);
+  long double r;
+
+  *exp = 0;
+  FP_UNPACK_Q(A, arg);
+  if (A_c != FP_CLS_NORMAL)
+    return arg;
+  *exp = A_e + 1;
+  A_e = -1;
+  FP_PACK_Q(r, A);
+
+  return r;
+}
+
+weak_alias (__frexpl, frexpl)
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/s_scalblnl.c b/libc/sysdeps/linux/sparc64/soft-fp/s_scalblnl.c
new file mode 100644
index 000000000..250a8f4f0
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/s_scalblnl.c
@@ -0,0 +1,52 @@
+/* Software floating-point emulation.
+   scalblnl(x, exp)
+   Copyright (C) 1999-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/*
+ * scalblnl (long double x, long int n)
+ * scalblnl(x,n) returns x* 2**n  computed by  exponent
+ * manipulation rather than by actually performing an
+ * exponentiation or a multiplication.
+ */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+long double __scalblnl(long double arg, int exp)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A);
+  long double r;
+
+  FP_UNPACK_Q(A, arg);
+  switch (A_c)
+    {
+    case FP_CLS_ZERO:
+      return arg;
+    case FP_CLS_NAN:
+    case FP_CLS_INF:
+      FP_HANDLE_EXCEPTIONS;
+      return arg;
+    }
+  A_e += exp;
+  FP_PACK_Q(r, A);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/s_scalbnl.c b/libc/sysdeps/linux/sparc64/soft-fp/s_scalbnl.c
new file mode 100644
index 000000000..c686175e9
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/s_scalbnl.c
@@ -0,0 +1,52 @@
+/* Software floating-point emulation.
+   scalbnl(x, exp)
+   Copyright (C) 1999-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/*
+ * scalbnl (long double x, int n)
+ * scalbnl(x,n) returns x* 2**n  computed by  exponent
+ * manipulation rather than by actually performing an
+ * exponentiation or a multiplication.
+ */
+
+#include "soft-fp.h"
+#include "quad.h"
+
+long double __scalbnl(long double arg, int exp)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A);
+  long double r;
+
+  FP_UNPACK_Q(A, arg);
+  switch (A_c)
+    {
+    case FP_CLS_ZERO:
+      return arg;
+    case FP_CLS_NAN:
+    case FP_CLS_INF:
+      FP_HANDLE_EXCEPTIONS;
+      return arg;
+    }
+  A_e += exp;
+  FP_PACK_Q(r, A);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/sfp-machine.h b/libc/sysdeps/linux/sparc64/soft-fp/sfp-machine.h
new file mode 100644
index 000000000..c03a6d376
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/sfp-machine.h
@@ -0,0 +1,147 @@
+/* Machine-dependent software floating-point definitions.
+   Sparc64 userland (_Q_* and _Qp_*) version.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz) and
+		  David S. Miller (davem@redhat.com).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fpu_control.h>
+#include <fenv.h>
+#include <stdlib.h>
+
+#define _FP_W_TYPE_SIZE		64
+#define _FP_W_TYPE		unsigned long
+#define _FP_WS_TYPE		signed long
+#define _FP_I_TYPE		long
+
+/* Helper macros for _FP_MUL_MEAT_2_120_240_double.  */
+#define _FP_MUL_MEAT_SET_FE_TZ		   			\
+do {								\
+  static fpu_control_t _fetz = _FPU_RC_DOWN;			\
+  _FPU_SETCW(_fetz);						\
+} while (0)
+#ifndef _FP_MUL_MEAT_RESET_FE
+#define _FP_MUL_MEAT_RESET_FE _FPU_SETCW(_fcw)
+#endif
+
+#define _FP_MUL_MEAT_S(R,X,Y)					\
+  _FP_MUL_MEAT_1_imm(_FP_WFRACBITS_S,R,X,Y)
+#define _FP_MUL_MEAT_D(R,X,Y)					\
+  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y)					\
+  _FP_MUL_MEAT_2_120_240_double(_FP_WFRACBITS_Q,R,X,Y,		\
+				_FP_MUL_MEAT_SET_FE_TZ,		\
+				_FP_MUL_MEAT_RESET_FE)
+
+#define _FP_DIV_MEAT_S(R,X,Y)	_FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm)
+#define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_1_udiv_norm(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_2_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S		((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D		((_FP_QNANBIT_D << 1) - 1)
+#define _FP_NANFRAC_Q		((_FP_QNANBIT_Q << 1) - 1), -1
+#define _FP_NANSIGN_S		0
+#define _FP_NANSIGN_D		0
+#define _FP_NANSIGN_Q		0
+
+#define _FP_KEEPNANFRACP 1
+#define _FP_QNANNEGATEDP 0
+
+/* If one NaN is signaling and the other is not,
+ * we choose that one, otherwise we choose Y.
+ */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)			\
+  do {								\
+    if ((_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)		\
+	&& !(_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs))	\
+      {								\
+	R##_s = X##_s;						\
+	_FP_FRAC_COPY_##wc(R,X);				\
+      }								\
+    else							\
+      {								\
+	R##_s = Y##_s;						\
+	_FP_FRAC_COPY_##wc(R,Y);				\
+      }								\
+    R##_c = FP_CLS_NAN;						\
+  } while (0)
+
+/* Obtain the current rounding mode. */
+#ifndef FP_ROUNDMODE
+#define FP_ROUNDMODE	((_fcw >> 30) & 0x3)
+#endif
+
+/* Exception flags. */
+#define FP_EX_INVALID		(1 << 4)
+#define FP_EX_OVERFLOW		(1 << 3)
+#define FP_EX_UNDERFLOW		(1 << 2)
+#define FP_EX_DIVZERO		(1 << 1)
+#define FP_EX_INEXACT		(1 << 0)
+
+#define _FP_TININESS_AFTER_ROUNDING 0
+
+#define _FP_DECL_EX \
+  fpu_control_t _fcw __attribute__ ((unused)) = (FP_RND_NEAREST << 30)
+
+#define FP_INIT_ROUNDMODE					\
+do {								\
+  _FPU_GETCW(_fcw);						\
+} while (0)
+
+#define FP_TRAPPING_EXCEPTIONS ((_fcw >> 23) & 0x1f)
+#define FP_INHIBIT_RESULTS ((_fcw >> 23) & _fex)
+
+/* Simulate exceptions using double arithmetics. */
+extern void __Qp_handle_exceptions(int exc);
+
+#define FP_HANDLE_EXCEPTIONS					\
+do {								\
+  if (!_fex)							\
+    {								\
+      /* This is the common case, so we do it inline.		\
+       * We need to clear cexc bits if any.			\
+       */							\
+      __asm__ __volatile__("fzero %%f62\n\t"			\
+			   "faddd %%f62, %%f62, %%f62"		\
+			   : : : "f62");			\
+    }								\
+  else								\
+    {								\
+      __Qp_handle_exceptions (_fex);				\
+    }								\
+} while (0)
+
+#define QP_HANDLE_EXCEPTIONS(_a)				\
+do {								\
+  if ((_fcw >> 23) & _fex)					\
+    {								\
+      _a;							\
+    }								\
+  else								\
+    {								\
+      _fcw = (_fcw & ~0x1fL) | (_fex << 5) | _fex;		\
+      _FPU_SETCW(_fcw);						\
+    }								\
+} while (0)
+
+#define QP_NO_EXCEPTIONS					\
+  __asm ("fzero %%f62\n\t"					\
+	 "faddd %%f62, %%f62, %%f62" : : : "f62")
+
+#define QP_CLOBBER "memory", "f52", "f54", "f56", "f58", "f60", "f62"
+#define QP_CLOBBER_CC QP_CLOBBER , "cc"
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/single.h b/libc/sysdeps/linux/sparc64/soft-fp/single.h
new file mode 100644
index 000000000..75ade9fbe
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/single.h
@@ -0,0 +1,199 @@
+/* Software floating-point emulation.
+   Definitions for IEEE Single Precision.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef SOFT_FP_SINGLE_H
+#define SOFT_FP_SINGLE_H	1
+
+#if _FP_W_TYPE_SIZE < 32
+# error "Here's a nickel kid.  Go buy yourself a real computer."
+#endif
+
+#define _FP_FRACTBITS_S		_FP_W_TYPE_SIZE
+
+#if _FP_W_TYPE_SIZE < 64
+# define _FP_FRACTBITS_DW_S	(2 * _FP_W_TYPE_SIZE)
+#else
+# define _FP_FRACTBITS_DW_S	_FP_W_TYPE_SIZE
+#endif
+
+#define _FP_FRACBITS_S		24
+#define _FP_FRACXBITS_S		(_FP_FRACTBITS_S - _FP_FRACBITS_S)
+#define _FP_WFRACBITS_S		(_FP_WORKBITS + _FP_FRACBITS_S)
+#define _FP_WFRACXBITS_S	(_FP_FRACTBITS_S - _FP_WFRACBITS_S)
+#define _FP_EXPBITS_S		8
+#define _FP_EXPBIAS_S		127
+#define _FP_EXPMAX_S		255
+#define _FP_QNANBIT_S		((_FP_W_TYPE) 1 << (_FP_FRACBITS_S-2))
+#define _FP_QNANBIT_SH_S	((_FP_W_TYPE) 1 << (_FP_FRACBITS_S-2+_FP_WORKBITS))
+#define _FP_IMPLBIT_S		((_FP_W_TYPE) 1 << (_FP_FRACBITS_S-1))
+#define _FP_IMPLBIT_SH_S	((_FP_W_TYPE) 1 << (_FP_FRACBITS_S-1+_FP_WORKBITS))
+#define _FP_OVERFLOW_S		((_FP_W_TYPE) 1 << (_FP_WFRACBITS_S))
+
+#define _FP_WFRACBITS_DW_S	(2 * _FP_WFRACBITS_S)
+#define _FP_WFRACXBITS_DW_S	(_FP_FRACTBITS_DW_S - _FP_WFRACBITS_DW_S)
+#define _FP_HIGHBIT_DW_S	\
+  ((_FP_W_TYPE) 1 << (_FP_WFRACBITS_DW_S - 1) % _FP_W_TYPE_SIZE)
+
+/* The implementation of _FP_MUL_MEAT_S and _FP_DIV_MEAT_S should be
+   chosen by the target machine.  */
+
+typedef float SFtype __attribute__ ((mode (SF)));
+
+union _FP_UNION_S
+{
+  SFtype flt;
+  struct _FP_STRUCT_LAYOUT
+  {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    unsigned sign : 1;
+    unsigned exp  : _FP_EXPBITS_S;
+    unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0);
+#else
+    unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0);
+    unsigned exp  : _FP_EXPBITS_S;
+    unsigned sign : 1;
+#endif
+  } bits __attribute__ ((packed));
+};
+
+#define FP_DECL_S(X)		_FP_DECL (1, X)
+#define FP_UNPACK_RAW_S(X, val)	_FP_UNPACK_RAW_1 (S, X, (val))
+#define FP_UNPACK_RAW_SP(X, val)	_FP_UNPACK_RAW_1_P (S, X, (val))
+#define FP_PACK_RAW_S(val, X)	_FP_PACK_RAW_1 (S, (val), X)
+#define FP_PACK_RAW_SP(val, X)			\
+  do						\
+    {						\
+      if (!FP_INHIBIT_RESULTS)			\
+	_FP_PACK_RAW_1_P (S, (val), X);		\
+    }						\
+  while (0)
+
+#define FP_UNPACK_S(X, val)			\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_1 (S, X, (val));		\
+      _FP_UNPACK_CANONICAL (S, 1, X);		\
+    }						\
+  while (0)
+
+#define FP_UNPACK_SP(X, val)			\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_1_P (S, X, (val));		\
+      _FP_UNPACK_CANONICAL (S, 1, X);		\
+    }						\
+  while (0)
+
+#define FP_UNPACK_SEMIRAW_S(X, val)		\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_1 (S, X, (val));		\
+      _FP_UNPACK_SEMIRAW (S, 1, X);		\
+    }						\
+  while (0)
+
+#define FP_UNPACK_SEMIRAW_SP(X, val)		\
+  do						\
+    {						\
+      _FP_UNPACK_RAW_1_P (S, X, (val));		\
+      _FP_UNPACK_SEMIRAW (S, 1, X);		\
+    }						\
+  while (0)
+
+#define FP_PACK_S(val, X)			\
+  do						\
+    {						\
+      _FP_PACK_CANONICAL (S, 1, X);		\
+      _FP_PACK_RAW_1 (S, (val), X);		\
+    }						\
+  while (0)
+
+#define FP_PACK_SP(val, X)			\
+  do						\
+    {						\
+      _FP_PACK_CANONICAL (S, 1, X);		\
+      if (!FP_INHIBIT_RESULTS)			\
+	_FP_PACK_RAW_1_P (S, (val), X);		\
+    }						\
+  while (0)
+
+#define FP_PACK_SEMIRAW_S(val, X)		\
+  do						\
+    {						\
+      _FP_PACK_SEMIRAW (S, 1, X);		\
+      _FP_PACK_RAW_1 (S, (val), X);		\
+    }						\
+  while (0)
+
+#define FP_PACK_SEMIRAW_SP(val, X)		\
+  do						\
+    {						\
+      _FP_PACK_SEMIRAW (S, 1, X);		\
+      if (!FP_INHIBIT_RESULTS)			\
+	_FP_PACK_RAW_1_P (S, (val), X);		\
+    }						\
+  while (0)
+
+#define FP_ISSIGNAN_S(X)		_FP_ISSIGNAN (S, 1, X)
+#define FP_NEG_S(R, X)			_FP_NEG (S, 1, R, X)
+#define FP_ADD_S(R, X, Y)		_FP_ADD (S, 1, R, X, Y)
+#define FP_SUB_S(R, X, Y)		_FP_SUB (S, 1, R, X, Y)
+#define FP_MUL_S(R, X, Y)		_FP_MUL (S, 1, R, X, Y)
+#define FP_DIV_S(R, X, Y)		_FP_DIV (S, 1, R, X, Y)
+#define FP_SQRT_S(R, X)			_FP_SQRT (S, 1, R, X)
+#define _FP_SQRT_MEAT_S(R, S, T, X, Q)	_FP_SQRT_MEAT_1 (R, S, T, X, (Q))
+
+#if _FP_W_TYPE_SIZE < 64
+# define FP_FMA_S(R, X, Y, Z)	_FP_FMA (S, 1, 2, R, X, Y, Z)
+#else
+# define FP_FMA_S(R, X, Y, Z)	_FP_FMA (S, 1, 1, R, X, Y, Z)
+#endif
+
+#define FP_CMP_S(r, X, Y, un, ex)	_FP_CMP (S, 1, (r), X, Y, (un), (ex))
+#define FP_CMP_EQ_S(r, X, Y, ex)	_FP_CMP_EQ (S, 1, (r), X, Y, (ex))
+#define FP_CMP_UNORD_S(r, X, Y, ex)	_FP_CMP_UNORD (S, 1, (r), X, Y, (ex))
+
+#define FP_TO_INT_S(r, X, rsz, rsg)	_FP_TO_INT (S, 1, (r), X, (rsz), (rsg))
+#define FP_TO_INT_ROUND_S(r, X, rsz, rsg)	\
+  _FP_TO_INT_ROUND (S, 1, (r), X, (rsz), (rsg))
+#define FP_FROM_INT_S(X, r, rs, rt)	_FP_FROM_INT (S, 1, X, (r), (rs), rt)
+
+#define _FP_FRAC_HIGH_S(X)	_FP_FRAC_HIGH_1 (X)
+#define _FP_FRAC_HIGH_RAW_S(X)	_FP_FRAC_HIGH_1 (X)
+
+#if _FP_W_TYPE_SIZE < 64
+# define _FP_FRAC_HIGH_DW_S(X)	_FP_FRAC_HIGH_2 (X)
+#else
+# define _FP_FRAC_HIGH_DW_S(X)	_FP_FRAC_HIGH_1 (X)
+#endif
+
+#endif /* !SOFT_FP_SINGLE_H */
diff --git a/libc/sysdeps/linux/sparc64/soft-fp/soft-fp.h b/libc/sysdeps/linux/sparc64/soft-fp/soft-fp.h
new file mode 100644
index 000000000..079402e92
--- /dev/null
+++ b/libc/sysdeps/linux/sparc64/soft-fp/soft-fp.h
@@ -0,0 +1,342 @@
+/* Software floating-point emulation.
+   Copyright (C) 1997-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef SOFT_FP_H
+#define SOFT_FP_H	1
+
+#include "sfp-machine.h"
+
+/* Allow sfp-machine to have its own byte order definitions.  */
+#ifndef __BYTE_ORDER
+# ifdef _LIBC
+#  include <endian.h>
+# else
+#  error "endianness not defined by sfp-machine.h"
+# endif
+#endif
+
+/* For unreachable default cases in switch statements over bitwise OR
+   of FP_CLS_* values.  */
+#if (defined __GNUC__							\
+     && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
+# define _FP_UNREACHABLE	__builtin_unreachable ()
+#else
+# define _FP_UNREACHABLE	abort ()
+#endif
+
+#if ((defined __GNUC__							\
+      && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))	\
+     || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 201112L))
+# define _FP_STATIC_ASSERT(expr, msg)		\
+  _Static_assert ((expr), msg)
+#else
+# define _FP_STATIC_ASSERT(expr, msg)					\
+  extern int (*__Static_assert_function (void))				\
+    [!!sizeof (struct { int __error_if_negative: (expr) ? 2 : -1; })]
+#endif
+
+/* In the Linux kernel, some architectures have a single function that
+   uses different kinds of unpacking and packing depending on the
+   instruction being emulated, meaning it is not readily visible to
+   the compiler that variables from _FP_DECL and _FP_FRAC_DECL_*
+   macros are only used in cases where they were initialized.  */
+#ifdef __KERNEL__
+# define _FP_ZERO_INIT		= 0
+#else
+# define _FP_ZERO_INIT
+#endif
+
+#define _FP_WORKBITS		3
+#define _FP_WORK_LSB		((_FP_W_TYPE) 1 << 3)
+#define _FP_WORK_ROUND		((_FP_W_TYPE) 1 << 2)
+#define _FP_WORK_GUARD		((_FP_W_TYPE) 1 << 1)
+#define _FP_WORK_STICKY		((_FP_W_TYPE) 1 << 0)
+
+#ifndef FP_RND_NEAREST
+# define FP_RND_NEAREST		0
+# define FP_RND_ZERO		1
+# define FP_RND_PINF		2
+# define FP_RND_MINF		3
+#endif
+#ifndef FP_ROUNDMODE
+# define FP_ROUNDMODE		FP_RND_NEAREST
+#endif
+
+/* By default don't care about exceptions.  */
+#ifndef FP_EX_INVALID
+# define FP_EX_INVALID		0
+#endif
+#ifndef FP_EX_OVERFLOW
+# define FP_EX_OVERFLOW		0
+#endif
+#ifndef FP_EX_UNDERFLOW
+# define FP_EX_UNDERFLOW	0
+#endif
+#ifndef FP_EX_DIVZERO
+# define FP_EX_DIVZERO		0
+#endif
+#ifndef FP_EX_INEXACT
+# define FP_EX_INEXACT		0
+#endif
+#ifndef FP_EX_DENORM
+# define FP_EX_DENORM		0
+#endif
+
+/* Sub-exceptions of "invalid".  */
+/* Signaling NaN operand.  */
+#ifndef FP_EX_INVALID_SNAN
+# define FP_EX_INVALID_SNAN	0
+#endif
+/* Inf * 0.  */
+#ifndef FP_EX_INVALID_IMZ
+# define FP_EX_INVALID_IMZ	0
+#endif
+/* fma (Inf, 0, c).  */
+#ifndef FP_EX_INVALID_IMZ_FMA
+# define FP_EX_INVALID_IMZ_FMA	0
+#endif
+/* Inf - Inf.  */
+#ifndef FP_EX_INVALID_ISI
+# define FP_EX_INVALID_ISI	0
+#endif
+/* 0 / 0.  */
+#ifndef FP_EX_INVALID_ZDZ
+# define FP_EX_INVALID_ZDZ	0
+#endif
+/* Inf / Inf.  */
+#ifndef FP_EX_INVALID_IDI
+# define FP_EX_INVALID_IDI	0
+#endif
+/* sqrt (negative).  */
+#ifndef FP_EX_INVALID_SQRT
+# define FP_EX_INVALID_SQRT	0
+#endif
+/* Invalid conversion to integer.  */
+#ifndef FP_EX_INVALID_CVI
+# define FP_EX_INVALID_CVI	0
+#endif
+/* Invalid comparison.  */
+#ifndef FP_EX_INVALID_VC
+# define FP_EX_INVALID_VC	0
+#endif
+
+/* _FP_STRUCT_LAYOUT may be defined as an attribute to determine the
+   struct layout variant used for structures where bit-fields are used
+   to access specific parts of binary floating-point numbers.  This is
+   required for systems where the default ABI uses struct layout with
+   differences in how consecutive bit-fields are laid out from the
+   default expected by soft-fp.  */
+#ifndef _FP_STRUCT_LAYOUT
+# define _FP_STRUCT_LAYOUT
+#endif
+
+#ifdef _FP_DECL_EX
+# define FP_DECL_EX					\
+  int _fex = 0;						\
+  _FP_DECL_EX
+#else
+# define FP_DECL_EX int _fex = 0
+#endif
+
+/* Initialize any machine-specific state used in FP_ROUNDMODE,
+   FP_TRAPPING_EXCEPTIONS or FP_HANDLE_EXCEPTIONS.  */
+#ifndef FP_INIT_ROUNDMODE
+# define FP_INIT_ROUNDMODE do {} while (0)
+#endif
+
+/* Initialize any machine-specific state used in
+   FP_TRAPPING_EXCEPTIONS or FP_HANDLE_EXCEPTIONS.  */
+#ifndef FP_INIT_TRAPPING_EXCEPTIONS
+# define FP_INIT_TRAPPING_EXCEPTIONS FP_INIT_ROUNDMODE
+#endif
+
+/* Initialize any machine-specific state used in
+   FP_HANDLE_EXCEPTIONS.  */
+#ifndef FP_INIT_EXCEPTIONS
+# define FP_INIT_EXCEPTIONS FP_INIT_TRAPPING_EXCEPTIONS
+#endif
+
+#ifndef FP_HANDLE_EXCEPTIONS
+# define FP_HANDLE_EXCEPTIONS do {} while (0)
+#endif
+
+/* Whether to flush subnormal inputs to zero with the same sign.  */
+#ifndef FP_DENORM_ZERO
+# define FP_DENORM_ZERO 0
+#endif
+
+#ifndef FP_INHIBIT_RESULTS
+/* By default we write the results always.
+   sfp-machine may override this and e.g.
+   check if some exceptions are unmasked
+   and inhibit it in such a case.  */
+# define FP_INHIBIT_RESULTS 0
+#endif
+
+#define FP_SET_EXCEPTION(ex)				\
+  _fex |= (ex)
+
+#define FP_CUR_EXCEPTIONS				\
+  (_fex)
+
+#ifndef FP_TRAPPING_EXCEPTIONS
+# define FP_TRAPPING_EXCEPTIONS 0
+#endif
+
+/* A file using soft-fp may define FP_NO_EXCEPTIONS before including
+   soft-fp.h to indicate that, although a macro used there could raise
+   exceptions, or do rounding and potentially thereby raise
+   exceptions, for some arguments, for the particular arguments used
+   in that file no exceptions or rounding can occur.  Such a file
+   should not itself use macros relating to handling exceptions and
+   rounding modes; this is only for indirect uses (in particular, in
+   _FP_FROM_INT and the macros it calls).  */
+#ifdef FP_NO_EXCEPTIONS
+
+# undef FP_SET_EXCEPTION
+# define FP_SET_EXCEPTION(ex) do {} while (0)
+
+# undef FP_CUR_EXCEPTIONS
+# define FP_CUR_EXCEPTIONS 0
+
+# undef FP_TRAPPING_EXCEPTIONS
+# define FP_TRAPPING_EXCEPTIONS 0
+
+# undef FP_ROUNDMODE
+# define FP_ROUNDMODE FP_RND_ZERO
+
+# undef _FP_TININESS_AFTER_ROUNDING
+# define _FP_TININESS_AFTER_ROUNDING 0
+
+#endif
+
+/* A file using soft-fp may define FP_NO_EXACT_UNDERFLOW before
+   including soft-fp.h to indicate that, although a macro used there
+   could allow for the case of exact underflow requiring the underflow
+   exception to be raised if traps are enabled, for the particular
+   arguments used in that file no exact underflow can occur.  */
+#ifdef FP_NO_EXACT_UNDERFLOW
+# undef FP_TRAPPING_EXCEPTIONS
+# define FP_TRAPPING_EXCEPTIONS 0
+#endif
+
+#define _FP_ROUND_NEAREST(wc, X)				\
+  do								\
+    {								\
+      if ((_FP_FRAC_LOW_##wc (X) & 15) != _FP_WORK_ROUND)	\
+	_FP_FRAC_ADDI_##wc (X, _FP_WORK_ROUND);			\
+    }								\
+  while (0)
+
+#define _FP_ROUND_ZERO(wc, X)		(void) 0
+
+#define _FP_ROUND_PINF(wc, X)				\
+  do							\
+    {							\
+      if (!X##_s && (_FP_FRAC_LOW_##wc (X) & 7))	\
+	_FP_FRAC_ADDI_##wc (X, _FP_WORK_LSB);		\
+    }							\
+  while (0)
+
+#define _FP_ROUND_MINF(wc, X)			\
+  do						\
+    {						\
+      if (X##_s && (_FP_FRAC_LOW_##wc (X) & 7))	\
+	_FP_FRAC_ADDI_##wc (X, _FP_WORK_LSB);	\
+    }						\
+  while (0)
+
+#define _FP_ROUND(wc, X)			\
+  do						\
+    {						\
+      if (_FP_FRAC_LOW_##wc (X) & 7)		\
+	{					\
+	  FP_SET_EXCEPTION (FP_EX_INEXACT);	\
+	  switch (FP_ROUNDMODE)			\
+	    {					\
+	    case FP_RND_NEAREST:		\
+	      _FP_ROUND_NEAREST (wc, X);	\
+	      break;				\
+	    case FP_RND_ZERO:			\
+	      _FP_ROUND_ZERO (wc, X);		\
+	      break;				\
+	    case FP_RND_PINF:			\
+	      _FP_ROUND_PINF (wc, X);		\
+	      break;				\
+	    case FP_RND_MINF:			\
+	      _FP_ROUND_MINF (wc, X);		\
+	      break;				\
+	    }					\
+	}					\
+    }						\
+  while (0)
+
+#define FP_CLS_NORMAL		0
+#define FP_CLS_ZERO		1
+#define FP_CLS_INF		2
+#define FP_CLS_NAN		3
+
+#define _FP_CLS_COMBINE(x, y)	(((x) << 2) | (y))
+
+#include "op-1.h"
+#include "op-2.h"
+#include "op-4.h"
+#include "op-8.h"
+#include "op-common.h"
+
+/* Sigh.  Silly things longlong.h needs.  */
+#define UWtype		_FP_W_TYPE
+#define W_TYPE_SIZE	_FP_W_TYPE_SIZE
+
+typedef int QItype __attribute__ ((mode (QI)));
+typedef int SItype __attribute__ ((mode (SI)));
+typedef int DItype __attribute__ ((mode (DI)));
+typedef unsigned int UQItype __attribute__ ((mode (QI)));
+typedef unsigned int USItype __attribute__ ((mode (SI)));
+typedef unsigned int UDItype __attribute__ ((mode (DI)));
+#if _FP_W_TYPE_SIZE == 32
+typedef unsigned int UHWtype __attribute__ ((mode (HI)));
+#elif _FP_W_TYPE_SIZE == 64
+typedef USItype UHWtype;
+#endif
+
+#ifndef CMPtype
+# define CMPtype	int
+#endif
+
+#define SI_BITS		(__CHAR_BIT__ * (int) sizeof (SItype))
+#define DI_BITS		(__CHAR_BIT__ * (int) sizeof (DItype))
+
+#ifndef umul_ppmm
+# include "longlong.h"
+#endif
+
+#endif /* !SOFT_FP_H */
author	Waldemar Brodkorb <wbx@openadk.org>	2017-05-24 20:49:02 +0200
committer	Waldemar Brodkorb <wbx@openadk.org>	2017-06-23 23:46:04 +0200
commit	041cdc2769407c4d3869b218ad7ee7638e1c306e (patch)
tree	1ea25250d74dcb230cf5feee99226fc080dbd678 /libc/sysdeps/linux/sparc64/soft-fp
parent	58a5ba12bffad5916d9897c2870fc483f1db8282 (diff)