crt/fma{,f}.c: Implement FMA for `double` and `float` properly
FMA of `double` shall not be implemented using `long double`, as
the result has 106 bits and cannot be stored accurately in a
`long double`, whose mantissa only has 64 bits.
Let's calculate `FMA(0x1.0000000000001p0, 1, 0x0.00000000000007FFp0)`:
First, we multiply `0x1.0000000000001p0` and `1`, which yields
`0x1.0000000000001p0`. Then we add `0x0.00000000000007FFp0` to it:
0x1.0000 0000 0000 1 p0
+) 0x0.0000 0000 0000 07FF p0
---------------------------------
0x1.0000 0000 0000 17FF p0 (long double)
This result has 65 significant bits. When it is stored into a
`long double`, the last bit is rounded to even, as follows:
0x1.0000 0000 0000 17FF p0
1 <= rounded UP as this bit is set
---------------------------------
0x1.0000 0000 0000 1800 p0 (long double)
If we attempt to round this value into `double` again, we get:
0x1.0000 0000 0000 1800 p0
8 <= rounded UP as this bit is set
---------------------------------
0x1.0000 0000 0000 2000 p0 (double)
This is wrong. Because FMA shall round the result exactly once. If we
round the first result to double we get a different result:
0x0.0000 0000 0000 17FF p0
8 <= rounded DOWN as this bit is clear
---------------------------------
0x1.0000 0000 0000 1000 p0 (double)
`float` suffers from a similar problem.
This patch fixes such issues.
Testcase for `double`:
#include <assert.h>
#include <math.h>
int main(void)
{
volatile double a = 0x1.0000000000001p0;
volatile double b = 1;
volatile double c = 0x0.00000000000007FFp0;
assert(a * b + c == 0x1.0000000000001p0);
assert(fma(a, b, c) == 0x1.0000000000001p0);
assert((double)((long double)a * b + c)
== 0x1.0000000000002p0);
}
Testcase for `float`:
#include <assert.h>
#include <math.h>
int main(void)
{
volatile float a = 0x0.800001p0f;
volatile float b = 0x0.5FFFFFp0f;;
volatile float c = 0x0.000000000000FFFFFFp0f;
assert(a * b + c == 0x0.2FFFFFCp0);
assert(fmaf(a, b, c) == 0x0.2FFFFFCp0);
assert((float)((double)a * b + c)
== 0x0.3000000p0);
}
Reference: https://www.exploringbinary.com/double-rounding-errors-in-floating
Signed-off-by: Liu Hao <lh_mouse@126.com>
diff --git a/mingw-w64-crt/math/fma.c b/mingw-w64-crt/math/fma.c
index c4ce738..6774be7 100644
--- a/mingw-w64-crt/math/fma.c
+++ b/mingw-w64-crt/math/fma.c
@@ -29,13 +29,69 @@
return z;
}
+#elif defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__)
+
+#include <math.h>
+#include <stdint.h>
+
+/* This is in accordance with the IEC 559 double-precision format.
+ * Be advised that due to the hidden bit, the higher half actually has 26 bits.
+ * Multiplying two 27-bit numbers will cause a 1-ULP error, which we cannot
+ * avoid. It is kept in the very last position.
+ */
+typedef union iec559_double_ {
+ struct __attribute__((__packed__)) {
+ uint64_t mlo : 27;
+ uint64_t mhi : 25;
+ uint64_t exp : 11;
+ uint64_t sgn : 1;
+ };
+ double f;
+} iec559_double;
+
+static inline void break_down(iec559_double *restrict lo, iec559_double *restrict hi, double x) {
+ hi->f = x;
+ /* Erase low-order significant bits. `hi->f` now has only 26 significant bits. */
+ hi->mlo = 0;
+ /* Store the low-order half. It will be normalized by the hardware. */
+ lo->f = x - hi->f;
+ /* Preserve signness in case of zero. */
+ lo->sgn = hi->sgn;
+}
+
+double fma(double x, double y, double z) {
+ /*
+ POSIX-2013:
+ 1. If x or y are NaN, a NaN shall be returned.
+ 2. If x multiplied by y is an exact infinity and z is also an infinity
+ but with the opposite sign, a domain error shall occur, and either a NaN
+ (if supported), or an implementation-defined value shall be returned.
+ 3. If one of x and y is infinite, the other is zero, and z is not a NaN,
+ a domain error shall occur, and either a NaN (if supported), or an
+ implementation-defined value shall be returned.
+ 4. If one of x and y is infinite, the other is zero, and z is a NaN, a NaN
+ shall be returned and a domain error may occur.
+ 5. If x* y is not 0*Inf nor Inf*0 and z is a NaN, a NaN shall be returned.
+ */
+ /* Check whether the result is finite. */
+ double ret = x * y + z;
+ if(!isfinite(ret)) {
+ return ret; /* If this naive check doesn't yield a finite value, the FMA isn't
+ likely to return one either. Forward the value as is. */
+ }
+ iec559_double xlo, xhi, ylo, yhi;
+ break_down(&xlo, &xhi, x);
+ break_down(&ylo, &yhi, y);
+ /* The order of these four statements is essential. Don't move them around. */
+ ret = z;
+ ret += xhi.f * yhi.f; /* The most significant item comes first. */
+ ret += xhi.f * ylo.f + xlo.f * yhi.f; /* They are equally significant. */
+ ret += xlo.f * ylo.f; /* The least significant item comes last. */
+ return ret;
+}
+
#else
-long double fmal(long double x, long double y, long double z);
-
-/* For platforms that don't have hardware FMA, emulate it. */
-double fma(double x, double y, double z){
- return (double)fmal(x, y, z);
-}
+#error Please add FMA implementation for this platform.
#endif
diff --git a/mingw-w64-crt/math/fmaf.c b/mingw-w64-crt/math/fmaf.c
index b3f58a8..4661e4b 100644
--- a/mingw-w64-crt/math/fmaf.c
+++ b/mingw-w64-crt/math/fmaf.c
@@ -29,13 +29,69 @@
return z;
}
+#elif defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__)
+
+#include <math.h>
+#include <stdint.h>
+
+/* This is in accordance with the IEC 559 single-precision format.
+ * Be advised that due to the hidden bit, the higher half actually has 11 bits.
+ * Multiplying two 13-bit numbers will cause a 1-ULP error, which we cannot
+ * avoid. It is kept in the very last position.
+ */
+typedef union iec559_float_ {
+ struct __attribute__((__packed__)) {
+ uint32_t mlo : 13;
+ uint32_t mhi : 10;
+ uint32_t exp : 8;
+ uint32_t sgn : 1;
+ };
+ float f;
+} iec559_float;
+
+static inline void break_down(iec559_float *restrict lo, iec559_float *restrict hi, float x) {
+ hi->f = x;
+ /* Erase low-order significant bits. `hi->f` now has only 11 significant bits. */
+ hi->mlo = 0;
+ /* Store the low-order half. It will be normalized by the hardware. */
+ lo->f = x - hi->f;
+ /* Preserve signness in case of zero. */
+ lo->sgn = hi->sgn;
+}
+
+float fmaf(float x, float y, float z) {
+ /*
+ POSIX-2013:
+ 1. If x or y are NaN, a NaN shall be returned.
+ 2. If x multiplied by y is an exact infinity and z is also an infinity
+ but with the opposite sign, a domain error shall occur, and either a NaN
+ (if supported), or an implementation-defined value shall be returned.
+ 3. If one of x and y is infinite, the other is zero, and z is not a NaN,
+ a domain error shall occur, and either a NaN (if supported), or an
+ implementation-defined value shall be returned.
+ 4. If one of x and y is infinite, the other is zero, and z is a NaN, a NaN
+ shall be returned and a domain error may occur.
+ 5. If x* y is not 0*Inf nor Inf*0 and z is a NaN, a NaN shall be returned.
+ */
+ /* Check whether the result is finite. */
+ float ret = x * y + z;
+ if(!isfinite(ret)) {
+ return ret; /* If this naive check doesn't yield a finite value, the FMA isn't
+ likely to return one either. Forward the value as is. */
+ }
+ iec559_float xlo, xhi, ylo, yhi;
+ break_down(&xlo, &xhi, x);
+ break_down(&ylo, &yhi, y);
+ /* The order of these four statements is essential. Don't move them around. */
+ ret = z;
+ ret += xhi.f * yhi.f; /* The most significant item comes first. */
+ ret += xhi.f * ylo.f + xlo.f * yhi.f; /* They are equally significant. */
+ ret += xlo.f * ylo.f; /* The least significant item comes last. */
+ return ret;
+}
+
#else
-long double fmal(long double x, long double y, long double z);
-
-/* For platforms that don't have hardware FMA, emulate it. */
-float fmaf(float x, float y, float z){
- return (float)fmal(x, y, z);
-}
+#error Please add FMA implementation for this platform.
#endif