|  | /** | 
|  | * This file has no copyright assigned and is placed in the Public Domain. | 
|  | * This file is part of the mingw-w64 runtime package. | 
|  | * No warranty is given; refer to the file DISCLAIMER.PD within this package. | 
|  | */ | 
|  | long double fmal(long double x, long double y, long double z); | 
|  |  | 
|  | #if defined(_ARM_) || defined(__arm__) || defined(_ARM64_) || defined(__aarch64__) | 
|  |  | 
|  | double fma(double x, double y, double z); | 
|  |  | 
|  | /* On ARM `long double` is 64 bits. And ARM has hardware FMA. */ | 
|  | long double fmal(long double x, long double y, long double z){ | 
|  | return fma(x, y, z); | 
|  | } | 
|  |  | 
|  | #elif defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__) | 
|  |  | 
|  | /** | 
|  | * x87-specific software-emulated FMA by LH_Mouse (lh_mouse at 126 dot com). | 
|  | * This file is donated to the mingw-w64 project. | 
|  | * Note: This file requires C99 support to compile. | 
|  | */ | 
|  |  | 
|  | #include <math.h> | 
|  | #include <stdint.h> | 
|  |  | 
|  | /* See <https://en.wikipedia.org/wiki/Extended_precision#x86_extended_precision_format>. | 
|  | * Note the higher half of the mantissa has fewer significant bits than the lower | 
|  | * half, which reduces rounding errors in the more significant position but increases | 
|  | * them in the other end. | 
|  | */ | 
|  | typedef union x87reg_ { | 
|  | struct __attribute__((__packed__)) { | 
|  | uint64_t mlo : 33; | 
|  | uint64_t mhi : 31; | 
|  | uint16_t exp : 15; | 
|  | uint16_t sgn :  1; | 
|  | }; | 
|  | long double f; | 
|  | } x87reg; | 
|  |  | 
|  | static inline void break_down(x87reg *restrict lo, x87reg *restrict hi, long double x) { | 
|  | hi->f = x; | 
|  | /* Erase low-order significant bits. `hi->f` now has only 31 significant bits. */ | 
|  | hi->mlo = 0; | 
|  | /* Store the low-order half. It will be normalized by the hardware. */ | 
|  | lo->f = x - hi->f; | 
|  | /* Preserve signness in case of zero. */ | 
|  | lo->sgn = hi->sgn; | 
|  | } | 
|  |  | 
|  | long double fmal(long double x, long double y, long double z) { | 
|  | /* | 
|  | POSIX-2013: | 
|  | 1. If x or y are NaN, a NaN shall be returned. | 
|  | 2. If x multiplied by y is an exact infinity and z is also an infinity | 
|  | but with the opposite sign, a domain error shall occur, and either a NaN | 
|  | (if supported), or an implementation-defined value shall be returned. | 
|  | 3. If one of x and y is infinite, the other is zero, and z is not a NaN, | 
|  | a domain error shall occur, and either a NaN (if supported), or an | 
|  | implementation-defined value shall be returned. | 
|  | 4. If one of x and y is infinite, the other is zero, and z is a NaN, a NaN | 
|  | shall be returned and a domain error may occur. | 
|  | 5. If x* y is not 0*Inf nor Inf*0 and z is a NaN, a NaN shall be returned. | 
|  | */ | 
|  | /* Check whether the result is finite. */ | 
|  | long double ret = x * y + z; | 
|  | if(!isfinite(ret)) { | 
|  | return ret; /* If this naive check doesn't yield a finite value, the FMA isn't | 
|  | likely to return one either. Forward the value as is. */ | 
|  | } | 
|  | x87reg xlo, xhi, ylo, yhi; | 
|  | break_down(&xlo, &xhi, x); | 
|  | break_down(&ylo, &yhi, y); | 
|  | /* The order of these four statements is essential. Don't move them around. */ | 
|  | ret = z; | 
|  | ret += xhi.f * yhi.f;                 /* The most significant item comes first. */ | 
|  | ret += xhi.f * ylo.f + xlo.f * yhi.f; /* They are equally significant. */ | 
|  | ret += xlo.f * ylo.f;                 /* The least significant item comes last. */ | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | #else | 
|  |  | 
|  | #error Please add FMA implementation for this platform. | 
|  |  | 
|  | #endif |