crt: Fix _beginthread and _beginthreadex functions for i386 pre-msvcr100 builds

Like the 32-bit x86 version of the _XcptFilter function in pre-msvcr100 DLL
libraries, the _beginthread and _beginthreadex functions are also affected:
they do not handle SEH exceptions caused by SSE floating point exceptions
(STATUS_FLOAT_MULTIPLE_FAULTS and STATUS_FLOAT_MULTIPLE_TRAPS).

So if an SSE floating point exception occurs in a thread spawned by
_beginthread or _beginthreadex, it causes a fatal process crash.

Add mingw-w64 wrappers around the _beginthread and _beginthreadex functions
which register their own SEH handler and forward STATUS_FLOAT_MULTIPLE_FAULTS
and STATUS_FLOAT_MULTIPLE_TRAPS SEH exceptions to the __mingw_SEH_error_handler
function, which now properly handles them.

This change fixes all SSE floating point exception tests running in
non-main threads for i386 pre-msvcr100 builds (including the system OS
msvcrt.dll library).

Signed-off-by: LIU Hao <lh_mouse@126.com>
diff --git a/mingw-w64-crt/Makefile.am b/mingw-w64-crt/Makefile.am
index e368ae6..b651dce 100644
--- a/mingw-w64-crt/Makefile.am
+++ b/mingw-w64-crt/Makefile.am
@@ -577,6 +577,8 @@
   misc/msvcrt__getmainargs.c \
   misc/msvcrt__wgetmainargs.c \
   misc/i386__XcptFilter.c \
+  misc/i386__beginthread.c \
+  misc/i386__beginthreadex.c \
   math/i386__copysignf.c \
   misc/___lc_codepage_func.c \
   misc/___lc_handle_func.c \
@@ -1032,6 +1034,8 @@
 
 src_pre_msvcr100=\
   misc/i386__XcptFilter.c \
+  misc/i386__beginthread.c \
+  misc/i386__beginthreadex.c \
   misc/_invalid_parameter_noinfo_noreturn.c \
   misc/imaxdiv.c
 
diff --git a/mingw-w64-crt/lib-common/msvcrt.def.in b/mingw-w64-crt/lib-common/msvcrt.def.in
index 64a8fa2..7909159 100644
--- a/mingw-w64-crt/lib-common/msvcrt.def.in
+++ b/mingw-w64-crt/lib-common/msvcrt.def.in
@@ -536,8 +536,10 @@
 _atoi64
 _atoldbl
 F_X86_ANY(_beep)
-_beginthread
-_beginthreadex
+F_I386(__msvcrt_beginthread == _beginthread) ; rename _beginthread as it does not handle SSE floating point exceptions, real _beginthread provided by emu
+F_NON_I386(_beginthread)
+F_I386(__msvcrt_beginthreadex == _beginthreadex) ; rename _beginthreadex as it does not handle SSE floating point exceptions, real _beginthreadex provided by emu
+F_NON_I386(_beginthreadex)
 _c_exit
 _cabs DATA
 _callnewh
diff --git a/mingw-w64-crt/lib32/crtdll.def.in b/mingw-w64-crt/lib32/crtdll.def.in
index bc4b926..99ededa 100644
--- a/mingw-w64-crt/lib32/crtdll.def.in
+++ b/mingw-w64-crt/lib32/crtdll.def.in
@@ -115,7 +115,7 @@
 _baseminor_dll DATA
 _baseversion_dll DATA
 _beep
-_beginthread
+__msvcrt_beginthread == _beginthread ; rename _beginthread as it does not handle SSE floating point exceptions, real _beginthread provided by emu
 _c_exit
 _cabs DATA
 _cexit
diff --git a/mingw-w64-crt/lib32/msvcr40d.def.in b/mingw-w64-crt/lib32/msvcr40d.def.in
index bcb770c..5f5f754 100644
--- a/mingw-w64-crt/lib32/msvcr40d.def.in
+++ b/mingw-w64-crt/lib32/msvcr40d.def.in
@@ -1085,8 +1085,8 @@
 _atodbl
 _atoldbl
 _beep
-_beginthread
-_beginthreadex
+__msvcrt_beginthread == _beginthread ; rename _beginthread as it does not handle SSE floating point exceptions, real _beginthread provided by emu
+__msvcrt_beginthreadex == _beginthreadex ; rename _beginthreadex as it does not handle SSE floating point exceptions, real _beginthreadex provided by emu
 _c_exit
 _cabs
 _calloc_dbg
diff --git a/mingw-w64-crt/lib32/msvcr70.def.in b/mingw-w64-crt/lib32/msvcr70.def.in
index 3a685e7..29a8aa9 100644
--- a/mingw-w64-crt/lib32/msvcr70.def.in
+++ b/mingw-w64-crt/lib32/msvcr70.def.in
@@ -263,8 +263,8 @@
 _atoi64
 _atoldbl
 _beep
-_beginthread
-_beginthreadex
+__msvcrt_beginthread == _beginthread ; rename _beginthread as it does not handle SSE floating point exceptions, real _beginthread provided by emu
+__msvcrt_beginthreadex == _beginthreadex ; rename _beginthreadex as it does not handle SSE floating point exceptions, real _beginthreadex provided by emu
 _c_exit
 _cabs
 _callnewh
diff --git a/mingw-w64-crt/lib32/msvcr70d.def.in b/mingw-w64-crt/lib32/msvcr70d.def.in
index 4eddb5a..3bbf5c5 100644
--- a/mingw-w64-crt/lib32/msvcr70d.def.in
+++ b/mingw-w64-crt/lib32/msvcr70d.def.in
@@ -297,8 +297,8 @@
 _atoi64
 _atoldbl
 _beep
-_beginthread
-_beginthreadex
+__msvcrt_beginthread == _beginthread ; rename _beginthread as it does not handle SSE floating point exceptions, real _beginthread provided by emu
+__msvcrt_beginthreadex == _beginthreadex ; rename _beginthreadex as it does not handle SSE floating point exceptions, real _beginthreadex provided by emu
 _c_exit
 _cabs
 _callnewh
diff --git a/mingw-w64-crt/lib32/msvcr71.def.in b/mingw-w64-crt/lib32/msvcr71.def.in
index bef231f..6248aa6 100644
--- a/mingw-w64-crt/lib32/msvcr71.def.in
+++ b/mingw-w64-crt/lib32/msvcr71.def.in
@@ -256,8 +256,8 @@
 _atoi64
 _atoldbl
 _beep
-_beginthread
-_beginthreadex
+__msvcrt_beginthread == _beginthread ; rename _beginthread as it does not handle SSE floating point exceptions, real _beginthread provided by emu
+__msvcrt_beginthreadex == _beginthreadex ; rename _beginthreadex as it does not handle SSE floating point exceptions, real _beginthreadex provided by emu
 _c_exit
 _cabs
 _callnewh
diff --git a/mingw-w64-crt/lib32/msvcr71d.def.in b/mingw-w64-crt/lib32/msvcr71d.def.in
index 6e6ee58..71fee41 100644
--- a/mingw-w64-crt/lib32/msvcr71d.def.in
+++ b/mingw-w64-crt/lib32/msvcr71d.def.in
@@ -290,8 +290,8 @@
 _atoi64
 _atoldbl
 _beep
-_beginthread
-_beginthreadex
+__msvcrt_beginthread == _beginthread ; rename _beginthread as it does not handle SSE floating point exceptions, real _beginthread provided by emu
+__msvcrt_beginthreadex == _beginthreadex ; rename _beginthreadex as it does not handle SSE floating point exceptions, real _beginthreadex provided by emu
 _c_exit
 _cabs
 _callnewh
diff --git a/mingw-w64-crt/lib32/msvcr80.def.in b/mingw-w64-crt/lib32/msvcr80.def.in
index fbbbae5..d84908c 100644
--- a/mingw-w64-crt/lib32/msvcr80.def.in
+++ b/mingw-w64-crt/lib32/msvcr80.def.in
@@ -368,8 +368,8 @@
 _atoldbl
 _atoldbl_l
 _beep
-_beginthread
-_beginthreadex
+__msvcrt_beginthread == _beginthread ; rename _beginthread as it does not handle SSE floating point exceptions, real _beginthread provided by emu
+__msvcrt_beginthreadex == _beginthreadex ; rename _beginthreadex as it does not handle SSE floating point exceptions, real _beginthreadex provided by emu
 _byteswap_uint64
 _byteswap_ulong
 _byteswap_ushort
diff --git a/mingw-w64-crt/lib32/msvcr80d.def.in b/mingw-w64-crt/lib32/msvcr80d.def.in
index 0970c09..30302a1 100644
--- a/mingw-w64-crt/lib32/msvcr80d.def.in
+++ b/mingw-w64-crt/lib32/msvcr80d.def.in
@@ -429,8 +429,8 @@
 _atoldbl
 _atoldbl_l
 _beep
-_beginthread
-_beginthreadex
+__msvcrt_beginthread == _beginthread ; rename _beginthread as it does not handle SSE floating point exceptions, real _beginthread provided by emu
+__msvcrt_beginthreadex == _beginthreadex ; rename _beginthreadex as it does not handle SSE floating point exceptions, real _beginthreadex provided by emu
 _byteswap_uint64
 _byteswap_ulong
 _byteswap_ushort
diff --git a/mingw-w64-crt/lib32/msvcr90.def.in b/mingw-w64-crt/lib32/msvcr90.def.in
index 166bba6..5c3d6a4 100644
--- a/mingw-w64-crt/lib32/msvcr90.def.in
+++ b/mingw-w64-crt/lib32/msvcr90.def.in
@@ -368,8 +368,8 @@
 _atoldbl
 _atoldbl_l
 _beep
-_beginthread
-_beginthreadex
+__msvcrt_beginthread == _beginthread ; rename _beginthread as it does not handle SSE floating point exceptions, real _beginthread provided by emu
+__msvcrt_beginthreadex == _beginthreadex ; rename _beginthreadex as it does not handle SSE floating point exceptions, real _beginthreadex provided by emu
 _byteswap_uint64
 _byteswap_ulong
 _byteswap_ushort
diff --git a/mingw-w64-crt/lib32/msvcr90d.def.in b/mingw-w64-crt/lib32/msvcr90d.def.in
index 1b454e6..294cb57 100644
--- a/mingw-w64-crt/lib32/msvcr90d.def.in
+++ b/mingw-w64-crt/lib32/msvcr90d.def.in
@@ -419,8 +419,8 @@
 _atoldbl
 _atoldbl_l
 _beep
-_beginthread
-_beginthreadex
+__msvcrt_beginthread == _beginthread ; rename _beginthread as it does not handle SSE floating point exceptions, real _beginthread provided by emu
+__msvcrt_beginthreadex == _beginthreadex ; rename _beginthreadex as it does not handle SSE floating point exceptions, real _beginthreadex provided by emu
 _byteswap_uint64
 _byteswap_ulong
 _byteswap_ushort
diff --git a/mingw-w64-crt/lib32/msvcrt10.def.in b/mingw-w64-crt/lib32/msvcrt10.def.in
index 57961b1..ab67734 100644
--- a/mingw-w64-crt/lib32/msvcrt10.def.in
+++ b/mingw-w64-crt/lib32/msvcrt10.def.in
@@ -903,7 +903,7 @@
 _aexit_rtn DATA
 _amsg_exit
 __msvcrt_assert DATA == _assert ; mingw-w64 provides _assert() function as wrapper around renamed __msvcrt_assert symbol
-_beginthread
+__msvcrt_beginthread == _beginthread ; rename _beginthread as it does not handle SSE floating point exceptions, real _beginthread provided by emu
 _c_exit
 _cabs
 _cexit
diff --git a/mingw-w64-crt/lib32/msvcrt20.def.in b/mingw-w64-crt/lib32/msvcrt20.def.in
index 2ca5846..0b13fc4 100644
--- a/mingw-w64-crt/lib32/msvcrt20.def.in
+++ b/mingw-w64-crt/lib32/msvcrt20.def.in
@@ -941,8 +941,8 @@
 _atodbl
 _atoldbl
 _beep
-_beginthread
-_beginthreadex
+__msvcrt_beginthread == _beginthread ; rename _beginthread as it does not handle SSE floating point exceptions, real _beginthread provided by emu
+__msvcrt_beginthreadex == _beginthreadex ; rename _beginthreadex as it does not handle SSE floating point exceptions, real _beginthreadex provided by emu
 _c_exit
 _cabs
 _cexit
diff --git a/mingw-w64-crt/lib32/msvcrt40.def.in b/mingw-w64-crt/lib32/msvcrt40.def.in
index ad31df8..95654f5 100644
--- a/mingw-w64-crt/lib32/msvcrt40.def.in
+++ b/mingw-w64-crt/lib32/msvcrt40.def.in
@@ -1059,8 +1059,8 @@
 _atodbl
 _atoldbl
 _beep
-_beginthread
-_beginthreadex
+__msvcrt_beginthread == _beginthread ; rename _beginthread as it does not handle SSE floating point exceptions, real _beginthread provided by emu
+__msvcrt_beginthreadex == _beginthreadex ; rename _beginthreadex as it does not handle SSE floating point exceptions, real _beginthreadex provided by emu
 _c_exit
 _cabs
 _cexit
diff --git a/mingw-w64-crt/lib32/msvcrtd.def.in b/mingw-w64-crt/lib32/msvcrtd.def.in
index d3e30bb..f7d5dce 100644
--- a/mingw-w64-crt/lib32/msvcrtd.def.in
+++ b/mingw-w64-crt/lib32/msvcrtd.def.in
@@ -242,8 +242,8 @@
 _atoi64
 _atoldbl
 _beep
-_beginthread
-_beginthreadex
+__msvcrt_beginthread == _beginthread ; rename _beginthread as it does not handle SSE floating point exceptions, real _beginthread provided by emu
+__msvcrt_beginthreadex == _beginthreadex ; rename _beginthreadex as it does not handle SSE floating point exceptions, real _beginthreadex provided by emu
 _c_exit
 _cabs
 _callnewh
diff --git a/mingw-w64-crt/misc/i386__beginthread.c b/mingw-w64-crt/misc/i386__beginthread.c
new file mode 100644
index 0000000..6d07df7
--- /dev/null
+++ b/mingw-w64-crt/misc/i386__beginthread.c
@@ -0,0 +1,70 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+
+#if defined(__i386__)
+
+/* Function _beginthread() in pre-msvcr100 libraries does not handle STATUS_FLOAT_MULTIPLE_FAULTS and STATUS_FLOAT_MULTIPLE_TRAPS SEH exceptions.
+ * These two SEH exceptions are raised by the Windows system for 32-bit x86 processes when an SSE floating point exception occurs.
+ * So fix the pre-msvcr100 behavior by catching those two exceptions manually and propagating them to __mingw_SEH_error_handler(), which correctly processes them.
+ * x87 floating point exceptions and also 64-bit x86 processes are correctly handled by msvcr* libraries. */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <process.h>
+#include <windows.h>
+
+EXCEPTION_DISPOSITION __cdecl __mingw_SEH_error_handler(EXCEPTION_RECORD *, PVOID, CONTEXT *, PVOID);
+
+#if defined(__i386__)
+/* SEH handler that hands the two SSE floating point exception codes to __mingw_SEH_error_handler(); the stack is aligned to 16 bytes for the sake of SSE */
+__attribute__((force_align_arg_pointer))
+#endif
+static EXCEPTION_DISPOSITION __cdecl sse_float_exception_handler(EXCEPTION_RECORD *ExceptionRecord, PVOID EstablisherFrame, CONTEXT *ContextRecord, PVOID DispatcherContext)
+{
+  const DWORD code = ExceptionRecord->ExceptionCode;
+  if (code != STATUS_FLOAT_MULTIPLE_FAULTS && code != STATUS_FLOAT_MULTIPLE_TRAPS)
+    return ExceptionContinueSearch; /* not an SSE floating point exception: leave it to the other handlers */
+  return __mingw_SEH_error_handler(ExceptionRecord, EstablisherFrame, ContextRecord, DispatcherContext);
+}
+
+#if defined(__i386__)
+/* Thread entry passed to __msvcrt_beginthread(): unpacks the {start_address, arglist} pair in data and runs start_address(arglist) with sse_float_exception_handler registered. We need to make sure that we align the stack to 16 bytes for the sake of SSE */
+__attribute__((force_align_arg_pointer))
+#endif
+static void __cdecl thread_func(void *data)
+{
+  void **thread_args = data;
+  _beginthread_proc_type start_address = thread_args[0]; /* the caller's thread routine */
+  void *arglist = thread_args[1]; /* argument for the caller's thread routine */
+  free(thread_args); /* both fields are now in locals, so release the malloc'ed pair before running the caller's code */
+  EXCEPTION_REGISTRATION_RECORD exception_record = {
+    .Next = (EXCEPTION_REGISTRATION_RECORD *)__readfsdword(0), /* remember the record currently stored at FS:[0] */
+    .Handler = (PEXCEPTION_ROUTINE)(INT_PTR)sse_float_exception_handler,
+  };
+  __writefsdword(0, (DWORD)&exception_record); /* register our record, which lives in this stack frame, at FS:[0] */
+  start_address(arglist);
+  __writefsdword(0, (DWORD)exception_record.Next); /* put back the saved record once the routine has returned */
+}
+
+uintptr_t __cdecl __msvcrt_beginthread(_beginthread_proc_type start_address, unsigned stack_size, void *arglist);
+/* Replacement for the CRT's _beginthread(): starts the thread through thread_func so that SSE floating point SEH exceptions are handled. Returns the value of the renamed CRT function, or (uintptr_t)-1 with errno = ENOMEM when the argument pair cannot be allocated. */
+uintptr_t __cdecl _beginthread(_beginthread_proc_type start_address, unsigned stack_size, void *arglist)
+{
+  void **thread_args = malloc(2 * sizeof(void *)); /* {start_address, arglist} pair; thread_func frees it once the thread runs */
+  if (!thread_args) {
+    errno = ENOMEM;
+    return (uintptr_t)-1; /* _beginthread signals failure with -1, not 0 (see the check below) */
+  }
+  thread_args[0] = (void *)start_address;
+  thread_args[1] = arglist;
+  uintptr_t ret = __msvcrt_beginthread(thread_func, stack_size, thread_args);
+  if (ret == (uintptr_t)-1)
+    free(thread_args); /* no thread was started, so thread_func will never release the pair */
+  return ret;
+}
+uintptr_t (__cdecl *__MINGW_IMP_SYMBOL(_beginthread))(_beginthread_proc_type, unsigned, void *) = _beginthread;
+
+#endif
diff --git a/mingw-w64-crt/misc/i386__beginthreadex.c b/mingw-w64-crt/misc/i386__beginthreadex.c
new file mode 100644
index 0000000..aede03f
--- /dev/null
+++ b/mingw-w64-crt/misc/i386__beginthreadex.c
@@ -0,0 +1,72 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+
+#if defined(__i386__)
+
+/* Function _beginthreadex() in pre-msvcr100 libraries does not handle STATUS_FLOAT_MULTIPLE_FAULTS and STATUS_FLOAT_MULTIPLE_TRAPS SEH exceptions.
+ * These two SEH exceptions are raised by the Windows system for 32-bit x86 processes when an SSE floating point exception occurs.
+ * So fix the pre-msvcr100 behavior by catching those two exceptions manually and propagating them to __mingw_SEH_error_handler(), which correctly processes them.
+ * x87 floating point exceptions and also 64-bit x86 processes are correctly handled by msvcr* libraries. */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <process.h>
+#include <windows.h>
+
+EXCEPTION_DISPOSITION __cdecl __mingw_SEH_error_handler(EXCEPTION_RECORD *, PVOID, CONTEXT *, PVOID);
+
+#if defined(__i386__)
+/* SEH handler that hands the two SSE floating point exception codes to __mingw_SEH_error_handler(); the stack is aligned to 16 bytes for the sake of SSE */
+__attribute__((force_align_arg_pointer))
+#endif
+static EXCEPTION_DISPOSITION __cdecl sse_float_exception_handler(EXCEPTION_RECORD *ExceptionRecord, PVOID EstablisherFrame, CONTEXT *ContextRecord, PVOID DispatcherContext)
+{
+  const DWORD code = ExceptionRecord->ExceptionCode;
+  if (code != STATUS_FLOAT_MULTIPLE_FAULTS && code != STATUS_FLOAT_MULTIPLE_TRAPS)
+    return ExceptionContinueSearch; /* not an SSE floating point exception: leave it to the other handlers */
+  return __mingw_SEH_error_handler(ExceptionRecord, EstablisherFrame, ContextRecord, DispatcherContext);
+}
+
+#if defined(__i386__)
+/* Thread entry passed to __msvcrt_beginthreadex(): unpacks the {start_address, arglist} pair in data, runs start_address(arglist) with sse_float_exception_handler registered and returns its result. We need to make sure that we align the stack to 16 bytes for the sake of SSE */
+__attribute__((force_align_arg_pointer))
+#endif
+static unsigned __stdcall thread_func(void *data)
+{
+  unsigned ret;
+  void **thread_args = data;
+  _beginthreadex_proc_type start_address = thread_args[0]; /* the caller's thread routine */
+  void *arglist = thread_args[1]; /* argument for the caller's thread routine */
+  free(thread_args); /* both fields are now in locals, so release the malloc'ed pair before running the caller's code */
+  EXCEPTION_REGISTRATION_RECORD exception_record = {
+    .Next = (EXCEPTION_REGISTRATION_RECORD *)__readfsdword(0), /* remember the record currently stored at FS:[0] */
+    .Handler = (PEXCEPTION_ROUTINE)(INT_PTR)sse_float_exception_handler,
+  };
+  __writefsdword(0, (DWORD)&exception_record); /* register our record, which lives in this stack frame, at FS:[0] */
+  ret = start_address(arglist);
+  __writefsdword(0, (DWORD)exception_record.Next); /* put back the saved record once the routine has returned */
+  return ret; /* hand the routine's result back to the caller of this entry point */
+}
+
+uintptr_t __cdecl __msvcrt_beginthreadex(void *security, unsigned stack_size, _beginthreadex_proc_type start_address, void *arglist, unsigned initflag, unsigned *thrdaddr);
+uintptr_t __cdecl _beginthreadex(void *security, unsigned stack_size, _beginthreadex_proc_type start_address, void *arglist, unsigned initflag, unsigned *thrdaddr)
+{
+  /* Replacement for the CRT's _beginthreadex(): the real routine and its argument travel to thread_func in a malloc'ed pair, which thread_func frees. */
+  void **packed = malloc(2 * sizeof(void *));
+  if (packed == NULL) {
+    errno = ENOMEM;
+    return 0;
+  }
+  packed[0] = (void *)start_address;
+  packed[1] = arglist;
+  uintptr_t handle = __msvcrt_beginthreadex(security, stack_size, thread_func, packed, initflag, thrdaddr);
+  if (handle == 0)
+    free(packed); /* no thread was started, so thread_func will never release the pair */
+  return handle;
+}
+uintptr_t (__cdecl *__MINGW_IMP_SYMBOL(_beginthreadex))(void *, unsigned, _beginthreadex_proc_type, void *, unsigned, unsigned *) = _beginthreadex;
+
+#endif
diff --git a/mingw-w64-crt/testcases/t_sigfpe.c b/mingw-w64-crt/testcases/t_sigfpe.c
index f4e2eec..0c30621 100644
--- a/mingw-w64-crt/testcases/t_sigfpe.c
+++ b/mingw-w64-crt/testcases/t_sigfpe.c
@@ -1101,13 +1101,6 @@
     else
 #endif
     {
-#if defined(_WIN32) && defined(__i386__)
-#if __MSVCRT_VERSION__ < 0xa00
-      /* FIXME: On Windows for 32-bit x86 processes with pre-msvcr100 all SSE floating point exceptions do not trigger SIGFPE for non-main thread and instead crashes process */
-      if (main_threadid == GetCurrentThreadId())
-#endif
-#endif
-      {
       ret |= sse_float_div_zero();
       ret |= sse_float_div_zero();
 
@@ -1122,7 +1115,6 @@
 
       ret |= sse_float_inexact();
       ret |= sse_float_inexact();
-      }
     }
   }
   else