This set of M4 macros will one day be used to automatically generate <intrin.h>. For now, it just contains the declarations (prototype, linkage, attributes, etc.) of all intrinsics required by x86 targets, including all MMX, 3DNow! and SSE/2/3/4/4a intrinsics and intrinsics shared between x86 and other targets

git-svn-id: svn+ssh://svn.code.sf.net/p/mingw-w64/code/trunk@1511 4407c894-4637-0410-b4f5-ada5f102cad1
diff --git a/mingw-w64-headers/include/intrin-machine.m4 b/mingw-w64-headers/include/intrin-machine.m4
new file mode 100644
index 0000000..2ce0059
--- /dev/null
+++ b/mingw-w64-headers/include/intrin-machine.m4
@@ -0,0 +1,75 @@
+divert(`-1') dnl Output is discarded until the closing divert. Each INTRIN(return type, name[, ((type, name), ...)]) entry describes one intrinsic prototype.
+INTRIN(`void *',		`_alloca',		((`size_t', `size')))
+INTRIN(`int',			`abs',			((`int', `value')))
+INTRIN(`unsigned short',	`_byteswap_ushort',	((`unsigned short', `value')))
+INTRIN(`__INTRIN_ULONG',	`_byteswap_ulong',	((`__INTRIN_ULONG', `value')))
+INTRIN(`__INTRIN_UINT64',	`_byteswap_uint64',	((`__INTRIN_UINT64', `value')))
+INTRIN(`void',			`__debugbreak')
+INTRIN(`__INTRIN_LONG',		`labs',			((`__INTRIN_LONG', `value')))
+INTRIN(`__INTRIN_ULONG',	`_lrotl',		((`__INTRIN_ULONG', `value'), (`int', `shift')))
+INTRIN(`__INTRIN_ULONG',	`_lrotr',		((`__INTRIN_ULONG', `value'), (`int', `shift')))
+INTRIN(`int',			`memcmp',		((`const void *', `buf1'), (`const void *', `buf2'), (`size_t', `count')))
+INTRIN(`void *',		`memcpy',		((`void *', `dest'), (`const void *', `src'), (`size_t', `count')))
+INTRIN(`void *',		`memset',		((`void *', `dest'), (`int', `c'), (`size_t', `count')))
+INTRIN(`unsigned int',		`_rotl',		((`unsigned int', `value'), (`int', `shift')))
+INTRIN(`unsigned int',		`_rotr',		((`unsigned int', `value'), (`int', `shift')))
+INTRIN(`int',			`_setjmp',		((`jmp_buf', `env')))
+INTRIN(`char *',		`strcat',		((`char *', `dest'), (`const char *', `src')))
+INTRIN(`int',			`strcmp',		((`const char *', `str1'), (`const char *', `str2')))
+INTRIN(`char *',		`strcpy',		((`char *', `dest'), (`const char *', `src')))
+INTRIN(`size_t',		`strlen',		((`const char *', `str')))
+INTRIN(`char *',		`_strset',		((`char *', `str'), (`int', `c')))
+INTRIN(`char *',		`strset',		((`char *', `str'), (`int', `c')))
+INTRIN(`__INTRIN_UINT64',	`_rotl64',		((`__INTRIN_UINT64', `value'), (`int', `shift')))
+INTRIN(`__INTRIN_UINT64',	`_rotr64',		((`__INTRIN_UINT64', `value'), (`int', `shift')))
+INTRIN(`__INTRIN_INT64',	`_abs64',		((`__INTRIN_INT64', `value')))
+dnl Intrinsics passed to INTRIN_CDECL (macro defined outside this file; presumably selects the cdecl calling convention -- TODO confirm).
+INTRIN_CDECL(`_alloca')
+INTRIN_CDECL(`abs')
+INTRIN_CDECL(`_byteswap_ushort')
+INTRIN_CDECL(`_byteswap_ulong')
+INTRIN_CDECL(`_byteswap_uint64')
+INTRIN_CDECL(`__debugbreak')
+INTRIN_CDECL(`labs')
+INTRIN_CDECL(`_lrotl')
+INTRIN_CDECL(`_lrotr')
+INTRIN_CDECL(`memcmp')
+INTRIN_CDECL(`memcpy')
+INTRIN_CDECL(`memset')
+INTRIN_CDECL(`_rotl')
+INTRIN_CDECL(`_rotr')
+INTRIN_CDECL(`_setjmp')
+INTRIN_CDECL(`strcat')
+INTRIN_CDECL(`strcmp')
+INTRIN_CDECL(`strcpy')
+INTRIN_CDECL(`strlen')
+INTRIN_CDECL(`_strset')
+INTRIN_CDECL(`strset')
+INTRIN_CDECL(`_rotl64')
+INTRIN_CDECL(`_rotr64')
+INTRIN_CDECL(`_abs64')
+dnl Intrinsics passed to INTRIN_PURE. NOTE(review): wcscmp and wcslen have no INTRIN entry in this file; they are declared in intrin-machineiw64.m4 -- confirm the include order makes them visible here.
+INTRIN_PURE(`memcmp')
+INTRIN_PURE(`strcmp')
+INTRIN_PURE(`strlen')
+INTRIN_PURE(`wcscmp')
+INTRIN_PURE(`wcslen')
+dnl Intrinsics passed to INTRIN_CONST; names are spelled exactly as in the INTRIN entries of this file.
+INTRIN_CONST(`abs')
+INTRIN_CONST(`_byteswap_ushort')
+INTRIN_CONST(`_byteswap_ulong')
+INTRIN_CONST(`_byteswap_uint64')
+INTRIN_CONST(`labs')
+INTRIN_CONST(`_lrotl')
+INTRIN_CONST(`_lrotr')
+INTRIN_CONST(`_rotl')
+INTRIN_CONST(`_rotr')
+INTRIN_CONST(`_abs64')
+dnl FIXME: these aren't const on some architectures
+dnl INTRIN_CONST(`_rotl64')
+dnl INTRIN_CONST(`_rotr64')
+dnl _setjmp is also passed to INTRIN_SETJMP (macro defined outside this file).
+INTRIN_SETJMP(`_setjmp')
+
+divert`'dnl
+
diff --git a/mingw-w64-headers/include/intrin-machinei.m4 b/mingw-w64-headers/include/intrin-machinei.m4
new file mode 100644
index 0000000..c756308
--- /dev/null
+++ b/mingw-w64-headers/include/intrin-machinei.m4
@@ -0,0 +1,127 @@
+divert(`-1') dnl Output is discarded until the closing divert. Each INTRIN(return type, name[, ((type, name), ...)]) entry describes one intrinsic prototype.
+INTRIN(`void',			`_disable')
+INTRIN(`__INTRIN_INT64',	`__emul',				((`int', `a'), (`int', `b')))
+INTRIN(`__INTRIN_UINT64',	`__emulu',				((`unsigned int', `a'), (`unsigned int', `b')))
+INTRIN(`void',			`_enable')
+INTRIN(`__INTRIN_LONG',		`_InterlockedDecrement',		((`__INTRIN_LONG volatile *', `lpAddend')))
+INTRIN(`__INTRIN_LONG',		`_InterlockedExchange',			((`__INTRIN_LONG volatile *', `Target'), (`__INTRIN_LONG', `Value')))
+INTRIN(`__INTRIN_LONG',		`_InterlockedExchangeAdd',		((`__INTRIN_LONG volatile *', `Addend'), (`__INTRIN_LONG', `Value')))
+INTRIN(`__INTRIN_LONG',		`_InterlockedCompareExchange',		((`__INTRIN_LONG volatile *', `Destination'), (`__INTRIN_LONG', `Exchange'), (`__INTRIN_LONG', `Comperand')))
+INTRIN(`__INTRIN_INT64',	`_InterlockedCompareExchange64',	((`__INTRIN_INT64 volatile *', `Destination'), (`__INTRIN_INT64', `Exchange'), (`__INTRIN_INT64', `Comperand')))
+INTRIN(`__INTRIN_LONG',		`_InterlockedIncrement',		((`__INTRIN_LONG volatile *', `lpAddend')))
+INTRIN(`int',			`_inp',					((`unsigned short', `port')))
+INTRIN(`int',			`inp',					((`unsigned short', `port')))
+INTRIN(`__INTRIN_ULONG',	`_inpd',				((`unsigned short', `port')))
+INTRIN(`__INTRIN_ULONG',	`inpd',					((`unsigned short', `port')))
+INTRIN(`unsigned short',	`_inpw',				((`unsigned short', `port')))
+INTRIN(`unsigned short',	`inpw',					((`unsigned short', `port')))
+INTRIN(`__INTRIN_UINT64',	`__ll_lshift',				((`__INTRIN_UINT64', `Mask'), (`int', `nBit')))
+INTRIN(`__INTRIN_INT64',	`__ll_rshift',				((`__INTRIN_INT64', `Mask'), (`int', `nBit')))
+INTRIN(`int',			`_outp',				((`unsigned short', `port'), (`int', `databyte')))
+INTRIN(`int',			`outp',					((`unsigned short', `port'), (`int', `databyte')))
+INTRIN(`__INTRIN_ULONG',	`_outpd',				((`unsigned short', `port'), (`__INTRIN_ULONG', `dataword')))
+INTRIN(`__INTRIN_ULONG',	`outpd',				((`unsigned short', `port'), (`__INTRIN_ULONG', `dataword')))
+INTRIN(`unsigned short',	`_outpw',				((`unsigned short', `port'), (`unsigned short', `dataword')))
+INTRIN(`unsigned short',	`outpw',				((`unsigned short', `port'), (`unsigned short', `dataword')))
+INTRIN(`void *',		`_ReturnAddress')
+INTRIN(`__INTRIN_UINT64',	`__ull_rshift',				((`__INTRIN_UINT64', `Mask'), (`int', `nBit')))
+INTRIN(`void *',		`_AddressOfReturnAddress')
+INTRIN(`void',			`_WriteBarrier')
+INTRIN(`void',			`_ReadWriteBarrier')
+INTRIN(`void',			`__wbinvd')
+INTRIN(`void',			`__invlpg',				((`void *', `Address')))
+INTRIN(`__INTRIN_UINT64',	`__readmsr',				((`__INTRIN_ULONG', `Register')))
+INTRIN(`void',			`__writemsr',				((`__INTRIN_ULONG', `Register'), (`__INTRIN_UINT64', `Value')))
+INTRIN(`__INTRIN_UINT64',	`__rdtsc')
+INTRIN(`void',			`__movsb',				((`unsigned char *', `Destination'), (`unsigned char const *', `Source'), (`size_t', `Count')))
+INTRIN(`void',			`__movsw',				((`unsigned short *', `Destination'), (`unsigned short const *', `Source'), (`size_t', `Count')))
+INTRIN(`void',			`__movsd',				((`__INTRIN_ULONG *', `Destination'), (`__INTRIN_ULONG const *', `Source'), (`size_t', `Count')))
+INTRIN(`unsigned char',		`__inbyte',				((`unsigned short', `Port')))
+INTRIN(`unsigned short',	`__inword',				((`unsigned short', `Port')))
+INTRIN(`__INTRIN_ULONG',	`__indword',				((`unsigned short', `Port')))
+INTRIN(`void',			`__outbyte',				((`unsigned short', `Port'), (`unsigned char', `Data')))
+INTRIN(`void',			`__outword',				((`unsigned short', `Port'), (`unsigned short', `Data')))
+INTRIN(`void',			`__outdword',				((`unsigned short', `Port'), (`__INTRIN_ULONG', `Data')))
+INTRIN(`void',			`__inbytestring',			((`unsigned short', `Port'), (`unsigned char *', `Buffer'), (`__INTRIN_ULONG', `Count')))
+INTRIN(`void',			`__inwordstring',			((`unsigned short', `Port'), (`unsigned short *', `Buffer'), (`__INTRIN_ULONG', `Count')))
+INTRIN(`void',			`__indwordstring',			((`unsigned short', `Port'), (`__INTRIN_ULONG *', `Buffer'), (`__INTRIN_ULONG', `Count')))
+INTRIN(`void',			`__outbytestring',			((`unsigned short', `Port'), (`unsigned char *', `Buffer'), (`__INTRIN_ULONG', `Count')))
+INTRIN(`void',			`__outwordstring',			((`unsigned short', `Port'), (`unsigned short *', `Buffer'), (`__INTRIN_ULONG', `Count')))
+INTRIN(`void',			`__outdwordstring',			((`unsigned short', `Port'), (`__INTRIN_ULONG *', `Buffer'), (`__INTRIN_ULONG', `Count')))
+INTRIN(`unsigned int',		`__getcallerseflags')
+INTRIN(`void',			`__vmx_vmptrst',			((`__INTRIN_UINT64 *', `VmcsPhysicalAddress')))
+INTRIN(`void',			`__vmx_off')
+INTRIN(`void',			`__svm_clgi')
+INTRIN(`void',			`__svm_invlpga',			((`void *', `Va'), (`int', `ASID')))
+INTRIN(`void',			`__svm_skinit',				((`int', `SLB')))
+INTRIN(`void',			`__svm_stgi')
+INTRIN(`void',			`__svm_vmload',				((`size_t', `VmcbPhysicalAddress')))
+INTRIN(`void',			`__svm_vmrun',				((`size_t', `VmcbPhysicalAddress')))
+INTRIN(`void',			`__svm_vmsave',				((`size_t', `VmcbPhysicalAddress')))
+INTRIN(`void',			`__halt')
+INTRIN(`void',			`__sidt',				((`void *', `Destination')))
+INTRIN(`void',			`__lidt',				((`void *', `Source')))
+INTRIN(`void',			`__ud2')
+INTRIN(`void',			`__nop')
+INTRIN(`void',			`__stosb',				((`unsigned char *', `Dest'), (`unsigned char', `Data'), (`size_t', `Count'))) dnl The three __stos entries share one shape: (Dest pointer, Data value, Count).
+INTRIN(`void',			`__stosw',				((`unsigned short *', `Dest'), (`unsigned short', `Data'), (`size_t', `Count')))
+INTRIN(`void',			`__stosd',				((`__INTRIN_ULONG *', `Dest'), (`__INTRIN_ULONG', `Data'), (`size_t', `Count')))
+INTRIN(`unsigned char',		`_interlockedbittestandset',		((`__INTRIN_LONG volatile *', `a'), (`__INTRIN_LONG', `b')))
+INTRIN(`unsigned char',		`_interlockedbittestandreset',		((`__INTRIN_LONG volatile *', `a'), (`__INTRIN_LONG', `b')))
+INTRIN(`void',			`__cpuid',				((`int', `a', `[4]'), (`int', `b'))) dnl A parameter tuple may carry a third element; presumably a declarator suffix placed after the name (int a[4]) -- TODO confirm against the INTRIN definition.
+INTRIN(`void',			`__cpuidex',				((`int', `a', `[4]'), (`int', `b'), (`int', `c')))
+INTRIN(`__INTRIN_UINT64',	`__readpmc',				((`__INTRIN_ULONG', `a')))
+INTRIN(`__INTRIN_ULONG',	`__segmentlimit',			((`__INTRIN_ULONG', `a')))
+INTRIN(`void',			`__int2c')
+INTRIN(`unsigned short',	`__lzcnt16',				((`unsigned short', `value')))
+INTRIN(`unsigned int',		`__lzcnt',				((`unsigned int', `value')))
+INTRIN(`unsigned short',	`__popcnt16',				((`unsigned short', `value')))
+INTRIN(`unsigned int',		`__popcnt',				((`unsigned int', `value')))
+INTRIN(`__m128i',		`_mm_extract_si64',			((`__m128i', `Source'), (`__m128i', `Descriptor')))
+INTRIN(`__m128i',		`_mm_extracti_si64',			((`__m128i', `Source'), (`int', `Length'), (`int', `Index')))
+INTRIN(`__m128i',		`_mm_insert_si64',			((`__m128i', `Source1'), (`__m128i', `Source2')))
+INTRIN(`__m128i',		`_mm_inserti_si64',			((`__m128i', `Source1'), (`__m128i', `Source2'), (`int', `Length'), (`int', `Index')))
+INTRIN(`void',			`_mm_stream_sd',			((`double *', `Dest'), (`__m128d', `Source')))
+INTRIN(`void',			`_mm_stream_ss',			((`float *', `Dest'), (`__m128', `Source')))
+INTRIN(`__INTRIN_UINT64',	`__rdtscp',				((`unsigned int *', `Aux')))
+dnl Intrinsics passed to INTRIN_CDECL (macro defined outside this file; presumably selects the cdecl calling convention -- TODO confirm).
+INTRIN_CDECL(`_disable')
+INTRIN_CDECL(`_enable')
+INTRIN_CDECL(`_InterlockedDecrement')
+INTRIN_CDECL(`_InterlockedIncrement')
+INTRIN_CDECL(`_inp')
+INTRIN_CDECL(`inp')
+INTRIN_CDECL(`_inpd')
+INTRIN_CDECL(`inpd')
+INTRIN_CDECL(`_inpw')
+INTRIN_CDECL(`inpw')
+INTRIN_CDECL(`_outp')
+INTRIN_CDECL(`outp')
+INTRIN_CDECL(`_outpd')
+INTRIN_CDECL(`outpd')
+INTRIN_CDECL(`_outpw')
+INTRIN_CDECL(`outpw')
+dnl Intrinsics passed to INTRIN_DISCARDABLE (macro defined outside this file; exact meaning not visible here -- TODO confirm).
+INTRIN_DISCARDABLE(`__readmsr')
+INTRIN_DISCARDABLE(`__rdtsc')
+INTRIN_DISCARDABLE(`__readpmc')
+INTRIN_DISCARDABLE(`_mm_extract_si64')
+INTRIN_DISCARDABLE(`_mm_extracti_si64')
+INTRIN_DISCARDABLE(`_mm_insert_si64')
+INTRIN_DISCARDABLE(`_mm_inserti_si64')
+dnl Intrinsics passed to INTRIN_CONST (macro defined outside this file); every name listed has an INTRIN entry earlier in this file.
+INTRIN_CONST(`__emul')
+INTRIN_CONST(`__emulu')
+INTRIN_CONST(`__ll_lshift')
+INTRIN_CONST(`__ll_rshift')
+INTRIN_CONST(`_ReturnAddress')
+INTRIN_CONST(`__ull_rshift')
+INTRIN_CONST(`_AddressOfReturnAddress')
+INTRIN_CONST(`__getcallerseflags')
+INTRIN_CONST(`__lzcnt16')
+INTRIN_CONST(`__lzcnt')
+INTRIN_CONST(`__popcnt16')
+INTRIN_CONST(`__popcnt')
+
+divert`'dnl
+
diff --git a/mingw-w64-headers/include/intrin-machineia32.m4 b/mingw-w64-headers/include/intrin-machineia32.m4
new file mode 100644
index 0000000..6045db1
--- /dev/null
+++ b/mingw-w64-headers/include/intrin-machineia32.m4
@@ -0,0 +1,56 @@
+divert(`-1') dnl Output is discarded until the closing divert. Each INTRIN(return type, name[, ((type, name), ...)]) entry describes one intrinsic prototype. NOTE(review): this file spells long and unsigned long directly where the sibling files use __INTRIN_LONG and __INTRIN_ULONG -- confirm this is intentional.
+INTRIN(`long',			`_InterlockedAddLargeStatistic',	((`__INTRIN_INT64 volatile *', `Addend'), (`long', `Value')))
+INTRIN(`__m64',			`_m_from_float',			((`float', `f')))
+INTRIN(`float',			`_m_to_float',				((`__m64', `m')))
+INTRIN(`__m64',			`_mm_cvt_ps2pi',			((`__m128', `value')))
+INTRIN(`__m64',			`_mm_cvtt_ps2pi',			((`__m128', `value')))
+INTRIN(`__m128',		`_mm_cvt_pi2ps',			((`__m128', `a'), (`__m64', `b')))
+INTRIN(`void',			`_mm_stream_pi',			((`__m64*', `p'), (`__m64', `a')))
+INTRIN(`void',			`_m_femms')
+INTRIN(`__m64',			`_m_pavgusb',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pf2id',				((`__m64', `m')))
+INTRIN(`__m64',			`_m_pfacc',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pfadd',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pfcmpeq',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pfcmpge',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pfcmpgt',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pfmax',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pfmin',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pfmul',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pfrcp',				((`__m64', `m')))
+INTRIN(`__m64',			`_m_pfrcpit1',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pfrcpit2',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pfrsqrt',				((`__m64', `m')))
+INTRIN(`__m64',			`_m_pfrsqit1',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pfsub',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pfsubr',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pi2fd',				((`__m64', `m')))
+INTRIN(`__m64',			`_m_pmulhrw',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pf2iw',				((`__m64', `m')))
+INTRIN(`__m64',			`_m_pfnacc',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pfpnacc',				((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',			`_m_pi2fw',				((`__m64', `m')))
+INTRIN(`__m64',			`_m_pswapd',				((`__m64', `m')))
+INTRIN(`unsigned long',		`__readcr0')
+INTRIN(`unsigned long',		`__readcr2')
+INTRIN(`unsigned long',		`__readcr3')
+INTRIN(`unsigned long',		`__readcr4')
+INTRIN(`unsigned long',		`__readcr8')
+INTRIN(`void',			`__writecr0',				((`unsigned', `Data')))
+INTRIN(`void',			`__writecr3',				((`unsigned', `Data')))
+INTRIN(`void',			`__writecr4',				((`unsigned', `Data')))
+INTRIN(`void',			`__writecr8',				((`unsigned', `Data')))
+INTRIN(`unsigned',		`__readdr',				((`unsigned int', `DebugRegister')))
+INTRIN(`void',			`__writedr',				((`unsigned int', `DebugRegister'), (`unsigned', `DebugValue')))
+INTRIN(`unsigned',		`__readeflags')
+INTRIN(`void',			`__writeeflags',			((`unsigned', `Value')))
+INTRIN(`unsigned char',		`__readfsbyte',				((`unsigned long', `Offset')))
+INTRIN(`unsigned short',	`__readfsword',				((`unsigned long', `Offset')))
+INTRIN(`unsigned long',		`__readfsdword',			((`unsigned long', `Offset')))
+INTRIN(`__INTRIN_UINT64',	`__readfsqword',			((`unsigned long', `Offset')))
+INTRIN(`void',			`__writefsbyte',			((`unsigned long', `Offset'), (`unsigned char', `Data')))
+INTRIN(`void',			`__writefsword',			((`unsigned long', `Offset'), (`unsigned short', `Data')))
+INTRIN(`void',			`__writefsdword',			((`unsigned long', `Offset'), (`unsigned long', `Data')))
+INTRIN(`void',			`__writefsqword',			((`unsigned long', `Offset'), (`__INTRIN_UINT64', `Data')))
+divert`'dnl
+
diff --git a/mingw-w64-headers/include/intrin-machineiw64.m4 b/mingw-w64-headers/include/intrin-machineiw64.m4
new file mode 100644
index 0000000..b93ef10
--- /dev/null
+++ b/mingw-w64-headers/include/intrin-machineiw64.m4
@@ -0,0 +1,46 @@
+divert(`-1') dnl Output is discarded until the closing divert. Each INTRIN(return type, name[, ((type, name), ...)]) entry describes one intrinsic prototype.
+INTRIN(`__INTRIN_LONG',		`_InterlockedOr',			((`__INTRIN_LONG volatile *', `Value'), (`__INTRIN_LONG', `Mask')))
+INTRIN(`char',			`_InterlockedOr8',			((`char volatile *', `Value'), (`char', `Mask')))
+INTRIN(`short',			`_InterlockedOr16',			((`short volatile *', `Value'), (`short', `Mask')))
+INTRIN(`__INTRIN_LONG',		`_InterlockedXor',			((`__INTRIN_LONG volatile *', `Value'), (`__INTRIN_LONG', `Mask')))
+INTRIN(`char',			`_InterlockedXor8',			((`char volatile *', `Value'), (`char', `Mask')))
+INTRIN(`short',			`_InterlockedXor16',			((`short volatile *', `Value'), (`short', `Mask')))
+INTRIN(`__INTRIN_LONG',		`_InterlockedAnd',			((`__INTRIN_LONG volatile *', `Value'), (`__INTRIN_LONG', `Mask')))
+INTRIN(`char',			`_InterlockedAnd8',			((`char volatile *', `Value'), (`char', `Mask')))
+INTRIN(`short',			`_InterlockedAnd16',			((`short volatile *', `Value'), (`short', `Mask')))
+INTRIN(`unsigned char',		`_bittest',				((`__INTRIN_LONG const *', `a'), (`__INTRIN_LONG', `b')))
+INTRIN(`unsigned char',		`_bittestandset',			((`__INTRIN_LONG *', `a'), (`__INTRIN_LONG', `b')))
+INTRIN(`unsigned char',		`_bittestandreset',			((`__INTRIN_LONG *', `a'), (`__INTRIN_LONG', `b')))
+INTRIN(`unsigned char',		`_bittestandcomplement',		((`__INTRIN_LONG *', `a'), (`__INTRIN_LONG', `b')))
+INTRIN(`unsigned char',		`_BitScanForward',			((`__INTRIN_ULONG*', `Index'), (`__INTRIN_ULONG', `Mask')))
+INTRIN(`unsigned char',		`_BitScanReverse',			((`__INTRIN_ULONG*', `Index'), (`__INTRIN_ULONG', `Mask')))
+INTRIN(`wchar_t *',		`wcscat',				((`wchar_t *', `strDestination'), (`const wchar_t *', `strSource')))
+INTRIN(`int',			`wcscmp',				((`const wchar_t *', `string1'), (`const wchar_t *', `string2')))
+INTRIN(`wchar_t *',		`wcscpy',				((`wchar_t *', `strDestination'), (`const wchar_t *', `strSource')))
+INTRIN(`size_t',		`wcslen',				((`const wchar_t *', `str')))
+INTRIN(`wchar_t *',		`_wcsset',				((`wchar_t *', `str'), (`wchar_t', `c')))
+INTRIN(`void',			`_ReadBarrier')
+INTRIN(`unsigned char',		`_rotr8',				((`unsigned char', `value'), (`unsigned char', `shift')))
+INTRIN(`unsigned short',	`_rotr16',				((`unsigned short', `value'), (`unsigned char', `shift')))
+INTRIN(`unsigned char',		`_rotl8',				((`unsigned char', `value'), (`unsigned char', `shift')))
+INTRIN(`unsigned short',	`_rotl16',				((`unsigned short', `value'), (`unsigned char', `shift')))
+INTRIN(`short',			`_InterlockedIncrement16',		((`short volatile *', `Addend')))
+INTRIN(`short',			`_InterlockedDecrement16',		((`short volatile *', `Addend')))
+INTRIN(`short',			`_InterlockedCompareExchange16',	((`short volatile *', `Destination'), (`short', `Exchange'), (`short', `Comparand')))
+INTRIN(`void',			`__nvreg_save_fence')
+INTRIN(`void',			`__nvreg_restore_fence')
+dnl The wide-character string intrinsics are passed to INTRIN_CDECL (macro defined outside this file; presumably selects the cdecl calling convention -- TODO confirm).
+INTRIN_CDECL(`wcscat')
+INTRIN_CDECL(`wcscmp')
+INTRIN_CDECL(`wcscpy')
+INTRIN_CDECL(`wcslen')
+INTRIN_CDECL(`_wcsset')
+dnl The same five intrinsics are also passed to INTRIN_IMPORT (macro defined outside this file; presumably marks them as imported symbols -- TODO confirm).
+INTRIN_IMPORT(`wcscat')
+INTRIN_IMPORT(`wcscmp')
+INTRIN_IMPORT(`wcscpy')
+INTRIN_IMPORT(`wcslen')
+INTRIN_IMPORT(`_wcsset')
+
+divert`'dnl
+
diff --git a/mingw-w64-headers/include/intrin-machinex86x_ia64.m4 b/mingw-w64-headers/include/intrin-machinex86x_ia64.m4
new file mode 100644
index 0000000..8f8c380
--- /dev/null
+++ b/mingw-w64-headers/include/intrin-machinex86x_ia64.m4
@@ -0,0 +1,92 @@
+divert(`-1') dnl Output is discarded until the closing divert. Each INTRIN(return type, name[, ((type, name), ...)]) entry describes one intrinsic prototype; every argument must be a balanced m4 quoted string.
+INTRIN(`void',		`_m_empty')
+INTRIN(`__m64',		`_m_from_int',		((`int', `i')))
+INTRIN(`int',		`_m_to_int',		((`__m64', `a')))
+INTRIN(`__m64',		`_m_packsswb',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_packssdw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_packuswb',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_punpckhbw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_punpckhwd',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_punpckhdq',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_punpcklbw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_punpcklwd',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_punpckldq',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_paddb',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_paddw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_paddd',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_paddsb',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_paddsw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_paddusb',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_paddusw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_psubb',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_psubw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_psubd',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_psubsb',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_psubsw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_psubusb',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_psubusw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pmaddwd',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pmulhw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pmullw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_psllw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_psllwi',		((`__m64', `a'), (`int', `count')))
+INTRIN(`__m64',		`_m_pslld',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pslldi',		((`__m64', `a'), (`int', `count')))
+INTRIN(`__m64',		`_m_psllq',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_psllqi',		((`__m64', `a'), (`int', `count')))
+INTRIN(`__m64',		`_m_psraw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_psrawi',		((`__m64', `a'), (`int', `count')))
+INTRIN(`__m64',		`_m_psrad',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_psradi',		((`__m64', `a'), (`int', `count')))
+INTRIN(`__m64',		`_m_psrlw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_psrlwi',		((`__m64', `a'), (`int', `count')))
+INTRIN(`__m64',		`_m_psrld',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_psrldi',		((`__m64', `a'), (`int', `count')))
+INTRIN(`__m64',		`_m_psrlq',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_psrlqi',		((`__m64', `a'), (`int', `count')))
+INTRIN(`__m64',		`_m_pand',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pandn',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_por',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pxor',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pcmpeqb',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pcmpeqw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pcmpeqd',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pcmpgtb',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pcmpgtw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pcmpgtd',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_mm_setzero_si64')
+INTRIN(`__m64',		`_mm_set_pi32',		((`int', `i1'), (`int', `i0')))
+INTRIN(`__m64',		`_mm_set_pi16',		((`short', `w3'), (`short', `w2'), (`short', `w1'), (`short', `w0')))
+INTRIN(`__m64',		`_mm_set_pi8',		((`char', `b7'), (`char', `b6'), (`char', `b5'), (`char', `b4'), (`char', `b3'), (`char', `b2'), (`char', `b1'), (`char', `b0')))
+INTRIN(`__m64',		`_mm_set1_pi32',	((`int', `i')))
+INTRIN(`__m64',		`_mm_set1_pi16',	((`short', `i')))
+INTRIN(`__m64',		`_mm_set1_pi8',		((`char', `i')))
+INTRIN(`__m64',		`_mm_setr_pi32',	((`int', `i0'), (`int', `i1')))
+INTRIN(`__m64',		`_mm_setr_pi16',	((`short', `w0'), (`short', `w1'), (`short', `w2'), (`short', `w3')))
+INTRIN(`__m64',		`_mm_setr_pi8',		((`char', `b0'), (`char', `b1'), (`char', `b2'), (`char', `b3'), (`char', `b4'), (`char', `b5'), (`char', `b6'), (`char', `b7')))
+INTRIN(`int',		`_m_pextrw',		((`__m64', `a'), (`int', `n')))
+INTRIN(`__m64',		`_m_pinsrw',		((`__m64', `a'), (`int', `d'), (`int', `n')))
+INTRIN(`__m64',		`_m_pmaxsw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pmaxub',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pminsw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pminub',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`int',		`_m_pmovmskb',		((`__m64', `a')))
+INTRIN(`__m64',		`_m_pmulhuw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pshufw',		((`__m64', `a'), (`int', `n')))
+INTRIN(`void',		`_m_maskmovq',		((`__m64', `d'), (`__m64', `n'), (`char*', `p')))
+INTRIN(`__m64',		`_m_pavgb',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_pavgw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_m_psadbw',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_mm_cvtpd_pi32',	((`__m128d', `a')))
+INTRIN(`__m64',		`_mm_cvttpd_pi32',	((`__m128d', `a')))
+INTRIN(`__m128d',	`_mm_cvtpi32_pd',	((`__m64', `a')))
+INTRIN(`__m64',		`_mm_add_si64',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_mm_mul_su32',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m64',		`_mm_sub_si64',		((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m128i',	`_mm_set_epi64',	((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m128i',	`_mm_set1_epi64',	((`__m64', `a')))
+INTRIN(`__m128i',	`_mm_setr_epi64',	((`__m64', `m1'), (`__m64', `m2')))
+INTRIN(`__m128i',	`_mm_movpi64_epi64',	((`__m64', `a')))
+INTRIN(`__m64',		`_mm_movepi64_pi64',	((`__m128i', `a')))
+divert`'dnl
+
diff --git a/mingw-w64-headers/include/intrin-machinex86x_x64.m4 b/mingw-w64-headers/include/intrin-machinex86x_x64.m4
new file mode 100644
index 0000000..a1c4132
--- /dev/null
+++ b/mingw-w64-headers/include/intrin-machinex86x_x64.m4
@@ -0,0 +1,404 @@
+divert(`-1')
+INTRIN(`__m128',	`_mm_add_ss',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_add_ps',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_sub_ss',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_sub_ps',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_mul_ss',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_mul_ps',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_div_ss',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_div_ps',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_sqrt_ss',			((`__m128', `a')))
+INTRIN(`__m128',	`_mm_sqrt_ps',			((`__m128', `a')))
+INTRIN(`__m128',	`_mm_rcp_ss',			((`__m128', `a')))
+INTRIN(`__m128',	`_mm_rcp_ps',			((`__m128', `a')))
+INTRIN(`__m128',	`_mm_rsqrt_ss',			((`__m128', `a')))
+INTRIN(`__m128',	`_mm_rsqrt_ps',			((`__m128', `a')))
+INTRIN(`__m128',	`_mm_min_ss',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_min_ps',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_max_ss',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_max_ps',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_and_ps',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_andnot_ps',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_or_ps',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_xor_ps',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpeq_ss',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpeq_ps',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmplt_ss',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmplt_ps',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmple_ss',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmple_ps',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpgt_ss',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpgt_ps',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpge_ss',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpge_ps',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpneq_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpneq_ps',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpnlt_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpnlt_ps',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpnle_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpnle_ps',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpngt_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpngt_ps',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpnge_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpnge_ps',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpord_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpord_ps',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpunord_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_cmpunord_ps',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`int',		`_mm_comieq_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`int',		`_mm_comilt_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`int',		`_mm_comile_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`int',		`_mm_comigt_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`int',		`_mm_comige_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`int',		`_mm_comineq_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`int',		`_mm_ucomieq_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`int',		`_mm_ucomilt_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`int',		`_mm_ucomile_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`int',		`_mm_ucomigt_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`int',		`_mm_ucomige_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`int',		`_mm_ucomineq_ss',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`int',		`_mm_cvt_ss2si',		((`__m128', `a')))
+INTRIN(`int',		`_mm_cvtt_ss2si',		((`__m128', `a')))
+INTRIN(`__m128',	`_mm_cvt_si2ss',		((`__m128', `a'), (`int', `b')))
+INTRIN(`__m128',	`_mm_shuffle_ps',		((`__m128', `a'), (`__m128', `b'), (`unsigned int', `i')))
+INTRIN(`__m128',	`_mm_unpackhi_ps',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_unpacklo_ps',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_loadh_pi',			((`__m128', `a'), (`__m64 const*', `p')))
+INTRIN(`void',		`_mm_storeh_pi',		((`__m64*', `p'), (`__m128', `a')))
+INTRIN(`__m128',	`_mm_loadl_pi',			((`__m128', `a'), (`__m64 const*', `p')))
+INTRIN(`void',		`_mm_storel_pi',		((`__m64*', `p'), (`__m128', `a')))
+INTRIN(`int',		`_mm_movemask_ps',		((`__m128', `a')))
+INTRIN(`__m128',	`_mm_set_ss',			((`float', `w')))
+INTRIN(`__m128',	`_mm_set_ps1',			((`float', `w')))
+INTRIN(`__m128',	`_mm_set_ps',			((`float', `z'), (`float', `y'), (`float', `x'), (`float', `w')))
+INTRIN(`__m128',	`_mm_setr_ps',			((`float', `w'), (`float', `x'), (`float', `y'), (`float', `z')))
+INTRIN(`__m128',	`_mm_setzero_ps')
+INTRIN(`__m128',	`_mm_load_ss',			((`float const*', `p')))
+INTRIN(`__m128',	`_mm_load_ps1',			((`float const*', `p')))
+INTRIN(`__m128',	`_mm_load_ps',			((`float const*', `p')))
+INTRIN(`__m128',	`_mm_loadr_ps',			((`float const*', `p')))
+INTRIN(`__m128',	`_mm_loadu_ps',			((`float const*', `p')))
+INTRIN(`__m128',	`_mm_move_ss',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`void',		`_mm_store_ss',			((`float*', `p'), (`__m128', `a')))
+INTRIN(`void',		`_mm_store_ps1',		((`float*', `p'), (`__m128', `a')))
+INTRIN(`void',		`_mm_store_ps',			((`float*', `p'), (`__m128', `a')))
+INTRIN(`void',		`_mm_storer_ps',		((`float*', `p'), (`__m128', `a')))
+INTRIN(`void',		`_mm_storeu_ps',		((`float*', `p'), (`__m128', `a')))
+INTRIN(`void',		`_mm_prefetch',			((`char const*', `p'), (`int', `i')))
+INTRIN(`void',		`_mm_stream_ps',		((`float*', `p'), (`__m128', `a')))
+INTRIN(`void',		`_mm_sfence')
+INTRIN(`unsigned int',	`_mm_getcsr')
+INTRIN(`void',		`_mm_setcsr',			((`unsigned int', `i')))
+INTRIN(`__m128',	`_mm_movelh_ps',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128',	`_mm_movehl_ps',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`void',		`_m_prefetch',			((`void*', `p')))
+INTRIN(`void',		`_m_prefetchw',			((`volatile const void*', `p')))
+INTRIN(`__m128d',	`_mm_add_sd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_add_pd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_div_sd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_div_pd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_max_sd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_max_pd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_min_sd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_min_pd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_mul_sd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_mul_pd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_sqrt_sd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_sqrt_pd',			((`__m128d', `a')))
+INTRIN(`__m128d',	`_mm_sub_sd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_sub_pd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_and_pd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_andnot_pd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_or_pd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_xor_pd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpeq_sd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpeq_pd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmplt_sd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmplt_pd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmple_sd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmple_pd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpgt_sd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpgt_pd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpge_sd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpge_pd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpneq_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpneq_pd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpnlt_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpnlt_pd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpnle_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpnle_pd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpngt_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpngt_pd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpnge_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpnge_pd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpord_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpord_pd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpunord_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cmpunord_pd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`int',		`_mm_comieq_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`int',		`_mm_comilt_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`int',		`_mm_comile_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`int',		`_mm_comigt_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`int',		`_mm_comige_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`int',		`_mm_comineq_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`int',		`_mm_ucomieq_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`int',		`_mm_ucomilt_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`int',		`_mm_ucomile_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`int',		`_mm_ucomigt_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`int',		`_mm_ucomige_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`int',		`_mm_ucomineq_sd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128',	`_mm_cvtpd_ps',			((`__m128d', `a')))
+INTRIN(`__m128d',	`_mm_cvtps_pd',			((`__m128', `a')))
+INTRIN(`__m128d',	`_mm_cvtepi32_pd',		((`__m128i', `a')))
+INTRIN(`__m128i',	`_mm_cvtpd_epi32',		((`__m128d', `a')))
+INTRIN(`int',		`_mm_cvtsd_si32',		((`__m128d', `a')))
+INTRIN(`__m128',	`_mm_cvtsd_ss',			((`__m128', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_cvtsi32_sd',		((`__m128d', `a'), (`int', `b')))
+INTRIN(`__m128d',	`_mm_cvtss_sd',			((`__m128d', `a'), (`__m128', `b')))
+INTRIN(`__m128i',	`_mm_cvttpd_epi32',		((`__m128d', `a')))
+INTRIN(`int',		`_mm_cvttsd_si32',		((`__m128d', `a')))
+INTRIN(`__m128',	`_mm_cvtepi32_ps',		((`__m128i', `a')))
+INTRIN(`__m128i',	`_mm_cvtps_epi32',		((`__m128', `a')))
+INTRIN(`__m128i',	`_mm_cvttps_epi32',		((`__m128', `a')))
+INTRIN(`__m128d',	`_mm_unpackhi_pd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128d',	`_mm_unpacklo_pd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`int',		`_mm_movemask_pd',		((`__m128d', `a')))
+INTRIN(`__m128d',	`_mm_shuffle_pd',		((`__m128d', `a'), (`__m128d', `b'), (`int', `i')))
+INTRIN(`__m128d',	`_mm_load_pd',			((`double const*', `p')))
+INTRIN(`__m128d',	`_mm_load1_pd',			((`double const*', `p')))
+INTRIN(`__m128d',	`_mm_loadr_pd',			((`double const*', `p')))
+INTRIN(`__m128d',	`_mm_loadu_pd',			((`double const*', `p')))
+INTRIN(`__m128d',	`_mm_load_sd',			((`double const*', `p')))
+INTRIN(`__m128d',	`_mm_loadh_pd',			((`__m128d', `a'), (`double const*', `p')))
+INTRIN(`__m128d',	`_mm_loadl_pd',			((`__m128d', `a'), (`double const*', `p')))
+INTRIN(`__m128d',	`_mm_set_sd',			((`double', `w')))
+INTRIN(`__m128d',	`_mm_set1_pd',			((`double', `w')))
+INTRIN(`__m128d',	`_mm_set_pd',			((`double', `w'), (`double', `x')))
+INTRIN(`__m128d',	`_mm_setr_pd',			((`double', `w'), (`double', `x')))
+INTRIN(`__m128d',	`_mm_setzero_pd')
+INTRIN(`__m128d',	`_mm_move_sd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`void',		`_mm_store_sd',			((`double*', `p'), (`__m128d', `a')))
+INTRIN(`void',		`_mm_store1_pd',		((`double*', `p'), (`__m128d', `a')))
+INTRIN(`void',		`_mm_store_pd',			((`double*', `p'), (`__m128d', `a')))
+INTRIN(`void',		`_mm_storeu_pd',		((`double*', `p'), (`__m128d', `a')))
+INTRIN(`void',		`_mm_storer_pd',		((`double*', `p'), (`__m128d', `a')))
+INTRIN(`void',		`_mm_storeh_pd',		((`double*', `p'), (`__m128d', `a')))
+INTRIN(`void',		`_mm_storel_pd',		((`double*', `p'), (`__m128d', `a')))
+INTRIN(`__m128i',	`_mm_add_epi8',			((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_add_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_add_epi32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_add_epi64',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_adds_epi8',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_adds_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_adds_epu8',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_adds_epu16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_avg_epu8',			((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_avg_epu16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_madd_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_max_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_max_epu8',			((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_min_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_min_epu8',			((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_mulhi_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_mulhi_epu16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_mullo_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_mul_epu32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_sad_epu8',			((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_sub_epi8',			((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_sub_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_sub_epi32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_sub_epi64',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_subs_epi8',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_subs_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_subs_epu8',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_subs_epu16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_andnot_si128',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_and_si128',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_or_si128',			((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_xor_si128',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_slli_si128',		((`__m128i', `a'), (`int', `imm')))
+INTRIN(`__m128i',	`_mm_slli_epi16',		((`__m128i', `a'), (`int', `imm')))
+INTRIN(`__m128i',	`_mm_sll_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_slli_epi32',		((`__m128i', `a'), (`int', `imm')))
+INTRIN(`__m128i',	`_mm_sll_epi32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_slli_epi64',		((`__m128i', `a'), (`int', `imm')))
+INTRIN(`__m128i',	`_mm_sll_epi64',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_srai_epi16',		((`__m128i', `a'), (`int', `imm')))
+INTRIN(`__m128i',	`_mm_sra_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_srai_epi32',		((`__m128i', `a'), (`int', `imm')))
+INTRIN(`__m128i',	`_mm_sra_epi32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_srli_si128',		((`__m128i', `a'), (`int', `imm')))
+INTRIN(`__m128i',	`_mm_srli_epi16',		((`__m128i', `a'), (`int', `imm')))
+INTRIN(`__m128i',	`_mm_srl_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_srli_epi32',		((`__m128i', `a'), (`int', `imm')))
+INTRIN(`__m128i',	`_mm_srl_epi32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_srli_epi64',		((`__m128i', `a'), (`int', `imm')))
+INTRIN(`__m128i',	`_mm_srl_epi64',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_cmpeq_epi8',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_cmpeq_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_cmpeq_epi32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_cmpgt_epi8',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_cmpgt_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_cmpgt_epi32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_cmplt_epi8',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_cmplt_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_cmplt_epi32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_cvtsi32_si128',		((`int', `a')))
+INTRIN(`int',		`_mm_cvtsi128_si32',		((`__m128i', `a')))
+INTRIN(`__m128i',	`_mm_packs_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_packs_epi32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_packus_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`int',		`_mm_extract_epi16',		((`__m128i', `a'), (`int', `imm')))
+INTRIN(`__m128i',	`_mm_insert_epi16',		((`__m128i', `a'), (`int', `b'), (`int', `imm')))
+INTRIN(`int',		`_mm_movemask_epi8',		((`__m128i', `a')))
+INTRIN(`__m128i',	`_mm_shuffle_epi32',		((`__m128i', `a'), (`int', `imm')))
+INTRIN(`__m128i',	`_mm_shufflehi_epi16',		((`__m128i', `a'), (`int', `imm')))
+INTRIN(`__m128i',	`_mm_shufflelo_epi16',		((`__m128i', `a'), (`int', `imm')))
+INTRIN(`__m128i',	`_mm_unpackhi_epi8',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_unpackhi_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_unpackhi_epi32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_unpackhi_epi64',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_unpacklo_epi8',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_unpacklo_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_unpacklo_epi32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_unpacklo_epi64',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_load_si128',		((`__m128i const*', `p')))
+INTRIN(`__m128i',	`_mm_loadu_si128',		((`__m128i const*', `p')))
+INTRIN(`__m128i',	`_mm_loadl_epi64',		((`__m128i const*', `p')))
+INTRIN(`__m128i',	`_mm_set_epi32',		((`int', `i3'), (`int', `i2'), (`int', `i1'), (`int', `i0')))
+INTRIN(`__m128i',	`_mm_set_epi16',		((`short', `w7'), (`short', `w6'), (`short', `w5'), (`short', `w4'), (`short', `w3'), (`short', `w2'), (`short', `w1'), (`short', `w0')))
+INTRIN(`__m128i',	`_mm_set_epi8',			((`char', `b15'), (`char', `b14'), (`char', `b13'), (`char', `b12'), (`char', `b11'), (`char', `b10'), (`char', `b9'), (`char', `b8'), (`char', `b7'), (`char', `b6'), (`char', `b5'), (`char', `b4'), (`char', `b3'), (`char', `b2'), (`char', `b1'), (`char', `b0')))
+INTRIN(`__m128i',	`_mm_set1_epi32',		((`int', `i')))
+INTRIN(`__m128i',	`_mm_set1_epi16',		((`short', `w')))
+INTRIN(`__m128i',	`_mm_set1_epi8',		((`char', `b')))
+INTRIN(`__m128i',	`_mm_setl_epi64',		((`__m128i', `a')))
+INTRIN(`__m128i',	`_mm_setr_epi32',		((`int', `i0'), (`int', `i1'), (`int', `i2'), (`int', `i3')))
+INTRIN(`__m128i',	`_mm_setr_epi16',		((`short', `w0'), (`short', `w1'), (`short', `w2'), (`short', `w3'), (`short', `w4'), (`short', `w5'), (`short', `w6'), (`short', `w7')))
+INTRIN(`__m128i',	`_mm_setr_epi8',		((`char', `b0'), (`char', `b1'), (`char', `b2'), (`char', `b3'), (`char', `b4'), (`char', `b5'), (`char', `b6'), (`char', `b7'), (`char', `b8'), (`char', `b9'), (`char', `b10'), (`char', `b11'), (`char', `b12'), (`char', `b13'), (`char', `b14'), (`char', `b15')))
+INTRIN(`__m128i',	`_mm_setzero_si128')
+INTRIN(`void',		`_mm_store_si128',		((`__m128i*', `p'), (`__m128i', `a')))
+INTRIN(`void',		`_mm_storeu_si128',		((`__m128i*', `p'), (`__m128i', `a')))
+INTRIN(`void',		`_mm_storel_epi64',		((`__m128i*', `p'), (`__m128i', `a')))
+INTRIN(`void',		`_mm_maskmoveu_si128',		((`__m128i', `d'), (`__m128i', `n'), (`char*', `p')))
+INTRIN(`__m128i',	`_mm_move_epi64',		((`__m128i', `a')))
+INTRIN(`void',		`_mm_stream_pd',		((`double*', `p'), (`__m128d', `a')))
+INTRIN(`void',		`_mm_stream_si128',		((`__m128i*', `p'), (`__m128i', `a')))
+INTRIN(`void',		`_mm_clflush',			((`void const *', `p')))
+INTRIN(`void',		`_mm_lfence')
+INTRIN(`void',		`_mm_mfence')
+INTRIN(`void',		`_mm_stream_si32',		((`int*', `p'), (`int', `a')))
+INTRIN(`void',		`_mm_pause')
+INTRIN(`__m128',	`_mm_addsub_ps',		((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128d',	`_mm_addsub_pd',		((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128',	`_mm_hadd_ps',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128d',	`_mm_hadd_pd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128',	`_mm_hsub_ps',			((`__m128', `a'), (`__m128', `b')))
+INTRIN(`__m128d',	`_mm_hsub_pd',			((`__m128d', `a'), (`__m128d', `b')))
+INTRIN(`__m128i',	`_mm_lddqu_si128',		((`__m128i const*', `Data')))
+INTRIN(`void',		`_mm_monitor',			((`void const*', `Address'), (`unsigned int', `a'), (`unsigned int', `b')))
+INTRIN(`__m128d',	`_mm_movedup_pd',		((`__m128d', `a')))
+INTRIN(`__m128d',	`_mm_loaddup_pd',		((`double const*', `p')))
+INTRIN(`__m128',	`_mm_movehdup_ps',		((`__m128', `a')))
+INTRIN(`__m128',	`_mm_moveldup_ps',		((`__m128', `a')))
+INTRIN(`void',		`_mm_mwait',			((`unsigned int', `a'), (`unsigned int', `b')))
+INTRIN(`__m128i',	`_mm_hadd_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_hadd_epi32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_hadds_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m64',		`_mm_hadd_pi16',		((`__m64', `a'), (`__m64', `b')))
+INTRIN(`__m64',		`_mm_hadd_pi32',		((`__m64', `a'), (`__m64', `b')))
+INTRIN(`__m64',		`_mm_hadds_pi16',		((`__m64', `a'), (`__m64', `b')))
+INTRIN(`__m128i',	`_mm_hsub_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_hsub_epi32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_hsubs_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m64',		`_mm_hsub_pi16',		((`__m64', `a'), (`__m64', `b')))
+INTRIN(`__m64',		`_mm_hsub_pi32',		((`__m64', `a'), (`__m64', `b')))
+INTRIN(`__m64',		`_mm_hsubs_pi16',		((`__m64', `a'), (`__m64', `b')))
+INTRIN(`__m128i',	`_mm_maddubs_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m64',		`_mm_maddubs_pi16',		((`__m64', `a'), (`__m64', `b')))
+INTRIN(`__m128i',	`_mm_mulhrs_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m64',		`_mm_mulhrs_pi16',		((`__m64', `a'), (`__m64', `b')))
+INTRIN(`__m128i',	`_mm_shuffle_epi8',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m64',		`_mm_shuffle_pi8',		((`__m64', `a'), (`__m64', `b')))
+INTRIN(`__m128i',	`_mm_sign_epi8',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_sign_epi16',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_sign_epi32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m64',		`_mm_sign_pi8',			((`__m64', `a'), (`__m64', `b')))
+INTRIN(`__m64',		`_mm_sign_pi16',		((`__m64', `a'), (`__m64', `b')))
+INTRIN(`__m64',		`_mm_sign_pi32',		((`__m64', `a'), (`__m64', `b')))
+INTRIN(`__m128i',	`_mm_alignr_epi8',		((`__m128i', `a'), (`__m128i', `b'), (`int', `n')))
+INTRIN(`__m64',		`_mm_alignr_pi8',		((`__m64', `a'), (`__m64', `b'), (`int', `n')))
+INTRIN(`__m128i',	`_mm_abs_epi8',			((`__m128i', `a')))
+INTRIN(`__m128i',	`_mm_abs_epi16',		((`__m128i', `a')))
+INTRIN(`__m128i',	`_mm_abs_epi32',		((`__m128i', `a')))
+INTRIN(`__m64',		`_mm_abs_pi8',			((`__m64', `a')))
+INTRIN(`__m64',		`_mm_abs_pi16',			((`__m64', `a')))
+INTRIN(`__m64',		`_mm_abs_pi32',			((`__m64', `a')))
+INTRIN(`__m128i',	`_mm_blend_epi16',		((`__m128i', `v1'), (`__m128i', `v2'), (`const int', `mask')))
+INTRIN(`__m128i',	`_mm_blendv_epi8',		((`__m128i', `v1'), (`__m128i', `v2'), (`__m128i', `mask')))
+INTRIN(`__m128',	`_mm_blend_ps',			((`__m128', `v1'), (`__m128', `v2'), (`const int', `mask')))
+INTRIN(`__m128',	`_mm_blendv_ps',		((`__m128', `v1'), (`__m128', `v2'), (`__m128', `v3')))
+INTRIN(`__m128d',	`_mm_blend_pd',			((`__m128d', `v1'), (`__m128d', `v2'), (`const int', `mask')))
+INTRIN(`__m128d',	`_mm_blendv_pd',		((`__m128d', `v1'), (`__m128d', `v2'), (`__m128d', `v3')))
+INTRIN(`__m128',	`_mm_dp_ps',			((`__m128', `val1'), (`__m128', `val2'), (`const int', `mask')))
+INTRIN(`__m128d',	`_mm_dp_pd',			((`__m128d', `val1'), (`__m128d', `val2'), (`const int', `mask')))
+INTRIN(`__m128i',	`_mm_cmpeq_epi64',		((`__m128i', `val1'), (`__m128i', `val2')))
+INTRIN(`__m128i',	`_mm_min_epi8',			((`__m128i', `val1'), (`__m128i', `val2')))
+INTRIN(`__m128i',	`_mm_max_epi8',			((`__m128i', `val1'), (`__m128i', `val2')))
+INTRIN(`__m128i',	`_mm_min_epu16',		((`__m128i', `val1'), (`__m128i', `val2')))
+INTRIN(`__m128i',	`_mm_max_epu16',		((`__m128i', `val1'), (`__m128i', `val2')))
+INTRIN(`__m128i',	`_mm_min_epi32',		((`__m128i', `val1'), (`__m128i', `val2')))
+INTRIN(`__m128i',	`_mm_max_epi32',		((`__m128i', `val1'), (`__m128i', `val2')))
+INTRIN(`__m128i',	`_mm_min_epu32',		((`__m128i', `val1'), (`__m128i', `val2')))
+INTRIN(`__m128i',	`_mm_max_epu32',		((`__m128i', `val1'), (`__m128i', `val2')))
+INTRIN(`__m128i',	`_mm_mullo_epi32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`__m128i',	`_mm_mul_epi32',		((`__m128i', `a'), (`__m128i', `b')))
+INTRIN(`int',		`_mm_testz_si128',		((`__m128i', `mask'), (`__m128i', `val')))
+INTRIN(`int',		`_mm_testc_si128',		((`__m128i', `mask'), (`__m128i', `val')))
+INTRIN(`int',		`_mm_testnzc_si128',		((`__m128i', `mask'), (`__m128i', `s2')))
+INTRIN(`__m128',	`_mm_insert_ps',		((`__m128', `dst'), (`__m128', `src'), (`const int', `ndx')))
+INTRIN(`int',		`_mm_extract_ps',		((`__m128', `src'), (`const int', `ndx')))
+INTRIN(`__m128i',	`_mm_insert_epi8',		((`__m128i', `dst'), (`int', `s'), (`const int', `ndx')))
+INTRIN(`__m128i',	`_mm_insert_epi32',		((`__m128i', `dst'), (`int', `s'), (`const int', `ndx')))
+INTRIN(`int',		`_mm_extract_epi8',		((`__m128i', `src'), (`const int', `ndx')))
+INTRIN(`int',		`_mm_extract_epi32',		((`__m128i', `src'), (`const int', `ndx')))
+INTRIN(`__m128i',	`_mm_minpos_epu16',		((`__m128i', `shortValues')))
+INTRIN(`__m128d',	`_mm_round_pd',			((`__m128d', `val'), (`int', `iRoundMode')))
+INTRIN(`__m128d',	`_mm_round_sd',			((`__m128d', `dst'), (`__m128d', `val'), (`int', `iRoundMode')))
+INTRIN(`__m128',	`_mm_round_ps',			((`__m128', `val'), (`int', `iRoundMode')))
+INTRIN(`__m128',	`_mm_round_ss',			((`__m128', `dst'), (`__m128', `val'), (`int', `iRoundMode')))
+INTRIN(`__m128i',	`_mm_cvtepi8_epi32',		((`__m128i', `byteValues')))
+INTRIN(`__m128i',	`_mm_cvtepi16_epi32',		((`__m128i', `shortValues')))
+INTRIN(`__m128i',	`_mm_cvtepi8_epi64',		((`__m128i', `byteValues')))
+INTRIN(`__m128i',	`_mm_cvtepi32_epi64',		((`__m128i', `intValues')))
+INTRIN(`__m128i',	`_mm_cvtepi16_epi64',		((`__m128i', `shortValues')))
+INTRIN(`__m128i',	`_mm_cvtepi8_epi16',		((`__m128i', `byteValues')))
+INTRIN(`__m128i',	`_mm_cvtepu8_epi32',		((`__m128i', `byteValues')))
+INTRIN(`__m128i',	`_mm_cvtepu16_epi32',		((`__m128i', `shortValues')))
+INTRIN(`__m128i',	`_mm_cvtepu8_epi64',		((`__m128i', `byteValues')))
+INTRIN(`__m128i',	`_mm_cvtepu32_epi64',		((`__m128i', `intValues')))
+INTRIN(`__m128i',	`_mm_cvtepu16_epi64',		((`__m128i', `shortValues')))
+INTRIN(`__m128i',	`_mm_cvtepu8_epi16',		((`__m128i', `byteValues')))
+INTRIN(`__m128i',	`_mm_packus_epi32',		((`__m128i', `val1'), (`__m128i', `val2')))
+INTRIN(`__m128i',	`_mm_mpsadbw_epu8',		((`__m128i', `s1'), (`__m128i', `s2'), (`const int', `msk')))
+INTRIN(`__m128i',	`_mm_stream_load_si128',	((`__m128i*', `v1')))
+INTRIN(`__m128i',	`_mm_cmpistrm',			((`__m128i', `a'), (`__m128i', `b'), (`const int', `mode')))
+INTRIN(`int',		`_mm_cmpistri',			((`__m128i', `a'), (`__m128i', `b'), (`const int', `mode')))
+INTRIN(`__m128i',	`_mm_cmpestrm',			((`__m128i', `a'), (`int', `la'), (`__m128i', `b'), (`int', `lb'), (`const int', `mode')))
+INTRIN(`int',		`_mm_cmpestri',			((`__m128i', `a'), (`int', `la'), (`__m128i', `b'), (`int', `lb'), (`const int', `mode')))
+INTRIN(`int',		`_mm_cmpistrz',			((`__m128i', `a'), (`__m128i', `b'), (`const int', `mode')))
+INTRIN(`int',		`_mm_cmpistrc',			((`__m128i', `a'), (`__m128i', `b'), (`const int', `mode')))
+INTRIN(`int',		`_mm_cmpistrs',			((`__m128i', `a'), (`__m128i', `b'), (`const int', `mode')))
+INTRIN(`int',		`_mm_cmpistro',			((`__m128i', `a'), (`__m128i', `b'), (`const int', `mode')))
+INTRIN(`int',		`_mm_cmpistra',			((`__m128i', `a'), (`__m128i', `b'), (`const int', `mode')))
+INTRIN(`int',		`_mm_cmpestrz',			((`__m128i', `a'), (`int', `la'), (`__m128i', `b'), (`int', `lb'), (`const int', `mode')))
+INTRIN(`int',		`_mm_cmpestrc',			((`__m128i', `a'), (`int', `la'), (`__m128i', `b'), (`int', `lb'), (`const int', `mode')))
+INTRIN(`int',		`_mm_cmpestrs',			((`__m128i', `a'), (`int', `la'), (`__m128i', `b'), (`int', `lb'), (`const int', `mode')))
+INTRIN(`int',		`_mm_cmpestro',			((`__m128i', `a'), (`int', `la'), (`__m128i', `b'), (`int', `lb'), (`const int', `mode')))
+INTRIN(`int',		`_mm_cmpestra',			((`__m128i', `a'), (`int', `la'), (`__m128i', `b'), (`int', `lb'), (`const int', `mode')))
+INTRIN(`__m128i',	`_mm_cmpgt_epi64',		((`__m128i', `val1'), (`__m128i', `val2')))
+INTRIN(`int',		`_mm_popcnt_u32',		((`unsigned int', `v')))
+INTRIN(`unsigned int',	`_mm_crc32_u8',			((`unsigned int', `crc'), (`unsigned char', `v')))
+INTRIN(`unsigned int',	`_mm_crc32_u16',		((`unsigned int', `crc'), (`unsigned short', `v')))
+INTRIN(`unsigned int',	`_mm_crc32_u32',		((`unsigned int', `crc'), (`unsigned int', `v')))
+divert`'dnl
+
diff --git a/mingw-w64-headers/include/intrin.m4 b/mingw-w64-headers/include/intrin.m4
new file mode 100644
index 0000000..6a288b7
--- /dev/null
+++ b/mingw-w64-headers/include/intrin.m4
@@ -0,0 +1,129 @@
+dnl
+dnl The following utility macros are covered by the following license:
+dnl
+dnl The files in this directory provide example uses of GNU M4.
+dnl The following copyright notice applies to each of these
+dnl description files.
+dnl 
+dnl Copyright (C) 2006 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+dnl 
+divert(`-1')
+# quote(args) - convert ARGS to a single-quoted string; dquote(args) - convert ARGS to a quoted list of quoted strings; dquote_elt(args) - convert ARGS to a list of double-quoted strings
+define(`quote', `ifelse(`$#', `0', `', ``$*'')')
+define(`dquote', ``$@'')
+define(`dquote_elt', `ifelse(`$#', `0', `', `$#', `1', ```$1''',
+                             ```$1'',$0(shift($@))')')
+# foreach(x, (item_1, item_2, ..., item_n), stmt) - expand STMT once per item of the parenthesized list, with X defined to the current item
+define(`foreach', `pushdef(`$1')_$0(`$1',
+  (dquote(dquote_elt$2)), `$3')popdef(`$1')')
+define(`_arg1', `$1')
+define(`_foreach', `ifelse(`$2', `(`')', `',
+  `define(`$1', _arg1$2)$3`'$0(`$1', (dquote(shift$2)), `$3')')')
+# join(sep, args) - join each non-empty ARG into a single string, with each
+# element separated by SEP; _join handles the ARGs after the first non-empty one
+define(`join',
+`ifelse(`$#', `2', ``$2'',
+  `ifelse(`$2', `', `', ``$2'_')$0(`$1', shift(shift($@)))')')
+define(`_join',
+`ifelse(`$#$2', `2', `',
+  `ifelse(`$2', `', `', ``$1$2'')$0(`$1', shift(shift($@)))')')
+# joinall(sep, args) - join each ARG, including empty ones, into a single
+# string, with each element separated by SEP; _joinall is its recursive helper
+define(`joinall', ``$2'_$0(`$1', shift($@))')
+define(`_joinall',
+`ifelse(`$#', `2', `', ``$1$3'$0(`$1', shift(shift($@)))')')
+# _cat(a, b) and _x(a, b) - expand to A immediately followed by B (the two definitions are identical); nargs(args) - expand to the number of ARGS
+define(`_cat', `$1$2')
+define(`_x', `$1$2')
+define(`nargs', `$#')
+# fatal_error(msg) - print "PROGRAM:FILE:LINE: fatal error: MSG" and a newline on standard error (FILE:LINE: is omitted when __line__ is 0), then exit m4 with status 1
+define(`fatal_error', `errprint(ifdef(`__program__', `__program__', ``m4'')'`:ifelse(__line__, `0', `', `__file__:__line__:')` fatal error: $*
+')m4exit(`1')')
+define(`_list_inline', `$@')dnl _list_inline(elts) - expand to the quoted ELTS; applied to a list it drops the enclosing parentheses
+define(`_list_add', `(_list_inline$1, `$2')')dnl _list_add(list, elt) - non-empty LIST with ELT appended
+define(`list_add', `ifelse($1, list_create, (`$2'), _list_add($1, `$2'))')dnl list_add(list, elt) - LIST with ELT appended; an empty LIST yields the one-element list (ELT)
+define(`list_create', `')dnl list_create - the empty list, which expands to nothing
+define(`list_length', `nargs$1')dnl list_length(list) - number of elements in LIST, 0 for the empty list
+divert`'dnl
+divert(`-1')
+dnl Macros to enumerate argument lists. Each argument is a parenthesized tuple (type, name[, suffix]); the suffix is presumably an array declarator.
+dnl _ARGTYPE_BASE gives the type, _ARGTYPE_ARRAY the suffix, _ARGTYPE type+suffix, _ARGNAME the name, _ARG "type name" followed by the suffix.
+dnl _ARGNAMES, _ARGTYPES and _ARGS apply _ARGNAME, _ARGTYPE and _ARG to every tuple and join the results with ", ".
+define(`_ARGTYPE_BASE', `$1')
+define(`_ARGTYPE_ARRAY', `$3')
+define(`_ARGTYPE', `$1$3')
+define(`_ARGNAME', `$2')
+define(`_ARG', `$1 $2$3')
+define(`_ARGNAMES', `join(`, ', foreach(`arg', ($*), `_cat(`_ARGNAME', arg), '))')
+define(`_ARGTYPES', `join(`, ', foreach(`arg', ($*), `_cat(`_ARGTYPE', arg), '))')
+define(`_ARGS', `join(`, ', foreach(`arg', ($*), `_cat(`_ARG', arg), '))')
+
+dnl INTRIN(return type, name, ((type, name), ...)) declares an intrinsic for the target named by _INTRIN_MACHINE:
+dnl it appends NAME to that target's list and stores the return type and the argument tuples in per-target macros.
+dnl Declaring the same NAME twice for one target aborts through fatal_error.
+define(`INTRIN', `
+	dnl Prevent redefinition
+	ifdef(`_INTRIN_'_INTRIN_MACHINE`_$2',
+		`fatal_error(`redefinition of intrinsic $2')',
+		`define(`_INTRIN_'_INTRIN_MACHINE`_$2', `1')
+	')
+
+	dnl Add intrinsic name to the target list
+	define(`_INTRIN_'_INTRIN_MACHINE, list_add(_cat(`_INTRIN_', _INTRIN_MACHINE), `$2'))
+
+	dnl Remember the return type and arguments of the intrinsic
+	define(`_INTRIN_'_INTRIN_MACHINE`_RET_$2', `$1')
+	define(`_INTRIN_'_INTRIN_MACHINE`_ARGS_$2', `$3')
+')
+dnl Accessors for a named intrinsic on the current target: return type, argument types, argument names and raw argument tuples; each aborts through fatal_error when the intrinsic was never declared.
+define(`_INTRIN_GET_RET', `ifdef(`_INTRIN_'_INTRIN_MACHINE`_RET_$1', _x(`_INTRIN_'_INTRIN_MACHINE`_RET_$1'), `fatal_error(`intrinsic $1 is not defined')')')
+define(`_INTRIN_GET_ARGTYPES', `ifdef(`_INTRIN_'_INTRIN_MACHINE`_ARGS_$1', _cat(`_ARGTYPES', _x(`_INTRIN_'_INTRIN_MACHINE`_ARGS_$1')), `fatal_error(`intrinsic $1 is not defined')')')
+define(`_INTRIN_GET_ARGNAMES', `ifdef(`_INTRIN_'_INTRIN_MACHINE`_ARGS_$1', _cat(`_ARGNAMES', _x(`_INTRIN_'_INTRIN_MACHINE`_ARGS_$1')), `fatal_error(`intrinsic $1 is not defined')')')
+define(`_INTRIN_GET_ARGS', `ifdef(`_INTRIN_'_INTRIN_MACHINE`_ARGS_$1', _x(`_INTRIN_'_INTRIN_MACHINE`_ARGS_$1'), `fatal_error(`intrinsic $1 is not defined')')')
+
+dnl Calling convention, stored per target in _INTRIN_<machine>_CALLCONV_<name> (the same macro the ifdef guards test).
+dnl _INTRIN_GET_CALLCONV(name) expands to the stored keyword, or to nothing when none was set for the current target.
+dnl _INTRIN_SET_CALLCONV(name, keyword) records it once (a second call is a fatal error); INTRIN_CDECL and INTRIN_STDCALL are the shorthands.
+define(`_INTRIN_GET_CALLCONV', `ifdef(`_INTRIN_'_INTRIN_MACHINE`_CALLCONV_$1', `indir(`_INTRIN_'_INTRIN_MACHINE`_CALLCONV_$1')', `')')
+define(`_INTRIN_SET_CALLCONV', `ifdef(`_INTRIN_'_INTRIN_MACHINE`_CALLCONV_$1', `fatal_error(`calling convention already defined for intrinsic $1')', `define(`_INTRIN_'_INTRIN_MACHINE`_CALLCONV_$1', `$2')')')
+define(`INTRIN_CDECL', `_INTRIN_SET_CALLCONV(`$1', `__cdecl')')
+define(`INTRIN_STDCALL', `_INTRIN_SET_CALLCONV(`$1', `__stdcall')')
+
+dnl Attributes. Each macro below takes an intrinsic name and flags it for the current target
+dnl by defining the empty macro _INTRIN_<machine>_<ATTRIBUTE>_<name> (presumably meant to be tested with ifdef).
+dnl NOTE(review): nothing in the visible part of this file reads these flags yet.
+define(`INTRIN_SETJMP', `define(`_INTRIN_'_INTRIN_MACHINE`_SETJMP_$1', `')')
+define(`INTRIN_DISCARDABLE', `define(`_INTRIN_'_INTRIN_MACHINE`_DISCARDABLE_$1', `')')
+define(`INTRIN_PURE', `define(`_INTRIN_'_INTRIN_MACHINE`_PURE_$1', `')')
+define(`INTRIN_CONST', `define(`_INTRIN_'_INTRIN_MACHINE`_CONST_$1', `')')
+
+dnl Linkage. INTRIN_IMPORT(name) flags the intrinsic for the current target by defining
+dnl the empty macro _INTRIN_<machine>_IMPORT_<name>.
+dnl NOTE(review): presumably marks intrinsics that are imported rather than built in; confirm once the generator reads it.
+define(`INTRIN_IMPORT', `define(`_INTRIN_'_INTRIN_MACHINE`_IMPORT_$1', `')')
+
+dnl List of targets. _INTRIN_MACHINES holds every target suffix (the first one is empty). _INTRIN_WITH_MACHINE(machine, body)
+dnl expands BODY with _INTRIN_MACHINE temporarily set to MACHINE; _INTRIN expands to the current target's intrinsic list.
+dnl _INTRIN_FOREACH_MACHINE(body) runs BODY for every target; _INTRIN_FOREACH(var, body) runs BODY for every intrinsic of every target with VAR set to its name.
+define(`_INTRIN_MACHINES', (`', `i', `ia32', `ia64', `x64', `w64', `iw64', `x86x_ia64', `x86x_x64', `sa', `cc', `ce'))
+define(`_INTRIN_WITH_MACHINE', `pushdef(`_INTRIN_MACHINE', `$1')`'$2`'popdef(`_INTRIN_MACHINE')')
+define(`_INTRIN', `_cat(`_INTRIN_', _INTRIN_MACHINE)')
+define(`_INTRIN_FOREACH_MACHINE', `foreach(`target', _INTRIN_MACHINES, `_INTRIN_WITH_MACHINE(target, `$1')')')
+define(`_INTRIN_FOREACH', `_INTRIN_FOREACH_MACHINE(`foreach(`$1', _INTRIN, `$2')')')
+
+divert`'dnl
+divert(`-1')
+dnl Import the definitions for all intrinsics: for every target, start from an empty list and include
+dnl intrin-machine<target>.m4 (plain include, so a missing file is reported as an error).
+dnl An included file may end with another diversion selected, so discarding is re-selected right after it.
+foreach(`target', _INTRIN_MACHINES, `
+	_INTRIN_WITH_MACHINE(target, `
+		define(`_INTRIN_'_INTRIN_MACHINE, list_create)
+		include(`intrin-machine'target`.m4')divert(`-1')
+	')
+')
+divert`'dnl
+