crt: Change mingw-aligned-malloc.c to be compatible with MS CRT functions

The MS CRT libraries (UCRT, msvcr70+ and the OS system msvcrt) allocate a
slightly smaller source buffer for _aligned_malloc() and
_aligned_offset_malloc() than the mingw-w64 functions in
mingw-aligned-malloc.c do. This is because the alignment gap can be used
for the user data too.

Do exactly the same calculation in the mingw-w64 implementation as UCRT
does. The same calculation is used in msvcr70 and probably also in the
system msvcrt library (at least various _msize() tests confirm it).

This makes it possible to call __mingw_aligned_msize() on a buffer
allocated by the MS CRT _aligned_malloc() function from the msvcrt.dll,
msvcr70.dll and msvcr71.dll CRT libraries. This is very useful because
these CRT libraries do not have an _aligned_msize() function, so mingw-w64
can emulate it via __mingw_aligned_msize().

Signed-off-by: Martin Storsjö <martin@martin.st>
diff --git a/mingw-w64-crt/misc/mingw-aligned-malloc.c b/mingw-w64-crt/misc/mingw-aligned-malloc.c
index be07ed6..02872c9 100644
--- a/mingw-w64-crt/misc/mingw-aligned-malloc.c
+++ b/mingw-w64-crt/misc/mingw-aligned-malloc.c
@@ -22,9 +22,10 @@
 #define UI(p) ((uintptr_t) (p))
 #define CP(p) ((char *) p)
 
+#define GAP(offset) ((0 - offset) & (sizeof (void *) -1))
 #define PTR_ALIGN(p0, alignment, offset)				\
-            ((void *) (((UI(p0) + (alignment + sizeof(void*)) + offset)	\
-			& (~UI(alignment - 1)))				\
+            ((void *) (((UI(p0) + (alignment + GAP(offset) + sizeof(void*)) + offset)	\
+			& (~UI(alignment)))				\
 		       - offset))
 
 /* Pointer must sometimes be aligned; assume sizeof(void*) is a power of two. */
@@ -44,12 +45,13 @@
     return ((void *) 0);
   if (alignment < sizeof (void *))
     alignment = sizeof (void *);
+  alignment--;
 
   /* Including the extra sizeof(void*) is overkill on a 32-bit
      machine, since malloc is already 8-byte aligned, as long
      as we enforce alignment >= 8 ...but oh well.  */
 
-  p0 = malloc (size + (alignment + sizeof (void *)));
+  p0 = malloc (size + (alignment + GAP (offset) + sizeof (void *)));
   if (!p0)
     return ((void *) 0);
   p = PTR_ALIGN (p0, alignment, offset);
@@ -88,6 +90,7 @@
     }
   if (alignment < sizeof (void *))
     alignment = sizeof (void *);
+  alignment--;
 
   p0 = ORIG_PTR (memblock);
   /* It is an error for the alignment to change. */
@@ -95,7 +98,7 @@
     goto bad;
   shift = CP (memblock) - CP (p0);
 
-  p0 = realloc (p0, size + (alignment + sizeof (void *)));
+  p0 = realloc (p0, size + (alignment + GAP (offset) + sizeof (void *)));
   if (!p0)
     return ((void *) 0);
   p = PTR_ALIGN (p0, alignment, offset);
@@ -131,6 +134,7 @@
     }
   if (alignment < sizeof (void *))
     alignment = sizeof (void *);
+  alignment--;
 
   p0 = ORIG_PTR (memblock);
 
@@ -141,5 +145,5 @@
       return (size_t)-1;
     }
 
-  return _msize (p0) - (alignment + sizeof (void *));
+  return _msize (p0) - (alignment + GAP (offset) + sizeof (void *));
 }