Added proper 128-bit muldiv() stuff for macOS and Windows. Will actually use them next. Also fixed compiling on non-Windows.

2019-05-05 02:17:11 -04:00 · 2019-05-05 02:17:11 -04:00 · 952b36b1c2
parent 7424a9ea6c
commit 952b36b1c2
3 changed files with 191 additions and 1 deletions
--- a/test/lib/testing.c
+++ b/test/lib/testing.c
@ -14,7 +14,7 @@
 #ifdef _MSC_VER
 #define testingprivSnprintf _snprintf
 #else
-#define testingprivSnprint snprintf
+#define testingprivSnprintf snprintf
 #endif
 // and yes, vsnprintf() IS properly provided, so wtf

--- a/test/lib/timer.c
+++ b/test/lib/timer.c
@ -1,6 +1,7 @@
 // 2 may 2019
 #include <string.h>
 #include "timer.h"
+#include "timerpriv.h"

 // This is based on the algorithm that Go uses for time.Duration.
 // Of course, we're not expressing it the same way...
@ -127,3 +128,180 @@ void timerDurationString(timerDuration d, char buf[timerDurationStringLen])
 	}
 	memmove(buf, buf + start, 33 - start);
 }
+
+// portable implementations of 64x64-bit MulDiv(), because:
+// - a division intrinsic was not added to Visual Studio until VS2015
+// - there does not seem to be a division intrinsic in GCC or clang as far as I can tell
+// - there are no 128-bit facilities in macOS as far as I can tell
+
+static void int128FromUint64(uint64_t n, timerprivInt128 *out)
+{
+	out->neg = 0;
+	out->high = 0;
+	out->low = n;
+}
+
+static void int128FromInt64(int64_t n, timerprivInt128 *out)
+{
+	if (n >= 0) {
+		int128FromUint64((uint64_t) n, out);
+		return;
+	}
+	out->neg = 1;
+	out->high = 0;
+	// C99 §6.2.6.2 resticts the possible signed integer representations in C to either sign-magnitude, 1's complement, or 2's complement.
+	// Therefore, INT64_MIN will always be either -INT64_MAX or -INT64_MAX - 1, so we can safely do this to see if we need to special-case INT64_MIN as -INT64_MIN cannot be safely represented, or if we can just say -n as that can be safely represented.
+	// See also https://stackoverflow.com/questions/29808397/how-to-portably-find-out-minint-max-absint-min
+	if (n < -INT64_MAX) {
+		// INT64_MIN is -INT64_MAX - 1
+		out->low = ((uint64_t) INT64_MAX) + 1;
+		return;
+	}
+	out->low = (uint64_t) (-n);
+}
+
+// references for this part:
+// - https://opensource.apple.com/source/Libc/Libc-1272.200.26/gen/nanosleep.c.auto.html
+// - https://en.wikipedia.org/wiki/Division_algorithm#Integer_division_(unsigned)_with_remainder
+
+static void int128UAdd(timerprivInt128 *x, const timerprivInt128 *y)
+{
+	x->high += y->high;
+	x->low += y->low;
+	if (x->low < y->low)
+		x->high++;
+}
+
+static void int128USub(timerprivInt128 *x, const timerprivInt128 *y)
+{
+	x->high -= y->high;
+	if (x->low < y->low)
+		x->high--;
+	x->low -= y->low;
+}
+
+static void int128Lsh1(timerprivInt128 *x)
+{
+	x->high <<= 1;
+	if ((x->low & 0x8000000000000000) != 0)
+		x->high |= 1;
+	x->low <<= 1;
+}
+
+static uint64_t int128Bit(const timerprivInt128 *x, int i)
+{
+	uint64_t which;
+
+	which = x->low;
+	if (i >= 64) {
+		i -= 64;
+		which = x->high;
+	}
+	return (which >> i) & 1;
+}
+
+static int int128UCmp(const timerprivInt128 *x, const timerprivInt128 *y)
+{
+	if (x->high < y->high)
+		return -1;
+	if (x->high > y->high)
+		return 1;
+	if (x->low < y->low)
+		return -1;
+	if (x->low > y->low)
+		return 1;
+	return 0;
+}
+
+static void int128BitSet(timerprivInt128 *x, int i)
+{
+	uint64_t bit;
+
+	bit = 1;
+	if (i >= 64) {
+		i -= 64;
+		bit <<= i;
+		x->high |= bit;
+		return;
+	}
+	bit <<= i;
+	x->low |= bit;
+}
+
+static void int128MulDiv64(timerprivInt128 *x, timerprivInt128 *y, timerprivInt128 *z, timerprivInt128 *quot)
+{
+	int finalNeg;
+	uint64_t x64high, x64low;
+	uint64_t y64high, y64low;
+	timerprivInt128 add, numer, rem;
+	int i;
+
+	finalNeg = 0;
+	if (x->neg)
+		finalNeg = !finalNeg;
+	if (y->neg)
+		finalNeg = !finalNeg;
+	if (z->neg)
+		finalNeg = !finalNeg;
+	quot->neg = finalNeg;
+	// we now treat x, y, and z as unsigned
+
+	// first, multiply x and y into numer
+	// this assumes x->high == y->high == 0
+	numer.neg = 0;
+	// the idea is if x = (a * 2^32) + b and y = (c * 2^32) + d, we can express x * y as ((a * 2^32) + b) * ((c * 2^32) + d)...
+	x64high = (x->low >> 32) & 0xFFFFFFFF;
+	x64low = x->low & 0xFFFFFFFF;
+	y64high = (y->low >> 32) & 0xFFFFFFFF;
+	y64low = y->low & 0xFFFFFFFF;
+	// and we can expand that out to get...
+	numer.high = x64high * y64high;		// a * c * 2^64 +
+	numer.low = x64low * y64low;			// b * d +
+	add.neg = 0;
+	add.high = x64high * y64low;			// a * d * 2^32 +
+	add.low = add.high & 0xFFFFFFFF00000000;
+	add.high >>= 32;
+	int128UAdd(&numer, &add);
+	add.high = x64low * y64high;			// b * c * 2^32
+	add.low = add.high & 0xFFFFFFFF00000000;
+	add.high >>= 32;
+	int128UAdd(&numer, &add);
+	// I did type this all by hand, btw; the idea does come from Apple's implementation, though they explain it a bit more obtusely, and the odd behavior with anding high and shifting it right is to avoid looking like I directly copied their code which does the opposite
+
+	// and now long-divide
+	// Apple's implementation uses Newton–Raphson division using doubles to store 1/z but I'd rather go with "slow but guaranteed to be accurate"
+	// (Apple also rejects quotients > UINT64_MAX; we won't)
+	quot->high = 0;
+	quot->low = 0;
+	rem.neg = 0;
+	rem.high = 0;
+	rem.low = 0;
+	for (i = 127; i >= 0; i--) {
+		int128Lsh1(&rem);
+		rem.low |= int128Bit(&numer, i);
+		if (int128UCmp(&rem, z) >= 0) {
+			int128USub(&rem, z);
+			int128BitSet(quot, i);
+		}
+	}
+}
+
+void timerprivMulDiv64(int64_t x, int64_t y, int64_t z, timerprivInt128 *quot)
+{
+	timerprivInt128 a, b, c;
+
+	int128FromInt64(x, &a);
+	int128FromInt64(y, &b);
+	int128FromInt64(z, &c);
+	int128MulDiv64(&a, &b, &c, quot);
+}
+
+void timerprivMulDivU64(uint64_t x, uint64_t y, uint64_t z, timerprivInt128 *quot)
+{
+	timerprivInt128 a, b, c;
+
+	int128FromUint64(x, &a);
+	int128FromUint64(y, &b);
+	int128FromUint64(z, &c);
+	int128MulDiv64(&a, &b, &c, quot);
+}
--- a/test/lib/timerpriv.h
+++ b/test/lib/timerpriv.h
@ -0,0 +1,12 @@
+// 5 may 2019
+
+typedef struct timerprivInt128 timerprivInt128;
+
+struct timerprivInt128 {
+	int neg;
+	uint64_t high;
+	uint64_t low;
+};
+
+extern void timerprivMulDiv64(int64_t x, int64_t y, int64_t z, timerprivInt128 *quot);
+extern void timerprivMulDivU64(uint64_t x, uint64_t y, uint64_t z, timerprivInt128 *quot);