From 952b36b1c2932353207ac6603ba7f78cd9ceb7b0 Mon Sep 17 00:00:00 2001 From: Pietro Gagliardi Date: Sun, 5 May 2019 02:17:11 -0400 Subject: [PATCH] Added proper 128-bit muldiv() stuff for macOS and Windows. Will actually use them next. Also fixed compiling on non-Windows. --- test/lib/testing.c | 2 +- test/lib/timer.c | 178 +++++++++++++++++++++++++++++++++++++++++++ test/lib/timerpriv.h | 12 +++ 3 files changed, 191 insertions(+), 1 deletion(-) create mode 100644 test/lib/timerpriv.h diff --git a/test/lib/testing.c b/test/lib/testing.c index ef3e3c31..1cbe7973 100644 --- a/test/lib/testing.c +++ b/test/lib/testing.c @@ -14,7 +14,7 @@ #ifdef _MSC_VER #define testingprivSnprintf _snprintf #else -#define testingprivSnprint snprintf +#define testingprivSnprintf snprintf #endif // and yes, vsnprintf() IS properly provided, so wtf diff --git a/test/lib/timer.c b/test/lib/timer.c index b7203397..ade8c592 100644 --- a/test/lib/timer.c +++ b/test/lib/timer.c @@ -1,6 +1,7 @@ // 2 may 2019 #include #include "timer.h" +#include "timerpriv.h" // This is based on the algorithm that Go uses for time.Duration. // Of course, we're not expressing it the same way... @@ -127,3 +128,180 @@ void timerDurationString(timerDuration d, char buf[timerDurationStringLen]) } memmove(buf, buf + start, 33 - start); } + +// portable implementations of 64x64-bit MulDiv(), because: +// - a division intrinsic was not added to Visual Studio until VS2015 +// - there does not seem to be a division intrinsic in GCC or clang as far as I can tell +// - there are no 128-bit facilities in macOS as far as I can tell + +static void int128FromUint64(uint64_t n, timerprivInt128 *out) +{ + out->neg = 0; + out->high = 0; + out->low = n; +} + +static void int128FromInt64(int64_t n, timerprivInt128 *out) +{ + if (n >= 0) { + int128FromUint64((uint64_t) n, out); + return; + } + out->neg = 1; + out->high = 0; + // C99 §6.2.6.2 resticts the possible signed integer representations in C to either sign-magnitude, 1's complement, or 2's complement. + // Therefore, INT64_MIN will always be either -INT64_MAX or -INT64_MAX - 1, so we can safely do this to see if we need to special-case INT64_MIN as -INT64_MIN cannot be safely represented, or if we can just say -n as that can be safely represented. + // See also https://stackoverflow.com/questions/29808397/how-to-portably-find-out-minint-max-absint-min + if (n < -INT64_MAX) { + // INT64_MIN is -INT64_MAX - 1 + out->low = ((uint64_t) INT64_MAX) + 1; + return; + } + out->low = (uint64_t) (-n); +} + +// references for this part: +// - https://opensource.apple.com/source/Libc/Libc-1272.200.26/gen/nanosleep.c.auto.html +// - https://en.wikipedia.org/wiki/Division_algorithm#Integer_division_(unsigned)_with_remainder + +static void int128UAdd(timerprivInt128 *x, const timerprivInt128 *y) +{ + x->high += y->high; + x->low += y->low; + if (x->low < y->low) + x->high++; +} + +static void int128USub(timerprivInt128 *x, const timerprivInt128 *y) +{ + x->high -= y->high; + if (x->low < y->low) + x->high--; + x->low -= y->low; +} + +static void int128Lsh1(timerprivInt128 *x) +{ + x->high <<= 1; + if ((x->low & 0x8000000000000000) != 0) + x->high |= 1; + x->low <<= 1; +} + +static uint64_t int128Bit(const timerprivInt128 *x, int i) +{ + uint64_t which; + + which = x->low; + if (i >= 64) { + i -= 64; + which = x->high; + } + return (which >> i) & 1; +} + +static int int128UCmp(const timerprivInt128 *x, const timerprivInt128 *y) +{ + if (x->high < y->high) + return -1; + if (x->high > y->high) + return 1; + if (x->low < y->low) + return -1; + if (x->low > y->low) + return 1; + return 0; +} + +static void int128BitSet(timerprivInt128 *x, int i) +{ + uint64_t bit; + + bit = 1; + if (i >= 64) { + i -= 64; + bit <<= i; + x->high |= bit; + return; + } + bit <<= i; + x->low |= bit; +} + +static void int128MulDiv64(timerprivInt128 *x, timerprivInt128 *y, timerprivInt128 *z, timerprivInt128 *quot) +{ + int finalNeg; + uint64_t x64high, x64low; + uint64_t y64high, y64low; + timerprivInt128 add, numer, rem; + int i; + + finalNeg = 0; + if (x->neg) + finalNeg = !finalNeg; + if (y->neg) + finalNeg = !finalNeg; + if (z->neg) + finalNeg = !finalNeg; + quot->neg = finalNeg; + // we now treat x, y, and z as unsigned + + // first, multiply x and y into numer + // this assumes x->high == y->high == 0 + numer.neg = 0; + // the idea is if x = (a * 2^32) + b and y = (c * 2^32) + d, we can express x * y as ((a * 2^32) + b) * ((c * 2^32) + d)... + x64high = (x->low >> 32) & 0xFFFFFFFF; + x64low = x->low & 0xFFFFFFFF; + y64high = (y->low >> 32) & 0xFFFFFFFF; + y64low = y->low & 0xFFFFFFFF; + // and we can expand that out to get... + numer.high = x64high * y64high; // a * c * 2^64 + + numer.low = x64low * y64low; // b * d + + add.neg = 0; + add.high = x64high * y64low; // a * d * 2^32 + + add.low = add.high & 0xFFFFFFFF00000000; + add.high >>= 32; + int128UAdd(&numer, &add); + add.high = x64low * y64high; // b * c * 2^32 + add.low = add.high & 0xFFFFFFFF00000000; + add.high >>= 32; + int128UAdd(&numer, &add); + // I did type this all by hand, btw; the idea does come from Apple's implementation, though they explain it a bit more obtusely, and the odd behavior with anding high and shifting it right is to avoid looking like I directly copied their code which does the opposite + + // and now long-divide + // Apple's implementation uses Newton–Raphson division using doubles to store 1/z but I'd rather go with "slow but guaranteed to be accurate" + // (Apple also rejects quotients > UINT64_MAX; we won't) + quot->high = 0; + quot->low = 0; + rem.neg = 0; + rem.high = 0; + rem.low = 0; + for (i = 127; i >= 0; i--) { + int128Lsh1(&rem); + rem.low |= int128Bit(&numer, i); + if (int128UCmp(&rem, z) >= 0) { + int128USub(&rem, z); + int128BitSet(quot, i); + } + } +} + +void timerprivMulDiv64(int64_t x, int64_t y, int64_t z, timerprivInt128 *quot) +{ + timerprivInt128 a, b, c; + + int128FromInt64(x, &a); + int128FromInt64(y, &b); + int128FromInt64(z, &c); + int128MulDiv64(&a, &b, &c, quot); +} + +void timerprivMulDivU64(uint64_t x, uint64_t y, uint64_t z, timerprivInt128 *quot) +{ + timerprivInt128 a, b, c; + + int128FromUint64(x, &a); + int128FromUint64(y, &b); + int128FromUint64(z, &c); + int128MulDiv64(&a, &b, &c, quot); +} diff --git a/test/lib/timerpriv.h b/test/lib/timerpriv.h new file mode 100644 index 00000000..fccd787b --- /dev/null +++ b/test/lib/timerpriv.h @@ -0,0 +1,12 @@ +// 5 may 2019 + +typedef struct timerprivInt128 timerprivInt128; + +struct timerprivInt128 { + int neg; + uint64_t high; + uint64_t low; +}; + +extern void timerprivMulDiv64(int64_t x, int64_t y, int64_t z, timerprivInt128 *quot); +extern void timerprivMulDivU64(uint64_t x, uint64_t y, uint64_t z, timerprivInt128 *quot);