Added proper 128-bit muldiv() stuff for macOS and Windows. Will actually use them next. Also fixed compiling on non-Windows.

This commit is contained in:
Pietro Gagliardi 2019-05-05 02:17:11 -04:00
parent 7424a9ea6c
commit 952b36b1c2
3 changed files with 191 additions and 1 deletions

View File

@ -14,7 +14,7 @@
#ifdef _MSC_VER
#define testingprivSnprintf _snprintf
#else
#define testingprivSnprint snprintf
#define testingprivSnprintf snprintf
#endif
// and yes, vsnprintf() IS properly provided, so wtf

View File

@ -1,6 +1,7 @@
// 2 may 2019
#include <string.h>
#include "timer.h"
#include "timerpriv.h"
// This is based on the algorithm that Go uses for time.Duration.
// Of course, we're not expressing it the same way...
@ -127,3 +128,180 @@ void timerDurationString(timerDuration d, char buf[timerDurationStringLen])
}
memmove(buf, buf + start, 33 - start);
}
// portable implementations of 64x64-bit MulDiv(), because:
// - a division intrinsic was not added to Visual Studio until VS2015
// - there does not seem to be a division intrinsic in GCC or clang as far as I can tell
// - there are no 128-bit facilities in macOS as far as I can tell
static void int128FromUint64(uint64_t n, timerprivInt128 *out)
{
out->neg = 0;
out->high = 0;
out->low = n;
}
static void int128FromInt64(int64_t n, timerprivInt128 *out)
{
if (n >= 0) {
int128FromUint64((uint64_t) n, out);
return;
}
out->neg = 1;
out->high = 0;
// C99 §6.2.6.2 resticts the possible signed integer representations in C to either sign-magnitude, 1's complement, or 2's complement.
// Therefore, INT64_MIN will always be either -INT64_MAX or -INT64_MAX - 1, so we can safely do this to see if we need to special-case INT64_MIN as -INT64_MIN cannot be safely represented, or if we can just say -n as that can be safely represented.
// See also https://stackoverflow.com/questions/29808397/how-to-portably-find-out-minint-max-absint-min
if (n < -INT64_MAX) {
// INT64_MIN is -INT64_MAX - 1
out->low = ((uint64_t) INT64_MAX) + 1;
return;
}
out->low = (uint64_t) (-n);
}
// references for this part:
// - https://opensource.apple.com/source/Libc/Libc-1272.200.26/gen/nanosleep.c.auto.html
// - https://en.wikipedia.org/wiki/Division_algorithm#Integer_division_(unsigned)_with_remainder
static void int128UAdd(timerprivInt128 *x, const timerprivInt128 *y)
{
x->high += y->high;
x->low += y->low;
if (x->low < y->low)
x->high++;
}
static void int128USub(timerprivInt128 *x, const timerprivInt128 *y)
{
x->high -= y->high;
if (x->low < y->low)
x->high--;
x->low -= y->low;
}
static void int128Lsh1(timerprivInt128 *x)
{
x->high <<= 1;
if ((x->low & 0x8000000000000000) != 0)
x->high |= 1;
x->low <<= 1;
}
static uint64_t int128Bit(const timerprivInt128 *x, int i)
{
uint64_t which;
which = x->low;
if (i >= 64) {
i -= 64;
which = x->high;
}
return (which >> i) & 1;
}
static int int128UCmp(const timerprivInt128 *x, const timerprivInt128 *y)
{
if (x->high < y->high)
return -1;
if (x->high > y->high)
return 1;
if (x->low < y->low)
return -1;
if (x->low > y->low)
return 1;
return 0;
}
static void int128BitSet(timerprivInt128 *x, int i)
{
uint64_t bit;
bit = 1;
if (i >= 64) {
i -= 64;
bit <<= i;
x->high |= bit;
return;
}
bit <<= i;
x->low |= bit;
}
static void int128MulDiv64(timerprivInt128 *x, timerprivInt128 *y, timerprivInt128 *z, timerprivInt128 *quot)
{
int finalNeg;
uint64_t x64high, x64low;
uint64_t y64high, y64low;
timerprivInt128 add, numer, rem;
int i;
finalNeg = 0;
if (x->neg)
finalNeg = !finalNeg;
if (y->neg)
finalNeg = !finalNeg;
if (z->neg)
finalNeg = !finalNeg;
quot->neg = finalNeg;
// we now treat x, y, and z as unsigned
// first, multiply x and y into numer
// this assumes x->high == y->high == 0
numer.neg = 0;
// the idea is if x = (a * 2^32) + b and y = (c * 2^32) + d, we can express x * y as ((a * 2^32) + b) * ((c * 2^32) + d)...
x64high = (x->low >> 32) & 0xFFFFFFFF;
x64low = x->low & 0xFFFFFFFF;
y64high = (y->low >> 32) & 0xFFFFFFFF;
y64low = y->low & 0xFFFFFFFF;
// and we can expand that out to get...
numer.high = x64high * y64high; // a * c * 2^64 +
numer.low = x64low * y64low; // b * d +
add.neg = 0;
add.high = x64high * y64low; // a * d * 2^32 +
add.low = add.high & 0xFFFFFFFF00000000;
add.high >>= 32;
int128UAdd(&numer, &add);
add.high = x64low * y64high; // b * c * 2^32
add.low = add.high & 0xFFFFFFFF00000000;
add.high >>= 32;
int128UAdd(&numer, &add);
// I did type this all by hand, btw; the idea does come from Apple's implementation, though they explain it a bit more obtusely, and the odd behavior with anding high and shifting it right is to avoid looking like I directly copied their code which does the opposite
// and now long-divide
// Apple's implementation uses NewtonRaphson division using doubles to store 1/z but I'd rather go with "slow but guaranteed to be accurate"
// (Apple also rejects quotients > UINT64_MAX; we won't)
quot->high = 0;
quot->low = 0;
rem.neg = 0;
rem.high = 0;
rem.low = 0;
for (i = 127; i >= 0; i--) {
int128Lsh1(&rem);
rem.low |= int128Bit(&numer, i);
if (int128UCmp(&rem, z) >= 0) {
int128USub(&rem, z);
int128BitSet(quot, i);
}
}
}
void timerprivMulDiv64(int64_t x, int64_t y, int64_t z, timerprivInt128 *quot)
{
timerprivInt128 a, b, c;
int128FromInt64(x, &a);
int128FromInt64(y, &b);
int128FromInt64(z, &c);
int128MulDiv64(&a, &b, &c, quot);
}
void timerprivMulDivU64(uint64_t x, uint64_t y, uint64_t z, timerprivInt128 *quot)
{
timerprivInt128 a, b, c;
int128FromUint64(x, &a);
int128FromUint64(y, &b);
int128FromUint64(z, &c);
int128MulDiv64(&a, &b, &c, quot);
}

12
test/lib/timerpriv.h Normal file
View File

@ -0,0 +1,12 @@
// 5 may 2019
typedef struct timerprivInt128 timerprivInt128;
struct timerprivInt128 {
int neg;
uint64_t high;
uint64_t low;
};
extern void timerprivMulDiv64(int64_t x, int64_t y, int64_t z, timerprivInt128 *quot);
extern void timerprivMulDivU64(uint64_t x, uint64_t y, uint64_t z, timerprivInt128 *quot);