Added proper 128-bit muldiv() stuff for macOS and Windows. Will actually use them next. Also fixed compiling on non-Windows.
This commit is contained in:
parent
7424a9ea6c
commit
952b36b1c2
|
@ -14,7 +14,7 @@
|
|||
#ifdef _MSC_VER
|
||||
#define testingprivSnprintf _snprintf
|
||||
#else
|
||||
#define testingprivSnprint snprintf
|
||||
#define testingprivSnprintf snprintf
|
||||
#endif
|
||||
// and yes, vsnprintf() IS properly provided, so wtf
|
||||
|
||||
|
|
178
test/lib/timer.c
178
test/lib/timer.c
|
@ -1,6 +1,7 @@
|
|||
// 2 may 2019
|
||||
#include <string.h>
|
||||
#include "timer.h"
|
||||
#include "timerpriv.h"
|
||||
|
||||
// This is based on the algorithm that Go uses for time.Duration.
|
||||
// Of course, we're not expressing it the same way...
|
||||
|
@ -127,3 +128,180 @@ void timerDurationString(timerDuration d, char buf[timerDurationStringLen])
|
|||
}
|
||||
memmove(buf, buf + start, 33 - start);
|
||||
}
|
||||
|
||||
// portable implementations of 64x64-bit MulDiv(), because:
|
||||
// - a division intrinsic was not added to Visual Studio until VS2015
|
||||
// - there does not seem to be a division intrinsic in GCC or clang as far as I can tell
|
||||
// - there are no 128-bit facilities in macOS as far as I can tell
|
||||
|
||||
static void int128FromUint64(uint64_t n, timerprivInt128 *out)
|
||||
{
|
||||
out->neg = 0;
|
||||
out->high = 0;
|
||||
out->low = n;
|
||||
}
|
||||
|
||||
static void int128FromInt64(int64_t n, timerprivInt128 *out)
|
||||
{
|
||||
if (n >= 0) {
|
||||
int128FromUint64((uint64_t) n, out);
|
||||
return;
|
||||
}
|
||||
out->neg = 1;
|
||||
out->high = 0;
|
||||
// C99 §6.2.6.2 resticts the possible signed integer representations in C to either sign-magnitude, 1's complement, or 2's complement.
|
||||
// Therefore, INT64_MIN will always be either -INT64_MAX or -INT64_MAX - 1, so we can safely do this to see if we need to special-case INT64_MIN as -INT64_MIN cannot be safely represented, or if we can just say -n as that can be safely represented.
|
||||
// See also https://stackoverflow.com/questions/29808397/how-to-portably-find-out-minint-max-absint-min
|
||||
if (n < -INT64_MAX) {
|
||||
// INT64_MIN is -INT64_MAX - 1
|
||||
out->low = ((uint64_t) INT64_MAX) + 1;
|
||||
return;
|
||||
}
|
||||
out->low = (uint64_t) (-n);
|
||||
}
|
||||
|
||||
// references for this part:
|
||||
// - https://opensource.apple.com/source/Libc/Libc-1272.200.26/gen/nanosleep.c.auto.html
|
||||
// - https://en.wikipedia.org/wiki/Division_algorithm#Integer_division_(unsigned)_with_remainder
|
||||
|
||||
static void int128UAdd(timerprivInt128 *x, const timerprivInt128 *y)
|
||||
{
|
||||
x->high += y->high;
|
||||
x->low += y->low;
|
||||
if (x->low < y->low)
|
||||
x->high++;
|
||||
}
|
||||
|
||||
static void int128USub(timerprivInt128 *x, const timerprivInt128 *y)
|
||||
{
|
||||
x->high -= y->high;
|
||||
if (x->low < y->low)
|
||||
x->high--;
|
||||
x->low -= y->low;
|
||||
}
|
||||
|
||||
static void int128Lsh1(timerprivInt128 *x)
|
||||
{
|
||||
x->high <<= 1;
|
||||
if ((x->low & 0x8000000000000000) != 0)
|
||||
x->high |= 1;
|
||||
x->low <<= 1;
|
||||
}
|
||||
|
||||
static uint64_t int128Bit(const timerprivInt128 *x, int i)
|
||||
{
|
||||
uint64_t which;
|
||||
|
||||
which = x->low;
|
||||
if (i >= 64) {
|
||||
i -= 64;
|
||||
which = x->high;
|
||||
}
|
||||
return (which >> i) & 1;
|
||||
}
|
||||
|
||||
static int int128UCmp(const timerprivInt128 *x, const timerprivInt128 *y)
|
||||
{
|
||||
if (x->high < y->high)
|
||||
return -1;
|
||||
if (x->high > y->high)
|
||||
return 1;
|
||||
if (x->low < y->low)
|
||||
return -1;
|
||||
if (x->low > y->low)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void int128BitSet(timerprivInt128 *x, int i)
|
||||
{
|
||||
uint64_t bit;
|
||||
|
||||
bit = 1;
|
||||
if (i >= 64) {
|
||||
i -= 64;
|
||||
bit <<= i;
|
||||
x->high |= bit;
|
||||
return;
|
||||
}
|
||||
bit <<= i;
|
||||
x->low |= bit;
|
||||
}
|
||||
|
||||
static void int128MulDiv64(timerprivInt128 *x, timerprivInt128 *y, timerprivInt128 *z, timerprivInt128 *quot)
|
||||
{
|
||||
int finalNeg;
|
||||
uint64_t x64high, x64low;
|
||||
uint64_t y64high, y64low;
|
||||
timerprivInt128 add, numer, rem;
|
||||
int i;
|
||||
|
||||
finalNeg = 0;
|
||||
if (x->neg)
|
||||
finalNeg = !finalNeg;
|
||||
if (y->neg)
|
||||
finalNeg = !finalNeg;
|
||||
if (z->neg)
|
||||
finalNeg = !finalNeg;
|
||||
quot->neg = finalNeg;
|
||||
// we now treat x, y, and z as unsigned
|
||||
|
||||
// first, multiply x and y into numer
|
||||
// this assumes x->high == y->high == 0
|
||||
numer.neg = 0;
|
||||
// the idea is if x = (a * 2^32) + b and y = (c * 2^32) + d, we can express x * y as ((a * 2^32) + b) * ((c * 2^32) + d)...
|
||||
x64high = (x->low >> 32) & 0xFFFFFFFF;
|
||||
x64low = x->low & 0xFFFFFFFF;
|
||||
y64high = (y->low >> 32) & 0xFFFFFFFF;
|
||||
y64low = y->low & 0xFFFFFFFF;
|
||||
// and we can expand that out to get...
|
||||
numer.high = x64high * y64high; // a * c * 2^64 +
|
||||
numer.low = x64low * y64low; // b * d +
|
||||
add.neg = 0;
|
||||
add.high = x64high * y64low; // a * d * 2^32 +
|
||||
add.low = add.high & 0xFFFFFFFF00000000;
|
||||
add.high >>= 32;
|
||||
int128UAdd(&numer, &add);
|
||||
add.high = x64low * y64high; // b * c * 2^32
|
||||
add.low = add.high & 0xFFFFFFFF00000000;
|
||||
add.high >>= 32;
|
||||
int128UAdd(&numer, &add);
|
||||
// I did type this all by hand, btw; the idea does come from Apple's implementation, though they explain it a bit more obtusely, and the odd behavior with anding high and shifting it right is to avoid looking like I directly copied their code which does the opposite
|
||||
|
||||
// and now long-divide
|
||||
// Apple's implementation uses Newton–Raphson division using doubles to store 1/z but I'd rather go with "slow but guaranteed to be accurate"
|
||||
// (Apple also rejects quotients > UINT64_MAX; we won't)
|
||||
quot->high = 0;
|
||||
quot->low = 0;
|
||||
rem.neg = 0;
|
||||
rem.high = 0;
|
||||
rem.low = 0;
|
||||
for (i = 127; i >= 0; i--) {
|
||||
int128Lsh1(&rem);
|
||||
rem.low |= int128Bit(&numer, i);
|
||||
if (int128UCmp(&rem, z) >= 0) {
|
||||
int128USub(&rem, z);
|
||||
int128BitSet(quot, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void timerprivMulDiv64(int64_t x, int64_t y, int64_t z, timerprivInt128 *quot)
|
||||
{
|
||||
timerprivInt128 a, b, c;
|
||||
|
||||
int128FromInt64(x, &a);
|
||||
int128FromInt64(y, &b);
|
||||
int128FromInt64(z, &c);
|
||||
int128MulDiv64(&a, &b, &c, quot);
|
||||
}
|
||||
|
||||
void timerprivMulDivU64(uint64_t x, uint64_t y, uint64_t z, timerprivInt128 *quot)
|
||||
{
|
||||
timerprivInt128 a, b, c;
|
||||
|
||||
int128FromUint64(x, &a);
|
||||
int128FromUint64(y, &b);
|
||||
int128FromUint64(z, &c);
|
||||
int128MulDiv64(&a, &b, &c, quot);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
// 5 may 2019
|
||||
|
||||
typedef struct timerprivInt128 timerprivInt128;
|
||||
|
||||
struct timerprivInt128 {
|
||||
int neg;
|
||||
uint64_t high;
|
||||
uint64_t low;
|
||||
};
|
||||
|
||||
extern void timerprivMulDiv64(int64_t x, int64_t y, int64_t z, timerprivInt128 *quot);
|
||||
extern void timerprivMulDivU64(uint64_t x, uint64_t y, uint64_t z, timerprivInt128 *quot);
|
Loading…
Reference in New Issue