47-bit dividend with 16 low bits always zero; and a 31-bit result
how have you calculated the size of the this?
Assuming 16-bit signed integer coordinates whose difference never exceeds 2
15-1 = 32767, as otherwise Q15.16 fixed point does not have sufficient range.
With that constraint, the dividend and the divisor are both 32-bit signed integers. Handling the signs separately, that makes 31-bit dividend and divisor. The result requires 16 fractional bits, so multiplying dividend by 2
16 before division, the quotient will be correct to within an ULP (as this truncates the extra bits). If you add half the divisor to the multiplied dividend, the quotient will be correct to within half an ULP (but then the 16 low bits in the 47-bit divisor are no longer zero).
Here is the verifier:
#include <stdlib.h>
#include <inttypes.h>
#include <stdio.h>
#include <errno.h>
#include <time.h>
#include <math.h>
typedef struct {
int32_t x;
int32_t y;
} i32vec;
static uint64_t prng_state = 1;
static inline uint64_t prng_read_uint64(const char *path)
{
uint64_t result = 0;
FILE *src;
if (!path || !*path)
return 0;
src = fopen(path, "rb");
if (!src)
return 0;
if (fread(&result, sizeof result, 1, src) != 1) {
fclose(src);
return 0;
}
fclose(src);
return result;
}
static inline uint64_t prng_randomize(void)
{
uint64_t seed = 0;
int rounds = 100;
/* Try some known randomness sources. */
if (!seed) seed ^= prng_read_uint64("/dev/urandom");
if (!seed) seed ^= prng_read_uint64("/dev/arandom");
if (!seed) seed ^= prng_read_uint64("/dev/random");
/* Mix in a bit of time. */
seed ^= (uint64_t)time(NULL) * UINT64_C(59121997897);
/* Mix in a bit of clock, as well. */
seed ^= (uint64_t)clock() * UINT64_C(5054973881);
/* Make sure it is nonzero. */
if (!seed)
seed = 1;
/* Churn it a bit, to make it less predictable. */
while (rounds-->0) {
seed ^= seed >> 12;
seed ^= seed << 25;
seed ^= seed >> 27;
}
/* Done. */
prng_state = seed;
return seed;
}
static inline uint64_t prng_u64(void)
{
uint64_t state = prng_state;
state ^= state >> 12;
state ^= state << 25;
state ^= state >> 27;
prng_state = state;
return state * UINT64_C(2685821657736338717);
}
static uint32_t prng_xmask = 0;
static uint32_t prng_ymask = 0;
static uint32_t prng_xmax = 0;
static uint32_t prng_ymax = 0;
static int32_t prng_xmin = 0;
static int32_t prng_ymin = 0;
static inline i32vec prng_i32vec(void)
{
uint32_t x, y;
i32vec result;
do {
uint64_t u = prng_u64();
x = u & prng_xmask;
y = (u >> 32) & prng_ymask;
} while (x > prng_xmax || y > prng_ymax);
result.x = x + prng_xmin;
result.y = y + prng_ymin;
return result;
}
static inline void prng_i32vec_range(int32_t xmin, int32_t ymin,
int32_t xmax, int32_t ymax)
{
if (xmin <= xmax) {
prng_xmin = xmin;
prng_xmax = xmax - xmin;
} else {
prng_xmin = xmax;
prng_xmax = xmin - xmax;
}
if (ymin <= ymax) {
prng_ymin = ymin;
prng_ymax = ymax - ymin;
} else {
prng_ymin = ymax;
prng_ymax = ymin - ymax;
}
prng_xmask = prng_xmax;
prng_xmask |= prng_xmask >> 1;
prng_xmask |= prng_xmask >> 2;
prng_xmask |= prng_xmask >> 4;
prng_xmask |= prng_xmask >> 8;
prng_xmask |= prng_xmask >> 16;
prng_ymask = prng_ymax;
prng_ymask |= prng_ymask >> 1;
prng_ymask |= prng_ymask >> 2;
prng_ymask |= prng_ymask >> 4;
prng_ymask |= prng_ymask >> 8;
prng_ymask |= prng_ymask >> 16;
}
static inline double fx_div_correct(const int32_t n, const int32_t d)
{
return (double)n * 65536.0 / (double)d;
}
static uint64_t max_n = 0;
static uint32_t max_d = 0;
static inline int32_t fx_div(const int32_t n, const int32_t d)
{
uint64_t dividend;
uint32_t un, ud;
int32_t result;
int negative;
if (n < 0) {
un = -n;
negative = 1;
} else {
un = n;
negative = 0;
}
if (d < 0) {
ud = -d;
negative = !negative;
} else {
ud = d;
}
if (max_d < ud)
max_d = ud;
/* 47-bit division */
dividend = ((uint64_t)un) << 16;
#ifdef ROUNDING
dividend += (ud / 2);
#endif
if (max_n < dividend)
max_n = dividend;
result = dividend / ud;
return (negative) ? -result : result;
}
static int parse_ulong(const char *src, unsigned long *to)
{
const char *end;
unsigned long val;
if (!src || !*src)
return -1;
end = src;
errno = 0;
val = strtoul(src, (char **)&end, 0);
if (errno)
return -1;
if (end == src)
return -1;
while (*end == '\t' || *end == '\n' || *end == '\v' ||
*end == '\f' || *end == '\r' || *end == ' ')
end++;
if (*end)
return -1;
if (to)
*to = val;
return 0;
}
static int parse_i32(const char *src, int32_t *to)
{
const char *end;
long val;
if (!src || !*src)
return -1;
end = src;
errno = 0;
val = strtol(src, (char **)&end, 0);
if (errno)
return -1;
if (end == src)
return -1;
if (val < INT32_MIN || val > INT32_MAX)
return -1;
while (*end == '\t' || *end == '\n' || *end == '\v' ||
*end == '\f' || *end == '\r' || *end == ' ')
end++;
if (*end)
return -1;
if (to)
*to = val;
return 0;
}
int main(int argc, char *argv[])
{
int32_t min, max;
double fraction, correct, err, errmin, errmax;
i32vec xy;
unsigned long i, iters;
if (argc != 4) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s [ -h | --help | help ]\n", argv[0]);
fprintf(stderr, " %s MIN MAX ITERATIONS\n", argv[0]);
fprintf(stderr, "\n");
return EXIT_SUCCESS;
}
if (parse_i32(argv[1], &min)) {
fprintf(stderr, "%s: Invalid minimum. Try 1.\n", argv[1]);
return EXIT_FAILURE;
}
if (parse_i32(argv[2], &max)) {
fprintf(stderr, "%s: Invalid maximum. Try 32767.\n", argv[2]);
return EXIT_FAILURE;
}
if (parse_ulong(argv[3], &iters) || iters < 1) {
fprintf(stderr, "%s: Invalid number of iterations.\n", argv[3]);
return EXIT_FAILURE;
}
prng_i32vec_range(min, min, max, max);
errmin = HUGE_VAL;
errmax = -HUGE_VAL;
printf("Using seed %" PRIu64 ".\n", prng_randomize());
for (i = 0; i < iters; i++) {
do {
xy = prng_i32vec();
correct = fx_div_correct(xy.x, xy.y);
} while (xy.y == 0 || correct < (double)INT32_MIN || correct > (double)INT32_MAX);
fraction = fx_div(xy.x, xy.y);
err = correct - fraction;
if (errmin > err)
errmin = err;
if (errmax < err)
errmax = err;
}
printf("Largest dividend was %" PRIu64 " (%.0f bits)\n", max_n, ceil(log((double)max_n)/log(2.0)));
printf("Largest divisor was %" PRIu32 " (%.0f bits)\n", max_d, ceil(log((double)max_d)/log(2.0)));
printf("Maximum negative error found: %9.6f ULP\n", errmin);
printf("Maximum positive error found: %9.6f ULP\n", errmax);
return EXIT_SUCCESS;
}
If we look at the formulae, the largest possible dividend and divisor with such coordinates do not exceed 32767×32767×2 = 2147352578 in magnitude. So, if we run the verifier with parameters
-2147352578 2147352578 1000000000, it'll test a billion uniform random divisions with such dividends and divisors, limited to cases where the fixed-point quotient is within the 32-bit signed integer range.
Note that the divisor in the formulae are an effective "area": the area of a square with edge length the same as Manhattan distance between two points, or twice the area of the triangle. Since the dividend is always a multiple of "width" or "height", all cases we get in practice will have small fixed-point results; so only considering the well-behaved cases (where (the integer part of) the quotient is within the same range as "width" or "height") is warranted.
If it was compiled without defining
ROUNDING preprocessor macro, the output is something like
Using seed 15011178025248273303. Largest dividend was 140728898355200 (47 bits) Largest divisor was 2147352578 (31 bits) Maximum negative error found: -1.000000 ULP Maximum positive error found: 1.000000 ULPNote that ULP refers to Units in Least-significant Place. If you look at the fixed-point result as a simple signed integer, then the error in ULPs is the difference to the actual floating-point quotient multiplied by 2
16.
If you define
ROUNDING at compile time, then the divisor has 47 significant bits (not low 16 bits zero), and the output is something like
Using seed 16834975864486337449. Largest dividend was 140729911937246 (47 bits) Largest divisor was 2147352555 (31 bits) Maximum negative error found: -0.500000 ULP Maximum positive error found: 0.500000 ULPThe numerical testing indicates my logic is correct: when the coordinate and value differences are less than 2
15, unsigned integer division using a 47-bit dividend and a 31-bit divisor yields fixed-point results with at most half an ULP of error. In other words, that calculating the division using double-precision floating point and multiplying the quotient by 2
16, rounds to the same value as that unsigned integer division; the difference between the two is within -0.5 to +0.5.
This also means that if you do intend to use Q15.16 fixed point arithmetic, you really do need a 47:31 to 31-bit unsigned integer division (or 48:32 to 32-bit signed integer division) to construct accurate fixed point values in the first place. I do suspect that most of the numerical instabilities/inaccuracies you've seen were due to inaccurately calculated fixed point deltas.
(If a Q15.16 delta is off by 1 ULP, the error is at most 1/65536 per step. So, although the integer part may differ at each step, the error is less than 0.5 for up to 32767 steps. Since walking involves a dual loop, that 32768 steps is the Manhattan distance from the starting point. This means that for all shapes where all vertices are within 32767 steps (Manhattan distance) from each other, correctly calculated Q15.16 deltas will have less than 0.5 of error at each point in the shape.)