Kyotaro HORIGUCHI <[email protected]> writes:
> The first attached is the revised patch and the second is
> temporary sanity check code for non-128bit environment code. (but
> works only on 128 bit environment)
This seemed to me to be probably even less correct, so I extracted
the addition and multiplication logic into a standalone test program
(attached), which compares the result of a multiplication to that
of native int128 arithmetic. I changed the order of the LinearInterval
fields to be LS-first so that I could overlay them onto an int128
result (on a little-endian machine); this is just for testing purposes
not something we must do in the finished code. I soon found cases
where it indeed fails, eg
$ ./a.out 0x7ffffffff 0x7ffffffff
7FFFFFFFF * 7FFFFFFFF
result = 62 18446744004990074881
result = 3E FFFFFFF000000001
MISMATCH!
result = 63 18446744004990074881
result = 3F FFFFFFF000000001
After fooling with it for awhile, I decided that the cause of the
problems was basically not thinking carefully about the lower half
of the value being unsigned: that affects when to do carries in
the addition macro, and we also have to be careful about whether
or not to sign-extend the partial product terms. The second
attached file is a version that I can't break anymore, though I'm
not quite sure it's bug-free.
regards, tom lane
#include "postgres.h"
/*
* LinearInterval's alternative defeinition for the environments without
* int128 arithmetics. See interval_cmp_value for datails.
*/
typedef struct
{
uint64 lo; /* holds the lower 64 bits without sign */
int64 hi; /* holds significant 64 bits including a sign bit */
} LinearInterval;
typedef union LI
{
int128 i128;
LinearInterval li;
} LI;
/*
* arithmetic 32 bit extraction from int64
*
* INT64_AU32 extracts significant 32 bit of int64 as a int64, and INT64_AL32
* extracts non-siginificant 32 bit as a int64. Both macros extends sign bits
* according to the given value. The values of these macros and the parameter
* value are in the following relationship.
*
* i64 = (int64)INT64_AU32(i64) * (2^32) + (int64)INT64_AL32(i64)
*/
#define INT64_AU32(i64) ((i64) / (1LL<<32))
#define INT64_AL32(i64) (((i64) & 0xffffffff) | ((i64) < 0 ? 0xffffffff00000000 : 0))
/*
* Adds signed 65 bit integer into LinearInterval variable. If s is not zero,
* its sign is used as v's sign.
*/
#define LINEARINTERVAL_ADD_INT65(li, v, s) \
{ \
uint64 t = (uint64)(v); \
uint64 p = (li).lo; \
(li).lo += t; \
if (s < 0 || (s == 0 && v < 0)) \
(li).hi --; \
if ((li).lo < p) \
(li).hi ++; \
}
static inline LinearInterval
interval_times(int64 x, int64 y)
{
LinearInterval span = {0, 0};
int64 tmp;
/*
* perform 128 bit multiplication using 64 bit variables.
*
* x * y = ((x.hi << 32) + x.lo) * (((y.hi << 32) + y.lo)
* = (x.hi * y.hi) << 64 +
* ((x.hi * y.lo) + (x.lo * y.hi)) << 32 +
* x.lo * y.lo
*/
/* We don't bother calculation results in zero */
if (x != 0 && y != 0)
{
int64 x_u32 = INT64_AU32(x);
int64 x_l32 = INT64_AL32(x);
/* the first term */
span.hi = x_u32 * (y >> 32);
/* the second term */
tmp = x_l32 * (y >> 32)
+ x_u32 * (y & 0xffffffff);
span.hi += INT64_AU32(tmp);
/* this shift may push out MSB. supply it explicitly */
LINEARINTERVAL_ADD_INT65(span, INT64_AL32(tmp) << 32, tmp);
/* the third term */
tmp = x_l32 * (y & 0xffffffff);
LINEARINTERVAL_ADD_INT65(span, tmp, 0);
}
return span;
}
int
main(int argc, char **argv)
{
int64 x = strtol(argv[1], NULL, 0);
int64 y = strtol(argv[2], NULL, 0);
LI li;
LI li2;
printf("%lX * %lX\n", x, y);
li.li = interval_times(x, y);
printf("result = %ld %lu\n", li.li.hi, li.li.lo);
printf("result = %lX %lX\n", li.li.hi, li.li.lo);
li2.i128 = (int128) x * (int128) y;
if (li.li.hi != li2.li.hi || li.li.lo != li2.li.lo)
{
printf("MISMATCH!\n");
printf("result = %ld %lu\n", li2.li.hi, li2.li.lo);
printf("result = %lX %lX\n", li2.li.hi, li2.li.lo);
}
return 0;
}
#include "postgres.h"
/*
* LinearInterval's alternative defeinition for the environments without
* int128 arithmetics. See interval_cmp_value for datails.
*/
typedef struct
{
uint64 lo; /* holds the lower 64 bits without sign */
int64 hi; /* holds significant 64 bits including a sign
* bit */
} LinearInterval;
typedef union LI
{
int128 i128;
LinearInterval li;
} LI;
/*
* INT64_AU32 extracts the most significant 32 bits of int64 as int64, while
* INT64_AL32 extracts the least significant 32 bits as uint64.
*/
#define INT64_AU32(i64) ((i64) >> 32)
#define INT64_AL32(i64) ((i64) & UINT64CONST(0xFFFFFFFF))
/*
* Add an unsigned int64 value into a LinearInterval variable.
* First add the value to the .lo part, then check to see if a carry
* needs to be propagated into the .hi part. A carry is needed if both
* inputs have high bits set, or if just one input has high bit set
* but the new .lo part doesn't. Remember that .lo part is unsigned;
* we cast to signed here just as a cheap way to check the high bit.
*/
#define LINEARINTERVAL_ADD_UINT64(li, v) \
do { \
uint64 t = (uint64) (v); \
uint64 oldlo = (li).lo; \
(li).lo += t; \
if (((int64) t < 0 && (int64) oldlo < 0) || \
(((int64) t < 0 || (int64) oldlo < 0) && (int64) (li).lo >= 0)) \
(li).hi++; \
} while (0)
static inline LinearInterval
interval_times(int64 x, int64 y)
{
LinearInterval span = {0, 0};
/*----------
* Form the 128-bit product x * y using 64-bit arithmetic.
* Considering each 64-bit input as having 32-bit high and low parts,
* we can compute
*
* x * y = ((x.hi << 32) + x.lo) * (((y.hi << 32) + y.lo)
* = (x.hi * y.hi) << 64 +
* (x.hi * y.lo) << 32 +
* (x.lo * y.hi) << 32 +
* x.lo * y.lo
*
* Each individual product is of 32-bit terms so it won't overflow when
* computed in 64-bit arithmetic. Then we just have to shift it to the
* correct position while adding into the 128-bit result. We must also
* keep in mind that the "lo" parts must be treated as unsigned.
*----------
*/
/* INT64_AU32 must use arithmetic right shift */
StaticAssertStmt(((int64) -1 >> 1) == (int64) -1,
"arithmetic right shift is needed");
/* No need to work hard if product must be zero */
if (x != 0 && y != 0)
{
int64 x_u32 = INT64_AU32(x);
uint64 x_l32 = INT64_AL32(x);
int64 y_u32 = INT64_AU32(y);
uint64 y_l32 = INT64_AL32(y);
int64 tmp;
/* the first term */
span.hi = x_u32 * y_u32;
printf("first term = %016lX\n", span.hi);
/* the second term: sign-extend it only if x is negative */
tmp = x_u32 * y_l32;
printf("second term = %016lX\n", tmp);
if (x < 0)
span.hi += INT64_AU32(tmp);
else
span.hi += ((uint64) tmp) >> 32;
LINEARINTERVAL_ADD_UINT64(span, ((uint64) INT64_AL32(tmp)) << 32);
printf("partial sum = %016lX%016lX\n", span.hi, span.lo);
/* the third term: sign-extend it only if y is negative */
tmp = x_l32 * y_u32;
printf("third term = %016lX\n", tmp);
if (y < 0)
span.hi += INT64_AU32(tmp);
else
span.hi += ((uint64) tmp) >> 32;
LINEARINTERVAL_ADD_UINT64(span, ((uint64) INT64_AL32(tmp)) << 32);
printf("partial sum = %016lX%016lX\n", span.hi, span.lo);
/* the fourth term: always unsigned */
printf("fourth term = %016lX\n", x_l32 * y_l32);
LINEARINTERVAL_ADD_UINT64(span, x_l32 * y_l32);
}
return span;
}
int
main(int argc, char **argv)
{
int64 x = strtol(argv[1], NULL, 0);
int64 y = strtol(argv[2], NULL, 0);
LI li;
LI li2;
printf("%lX * %lX\n", x, y);
li.li = interval_times(x, y);
printf("result = %016lX%016lX\n", li.li.hi, li.li.lo);
printf("result = %ld %lu\n", li.li.hi, li.li.lo);
li2.i128 = (int128) x * (int128) y;
if (li.li.hi != li2.li.hi || li.li.lo != li2.li.lo)
{
printf("MISMATCH!\n");
printf("result = %016lX%016lX\n", li2.li.hi, li2.li.lo);
printf("result = %ld %lu\n", li2.li.hi, li2.li.lo);
}
return 0;
}
--
Sent via pgsql-general mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-general