Hello Thomas,
Function nbits(), which was previously discussed, has been simplified by using the function pg_popcount64().Hi Fabien, Suzuki-san, I am not smart enough to commit this
I'm in no hurry:-)
or judge its value for benchmarking,
I think that it is valuable given that we have non uniform random generators in pgbench.
I'm wondering about the modular_multiply manual implementation which adds quite a few lines of non trivial code. If int128 is available on all/most platforms, it could be removed and marked as not supported on these, although it would create an issue with non regression tests.
but I have a few trivial comments on the language: + It allows to mix the output of non uniform random functions so that "It allows the output of non-uniform random functions to be mixed so that"
Fixed.
+ ensures that a perfect permutation is applied: there are no collisions + nor holes in the output values. "neither collisions nor holes", or "no collisions or holes"
I choose the first.
+ The function errors if size is not positive. "raises an error"
Fixed.
+ * 24 bits mega primes from https://primes.utm.edu/lists/small/millions/ "24 bit mega primes"
Fixed.
+/* length of n binary representation */ +static int +nbits(uint64 n) +{ + /* set lower bits to 1 and count them */ + return pg_popcount64(compute_mask(n)); +} I suppose you could use n == 0 ? 0 : pg_leftmost_one_pos64(n) + 1, and then...
It would create a branch, that I'm trying to avoid.
+/* return smallest mask holding n */ +static uint64 +compute_mask(uint64 n) +{ + n |= n >> 1; + n |= n >> 2; + n |= n >> 4; + n |= n >> 8; + n |= n >> 16; + n |= n >> 32; + return n; +} ... here you could use 1 << nbits(n)) - 1. I have no idea if doing it that way around is better, it's just a thought and removes a few lines of bit-swizzling code.
This would create a infinite recursion as nbits currently uses compute_mask. The 6 bitfield operation above is pretty efficient, I'd let it at that.
Attached a v16. -- Fabien.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index 816f9cc4c7..1747a9f66a 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -947,7 +947,7 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d <row> <entry> <literal>default_seed</literal> </entry> - <entry>seed used in hash functions by default</entry> + <entry>seed used in hash and pseudo-random permutation functions by default</entry> </row> <row> @@ -1443,6 +1443,13 @@ SELECT 2 AS two, 3 AS three \gset p_ <entry><literal>pow(2.0, 10)</literal>, <literal>power(2.0, 10)</literal></entry> <entry><literal>1024.0</literal></entry> </row> + <row> + <entry><literal><function>pr_perm(<replaceable>i</replaceable>, <replaceable>size</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry> + <entry>integer</entry> + <entry>pseudo-random permutation in [0,size)</entry> + <entry><literal>pr_perm(0, 4)</literal></entry> + <entry><literal>0</literal>, <literal>1</literal>, <literal>2</literal> or <literal>3</literal></entry> + </row> <row> <entry><literal><function>random(<replaceable>lb</replaceable>, <replaceable>ub</replaceable>)</function></literal></entry> <entry>integer</entry> @@ -1608,6 +1615,24 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) / </programlisting> </para> + <para> + Function <literal>pr_perm</literal> implements a pseudo-random permutation. + It allows the output of non uniform random functions to be mixed so that + values drawn more often are not trivially correlated. + It permutes integers in [0, size) using a seed by applying rounds of + simple invertible functions, similarly to an encryption function, + although beware that it is not at all cryptographically secure. + Compared to <literal>hash</literal> functions discussed above, the function + ensures that a perfect permutation is applied: there are neither collisions + nor holes in the output values. + Values outside the interval are interpreted modulo the size. + The function raises an error if size is not positive. + If no seed is provided, <literal>:default_seed</literal> is used. + For a given size and seed, the function is fully deterministic: if two + permutations on the same size must not be correlated, use distinct seeds + as outlined in the previous example about hash functions. + </para> + <para> As an example, the full definition of the built-in TPC-B-like transaction is: diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y index 17c9ec32c5..13410787d4 100644 --- a/src/bin/pgbench/exprparse.y +++ b/src/bin/pgbench/exprparse.y @@ -19,6 +19,7 @@ #define PGBENCH_NARGS_VARIABLE (-1) #define PGBENCH_NARGS_CASE (-2) #define PGBENCH_NARGS_HASH (-3) +#define PGBENCH_NARGS_PRPERM (-4) PgBenchExpr *expr_parse_result; @@ -370,6 +371,9 @@ static const struct { "hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A }, + { + "pr_perm", PGBENCH_NARGS_PRPERM, PGBENCH_PRPERM + }, /* keep as last array element */ { NULL, 0, 0 @@ -482,6 +486,19 @@ make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList *args) } break; + /* pseudo-random permutation function with optional seed argument */ + case PGBENCH_NARGS_PRPERM: + if (len < 2 || len > 3) + expr_yyerror_more(yyscanner, "unexpected number of arguments", + PGBENCH_FUNCTIONS[fnumber].fname); + + if (len == 2) + { + PgBenchExpr *var = make_variable("default_seed"); + args = make_elist(var, args); + } + break; + /* common case: positive arguments number */ default: Assert(PGBENCH_FUNCTIONS[fnumber].nargs >= 0); diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 570cf3306a..41ca17c26b 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -38,6 +38,7 @@ #include "getopt_long.h" #include "libpq-fe.h" #include "portability/instr_time.h" +#include "port/pg_bitutils.h" #include <ctype.h> #include <float.h> @@ -1040,6 +1041,307 @@ getHashMurmur2(int64 val, uint64 seed) return (int64) result; } +/* pseudo-random permutation */ + +/* 16 so that % 16 can be optimized to & 0x0f */ +#define PRP_PRIMES 16 +/* + * 24 bit mega primes from https://primes.utm.edu/lists/small/millions/ + * the i-th prime, i \in [0, 15], is the first prime above 2^23 + i * 2^19 + */ +static uint64 primes[PRP_PRIMES] = { + UINT64CONST(8388617), + UINT64CONST(8912921), + UINT64CONST(9437189), + UINT64CONST(9961487), + UINT64CONST(10485767), + UINT64CONST(11010059), + UINT64CONST(11534351), + UINT64CONST(12058679), + UINT64CONST(12582917), + UINT64CONST(13107229), + UINT64CONST(13631489), + UINT64CONST(14155777), + UINT64CONST(14680067), + UINT64CONST(15204391), + UINT64CONST(15728681), + UINT64CONST(16252967) +}; + +/* how many "encryption" rounds to apply */ +#define PRP_ROUNDS 4 + +/* return smallest mask holding n */ +static uint64 +compute_mask(uint64 n) +{ + n |= n >> 1; + n |= n >> 2; + n |= n >> 4; + n |= n >> 8; + n |= n >> 16; + n |= n >> 32; + return n; +} + +#if !defined(PG_INT128_TYPE) +/* length of n binary representation */ +static int +nbits(uint64 n) +{ + /* set lower bits to 1 and count them */ + return pg_popcount64(compute_mask(n)); +} +#endif + +/* + * Compute (x * y) % m, where x and y in [0, 2^64), m in [1, 2^64). + * + * Use improved interleaved modular multiplication algorithm to avoid + * overflows when necessary. + */ +static uint64 +modular_multiply(uint64 x, uint64 y, const uint64 m) +{ +#if defined(PG_INT128_TYPE) + return (PG_INT128_TYPE) x * (PG_INT128_TYPE) y % (PG_INT128_TYPE) m; +#else + int i, + bits; + uint64 r; + + Assert(m >= 1); + + /* Because of (x * y) % m = (x % m * y % m) % m */ + if (x >= m) + x %= m; + if (y >= m) + y %= m; + + /* Return the trivial result. */ + if (x == 0 || y == 0 || m == 1) + return 0; + + /* Return the result if (x * y) can be multiplied without overflow. */ + if (nbits(x) + nbits(y) <= 64) + return (x * y) % m; + + /* To reduce the for loop in the algorithm below, ensure y <= x. */ + if (x < y) + { + uint64 tmp = x; + + x = y; + y = tmp; + } + + /*----- + * Interleaved modular multiplication algorithm from: + * + * D.N. Amanor et al., "Efficient hardware architecture for modular + * multiplication on FPGAs", in International Conference on Field + * Programmable Logic and Applications, Aug 2005, pp. 539-542. + * + * This algorithm is usually used in the field of digital circuit design. + * + * Input: X, Y, M; 0 <= X, Y <= M; + * Output: R = X * Y mod M; + * bits: number of bits of Y + * Y[i]: i th bit of Y + * + * 1. R = 0; + * 2. for (i = bits - 1; i >= 0; i--) { + * 3. R = 2 * R; + * 4. if (Y[i] == 0x1) + * 5. R += X; + * 6. if (R >= M) R -= M; + * 7. if (R >= M) R -= M; + * } + * + * In Steps 3 and 5, overflow should be avoided. The details are explained + * in each step. + * Steps 6 and 7 can be instead a modular operation (R %= M). + * + * Note that, in this implementation, the distributive property of modular + * operation shown in below is used whenever it can be applied. + * (X * Y) % M = X % M * Y % M, (X + Y) % M = X % M + Y % M + * + * For ease of understanding, an example is shown. + * Example: (11 * 5) % 7 + * + * [Notation] ":=" means substitution. + * + * [initial value] + * R := 0x0000 + * + * [i=2] + * R := (0x0000 * 2) % 0x0111 = 0x0000 + * R := (R + 0x1011) % 0x0111 = 0x0100 + * + * [i=1] + * R := (0x0100 * 2) % 0x0111 = 0x1000 % 0x0111 = 0x0001 + * + * [i=0] + * R := (0x0001 * 2) % 0x0111 = 0x0010 + * R := (R + 0x1011) % 0x0111 = 0x1101 % 0x0111 = 0x0110 + * + * [result] + * R = 6 + */ + + bits = nbits(y); + r = 0; + + for (i = bits - 1; i >= 0; i--) + { + if (r > UINT64CONST(0x7fffffffffffffff)) + /*----- + * To avoid overflow, transform from (2 * r) to (2 * r) % m, and + * further transform to mathematically equivalent form below: + * + * For ease of understanding, an example using one digit decimal number, + * where r = 6 and m = 7, is shown. + * (6 * 2) % 7 = (7 - 1) * 2 % 7 = (7 % 7 - 1 % 7) * 2 % 7 + * = (0 - 1) * 2 % 7 = -2 % 7 = 7 - 2 = 5. + * Generally, if (r * 2) overflows, + * (r * 2) % m = m - (m - r) * 2. + */ + r = m - ((m - r) << 1); + else + r <<= 1; + + if ((y >> i) & 0x1) + { + if (r > UINT64CONST(0xffffffffffffffff) - x) + /*----- + * To compute (r + x) without overflow: transform to + * (r + x) % m, and then to (r + x - m). + * + * An example using one digit decimal number, where r = 6, x = 5 and + * m = 7, is shown. + * (6 + 5) % 7 = (6 + (5 - 7 + 7)) % 7 = 6 % 7 + (5 - 7) % 7 + 7 % 7 + * = 6 + (5 - 7) + 0 = 4. + * Generally, if (r + x) overflows, + * (r + x) % m = r + (x - m). + */ + r += x - m; + else + r += x; + } + + r %= m; + } + + return r; +#endif +} + +/* + * Donald Knuth's linear congruential generator + * + * Relying on multiplication overflows is part of the design + * of this simple pseudo random number generator. + */ +#define DK_LCG_MUL UINT64CONST(6364136223846793005) +#define DK_LCG_INC UINT64CONST(1442695040888963407) + +/* do not use all small bits */ +#define LCG_SHIFT 13 + +/* + * PRP: parametric pseudo-random permutation + * + * Result in [0, size) is a permutation for inputs in the same set. + * + * Note that this function does not pass statistical tests: eg + * permutations of 2, 3, 4 or 5 ints are not strictly equiprobable. + * Things worsen for large sizes as there are many more permutations + * (size!) than seeds to select them (2^64 < 21!). + * However it is inexpensive compared to an actual encryption function, + * and the quality is good enough to avoid trivial correlations on + * large sizes, which is the expected use case. + * + * THIS FUNCTION IS NOT CRYPTOGRAPHICALLY SECURE. + * DO NOT USE FOR SUCH PURPOSE. + */ +static int64 +pseudorandom_perm(const int64 data, const int64 isize, const int64 seed) +{ + /* computations are performed on unsigned values */ + uint64 size = (uint64) isize; + uint64 v = (uint64) data % size; + uint64 key = (uint64) seed; + /* size-1 ensures 2 possibly overlapping halves */ + uint64 mask = compute_mask(size - 1) >> 1; + + /* nothing to permute */ + if (isize == 1) + return 0; + + Assert(isize >= 2); + + /*----- + * Apply 4 rounds of bijective transformations using key updated + * at each stage: + * + * (1) scramble: partial xors on overlapping power-of-2 subsets + * for instance with v in 0 .. 14 (i.e. with size == 15): + * if v is in 0 .. 7 do v = (v ^ k) % 8 + * if v is in 7 .. 14 do v = 14 - ((14-v) ^ k) % 8 + * note that because of the overlap (here 7), v may be changed twice. + * this transformation if bijective because the condition to apply it + * is still true after applying it, and xor itself is bijective on a + * power-of-2 size. + * + * (2) scatter: linear modulo + * v = (v * p + k) % size + * this transformation is bijective is p & size are prime, which is + * ensured in the code by the while loop which discards primes when + * size is a multiple of it. + * + */ + for (unsigned int i = 0, p = key % PRP_PRIMES; i < PRP_ROUNDS; i++, p = (p + 1) % PRP_PRIMES) + { + uint64 t; + + /* first "half" whitening, for v in 0 .. mask */ + key = key * DK_LCG_MUL + DK_LCG_INC; + if (v <= mask) + v ^= (key >> LCG_SHIFT) & mask; + + /* second (possibly overlapping) "half" whitening */ + key = key * DK_LCG_MUL + DK_LCG_INC; + t = size - 1 - v; + if (t <= mask) + { + t ^= (key >> LCG_SHIFT) & mask; + v = size - 1 - t; + } + + /* at most 2 primes are skipped for a given size */ + while (unlikely(size % primes[p] == 0)) + p = (p + 1) % PRP_PRIMES; + + /* scatter values with a prime multiplication */ + key = key * DK_LCG_MUL + DK_LCG_INC; + + /* Performance shortcut for 24 bit primes, ok for size up to ~10E12 */ + if ((v & UINT64CONST(0xffffffffff)) == v) + v = (primes[p] * v + (key >> LCG_SHIFT)) % size; + else + /*----- + * Note: the add cannot overflow as size is under 63 bits: + * mmv = mm(prime, v, size) < size <= 0x7fffffffffffffff = (1<<63)-1 + * ks = key >> LCG_SHIFTS <= 2^51 + * => mmv + ks < (1<<64) - 1 + */ + v = (modular_multiply(primes[p], v, size) + (key >> LCG_SHIFT)) % size; + } + + /* back to signed */ + return (int64) v; +} + /* * Initialize the given SimpleStats struct to all zeroes */ @@ -2384,6 +2686,27 @@ evalStandardFunc(CState *st, return true; } + case PGBENCH_PRPERM: + { + int64 val, size, seed; + + Assert(nargs == 3); + + if (!coerceToInt(&vargs[0], &val) || + !coerceToInt(&vargs[1], &size) || + !coerceToInt(&vargs[2], &seed)) + return false; + + if (size < 1) + { + fprintf(stderr, "pr_perm size parameter must be >= 1\n"); + return false; + } + + setIntValue(retval, pseudorandom_perm(val, size, seed)); + return true; + } + default: /* cannot get here */ Assert(0); diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h index c4a1e298e0..573a67ec69 100644 --- a/src/bin/pgbench/pgbench.h +++ b/src/bin/pgbench/pgbench.h @@ -99,7 +99,8 @@ typedef enum PgBenchFunction PGBENCH_IS, PGBENCH_CASE, PGBENCH_HASH_FNV1A, - PGBENCH_HASH_MURMUR2 + PGBENCH_HASH_MURMUR2, + PGBENCH_PRPERM } PgBenchFunction; typedef struct PgBenchExpr PgBenchExpr; diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index 5a2fdb9acb..f7f1dfa055 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -326,6 +326,17 @@ pgbench( qr{command=98.: int 5432\b}, # :random_seed qr{command=99.: int -9223372036854775808\b}, # min int qr{command=100.: int 9223372036854775807\b}, # max int + qr{command=101.: boolean true\b}, + qr{command=102.: boolean true\b}, + qr{command=103.: boolean true\b}, + qr{command=104.: boolean true\b}, + qr{command=105.: boolean true\b}, + qr{command=109.: boolean true\b}, + qr{command=110.: boolean true\b}, + qr{command=111.: boolean true\b}, + qr{command=112.: boolean true\b}, + qr{command=113.: int 9223372036854775797\b}, + qr{command=114.: boolean true\b}, ], 'pgbench expressions', { @@ -453,6 +464,37 @@ SELECT :v0, :v1, :v2, :v3; -- minint constant parsing \set min debug(-9223372036854775808) \set max debug(-(:min + 1)) +-- pseudo-random permutation +\set t debug(pr_perm(0, 2) + pr_perm(1, 2) = 1) +\set t debug(pr_perm(0, 3) + pr_perm(1, 3) + pr_perm(2, 3) = 3) +\set t debug(pr_perm(0, 4) + pr_perm(1, 4) + pr_perm(2, 4) + pr_perm(3, 4) = 6) +\set t debug(pr_perm(0, 5) + pr_perm(1, 5) + pr_perm(2, 5) + pr_perm(3, 5) + pr_perm(4, 5) = 10) +\set t debug(pr_perm(0, 16) + pr_perm(1, 16) + pr_perm(2, 16) + pr_perm(3, 16) + \ + pr_perm(4, 16) + pr_perm(5, 16) + pr_perm(6, 16) + pr_perm(7, 16) + \ + pr_perm(8, 16) + pr_perm(9, 16) + pr_perm(10, 16) + pr_perm(11, 16) + \ + pr_perm(12, 16) + pr_perm(13, 16) + pr_perm(14, 16) + pr_perm(15, 16) = 120) +-- random sanity check +\set size random(2, 1000) +\set v random(0, :size - 1) +\set p pr_perm(:v, :size) +\set t debug(0 <= :p and :p < :size and :p = pr_perm(:v + :size, :size) and :p <> pr_perm(:v + 1, :size)) +-- actual values +\set t debug(pr_perm(:v, 1) = 0) +\set t debug(pr_perm(0, 2, 5432) = 0 and pr_perm(1, 2, 5432) = 1 and \ + pr_perm(0, 2, 5431) = 1 and pr_perm(1, 2, 5431) = 0) +-- ~50 bits test to exercise full modular multiplication +\set t debug(pr_perm(0, 1000000000000000, 54321) = 9406454989259 and \ + pr_perm(1999999999999999, 1000000000000000, 54321) = 54570773902028 and \ + pr_perm(2500000000000000, 1000000000000000, 54321) = 771082311307468) +-- 63 bits tests +\set size debug(:max - 10) +\set ok debug(pr_perm(:size-1, :size, 5432) = 7214172101785397543 and \ + pr_perm(:size-2, :size, 5432) = 4060085360303920649 and \ + pr_perm(:size-3, :size, 5432) = 919477102797071521 and \ + pr_perm(:size-4, :size, 5432) = 7588404289602340497 and \ + pr_perm(:size-5, :size, 5432) = 5568354808723584469 and \ + pr_perm(:size-6, :size, 5432) = 2410458883211907565 and \ + pr_perm(:size-7, :size, 5432) = 1738667467693569814) } }); @@ -766,9 +808,13 @@ SELECT LEAST(} . join(', ', (':i') x 256) . q{)} ], [ 'misc empty script', 1, [qr{empty command list for script}], q{} ], [ - 'bad boolean', 2, + 'bad boolean', 2, [qr{malformed variable.*trueXXX}], q{\set b :badtrue or true} ], + [ + 'invalid pr_perm size', 2, + [qr{pr_perm size parameter must be >= 1}], q{\set i pr_perm(0, 0)} + ], # GSET [ diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl index f7fa18418b..714618fc40 100644 --- a/src/bin/pgbench/t/002_pgbench_no_server.pl +++ b/src/bin/pgbench/t/002_pgbench_no_server.pl @@ -315,6 +315,16 @@ my @script_tests = ( 'double overflow 3', [qr{double constant overflow}], { 'overflow-3.sql' => "\\set d .1E310\n" } + ], + [ + 'not enough arguments for pr_perm', + [qr{unexpected number of arguments \(pr_perm\)}], + { 'bad-pr_perm-1.sql' => "\\set i pr_perm(1)\n" } + ], + [ + 'too many arguments for pr_perm', + [qr{unexpected number of arguments \(pr_perm\)}], + { 'bad-pr_perm-2.sql' => "\\set i pr_perm(1, 2, 3, 4)\n" } ],); for my $t (@script_tests)