Hi Fabien, > To summarize: > - better software engineering > - similar speed (slightly slower) > - better statistical quality > - quite small state > - soundness
Personally, I think your patch is great. Speaking of the speed I believe we should consider the performance of the entire DBMS in typical scenarios, not the performance of the single procedure. I'm pretty sure in these terms the impact of your patch is neglectable now, and almost certainly beneficial in the long term because of better randomness. While reviewing your patch I noticed that you missed test_integerset.c. Here is an updated patch. -- Best regards, Aleksander Alekseev
diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 2c2f149fb0..146b524076 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -1188,7 +1188,7 @@ file_acquire_sample_rows(Relation onerel, int elevel, * Found a suitable tuple, so save it, replacing one old tuple * at random */ - int k = (int) (targrows * sampler_random_fract(rstate.randstate)); + int k = (int) (targrows * sampler_random_fract(&rstate.randstate)); Assert(k >= 0 && k < targrows); heap_freetuple(rows[k]); diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index c48a421e88..174859ad64 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -5094,7 +5094,7 @@ analyze_row_processor(PGresult *res, int row, PgFdwAnalyzeState *astate) if (astate->rowstoskip <= 0) { /* Choose a random reservoir element to replace. */ - pos = (int) (targrows * sampler_random_fract(astate->rstate.randstate)); + pos = (int) (targrows * sampler_random_fract(&astate->rstate.randstate)); Assert(pos >= 0 && pos < targrows); heap_freetuple(astate->rows[pos]); } diff --git a/contrib/tsm_system_rows/tsm_system_rows.c b/contrib/tsm_system_rows/tsm_system_rows.c index 4996612902..1a46d4b143 100644 --- a/contrib/tsm_system_rows/tsm_system_rows.c +++ b/contrib/tsm_system_rows/tsm_system_rows.c @@ -69,7 +69,7 @@ static BlockNumber system_rows_nextsampleblock(SampleScanState *node, BlockNumbe static OffsetNumber system_rows_nextsampletuple(SampleScanState *node, BlockNumber blockno, OffsetNumber maxoffset); -static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate); +static uint32 random_relative_prime(uint32 n, pg_prng_state *randstate); /* @@ -213,25 +213,25 @@ system_rows_nextsampleblock(SampleScanState *node, BlockNumber nblocks) if (sampler->step == 0) { /* Initialize now that we have scan descriptor */ - SamplerRandomState randstate; + pg_prng_state randstate; /* If relation is empty, there's nothing to scan */ if (nblocks == 0) return InvalidBlockNumber; /* We only need an RNG during this setup step */ - sampler_random_init_state(sampler->seed, randstate); + sampler_random_init_state(sampler->seed, &randstate); /* Compute nblocks/firstblock/step only once per query */ sampler->nblocks = nblocks; /* Choose random starting block within the relation */ /* (Actually this is the predecessor of the first block visited) */ - sampler->firstblock = sampler_random_fract(randstate) * + sampler->firstblock = sampler_random_fract(&randstate) * sampler->nblocks; /* Find relative prime as step size for linear probing */ - sampler->step = random_relative_prime(sampler->nblocks, randstate); + sampler->step = random_relative_prime(sampler->nblocks, &randstate); } /* Reinitialize lb */ @@ -317,7 +317,7 @@ gcd(uint32 a, uint32 b) * (else return 1). */ static uint32 -random_relative_prime(uint32 n, SamplerRandomState randstate) +random_relative_prime(uint32 n, pg_prng_state *randstate) { uint32 r; diff --git a/contrib/tsm_system_time/tsm_system_time.c b/contrib/tsm_system_time/tsm_system_time.c index 788d8f9a68..36acc6c106 100644 --- a/contrib/tsm_system_time/tsm_system_time.c +++ b/contrib/tsm_system_time/tsm_system_time.c @@ -69,7 +69,7 @@ static BlockNumber system_time_nextsampleblock(SampleScanState *node, BlockNumbe static OffsetNumber system_time_nextsampletuple(SampleScanState *node, BlockNumber blockno, OffsetNumber maxoffset); -static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate); +static uint32 random_relative_prime(uint32 n, pg_prng_state *randstate); /* @@ -224,25 +224,25 @@ system_time_nextsampleblock(SampleScanState *node, BlockNumber nblocks) if (sampler->step == 0) { /* Initialize now that we have scan descriptor */ - SamplerRandomState randstate; + pg_prng_state randstate; /* If relation is empty, there's nothing to scan */ if (nblocks == 0) return InvalidBlockNumber; /* We only need an RNG during this setup step */ - sampler_random_init_state(sampler->seed, randstate); + sampler_random_init_state(sampler->seed, &randstate); /* Compute nblocks/firstblock/step only once per query */ sampler->nblocks = nblocks; /* Choose random starting block within the relation */ /* (Actually this is the predecessor of the first block visited) */ - sampler->firstblock = sampler_random_fract(randstate) * + sampler->firstblock = sampler_random_fract(&randstate) * sampler->nblocks; /* Find relative prime as step size for linear probing */ - sampler->step = random_relative_prime(sampler->nblocks, randstate); + sampler->step = random_relative_prime(sampler->nblocks, &randstate); } /* Reinitialize lb and start_time */ @@ -330,7 +330,7 @@ gcd(uint32 a, uint32 b) * (else return 1). */ static uint32 -random_relative_prime(uint32 n, SamplerRandomState randstate) +random_relative_prime(uint32 n, pg_prng_state *randstate) { uint32 r; diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 426c1e6710..092d53d28a 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -1294,7 +1294,7 @@ acquire_sample_rows(Relation onerel, int elevel, * Found a suitable tuple, so save it, replacing one old * tuple at random */ - int k = (int) (targrows * sampler_random_fract(rstate.randstate)); + int k = (int) (targrows * sampler_random_fract(&rstate.randstate)); Assert(k >= 0 && k < targrows); heap_freetuple(rows[k]); diff --git a/src/backend/optimizer/geqo/geqo_random.c b/src/backend/optimizer/geqo/geqo_random.c index f21bc047e6..8b42a9ffba 100644 --- a/src/backend/optimizer/geqo/geqo_random.c +++ b/src/backend/optimizer/geqo/geqo_random.c @@ -21,14 +21,7 @@ geqo_set_seed(PlannerInfo *root, double seed) { GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private; - /* - * XXX. This seeding algorithm could certainly be improved - but it is not - * critical to do so. - */ - memset(private->random_state, 0, sizeof(private->random_state)); - memcpy(private->random_state, - &seed, - Min(sizeof(private->random_state), sizeof(seed))); + pg_prng_fseed(&private->random_state, seed); } double @@ -36,5 +29,5 @@ geqo_rand(PlannerInfo *root) { GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private; - return pg_erand48(private->random_state); + return pg_prng_f64(&private->random_state); } diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c index 098bbb372b..37fcd645e0 100644 --- a/src/backend/utils/adt/float.c +++ b/src/backend/utils/adt/float.c @@ -65,7 +65,7 @@ float8 degree_c_one = 1.0; /* State for drandom() and setseed() */ static bool drandom_seed_set = false; -static unsigned short drandom_seed[3] = {0, 0, 0}; +static pg_prng_state drandom_seed = PG_PRNG_DEFAULT_STATE; /* Local function prototypes */ static double sind_q1(double x); @@ -2762,22 +2762,20 @@ drandom(PG_FUNCTION_ARGS) * Should that fail for some reason, we fall back on a lower-quality * seed based on current time and PID. */ - if (!pg_strong_random(drandom_seed, sizeof(drandom_seed))) + if (!pg_strong_random(&drandom_seed, sizeof(drandom_seed))) { TimestampTz now = GetCurrentTimestamp(); uint64 iseed; /* Mix the PID with the most predictable bits of the timestamp */ iseed = (uint64) now ^ ((uint64) MyProcPid << 32); - drandom_seed[0] = (unsigned short) iseed; - drandom_seed[1] = (unsigned short) (iseed >> 16); - drandom_seed[2] = (unsigned short) (iseed >> 32); + pg_prng_seed(&drandom_seed, iseed); } drandom_seed_set = true; } - /* pg_erand48 produces desired result range [0.0 - 1.0) */ - result = pg_erand48(drandom_seed); + /* produces desired result range [0.0 - 1.0) */ + result = pg_prng_f64(&drandom_seed); PG_RETURN_FLOAT8(result); } @@ -2790,7 +2788,6 @@ Datum setseed(PG_FUNCTION_ARGS) { float8 seed = PG_GETARG_FLOAT8(0); - uint64 iseed; if (seed < -1 || seed > 1 || isnan(seed)) ereport(ERROR, @@ -2798,11 +2795,7 @@ setseed(PG_FUNCTION_ARGS) errmsg("setseed parameter %g is out of allowed range [-1,1]", seed))); - /* Use sign bit + 47 fractional bits to fill drandom_seed[] */ - iseed = (int64) (seed * (float8) UINT64CONST(0x7FFFFFFFFFFF)); - drandom_seed[0] = (unsigned short) iseed; - drandom_seed[1] = (unsigned short) (iseed >> 16); - drandom_seed[2] = (unsigned short) (iseed >> 32); + pg_prng_fseed(&drandom_seed, seed); drandom_seed_set = true; PG_RETURN_VOID(); diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c index 0c327e823f..7b59128878 100644 --- a/src/backend/utils/misc/sampling.c +++ b/src/backend/utils/misc/sampling.c @@ -49,7 +49,7 @@ BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize, bs->t = 0; /* blocks scanned so far */ bs->m = 0; /* blocks selected so far */ - sampler_random_init_state(randseed, bs->randstate); + sampler_random_init_state(randseed, &bs->randstate); return Min(bs->n, bs->N); } @@ -98,7 +98,7 @@ BlockSampler_Next(BlockSampler bs) * less than k, which means that we cannot fail to select enough blocks. *---------- */ - V = sampler_random_fract(bs->randstate); + V = sampler_random_fract(&bs->randstate); p = 1.0 - (double) k / (double) K; while (V < p) { @@ -136,10 +136,10 @@ reservoir_init_selection_state(ReservoirState rs, int n) * Reservoir sampling is not used anywhere where it would need to return * repeatable results so we can initialize it randomly. */ - sampler_random_init_state(random(), rs->randstate); + sampler_random_init_state(random(), &rs->randstate); /* Initial value of W (for use when Algorithm Z is first applied) */ - rs->W = exp(-log(sampler_random_fract(rs->randstate)) / n); + rs->W = exp(-log(sampler_random_fract(&rs->randstate)) / n); } double @@ -154,7 +154,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n) double V, quot; - V = sampler_random_fract(rs->randstate); /* Generate V */ + V = sampler_random_fract(&rs->randstate); /* Generate V */ S = 0; t += 1; /* Note: "num" in Vitter's code is always equal to t - n */ @@ -186,7 +186,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n) tmp; /* Generate U and X */ - U = sampler_random_fract(rs->randstate); + U = sampler_random_fract(&rs->randstate); X = t * (W - 1.0); S = floor(X); /* S is tentatively set to floor(X) */ /* Test if U <= h(S)/cg(X) in the manner of (6.3) */ @@ -215,7 +215,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n) y *= numer / denom; denom -= 1; } - W = exp(-log(sampler_random_fract(rs->randstate)) / n); /* Generate W in advance */ + W = exp(-log(sampler_random_fract(&rs->randstate)) / n); /* Generate W in advance */ if (exp(log(y) / n) <= (t + X) / t) break; } @@ -230,24 +230,22 @@ reservoir_get_next_S(ReservoirState rs, double t, int n) *---------- */ void -sampler_random_init_state(long seed, SamplerRandomState randstate) +sampler_random_init_state(long seed, pg_prng_state *randstate) { - randstate[0] = 0x330e; /* same as pg_erand48, but could be anything */ - randstate[1] = (unsigned short) seed; - randstate[2] = (unsigned short) (seed >> 16); + pg_prng_seed(randstate, (uint64_t) seed); } /* Select a random value R uniformly distributed in (0 - 1) */ double -sampler_random_fract(SamplerRandomState randstate) +sampler_random_fract(pg_prng_state *randstate) { double res; - /* pg_erand48 returns a value in [0.0 - 1.0), so we must reject 0 */ + /* pg_prng_f64 returned value in [0.0 - 1.0), so we must reject 0.0 */ do { - res = pg_erand48(randstate); - } while (res == 0.0); + res = pg_prng_f64(randstate); + } while (unlikely(res == 0.0)); return res; } @@ -266,22 +264,28 @@ double anl_random_fract(void) { /* initialize if first time through */ - if (oldrs.randstate[0] == 0) - sampler_random_init_state(random(), oldrs.randstate); + if (!oldrs.randstate_initialized) + { + sampler_random_init_state(random(), &oldrs.randstate); + oldrs.randstate_initialized = true; + } /* and compute a random fraction */ - return sampler_random_fract(oldrs.randstate); + return sampler_random_fract(&oldrs.randstate); } double anl_init_selection_state(int n) { /* initialize if first time through */ - if (oldrs.randstate[0] == 0) - sampler_random_init_state(random(), oldrs.randstate); + if (!oldrs.randstate_initialized) + { + sampler_random_init_state(random(), &oldrs.randstate); + oldrs.randstate_initialized = true; + } /* Initial value of W (for use when Algorithm Z is first applied) */ - return exp(-log(sampler_random_fract(oldrs.randstate)) / n); + return exp(-log(sampler_random_fract(&oldrs.randstate)) / n); } double diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index dc84b7b9b7..a459424a63 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -342,16 +342,8 @@ typedef struct StatsData SimpleStats lag; } StatsData; -/* - * Struct to keep random state. - */ -typedef struct RandomState -{ - unsigned short xseed[3]; -} RandomState; - /* Various random sequences are initialized from this one. */ -static RandomState base_random_sequence; +static pg_prng_state base_random_sequence; /* Synchronization barrier for start and connection */ static THREAD_BARRIER_T barrier; @@ -453,7 +445,7 @@ typedef struct * Separate randomness for each client. This is used for random functions * PGBENCH_RANDOM_* during the execution of the script. */ - RandomState cs_func_rs; + pg_prng_state cs_func_rs; int use_file; /* index in sql_script for this client */ int command; /* command number in script */ @@ -490,9 +482,9 @@ typedef struct * random state to make all of them independent of each other and * therefore deterministic at the thread level. */ - RandomState ts_choose_rs; /* random state for selecting a script */ - RandomState ts_throttle_rs; /* random state for transaction throttling */ - RandomState ts_sample_rs; /* random state for log sampling */ + pg_prng_state ts_choose_rs; /* random state for selecting a script */ + pg_prng_state ts_throttle_rs; /* random state for transaction throttling */ + pg_prng_state ts_sample_rs; /* random state for log sampling */ int64 throttle_trigger; /* previous/next throttling (us) */ FILE *logfile; /* where to log, or NULL */ @@ -890,42 +882,34 @@ strtodouble(const char *str, bool errorOK, double *dv) } /* - * Initialize a random state struct. + * Initialize a prng state struct. * * We derive the seed from base_random_sequence, which must be set up already. */ static void -initRandomState(RandomState *random_state) +initRandomState(pg_prng_state *state) { - random_state->xseed[0] = (unsigned short) - (pg_jrand48(base_random_sequence.xseed) & 0xFFFF); - random_state->xseed[1] = (unsigned short) - (pg_jrand48(base_random_sequence.xseed) & 0xFFFF); - random_state->xseed[2] = (unsigned short) - (pg_jrand48(base_random_sequence.xseed) & 0xFFFF); + pg_prng_seed(state, pg_prng_u64(&base_random_sequence)); } + /* * Random number generator: uniform distribution from min to max inclusive. * * Although the limits are expressed as int64, you can't generate the full * int64 range in one call, because the difference of the limits mustn't - * overflow int64. In practice it's unwise to ask for more than an int32 - * range, because of the limited precision of pg_erand48(). + * overflow int64. */ static int64 -getrand(RandomState *random_state, int64 min, int64 max) +getrand(pg_prng_state *state, int64 min, int64 max) { /* * Odd coding is so that min and max have approximately the same chance of * being selected as do numbers between them. - * - * pg_erand48() is thread-safe and concurrent, which is why we use it - * rather than random(), which in glibc is non-reentrant, and therefore - * protected by a mutex, and therefore a bottleneck on machines with many - * CPUs. */ - return min + (int64) ((max - min + 1) * pg_erand48(random_state->xseed)); + int64 size = max - min + 1, + val = pg_prng_u64(state) & UINT64CONST(0x7FFFFFFFFFFFFFFF); + return min + (int64) (val % size); } /* @@ -934,8 +918,7 @@ getrand(RandomState *random_state, int64 min, int64 max) * value is exp(-parameter). */ static int64 -getExponentialRand(RandomState *random_state, int64 min, int64 max, - double parameter) +getExponentialRand(pg_prng_state *state, int64 min, int64 max, double parameter) { double cut, uniform, @@ -945,7 +928,7 @@ getExponentialRand(RandomState *random_state, int64 min, int64 max, Assert(parameter > 0.0); cut = exp(-parameter); /* erand in [0, 1), uniform in (0, 1] */ - uniform = 1.0 - pg_erand48(random_state->xseed); + uniform = 1.0 - pg_prng_f64(state); /* * inner expression in (cut, 1] (if parameter > 0), rand in [0, 1) @@ -958,8 +941,7 @@ getExponentialRand(RandomState *random_state, int64 min, int64 max, /* random number generator: gaussian distribution from min to max inclusive */ static int64 -getGaussianRand(RandomState *random_state, int64 min, int64 max, - double parameter) +getGaussianRand(pg_prng_state *state, int64 min, int64 max, double parameter) { double stdev; double rand; @@ -982,13 +964,13 @@ getGaussianRand(RandomState *random_state, int64 min, int64 max, do { /* - * pg_erand48 generates [0,1), but for the basic version of the + * pg_prng_f64 generates [0,1), but for the basic version of the * Box-Muller transform the two uniformly distributed random numbers * are expected in (0, 1] (see * https://en.wikipedia.org/wiki/Box-Muller_transform) */ - double rand1 = 1.0 - pg_erand48(random_state->xseed); - double rand2 = 1.0 - pg_erand48(random_state->xseed); + double rand1 = 1.0 - pg_prng_f64(state); + double rand2 = 1.0 - pg_prng_f64(state); /* Box-Muller basic form transform */ double var_sqrt = sqrt(-2.0 * log(rand1)); @@ -1018,7 +1000,7 @@ getGaussianRand(RandomState *random_state, int64 min, int64 max, * not be one. */ static int64 -getPoissonRand(RandomState *random_state, double center) +getPoissonRand(pg_prng_state *state, double center) { /* * Use inverse transform sampling to generate a value > 0, such that the @@ -1026,8 +1008,8 @@ getPoissonRand(RandomState *random_state, double center) */ double uniform; - /* erand in [0, 1), uniform in (0, 1] */ - uniform = 1.0 - pg_erand48(random_state->xseed); + /* pseudo-random value in [0, 1), uniform in (0, 1] */ + uniform = 1.0 - pg_prng_f64(state); return (int64) (-log(uniform) * center + 0.5); } @@ -1040,7 +1022,7 @@ getPoissonRand(RandomState *random_state, double center) * This works for s > 1.0, but may perform badly for s very close to 1.0. */ static int64 -computeIterativeZipfian(RandomState *random_state, int64 n, double s) +computeIterativeZipfian(pg_prng_state *state, int64 n, double s) { double b = pow(2.0, s - 1.0); double x, @@ -1055,8 +1037,8 @@ computeIterativeZipfian(RandomState *random_state, int64 n, double s) while (true) { /* random variates */ - u = pg_erand48(random_state->xseed); - v = pg_erand48(random_state->xseed); + u = pg_prng_f64(state); + v = pg_prng_f64(state); x = floor(pow(u, -1.0 / (s - 1.0))); @@ -1070,14 +1052,14 @@ computeIterativeZipfian(RandomState *random_state, int64 n, double s) /* random number generator: zipfian distribution from min to max inclusive */ static int64 -getZipfianRand(RandomState *random_state, int64 min, int64 max, double s) +getZipfianRand(pg_prng_state *state, int64 min, int64 max, double s) { int64 n = max - min + 1; /* abort if parameter is invalid */ Assert(MIN_ZIPFIAN_PARAM <= s && s <= MAX_ZIPFIAN_PARAM); - return min - 1 + computeIterativeZipfian(random_state, n, s); + return min - 1 + computeIterativeZipfian(state, n, s); } /* @@ -1134,7 +1116,7 @@ getHashMurmur2(int64 val, uint64 seed) * For small sizes, this generates each of the (size!) possible permutations * of integers in the range [0, size) with roughly equal probability. Once * the size is larger than 20, the number of possible permutations exceeds the - * number of distinct states of the internal pseudorandom number generators, + * number of distinct states of the internal pseudorandom number generator, * and so not all possible permutations can be generated, but the permutations * chosen should continue to give the appearance of being random. * @@ -1144,25 +1126,19 @@ getHashMurmur2(int64 val, uint64 seed) static int64 permute(const int64 val, const int64 isize, const int64 seed) { - RandomState random_state1; - RandomState random_state2; - uint64 size; - uint64 v; - int masklen; - uint64 mask; - int i; + /* using a high-end PRNG is probably overkill */ + pg_prng_state state; + uint64 size; + uint64 v; + int masklen; + uint64 mask; + int i; if (isize < 2) return 0; /* nothing to permute */ - /* Initialize a pair of random states using the seed */ - random_state1.xseed[0] = seed & 0xFFFF; - random_state1.xseed[1] = (seed >> 16) & 0xFFFF; - random_state1.xseed[2] = (seed >> 32) & 0xFFFF; - - random_state2.xseed[0] = (((uint64) seed) >> 48) & 0xFFFF; - random_state2.xseed[1] = seed & 0xFFFF; - random_state2.xseed[2] = (seed >> 16) & 0xFFFF; + /* Initialize prng state using the seed */ + pg_prng_seed(&state, (uint64) seed); /* Computations are performed on unsigned values */ size = (uint64) isize; @@ -1208,8 +1184,8 @@ permute(const int64 val, const int64 isize, const int64 seed) t; /* Random multiply (by an odd number), XOR and rotate of lower half */ - m = (uint64) getrand(&random_state1, 0, mask) | 1; - r = (uint64) getrand(&random_state2, 0, mask); + m = (pg_prng_u64(&state) & mask) | 1; + r = pg_prng_u64(&state) & mask; if (v <= mask) { v = ((v * m) ^ r) & mask; @@ -1217,8 +1193,8 @@ permute(const int64 val, const int64 isize, const int64 seed) } /* Random multiply (by an odd number), XOR and rotate of upper half */ - m = (uint64) getrand(&random_state1, 0, mask) | 1; - r = (uint64) getrand(&random_state2, 0, mask); + m = (pg_prng_u64(&state) & mask) | 1; + r = pg_prng_u64(&state) & mask; t = size - 1 - v; if (t <= mask) { @@ -1228,7 +1204,7 @@ permute(const int64 val, const int64 isize, const int64 seed) } /* Random offset */ - r = (uint64) getrand(&random_state2, 0, size - 1); + r = pg_prng_u64(&state) % (size - 1); v = (v + r) % size; } @@ -3787,7 +3763,7 @@ doLog(TState *thread, CState *st, * to the random sample. */ if (sample_rate != 0.0 && - pg_erand48(thread->ts_sample_rs.xseed) > sample_rate) + pg_prng_f64(&thread->ts_sample_rs) > sample_rate) return; /* should we aggregate the results or not? */ @@ -5681,12 +5657,11 @@ set_random_seed(const char *seed) if (seed != NULL) pg_log_info("setting random seed to %llu", (unsigned long long) iseed); + random_seed = iseed; /* Fill base_random_sequence with low-order bits of seed */ - base_random_sequence.xseed[0] = iseed & 0xFFFF; - base_random_sequence.xseed[1] = (iseed >> 16) & 0xFFFF; - base_random_sequence.xseed[2] = (iseed >> 32) & 0xFFFF; + pg_prng_seed(&base_random_sequence, (uint64_t) iseed); return true; } @@ -6369,9 +6344,7 @@ main(int argc, char **argv) /* set default seed for hash functions */ if (lookupVariable(&state[0], "default_seed") == NULL) { - uint64 seed = - ((uint64) pg_jrand48(base_random_sequence.xseed) & 0xFFFFFFFF) | - (((uint64) pg_jrand48(base_random_sequence.xseed) & 0xFFFFFFFF) << 32); + uint64 seed = pg_prng_u64(&base_random_sequence); for (i = 0; i < nclients; i++) if (!putVariableInt(&state[i], "startup", "default_seed", (int64) seed)) diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index 9cc5270e02..9f0eeef997 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -427,9 +427,9 @@ pgbench( # After explicit seeding, the four random checks (1-3,20) are # deterministic - qr{command=1.: int 13\b}, # uniform random - qr{command=2.: int 116\b}, # exponential random - qr{command=3.: int 1498\b}, # gaussian random + qr{command=1.: int 10\b}, # uniform random + qr{command=2.: int 100\b}, # exponential random + qr{command=3.: int 1526\b}, # gaussian random qr{command=4.: int 4\b}, qr{command=5.: int 5\b}, qr{command=6.: int 6\b}, @@ -496,6 +496,7 @@ pgbench( qr{command=109.: boolean true\b}, qr{command=110.: boolean true\b}, qr{command=111.: boolean true\b}, + qr{command=113.: boolean true\b}, ], 'pgbench expressions', { @@ -639,8 +640,17 @@ SELECT :v0, :v1, :v2, :v3; \set t debug(0 <= :p and :p < :size and :p = permute(:v + :size, :size) and :p <> permute(:v + 1, :size)) -- actual values \set t debug(permute(:v, 1) = 0) -\set t debug(permute(0, 2, 5432) = 0 and permute(1, 2, 5432) = 1 and \ - permute(0, 2, 5435) = 1 and permute(1, 2, 5435) = 0) +\set t debug(permute(0, 2, 5431) = 1 and permute(1, 2, 5431) = 0 and \ + permute(0, 2, 5432) = 0 and permute(1, 2, 5432) = 1) +-- 63 bits tests for checking permute portability across architectures +\set size debug(:max - 10) +\set t debug(permute(:size-1, :size, 5432) = 4352241160009873020 and \ + permute(:size-2, :size, 5432) = 4990004119691638710 and \ + permute(:size-3, :size, 5432) = 5868029867256127549 and \ + permute(:size-4, :size, 5432) = 1238830324886341378 and \ + permute(:size-5, :size, 5432) = 7415914367102906897 and \ + permute(:size-6, :size, 5432) = 3214501037984818995 and \ + permute(:size-7, :size, 5432) = 600660351261124695) } }); diff --git a/src/include/optimizer/geqo.h b/src/include/optimizer/geqo.h index 24dcdfb6cc..75bfada4a1 100644 --- a/src/include/optimizer/geqo.h +++ b/src/include/optimizer/geqo.h @@ -72,8 +72,8 @@ extern double Geqo_seed; /* 0 .. 1 */ */ typedef struct { - List *initial_rels; /* the base relations we are joining */ - unsigned short random_state[3]; /* state for pg_erand48() */ + List *initial_rels; /* the base relations we are joining */ + pg_prng_state random_state; /* PRNG state */ } GeqoPrivateData; diff --git a/src/include/port.h b/src/include/port.h index 82f63de325..a73c944c91 100644 --- a/src/include/port.h +++ b/src/include/port.h @@ -356,10 +356,24 @@ extern int gettimeofday(struct timeval *tp, struct timezone *tzp); #define pgoff_t off_t #endif -extern double pg_erand48(unsigned short xseed[3]); -extern long pg_lrand48(void); -extern long pg_jrand48(unsigned short xseed[3]); -extern void pg_srand48(long seed); +/* PRNG: Pseudo-Random Number Generator */ +typedef struct pg_prng_state { + uint64_t s0, s1; +} pg_prng_state; + +/* use Donald Knuth's LCG constants */ +#define PG_PRNG_DEFAULT_STATE \ + { UINT64CONST(0x5851F42D4C957F2D), UINT64CONST(0x14057B7EF767814F) } + +extern void pg_prng_seed(pg_prng_state *state, uint64_t seed); +extern void pg_prng_fseed(pg_prng_state *state, double fseed); +extern void pg_prng_strong_seed(pg_prng_state *state); +extern uint64_t pg_prng_u64(pg_prng_state *state); +extern int64_t pg_prng_i64(pg_prng_state *state); +extern uint32_t pg_prng_u32(pg_prng_state *state); +extern int32_t pg_prng_i32(pg_prng_state *state); +extern double pg_prng_f64(pg_prng_state *state); +extern bool pg_prng_bool(pg_prng_state *state); #ifndef HAVE_FLS extern int fls(int mask); diff --git a/src/include/utils/sampling.h b/src/include/utils/sampling.h index a58d14281b..97cd9b7aef 100644 --- a/src/include/utils/sampling.h +++ b/src/include/utils/sampling.h @@ -17,11 +17,9 @@ /* Random generator for sampling code */ -typedef unsigned short SamplerRandomState[3]; - extern void sampler_random_init_state(long seed, - SamplerRandomState randstate); -extern double sampler_random_fract(SamplerRandomState randstate); + pg_prng_state *randstate); +extern double sampler_random_fract(pg_prng_state *randstate); /* Block sampling methods */ @@ -32,7 +30,7 @@ typedef struct int n; /* desired sample size */ BlockNumber t; /* current block number */ int m; /* blocks selected so far */ - SamplerRandomState randstate; /* random generator state */ + pg_prng_state randstate; /* random generator state */ } BlockSamplerData; typedef BlockSamplerData *BlockSampler; @@ -46,8 +44,9 @@ extern BlockNumber BlockSampler_Next(BlockSampler bs); typedef struct { - double W; - SamplerRandomState randstate; /* random generator state */ + double W; + pg_prng_state randstate; /* random generator state */ + bool randstate_initialized; } ReservoirStateData; typedef ReservoirStateData *ReservoirState; diff --git a/src/port/Makefile b/src/port/Makefile index 52dbf5783f..9ff2056688 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -42,11 +42,11 @@ OBJS = \ $(PG_CRC32C_OBJS) \ bsearch_arg.o \ chklocale.o \ - erand48.o \ inet_net_ntop.o \ noblock.o \ path.o \ pg_bitutils.o \ + pg_prng.o \ pg_strong_random.o \ pgcheckdir.o \ pgmkdirp.o \ diff --git a/src/port/erand48.c b/src/port/erand48.c deleted file mode 100644 index a82ea94220..0000000000 --- a/src/port/erand48.c +++ /dev/null @@ -1,136 +0,0 @@ -/*------------------------------------------------------------------------- - * - * erand48.c - * - * This file supplies pg_erand48() and related functions, which except - * for the names are just like the POSIX-standard erand48() family. - * (We don't supply the full set though, only the ones we have found use - * for in Postgres. In particular, we do *not* implement lcong48(), so - * that there is no need for the multiplier and addend to be variable.) - * - * We used to test for an operating system version rather than - * unconditionally using our own, but (1) some versions of Cygwin have a - * buggy erand48() that always returns zero and (2) as of 2011, glibc's - * erand48() is strangely coded to be almost-but-not-quite thread-safe, - * which doesn't matter for the backend but is important for pgbench. - * - * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group - * - * Portions Copyright (c) 1993 Martin Birgmeier - * All rights reserved. - * - * You may redistribute unmodified or modified versions of this source - * code provided that the above copyright notice and this and the - * following conditions are retained. - * - * This software is provided ``as is'', and comes with no warranties - * of any kind. I shall in no event be liable for anything that happens - * to anyone/anything when using this software. - * - * IDENTIFICATION - * src/port/erand48.c - * - *------------------------------------------------------------------------- - */ - -#include "c.h" - -#include <math.h> - -/* These values are specified by POSIX */ -#define RAND48_MULT UINT64CONST(0x0005deece66d) -#define RAND48_ADD UINT64CONST(0x000b) - -/* POSIX specifies 0x330e's use in srand48, but the other bits are arbitrary */ -#define RAND48_SEED_0 (0x330e) -#define RAND48_SEED_1 (0xabcd) -#define RAND48_SEED_2 (0x1234) - -static unsigned short _rand48_seed[3] = { - RAND48_SEED_0, - RAND48_SEED_1, - RAND48_SEED_2 -}; - - -/* - * Advance the 48-bit value stored in xseed[] to the next "random" number. - * - * Also returns the value of that number --- without masking it to 48 bits. - * If caller uses the result, it must mask off the bits it wants. - */ -static uint64 -_dorand48(unsigned short xseed[3]) -{ - /* - * We do the arithmetic in uint64; any type wider than 48 bits would work. - */ - uint64 in; - uint64 out; - - in = (uint64) xseed[2] << 32 | (uint64) xseed[1] << 16 | (uint64) xseed[0]; - - out = in * RAND48_MULT + RAND48_ADD; - - xseed[0] = out & 0xFFFF; - xseed[1] = (out >> 16) & 0xFFFF; - xseed[2] = (out >> 32) & 0xFFFF; - - return out; -} - - -/* - * Generate a random floating-point value using caller-supplied state. - * Values are uniformly distributed over the interval [0.0, 1.0). - */ -double -pg_erand48(unsigned short xseed[3]) -{ - uint64 x = _dorand48(xseed); - - return ldexp((double) (x & UINT64CONST(0xFFFFFFFFFFFF)), -48); -} - -/* - * Generate a random non-negative integral value using internal state. - * Values are uniformly distributed over the interval [0, 2^31). - */ -long -pg_lrand48(void) -{ - uint64 x = _dorand48(_rand48_seed); - - return (x >> 17) & UINT64CONST(0x7FFFFFFF); -} - -/* - * Generate a random signed integral value using caller-supplied state. - * Values are uniformly distributed over the interval [-2^31, 2^31). - */ -long -pg_jrand48(unsigned short xseed[3]) -{ - uint64 x = _dorand48(xseed); - - return (int32) ((x >> 16) & UINT64CONST(0xFFFFFFFF)); -} - -/* - * Initialize the internal state using the given seed. - * - * Per POSIX, this uses only 32 bits from "seed" even if "long" is wider. - * Hence, the set of possible seed values is smaller than it could be. - * Better practice is to use caller-supplied state and initialize it with - * random bits obtained from a high-quality source of random bits. - * - * Note: POSIX specifies a function seed48() that allows all 48 bits - * of the internal state to be set, but we don't currently support that. - */ -void -pg_srand48(long seed) -{ - _rand48_seed[0] = RAND48_SEED_0; - _rand48_seed[1] = (unsigned short) seed; - _rand48_seed[2] = (unsigned short) (seed >> 16); -} diff --git a/src/port/pg_prng.c b/src/port/pg_prng.c new file mode 100644 index 0000000000..0ac5692e6a --- /dev/null +++ b/src/port/pg_prng.c @@ -0,0 +1,179 @@ +/* + * Pseudo-Random Number Generator + * + * Previous version was the 1988 (?) rand48 standard with 48 bits state LCG, + * designed for running on 32 bits or even 16 bits architectures, and to produce + * 32 bit ints and floats (i.e. *not* doubles). + * The implied 16-bits unpacking/packing may have a detrimental performance impact + * on modern 64 bits architectures. + * + * We (probably) want: + * - NOT to invent a new design! + * - one reasonable default PRNG for all pg internal uses, currently this is + * POSIX rand48. + * - something fast, close to rand48 (which basically does 2 arithmetic ops) + * no need for something cryptographic though, which would imply slow + * - to produce 64 bits integers & doubles with a 52 bits mantissa + * so state size > 64 bits. + * - a small state though, because we might generate quite a few of them + * for different purposes so state size <= 256 or even <= 128 bits + * - the state to be aligned to whatever => 128 bits + * - 64 bits operations for efficiency on modern architectures, + * but not 128 bits operations (yet:-). + * - not to depend on special hardware for speed (eg MMX/SSE/AES). + * - not something with obvious known and relevant defects. + * - not something with "rights" attached. + * + * These constraints reduce a lot the available options from + * https://en.wikipedia.org/wiki/List_of_random_number_generators + * + * LCG + * - POSIX rand48: m = 2⁴⁸, a = 25214903917, c = 11, extract 47…16 + * - MMIX by Donald Knuth: m = 2⁶⁴, a = 6364136223846793005, c = 1442695040888963407 + * - very bad on low bits, which MUST NOT BE USED + * a 128 bits would do, but no clear standards, and it would imply 128-bit ops. + * + * Xor-Shift-Rotate PRNGs: + * Xoroshiro128+ + * - fails statistical linearity tests in the low bits. + * Xoroshiro128** + * https://prng.di.unimi.it/xoroshiro128starstar.c + * - mild and irrelevant issues: close to zero state, stats issues after some transforms + * Xoshiro256** + * - it looks good, but some issues (no big deal for pg usage IMO) + * https://www.pcg-random.org/posts/a-quick-look-at-xoshiro256.html + * mild problems getting out of closes-to-zero state + * + * MT19937-64 + * + default in many places, standard + * - huge 2.5 KiB state (but TinyMT, 127 bits state space) + * - slow (but SFMT, twice as fast) + * - vulnerabilities on tinyMT? + * + * WELL + * - large state, 32-bit oriented + * + * ACORN Family + * https://en.wikipedia.org/wiki/ACORN_(PRNG) + * - Fortran / double oriented, no C implementation found?! + * - medium large state "In practice we recommend using k > 10, M = 2^60 (for general application) + * or M = 2^120 (for demanding applications requiring high-quality pseudo-random numbers + * that will consistently pass all the tests in standard test packages such as TestU01) + * and choose any odd value less than M for the seed." + * k = 11 => 88 bytes/704-bits state, too large:-( + * + * PCG Family + * - https://www.pcg-random.org/ + * - 32-bits design? while?? + * + * Splitmix? + * Others? + */ + +#include "c.h" +#include <math.h> + +#define FIRST_BIT_MASK UINT64CONST(0x8000000000000000) +#define HALF_BITS_MASK UINT64CONST(0x00000000FFFFFFFF) +#define DMANTISSA_MASK UINT64CONST(0x000FFFFFFFFFFFFF) + +/* 64-bits rotate left */ +static inline uint64_t rotl(const uint64_t x, const int bits) +{ + return (x << bits) | (x >> (64 - bits)); +} + +/* another 64-bits generator is used for state initialization */ +static uint64_t splitmix64(uint64_t * state) +{ + /* state update */ + uint64_t val = (*state += UINT64CONST(0x9E3779B97f4A7C15)); + + /* value extraction */ + val = (val ^ (val >> 30)) * UINT64CONST(0xBF58476D1CE4E5B9); + val = (val ^ (val >> 27)) * UINT64CONST(0x94D049BB133111EB); + + return val ^ (val >> 31); +} + +/* seed prng state from a 64 bits integer, ensuring non zero */ +void pg_prng_seed(pg_prng_state *state, uint64_t seed) +{ + state->s0 = splitmix64(&seed); + state->s1 = splitmix64(&seed); +} + +/* seed with 53 bits (mantissa & sign) from a float */ +void pg_prng_fseed(pg_prng_state *state, double fseed) +{ + uint64 seed = (int64) (((double) DMANTISSA_MASK) * fseed); + pg_prng_seed(state, seed); +} + +/* strong random seeding */ +void pg_prng_strong_seed(pg_prng_state *state) +{ + pg_strong_random((void *) state, sizeof(pg_prng_state)); + + /* avoid zero with Donald Knuth's LCG parameters */ + if (unlikely(state->s0 == 0 && state->s1 == 0)) + { + /* should it warn that something is amiss if we get there? */ + pg_prng_state def = PG_PRNG_DEFAULT_STATE; + *state = def; + } +} + +/* generator & state update */ +static uint64_t xoroshiro128ss(pg_prng_state *state) +{ + const uint64_t s0 = state->s0, + sx = state->s1 ^ s0, + val = rotl(s0 * 5, 7) * 9; + + /* update state */ + state->s0 = rotl(s0, 24) ^ sx ^ (sx << 16); + state->s1 = rotl(sx, 37); + + return val; +} + +/* u64 generator */ +uint64_t pg_prng_u64(pg_prng_state *state) +{ + return xoroshiro128ss(state); +} + +/* i64 generator */ +int64_t pg_prng_i64(pg_prng_state *state) +{ + return (int64_t) xoroshiro128ss(state); +} + +/* u32 generator */ +uint32_t pg_prng_u32(pg_prng_state *state) +{ + const uint64_t v = xoroshiro128ss(state); + return (uint32_t) (((v >> 32) ^ v) & HALF_BITS_MASK); +} + +/* i32 generator */ +int32_t pg_prng_i32(pg_prng_state *state) +{ + const uint64_t v = xoroshiro128ss(state); + return (int32_t) (((v >> 32) ^ v) & HALF_BITS_MASK); +} + +/* double generator */ +double pg_prng_f64(pg_prng_state *state) +{ + uint64_t v = xoroshiro128ss(state); + return ldexp((double) (v & DMANTISSA_MASK), -52); +} + +/* bool generator */ +bool pg_prng_bool(pg_prng_state *state) +{ + uint64_t v = xoroshiro128ss(state); + return (v & FIRST_BIT_MASK) == FIRST_BIT_MASK; +} diff --git a/src/port/random.c b/src/port/random.c index 2dd59a0829..136ed9dd8a 100644 --- a/src/port/random.c +++ b/src/port/random.c @@ -1,7 +1,7 @@ /*------------------------------------------------------------------------- * * random.c - * random() wrapper + * random() and srandom() wrapper * * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -15,11 +15,17 @@ #include "c.h" -#include <math.h> +/* global shared state: not thread safe! */ +static pg_prng_state state = PG_PRNG_DEFAULT_SEED; +void +srandom(unsigned int seed) +{ + pg_prng_seed(&state, (uint64_t) seed); +} long random(void) { - return pg_lrand48(); + return pg_prng_i32(&state); } diff --git a/src/port/srandom.c b/src/port/srandom.c deleted file mode 100644 index cf1007b2ef..0000000000 --- a/src/port/srandom.c +++ /dev/null @@ -1,25 +0,0 @@ -/*------------------------------------------------------------------------- - * - * srandom.c - * srandom() wrapper - * - * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * src/port/srandom.c - * - *------------------------------------------------------------------------- - */ - -#include "c.h" - -#include <math.h> - - -void -srandom(unsigned int seed) -{ - pg_srand48((long int) seed); -} diff --git a/src/test/modules/test_integerset/test_integerset.c b/src/test/modules/test_integerset/test_integerset.c index 21c6f49b37..c0923e35c5 100644 --- a/src/test/modules/test_integerset/test_integerset.c +++ b/src/test/modules/test_integerset/test_integerset.c @@ -248,7 +248,7 @@ test_pattern(const test_spec *spec) * only a small part of the integer space is used. We would very * rarely hit values that are actually in the set. */ - x = (pg_lrand48() << 31) | pg_lrand48(); + x = (random() << 31) | random(); x = x % (last_int + 1000); /* Do we expect this value to be present in the set? */ @@ -571,7 +571,7 @@ test_huge_distances(void) */ while (num_values < 1000) { - val += pg_lrand48(); + val += random(); values[num_values++] = val; }