For whatever it is worth, the patch looks good to me.
A minor nitpick would be to use a verb in the part:
`cost when the parameter in (0, 1)`
maybe:
`cost when the parameter's value is in (0, 1)` or similar.
Looks ok.
Apart from that, I would suggest it that the patch could be moved to
Waiting for Author state.
Attached an upated.
--
Fabien.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 24833f46bc..2ded558163 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -1607,7 +1607,8 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
"Non-Uniform Random Variate Generation", Luc Devroye, p. 550-551,
Springer 1986. The distribution is not defined when the parameter's
value is 1.0. The function's performance is poor for parameter values
- close and above 1.0 and on a small range.
+ very close and above 1.0 and on a small range. Also, there is a large setup
+ cost when the parameter's value is in (0, 1) and the interval is large.
</para>
<para>
<replaceable>parameter</replaceable> defines how skewed the distribution
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 4789ab92ee..fa35c27736 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -410,7 +410,7 @@ typedef struct
} CState;
/*
- * Cache cell for random_zipfian call
+ * Cache cell for random_zipfian call for when s in (0.0, 1.0)
*/
typedef struct
{
@@ -418,7 +418,7 @@ typedef struct
double s; /* s - parameter of random_zipfian function */
int64 n; /* number of elements in range (max - min + 1) */
- double harmonicn; /* generalizedHarmonicNumber(n, s) */
+ double harmonicn; /* cache expensive generalizedHarmonicNumber(n, s) */
double alpha;
double beta;
double eta;
@@ -982,15 +982,17 @@ getPoissonRand(RandomState *random_state, double center)
return (int64) (-log(uniform) * center + 0.5);
}
-/* helper function for getZipfianRand */
+/* expensive helper function for getZipfianRand when s in (0.0, 1.0) */
static double
generalizedHarmonicNumber(int64 n, double s)
{
- int i;
+ int64 i;
double ans = 0.0;
+ /* computed in reverse order for better numerical precision */
for (i = n; i > 1; i--)
ans += pow(i, -s);
+
return ans + 1.0;
}
@@ -1053,6 +1055,12 @@ zipfFindOrCreateCacheCell(ZipfCache *cache, int64 n, double s)
* Computing zipfian using rejection method, based on
* "Non-Uniform Random Variate Generation",
* Luc Devroye, p. 550-551, Springer 1986.
+ *
+ * This method applies when s > 1.0 and works with an unbounded n,
+ * although the implementation is bounded.
+ *
+ * It is inefficient when s is close to 1.0 or n is small because the
+ * while loop is more likely to be iterated.
*/
static int64
computeIterativeZipfian(RandomState *random_state, int64 n, double s)
@@ -1083,6 +1091,12 @@ computeIterativeZipfian(RandomState *random_state, int64 n, double s)
* Computing zipfian using harmonic numbers, based on algorithm described in
* "Quickly Generating Billion-Record Synthetic Databases",
* Jim Gray et al, SIGMOD 1994
+ *
+ * This method applies when s in (0.0, 1.0). It does zipf for the first
+ * two values, and then uses an exponential approximation starting from
+ * the third value.
+ *
+ * There is a potentially very large setup cost when n is large.
*/
static int64
computeHarmonicZipfian(ZipfCache *zipf_cache, RandomState *random_state,
@@ -1109,7 +1123,7 @@ getZipfianRand(ZipfCache *zipf_cache, RandomState *random_state, int64 min,
/* abort if parameter is invalid */
Assert(s > 0.0 && s != 1.0 && s <= MAX_ZIPFIAN_PARAM);
- return min - 1 + ((s > 1)
+ return min - 1 + ((s > 1.0)
? computeIterativeZipfian(random_state, n, s)
: computeHarmonicZipfian(zipf_cache, random_state, n, s));
}