Hello Fabien,
10/01/2018 21:42, Fabien COELHO пишет:
>>>> Should we probably add some infrastructure for optional arguments?
>>>
>>> You can look at the handling of "CASE" which may or may not have an
>>> "ELSE" clause.
>>>
>>> I'd suggest you use a new negative argument with the special meaning
>>> for hash, and create the seed value when missing when building the
>>> function, so as to simplify the executor code.
>
>> Added a new nargs option -3 for hash functions and moved the argument
>> check to the parser. It's starting to look a bit odd and I'm thinking about
>> replacing the bare numbers (-1, -2, -3) with #defined macros. E.g.:
>>
>> #define PGBENCH_NARGS_VARIABLE (-1)
>> #define PGBENCH_NARGS_CASE (-2)
>> #define PGBENCH_NARGS_HASH (-3)
>
> Yes, I'm more than fine with improving readability.
>
Added macros.
>>> Instead of 0, I'd consider providing a random default so that the
>>> hashing behavior is not the same from one run to the next. What do you
>>> think?
>>
>> Makes sense, since each thread also initializes its random_state with
>> random numbers before starting. So I added a global variable 'hash_seed'
>> and initialize it with random() before the threads are spawned.
>
> Hmm. I do not think that we should want a shared seed value. The seed
> should be different for each call so as to avoid undesired
> correlations. If wanted, correlation could be obtained by using an
> explicit identical seed.
Probably I'm missing something, but I cannot see the point. If we change the
seed on every invocation then we get a uniform-like distribution (see the
attached image). And we don't get the same hash value for the same input,
which is the whole point of hash functions. Maybe I didn't understand
you correctly.
Anyway I've attached a new version with some tests and docs added.
--
Ildar Musin
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 3dd492c..c575f19 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -1246,6 +1246,27 @@ pgbench <optional> <replaceable>options</replaceable>
</optional> <replaceable>d
<entry><literal>5</literal></entry>
</row>
<row>
+ <entry><literal><function>hash(<replaceable>a</replaceable> [,
<replaceable>seed</replaceable> ] )</function></literal></entry>
+ <entry>integer</entry>
+ <entry>alias for <literal>hash_murmur2()</literal></entry>
+ <entry><literal>hash(10, 5432)</literal></entry>
+ <entry><literal>-5817877081768721676</literal></entry>
+ </row>
+ <row>
+ <entry><literal><function>hash_fnv1a(<replaceable>a</replaceable> [,
<replaceable>seed</replaceable> ] )</function></literal></entry>
+ <entry>integer</entry>
+ <entry><literal>FNV-1a</literal> hash</entry>
+ <entry><literal>hash_fnv1a(10, 5432)</literal></entry>
+ <entry><literal>-7793829335365542153</literal></entry>
+ </row>
+ <row>
+ <entry><literal><function>hash_murmur2(<replaceable>a</replaceable> [,
<replaceable>seed</replaceable> ] )</function></literal></entry>
+ <entry>integer</entry>
+ <entry><literal>murmur2</literal> hash</entry>
+ <entry><literal>hash_murmur2(10, 5432)</literal></entry>
+ <entry><literal>-5817877081768721676</literal></entry>
+ </row>
+ <row>
<entry><literal><function>int(<replaceable>x</replaceable>)</function></literal></entry>
<entry>integer</entry>
<entry>cast to int</entry>
diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y
index e23ca51..9668b3d 100644
--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -16,6 +16,10 @@
#include "pgbench.h"
+#define PGBENCH_NARGS_VARIABLE (-1)	/* least & greatest: #args >= 1 */
+#define PGBENCH_NARGS_CASE (-2)	/* "CASE WHEN ...": #args >= 3 and odd */
+#define PGBENCH_NARGS_HASH (-3)	/* hash functions: 1 required + 1 optional arg */
+
PgBenchExpr *expr_parse_result;
static PgBenchExprList *make_elist(PgBenchExpr *exp, PgBenchExprList *list);
@@ -226,9 +230,13 @@ make_uop(yyscan_t yyscanner, const char *operator,
PgBenchExpr *expr)
/*
* List of available functions:
* - fname: function name, "!..." for special internal functions
- * - nargs: number of arguments
- * -1 is a special value for least & greatest meaning
#args >= 1
- * -2 is for the "CASE WHEN ..." function, which has #args
>= 3 and odd
+ * - nargs: number of arguments. Special cases:
+ * - PGBENCH_NARGS_VARIABLE is a special value for least &
greatest
+ * meaning #args >= 1;
+ * - PGBENCH_NARGS_CASE is for the "CASE WHEN ..."
function, which
+ * has #args >= 3 and odd;
+ * - PGBENCH_NARGS_HASH is for hash functions, which have
one required
+ * and one optional argument;
* - tag: function identifier from PgBenchFunction enum
*/
static const struct
@@ -259,10 +267,10 @@ static const struct
"abs", 1, PGBENCH_ABS
},
{
- "least", -1, PGBENCH_LEAST
+ "least", PGBENCH_NARGS_VARIABLE, PGBENCH_LEAST
},
{
- "greatest", -1, PGBENCH_GREATEST
+ "greatest", PGBENCH_NARGS_VARIABLE, PGBENCH_GREATEST
},
{
"debug", 1, PGBENCH_DEBUG
@@ -347,7 +355,16 @@ static const struct
},
/* "case when ... then ... else ... end" construction */
{
- "!case_end", -2, PGBENCH_CASE
+ "!case_end", PGBENCH_NARGS_CASE, PGBENCH_CASE
+ },
+ {
+ "hash", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
+ },
+ {
+ "hash_murmur2", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
+ },
+ {
+ "hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
},
/* keep as last array element */
{
@@ -433,12 +450,12 @@ make_func(yyscan_t yyscanner, int fnumber,
PgBenchExprList *args)
PGBENCH_FUNCTIONS[fnumber].fname);
/* check at least one arg for least & greatest */
- if (PGBENCH_FUNCTIONS[fnumber].nargs == -1 &&
+ if (PGBENCH_FUNCTIONS[fnumber].nargs == PGBENCH_NARGS_VARIABLE &&
elist_length(args) == 0)
expr_yyerror_more(yyscanner, "at least one argument expected",
PGBENCH_FUNCTIONS[fnumber].fname);
/* special case: case (when ... then ...)+ (else ...)? end */
- if (PGBENCH_FUNCTIONS[fnumber].nargs == -2)
+ if (PGBENCH_FUNCTIONS[fnumber].nargs == PGBENCH_NARGS_CASE)
{
int len = elist_length(args);
@@ -447,6 +464,15 @@ make_func(yyscan_t yyscanner, int fnumber, PgBenchExprList
*args)
expr_yyerror_more(yyscanner, "odd and >= 3 number of
arguments expected",
"case control
structure");
}
+ /* special case: hash functions with optional arguments */
+ if (PGBENCH_FUNCTIONS[fnumber].nargs == PGBENCH_NARGS_HASH)
+ {
+ int len = elist_length(args);
+
+ if (len < 1 || len > 2)
+ expr_yyerror_more(yyscanner, "unexpected number of
arguments",
+
PGBENCH_FUNCTIONS[fnumber].fname);
+ }
expr->etype = ENODE_FUNCTION;
expr->u.function.function = PGBENCH_FUNCTIONS[fnumber].tag;
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 31ea6ca..6bf94cc 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -61,6 +61,14 @@
#define ERRCODE_UNDEFINED_TABLE "42P01"
/*
+ * Hashing constants used by getHashFnv1a() and getHashMurmur2()
+ */
+#define FNV_PRIME 0x100000001b3	/* per-octet multiplier; NOTE(review): literal has a signed type, consider an unsigned 64-bit constant */
+#define FNV_OFFSET_BASIS 0xcbf29ce484222325	/* initial FNV-1a state, XORed with the seed */
+#define MM2_MUL 0xc6a4a7935bd1e995	/* multiplier used in every Murmur2 mixing step */
+#define MM2_ROT 47	/* right-shift distance used in Murmur2 mixing */
+
+/*
* Multi-platform pthread implementations
*/
@@ -439,6 +447,8 @@ static int num_scripts; /* number of scripts in
sql_script[] */
static int num_commands = 0; /* total number of Command structs */
static int64 total_weight = 0;
+static int hash_seed; /* default seed used in hash functions
*/
+
static int debug = 0; /* debug flag */
/* Builtin test scripts */
@@ -915,6 +925,51 @@ getZipfianRand(TState *thread, int64 min, int64 max,
double s)
}
/*
+ * FNV-1a hash function: folds the eight octets of val, lowest first, into
+ * a state initialized to FNV_OFFSET_BASIS ^ seed.  The state is unsigned so
+ * that the multiplications wrap instead of overflowing a signed integer.
+ */
+static int64
+getHashFnv1a(int64 val, uint64 seed)
+{
+	uint64		result = FNV_OFFSET_BASIS ^ seed;
+	uint64		bits = (uint64) val;	/* unsigned: shifts are well defined */
+	int			i;
+
+	for (i = 0; i < 8; ++i)
+	{
+		result ^= bits & 0xff;
+		bits >>= 8;
+		result *= FNV_PRIME;
+	}
+
+	return (int64) result;
+}
+
+/*
+ * Murmur2 hash of a single 64-bit value: scramble the input word, fold it
+ * into a seed-derived state, then run the final shift/multiply rounds.
+ */
+static int64
+getHashMurmur2(int64 val, uint64 seed)
+{
+	uint64		state = seed ^ (sizeof(int64) * MM2_MUL);
+	uint64		word = (uint64) val * MM2_MUL;
+
+	/* scramble the input word */
+	word = (word ^ (word >> MM2_ROT)) * MM2_MUL;
+
+	/* fold it into the state */
+	state = (state ^ word) * MM2_MUL;
+
+	/* final mixing rounds */
+	state = (state ^ (state >> MM2_ROT)) * MM2_MUL;
+	state ^= state >> MM2_ROT;
+
+	return (int64) state;
+}
+
+/*
* Initialize the given SimpleStats struct to all zeroes
*/
static void
@@ -2209,6 +2264,34 @@ evalStandardFunc(
return true;
}
+ /* hashing */
+ case PGBENCH_HASH_FNV1A:
+ case PGBENCH_HASH_MURMUR2:
+ {
+ int64 val;
+ int64 seed;
+ int64 result;
+
+ Assert(nargs >= 1);
+
+ if (!coerceToInt(&vargs[0], &val))
+ return false;
+
+ /* read optional seed value */
+ if (nargs > 1)
+ {
+ if (!coerceToInt(&vargs[1], &seed))
+ return false;
+ }
+ else
+ seed = hash_seed;
+
+ result = (func == PGBENCH_HASH_FNV1A) ?
+ getHashFnv1a(val, seed) :
getHashMurmur2(val, seed);
+ setIntValue(retval, result);
+ return true;
+ }
+
default:
/* cannot get here */
Assert(0);
@@ -5054,6 +5137,9 @@ main(int argc, char **argv)
INSTR_TIME_SET_CURRENT(start_time);
srandom((unsigned int) INSTR_TIME_GET_MICROSEC(start_time));
+ /* set default seed for hash functions */
+ hash_seed = random();
+
/* set up thread data structures */
threads = (TState *) pg_malloc(sizeof(TState) * nthreads);
nclients_dealt = 0;
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index 0705ccd..6983865 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -97,7 +97,9 @@ typedef enum PgBenchFunction
PGBENCH_LE,
PGBENCH_LT,
PGBENCH_IS,
- PGBENCH_CASE
+ PGBENCH_CASE,
+ PGBENCH_HASH_FNV1A,
+ PGBENCH_HASH_MURMUR2
} PgBenchFunction;
typedef struct PgBenchExpr PgBenchExpr;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl
b/src/bin/pgbench/t/001_pgbench_with_server.pl
index a8b2962..eda28ea 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -259,6 +259,10 @@ pgbench(
qr{command=46.: int 46\b},
qr{command=47.: boolean true\b},
qr{command=48.: boolean true\b},
+ qr{command=49.: int -5817877081768721676\b},
+ qr{command=50.: boolean true\b},
+ qr{command=51.: int -7793829335365542153\b},
+ qr{command=52.: int -?\d+\b},
],
'pgbench expressions',
{ '001_pgbench_expressions' => q{-- integer functions
@@ -327,6 +331,11 @@ pgbench(
\set n6 debug(:n IS NULL AND NOT :f AND :t)
-- conditional truth
\set cs debug(CASE WHEN 1 THEN TRUE END AND CASE WHEN 1.0 THEN TRUE END AND
CASE WHEN :n THEN NULL ELSE TRUE END)
+-- hash functions
+\set h0 debug(hash(10, 5432))
+\set h1 debug(:h0 = hash_murmur2(10, 5432))
+\set h3 debug(hash_fnv1a(10, 5432))
+\set h4 debug(hash(10))
-- lazy evaluation
\set zy 0
\set yz debug(case when :zy = 0 then -1 else (1 / :zy) end)