Re: [HACKERS] extend pgbench expressions with functions

Fabien COELHO Thu, 05 Nov 2015 02:35:41 -0800


Hello Robert,


Here is a v13 and a small test script.

 - change names to random(), random_exponential() and random_gaussian()
   I find them too long, but if the committer wants that I cannot help
   it:-)

 - more comments, especially about the expression evaluation &
   type system.

 - improved documentation, in particular to include suggestions by Tomas
   Vondra about clarifying explanations about the gaussian &
   exponential random generators, and clear references from \setrandom
   to the \set expressions.

 - still just one patch, because removing double would mean removing the 2
   exponential & gaussian random functions which require a double
   argument.

   Note that I started with one small patch for adding the infrastructure,
   but then Heikki requested more functions including double type stuff to
   illustrate the point, then Robert asks to break it back, going forward
   and backward is tiring...

 - still "lousy" *debug functions, because I found them useful for
   debugging and testing, really. It is easy to remove them, but I would
   advise against doing that as it would make debugging an expression
   much less straightforward.

--
Fabien.

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 0ac40f1..38d0994 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -771,24 +771,35 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
       Sets variable <replaceable>varname</> to an integer value calculated
       from <replaceable>expression</>.
       The expression may contain integer constants such as <literal>5432</>,
-      references to variables <literal>:</><replaceable>variablename</>,
+      double constants such as <literal>3.14159</>,
+      references to integer variables <literal>:</><replaceable>variablename</>,
       and expressions composed of unary (<literal>-</>) or binary operators
-      (<literal>+</>, <literal>-</>, <literal>*</>, <literal>/</>, <literal>%</>)
-      with their usual associativity, and parentheses.
+      (<literal>+</>, <literal>-</>, <literal>*</>, <literal>/</>,
+      <literal>%</>) with their usual associativity, function calls and
+      parentheses.
+      <xref linkend="functions-pgbench-func-table"> shows the available
+      functions.
+     </para>
+
+     <para>
+      Typing between integer and double is implicit and descendant: the type of
+      an operator or function depends on the expected type of the result.
+      For instance, if an integer is expected, <literal>exp1 + exp2</> will cast
+      both operands to int and use the integer addition.
      </para>
 
      <para>
       Examples:
 <programlisting>
 \set ntellers 10 * :scale
-\set aid (1021 * :aid) % (100000 * :scale) + 1
+\set aid (1021 * random(1, 100000 * :scale)) % (100000 * :scale) + 1
 </programlisting></para>
     </listitem>
    </varlistentry>
 
    <varlistentry>
     <term>
-     <literal>\setrandom <replaceable>varname</> <replaceable>min</> <replaceable>max</> [ uniform | { gaussian | exponential } <replaceable>threshold</> ]</literal>
+     <literal>\setrandom <replaceable>varname</> <replaceable>min</> <replaceable>max</> [ uniform | { gaussian | exponential } <replaceable>param</> ]</literal>
      </term>
 
     <listitem>
@@ -801,57 +812,35 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
      </para>
 
      <para>
-      By default, or when <literal>uniform</> is specified, all values in the
-      range are drawn with equal probability.  Specifying <literal>gaussian</>
-      or  <literal>exponential</> options modifies this behavior; each
-      requires a mandatory threshold which determines the precise shape of the
-      distribution.
-     </para>
+      <itemizedlist>
+       <listitem>
+        <para>
+         <literal>\setrandom n 1 10</> or <literal>\setrandom n 1 10 uniform</>
+         is equivalent to <literal>\set n random(1, 10)</> and uses a uniform
+         distribution.
+        </para>
+       </listitem>
 
-     <para>
-      For a Gaussian distribution, the interval is mapped onto a standard
-      normal distribution (the classical bell-shaped Gaussian curve) truncated
-      at <literal>-threshold</> on the left and <literal>+threshold</>
-      on the right.
-      To be precise, if <literal>PHI(x)</> is the cumulative distribution
-      function of the standard normal distribution, with mean <literal>mu</>
-      defined as <literal>(max + min) / 2.0</>, then value <replaceable>i</>
-      between <replaceable>min</> and <replaceable>max</> inclusive is drawn
-      with probability:
-      <literal>
-        (PHI(2.0 * threshold * (i - min - mu + 0.5) / (max - min + 1)) -
-         PHI(2.0 * threshold * (i - min - mu - 0.5) / (max - min + 1))) /
-         (2.0 * PHI(threshold) - 1.0)</>.
-      Intuitively, the larger the <replaceable>threshold</>, the more
-      frequently values close to the middle of the interval are drawn, and the
-      less frequently values close to the <replaceable>min</> and
-      <replaceable>max</> bounds.
-      About 67% of values are drawn from the middle <literal>1.0 / threshold</>
-      and 95% in the middle <literal>2.0 / threshold</>; for instance, if
-      <replaceable>threshold</> is 4.0, 67% of values are drawn from the middle
-      quarter and 95% from the middle half of the interval.
-      The minimum <replaceable>threshold</> is 2.0 for performance of
-      the Box-Muller transform.
-     </para>
+      <listitem>
+       <para>
+        <literal>\setrandom n 1 10 exponential 3.0</> is equivalent to
+        <literal>\set n random_exponential(1, 10, 3.0)</> and uses an
+        exponential distribution.
+       </para>
+      </listitem>
 
-     <para>
-      For an exponential distribution, the <replaceable>threshold</>
-      parameter controls the distribution by truncating a quickly-decreasing
-      exponential distribution at <replaceable>threshold</>, and then
-      projecting onto integers between the bounds.
-      To be precise, value <replaceable>i</> between <replaceable>min</> and
-      <replaceable>max</> inclusive is drawn with probability:
-      <literal>(exp(-threshold*(i-min)/(max+1-min)) -
-       exp(-threshold*(i+1-min)/(max+1-min))) / (1.0 - exp(-threshold))</>.
-      Intuitively, the larger the <replaceable>threshold</>, the more
-      frequently values close to <replaceable>min</> are accessed, and the
-      less frequently values close to <replaceable>max</> are accessed.
-      The closer to 0 the threshold, the flatter (more uniform) the access
-      distribution.
-      A crude approximation of the distribution is that the most frequent 1%
-      values in the range, close to <replaceable>min</>, are drawn
-      <replaceable>threshold</>%  of the time.
-      The <replaceable>threshold</> value must be strictly positive.
+      <listitem>
+       <para>
+        <literal>\setrandom n 1 10 gaussian 2.0</> is equivalent to
+        <literal>\set n random_gaussian(1, 10, 2.0)</>, and uses a gaussian
+        distribution.
+       </para>
+      </listitem>
+     </itemizedlist>
+
+       See the documentation of these functions below for further information
+       about the precise shape of these distributions, depending on the value
+       of the parameter.
      </para>
 
      <para>
@@ -931,18 +920,196 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
    </varlistentry>
   </variablelist>
 
+   <!-- list pgbench functions in alphabetical order -->
+   <table id="functions-pgbench-func-table">
+    <title>PgBench Functions</title>
+    <tgroup cols="5">
+     <thead>
+      <row>
+       <entry>Function</entry>
+       <entry>Return Type</entry>
+       <entry>Description</entry>
+       <entry>Example</entry>
+       <entry>Result</entry>
+      </row>
+     </thead>
+     <tbody>
+      <row>
+       <entry><literal><function>abs(<replaceable>a</>)</></></>
+       <entry>same as <replaceable>a</></>
+       <entry>integer or double absolute value</>
+       <entry><literal>abs(-17)</></>
+       <entry><literal>17</></>
+      </row>
+      <row>
+       <entry><literal><function>ddebug(<replaceable>x</>)</></></>
+       <entry>double</>
+       <entry>stderr print for debug and return argument</>
+       <entry><literal>ddebug(5432.1)</></>
+       <entry><literal>5432.1</></>
+      </row>
+      <row>
+       <entry><literal><function>double(<replaceable>i</>)</></></>
+       <entry>double</>
+       <entry>evaluate as int and cast to double</>
+       <entry><literal>double(5432)</></>
+       <entry><literal>5432.0</></>
+      </row>
+      <row>
+       <entry><literal><function>idebug(<replaceable>i</>)</></></>
+       <entry>integer</>
+       <entry>stderr print for debug and return argument</>
+       <entry><literal>idebug(5432)</></>
+       <entry><literal>5432</></>
+      </row>
+      <row>
+       <entry><literal><function>int(<replaceable>x</>)</></></>
+       <entry>integer</>
+       <entry>evaluate as double and cast to int</>
+       <entry><literal>int(5.4 + 3.8)</></>
+       <entry><literal>9</></>
+      </row>
+      <row>
+       <entry><literal><function>max(<replaceable>i</>, <replaceable>...</>)</></></>
+       <entry>integer</>
+       <entry>maximum value</>
+       <entry><literal>max(5, 4, 3, 2)</></>
+       <entry><literal>5</></>
+      </row>
+      <row>
+       <entry><literal><function>min(<replaceable>i</>, <replaceable>...</>)</></></>
+       <entry>integer</>
+       <entry>minimum value</>
+       <entry><literal>min(5, 4, 3, 2)</></>
+       <entry><literal>2</></>
+      </row>
+      <row>
+       <entry><literal><function>pi()</></></>
+       <entry>double</>
+       <entry>value of the PI constant</>
+       <entry><literal>pi()</></>
+       <entry><literal>3.14159265358979323846</></>
+      </row>
+      <row>
+       <entry><literal><function>random(<replaceable>lb</>, <replaceable>ub</>)</></></>
+       <entry>integer</>
+       <entry>uniformly distributed random integer in <literal>[lb,ub]</></>
+       <entry><literal>random(1, 10)</></>
+       <entry>an int between <literal>1</> and <literal>10</></>
+      </row>
+      <row>
+       <entry><literal><function>random_exponential(<replaceable>lb</>, <replaceable>ub</>, <replaceable>param</>)</></></>
+       <entry>integer</>
+       <entry>exponentially distributed random integer in <literal>[lb,ub]</>,
+              see below</>
+       <entry><literal>random_exponential(1, 10, 3.0)</></>
+       <entry>an int between <literal>1</> and <literal>10</></>
+      </row>
+      <row>
+       <entry><literal><function>random_gaussian(<replaceable>lb</>, <replaceable>ub</>, <replaceable>param</>)</></></>
+       <entry>integer</>
+       <entry>gaussian distributed random integer in <literal>[lb,ub]</>,
+              see below</>
+       <entry><literal>random_gaussian(1, 10, 2.5)</></>
+       <entry>an int between <literal>1</> and <literal>10</></>
+      </row>
+      <row>
+       <entry><literal><function>sqrt(<replaceable>x</>)</></></>
+       <entry>double</>
+       <entry>square root</>
+       <entry><literal>sqrt(2.0)</></>
+       <entry><literal>1.414213562</></>
+      </row>
+     </tbody>
+     </tgroup>
+   </table>
+
+   <para>
+    The values generated by the <literal>random</> function are uniform, that is
+    all values in the specified range are drawn with equal probability.
+   </para>
+
+   <para>
+     The <literal>random_exponential</> and <literal>random_gaussian</>
+     functions require an additional double parameter which determines the
+     precise shape of the distribution.
+   </para>
+
+   <itemizedlist>
+    <listitem>
+     <para>
+      For an exponential distribution, the <replaceable>param</> parameter
+      controls the distribution by truncating a quickly-decreasing
+      exponential distribution at <replaceable>param</>, and then
+      projecting onto integers between the bounds.
+      To be precise, with
+<literallayout>
+f(x) = exp(-param * (x-min) / (max-min+1)) / (1 - exp(-param))
+</literallayout>
+      then value <replaceable>i</> between <replaceable>min</> and
+      <replaceable>max</> inclusive is drawn with probability:
+      <literal>f(i) - f(i+1)</>.
+     </para>
+
+     <para>
+      Intuitively, the larger the <replaceable>param</>, the more
+      frequently values close to <replaceable>min</> are accessed, and the
+      less frequently values close to <replaceable>max</> are accessed.
+      The closer to 0 the parameter, the flatter (more uniform) the access
+      distribution.
+      A crude approximation of the distribution is that the most frequent 1%
+      values in the range, close to <replaceable>min</>, are drawn
+      <replaceable>param</>%  of the time.
+      The <replaceable>param</> value must be strictly positive.
+     </para>
+    </listitem>
+
+    <listitem>
+     <para>
+      For a Gaussian distribution, the interval is mapped onto a standard
+      normal distribution (the classical bell-shaped Gaussian curve) truncated
+      at <literal>-param</> on the left and <literal>+param</>
+      on the right.
+      Values in the middle of the interval are more likely to be drawn.
+      To be precise, if <literal>PHI(x)</> is the cumulative distribution
+      function of the standard normal distribution, with mean <literal>mu</>
+      defined as <literal>(max+min)/2</>, with
+<literallayout>
+f(x) = PHI(2 * param * (x-mu) / (max-min+1)) / (2 * PHI(param) - 1)
+</literallayout>
+      then value <replaceable>i</> between <replaceable>min</> and
+      <replaceable>max</> inclusive is drawn with probability:
+      <literal>f(i+0.5) - f(i-0.5)</>.
+     </para>
+     <para>
+      Intuitively, the larger the <replaceable>param</>, the more
+      frequently values close to the middle of the interval are drawn, and the
+      less frequently values close to the <replaceable>min</> and
+      <replaceable>max</> bounds.
+      About 67% of values are drawn from the middle <literal>1/param</>,
+      that is a relative <literal>0.5/param</> around the mean,
+      and 95% in the middle <literal>2/param</>, that is
+      a relative <literal>1/param</> around the mean;
+      for instance, if <replaceable>param</> is 4.0, 67% of values are drawn
+      from the middle quarter (1/4.0) of the interval
+      (i.e. from <literal>3/8</> to <literal>5/8</>)
+      and 95% from the middle half (2/4.0) of the interval (second and third
+      quartiles).
+      The minimum <replaceable>param</> is 2.0 for performance of
+      the Box-Muller transform.
+     </para>
+    </listitem>
+   </itemizedlist>
+
   <para>
    As an example, the full definition of the built-in TPC-B-like
    transaction is:
 
 <programlisting>
-\set nbranches :scale
-\set ntellers 10 * :scale
-\set naccounts 100000 * :scale
-\setrandom aid 1 :naccounts
-\setrandom bid 1 :nbranches
-\setrandom tid 1 :ntellers
-\setrandom delta -5000 5000
+\set aid random(1, 100000 * :scale)
+\set bid random(1, 1 * :scale)
+\set tid random(1, 10 * :scale)
+\set delta random(-5000, 5000)
 BEGIN;
 UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
 SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
@@ -1097,27 +1264,26 @@ starting vacuum...end.
 transaction type: TPC-B (sort of)
 scaling factor: 1
 query mode: simple
-number of clients: 10
+number of clients: 4
 number of threads: 1
-number of transactions per client: 1000
-number of transactions actually processed: 10000/10000
-tps = 618.764555 (including connections establishing)
-tps = 622.977698 (excluding connections establishing)
+duration: 3 s
+number of transactions actually processed: 1239
+latency average: 9.584 ms
+latency stddev: 5.204 ms
+tps = 411.913509 (including connections establishing)
+tps = 413.088125 (excluding connections establishing)
 statement latencies in milliseconds:
-        0.004386        \set nbranches 1 * :scale
-        0.001343        \set ntellers 10 * :scale
-        0.001212        \set naccounts 100000 * :scale
-        0.001310        \setrandom aid 1 :naccounts
-        0.001073        \setrandom bid 1 :nbranches
-        0.001005        \setrandom tid 1 :ntellers
-        0.001078        \setrandom delta -5000 5000
-        0.326152        BEGIN;
-        0.603376        UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
-        0.454643        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
-        5.528491        UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;
-        7.335435        UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;
-        0.371851        INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);
-        1.212976        END;
+        0.010948        \set aid random(1, 100000 * :scale)
+        0.003161        \set bid random(1, 1 * :scale)
+        0.002203        \set tid random(1, 10 * :scale)
+        0.002266        \set delta random(-5000, 5000)
+        0.144510        BEGIN;
+        0.498513        UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
+        0.334889        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+        1.436523        UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;
+        4.914786        UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;
+        0.338837        INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);
+        1.877755        END;
 </screen>
   </para>
 
diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y
index e68631e..a1477f9 100644
--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -16,10 +16,14 @@
 
 PgBenchExpr *expr_parse_result;
 
+static PgBenchExprList *make_elist(PgBenchExpr *exp, PgBenchExprList *list);
 static PgBenchExpr *make_integer_constant(int64 ival);
+static PgBenchExpr *make_double_constant(double dval);
 static PgBenchExpr *make_variable(char *varname);
 static PgBenchExpr *make_op(char operator, PgBenchExpr *lexpr,
 		PgBenchExpr *rexpr);
+static int find_func(const char * fname);
+static PgBenchExpr *make_func(const int fnumber, PgBenchExprList *args);
 
 %}
 
@@ -29,15 +33,19 @@ static PgBenchExpr *make_op(char operator, PgBenchExpr *lexpr,
 %union
 {
 	int64		ival;
+	double		dval;
 	char	   *str;
 	PgBenchExpr *expr;
+	PgBenchExprList *elist;
 }
 
+%type <elist> elist
 %type <expr> expr
-%type <ival> INTEGER
-%type <str> VARIABLE
+%type <ival> INTEGER function
+%type <dval> DOUBLE
+%type <str> VARIABLE FUNCTION
 
-%token INTEGER VARIABLE
+%token INTEGER DOUBLE VARIABLE FUNCTION
 %token CHAR_ERROR /* never used, will raise a syntax error */
 
 /* Precedence: lowest to highest */
@@ -49,6 +57,11 @@ static PgBenchExpr *make_op(char operator, PgBenchExpr *lexpr,
 
 result: expr				{ expr_parse_result = $1; }
 
+elist:                  	{ $$ = NULL; }
+	| expr 					{ $$ = make_elist($1, NULL); }
+	| elist ',' expr		{ $$ = make_elist($3, $1); }
+	;
+
 expr: '(' expr ')'			{ $$ = $2; }
 	| '+' expr %prec UMINUS	{ $$ = $2; }
 	| '-' expr %prec UMINUS	{ $$ = make_op('-', make_integer_constant(0), $2); }
@@ -58,7 +71,12 @@ expr: '(' expr ')'			{ $$ = $2; }
 	| expr '/' expr			{ $$ = make_op('/', $1, $3); }
 	| expr '%' expr			{ $$ = make_op('%', $1, $3); }
 	| INTEGER				{ $$ = make_integer_constant($1); }
+	| DOUBLE				{ $$ = make_double_constant($1); }
 	| VARIABLE 				{ $$ = make_variable($1); }
+	| function '(' elist ')'{ $$ = make_func($1, $3); }
+	;
+
+function: FUNCTION			{ $$ = find_func($1); pg_free($1); }
 	;
 
 %%
@@ -74,6 +92,16 @@ make_integer_constant(int64 ival)
 }
 
 static PgBenchExpr *
+make_double_constant(double dval)
+{
+	PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr));
+
+	expr->etype = ENODE_DOUBLE_CONSTANT;
+	expr->u.double_constant.dval = dval;
+	return expr;
+}
+
+static PgBenchExpr *
 make_variable(char *varname)
 {
 	PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr));
@@ -95,4 +123,123 @@ make_op(char operator, PgBenchExpr *lexpr, PgBenchExpr *rexpr)
 	return expr;
 }
 
+/* list of available functions
+ * - fname: function name
+ * - nargs: number of arguments (-1 is a special value for min & max)
+ * - tag: function identifier from PgBenchFunction enum
+ */
+static struct {
+	char * fname;
+	int nargs;
+	PgBenchFunction tag;
+} PGBENCH_FUNCTIONS[] = {
+	{ "pi", 0, PGBENCH_PI },
+	{ "abs", 1, PGBENCH_ABS },
+	{ "sqrt", 1, PGBENCH_SQRT },
+	{ "int", 1, PGBENCH_INT },
+	{ "double", 1, PGBENCH_DOUBLE },
+	{ "min", -1, PGBENCH_MIN },
+	{ "max", -1, PGBENCH_MAX },
+	{ "random", 2, PGBENCH_RANDOM },
+	{ "random_gaussian", 3, PGBENCH_RANDOM_GAUSSIAN },
+	{ "random_exponential", 3, PGBENCH_RANDOM_EXPONENTIAL },
+	{ "idebug", 1, PGBENCH_IDEBUG },
+	{ "ddebug", 1, PGBENCH_DDEBUG },
+
+	/* keep as last array element */
+	{ NULL, 0, 0 }
+};
+
+/*
+ * Find a function from its name
+ *
+ * return the index of the function from the PGBENCH_FUNCTIONS array
+ * or fail if the function is unknown.
+ */
+static int
+find_func(const char * fname)
+{
+	int i = 0;
+
+	while (PGBENCH_FUNCTIONS[i].fname)
+	{
+		if (pg_strcasecmp(fname, PGBENCH_FUNCTIONS[i].fname) == 0)
+			return i;
+		i++;
+	}
+
+	expr_yyerror_more("unexpected function name", fname);
+
+	/* not reached */
+	return -1;
+}
+
+/* Expression linked list builder */
+static PgBenchExprList *
+make_elist(PgBenchExpr *expr, PgBenchExprList *list)
+{
+	PgBenchExprList *cons = pg_malloc(sizeof(PgBenchExprList));
+	cons->expr = expr;
+	cons->next = list;
+	return cons;
+}
+
+/*
+ * Reverse expression linked list
+ *
+ * The list of function arguments is built in reverse order, and reversed once
+ * at the end so as to avoid appending repeatedly at the end of the list.
+ */
+static PgBenchExprList *
+reverse_elist(PgBenchExprList *list)
+{
+	PgBenchExprList *cur = list, *prec = NULL, *next = NULL;
+
+	while (cur != NULL)
+	{
+		next = cur->next;
+		cur->next = prec;
+		prec = cur;
+		cur = next;
+	}
+
+	return prec;
+}
+
+/* Return the length of an expression list */
+static int
+elist_length(PgBenchExprList *list)
+{
+	int len = 0;
+
+	for (; list != NULL; list = list->next)
+		len++;
+
+	return len;
+}
+
+/* Build function call expression */
+static PgBenchExpr *
+make_func(const int fnumber, PgBenchExprList *args)
+{
+	PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr));
+
+	Assert(fnumber >= 0);
+
+	if ((PGBENCH_FUNCTIONS[fnumber].nargs >= 0 &&
+		 PGBENCH_FUNCTIONS[fnumber].nargs != elist_length(args)) ||
+		/* check at least one arg for min & max */
+		(PGBENCH_FUNCTIONS[fnumber].nargs == -1 &&
+		 elist_length(args) == 0))
+		expr_yyerror_more("unexpected number of arguments",
+						  PGBENCH_FUNCTIONS[fnumber].fname);
+
+	expr->etype = ENODE_FUNCTION;
+	expr->u.function.function = PGBENCH_FUNCTIONS[fnumber].tag;
+	/* the argument list has been built in reverse order, it is fixed here */
+	expr->u.function.args = reverse_elist(args);
+
+	return expr;
+}
+
 #include "exprscan.c"
diff --git a/src/bin/pgbench/exprscan.l b/src/bin/pgbench/exprscan.l
index 5331ab7..1f8fc65 100644
--- a/src/bin/pgbench/exprscan.l
+++ b/src/bin/pgbench/exprscan.l
@@ -46,6 +46,7 @@ space			[ \t\r\f]
 "%"				{ yycol += yyleng; return '%'; }
 "("				{ yycol += yyleng; return '('; }
 ")"				{ yycol += yyleng; return ')'; }
+","				{ yycol += yyleng; return ','; }
 
 :[a-zA-Z0-9_]+	{
 					yycol += yyleng;
@@ -57,8 +58,19 @@ space			[ \t\r\f]
 					yylval.ival = strtoint64(yytext);
 					return INTEGER;
 				}
+[0-9]+\.[0-9]+	{
+					yycol += yyleng;
+					yylval.dval = atof(yytext);
+					return DOUBLE;
+				}
+[a-zA-Z0-9_]+   {
+					yycol += yyleng;
+					yylval.str = pg_strdup(yytext);
+					return FUNCTION;
+				}
+
+[\n]			{ yycol = 0; yyline++; /* never occurs, input on one line */ }
 
-[\n]			{ yycol = 0; yyline++; }
 {space}+		{ yycol += yyleng; /* ignore */ }
 
 .				{
@@ -71,10 +83,16 @@ space			[ \t\r\f]
 %%
 
 void
-yyerror(const char *message)
+expr_yyerror_more(const char *message, const char *more)
 {
 	syntax_error(expr_source, expr_lineno, expr_full_line, expr_command,
-				 message, NULL, expr_col + yycol);
+				 message, more, expr_col + yycol);
+}
+
+void
+yyerror(const char *message)
+{
+	expr_yyerror_more(message, NULL);
 }
 
 /*
@@ -94,15 +112,14 @@ expr_scanner_init(const char *str, const char *source,
 	expr_command = (char *) cmd;
 	expr_col = (int) ecol;
 
-	/*
-	 * Might be left over after error
-	 */
+	/* reset column count for this scan */
+	yycol = 0;
+
+	/* Might be left over after error */
 	if (YY_CURRENT_BUFFER)
 		yy_delete_buffer(YY_CURRENT_BUFFER);
 
-	/*
-	 * Make a scan buffer with special termination needed by flex.
-	 */
+	/* Make a scan buffer with special termination needed by flex. */
 	scanbuflen = slen;
 	scanbuf = pg_malloc(slen + 2);
 	memcpy(scanbuf, str, slen);
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index f2d435b..9000b58 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -303,13 +303,10 @@ static int	debug = 0;			/* debug flag */
 
 /* default scenario */
 static char *tpc_b = {
-	"\\set nbranches " CppAsString2(nbranches) " * :scale\n"
-	"\\set ntellers " CppAsString2(ntellers) " * :scale\n"
-	"\\set naccounts " CppAsString2(naccounts) " * :scale\n"
-	"\\setrandom aid 1 :naccounts\n"
-	"\\setrandom bid 1 :nbranches\n"
-	"\\setrandom tid 1 :ntellers\n"
-	"\\setrandom delta -5000 5000\n"
+	"\\set aid random(1, " CppAsString2(naccounts) " * :scale)\n"
+	"\\set bid random(1, " CppAsString2(nbranches) " * :scale)\n"
+	"\\set tid random(1, " CppAsString2(ntellers) " * :scale)\n"
+	"\\set delta random(-5000, 5000)\n"
 	"BEGIN;\n"
 	"UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n"
 	"SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
@@ -321,13 +318,10 @@ static char *tpc_b = {
 
 /* -N case */
 static char *simple_update = {
-	"\\set nbranches " CppAsString2(nbranches) " * :scale\n"
-	"\\set ntellers " CppAsString2(ntellers) " * :scale\n"
-	"\\set naccounts " CppAsString2(naccounts) " * :scale\n"
-	"\\setrandom aid 1 :naccounts\n"
-	"\\setrandom bid 1 :nbranches\n"
-	"\\setrandom tid 1 :ntellers\n"
-	"\\setrandom delta -5000 5000\n"
+	"\\set aid random(1, " CppAsString2(naccounts) " * :scale)\n"
+	"\\set bid random(1, " CppAsString2(nbranches) " * :scale)\n"
+	"\\set tid random(1, " CppAsString2(ntellers) " * :scale)\n"
+	"\\set delta random(-5000, 5000)\n"
 	"BEGIN;\n"
 	"UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n"
 	"SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
@@ -337,8 +331,7 @@ static char *simple_update = {
 
 /* -S case */
 static char *select_only = {
-	"\\set naccounts " CppAsString2(naccounts) " * :scale\n"
-	"\\setrandom aid 1 :naccounts\n"
+	"\\set aid random(1, " CppAsString2(naccounts) " * :scale)\n"
 	"SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
 };
 
@@ -887,13 +880,191 @@ getQueryParams(CState *st, const Command *command, const char **params)
 }
 
 /*
+ * Recursive evaluation of int and double expressions
+ *
+ * Pgbench uses an implicit descendant typing, that is depending on the
+ * expected type (int or double) the corresponding evalInt or evalDouble
+ * function is called.
+ *
+ * The expected type at the highest level is always an integer, as pgbench
+ * only supports integer variables. Some function arguments are expected to
+ * be double, and thus the expected type can change during the recursion
+ * when evaluating those arguments.
+ *
+ * The expected type can also be enforced with functions "int" which expects
+ * a double argument and casts the result to int, and "double" which expects an
+ * int argument and casts it to double.
+ *
+ * This approach departs from usual type systems which are ascendant, starting
+ * from the type of leaves and typing nodes while going upwards in the
+ * expression syntax tree.
+ *
+ * This is not perceived as an issue for the typical pgbench scripts which
+ * mostly deal with integers, as the double type is only used as a parameter
+ * for exponential and gaussian distributed random generation, so the
+ * difference should seldom be noticed, and can always be fixed by inserting
+ * an explicit conversion (int or double function) at the right place.
+ *
+ * This convention greatly simplifies the handling of types, as there is no
+ * need of an explicit typing/compilation phase which would insert conversions
+ * and resolve overloaded operators, or to put these conversions and
+ * resolutions in the expression evaluation which would induce repeated
+ * code to test types and handle conversion for each function and operators.
+ */
+
+static bool evalInt(TState *, CState *, PgBenchExpr *, int64 *);
+
+/*
+ * Recursive evaluation of an expression in a pgbench script
+ * using the current state of variables.
+ * Returns whether the evaluation was ok,
+ * the value itself is returned through the retval pointer.
+ */
+static bool
+evalDouble(TState *thread, CState *st, PgBenchExpr *expr, double *retval)
+{
+	switch (expr->etype)
+	{
+		case ENODE_DOUBLE_CONSTANT:
+		{
+			*retval = expr->u.double_constant.dval;
+			return true;
+		}
+		case ENODE_OPERATOR:
+		{
+			double		lval, rval;
+
+			if (!evalDouble(thread, st, expr->u.operator.lexpr, &lval))
+				return false;
+			if (!evalDouble(thread, st, expr->u.operator.rexpr, &rval))
+				return false;
+
+			switch (expr->u.operator.operator)
+			{
+				case '+':
+					*retval = lval + rval;
+					return true;
+
+				case '-':
+					*retval = lval - rval;
+					return true;
+
+				case '*':
+					*retval = lval * rval;
+					return true;
+
+				case '/':
+					*retval = lval / rval;
+					return true;
+
+					/* cast any int operator */
+				case '%':
+				{
+					int64 ival;
+					if (!evalInt(thread, st, expr, &ival))
+						return false;
+					*retval = (double) ival;
+					return true;
+				}
+				default:
+					fprintf(stderr, "unexpected operator '%c'\n",
+							expr->u.operator.operator);
+					exit(1);
+			}
+		}
+		case ENODE_FUNCTION:
+		{
+			PgBenchFunction func = expr->u.function.function;
+			PgBenchExprList *args = expr->u.function.args;
+
+			switch (func)
+			{
+			case PGBENCH_PI:
+				*retval = M_PI;
+				return true;
+			case PGBENCH_ABS: /* also an integer function */
+			{
+				if (!evalDouble(thread, st, args->expr, retval))
+					return false;
+
+				if ((*retval) < 0.0)
+					*retval = - *retval;
+
+				return true;
+			}
+			case PGBENCH_SQRT:
+			{
+				double arg;
+
+				if (!evalDouble(thread, st, args->expr, &arg))
+					return false;
+
+				*retval = sqrt(arg);
+
+				return true;
+			}
+			case PGBENCH_DDEBUG:
+			{
+				if (!evalDouble(thread, st, args->expr, retval))
+					return false;
+
+				fprintf(stderr, "ddebug(script=%d,command=%d): %f\n",
+						st->use_file, st->state+1, *retval);
+
+				return true;
+			}
+			case PGBENCH_DOUBLE:
+			{
+				int64 ival;
+				if (!evalInt(thread, st, args->expr, &ival))
+					return false;
+				*retval = (double) ival;
+				return true;
+			}
+			/* integer-specific functions are evaluated as int and cast */
+			case PGBENCH_INT:
+			case PGBENCH_IDEBUG:
+			case PGBENCH_MIN:
+			case PGBENCH_MAX:
+			case PGBENCH_RANDOM:
+			case PGBENCH_RANDOM_EXPONENTIAL:
+			case PGBENCH_RANDOM_GAUSSIAN:
+			{
+				int64 ival;
+				if (!evalInt(thread, st, expr, &ival))
+					return false;
+				*retval = (double) ival;
+				return true;
+			}
+			default:
+				fprintf(stderr, "unexpected function tag: %d\n", func);
+				exit(1);
+			}
+		}
+	case ENODE_INTEGER_CONSTANT:
+	case ENODE_VARIABLE:
+		{
+			int64 ival;
+			if (!evalInt(thread, st, expr, &ival))
+				return false;
+			*retval = (double) ival;
+			return true;
+		}
+	default:
+		fprintf(stderr, "unexpected enode type in double evaluation: %d\n",
+				expr->etype);
+		exit(1);
+	}
+}
+
+/*
  * Recursive evaluation of an expression in a pgbench script
  * using the current state of variables.
  * Returns whether the evaluation was ok,
  * the value itself is returned through the retval pointer.
  */
 static bool
-evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval)
+evalInt(TState *thread, CState *st, PgBenchExpr *expr, int64 *retval)
 {
 	switch (expr->etype)
 	{
@@ -903,6 +1074,12 @@ evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval)
 				return true;
 			}
 
+		case ENODE_DOUBLE_CONSTANT:
+			{
+				*retval = (int64) expr->u.double_constant.dval;
+				return true;
+			}
+
 		case ENODE_VARIABLE:
 			{
 				char	   *var;
@@ -922,49 +1099,180 @@ evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval)
 				int64		lval;
 				int64		rval;
 
-				if (!evaluateExpr(st, expr->u.operator.lexpr, &lval))
+				if (!evalInt(thread, st, expr->u.operator.lexpr, &lval))
 					return false;
-				if (!evaluateExpr(st, expr->u.operator.rexpr, &rval))
+				if (!evalInt(thread, st, expr->u.operator.rexpr, &rval))
 					return false;
+
 				switch (expr->u.operator.operator)
 				{
-					case '+':
-						*retval = lval + rval;
-						return true;
+				case '+':
+					*retval = lval + rval;
+					return true;
 
-					case '-':
-						*retval = lval - rval;
-						return true;
+				case '-':
+					*retval = lval - rval;
+					return true;
 
-					case '*':
-						*retval = lval * rval;
-						return true;
+				case '*':
+					*retval = lval * rval;
+					return true;
 
-					case '/':
-						if (rval == 0)
+				case '/':
+					if (rval == 0)
+					{
+						fprintf(stderr, "division by zero\n");
+						return false;
+					}
+					*retval = lval / rval;
+					return true;
+
+				case '%':
+					if (rval == 0)
+					{
+						fprintf(stderr, "division by zero\n");
+						return false;
+					}
+					*retval = lval % rval;
+					return true;
+
+				default:
+					fprintf(stderr, "unexpected integer operator '%c'\n",
+							expr->u.operator.operator);
+					return false;
+				}
+			}
+
+		case ENODE_FUNCTION:
+			{
+				PgBenchFunction func = expr->u.function.function;
+				PgBenchExprList *args = expr->u.function.args;
+
+				switch (func)
+				{
+					case PGBENCH_RANDOM:
+					case PGBENCH_RANDOM_EXPONENTIAL:
+					case PGBENCH_RANDOM_GAUSSIAN:
+					{
+						int64 arg1, arg2;
+
+						if (!evalInt(thread, st, args->expr, &arg1))
+							return false;
+						if (!evalInt(thread, st, args->next->expr, &arg2))
+							return false;
+
+						/* check random range */
+						if (arg1 > arg2)
 						{
-							fprintf(stderr, "division by zero\n");
+							fprintf(stderr, "empty range given to random\n");
+							st->ecnt++;
+							return false;
+						}
+						else if (arg2 - arg1 < 0 || (arg2 - arg1) + 1 < 0)
+						{
+							/* prevent int overflows in random functions */
+							fprintf(stderr, "random range is too large\n");
+							st->ecnt++;
 							return false;
 						}
-						*retval = lval / rval;
-						return true;
 
-					case '%':
-						if (rval == 0)
+						if (func == PGBENCH_RANDOM)
+							*retval = getrand(thread, arg1, arg2);
+						else /* gaussian & exponential */
 						{
-							fprintf(stderr, "division by zero\n");
+							double threshold;
+							if (!evalDouble(thread, st, args->next->next->expr,
+											&threshold))
+								return false;
+							if (func == PGBENCH_RANDOM_GAUSSIAN)
+								*retval = getGaussianRand(thread, arg1, arg2, threshold);
+							else /* exponential */
+								*retval = getExponentialRand(thread, arg1, arg2, threshold);
+						}
+
+						return true;
+					}
+					case PGBENCH_IDEBUG: /* unary functions */
+					{
+						if (!evalInt(thread, st, args->expr, retval))
+							return false;
+
+						fprintf(stderr, "idebug(script=%d,command=%d): "
+								INT64_FORMAT "\n", st->use_file, st->state+1, *retval);
+
+						return true;
+					}
+					case PGBENCH_ABS: /* both an int & double function */
+					{
+						if (!evalInt(thread, st, args->expr, retval))
 							return false;
+
+						if ((*retval) < 0)
+							*retval = - *retval;
+
+						return true;
+					}
+					case PGBENCH_MIN: /* n-ary, at least one argument */
+					case PGBENCH_MAX:
+					{
+						int64 val = -1;
+						bool first = true;
+						while (args != NULL)
+						{
+							int64 arg;
+
+							if (!evalInt(thread, st, args->expr, &arg))
+								return false;
+
+							if (first)
+								val = arg;
+							else if (func == PGBENCH_MIN)
+								val = val < arg? val: arg;
+							else if (func == PGBENCH_MAX)
+								val = val > arg? val: arg;
+
+							args = args->next;
+							first = false;
 						}
-						*retval = lval % rval;
+
+						*retval = val;
 						return true;
-				}
+					}
+					case PGBENCH_INT: /* eval as double & cast to int */
+					{
+						double arg;
+
+						if (!evalDouble(thread, st, args->expr, &arg))
+							return false;
+
+						*retval = (int64) arg;
+						return true;
+					}
 
-				fprintf(stderr, "bad operator\n");
-				return false;
+					/* cast double specific functions to int */
+					case PGBENCH_PI:
+					case PGBENCH_DOUBLE:
+					case PGBENCH_DDEBUG:
+					case PGBENCH_SQRT:
+					{
+						double arg;
+
+						if (!evalDouble(thread, st, expr, &arg))
+							return false;
+
+						*retval = (int64) arg;
+						return true;
+					}
+				default:
+					fprintf(stderr, "unexpected function tag %d\n", func);
+					exit(1);
+				}
 			}
 
-		default:
-			break;
+		default: /* abort on internal error */
+			fprintf(stderr, "unexpected enode type in int evaluation: %d\n",
+					expr->etype);
+			exit(1);
 	}
 
 	fprintf(stderr, "bad expression\n");
@@ -1613,7 +1921,7 @@ top:
 			PgBenchExpr *expr = commands[st->state]->expr;
 			int64		result;
 
-			if (!evaluateExpr(st, expr, &result))
+			if (!evalInt(thread, st, expr, &result))
 			{
 				st->ecnt++;
 				return true;
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index 42e2aae..fffd355 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -14,11 +14,31 @@
 typedef enum PgBenchExprType
 {
 	ENODE_INTEGER_CONSTANT,
+	ENODE_DOUBLE_CONSTANT,
 	ENODE_VARIABLE,
-	ENODE_OPERATOR
+	ENODE_OPERATOR,
+	ENODE_FUNCTION
 } PgBenchExprType;
 
+typedef enum PgBenchFunction	/* tag stored in PgBenchExpr.u.function.function; evalInt/evalDouble switch on it */
+{
+	PGBENCH_NONE,	/* NOTE(review): no evaluator case for this tag is visible in the patch; presumably a "no function" placeholder - confirm */
+	PGBENCH_PI,	/* double-valued; evalInt evaluates it through evalDouble and casts the result to int64 */
+	PGBENCH_INT,	/* argument is evaluated as a double, then cast to int64 */
+	PGBENCH_DOUBLE,	/* argument is evaluated as an int64, then cast to double */
+	PGBENCH_IDEBUG,	/* evaluates its int argument, prints it to stderr, and returns it unchanged */
+	PGBENCH_DDEBUG,	/* evaluates its double argument, prints it to stderr, and returns it unchanged */
+	PGBENCH_ABS,	/* absolute value; has both an int case (evalInt) and a double case (evalDouble) */
+	PGBENCH_SQRT,	/* sqrt() of the argument evaluated as a double */
+	PGBENCH_MIN,	/* n-ary minimum over arguments evaluated as int64 */
+	PGBENCH_MAX,	/* n-ary maximum over arguments evaluated as int64 */
+	PGBENCH_RANDOM,	/* getrand() over two int bounds; evalInt rejects first > second and too-large ranges */
+	PGBENCH_RANDOM_GAUSSIAN,	/* same bounds checks, plus a third double argument passed to getGaussianRand() */
+	PGBENCH_RANDOM_EXPONENTIAL	/* same bounds checks, plus a third double argument passed to getExponentialRand() */
+} PgBenchFunction;
+
 typedef struct PgBenchExpr PgBenchExpr;
+typedef struct PgBenchExprList PgBenchExprList;
 
 struct PgBenchExpr
 {
@@ -31,6 +51,10 @@ struct PgBenchExpr
 		}			integer_constant;
 		struct
 		{
+			double		dval;
+		}			double_constant;
+		struct
+		{
 			char	   *varname;
 		}			variable;
 		struct
@@ -39,14 +63,25 @@ struct PgBenchExpr
 			PgBenchExpr *lexpr;
 			PgBenchExpr *rexpr;
 		}			operator;
+		struct
+		{
+			PgBenchFunction function;
+			PgBenchExprList *args;
+		}			function;
 	}			u;
 };
 
+struct PgBenchExprList {	/* singly linked list of function-call arguments (PgBenchExpr.u.function.args) */
+	PgBenchExpr *expr;	/* expression for this argument */
+	PgBenchExprList *next;	/* following argument; the min/max loop in evalInt walks this until NULL */
+};
+
 extern PgBenchExpr *expr_parse_result;
 
 extern int	expr_yyparse(void);
 extern int	expr_yylex(void);
 extern void expr_yyerror(const char *str);
+extern void expr_yyerror_more(const char *str, const char *more);
 extern void expr_scanner_init(const char *str, const char *source,
 				  const int lineno, const char *line,
 				  const char *cmd, const int ecol);

functions.sql
Description: application/sql

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] extend pgbench expressions with functions

Reply via email to