Re: [HACKERS] extend pgbench expressions with functions

Fabien COELHO Thu, 28 Jan 2016 12:21:24 -0800


<Oops, resent because I again put the wrong From address, sorry>


v22 compared to previous:
 - remove the short macros (although IMO it is a code degradation)
 - try not to remove/add blanks lines
 - let some assert "as is"
 - still exit on float to int overflow, see arguments in other mails
 - check for INT64_MIN / -1 (although I think it is useless)

--
Fabien.

diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 42d0667..d42208a 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -796,17 +796,21 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
       Sets variable <replaceable>varname</> to an integer value calculated
       from <replaceable>expression</>.
       The expression may contain integer constants such as <literal>5432</>,
-      references to variables <literal>:</><replaceable>variablename</>,
+      double constants such as <literal>3.14159</>,
+      references to integer variables <literal>:</><replaceable>variablename</>,
       and expressions composed of unary (<literal>-</>) or binary operators
-      (<literal>+</>, <literal>-</>, <literal>*</>, <literal>/</>, <literal>%</>)
-      with their usual associativity, and parentheses.
+      (<literal>+</>, <literal>-</>, <literal>*</>, <literal>/</>,
+      <literal>%</>) with their usual associativity, function calls and
+      parentheses.
+      <xref linkend="functions-pgbench-func-table"> shows the available
+      functions.
      </para>
 
      <para>
       Examples:
 <programlisting>
 \set ntellers 10 * :scale
-\set aid (1021 * :aid) % (100000 * :scale) + 1
+\set aid (1021 * random(1, 100000 * :scale)) % (100000 * :scale) + 1
 </programlisting></para>
     </listitem>
    </varlistentry>
@@ -826,66 +830,35 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
      </para>
 
      <para>
-      By default, or when <literal>uniform</> is specified, all values in the
-      range are drawn with equal probability.  Specifying <literal>gaussian</>
-      or  <literal>exponential</> options modifies this behavior; each
-      requires a mandatory parameter which determines the precise shape of the
-      distribution.
-     </para>
+      <itemizedlist>
+       <listitem>
+        <para>
+         <literal>\setrandom n 1 10</> or <literal>\setrandom n 1 10 uniform</>
+         is equivalent to <literal>\set n random(1, 10)</> and uses a uniform
+         distribution.
+        </para>
+       </listitem>
 
-     <para>
-      For a Gaussian distribution, the interval is mapped onto a standard
-      normal distribution (the classical bell-shaped Gaussian curve) truncated
-      at <literal>-parameter</> on the left and <literal>+parameter</>
-      on the right.
-      Values in the middle of the interval are more likely to be drawn.
-      To be precise, if <literal>PHI(x)</> is the cumulative distribution
-      function of the standard normal distribution, with mean <literal>mu</>
-      defined as <literal>(max + min) / 2.0</>, with
-<literallayout>
- f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
-        (2.0 * PHI(parameter) - 1.0)
-</literallayout>
-      then value <replaceable>i</> between <replaceable>min</> and
-      <replaceable>max</> inclusive is drawn with probability:
-      <literal>f(i + 0.5) - f(i - 0.5)</>.
-      Intuitively, the larger <replaceable>parameter</>, the more
-      frequently values close to the middle of the interval are drawn, and the
-      less frequently values close to the <replaceable>min</> and
-      <replaceable>max</> bounds. About 67% of values are drawn from the
-      middle <literal>1.0 / parameter</>, that is a relative
-      <literal>0.5 / parameter</> around the mean, and 95% in the middle
-      <literal>2.0 / parameter</>, that is a relative
-      <literal>1.0 / parameter</> around the mean; for instance, if
-      <replaceable>parameter</> is 4.0, 67% of values are drawn from the
-      middle quarter (1.0 / 4.0) of the interval (i.e. from
-      <literal>3.0 / 8.0</> to <literal>5.0 / 8.0</>) and 95% from
-      the middle half (<literal>2.0 / 4.0</>) of the interval (second and
-      third quartiles). The minimum <replaceable>parameter</> is 2.0 for
-      performance of the Box-Muller transform.
-     </para>
+      <listitem>
+       <para>
+        <literal>\setrandom n 1 10 exponential 3.0</> is equivalent to
+        <literal>\set n random_exponential(1, 10, 3.0)</> and uses an
+        exponential distribution.
+       </para>
+      </listitem>
 
-     <para>
-      For an exponential distribution, <replaceable>parameter</>
-      controls the distribution by truncating a quickly-decreasing
-      exponential distribution at <replaceable>parameter</>, and then
-      projecting onto integers between the bounds.
-      To be precise, with
-<literallayout>
-f(x) = exp(-parameter * (x - min) / (max - min + 1)) / (1.0 - exp(-parameter))
-</literallayout>
-      Then value <replaceable>i</> between <replaceable>min</> and
-      <replaceable>max</> inclusive is drawn with probability:
-      <literal>f(x) - f(x + 1)</>.
-      Intuitively, the larger <replaceable>parameter</>, the more
-      frequently values close to <replaceable>min</> are accessed, and the
-      less frequently values close to <replaceable>max</> are accessed.
-      The closer to 0 <replaceable>parameter</>, the flatter (more uniform)
-      the access distribution.
-      A crude approximation of the distribution is that the most frequent 1%
-      values in the range, close to <replaceable>min</>, are drawn
-      <replaceable>parameter</>% of the time.
-      <replaceable>parameter</> value must be strictly positive.
+      <listitem>
+       <para>
+        <literal>\setrandom n 1 10 gaussian 2.0</> is equivalent to
+        <literal>\set n random_gaussian(1, 10, 2.0)</>, and uses a gaussian
+        distribution.
+       </para>
+      </listitem>
+     </itemizedlist>
+
+       See the documentation of these functions below for further information
+       about the precise shape of these distributions, depending on the value
+       of the parameter.
      </para>
 
      <para>
@@ -965,18 +938,184 @@ f(x) = exp(-parameter * (x - min) / (max - min + 1)) / (1.0 - exp(-parameter))
    </varlistentry>
   </variablelist>
 
+   <!-- list pgbench functions in alphabetical order -->
+   <table id="functions-pgbench-func-table">
+    <title>PgBench Functions</title>
+    <tgroup cols="5">
+     <thead>
+      <row>
+       <entry>Function</entry>
+       <entry>Return Type</entry>
+       <entry>Description</entry>
+       <entry>Example</entry>
+       <entry>Result</entry>
+      </row>
+     </thead>
+     <tbody>
+      <row>
+       <entry><literal><function>abs(<replaceable>a</>)</></></>
+       <entry>same as <replaceable>a</></>
+       <entry>integer or double absolute value</>
+       <entry><literal>abs(-17)</></>
+       <entry><literal>17</></>
+      </row>
+      <row>
+       <entry><literal><function>debug(<replaceable>a</>)</></></>
+       <entry>same as<replaceable>a</> </>
+       <entry>print to <systemitem>stderr</systemitem> the given argument</>
+       <entry><literal>debug(5432.1)</></>
+       <entry><literal>5432.1</></>
+      </row>
+      <row>
+       <entry><literal><function>double(<replaceable>i</>)</></></>
+       <entry>double</>
+       <entry>cast to double</>
+       <entry><literal>double(5432)</></>
+       <entry><literal>5432.0</></>
+      </row>
+      <row>
+       <entry><literal><function>int(<replaceable>x</>)</></></>
+       <entry>integer</>
+       <entry>cast to int</>
+       <entry><literal>int(5.4 + 3.8)</></>
+       <entry><literal>9</></>
+      </row>
+      <row>
+       <entry><literal><function>max(<replaceable>i</> [, <replaceable>...</> ] )</></></>
+       <entry>integer</>
+       <entry>maximum value</>
+       <entry><literal>max(5, 4, 3, 2)</></>
+       <entry><literal>5</></>
+      </row>
+      <row>
+       <entry><literal><function>min(<replaceable>i</> [, <replaceable>...</> ] )</></></>
+       <entry>integer</>
+       <entry>minimum value</>
+       <entry><literal>min(5, 4, 3, 2)</></>
+       <entry><literal>2</></>
+      </row>
+      <row>
+       <entry><literal><function>pi()</></></>
+       <entry>double</>
+       <entry>value of the PI constant</>
+       <entry><literal>pi()</></>
+       <entry><literal>3.14159265358979323846</></>
+      </row>
+      <row>
+       <entry><literal><function>random(<replaceable>lb</>, <replaceable>ub</>)</></></>
+       <entry>integer</>
+       <entry>uniformly-distributed random integer in <literal>[lb, ub]</></>
+       <entry><literal>random(1, 10)</></>
+       <entry>an integer between <literal>1</> and <literal>10</></>
+      </row>
+      <row>
+       <entry><literal><function>random_exponential(<replaceable>lb</>, <replaceable>ub</>, <replaceable>parameter</>)</></></>
+       <entry>integer</>
+       <entry>exponentially-distributed random integer in <literal>[lb, ub]</>,
+              see below</>
+       <entry><literal>random_exponential(1, 10, 3.0)</></>
+       <entry>an integer between <literal>1</> and <literal>10</></>
+      </row>
+      <row>
+       <entry><literal><function>random_gaussian(<replaceable>lb</>, <replaceable>ub</>, <replaceable>parameter</>)</></></>
+       <entry>integer</>
+       <entry>gaussian-distributed random integer in <literal>[lb, ub]</>,
+              see below</>
+       <entry><literal>random_gaussian(1, 10, 2.5)</></>
+       <entry>an integer between <literal>1</> and <literal>10</></>
+      </row>
+      <row>
+       <entry><literal><function>sqrt(<replaceable>x</>)</></></>
+       <entry>double</>
+       <entry>square root</>
+       <entry><literal>sqrt(2.0)</></>
+       <entry><literal>1.414213562</></>
+      </row>
+     </tbody>
+     </tgroup>
+   </table>
+
+   <para>
+    The <literal>random</> function generates values using a uniform
+    distribution, that is all the values are drawn within the specified
+    range with equal probability. The <literal>random_exponential</> and
+    <literal>random_gaussian</> functions require an additional double
+    parameter which determines the precise shape of the distribution.
+   </para>
+
+   <itemizedlist>
+    <listitem>
+     <para>
+      For an exponential distribution, <replaceable>parameter</>
+      controls the distribution by truncating a quickly-decreasing
+      exponential distribution at <replaceable>parameter</>, and then
+      projecting onto integers between the bounds.
+      To be precise, with
+<literallayout>
+f(x) = exp(-parameter * (x - min) / (max - min + 1)) / (1 - exp(-parameter))
+</literallayout>
+      Then value <replaceable>i</> between <replaceable>min</> and
+      <replaceable>max</> inclusive is drawn with probability:
+      <literal>f(x) - f(x + 1)</>.
+     </para>
+
+     <para>
+      Intuitively, the larger the <replaceable>parameter</>, the more
+      frequently values close to <replaceable>min</> are accessed, and the
+      less frequently values close to <replaceable>max</> are accessed.
+      The closer to 0 <replaceable>parameter</> is, the flatter (more
+      uniform) the access distribution.
+      A crude approximation of the distribution is that the most frequent 1%
+      values in the range, close to <replaceable>min</>, are drawn
+      <replaceable>parameter</>%  of the time.
+      The <replaceable>parameter</> value must be strictly positive.
+     </para>
+    </listitem>
+
+    <listitem>
+     <para>
+      For a Gaussian distribution, the interval is mapped onto a standard
+      normal distribution (the classical bell-shaped Gaussian curve) truncated
+      at <literal>-parameter</> on the left and <literal>+parameter</>
+      on the right.
+      Values in the middle of the interval are more likely to be drawn.
+      To be precise, if <literal>PHI(x)</> is the cumulative distribution
+      function of the standard normal distribution, with mean <literal>mu</>
+      defined as <literal>(max + min) / 2.0</>, with
+<literallayout>
+ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
+        (2.0 * PHI(parameter) - 1)
+</literallayout>
+      then value <replaceable>i</> between <replaceable>min</> and
+      <replaceable>max</> inclusive is drawn with probability:
+      <literal>f(i + 0.5) - f(i - 0.5)</>.
+      Intuitively, the larger the <replaceable>parameter</>, the more
+      frequently values close to the middle of the interval are drawn, and the
+      less frequently values close to the <replaceable>min</> and
+      <replaceable>max</> bounds. About 67% of values are drawn from the
+      middle <literal>1.0 / parameter</>, that is a relative
+      <literal>0.5 / parameter</> around the mean, and 95% in the middle
+      <literal>2.0 / parameter</>, that is a relative
+      <literal>1.0 / parameter</> around the mean; for instance, if
+      <replaceable>parameter</> is 4.0, 67% of values are drawn from the
+      middle quarter (1.0 / 4.0) of the interval (i.e. from
+      <literal>3.0 / 8.0</> to <literal>5.0 / 8.0</>) and 95% from
+      the middle half (<literal>2.0 / 4.0</>) of the interval (second and third
+      quartiles). The minimum <replaceable>parameter</> is 2.0 for performance
+      of the Box-Muller transform.
+     </para>
+    </listitem>
+   </itemizedlist>
+
   <para>
    As an example, the full definition of the built-in TPC-B-like
    transaction is:
 
 <programlisting>
-\set nbranches :scale
-\set ntellers 10 * :scale
-\set naccounts 100000 * :scale
-\setrandom aid 1 :naccounts
-\setrandom bid 1 :nbranches
-\setrandom tid 1 :ntellers
-\setrandom delta -5000 5000
+\set aid random(1, 100000 * :scale)
+\set bid random(1, 1 * :scale)
+\set tid random(1, 10 * :scale)
+\set delta random(-5000, 5000)
 BEGIN;
 UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
 SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
@@ -1135,17 +1274,16 @@ number of clients: 10
 number of threads: 1
 number of transactions per client: 1000
 number of transactions actually processed: 10000/10000
+latency average: 16.052 ms
+latency stddev: 8.204 ms
 tps = 618.764555 (including connections establishing)
 tps = 622.977698 (excluding connections establishing)
 SQL script 1: &lt;builtin: TPC-B (sort of)&gt;
  - statement latencies in milliseconds:
-        0.004386        \set nbranches 1 * :scale
-        0.001343        \set ntellers 10 * :scale
-        0.001212        \set naccounts 100000 * :scale
-        0.001310        \setrandom aid 1 :naccounts
-        0.001073        \setrandom bid 1 :nbranches
-        0.001005        \setrandom tid 1 :ntellers
-        0.001078        \setrandom delta -5000 5000
+        0.002522        \set aid random(1, 100000 * :scale)
+        0.005459        \set bid random(1, 1 * :scale)
+        0.002348        \set tid random(1, 10 * :scale)
+        0.001078        \set delta random(-5000, 5000)
         0.326152        BEGIN;
         0.603376        UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
         0.454643        SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
diff --git a/src/bin/pgbench/exprparse.y b/src/bin/pgbench/exprparse.y
index 06ee04b..0d9278e 100644
--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -16,10 +16,14 @@
 
 PgBenchExpr *expr_parse_result;
 
+static PgBenchExprList *make_elist(PgBenchExpr *exp, PgBenchExprList *list);
 static PgBenchExpr *make_integer_constant(int64 ival);
+static PgBenchExpr *make_double_constant(double dval);
 static PgBenchExpr *make_variable(char *varname);
-static PgBenchExpr *make_op(char operator, PgBenchExpr *lexpr,
+static PgBenchExpr *make_op(const char *operator, PgBenchExpr *lexpr,
 		PgBenchExpr *rexpr);
+static int find_func(const char * fname);
+static PgBenchExpr *make_func(const int fnumber, PgBenchExprList *args);
 
 %}
 
@@ -29,15 +33,19 @@ static PgBenchExpr *make_op(char operator, PgBenchExpr *lexpr,
 %union
 {
 	int64		ival;
+	double		dval;
 	char	   *str;
 	PgBenchExpr *expr;
+	PgBenchExprList *elist;
 }
 
+%type <elist> elist
 %type <expr> expr
-%type <ival> INTEGER
-%type <str> VARIABLE
+%type <ival> INTEGER function
+%type <dval> DOUBLE
+%type <str> VARIABLE FUNCTION
 
-%token INTEGER VARIABLE
+%token INTEGER DOUBLE VARIABLE FUNCTION
 %token CHAR_ERROR /* never used, will raise a syntax error */
 
 /* Precedence: lowest to highest */
@@ -49,16 +57,26 @@ static PgBenchExpr *make_op(char operator, PgBenchExpr *lexpr,
 
 result: expr				{ expr_parse_result = $1; }
 
+elist:                  	{ $$ = NULL; }
+	| expr 					{ $$ = make_elist($1, NULL); }
+	| elist ',' expr		{ $$ = make_elist($3, $1); /* reverse order */ }
+	;
+
 expr: '(' expr ')'			{ $$ = $2; }
 	| '+' expr %prec UMINUS	{ $$ = $2; }
-	| '-' expr %prec UMINUS	{ $$ = make_op('-', make_integer_constant(0), $2); }
-	| expr '+' expr			{ $$ = make_op('+', $1, $3); }
-	| expr '-' expr			{ $$ = make_op('-', $1, $3); }
-	| expr '*' expr			{ $$ = make_op('*', $1, $3); }
-	| expr '/' expr			{ $$ = make_op('/', $1, $3); }
-	| expr '%' expr			{ $$ = make_op('%', $1, $3); }
+	| '-' expr %prec UMINUS	{ $$ = make_op("-", make_integer_constant(0), $2); }
+	| expr '+' expr			{ $$ = make_op("+", $1, $3); }
+	| expr '-' expr			{ $$ = make_op("-", $1, $3); }
+	| expr '*' expr			{ $$ = make_op("*", $1, $3); }
+	| expr '/' expr			{ $$ = make_op("/", $1, $3); }
+	| expr '%' expr			{ $$ = make_op("%", $1, $3); }
 	| INTEGER				{ $$ = make_integer_constant($1); }
+	| DOUBLE				{ $$ = make_double_constant($1); }
 	| VARIABLE 				{ $$ = make_variable($1); }
+	| function '(' elist ')'{ $$ = make_func($1, $3); }
+	;
+
+function: FUNCTION			{ $$ = find_func($1); pg_free($1); }
 	;
 
 %%
@@ -68,8 +86,20 @@ make_integer_constant(int64 ival)
 {
 	PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr));
 
-	expr->etype = ENODE_INTEGER_CONSTANT;
-	expr->u.integer_constant.ival = ival;
+	expr->etype = ENODE_CONSTANT;
+	expr->u.constant.type = PGBT_INT;
+	expr->u.constant.u.ival = ival;
+	return expr;
+}
+
+static PgBenchExpr *
+make_double_constant(double dval)
+{
+	PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr));
+
+	expr->etype = ENODE_CONSTANT;
+	expr->u.constant.type = PGBT_DOUBLE;
+	expr->u.constant.u.dval = dval;
 	return expr;
 }
 
@@ -84,14 +114,137 @@ make_variable(char *varname)
 }
 
 static PgBenchExpr *
-make_op(char operator, PgBenchExpr *lexpr, PgBenchExpr *rexpr)
+make_op(const char *operator, PgBenchExpr *lexpr, PgBenchExpr *rexpr)
+{
+	return make_func(find_func(operator),
+					 /* beware that the list is reversed in make_func */
+					 make_elist(rexpr, make_elist(lexpr, NULL)));
+}
+
+/*
+ * List of available functions:
+ * - fname: function name
+ * - nargs: number of arguments (-1 is a special value for min & max)
+ * - tag: function identifier from PgBenchFunction enum
+ */
+static struct
+{
+	char * fname;
+	int nargs;
+	PgBenchFunction tag;
+} PGBENCH_FUNCTIONS[] = {
+	/* parsed as operators, executed as functions */
+	{ "+", 2, PGBENCH_ADD },
+	{ "-", 2, PGBENCH_SUB },
+	{ "*", 2, PGBENCH_MUL },
+	{ "/", 2, PGBENCH_DIV },
+	{ "%", 2, PGBENCH_MOD },
+	/* actual functions */
+	{ "pi", 0, PGBENCH_PI },
+	{ "abs", 1, PGBENCH_ABS },
+	{ "sqrt", 1, PGBENCH_SQRT },
+	{ "int", 1, PGBENCH_INT },
+	{ "double", 1, PGBENCH_DOUBLE },
+	{ "min", -1, PGBENCH_MIN },
+	{ "max", -1, PGBENCH_MAX },
+	{ "random", 2, PGBENCH_RANDOM },
+	{ "random_gaussian", 3, PGBENCH_RANDOM_GAUSSIAN },
+	{ "random_exponential", 3, PGBENCH_RANDOM_EXPONENTIAL },
+	{ "debug", 1, PGBENCH_DEBUG },
+
+	/* keep as last array element */
+	{ NULL, 0, 0 }
+};
+
+/*
+ * Find a function from its name
+ *
+ * return the index of the function from the PGBENCH_FUNCTIONS array
+ * or fail if the function is unknown.
+ */
+static int
+find_func(const char * fname)
+{
+	int i = 0;
+
+	while (PGBENCH_FUNCTIONS[i].fname)
+	{
+		if (pg_strcasecmp(fname, PGBENCH_FUNCTIONS[i].fname) == 0)
+			return i;
+		i++;
+	}
+
+	expr_yyerror_more("unexpected function name", fname);
+
+	/* not reached */
+	return -1;
+}
+
+/* Expression linked list builder */
+static PgBenchExprList *
+make_elist(PgBenchExpr *expr, PgBenchExprList *list)
+{
+	PgBenchExprList *cons = pg_malloc(sizeof(PgBenchExprList));
+	cons->expr = expr;
+	cons->next = list;
+	return cons;
+}
+
+/*
+ * Reverse expression linked list
+ *
+ * The list of function arguments is built in reverse order, and reversed once
+ * at the end so as to avoid appending repeatedly at the end of the list.
+ */
+static PgBenchExprList *
+reverse_elist(PgBenchExprList *list)
+{
+	PgBenchExprList *cur = list, *prec = NULL, *next = NULL;
+
+	while (cur != NULL)
+	{
+		next = cur->next;
+		cur->next = prec;
+		prec = cur;
+		cur = next;
+	}
+
+	return prec;
+}
+
+/* Return the length of an expression list */
+static int
+elist_length(PgBenchExprList *list)
+{
+	int len = 0;
+
+	for (; list != NULL; list = list->next)
+		len++;
+
+	return len;
+}
+
+/* Build function call expression */
+static PgBenchExpr *
+make_func(const int fnumber, PgBenchExprList *args)
 {
 	PgBenchExpr *expr = pg_malloc(sizeof(PgBenchExpr));
 
-	expr->etype = ENODE_OPERATOR;
-	expr->u.operator.operator = operator;
-	expr->u.operator.lexpr = lexpr;
-	expr->u.operator.rexpr = rexpr;
+	Assert(fnumber >= 0);
+
+	if ((PGBENCH_FUNCTIONS[fnumber].nargs >= 0 &&
+		 PGBENCH_FUNCTIONS[fnumber].nargs != elist_length(args)) ||
+		/* check at least one arg for min & max */
+		(PGBENCH_FUNCTIONS[fnumber].nargs == -1 &&
+		 elist_length(args) == 0))
+		expr_yyerror_more("unexpected number of arguments",
+						  PGBENCH_FUNCTIONS[fnumber].fname);
+
+	expr->etype = ENODE_FUNCTION;
+	expr->u.function.function = PGBENCH_FUNCTIONS[fnumber].tag;
+	/* the argument list has been built in reverse order, it is fixed here */
+	expr->u.function.args = reverse_elist(args);
+
 	return expr;
 }
 
diff --git a/src/bin/pgbench/exprscan.l b/src/bin/pgbench/exprscan.l
index f1c4c7e..b56850d 100644
--- a/src/bin/pgbench/exprscan.l
+++ b/src/bin/pgbench/exprscan.l
@@ -46,6 +46,7 @@ space			[ \t\r\f]
 "%"				{ yycol += yyleng; return '%'; }
 "("				{ yycol += yyleng; return '('; }
 ")"				{ yycol += yyleng; return ')'; }
+","				{ yycol += yyleng; return ','; }
 
 :[a-zA-Z0-9_]+	{
 					yycol += yyleng;
@@ -57,8 +58,19 @@ space			[ \t\r\f]
 					yylval.ival = strtoint64(yytext);
 					return INTEGER;
 				}
+[0-9]+\.[0-9]+	{
+					yycol += yyleng;
+					yylval.dval = atof(yytext);
+					return DOUBLE;
+				}
+[a-zA-Z0-9_]+   {
+					yycol += yyleng;
+					yylval.str = pg_strdup(yytext);
+					return FUNCTION;
+				}
+
+[\n]			{ yycol = 0; yyline++; /* never occurs, input on one line */ }
 
-[\n]			{ yycol = 0; yyline++; }
 {space}+		{ yycol += yyleng; /* ignore */ }
 
 .				{
@@ -71,10 +83,16 @@ space			[ \t\r\f]
 %%
 
 void
-yyerror(const char *message)
+expr_yyerror_more(const char *message, const char *more)
 {
 	syntax_error(expr_source, expr_lineno, expr_full_line, expr_command,
-				 message, NULL, expr_col + yycol);
+				 message, more, expr_col + yycol);
+}
+
+void
+yyerror(const char *message)
+{
+	expr_yyerror_more(message, NULL);
 }
 
 /*
@@ -94,6 +112,9 @@ expr_scanner_init(const char *str, const char *source,
 	expr_command = (char *) cmd;
 	expr_col = (int) ecol;
 
+	/* reset column count for this scan */
+	yycol = 0;
+
 	/*
 	 * Might be left over after error
 	 */
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index d5f242c..63c7475 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -317,13 +317,10 @@ static struct
 	{
 		"tpcb-like",
 		"<builtin: TPC-B (sort of)>",
-		"\\set nbranches " CppAsString2(nbranches) " * :scale\n"
-		"\\set ntellers " CppAsString2(ntellers) " * :scale\n"
-		"\\set naccounts " CppAsString2(naccounts) " * :scale\n"
-		"\\setrandom aid 1 :naccounts\n"
-		"\\setrandom bid 1 :nbranches\n"
-		"\\setrandom tid 1 :ntellers\n"
-		"\\setrandom delta -5000 5000\n"
+		"\\set aid random(1, " CppAsString2(naccounts) " * :scale)\n"
+		"\\set bid random(1, " CppAsString2(nbranches) " * :scale)\n"
+		"\\set tid random(1, " CppAsString2(ntellers) " * :scale)\n"
+		"\\set delta random(-5000, 5000)\n"
 		"BEGIN;\n"
 		"UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n"
 		"SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
@@ -335,13 +332,10 @@ static struct
 	{
 		"simple-update",
 		"<builtin: simple update>",
-		"\\set nbranches " CppAsString2(nbranches) " * :scale\n"
-		"\\set ntellers " CppAsString2(ntellers) " * :scale\n"
-		"\\set naccounts " CppAsString2(naccounts) " * :scale\n"
-		"\\setrandom aid 1 :naccounts\n"
-		"\\setrandom bid 1 :nbranches\n"
-		"\\setrandom tid 1 :ntellers\n"
-		"\\setrandom delta -5000 5000\n"
+		"\\set aid random(1, " CppAsString2(naccounts) " * :scale)\n"
+		"\\set bid random(1, " CppAsString2(nbranches) " * :scale)\n"
+		"\\set tid random(1, " CppAsString2(ntellers) " * :scale)\n"
+		"\\set delta random(-5000, 5000)\n"
 		"BEGIN;\n"
 		"UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;\n"
 		"SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
@@ -351,13 +345,11 @@ static struct
 	{
 		"select-only",
 		"<builtin: select only>",
-		"\\set naccounts " CppAsString2(naccounts) " * :scale\n"
-		"\\setrandom aid 1 :naccounts\n"
+		"\\set aid random(1, " CppAsString2(naccounts) " * :scale)\n"
 		"SELECT abalance FROM pgbench_accounts WHERE aid = :aid;\n"
 	}
 };
 
-
 /* Function prototypes */
 static void setalarm(int seconds);
 static void *threadRun(void *arg);
@@ -365,6 +357,8 @@ static void *threadRun(void *arg);
 static void doLog(TState *thread, CState *st, FILE *logfile, instr_time *now,
 	  AggVals *agg, bool skipped);
 
+static bool evaluateExpr(TState *, CState *, PgBenchExpr *, PgBenchValue *);
+
 static void
 usage(void)
 {
@@ -519,6 +513,7 @@ getExponentialRand(TState *thread, int64 min, int64 max, double parameter)
 				uniform,
 				rand;
 
+	/* abort if wrong parameter, but must really be checked beforehand */
 	Assert(parameter > 0.0);
 	cut = exp(-parameter);
 	/* erand in [0, 1), uniform in (0, 1] */
@@ -540,6 +535,9 @@ getGaussianRand(TState *thread, int64 min, int64 max, double parameter)
 	double		stdev;
 	double		rand;
 
+	/* abort if parameter is too low, but must really be checked beforehand */
+	Assert(parameter >= MIN_GAUSSIAN_PARAM);
+
 	/*
 	 * Get user specified random number from this loop, with -parameter <
 	 * stdev <= parameter
@@ -908,22 +906,374 @@ getQueryParams(CState *st, const Command *command, const char **params)
 }
 
 /*
+ * Recursive evaluation of int or double expressions
+ *
+ * Note that currently only integer variables are available, with values
+ * stored as text.
+ */
+
+static int64
+coerceToInt(PgBenchValue *pval)
+{
+	if (pval->type == PGBT_INT)
+		return pval->u.ival;
+	else if (pval->type == PGBT_DOUBLE)
+	{
+		double dval = pval->u.dval;
+		if (dval < INT64_MIN || INT64_MAX < dval)
+		{
+			/*
+			 * Stop on double->int overflow.
+			 *
+			 * Other options would be to abort the current transaction or
+			 * the running client, but this would have more impact on the code
+			 * for little benefit.
+			 */
+			fprintf(stderr, "double to int overflow for %f\n", dval);
+			exit(1);
+		}
+		return (int64) dval;
+	}
+	/* stop on internal error */
+	fprintf(stderr, "unexpected value type %d\n", pval->type);
+	exit(1);
+	return 0;
+}
+
+static double
+coerceToDouble(PgBenchValue *pval)
+{
+	if (pval->type == PGBT_DOUBLE)
+		return pval->u.dval;
+	else if (pval->type == PGBT_INT)
+		return (double) pval->u.ival;
+	/* stop on internal error */
+	fprintf(stderr, "unexpected value type %d\n", pval->type);
+	exit(1);
+	return 0;
+}
+
+static void
+setIntValue(PgBenchValue *pv, int64 ival)
+{
+	pv->type = PGBT_INT;
+	pv->u.ival = ival;
+}
+
+static void
+setDoubleValue(PgBenchValue *pv, double dval)
+{
+	pv->type = PGBT_DOUBLE;
+	pv->u.dval = dval;
+}
+
+static bool
+evalFunc(TState *thread, CState *st,
+		 PgBenchFunction func, PgBenchExprList *args, PgBenchValue *retval)
+{
+	switch (func)
+	{
+		case PGBENCH_ADD:
+		case PGBENCH_SUB:
+		case PGBENCH_MUL:
+		case PGBENCH_DIV:
+		case PGBENCH_MOD:
+		{
+			PgBenchValue		lval, rval;
+
+			if (!args || !args->next || args->next->next)
+				/* two arguments only */
+				return false;
+
+			if (!evaluateExpr(thread, st, args->expr, &lval))
+				return false;
+
+			if (!evaluateExpr(thread, st, args->next->expr, &rval))
+				return false;
+
+			/* overloaded type management, double if some double */
+			if (lval.type == PGBT_DOUBLE || rval.type == PGBT_DOUBLE)
+			{
+				switch (func)
+				{
+					case PGBENCH_ADD:
+						setDoubleValue(retval, coerceToDouble(&lval) + coerceToDouble(&rval));
+						return true;
+
+					case PGBENCH_SUB:
+						setDoubleValue(retval, coerceToDouble(&lval) - coerceToDouble(&rval));
+						return true;
+
+					case PGBENCH_MUL:
+						setDoubleValue(retval, coerceToDouble(&lval) * coerceToDouble(&rval));
+						return true;
+
+					case PGBENCH_DIV:
+						setDoubleValue(retval, coerceToDouble(&lval) / coerceToDouble(&rval));
+						return true;
+
+					case PGBENCH_MOD: /* no overloading for modulo */
+						if (coerceToInt(&rval) == 0)
+						{
+							fprintf(stderr, "division by zero\n");
+							return false;
+						}
+						if (coerceToInt(&lval) == INT64_MIN && coerceToInt(&rval) == -1)
+						{
+							fprintf(stderr, "cannot divide INT64_MIN by -1\n");
+							return false;
+						}
+						setIntValue(retval, coerceToInt(&lval) % coerceToInt(&rval));
+						return true;
+
+					default:
+						return false;
+				}
+			}
+			else /* both operands are integers */
+			{
+				switch (func)
+				{
+					case PGBENCH_ADD:
+						setIntValue(retval, coerceToInt(&lval) + coerceToInt(&rval));
+						return true;
+
+					case PGBENCH_SUB:
+						setIntValue(retval, coerceToInt(&lval) - coerceToInt(&rval));
+						return true;
+
+					case PGBENCH_MUL:
+						setIntValue(retval, coerceToInt(&lval) * coerceToInt(&rval));
+						return true;
+
+					case PGBENCH_DIV:
+					case PGBENCH_MOD:
+						if (coerceToInt(&rval) == 0)
+						{
+							fprintf(stderr, "division by zero\n");
+							return false;
+						}
+						if (coerceToInt(&lval) == INT64_MIN && coerceToInt(&rval) == -1)
+						{
+							fprintf(stderr, "cannot divide INT64_MIN by -1\n");
+							return false;
+						}
+						if (func == PGBENCH_DIV)
+							setIntValue(retval, coerceToInt(&lval) / coerceToInt(&rval));
+						else
+							setIntValue(retval, coerceToInt(&lval) % coerceToInt(&rval));
+						return true;
+
+					default:
+						return false;
+				}
+			}
+		}
+
+		case PGBENCH_PI:
+			setDoubleValue(retval, M_PI);
+			return true;
+
+		case PGBENCH_ABS:
+		{
+			PgBenchValue arg;
+
+			if (!evaluateExpr(thread, st, args->expr, &arg))
+				return false;
+
+			if (arg.type == PGBT_DOUBLE)
+			{
+				double d = coerceToDouble(&arg);
+				setDoubleValue(retval, d < 0.0? -d: d);
+			}
+			else if (arg.type == PGBT_INT)
+			{
+				int64 i = coerceToInt(&arg);
+				setIntValue(retval, i < 0? -i: i);
+			}
+
+			return true;
+		}
+
+		case PGBENCH_SQRT:
+		{
+			PgBenchValue arg;
+
+			if (!evaluateExpr(thread, st, args->expr, &arg))
+				return false;
+
+			setDoubleValue(retval, sqrt(coerceToDouble(&arg)));
+
+			return true;
+		}
+
+		case PGBENCH_DEBUG:
+		{
+			if (!evaluateExpr(thread, st, args->expr, retval))
+				return false;
+
+			fprintf(stderr,	"debug(script=%d,command=%d): ",
+					st->use_file, st->state+1);
+
+			if (retval->type == PGBT_INT)
+				fprintf(stderr,	"int " INT64_FORMAT "\n", retval->u.ival);
+			else if (retval->type == PGBT_DOUBLE)
+				fprintf(stderr, "double %f\n", retval->u.dval);
+			else
+				fprintf(stderr, "none\n");
+
+			return true;
+		}
+
+		case PGBENCH_DOUBLE:
+		{
+			PgBenchValue arg;
+
+			if (!evaluateExpr(thread, st, args->expr, &arg))
+				return false;
+
+			setDoubleValue(retval, coerceToDouble(&arg));
+
+			return true;
+		}
+
+		case PGBENCH_INT:
+		{
+			PgBenchValue arg;
+
+			if (!evaluateExpr(thread, st, args->expr, &arg))
+				return false;
+
+			setIntValue(retval, coerceToInt(&arg));
+
+			return true;
+		}
+
+		case PGBENCH_MIN:
+		case PGBENCH_MAX:
+		{
+			int64 val = -1;
+			bool first = true;
+			while (args != NULL)
+			{
+				PgBenchValue arg;
+
+				if (!evaluateExpr(thread, st, args->expr, &arg))
+					return false;
+
+				if (first)
+					val = coerceToInt(&arg);
+				else
+				{
+					int64 i = coerceToInt(&arg);
+					if (func == PGBENCH_MIN)
+						val = val < i? val: i;
+					else if (func == PGBENCH_MAX)
+						val = val > i? val: i;
+				}
+
+				args = args->next;
+				first = false;
+			}
+
+			setIntValue(retval, val);
+			return true;
+		}
+
+		case PGBENCH_RANDOM:
+		case PGBENCH_RANDOM_EXPONENTIAL:
+		case PGBENCH_RANDOM_GAUSSIAN:
+		{
+			PgBenchValue varg1, varg2;
+			int64 arg1, arg2;
+
+			if (!evaluateExpr(thread, st, args->expr, &varg1))
+				return false;
+
+			if (!evaluateExpr(thread, st, args->next->expr, &varg2))
+				return false;
+
+			arg1 = coerceToInt(&varg1);
+			arg2 = coerceToInt(&varg2);
+
+			/* check random range */
+			if (arg1 > arg2)
+			{
+				fprintf(stderr, "empty range given to random\n");
+				st->ecnt++;
+				return false;
+			}
+			else if (arg2 - arg1 < 0 || (arg2 - arg1) + 1 < 0)
+			{
+				/* prevent int overflows in random functions */
+				fprintf(stderr, "random range is too large\n");
+				st->ecnt++;
+				return false;
+			}
+
+			if (func == PGBENCH_RANDOM)
+				setIntValue(retval, getrand(thread, arg1, arg2));
+			else /* gaussian & exponential */
+			{
+				PgBenchValue param;
+				double dparam;
+
+				if (!evaluateExpr(thread, st, args->next->next->expr, &param))
+					return false;
+
+				dparam = coerceToDouble(&param);
+				if (func == PGBENCH_RANDOM_GAUSSIAN)
+				{
+					if (dparam < MIN_GAUSSIAN_PARAM)
+					{
+						fprintf(stderr,
+								"gaussian parameter must be at least %f "
+								"(not %f)\n", MIN_GAUSSIAN_PARAM, dparam);
+						st->ecnt++;
+						return false;
+					}
+
+					setIntValue(retval,	getGaussianRand(thread, arg1, arg2,	dparam));
+				}
+				else /* exponential */
+				{
+					if (dparam <= 0.0)
+					{
+						fprintf(stderr,
+								"exponential parameter must be greater than zero"
+								" (got %f)\n", dparam);
+						st->ecnt++;
+						return false;
+					}
+
+					setIntValue(retval,	getExponentialRand(thread, arg1, arg2, dparam));
+				}
+			}
+
+			return true;
+		}
+		default:
+			fprintf(stderr, "unexpected function tag: %d\n", func);
+			exit(1);
+	}
+}
+
+/*
  * Recursive evaluation of an expression in a pgbench script
  * using the current state of variables.
  * Returns whether the evaluation was ok,
  * the value itself is returned through the retval pointer.
  */
 static bool
-evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval)
+evaluateExpr(TState *thread, CState *st, PgBenchExpr *expr, PgBenchValue *retval)
 {
 	switch (expr->etype)
 	{
-		case ENODE_INTEGER_CONSTANT:
+		case ENODE_CONSTANT:
 			{
-				*retval = expr->u.integer_constant.ival;
+				*retval = expr->u.constant;
 				return true;
 			}
-
 		case ENODE_VARIABLE:
 			{
 				char	   *var;
@@ -934,58 +1284,20 @@ evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval)
 							expr->u.variable.varname);
 					return false;
 				}
-				*retval = strtoint64(var);
+
+				setIntValue(retval, strtoint64(var));
 				return true;
 			}
-
-		case ENODE_OPERATOR:
-			{
-				int64		lval;
-				int64		rval;
-
-				if (!evaluateExpr(st, expr->u.operator.lexpr, &lval))
-					return false;
-				if (!evaluateExpr(st, expr->u.operator.rexpr, &rval))
-					return false;
-				switch (expr->u.operator.operator)
-				{
-					case '+':
-						*retval = lval + rval;
-						return true;
-
-					case '-':
-						*retval = lval - rval;
-						return true;
-
-					case '*':
-						*retval = lval * rval;
-						return true;
-
-					case '/':
-						if (rval == 0)
-						{
-							fprintf(stderr, "division by zero\n");
-							return false;
-						}
-						*retval = lval / rval;
-						return true;
-
-					case '%':
-						if (rval == 0)
-						{
-							fprintf(stderr, "division by zero\n");
-							return false;
-						}
-						*retval = lval % rval;
-						return true;
-				}
-
-				fprintf(stderr, "bad operator\n");
-				return false;
-			}
+		case ENODE_FUNCTION:
+				return evalFunc(thread, st,
+								expr->u.function.function,
+								expr->u.function.args,
+								retval);
 
 		default:
-			break;
+			fprintf(stderr, "unexpected enode type in evaluation: %d\n",
+					expr->etype);
+			exit(1);
 	}
 
 	fprintf(stderr, "bad expression\n");
@@ -1512,6 +1824,10 @@ top:
 			fprintf(stderr, "\n");
 		}
 
+		/*
+		 * Note: this section could be removed, as the same functionnality
+		 * is available through \set xxx random_gaussian(...)
+		 */
 		if (pg_strcasecmp(argv[0], "setrandom") == 0)
 		{
 			char	   *var;
@@ -1619,8 +1935,8 @@ top:
 					if (parameter <= 0.0)
 					{
 						fprintf(stderr,
-								"exponential parameter must be greater than zero (not \"%s\")\n",
-								argv[5]);
+							"exponential parameter must be greater than zero (not \"%s\")\n",
+							argv[5]);
 						st->ecnt++;
 						return true;
 					}
@@ -1652,15 +1968,15 @@ top:
 		else if (pg_strcasecmp(argv[0], "set") == 0)
 		{
 			char		res[64];
-			PgBenchExpr *expr = commands[st->state]->expr;
-			int64		result;
+			PgBenchExpr 	*expr = commands[st->state]->expr;
+			PgBenchValue	result;
 
-			if (!evaluateExpr(st, expr, &result))
+			if (!evaluateExpr(thread, st, expr, &result))
 			{
 				st->ecnt++;
 				return true;
 			}
-			sprintf(res, INT64_FORMAT, result);
+			sprintf(res, INT64_FORMAT, coerceToInt(&result));
 
 			if (!putVariable(st, argv[0], argv[1], res))
 			{
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index 5bb2480..f9f5605 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -11,42 +11,97 @@
 #ifndef PGBENCH_H
 #define PGBENCH_H
 
+/*
+ * Variable types used in parser.
+ */
+typedef enum
+{
+	PGBT_NONE,
+	PGBT_INT,
+	PGBT_DOUBLE
+} PgBenchValueType;
+
+typedef struct
+{
+	PgBenchValueType type;
+	union
+	{
+		int64 ival;
+		double dval;
+	} u;
+} PgBenchValue;
+
+/* Types of expression nodes */
 typedef enum PgBenchExprType
 {
-	ENODE_INTEGER_CONSTANT,
+	ENODE_CONSTANT,
 	ENODE_VARIABLE,
-	ENODE_OPERATOR
+	ENODE_FUNCTION
 } PgBenchExprType;
 
+/* List of callable functions */
+typedef enum PgBenchFunction
+{
+	PGBENCH_NONE,
+	PGBENCH_ADD,
+	PGBENCH_SUB,
+	PGBENCH_MUL,
+	PGBENCH_DIV,
+	PGBENCH_MOD,
+	PGBENCH_PI,
+	PGBENCH_INT,
+	PGBENCH_DOUBLE,
+	PGBENCH_DEBUG,
+	PGBENCH_ABS,
+	PGBENCH_SQRT,
+	PGBENCH_MIN,
+	PGBENCH_MAX,
+	PGBENCH_RANDOM,
+	PGBENCH_RANDOM_GAUSSIAN,
+	PGBENCH_RANDOM_EXPONENTIAL
+} PgBenchFunction;
+
 typedef struct PgBenchExpr PgBenchExpr;
+typedef struct PgBenchExprList PgBenchExprList;
 
+/*
+ * Basic representation of an expression parsed. This can be used as
+ * different things by the parser as defined by PgBenchExprType:
+ * - ENODE_CONSTANT, constant integer or double value
+ * - ENODE_VARIABLE, variable result of \set or \setrandom
+ * - ENODE_FUNCTION, in-core functions and operators
+ */
 struct PgBenchExpr
 {
 	PgBenchExprType etype;
 	union
 	{
-		struct
-		{
-			int64		ival;
-		}			integer_constant;
+		PgBenchValue	constant;
 		struct
 		{
 			char	   *varname;
 		}			variable;
 		struct
 		{
-			char		operator;
-			PgBenchExpr *lexpr;
-			PgBenchExpr *rexpr;
-		}			operator;
+			PgBenchFunction function;
+			PgBenchExprList *args;
+		}			function;
 	}			u;
 };
 
+/* List of expression nodes */
+struct PgBenchExprList
+{
+	PgBenchExpr *expr;
+	PgBenchExprList *next;
+};
+
 extern PgBenchExpr *expr_parse_result;
 
 extern int	expr_yyparse(void);
 extern int	expr_yylex(void);
 extern void expr_yyerror(const char *str);
+extern void expr_yyerror_more(const char *str, const char *more);
 extern void expr_scanner_init(const char *str, const char *source,
 				  const int lineno, const char *line,
 				  const char *cmd, const int ecol);

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] extend pgbench expressions with functions

Reply via email to