On Wed, Apr 02, 2025 at 05:28:40PM -0500, Eric Blake via M4-patches wrote:
> While a recursive descent parser is easy to write, it involves a LOT
> of function calls and boilerplate. Merely parsing "eval(1)" requires
> descending through ALL 11 levels of operator precedence, only for each
> layer to discover there is no operator. Better is the Pratt style of
> LR(1) parsing [1], which can handle any grammar where no two
> consecutive non-terminals or epsilon appear in the right side of any
> rule [2]. Now, parsing is done with just two mutually recursive
> functions; "eval(1)" works with just two function calls (primary()
> determines the value, and parse_expr() determines no operators are
> present), while more complicated expressions still produce the correct
> results but with less recursion.
>
> +/* Parse binary operators with at least MIN_PREC precedence. */
> +static eval_error
> +parse_expr (int32_t *v1, int min_prec)
> +{
> + eval_token et;
> + eval_token et2;
> + eval_error er = NO_ERROR;
> + eval_error er2;
> + int32_t v2;
> + int32_t v3;
> + uint32_t u1;
> +
> + et = eval_lex (&v2);
> + while (precedence[et] >= min_prec)
> + {
> + if ((er = primary (&v2)) >= SYNTAX_ERROR)
> + return er;
This blindly overwrites er from the left side of any binary operand,
which caused a temporary regression in expressions like '1/0+1'.
Fixing it thus:
From d850009d869802e0b14f22a3de19559ce2fd23e2 Mon Sep 17 00:00:00 2001
From: Eric Blake <[email protected]>
Date: Mon, 14 Apr 2025 08:44:57 -0500
Subject: [PATCH] eval: Don't forget division by zero on left side of expr
Content-type: text/plain
Fix a regression introduced in e3c4d07c - when the left half of an
expression was syntactically valid but computationally undefined, the
parser was overwriting that status with a successful parse of the
right half, when the second operator has lower precedence than the
operator that caused the problem in the left half. The simplest test
case is "eval(1/0+1)"; also vulnerable was "eval(1/0||1/0)".
* src/eval.c (evaluate): Adjust signature, to avoid losing error
status of left half.
(primary, evaluate): Update callers.
* doc/m4.texi (Eval): Test it.
---
doc/m4.texi | 10 ++++++++--
src/eval.c | 46 +++++++++++++++++++---------------------------
2 files changed, 27 insertions(+), 29 deletions(-)
diff --git a/doc/m4.texi b/doc/m4.texi
index f5577b6f..229ebcbd 100644
--- a/doc/m4.texi
+++ b/doc/m4.texi
@@ -6411,15 +6411,21 @@ Eval
@result{}0
eval(`+ + - ~ ! ~ 0')
@result{}1
+eval(`1 / 0 + 1')
+@error{}m4:stdin:8: divide by zero in eval: 1 / 0 + 1
+@result{}
+eval(`1 / 0 || 1 / 0')
+@error{}m4:stdin:9: divide by zero in eval: 1 / 0 || 1 / 0
+@result{}
eval(`2 || 1 / 0')
@result{}1
eval(`0 || 1 / 0')
-@error{}m4:stdin:9: divide by zero in eval: 0 || 1 / 0
+@error{}m4:stdin:11: divide by zero in eval: 0 || 1 / 0
@result{}
eval(`0 && (1 % 0)')
@result{}0
eval(`2 && (1 % 0)')
-@error{}m4:stdin:11: modulo by zero in eval: 2 && (1 % 0)
+@error{}m4:stdin:13: modulo by zero in eval: 2 && (1 % 0)
@result{}
@end example
diff --git a/src/eval.c b/src/eval.c
index f48e142a..f186ac04 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -82,7 +82,7 @@ typedef enum eval_error
eval_error;
static eval_error primary (int32_t *);
-static eval_error parse_expr (int32_t *, unsigned);
+static eval_error parse_expr (int32_t *, eval_error, unsigned);
/*--------------------.
| Lexical functions. |
@@ -303,7 +303,6 @@ static eval_error
primary (int32_t *v1)
{
eval_error er;
- eval_error er2;
int32_t v2;
switch (eval_lex (v1))
@@ -314,12 +313,10 @@ primary (int32_t *v1)
/* Parenthesis */
case LEFTP:
- if ((er = primary (v1)) >= SYNTAX_ERROR)
+ er = primary (v1);
+ er = parse_expr (v1, er, 1);
+ if (er >= SYNTAX_ERROR)
return er;
- if ((er2 = parse_expr (v1, 1)) >= SYNTAX_ERROR)
- return er2;
- if (er == NO_ERROR)
- er = er2;
switch (eval_lex (&v2))
{
case ERROR:
@@ -362,30 +359,29 @@ primary (int32_t *v1)
/* Parse binary operators with at least MIN_PREC precedence. */
static eval_error
-parse_expr (int32_t *v1, unsigned min_prec)
+parse_expr (int32_t *v1, eval_error er, unsigned min_prec)
{
eval_token et;
eval_token et2;
- eval_error er = NO_ERROR;
eval_error er2;
int32_t v2;
int32_t v3;
uint32_t u1;
+ if (er >= SYNTAX_ERROR)
+ return er;
et = eval_lex (&v2);
while (et / 10 >= min_prec)
{
- if ((er = primary (&v2)) >= SYNTAX_ERROR)
- return er;
+ if ((er2 = primary (&v2)) >= SYNTAX_ERROR)
+ return er2;
et2 = eval_lex (&v3);
/* Handle binary operators of higher precedence or right-associativity */
while (et2 / 10 > et / 10 || et2 == EXPONENT)
{
eval_undo ();
- if ((er2 = parse_expr (&v2, et2 / 10)) >= SYNTAX_ERROR)
+ if ((er2 = parse_expr (&v2, er2, et2 / 10)) >= SYNTAX_ERROR)
return er2;
- if (er == NO_ERROR)
- er = er2;
et2 = eval_lex (&v3);
}
/* Reduce the two values by the given binary operator */
@@ -487,20 +483,15 @@ Warning: recommend ==, not =, for equality operator")));
/* Implement short-circuiting of valid syntax. */
case LAND:
- if (er == NO_ERROR)
- *v1 = *v1 && v2;
- else if (*v1 == 0)
- er = NO_ERROR;
+ if (!*v1)
+ er2 = NO_ERROR;
+ *v1 = *v1 && v2;
break;
case LOR:
- if (er == NO_ERROR)
- *v1 = *v1 || v2;
- else if (*v1 != 0)
- {
- *v1 = 1;
- er = NO_ERROR;
- }
+ if (*v1)
+ er2 = NO_ERROR;
+ *v1 = *v1 || v2;
break;
default:
@@ -508,6 +499,8 @@ Warning: recommend ==, not =, for equality operator")));
"INTERNAL ERROR: unexpected operator in evaluate ()"));
abort ();
}
+ if (er == NO_ERROR)
+ er = er2;
et = et2;
}
@@ -526,8 +519,7 @@ evaluate (const char *expr, int32_t *val)
eval_init_lex (expr);
err = primary (val);
- if (err == NO_ERROR)
- err = parse_expr (val, 1);
+ err = parse_expr (val, err, 1);
if (err == NO_ERROR && *eval_text != '\0')
{
base-commit: 6ebfc546b600625db70bf3c0a6a128997a3c04be
--
2.49.0
--
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization: qemu.org | libguestfs.org
_______________________________________________
M4-patches mailing list
[email protected]
https://lists.gnu.org/mailman/listinfo/m4-patches