POSIX is clear that strtol("0x") is an error (there must be digits
after the explicit base); and other m4 implementations that rely on
strtol for eval have rejected "eval(0x)". Although it can be argued
that rejecting something that used to silently work as 0 might break
existing scripts, we had never documented it as a feature and the risk
is low.
Meanwhile, parsing "eval(08)" as "0" followed by "8" always produces a
semantic error, since 8 was found when an operator was expected; but
the error message is nicer if a digit too large for the base triggers
the same new error as for an incomplete base ["invalid number"] rather
than the old way ["bad expression in eval (excess input)"].
* src/eval.c (MIN_PREC): New define to avoid a magic number to
parse_expr.
(BADNUM, INVALID_NUMBER): New enum values. Shuffle values on
other eval_tokens to keep math consistent.
(eval_lex): Return BADNUM on failed numeric parse.
(primary, parse_expr, evaluate): Handle invalid numbers.
* doc/m4.texi (Eval): Test it.
* NEWS: Document the bug fix.
---
This patch is worth back-porting to m4 1.4.x.
For reference, BSD m4 and m4p both reject "eval(0x)". Checking other
languages, bash rejects $((0b)) but accidentally accepts $((0x)).
gawk silently accepts 0x, but hey, it silently accepts garbage like 0q
too. C, Python, Go, Rust, and many other compiled languages reject
bare 0x.
NEWS | 4 +++
doc/m4.texi | 8 +++++-
src/eval.c | 75 +++++++++++++++++++++++++++++++++++------------------
3 files changed, 61 insertions(+), 26 deletions(-)
diff --git a/NEWS b/NEWS
index f25dac3d..960e42f7 100644
--- a/NEWS
+++ b/NEWS
@@ -122,6 +122,10 @@ GNU M4 NEWS - User visible changes.
context of a macro name, rather than acting on the empty string. This
was already done for `define', `pushdef', `builtin', and `indir'.
+** Fix the `eval' builtin to reject input like `0x' that C does not
+ accept as an integer literal, rather than silently treating it as zero
+ (present since "the beginning").
+
** Enhance the `eval' builtin to understand the `?:' and `>>>' operators,
and downgrade a failed parse due to an unknown operator from an error to
a warning (the same as for all other syntax errors). Further, the
diff --git a/doc/m4.texi b/doc/m4.texi
index 01b0a915..2bd2d042 100644
--- a/doc/m4.texi
+++ b/doc/m4.texi
@@ -7341,6 +7341,12 @@ Eval
eval(`1+')
@error{}m4:stdin:10: warning: eval: missing operand: '1+'
@result{}
+eval(`0x')
+@error{}m4:stdin:11: warning: eval: invalid number: '0x'
+@result{}
+eval(`01239')
+@error{}m4:stdin:12: warning: eval: invalid number: '01239'
+@result{}
define(`foo', `666')
@result{}
eval(0r36:foo)
@@ -7350,7 +7356,7 @@ Eval
eval(`0R36:'defn(`foo'))
@result{}7998
eval(`foo / 6')
-@error{}m4:stdin:15: warning: eval: bad input: 'foo / 6'
+@error{}m4:stdin:17: warning: eval: bad input: 'foo / 6'
@result{}
eval(foo / 6)
@result{}111
diff --git a/src/eval.c b/src/eval.c
index 236b34ca..18cae9e2 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -28,10 +28,13 @@
/* Evaluates token types. */
+#define MIN_PREC 2
+
typedef enum eval_token
{
- /* Value / 10 is precedence order. */
+ /* Value / 10 is precedence order, if >= MIN_PREC. */
ERROR = 0,
+ BADNUM,
BADOP,
EMPTY,
EOTEXT,
@@ -41,28 +44,28 @@ typedef enum eval_token
NOT,
NUMBER,
COLON,
- QUESTION = 10,
- LOR = 20,
- LAND = 30,
- OR = 40,
- XOR = 50,
- AND = 60,
- EQ = 70,
+ QUESTION = 20,
+ LOR = 30,
+ LAND = 40,
+ OR = 50,
+ XOR = 60,
+ AND = 70,
+ EQ = 80,
NOTEQ,
- GT = 80,
+ GT = 90,
GTEQ,
LS,
LSEQ,
- LSHIFT = 90,
+ LSHIFT = 100,
RSHIFT,
URSHIFT,
/* precedence given for binary op; PLUS and MINUS also serve as a unary op */
- PLUS = 100,
+ PLUS = 110,
MINUS,
- TIMES = 110,
+ TIMES = 120,
DIVIDE,
MODULO,
- EXPONENT = 120
+ EXPONENT = 130
}
eval_token;
@@ -82,6 +85,7 @@ typedef enum eval_error
MISSING_COLON,
UNKNOWN_INPUT,
EXCESS_INPUT,
+ INVALID_NUMBER,
INVALID_OPERATOR,
MISSING_VALUE,
EMPTY_ARGUMENT
@@ -141,6 +145,7 @@ eval_lex (int32_t *val)
Therefore use an unsigned integer type to avoid undefined behaviour
when parsing '-2147483648'. */
uint32_t value;
+ bool seen_digit = false;
if (*eval_text == '0')
{
@@ -166,19 +171,20 @@ eval_lex (int32_t *val)
while (c_isdigit (*eval_text) && base <= 36)
base = 10 * base + *eval_text++ - '0';
if (base == 0 || base > 36 || *eval_text != ':')
- return ERROR;
+ return BADNUM;
eval_text++;
break;
default:
base = 8;
+ seen_digit = true;
}
}
else
base = 10;
value = 0;
- for (; *eval_text; eval_text++)
+ for (; *eval_text; eval_text++, seen_digit = true)
{
if (c_isdigit (*eval_text))
digit = *eval_text - '0';
@@ -196,14 +202,16 @@ eval_lex (int32_t *val)
else if (digit == 0 && value == 0)
continue;
else
- break;
+ return BADNUM;
}
else if (digit >= base)
- break;
+ return BADNUM;
else
value = value * base + digit;
}
*val = value;
+ if (!seen_digit)
+ return BADNUM;
return NUMBER;
}
@@ -336,13 +344,15 @@ primary (int32_t *v1)
/* Parenthesis */
case LEFTP:
er = primary (v1);
- er = parse_expr (v1, er, 1);
+ er = parse_expr (v1, er, MIN_PREC);
if (er >= SYNTAX_ERROR)
return er;
switch (eval_lex (&v2))
{
case ERROR:
return UNKNOWN_INPUT;
+ case BADNUM:
+ return INVALID_NUMBER;
case BADOP:
return INVALID_OPERATOR;
case RIGHTP:
@@ -374,6 +384,8 @@ primary (int32_t *v1)
/* Anything else */
case ERROR:
return UNKNOWN_INPUT;
+ case BADNUM:
+ return INVALID_NUMBER;
case BADOP:
return INVALID_OPERATOR;
case EMPTY:
@@ -550,14 +562,16 @@ parse_expr (int32_t *v1, eval_error er, unsigned min_prec)
break;
case QUESTION:
- if (et2 == BADOP)
+ if (et2 == BADNUM)
+ er = INVALID_NUMBER;
+ else if (et2 == BADOP)
er = INVALID_OPERATOR;
else if (et2 != COLON)
er = MISSING_COLON;
else
{
er3 = primary (&v3);
- er3 = parse_expr (&v3, er3, 1);
+ er3 = parse_expr (&v3, er3, MIN_PREC);
if (er3 >= SYNTAX_ERROR)
return er3;
if (*v1)
@@ -591,14 +605,21 @@ evaluate (const call_info *me, const char *expr, size_t
len, int32_t *val)
eval_init_lex (expr, len);
err = primary (val);
- err = parse_expr (val, err, 1);
+ err = parse_expr (val, err, MIN_PREC);
if (err == NO_ERROR && eval_text != end_text)
{
- if (eval_lex (val) == BADOP)
- err = INVALID_OPERATOR;
- else
- err = EXCESS_INPUT;
+ switch (eval_lex (val))
+ {
+ case BADNUM:
+ err = INVALID_NUMBER;
+ break;
+ case BADOP:
+ err = INVALID_OPERATOR;
+ break;
+ default:
+ err = EXCESS_INPUT;
+ }
}
if (err != NO_ERROR)
@@ -634,6 +655,10 @@ evaluate (const call_info *me, const char *expr, size_t
len, int32_t *val)
m4_warn (0, me, _("excess input: %s"), expr);
break;
+ case INVALID_NUMBER:
+ m4_warn (0, me, _("invalid number: %s"), expr);
+ break;
+
case INVALID_OPERATOR:
m4_warn (0, me, _("invalid operator: %s"), expr);
break;
--
2.49.0
_______________________________________________
M4-patches mailing list
[email protected]
https://lists.gnu.org/mailman/listinfo/m4-patches