POSIX states in one place that m4 must do signed math with at least
32-bit precision, and in another that math must be done with at least
'signed long' precision. We comply with the former, but not the latter
on modern 64-bit platforms, where 'signed long' is 64-bit. While we
could unilaterally switch to 64-bit math, that is more likely to
cause back-compat issues with code that has come to expect 32-bit
math; it is better to give scripts the option to choose which width
they want, by introducing a new builtin, eval64.
Since other places in m4 do not allow more than 2G of content in a
single macro name or body, and output in radix 1 is already a GNU
extension, I opted to have eval64 refuse radix 1 output of values
whose magnitude does not fit in 32 bits: it issues a warning and
produces an empty expansion instead.
* src/eval.c (INT, UINT, SHIFTMASK, EVALUATE): New macros.
(eval_lex, primary, parse_expr, evaluate): Use them to allow both
32- and 64- bit compilation.
* src/eval64.c: New file, implementing evaluate64.
* src/Makefile.am (m4_SOURCES): Build it.
* src/m4.h (evaluate64): New prototype.
* src/builtin.c (builtin_tab): Expose new builtin.
(ntoa): Support 64-bit output.
(m4_eval): Split body...
(eval): ...into new helper. Switch to 64-bit support, except that
radix 1 is still capped to 2G.
(m4_eval64): New function.
* doc/m4.texi (Eval): Document this, including some examples that
get run by the testsuite.
* NEWS: Likewise.
---
NEWS | 2 ++
doc/m4.texi | 72 ++++++++++++++++++++++++++++++++++---------------
src/Makefile.am | 2 +-
src/builtin.c | 37 +++++++++++++++++++------
src/eval.c | 63 +++++++++++++++++++++++++------------------
src/eval64.c | 39 +++++++++++++++++++++++++++
src/m4.h | 9 +++++--
7 files changed, 165 insertions(+), 59 deletions(-)
create mode 100644 src/eval64.c
diff --git a/NEWS b/NEWS
index 0b499a20..dd3ea04b 100644
--- a/NEWS
+++ b/NEWS
@@ -132,6 +132,8 @@ GNU M4 NEWS - User visible changes.
builtin now refuses to recognize `=' as a synonym for `==' (this had
emitted a warning since 1.4.8b).
+** Add a new `eval64' builtin that operates on 64-bit integers.
+
** A number of portability improvements inherited from gnulib.
diff --git a/doc/m4.texi b/doc/m4.texi
index 97da55dd..b0a35eae 100644
--- a/doc/m4.texi
+++ b/doc/m4.texi
@@ -7070,13 +7070,16 @@ Eval
Integer expressions are evaluated with @code{eval}:
@deffn Builtin eval (@var{expression}, @dvar{radix, 10}, @ovar{width})
+@deffnx Builtin eval64 (@var{expression}, @dvar{radix, 10}, @ovar{width})
Expands to the value of @var{expression}. The expansion is empty
if a problem is encountered while parsing the arguments. If specified,
@var{radix} and @var{width} control the format of the output.
-Calculations are done with 32-bit signed numbers. Overflow silently
-results in wraparound. A warning is issued if division by zero is
-attempted, or if @var{expression} could not be parsed.
+Calculations are done with 32-bit signed numbers for @code{eval}, and
+64-bit signed numbers for @code{eval64} (the latter was introduced in M4
+1.6). Overflow silently results in wraparound. A warning is issued if
+division by zero is attempted, or if @var{expression} could not be
+parsed.
Expressions can contain the following operators, listed in order of
decreasing precedence.
@@ -7112,7 +7115,7 @@ Eval
Conditional ternary
@end table
-The macro @code{eval} is recognized only with parameters.
+The macros @code{eval} and @code{eval64} are recognized only with parameters.
@end deffn
All binary operators, except exponentiation, are left associative. C
@@ -7328,32 +7331,56 @@ Eval
passed to @code{eval}.
Some calculations are not portable to other implementations, since they
-have undefined semantics in C, but GNU @code{m4} has
-well-defined behavior on overflow. When shifting, an out-of-range shift
-amount is implicitly brought into the range of 32-bit signed integers
-using an implicit bit-wise and with 0x1f).
+have undefined semantics in C, but GNU @code{m4} has well-defined
+behavior on overflow. When shifting, an out-of-range shift amount is
+implicitly brought into the range of 32-bit signed integers (using an
+implicit bit-wise and with 0x1f). For @code{eval64}, the implicit mask
+is 0x3f. However, output in radix 1 only works for signed values that
+fit in 32 bits, in part because the @var{width} parameter is still
+limited to 32 bits.
@example
-define(`max_int', eval(`0x7fffffff'))
+define(`max_int', eval(`0x7fffffff'))dnl
+define(`max_long', eval64(`0x7fffffffffffffff'))
@result{}
-define(`min_int', incr(max_int))
+define(`min_int', incr(max_int))dnl
+define(`min_long', eval64(max_long + 1))
@result{}
-eval(min_int` < 0')
-@result{}1
-eval(max_int` > 0')
-@result{}1
+eval(min_int` < 0') eval64(min_long` < 0')
+@result{}1 1
+eval(max_int` > 0') eval64(max_long` > 0')
+@result{}1 1
ifelse(eval(min_int` / -1'), min_int, `overflow occurred')
@result{}overflow occurred
+ifelse(eval64(min_long` / -1'), min_long, `overflow occurred')
+@result{}overflow occurred
min_int
@result{}-2147483648
-eval(`0x80000000 % -1')
-@result{}0
-eval(`-4 >> 1')
-@result{}-2
-eval(`-4 >> -31')
-@result{}-2
-eval(`-4 >> 33')
-@result{}-2
+min_long
+@result{}-9223372036854775808
+eval(`0x80000000 % -1') eval64(`0x8000000000000000 % -1')
+@result{}0 0
+eval(`-4 >> 1') eval64(`-4 >> 1')
+@result{}-2 -2
+eval(`-4 >> -31') eval64(`-4 >> -63')
+@result{}-2 -2
+eval(`-4 >> 33') eval64(`-4 >> 65')
+@result{}-2 -2
+eval64(`0x80000000', `1')
+@error{}m4:stdin:15: warning: eval64: magnitude too large for base 1: 2147483648
+@result{}
+@end example
+
+If your script wants to use larger math by default, it is easy to
+arrange for that up front with @code{defn}:
+
+@example
+eval(`90000 * 90000')
+@result{}-489934592
+define(`eval', defn(`eval64'))
+@result{}
+eval(`90000 * 90000')
+@result{}8100000000
@end example
If @var{radix} is specified, it specifies the radix to be used in the
@@ -7407,6 +7434,7 @@ Eval
@result{}a
@end example
+
@node Shell commands
@chapter Macros for running shell commands
diff --git a/src/Makefile.am b/src/Makefile.am
index 0f567012..08f48afe 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -26,7 +26,7 @@ AM_CFLAGS = $(WARN_CFLAGS) $(WERROR_CFLAGS)
AM_LDFLAGS = $(OS2_LDFLAGS)
bin_PROGRAMS = m4
noinst_HEADERS = m4.h
-m4_SOURCES = m4.c builtin.c debug.c eval.c format.c freeze.c input.c \
+m4_SOURCES = m4.c builtin.c debug.c eval.c eval64.c format.c freeze.c input.c \
macro.c output.c path.c symtab.c
LDADD = ../lib/libm4.a $(LIBM4_LIBDEPS) \
$(CLOCK_TIME_LIB) $(GETLOCALENAME_L_LIB) $(GETRANDOM_LIB) \
diff --git a/src/builtin.c b/src/builtin.c
index bf0d1798..bba80e33 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -57,6 +57,7 @@ DECLARE (m4_dumpdef);
DECLARE (m4_errprint);
DECLARE (m4_esyscmd);
DECLARE (m4_eval);
+DECLARE (m4_eval64);
DECLARE (m4_format);
DECLARE (m4_ifdef);
DECLARE (m4_ifelse);
@@ -109,6 +110,7 @@ static builtin const builtin_tab[] = {
{"errprint", false, false, true, m4_errprint},
{"esyscmd", true, false, true, m4_esyscmd},
{"eval", false, false, true, m4_eval},
+ {"eval64", true, false, true, m4_eval64},
{"format", true, false, true, m4_format},
{"ifdef", false, true, true, m4_ifdef},
{"ifelse", false, true, true, m4_ifelse},
@@ -571,10 +573,10 @@ static char const digits[] =
"0123456789abcdefghijklmnopqrstuvwxyz";
/* The function ntoa () converts VALUE to a signed ASCII
representation in radix RADIX, with the ending \0 in *END. */
static const char *
-ntoa (int32_t value, int radix, const char **end)
+ntoa (int64_t value, int radix, const char **end)
{
bool negative;
- uint32_t uvalue;
+ uint64_t uvalue;
/* Sized for radix 2, plus sign and trailing NUL. */
static char str[sizeof (value) * CHAR_BIT + 2];
char *s = &str[sizeof str];
@@ -585,12 +587,12 @@ ntoa (int32_t value, int radix, const char **end)
if (value < 0)
{
negative = true;
- uvalue = -(uint32_t) value;
+ uvalue = -(uint64_t) value;
}
else
{
negative = false;
- uvalue = (uint32_t) value;
+ uvalue = (uint64_t) value;
}
do
@@ -1253,10 +1255,11 @@ m4_sysval (struct obstack *obs, int argc MAYBE_UNUSED,
The actual work is done in the function evaluate (), which lives in
eval.c. */
static void
-m4_eval (struct obstack *obs, int argc, macro_arguments *argv)
+eval (struct obstack *obs, int argc, macro_arguments *argv,
+ bool func (const call_info *, const char *, size_t, int64_t *))
{
const call_info *me = arg_info (argv);
- int32_t value = 0;
+ int64_t value = 0;
int radix = 10;
int min = 1;
const char *s;
@@ -1283,17 +1286,23 @@ m4_eval (struct obstack *obs, int argc, macro_arguments *argv)
return;
}
- if (evaluate (me, ARG (1), ARG_LEN (1), &value))
+ if (func (me, ARG (1), ARG_LEN (1), &value))
return;
if (radix == 1)
{
+ if (value < INT_MIN || value > INT_MAX)
+ {
+ m4_warn (0, me, _("magnitude too large for base 1: %" PRId64),
+ value);
+ return;
+ }
if (value < 0)
{
obstack_1grow (obs, '-');
value = -value;
}
- if (value + 0U < min + 0U)
+ if (value + 0ULL < min + 0ULL)
{
obstack_blank (obs, min - value);
memset ((char *) obstack_next_free (obs) - (min - value), '0',
@@ -1321,6 +1330,18 @@ m4_eval (struct obstack *obs, int argc, macro_arguments *argv)
obstack_grow (obs, s, len);
}
+static void
+m4_eval (struct obstack *obs, int argc, macro_arguments *argv)
+{
+ eval (obs, argc, argv, evaluate);
+}
+
+static void
+m4_eval64 (struct obstack *obs, int argc, macro_arguments *argv)
+{
+ eval (obs, argc, argv, evaluate64);
+}
+
static void
m4_incr (struct obstack *obs, int argc, macro_arguments *argv)
{
diff --git a/src/eval.c b/src/eval.c
index e3bb8b2c..18454d7e 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -22,9 +22,16 @@
/* This file contains the functions to evaluate integer expressions for
the "eval" macro. It is a little, fairly self-contained module, with
its own scanner, and a recursive descent parser. The only entry point
- is evaluate (). */
+ is evaluate (). This file is also used to implement eval64. */
-#include "m4.h"
+#ifndef EVAL64
+# include "m4.h"
+
+# define INT int32_t
+# define UINT uint32_t
+# define SHIFTMASK 0x1f
+# define EVALUATE evaluate
+#endif /* !EVAL64 */
/* Evaluates token types. */
@@ -88,8 +95,9 @@ typedef enum eval_error
}
eval_error;
-static eval_error primary (int32_t *);
-static eval_error parse_expr (int32_t *, eval_error, unsigned);
+
+static eval_error primary (INT *);
+static eval_error parse_expr (INT *, eval_error, unsigned);
/* Lexical functions. */
@@ -121,7 +129,7 @@ eval_undo (void)
/* VAL is numerical value, if any. */
static eval_token
-eval_lex (int32_t *val)
+eval_lex (INT *val)
{
while (eval_text != end_text && c_isspace (*eval_text))
eval_text++;
@@ -140,7 +148,7 @@ eval_lex (int32_t *val)
/* The documentation says that "overflow silently results in wraparound".
Therefore use an unsigned integer type to avoid undefined behaviour
when parsing '-2147483648'. */
- uint32_t value;
+ UINT value;
if (*eval_text == '0')
{
@@ -322,10 +330,10 @@ eval_lex (int32_t *val)
/* Parse `(expr)', unary operators, and numbers. */
static eval_error
-primary (int32_t *v1)
+primary (INT *v1)
{
eval_error er;
- int32_t v2;
+ INT v2;
switch (eval_lex (v1))
{
@@ -360,7 +368,7 @@ primary (int32_t *v1)
return primary (v1);
case MINUS:
er = primary (v1);
- *v1 = (int32_t) -(uint32_t) *v1;
+ *v1 = (INT) -(UINT) *v1;
return er;
case NOT:
er = primary (v1);
@@ -388,17 +396,17 @@ primary (int32_t *v1)
/* Parse binary operators with at least MIN_PREC precedence. */
static eval_error
-parse_expr (int32_t *v1, eval_error er, unsigned min_prec)
+parse_expr (INT *v1, eval_error er, unsigned min_prec)
{
eval_token et;
eval_token et2;
eval_error er2;
eval_error er3;
- int32_t v2;
- int32_t v3;
- uint32_t u1;
- uint32_t u2;
- uint32_t u3;
+ INT v2;
+ INT v3;
+ UINT u1;
+ UINT u2;
+ UINT u3;
if (er >= SYNTAX_ERROR)
return er;
@@ -460,14 +468,14 @@ parse_expr (int32_t *v1, eval_error er, unsigned min_prec)
break;
case TIMES:
- *v1 = (int32_t) ((uint32_t) *v1 * (uint32_t) v2);
+ *v1 = (INT) ((UINT) *v1 * (UINT) v2);
break;
case DIVIDE:
if (v2 == 0)
er = DIVIDE_ZERO;
else if (v2 == -1)
/* Avoid overflow, and the x86 SIGFPE on INT_MIN / -1. */
- *v1 = (int32_t) -(uint32_t) *v1;
+ *v1 = (INT) -(UINT) *v1;
else
*v1 /= v2;
break;
@@ -482,25 +490,25 @@ parse_expr (int32_t *v1, eval_error er, unsigned min_prec)
break;
case PLUS:
- *v1 = (int32_t) ((uint32_t) *v1 + (uint32_t) v2);
+ *v1 = (INT) ((UINT) *v1 + (UINT) v2);
break;
case MINUS:
- *v1 = (int32_t) ((uint32_t) *v1 - (uint32_t) v2);
+ *v1 = (INT) ((UINT) *v1 - (UINT) v2);
break;
case LSHIFT:
u1 = *v1;
- u1 <<= (uint32_t) (v2 & 0x1f);
+ u1 <<= (UINT) (v2 & SHIFTMASK);
*v1 = u1;
break;
case RSHIFT:
u1 = *v1 < 0 ? ~*v1 : *v1;
- u1 >>= (uint32_t) (v2 & 0x1f);
+ u1 >>= (UINT) (v2 & SHIFTMASK);
*v1 = *v1 < 0 ? ~u1 : u1;
break;
case URSHIFT:
u1 = *v1;
- u1 >>= (uint32_t) (v2 & 0x1f);
+ u1 >>= (UINT) (v2 & SHIFTMASK);
*v1 = u1;
break;
@@ -585,17 +593,19 @@ parse_expr (int32_t *v1, eval_error er, unsigned min_prec)
/* Main entry point, called from "eval". */
bool
-evaluate (const call_info *me, const char *expr, size_t len, int32_t *val)
+EVALUATE (const call_info *me, const char *expr, size_t len, int64_t *pval)
{
eval_error err;
+ INT val = 0;
+ *pval = 0;
eval_init_lex (expr, len);
- err = primary (val);
- err = parse_expr (val, err, 1);
+ err = primary (&val);
+ err = parse_expr (&val, err, 1);
if (err == NO_ERROR && eval_text != end_text)
{
- if (eval_lex (val) == BADOP)
+ if (eval_lex (&val) == BADOP)
err = INVALID_OPERATOR;
else
err = EXCESS_INPUT;
@@ -607,6 +617,7 @@ evaluate (const call_info *me, const char *expr, size_t len, int32_t *val)
{
/* Cases where result is printed. */
case NO_ERROR:
+ *pval = val;
return false;
case EMPTY_ARGUMENT:
diff --git a/src/eval64.c b/src/eval64.c
new file mode 100644
index 00000000..6d5ae1b7
--- /dev/null
+++ b/src/eval64.c
@@ -0,0 +1,39 @@
+/* GNU m4 -- A simple macro processor
+
+ Copyright (C) 1989-1994, 2006-2014, 2016-2017, 2020-2025 Free
+ Software Foundation, Inc.
+
+ This file is part of GNU M4.
+
+ GNU M4 is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ GNU M4 is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+/* This file contains the functions to evaluate 64-bit integer
+ expressions for the "eval64" macro, by reusing code from eval. The
+ only entry point is evaluate64 (). */
+
+#include "m4.h"
+
+#define EVAL64
+#define INT int64_t
+#define UINT uint64_t
+#define SHIFTMASK 0x3f
+#define EVALUATE evaluate64
+
+#include "eval.c"
+/*
+ Defined by eval.c:
+bool
+evaluate64 (const call_info *me, const char *expr, size_t len, int64_t *pval)
+ */
diff --git a/src/m4.h b/src/m4.h
index 79dca438..c1735318 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -29,6 +29,7 @@
#include <c-ctype.h>
#include <errno.h>
#include <error.h>
+#include <inttypes.h>
#include <limits.h>
#include <locale.h>
#include <stdbool.h>
@@ -546,9 +547,13 @@ extern void include_env_init (void);
extern void add_include_directory (const char *);
extern FILE *m4_path_search (const char *, bool, char **);
-/* File: eval.c --- expression evaluation. */
+/* File: eval.c --- 32-bit expression evaluation. */
-extern bool evaluate (const call_info *, const char *, size_t, int32_t *);
+extern bool evaluate (const call_info *, const char *, size_t, int64_t *);
+
+/* File: eval64.c --- 64-bit expression evaluation. */
+
+extern bool evaluate64 (const call_info *, const char *, size_t, int64_t *);
/* File: format.c --- printf like formatting. */
--
2.49.0
_______________________________________________
M4-patches mailing list
[email protected]
https://lists.gnu.org/mailman/listinfo/m4-patches