> Hmm, while writing this, I noticed a potential optimization - there is no need for m4_escape to waste time on four separate m4_bpatsubst if string didn't have any problematic bytes in the first place. I'll whip out a followup patch shortly.
As promised. Well, it turned into more than just optimizing m4_escape, since I noticed some subtle bugs in m4sh. AS_LITERAL_IF wasn't too happy with unbalanced parentheses, even though that can occur in well-formed shell code. And AS_TR_SH and AS_TR_CPP underquoted things, which could inadvertently lead to the wrong macros being expanded after case changes have completed. Meanwhile, idioms such as m4_cond([test1], [1], [$3], [test2], [2], [$2], [$3]) get expensive when $3 is arbitrarily long; rewriting them as m4_if(m4_cond([test1], [1], [], [test2], [2], [-]), [-], [$2], [$3]) minimizes the scanning effort required by m4. From a75bdb89701e9d42e784de4237a042973182a999 Mon Sep 17 00:00:00 2001 From: Eric Blake <e...@byu.net> Date: Wed, 28 Oct 2009 11:23:45 -0600 Subject: [PATCH 1/3] Optimize m4_escape for common case. * lib/m4sugar/m4sugar.m4 (m4_escape): Don't use regex if string is already sane, by copying from AS_LITERAL_IF. Move guts... (_m4_escape): ...into new helper. Signed-off-by: Eric Blake <e...@byu.net> --- ChangeLog | 5 +++++ lib/m4sugar/m4sugar.m4 | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 0 deletions(-) diff --git a/ChangeLog b/ChangeLog index 41dfb1e..a972039 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2009-10-28 Eric Blake <e...@byu.net> + Optimize m4_escape for common case. + * lib/m4sugar/m4sugar.m4 (m4_escape): Don't use regex if string is + already sane, by copying from AS_LITERAL_IF. Move guts... + (_m4_escape): ...into new helper. + Fix m4_text_wrap handling of quoted whitespace. * lib/m4sugar/m4sugar.m4 (m4_escape): New macro. (m4_text_wrap): Use it to avoid issues with embedded [ and ]. diff --git a/lib/m4sugar/m4sugar.m4 b/lib/m4sugar/m4sugar.m4 index 6fddff4..e0cacfb 100644 --- a/lib/m4sugar/m4sugar.m4 +++ b/lib/m4sugar/m4sugar.m4 @@ -2546,7 +2546,23 @@ m4_define([m4_append_uniq_w], # ----------------- # Output quoted STRING, but with embedded #, $, [ and ] turned into # quadrigraphs. 
+# +# It is faster to check if STRING is already good using m4_translit +# than to blindly perform four m4_bpatsubst. +# +# Because the translit is stripping quotes, it must also neutralize +# anything that might be in a macro name, as well as comments, commas, +# and parentheses. All the problem characters are unified so that a +# single m4_index can scan the result. +# +# Rather than expand m4_defn every time m4_escape is expanded, we +# inline its expansion up front. m4_define([m4_escape], +[m4_if(m4_index(m4_translit([$1], + [[]#,()]]m4_dquote(m4_defn([m4_cr_symbols2]))[, [$$$]), [$]), + [-1], [m4_echo], [_$0])([$1])]) + +m4_define([_m4_escape], [m4_changequote([-=<{(],[)}>=-])]dnl [m4_bpatsubst(m4_bpatsubst(m4_bpatsubst(m4_bpatsubst( -=<{(-=<{(-=<{(-=<{(-=<{($1)}>=-)}>=-)}>=-)}>=-)}>=-, -- 1.6.4.2 From c08d93bedbc554e7ed92e45c97a2666719176cb5 Mon Sep 17 00:00:00 2001 From: Eric Blake <e...@byu.net> Date: Wed, 28 Oct 2009 12:21:36 -0600 Subject: [PATCH 2/3] Minor optimizations to m4sh. * lib/m4sugar/m4sh.m4 (AS_VAR_IF, AS_IDENTIFIER_IF) (AS_LITERAL_IF): Parse fewer bytes during expansion, by visiting if-true and if-false arguments only once. Signed-off-by: Eric Blake <e...@byu.net> --- ChangeLog | 5 +++++ lib/m4sugar/m4sh.m4 | 25 ++++++++++++++----------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index a972039..f28d97c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2009-10-28 Eric Blake <e...@byu.net> + Minor optimizations to m4sh. + * lib/m4sugar/m4sh.m4 (AS_VAR_IF, AS_IDENTIFIER_IF) + (AS_LITERAL_IF): Parse fewer bytes during expansion, by visiting + if-true and if-false arguments only once. + Optimize m4_escape for common case. * lib/m4sugar/m4sugar.m4 (m4_escape): Don't use regex if string is already sane, by copying from AS_LITERAL_IF. Move guts... 
diff --git a/lib/m4sugar/m4sh.m4 b/lib/m4sugar/m4sh.m4 index 4864088..e3957d0 100644 --- a/lib/m4sugar/m4sh.m4 +++ b/lib/m4sugar/m4sh.m4 @@ -1507,15 +1507,15 @@ m4_define([AS_HELP_STRING], # we worry if the first character also matches m4_cr_symbol1 (ie. does not # match m4_cr_digit). m4_define([AS_IDENTIFIER_IF], -[m4_if(m4_index([$1], [@]), [-1], - [_$0($@)], - [_$0(m4_bpatsubst([[$1]], [@&t@]), [$2], [$3])])]) +[m4_if(_$0(m4_if(m4_index([$1], [@]), [-1], + [[$1]], [m4_bpatsubst([[$1]], [@&t@])])), [-], [$2], [$3])]) + m4_define([_AS_IDENTIFIER_IF], -[m4_cond([[$1]], [], [$3], +[m4_cond([[$1]], [], [], [m4_eval(m4_len(m4_translit([[$1]], ]]dnl -m4_dquote(m4_dquote(m4_defn([m4_cr_symbols2])))[[)) > 0)], [1], [$3], +m4_dquote(m4_dquote(m4_defn([m4_cr_symbols2])))[[)) > 0)], [1], [], [m4_len(m4_translit(m4_format([[%.1s]], [$1]), ]]dnl -m4_dquote(m4_dquote(m4_defn([m4_cr_symbols1])))[[))], [0], [$2], [$3])]) +m4_dquote(m4_dquote(m4_defn([m4_cr_symbols1])))[[))], [0], [-], [])]) # AS_LITERAL_IF(EXPRESSION, IF-LITERAL, IF-NOT-LITERAL) @@ -1544,13 +1544,16 @@ m4_dquote(m4_dquote(m4_defn([m4_cr_symbols1])))[[))], [0], [$2], [$3])]) # Rather than expand m4_defn every time AS_LITERAL_IF is expanded, we # inline its expansion up front. m4_define([AS_LITERAL_IF], -[m4_cond([m4_eval(m4_index(m4_quote($1), [@S|@]) == -1)], [0], [$3], +[m4_if(_$0([$1]), [-], [$2], [$3])]) + +m4_define([_AS_LITERAL_IF], +[m4_cond([m4_eval(m4_index(m4_quote($1), [@S|@]) == -1)], [0], [], [m4_index(m4_translit(m4_quote($1), [[]`,#]]]dnl m4_dquote(m4_dquote(m4_defn([m4_cr_symbols2])))[[, [$$$]), - [$])], [-1], [$2], - [$3])]) + [$])], [-1], [-], + [])]) # AS_TMPDIR(PREFIX, [DIRECTORY = $TMPDIR [= /tmp]]) @@ -1909,9 +1912,9 @@ m4_define([AS_VAR_GET], # Polymorphic, and avoids sh expansion error upon interrupt or term signal. 
m4_define([AS_VAR_IF], [AS_LITERAL_IF([$1], - [AS_IF([test "x$$1" = x""$2], [$3], [$4])], + [AS_IF([test "x$$1" = x""$2]], [AS_VAR_COPY([as_val], [$1]) - AS_IF([test "x$as_val" = x""$2], [$3], [$4])])]) + AS_IF([test "x$as_val" = x""$2]]), [$3], [$4])]) # AS_VAR_PUSHDEF and AS_VAR_POPDEF -- 1.6.4.2 From 70fab56b1c20869f54628426e0f3ae1db8cb6f62 Mon Sep 17 00:00:00 2001 From: Eric Blake <e...@byu.net> Date: Wed, 28 Oct 2009 15:58:43 -0600 Subject: [PATCH 3/3] Fix corner cases in AS_LITERAL_IF and AS_TR_SH. * lib/m4sugar/m4sh.m4 (AS_LITERAL_IF): Fix bug with unbalanced parens. (_AS_LITERAL_IF): Assume proper quoting. Move guts... (_AS_LITERAL_IF_): into new helper. (AS_TR_SH, AS_TR_CPP): Fix bugs with expansion of wrong macro. Move guts... (_AS_TR_SH, _AS_TR_SH_LITERAL, _AS_TR_SH_INDIR, _AS_TR_CPP) (_AS_TR_CPP_LITERAL, _AS_TR_CPP_INDIR): ...into new helpers. (AS_VAR_PUSHDEF): Hoist m4_require, by moving guts... (_AS_VAR_PUSHDEF): ...into new helper. * tests/m4sh.at (AS@&t@_LITERAL_IF): Enhance test. Signed-off-by: Eric Blake <e...@byu.net> --- ChangeLog | 13 +++++++++ lib/m4sugar/m4sh.m4 | 68 ++++++++++++++++++++++++++++++-------------------- tests/m4sh.at | 4 +- 3 files changed, 56 insertions(+), 29 deletions(-) diff --git a/ChangeLog b/ChangeLog index f28d97c..68c5f5e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,18 @@ 2009-10-28 Eric Blake <e...@byu.net> + Fix corner cases in AS_LITERAL_IF and AS_TR_SH. + * lib/m4sugar/m4sh.m4 (AS_LITERAL_IF): Fix bug with unbalanced + parens. + (_AS_LITERAL_IF): Assume proper quoting. Move guts... + (_AS_LITERAL_IF_): into new helper. + (AS_TR_SH, AS_TR_CPP): Fix bugs with expansion of wrong macro. + Move guts... + (_AS_TR_SH, _AS_TR_SH_LITERAL, _AS_TR_SH_INDIR, _AS_TR_CPP) + (_AS_TR_CPP_LITERAL, _AS_TR_CPP_INDIR): ...into new helpers. + (AS_VAR_PUSHDEF): Hoist m4_require, by moving guts... + (_AS_VAR_PUSHDEF): ...into new helper. + * tests/m4sh.at (AS@&t@_LITERAL_IF): Enhance test. + Minor optimizations to m4sh. 
* lib/m4sugar/m4sh.m4 (AS_VAR_IF, AS_IDENTIFIER_IF) (AS_LITERAL_IF): Parse fewer bytes during expansion, by visiting diff --git a/lib/m4sugar/m4sh.m4 b/lib/m4sugar/m4sh.m4 index e3957d0..cd45be2 100644 --- a/lib/m4sugar/m4sh.m4 +++ b/lib/m4sugar/m4sh.m4 @@ -1537,23 +1537,23 @@ m4_dquote(m4_dquote(m4_defn([m4_cr_symbols1])))[[))], [0], [-], [])]) # profiling shows that it is faster to use m4_translit. # # Because the translit is stripping quotes, it must also neutralize anything -# that might be in a macro name, as well as comments and commas. All the -# problem characters are unified so that a single m4_index can scan the -# result. +# that might be in a macro name, as well as comments, commas, or unbalanced +# parentheses. All the problem characters are unified so that a single +# m4_index can scan the result. # # Rather than expand m4_defn every time AS_LITERAL_IF is expanded, we # inline its expansion up front. m4_define([AS_LITERAL_IF], -[m4_if(_$0([$1]), [-], [$2], [$3])]) +[_$0(m4_expand([$1]), [$2], [$3])]) m4_define([_AS_LITERAL_IF], -[m4_cond([m4_eval(m4_index(m4_quote($1), [@S|@]) == -1)], [0], [], - [m4_index(m4_translit(m4_quote($1), - [[]`,#]]]dnl -m4_dquote(m4_dquote(m4_defn([m4_cr_symbols2])))[[, - [$$$]), - [$])], [-1], [-], - [])]) +[m4_if($0_([$1]), [-], [$2], [$3])]) + +m4_define([_AS_LITERAL_IF_], +[m4_cond([m4_eval(m4_index([$1], [@S|@]) == -1)], [0], [], + [m4_index(m4_translit([$1], [[]`,#()]]]dnl +m4_dquote(m4_dquote(m4_defn([m4_cr_symbols2])))[[, [$$$]), + [$])], [-1], [-], [])]) # AS_TMPDIR(PREFIX, [DIRECTORY = $TMPDIR [= /tmp]]) @@ -1739,13 +1739,18 @@ as_tr_sh="eval sed 'y%*+%pp%;s%[[^_$as_cr_alnum]]%_%g'" # For speed, we inline the literal definitions that can be computed up front. 
m4_defun_init([AS_TR_SH], [AS_REQUIRE([_$0_PREPARE])], -[AS_LITERAL_IF([$1], - [m4_translit([$1], [*+[]]]]dnl -m4_dquote(m4_dquote(m4_defn([m4_cr_not_symbols2])))[[, - [pp[]]]]dnl -m4_dquote(m4_dquote(m4_for(,1,255,,[[_]])))[[)], - [`AS_ECHO(["_AS_ESCAPE(m4_dquote(m4_expand([$1])), - [`], [\])"]) | $as_tr_sh`])]) +[_$0(m4_expand([$1]))]) + +m4_define([_AS_TR_SH], +[_AS_LITERAL_IF([$1], [$0_LITERAL], [$0_INDIR])([$1])]) + +m4_define([_AS_TR_SH_LITERAL], +[m4_translit([[$1]], + [*+[]]]m4_dquote(m4_defn([m4_cr_not_symbols2]))[, + [pp[]]]m4_dquote(m4_for(,1,255,,[[_]]))[)]) + +m4_define([_AS_TR_SH_INDIR], +[`AS_ECHO(["_AS_ESCAPE([[$1]], [`], [\])"]) | $as_tr_sh`]) # _AS_TR_CPP_PREPARE @@ -1766,12 +1771,18 @@ as_tr_cpp="eval sed 'y%*$as_cr_letters% P$as_cr_LETTERS%;s%[[^_$as_cr_alnum]]%_%g # See implementation comments in AS_TR_SH. m4_defun_init([AS_TR_CPP], [AS_REQUIRE([_$0_PREPARE])], -[AS_LITERAL_IF([$1], - [m4_translit([$1], [*[]]]]dnl -m4_dquote(m4_dquote(m4_defn([m4_cr_letters])m4_defn([m4_cr_not_symbols2])))[[, - [P[]]]]dnl -m4_dquote(m4_dquote(m4_defn([m4_cr_LETTERS])m4_for(,1,255,,[[_]])))[[)], - [`AS_ECHO(["$1"]) | $as_tr_cpp`])]) +[_$0(m4_expand([$1]))]) + +m4_define([_AS_TR_CPP], +[_AS_LITERAL_IF([$1], [$0_LITERAL], [$0_INDIR])([$1])]) + +m4_define([_AS_TR_CPP_LITERAL], +[m4_translit([$1], + [*[]]]m4_dquote(m4_defn([m4_cr_letters])m4_defn([m4_cr_not_symbols2]))[, + [P[]]]m4_dquote(m4_defn([m4_cr_LETTERS])m4_for(,1,255,,[[_]]))[)]) + +m4_define([_AS_TR_CPP_INDIR], +[`AS_ECHO(["$1"]) | $as_tr_cpp`]) # _AS_TR_PREPARE @@ -1962,9 +1973,12 @@ m4_define([AS_VAR_POPDEF], # don't work. Therefore, we must require the preparation ourselves. 
m4_defun_init([AS_VAR_PUSHDEF], [AS_REQUIRE([_AS_TR_SH_PREPARE])], -[AS_LITERAL_IF([$2], - [m4_pushdef([$1], [AS_TR_SH($2)])], - [as_$1=AS_TR_SH($2) +[_$0([$1], m4_expand([$2]))]) + +m4_define([_AS_VAR_PUSHDEF], +[_AS_LITERAL_IF([$2], + [m4_pushdef([$1], [_AS_TR_SH_LITERAL([$2])])], + [as_$1=_AS_TR_SH_INDIR([$2]) m4_pushdef([$1], [$as_[$1]])])]) diff --git a/tests/m4sh.at b/tests/m4sh.at index 4627a48..5ff9fe8 100644 --- a/tests/m4sh.at +++ b/tests/m4sh.at @@ -1041,13 +1041,13 @@ AT_DATA_M4SH([script.as], [[dnl AS_INIT echo AS_LITERAL_IF([lit], [ok], [ERR]) 1 echo AS_LITERAL_IF([l$it], [ERR], [ok]) 2 -echo AS_LITERAL_IF([l``it], [ERR], [ok]) 3 +echo AS_LITERAL_IF([l`case a in b) ;; esac`it], [ERR], [ok]) 3 m4_define([mac], [lit]) echo AS_LITERAL_IF([mac], [ok], [ERR]) 4 echo AS_LITERAL_IF([mac($, ``)], [ok], [ERR]) 5 m4_define([mac], [l$it]) echo AS_LITERAL_IF([mac], [ERR], [ok]) 6 -m4_define([mac], [l`it]) +m4_define([mac], [l``it]) echo AS_LITERAL_IF([mac], [ERR], [ok]) 7 ]]) -- 1.6.4.2