Module Name: src Committed By: rillig Date: Sat Feb 18 11:16:09 UTC 2023
Modified Files: src/usr.bin/make: make.h var.c src/usr.bin/make/unit-tests: parse-var.exp parse-var.mk var-eval-short.exp varmod-loop.mk Log Message: make: fix parsing of unevaluated subexpressions with unbalanced '{}' Since var.c 1.323 from 2020-07-26, modifiers containing unbalanced braces or parentheses were parsed differently, depending on whether they were relevant or not. For example, the expression '${VAR:...}' is enclosed with braces. When this expression has a modifier ':S,},}},g' that would double each '}' in that expression, the parser got confused: If the expression was relevant, the modifier was parsed as usual, taking into account that the 3 '}' in the modifier are ordinary characters. If the expression was irrelevant, the parser only counted the '{' and the '}', without taking into account that a '}' might be escaped by a '\' or be an ordinary character. Parsing therefore stopped at the first '}', assuming it would finish the expression '${VAR:S,}'. This parsing mode of only counting balanced '{' and '}' makes sense for the modifier ':@var@...@', which expands each word of the expression using the template from the '...'. These templates tend to be simple enough that counting the '{' and '}' suffices. To generate a diff of this commit: cvs rdiff -u -r1.316 -r1.317 src/usr.bin/make/make.h cvs rdiff -u -r1.1046 -r1.1047 src/usr.bin/make/var.c cvs rdiff -u -r1.6 -r1.7 src/usr.bin/make/unit-tests/parse-var.exp cvs rdiff -u -r1.7 -r1.8 src/usr.bin/make/unit-tests/parse-var.mk cvs rdiff -u -r1.20 -r1.21 src/usr.bin/make/unit-tests/var-eval-short.exp cvs rdiff -u -r1.21 -r1.22 src/usr.bin/make/unit-tests/varmod-loop.mk Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/usr.bin/make/make.h diff -u src/usr.bin/make/make.h:1.316 src/usr.bin/make/make.h:1.317 --- src/usr.bin/make/make.h:1.316 Wed Feb 15 06:52:58 2023 +++ src/usr.bin/make/make.h Sat Feb 18 11:16:09 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: make.h,v 1.316 2023/02/15 06:52:58 rillig Exp $ */ +/* $NetBSD: make.h,v 1.317 2023/02/18 11:16:09 rillig Exp $ */ /* * Copyright (c) 1988, 1989, 1990, 1993 @@ -911,6 +911,14 @@ typedef enum VarEvalMode { */ VARE_PARSE_ONLY, + /* + * Parse text in which '${...}' and '$(...)' are not parsed as + * subexpressions (with all their individual escaping rules) but + * instead simply as text with balanced '${}' or '$()'. Other '$' + * are copied verbatim. + */ + VARE_PARSE_BALANCED, + /* Parse and evaluate the expression. */ VARE_WANTRES, Index: src/usr.bin/make/var.c diff -u src/usr.bin/make/var.c:1.1046 src/usr.bin/make/var.c:1.1047 --- src/usr.bin/make/var.c:1.1046 Wed Feb 15 06:52:58 2023 +++ src/usr.bin/make/var.c Sat Feb 18 11:16:09 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: var.c,v 1.1046 2023/02/15 06:52:58 rillig Exp $ */ +/* $NetBSD: var.c,v 1.1047 2023/02/18 11:16:09 rillig Exp $ */ /* * Copyright (c) 1988, 1989, 1990, 1993 @@ -139,7 +139,7 @@ #include "metachar.h" /* "@(#)var.c 8.3 (Berkeley) 3/19/94" */ -MAKE_RCSID("$NetBSD: var.c,v 1.1046 2023/02/15 06:52:58 rillig Exp $"); +MAKE_RCSID("$NetBSD: var.c,v 1.1047 2023/02/18 11:16:09 rillig Exp $"); /* * Variables are defined using one of the VAR=value assignments. Their @@ -328,6 +328,7 @@ static VarExportedMode var_exportedVars static const char VarEvalMode_Name[][32] = { "parse-only", + "parse-balanced", "eval", "eval-defined", "eval-keep-dollar", @@ -2143,31 +2144,23 @@ ParseModifierPartExpr(const char **pp, L FStr nested_val = Var_Parse(&p, ch->expr->scope, VarEvalMode_WithoutKeepDollar(emode)); /* TODO: handle errors */ - LazyBuf_AddStr(part, nested_val.str); + if (VarEvalMode_ShouldEval(emode)) + LazyBuf_AddStr(part, nested_val.str); + else + LazyBuf_AddSubstring(part, Substring_Init(*pp, p)); FStr_Done(&nested_val); *pp = p; } /* - * In a part of a modifier, parse a subexpression but don't evaluate it. - * - * XXX: This whole block is very similar to Var_Parse with VARE_PARSE_ONLY. - * There may be subtle edge cases though that are not yet covered in the unit - * tests and that are parsed differently, depending on whether they are - * evaluated or not. - * - * This subtle difference is not documented in the manual page, neither is - * the difference between parsing ':D' and ':M' documented. No code should - * ever depend on these details, but who knows. - * - * TODO: Before trying to replace this code with Var_Parse, there need to be - * more unit tests in varmod-loop.mk. The modifier ':@' uses Var_Subst - * internally, in which a '$' is escaped as '$$', not as '\$' like in other - * modifiers. When parsing the body text '$${var}', skipping over the first - * '$' would treat '${var}' as a make expression, not as a shell variable. + * In a part of a modifier, parse some text that looks like a subexpression. + * If the text starts with '$(', any '(' and ')' must be balanced. + * If the text starts with '${', any '{' and '}' must be balanced. + * If the text starts with '$', that '$' is copied, it is not parsed as a + * short-name variable expression. */ static void -ParseModifierPartDollar(const char **pp, LazyBuf *part) +ParseModifierPartBalanced(const char **pp, LazyBuf *part) { const char *p = *pp; const char *start = *pp; @@ -2234,10 +2227,10 @@ ParseModifierPartSubst( else LazyBuf_Add(part, *p); p++; - } else if (VarEvalMode_ShouldEval(emode)) - ParseModifierPartExpr(&p, part, ch, emode); + } else if (emode == VARE_PARSE_BALANCED) + ParseModifierPartBalanced(&p, part); else - ParseModifierPartDollar(&p, part); + ParseModifierPartExpr(&p, part, ch, emode); } if (*p != delim) { @@ -2437,7 +2430,7 @@ ApplyModifier_Loop(const char **pp, ModC return AMR_CLEANUP; } - if (!ParseModifierPart(pp, '@', VARE_PARSE_ONLY, ch, &strBuf)) + if (!ParseModifierPart(pp, '@', VARE_PARSE_BALANCED, ch, &strBuf)) return AMR_CLEANUP; str = LazyBuf_DoneGet(&strBuf); args.body = str.str; Index: src/usr.bin/make/unit-tests/parse-var.exp diff -u src/usr.bin/make/unit-tests/parse-var.exp:1.6 src/usr.bin/make/unit-tests/parse-var.exp:1.7 --- src/usr.bin/make/unit-tests/parse-var.exp:1.6 Tue Feb 14 21:56:48 2023 +++ src/usr.bin/make/unit-tests/parse-var.exp Sat Feb 18 11:16:09 2023 @@ -1,5 +1 @@ -make: Unfinished modifier for "BRACE_GROUP" (',' missing) -make: "parse-var.mk" line 129: Malformed conditional (0 && ${BRACE_GROUP:S,${BRACE_PAIR:S,{,{{,},<lbraces>,}) -make: Fatal errors encountered -- cannot continue -make: stopped in unit-tests -exit status 1 +exit status 0 Index: src/usr.bin/make/unit-tests/parse-var.mk diff -u src/usr.bin/make/unit-tests/parse-var.mk:1.7 src/usr.bin/make/unit-tests/parse-var.mk:1.8 --- src/usr.bin/make/unit-tests/parse-var.mk:1.7 Tue Feb 14 21:56:48 2023 +++ src/usr.bin/make/unit-tests/parse-var.mk Sat Feb 18 11:16:09 2023 @@ -1,4 +1,4 @@ -# $NetBSD: parse-var.mk,v 1.7 2023/02/14 21:56:48 rillig Exp $ +# $NetBSD: parse-var.mk,v 1.8 2023/02/18 11:16:09 rillig Exp $ # # Tests for parsing variable expressions. # @@ -85,12 +85,9 @@ VAR.${:U param }= value . error .endif -# XXX: The following paragraph already uses past tense, in the hope that the -# parsing behavior can be cleaned up soon. - -# Since var.c 1.323 from 2020-07-26 18:11 and except for var.c 1.1028 from -# 2022-08-08, the exact way of parsing an expression depended on whether the -# expression was actually evaluated or merely parsed. +# Since var.c 1.323 from 2020-07-26 18:11 and until var.c 1.1047 from +# 2023-02-18, the exact way of parsing an expression with subexpressions +# depended on whether the expression was actually evaluated or merely parsed. # # If it was evaluated, nested expressions were parsed correctly, parsing each # modifier according to its exact definition (see varmod.mk). @@ -102,30 +99,28 @@ VAR.${:U param }= value # expression was not parsed correctly. Instead, make only counted the opening # and closing delimiters, which failed for nested modifiers with unbalanced # braces. -# -# This naive brace counting was implemented in ParseModifierPartDollar. As of -# var.c 1.1029, there are still several other places that merely count braces -# instead of properly parsing subexpressions. #.MAKEFLAGS: -dcpv # Keep these braces outside the conditions below, to keep them simple to -# understand. If the BRACE_PAIR had been replaced with ':U{}', the '}' would -# have to be escaped, but not the '{'. This asymmetry would have made the -# example even more complicated to understand. +# understand. If the expression ${BRACE_PAIR:...} had been replaced with the +# literal ${:U{}}, the '}' would have to be escaped, but not the '{'. This +# asymmetry would have made the example even more complicated to understand. BRACE_PAIR= {} -# In this test word, the '{{}' in the middle will be replaced. +# In this test word, the below conditions will replace the '{{}' in the middle +# with the string '<lbraces>'. BRACE_GROUP= {{{{}}}} # The inner ':S' modifier turns the word '{}' into '{{}'. # The outer ':S' modifier then replaces '{{}' with '<lbraces>'. -# In the first case, the outer expression is relevant and is parsed correctly. +# Due to the always-true condition '1', the outer expression is relevant and +# is parsed correctly. .if 1 && ${BRACE_GROUP:S,${BRACE_PAIR:S,{,{{,},<lbraces>,} .endif -# In the second case, the outer expression was irrelevant. In this case, in -# the parts of the outer ':S' modifier, make only counted the braces, and since -# the inner expression '${BRACE_PAIR:...}' contains more '{' than '}', parsing -# failed with the error message 'Unfinished modifier for "BRACE_GROUP"'. Fixed -# in var.c 1.1028 from 2022-08-08, reverted in var.c 1.1029 from 2022-08-23. +# Due to the always-false condition '0', the outer expression is irrelevant. +# In this case, in the parts of the outer ':S' modifier, the expression parser +# only counted the braces, and since the inner expression '${BRACE_PAIR:...}' +# contains more '{' than '}', parsing failed with the error message 'Unfinished +# modifier for "BRACE_GROUP"'. Fixed in var.c 1.1047 from 2023-02-18. .if 0 && ${BRACE_GROUP:S,${BRACE_PAIR:S,{,{{,},<lbraces>,} .endif #.MAKEFLAGS: -d0 Index: src/usr.bin/make/unit-tests/var-eval-short.exp diff -u src/usr.bin/make/unit-tests/var-eval-short.exp:1.20 src/usr.bin/make/unit-tests/var-eval-short.exp:1.21 --- src/usr.bin/make/unit-tests/var-eval-short.exp:1.20 Tue Aug 23 19:22:01 2022 +++ src/usr.bin/make/unit-tests/var-eval-short.exp Sat Feb 18 11:16:09 2023 @@ -7,7 +7,9 @@ make: "var-eval-short.mk" line 98: Malfo CondParser_Eval: 0 && ${0:?${FAIL}then:${FAIL}else} Var_Parse: ${0:?${FAIL}then:${FAIL}else} (parse-only) Parsing modifier ${0:?...} +Var_Parse: ${FAIL}then:${FAIL}else} (parse-only) Modifier part: "${FAIL}then" +Var_Parse: ${FAIL}else} (parse-only) Modifier part: "${FAIL}else" Result of ${0:?${FAIL}then:${FAIL}else} is "" (parse-only, defined) Parsing line 163: DEFINED= defined @@ -17,7 +19,9 @@ Var_Parse: ${DEFINED:L:?${FAIL}then:${FA Parsing modifier ${DEFINED:L} Result of ${DEFINED:L} is "defined" (parse-only, regular) Parsing modifier ${DEFINED:?...} +Var_Parse: ${FAIL}then:${FAIL}else} (parse-only) Modifier part: "${FAIL}then" +Var_Parse: ${FAIL}else} (parse-only) Modifier part: "${FAIL}else" Result of ${DEFINED:?${FAIL}then:${FAIL}else} is "defined" (parse-only, regular) Parsing line 166: .MAKEFLAGS: -d0 Index: src/usr.bin/make/unit-tests/varmod-loop.mk diff -u src/usr.bin/make/unit-tests/varmod-loop.mk:1.21 src/usr.bin/make/unit-tests/varmod-loop.mk:1.22 --- src/usr.bin/make/unit-tests/varmod-loop.mk:1.21 Tue Aug 23 21:13:46 2022 +++ src/usr.bin/make/unit-tests/varmod-loop.mk Sat Feb 18 11:16:09 2023 @@ -1,4 +1,4 @@ -# $NetBSD: varmod-loop.mk,v 1.21 2022/08/23 21:13:46 rillig Exp $ +# $NetBSD: varmod-loop.mk,v 1.22 2023/02/18 11:16:09 rillig Exp $ # # Tests for the :@var@...${var}...@ variable modifier. @@ -192,15 +192,14 @@ CMDLINE= global # needed for deleting t # except for '$i', which is replaced with the then-current value '1' of the # iteration variable. # -# XXX: was broken in var.c 1.1028 from 2022-08-08, reverted in var.c 1.1029 -# from 2022-08-23; see parse-var.mk, keyword 'BRACE_GROUP'. +# See parse-var.mk, keyword 'BRACE_GROUP'. all: varmod-loop-literal-dollar varmod-loop-literal-dollar: .PHONY : ${:U1:@i@ t=$$(( $${t:-0} + $i ))@} # When parsing the loop body, each '\$', '\@' and '\\' is unescaped to '$', -# '@' and '\'; all other backslashes are retained. +# '@' and '\', respectively; all other backslashes are retained. # # In practice, the '$' is not escaped as '\$', as there is a second round of # unescaping '$$' to '$' later when the loop body is expanded after setting the