Module Name:    src
Committed By:   rillig
Date:           Sat Feb 18 11:16:09 UTC 2023

Modified Files:
        src/usr.bin/make: make.h var.c
        src/usr.bin/make/unit-tests: parse-var.exp parse-var.mk
            var-eval-short.exp varmod-loop.mk

Log Message:
make: fix parsing of unevaluated subexpressions with unbalanced '{}'

Since var.c 1.323 from 2020-07-26, modifiers containing unbalanced
braces or parentheses were parsed differently, depending on whether they
were relevant or not.

For example, the expression '${VAR:...}' is enclosed with braces. When
this expression has a modifier ':S,},}},g' that would double each '}' in
that expression, the parser got confused:

If the expression was relevant, the modifier was parsed as usual, taking
into account that the 3 '}' in the modifier are ordinary characters.

If the expression was irrelevant, the parser only counted the '{' and
the '}', without taking into account that a '}' might be escaped by a
'\' or be an ordinary character.  Parsing therefore stopped at the first
'}', assuming it would finish the expression '${VAR:S,}'.

This parsing mode of only counting balanced '{' and '}' makes sense for
the modifier ':@var@...@', which expands each word of the expression
using the template from the '...'.  These templates tend to be simple
enough that counting the '{' and '}' suffices.


To generate a diff of this commit:
cvs rdiff -u -r1.316 -r1.317 src/usr.bin/make/make.h
cvs rdiff -u -r1.1046 -r1.1047 src/usr.bin/make/var.c
cvs rdiff -u -r1.6 -r1.7 src/usr.bin/make/unit-tests/parse-var.exp
cvs rdiff -u -r1.7 -r1.8 src/usr.bin/make/unit-tests/parse-var.mk
cvs rdiff -u -r1.20 -r1.21 src/usr.bin/make/unit-tests/var-eval-short.exp
cvs rdiff -u -r1.21 -r1.22 src/usr.bin/make/unit-tests/varmod-loop.mk

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/make/make.h
diff -u src/usr.bin/make/make.h:1.316 src/usr.bin/make/make.h:1.317
--- src/usr.bin/make/make.h:1.316	Wed Feb 15 06:52:58 2023
+++ src/usr.bin/make/make.h	Sat Feb 18 11:16:09 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: make.h,v 1.316 2023/02/15 06:52:58 rillig Exp $	*/
+/*	$NetBSD: make.h,v 1.317 2023/02/18 11:16:09 rillig Exp $	*/
 
 /*
  * Copyright (c) 1988, 1989, 1990, 1993
@@ -911,6 +911,14 @@ typedef enum VarEvalMode {
 	 */
 	VARE_PARSE_ONLY,
 
+	/*
+	 * Parse text in which '${...}' and '$(...)' are not parsed as
+	 * subexpressions (with all their individual escaping rules) but
+	 * instead simply as text with balanced '${}' or '$()'.  Other '$'
+	 * are copied verbatim.
+	 */
+	VARE_PARSE_BALANCED,
+
 	/* Parse and evaluate the expression. */
 	VARE_WANTRES,
 

Index: src/usr.bin/make/var.c
diff -u src/usr.bin/make/var.c:1.1046 src/usr.bin/make/var.c:1.1047
--- src/usr.bin/make/var.c:1.1046	Wed Feb 15 06:52:58 2023
+++ src/usr.bin/make/var.c	Sat Feb 18 11:16:09 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: var.c,v 1.1046 2023/02/15 06:52:58 rillig Exp $	*/
+/*	$NetBSD: var.c,v 1.1047 2023/02/18 11:16:09 rillig Exp $	*/
 
 /*
  * Copyright (c) 1988, 1989, 1990, 1993
@@ -139,7 +139,7 @@
 #include "metachar.h"
 
 /*	"@(#)var.c	8.3 (Berkeley) 3/19/94" */
-MAKE_RCSID("$NetBSD: var.c,v 1.1046 2023/02/15 06:52:58 rillig Exp $");
+MAKE_RCSID("$NetBSD: var.c,v 1.1047 2023/02/18 11:16:09 rillig Exp $");
 
 /*
  * Variables are defined using one of the VAR=value assignments.  Their
@@ -328,6 +328,7 @@ static VarExportedMode var_exportedVars 
 
 static const char VarEvalMode_Name[][32] = {
 	"parse-only",
+	"parse-balanced",
 	"eval",
 	"eval-defined",
 	"eval-keep-dollar",
@@ -2143,31 +2144,23 @@ ParseModifierPartExpr(const char **pp, L
 	FStr nested_val = Var_Parse(&p, ch->expr->scope,
 	    VarEvalMode_WithoutKeepDollar(emode));
 	/* TODO: handle errors */
-	LazyBuf_AddStr(part, nested_val.str);
+	if (VarEvalMode_ShouldEval(emode))
+		LazyBuf_AddStr(part, nested_val.str);
+	else
+		LazyBuf_AddSubstring(part, Substring_Init(*pp, p));
 	FStr_Done(&nested_val);
 	*pp = p;
 }
 
 /*
- * In a part of a modifier, parse a subexpression but don't evaluate it.
- *
- * XXX: This whole block is very similar to Var_Parse with VARE_PARSE_ONLY.
- * There may be subtle edge cases though that are not yet covered in the unit
- * tests and that are parsed differently, depending on whether they are
- * evaluated or not.
- *
- * This subtle difference is not documented in the manual page, neither is
- * the difference between parsing ':D' and ':M' documented.  No code should
- * ever depend on these details, but who knows.
- *
- * TODO: Before trying to replace this code with Var_Parse, there need to be
- * more unit tests in varmod-loop.mk.  The modifier ':@' uses Var_Subst
- * internally, in which a '$' is escaped as '$$', not as '\$' like in other
- * modifiers.  When parsing the body text '$${var}', skipping over the first
- * '$' would treat '${var}' as a make expression, not as a shell variable.
+ * In a part of a modifier, parse some text that looks like a subexpression.
+ * If the text starts with '$(', any '(' and ')' must be balanced.
+ * If the text starts with '${', any '{' and '}' must be balanced.
+ * If the text starts with '$', that '$' is copied, it is not parsed as a
+ * short-name variable expression.
  */
 static void
-ParseModifierPartDollar(const char **pp, LazyBuf *part)
+ParseModifierPartBalanced(const char **pp, LazyBuf *part)
 {
 	const char *p = *pp;
 	const char *start = *pp;
@@ -2234,10 +2227,10 @@ ParseModifierPartSubst(
 			else
 				LazyBuf_Add(part, *p);
 			p++;
-		} else if (VarEvalMode_ShouldEval(emode))
-			ParseModifierPartExpr(&p, part, ch, emode);
+		} else if (emode == VARE_PARSE_BALANCED)
+			ParseModifierPartBalanced(&p, part);
 		else
-			ParseModifierPartDollar(&p, part);
+			ParseModifierPartExpr(&p, part, ch, emode);
 	}
 
 	if (*p != delim) {
@@ -2437,7 +2430,7 @@ ApplyModifier_Loop(const char **pp, ModC
 		return AMR_CLEANUP;
 	}
 
-	if (!ParseModifierPart(pp, '@', VARE_PARSE_ONLY, ch, &strBuf))
+	if (!ParseModifierPart(pp, '@', VARE_PARSE_BALANCED, ch, &strBuf))
 		return AMR_CLEANUP;
 	str = LazyBuf_DoneGet(&strBuf);
 	args.body = str.str;

Index: src/usr.bin/make/unit-tests/parse-var.exp
diff -u src/usr.bin/make/unit-tests/parse-var.exp:1.6 src/usr.bin/make/unit-tests/parse-var.exp:1.7
--- src/usr.bin/make/unit-tests/parse-var.exp:1.6	Tue Feb 14 21:56:48 2023
+++ src/usr.bin/make/unit-tests/parse-var.exp	Sat Feb 18 11:16:09 2023
@@ -1,5 +1 @@
-make: Unfinished modifier for "BRACE_GROUP" (',' missing)
-make: "parse-var.mk" line 129: Malformed conditional (0 && ${BRACE_GROUP:S,${BRACE_PAIR:S,{,{{,},<lbraces>,})
-make: Fatal errors encountered -- cannot continue
-make: stopped in unit-tests
-exit status 1
+exit status 0

Index: src/usr.bin/make/unit-tests/parse-var.mk
diff -u src/usr.bin/make/unit-tests/parse-var.mk:1.7 src/usr.bin/make/unit-tests/parse-var.mk:1.8
--- src/usr.bin/make/unit-tests/parse-var.mk:1.7	Tue Feb 14 21:56:48 2023
+++ src/usr.bin/make/unit-tests/parse-var.mk	Sat Feb 18 11:16:09 2023
@@ -1,4 +1,4 @@
-# $NetBSD: parse-var.mk,v 1.7 2023/02/14 21:56:48 rillig Exp $
+# $NetBSD: parse-var.mk,v 1.8 2023/02/18 11:16:09 rillig Exp $
 #
 # Tests for parsing variable expressions.
 #
@@ -85,12 +85,9 @@ VAR.${:U param }=	value
 .  error
 .endif
 
-# XXX: The following paragraph already uses past tense, in the hope that the
-# parsing behavior can be cleaned up soon.
-
-# Since var.c 1.323 from 2020-07-26 18:11 and except for var.c 1.1028 from
-# 2022-08-08, the exact way of parsing an expression depended on whether the
-# expression was actually evaluated or merely parsed.
+# Since var.c 1.323 from 2020-07-26 18:11 and until var.c 1.1047 from
+# 2023-02-18, the exact way of parsing an expression with subexpressions
+# depended on whether the expression was actually evaluated or merely parsed.
 #
 # If it was evaluated, nested expressions were parsed correctly, parsing each
 # modifier according to its exact definition (see varmod.mk).
@@ -102,30 +99,28 @@ VAR.${:U param }=	value
 # expression was not parsed correctly.  Instead, make only counted the opening
 # and closing delimiters, which failed for nested modifiers with unbalanced
 # braces.
-#
-# This naive brace counting was implemented in ParseModifierPartDollar.  As of
-# var.c 1.1029, there are still several other places that merely count braces
-# instead of properly parsing subexpressions.
 
 #.MAKEFLAGS: -dcpv
 # Keep these braces outside the conditions below, to keep them simple to
-# understand.  If the BRACE_PAIR had been replaced with ':U{}', the '}' would
-# have to be escaped, but not the '{'.  This asymmetry would have made the
-# example even more complicated to understand.
+# understand.  If the expression ${BRACE_PAIR:...} had been replaced with the
+# literal ${:U{}}, the '}' would have to be escaped, but not the '{'.  This
+# asymmetry would have made the example even more complicated to understand.
 BRACE_PAIR=	{}
-# In this test word, the '{{}' in the middle will be replaced.
+# In this test word, the below conditions will replace the '{{}' in the middle
+# with the string '<lbraces>'.
 BRACE_GROUP=	{{{{}}}}
 
 # The inner ':S' modifier turns the word '{}' into '{{}'.
 # The outer ':S' modifier then replaces '{{}' with '<lbraces>'.
-# In the first case, the outer expression is relevant and is parsed correctly.
+# Due to the always-true condition '1', the outer expression is relevant and
+# is parsed correctly.
 .if 1 && ${BRACE_GROUP:S,${BRACE_PAIR:S,{,{{,},<lbraces>,}
 .endif
-# In the second case, the outer expression was irrelevant.  In this case, in
-# the parts of the outer ':S' modifier, make only counted the braces, and since
-# the inner expression '${BRACE_PAIR:...}' contains more '{' than '}', parsing
-# failed with the error message 'Unfinished modifier for "BRACE_GROUP"'.  Fixed
-# in var.c 1.1028 from 2022-08-08, reverted in var.c 1.1029 from 2022-08-23.
+# Due to the always-false condition '0', the outer expression is irrelevant.
+# In this case, in the parts of the outer ':S' modifier, the expression parser
+# only counted the braces, and since the inner expression '${BRACE_PAIR:...}'
+# contains more '{' than '}', parsing failed with the error message 'Unfinished
+# modifier for "BRACE_GROUP"'.  Fixed in var.c 1.1047 from 2023-02-18.
 .if 0 && ${BRACE_GROUP:S,${BRACE_PAIR:S,{,{{,},<lbraces>,}
 .endif
 #.MAKEFLAGS: -d0

Index: src/usr.bin/make/unit-tests/var-eval-short.exp
diff -u src/usr.bin/make/unit-tests/var-eval-short.exp:1.20 src/usr.bin/make/unit-tests/var-eval-short.exp:1.21
--- src/usr.bin/make/unit-tests/var-eval-short.exp:1.20	Tue Aug 23 19:22:01 2022
+++ src/usr.bin/make/unit-tests/var-eval-short.exp	Sat Feb 18 11:16:09 2023
@@ -7,7 +7,9 @@ make: "var-eval-short.mk" line 98: Malfo
 CondParser_Eval: 0 && ${0:?${FAIL}then:${FAIL}else}
 Var_Parse: ${0:?${FAIL}then:${FAIL}else} (parse-only)
 Parsing modifier ${0:?...}
+Var_Parse: ${FAIL}then:${FAIL}else} (parse-only)
 Modifier part: "${FAIL}then"
+Var_Parse: ${FAIL}else} (parse-only)
 Modifier part: "${FAIL}else"
 Result of ${0:?${FAIL}then:${FAIL}else} is "" (parse-only, defined)
 Parsing line 163: DEFINED=	defined
@@ -17,7 +19,9 @@ Var_Parse: ${DEFINED:L:?${FAIL}then:${FA
 Parsing modifier ${DEFINED:L}
 Result of ${DEFINED:L} is "defined" (parse-only, regular)
 Parsing modifier ${DEFINED:?...}
+Var_Parse: ${FAIL}then:${FAIL}else} (parse-only)
 Modifier part: "${FAIL}then"
+Var_Parse: ${FAIL}else} (parse-only)
 Modifier part: "${FAIL}else"
 Result of ${DEFINED:?${FAIL}then:${FAIL}else} is "defined" (parse-only, regular)
 Parsing line 166: .MAKEFLAGS: -d0

Index: src/usr.bin/make/unit-tests/varmod-loop.mk
diff -u src/usr.bin/make/unit-tests/varmod-loop.mk:1.21 src/usr.bin/make/unit-tests/varmod-loop.mk:1.22
--- src/usr.bin/make/unit-tests/varmod-loop.mk:1.21	Tue Aug 23 21:13:46 2022
+++ src/usr.bin/make/unit-tests/varmod-loop.mk	Sat Feb 18 11:16:09 2023
@@ -1,4 +1,4 @@
-# $NetBSD: varmod-loop.mk,v 1.21 2022/08/23 21:13:46 rillig Exp $
+# $NetBSD: varmod-loop.mk,v 1.22 2023/02/18 11:16:09 rillig Exp $
 #
 # Tests for the :@var@...${var}...@ variable modifier.
 
@@ -192,15 +192,14 @@ CMDLINE=	global		# needed for deleting t
 # except for '$i', which is replaced with the then-current value '1' of the
 # iteration variable.
 #
-# XXX: was broken in var.c 1.1028 from 2022-08-08, reverted in var.c 1.1029
-# from 2022-08-23; see parse-var.mk, keyword 'BRACE_GROUP'.
+# See parse-var.mk, keyword 'BRACE_GROUP'.
 all: varmod-loop-literal-dollar
 varmod-loop-literal-dollar: .PHONY
 	: ${:U1:@i@ t=$$(( $${t:-0} + $i ))@}
 
 
 # When parsing the loop body, each '\$', '\@' and '\\' is unescaped to '$',
-# '@' and '\'; all other backslashes are retained.
+# '@' and '\', respectively; all other backslashes are retained.
 #
 # In practice, the '$' is not escaped as '\$', as there is a second round of
 # unescaping '$$' to '$' later when the loop body is expanded after setting the

Reply via email to