Module Name:    src
Committed By:   rillig
Date:           Mon Jun 21 08:17:39 UTC 2021

Modified Files:
        src/usr.bin/make/unit-tests: moderrs.exp moderrs.mk
            varmod-subst-regex.exp varmod-subst-regex.mk

Log Message:
tests/make: move and extend test for unmatched '\1' in ':C'

This test lived together with a few unrelated tests in moderrs.mk, but it
is better placed in varmod-subst-regex.mk.

While here, extend, document and explain the test since its purpose was
not obvious from reading the code alone.


To generate a diff of this commit:
cvs rdiff -u -r1.30 -r1.31 src/usr.bin/make/unit-tests/moderrs.exp
cvs rdiff -u -r1.28 -r1.29 src/usr.bin/make/unit-tests/moderrs.mk
cvs rdiff -u -r1.5 -r1.6 src/usr.bin/make/unit-tests/varmod-subst-regex.exp
cvs rdiff -u -r1.6 -r1.7 src/usr.bin/make/unit-tests/varmod-subst-regex.mk

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/make/unit-tests/moderrs.exp
diff -u src/usr.bin/make/unit-tests/moderrs.exp:1.30 src/usr.bin/make/unit-tests/moderrs.exp:1.31
--- src/usr.bin/make/unit-tests/moderrs.exp:1.30	Mon Jun 21 04:24:17 2021
+++ src/usr.bin/make/unit-tests/moderrs.exp	Mon Jun 21 08:17:39 2021
@@ -86,15 +86,6 @@ make: Unclosed variable expression, expe
 6: TheVariable
 7: TheVariable
 
-mod-regex-undefined-subexpression:
-one one 2 3 5 8 one3 2one 34
-make: No match for subexpression \2
-make: No match for subexpression \2
-make: No match for subexpression \1
-make: No match for subexpression \2
-make: No match for subexpression \1
-()+() ()+() ()+() 3 5 8 (3)+() ()+(1) 34
-
 mod-ts-parse:
 112358132134
 15152535558513521534

Index: src/usr.bin/make/unit-tests/moderrs.mk
diff -u src/usr.bin/make/unit-tests/moderrs.mk:1.28 src/usr.bin/make/unit-tests/moderrs.mk:1.29
--- src/usr.bin/make/unit-tests/moderrs.mk:1.28	Mon Jun 21 04:24:17 2021
+++ src/usr.bin/make/unit-tests/moderrs.mk	Mon Jun 21 08:17:39 2021
@@ -1,4 +1,4 @@
-# $NetBSD: moderrs.mk,v 1.28 2021/06/21 04:24:17 sjg Exp $
+# $NetBSD: moderrs.mk,v 1.29 2021/06/21 08:17:39 rillig Exp $
 #
 # various modifier error tests
 
@@ -22,7 +22,6 @@ all:	words
 all:	exclam
 all:	mod-subst-delimiter
 all:	mod-regex-delimiter
-all:	mod-regex-undefined-subexpression
 all:	mod-ts-parse
 all:	mod-t-parse
 all:	mod-ifelse-parse
@@ -122,23 +121,6 @@ mod-regex-delimiter: print-header print-
 	@echo 6: ${VAR:C,from,to,
 	@echo 7: ${VAR:C,from,to,}
 
-# In regular expressions with alternatives, not all capturing groups are
-# always set; some may be missing.  Warn about these.
-#
-# Since there is no way to turn off this warning, the combination of
-# alternative matches and capturing groups is seldom used, if at all.
-#
-# A newly added modifier 'U' such as in :C,(a.)|(b.),\1\2,U might be added
-# for treating undefined capturing groups as empty, but that would create a
-# syntactical ambiguity since the :S and :C modifiers are open-ended (see
-# mod-subst-chain).  Luckily the modifier :U does not make sense after :C,
-# therefore this case does not happen in practice.
-# The sub-modifier for the :S and :C modifiers would have to be chosen
-# wisely, to not create ambiguities while parsing.
-mod-regex-undefined-subexpression: print-header print-footer
-	@echo ${FIB:C,1(.*),one\1,}		# all ok
-	@echo ${FIB:C,1(.*)|2(.*),(\1)+(\2),:Q}	# no match for subexpression
-
 mod-ts-parse: print-header print-footer
 	@echo ${FIB:ts}
 	@echo ${FIB:ts\65}	# octal 065 == U+0035 == '5'

Index: src/usr.bin/make/unit-tests/varmod-subst-regex.exp
diff -u src/usr.bin/make/unit-tests/varmod-subst-regex.exp:1.5 src/usr.bin/make/unit-tests/varmod-subst-regex.exp:1.6
--- src/usr.bin/make/unit-tests/varmod-subst-regex.exp:1.5	Tue Feb 23 15:19:41 2021
+++ src/usr.bin/make/unit-tests/varmod-subst-regex.exp	Mon Jun 21 08:17:39 2021
@@ -22,4 +22,25 @@ make: Regex compilation error: (details 
 mod-regex-errors:
 make: Unknown modifier "Z"
 mod-regex-errors: xy
-exit status 0
+unmatched-subexpression.ok: one one 2 3 5 8 one3 2one 34
+make: No match for subexpression \2
+unmatched-subexpression.1: ()()
+make: No match for subexpression \2
+unmatched-subexpression.1: ()()
+make: No match for subexpression \1
+unmatched-subexpression.2: ()()
+unmatched-subexpression.3: 3
+unmatched-subexpression.5: 5
+unmatched-subexpression.8: 8
+make: No match for subexpression \2
+unmatched-subexpression.13: (3)()
+make: No match for subexpression \1
+unmatched-subexpression.21: ()(1)
+unmatched-subexpression.34: 34
+make: No match for subexpression \2
+make: No match for subexpression \2
+make: No match for subexpression \1
+make: No match for subexpression \2
+make: No match for subexpression \1
+unmatched-subexpression.all: ()() ()() ()() 3 5 8 (3)() ()(1) 34
+exit status 2

Index: src/usr.bin/make/unit-tests/varmod-subst-regex.mk
diff -u src/usr.bin/make/unit-tests/varmod-subst-regex.mk:1.6 src/usr.bin/make/unit-tests/varmod-subst-regex.mk:1.7
--- src/usr.bin/make/unit-tests/varmod-subst-regex.mk:1.6	Sat Dec  5 18:13:44 2020
+++ src/usr.bin/make/unit-tests/varmod-subst-regex.mk	Mon Jun 21 08:17:39 2021
@@ -1,10 +1,14 @@
-# $NetBSD: varmod-subst-regex.mk,v 1.6 2020/12/05 18:13:44 rillig Exp $
+# $NetBSD: varmod-subst-regex.mk,v 1.7 2021/06/21 08:17:39 rillig Exp $
 #
 # Tests for the :C,from,to, variable modifier.
 
+# report unmatched subexpressions
+.MAKEFLAGS: -dL
+
 all: mod-regex-compile-error
 all: mod-regex-limits
 all: mod-regex-errors
+all: unmatched-subexpression
 
 # The variable expression expands to 4 words.  Of these words, none matches
 # the regular expression "a b" since these words don't contain any
@@ -107,3 +111,51 @@ mod-regex-errors:
 	# unknown modifier, the parse error is ignored in ParseModifierPart
 	# and the faulty variable expression expands to "".
 	@echo $@: ${word:L:C,.*,x${:U:Z}y,W}
+
+# In regular expressions with alternatives, not all capturing groups are
+# always set; some may be missing.  Make calls these "unmatched
+# subexpressions".
+#
+# Between var.c 1.16 from 1996-12-24 until before var.c 1.933 from 2021-06-21,
+# unmatched subexpressions produced an "error message" but did not have any
+# further effect since the "error handling" didn't influence the exit status.
+#
+# Before 2021-06-21 there was no way to turn off this warning, thus the
+# combination of alternative matches and capturing groups was seldom used, if
+# at all.
+#
+# Since var.c 1.933 from 2021-06-21, the error message is only printed in lint
+# mode (-dL), but not in default mode.
+#
+# As an alternative to the change from var.c 1.933 from 2021-06-21, a possible
+# mitigation would have been to add a new modifier 'U' to the already existing
+# '1Wg' modifiers of the ':C' modifier.  That modifier could have been used in
+# the modifier ':C,(a.)|(b.),\1\2,U' to treat unmatched subexpressions as
+# empty.  This approach would have created a syntactical ambiguity since the
+# modifiers ':S' and ':C' are open-ended (see mod-subst-chain), that is, they
+# do not need to be followed by a ':' to separate them from the next modifier.
+# Luckily the modifier :U does not make sense after :C, therefore this case
+# does not happen in practice.
+unmatched-subexpression:
+	# In each of the following cases, if the regular expression matches at
+	# all, the subexpression \1 matches as well.
+	@echo $@.ok: ${:U1 1 2 3 5 8 13 21 34:C,1(.*),one\1,}
+
+	# In the following cases:
+	#	* The subexpression \1 is only defined for 1 and 13.
+	#	* The subexpression \2 is only defined for 2 and 21.
+	#	* If the regular expression does not match at all, the
+	#	  replacement string is not analyzed, thus no error messages.
+	# In total, there are 5 error messages about unmatched subexpressions.
+	@echo $@.1:  ${:U  1:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \2
+	@echo $@.1:  ${:U  1:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \2
+	@echo $@.2:  ${:U  2:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \1
+	@echo $@.3:  ${:U  3:C,1(.*)|2(.*),(\1)(\2),:Q}
+	@echo $@.5:  ${:U  5:C,1(.*)|2(.*),(\1)(\2),:Q}
+	@echo $@.8:  ${:U  8:C,1(.*)|2(.*),(\1)(\2),:Q}
+	@echo $@.13: ${:U 13:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \2
+	@echo $@.21: ${:U 21:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \1
+	@echo $@.34: ${:U 34:C,1(.*)|2(.*),(\1)(\2),:Q}
+
+	# And now all together: 5 error messages for 1, 1, 2, 13, 21.
+	@echo $@.all: ${:U1 1 2 3 5 8 13 21 34:C,1(.*)|2(.*),(\1)(\2),:Q}

Reply via email to