Module Name:    src
Committed By:   kre
Date:           Sun Jul 22 23:07:49 UTC 2018

Modified Files:
        src/bin/sh: expand.c parser.c

Log Message:
Part 2 of pattern matching (glob etc) fixes.

Attempt to correctly deal with \ (both when it is a literal,
in appropriate cases, and when it appears as CTLESC when it was
detected as a quoting character during parsing).

In a pattern, in sh, no quoted character can ever be anything other
than a literal character.   This is quite different than regular
expressions, and even different than other uses of glob matching,
where shell quoting is not an issue.

In something like

        ls ?\*.c

the ? is a meta-character, the * is a literal (it was quoted).  This
is nothing new, sh has handled that properly for ever.

But the same happens with
        VAR='?\*.c'
and
        ls $VAR

which has not always been handled correctly.   Of course, in

        ls "$VAR"

nothing in VAR is a meta-character (the entire expansion is quoted)
so even the '\' must match literally (or more accurately, no matching
happens - VAR simply contains an "unusual" filename).  But if it had
been

        ls *"$VAR"

then we would be looking for filenames that end with the literal 5
characters that make up $VAR.

The same kinds of things are required of matching patterns in case
statements, and sub-strings with the % and # operators in variable
expansions.

While here, the final remnant of the ancient !! pattern matching
hack has been removed (the code that actually implemented it was
long gone, but one small piece remained, not doing any real harm,
but potentially wasting time - if someone gave a pattern which would
once have invoked that hack.)


To generate a diff of this commit:
cvs rdiff -u -r1.126 -r1.127 src/bin/sh/expand.c
cvs rdiff -u -r1.148 -r1.149 src/bin/sh/parser.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/bin/sh/expand.c
diff -u src/bin/sh/expand.c:1.126 src/bin/sh/expand.c:1.127
--- src/bin/sh/expand.c:1.126	Sun Jul 22 21:16:58 2018
+++ src/bin/sh/expand.c	Sun Jul 22 23:07:48 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: expand.c,v 1.126 2018/07/22 21:16:58 kre Exp $	*/
+/*	$NetBSD: expand.c,v 1.127 2018/07/22 23:07:48 kre Exp $	*/
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)expand.c	8.5 (Berkeley) 5/15/95";
 #else
-__RCSID("$NetBSD: expand.c,v 1.126 2018/07/22 21:16:58 kre Exp $");
+__RCSID("$NetBSD: expand.c,v 1.127 2018/07/22 23:07:48 kre Exp $");
 #endif
 #endif /* not lint */
 
@@ -927,7 +927,9 @@ evalvar(const char *p, int flag)
 					varlen++;
 			} else {
 				while (*val) {
-					if (quotes && syntax[(int)*val] == CCTL)
+					if (quotes && (varflags & VSQUOTE) &&
+					    (syntax[(int)*val] == CCTL ||
+					     syntax[(int)*val] == CBACK))
 						STPUTC(CTLESC, expdest);
 					STPUTC(*val++, expdest);
 				}
@@ -1465,22 +1467,59 @@ expmeta(char *enddir, char *name)
 			metaflag = 1;
 		else if (*p == '[') {
 			q = p + 1;
-			if (*q == '!')
+			if (*q == '!' || *q == '^')
 				q++;
 			for (;;) {
 				while (*q == CTLQUOTEMARK || *q == CTLNONL)
 					q++;
-				if (*q == CTLESC)
+				if (*q == ']') {
 					q++;
-				if (*q == '/' || *q == '\0')
+					metaflag = 1;
 					break;
-				if (*++q == ']') {
+				}
+				if (*q == '[' && q[1] == ':') {
+					/*
+					 * character class, look for :] ending
+					 * also stop on ']' (end bracket expr)
+					 * or '\0' or '/' (end pattern)
+					 */
+					while (*++q != '\0' && *q != ']' &&
+					    *q != '/') {
+						if (*q == CTLESC) {
+							if (*++q == '\0')
+								break;
+							if (*q == '/')
+								break;
+						} else if (*q == ':' &&
+						    q[1] == ']')
+							break;
+					}
+					if (*q == ':') {
+						/*
+						 * stopped at ':]'
+						 * still in [...]
+						 * skip ":]" and continue;
+						 */
+						q += 2;
+						continue;
+					}
+
+					/* done at end of pattern, not [...] */
+					if (*q == '\0' || *q == '/')
+						break;
+
+					/* found the ']', we have a [...] */
 					metaflag = 1;
+					q++;	/* skip ']' */
 					break;
 				}
+				if (*q == CTLESC)
+					q++;
+				/* end of pattern cannot be escaped */
+				if (*q == '/' || *q == '\0')
+					break;
+				q++;
 			}
-		} else if (*p == '!' && p[1] == '!'	&& (p == name || p[-1] == '/')) {
-			metaflag = 1;
 		} else if (*p == '\0')
 			break;
 		else if (*p == CTLQUOTEMARK || *p == CTLNONL)
@@ -1707,6 +1746,10 @@ patmatch(const char *pattern, const char
 	for (;;) {
 		switch (c = *p++) {
 		case '\0':
+			if (squoted && *q == CTLESC) {
+				if (q[1] == '\0')
+					q++;
+			}
 			if (*q != '\0')
 				goto backtrack;
 			VTRACE(DBG_MATCH, ("match\n"));
@@ -1714,6 +1757,16 @@ patmatch(const char *pattern, const char
 		case CTLESC:
 			if (squoted && *q == CTLESC)
 				q++;
+			if (*p == '\0' && *q == '\0') {
+				VTRACE(DBG_MATCH, ("match-\\\n"));
+				return 1;
+			}
+			if (*q++ != *p++)
+				goto backtrack;
+			break;
+		case '\\':
+			if (squoted && *q == CTLESC)
+				q++;
 			if (*q++ != *p++)
 				goto backtrack;
 			break;
@@ -1747,6 +1800,10 @@ patmatch(const char *pattern, const char
 					q++;
 				}
 			}
+			if (c == CTLESC && p[1] == '\0') {
+				VTRACE(DBG_MATCH, ("match+\\\n"));
+				return 1;
+			}
 			/*
 			 * First try the shortest match for the '*' that
 			 * could work. We can forget any earlier '*' since
@@ -1798,6 +1855,8 @@ patmatch(const char *pattern, const char
 				VTRACE(DBG_MATCH, ("[]fail\n"));
 				return 0;
 			}
+			if (squoted && *q == CTLESC)
+				q++;
 			chr = (unsigned char)*q++;
 			c = *p++;
 			do {

Index: src/bin/sh/parser.c
diff -u src/bin/sh/parser.c:1.148 src/bin/sh/parser.c:1.149
--- src/bin/sh/parser.c:1.148	Fri Jul 20 22:47:26 2018
+++ src/bin/sh/parser.c	Sun Jul 22 23:07:48 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: parser.c,v 1.148 2018/07/20 22:47:26 kre Exp $	*/
+/*	$NetBSD: parser.c,v 1.149 2018/07/22 23:07:48 kre Exp $	*/
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)parser.c	8.7 (Berkeley) 5/16/95";
 #else
-__RCSID("$NetBSD: parser.c,v 1.148 2018/07/20 22:47:26 kre Exp $");
+__RCSID("$NetBSD: parser.c,v 1.149 2018/07/22 23:07:48 kre Exp $");
 #endif
 #endif /* not lint */
 
@@ -1817,8 +1817,10 @@ readtoken1(int firstc, char const *syn, 
 			}
 			quotef = 1;	/* current token is quoted */
 			if (ISDBLQUOTE() && c != '\\' && c != '`' &&
-			    c != '$' && (c != '"' || magicq))
+			    c != '$' && (c != '"' || magicq)) {
+				USTPUTC(CTLESC, out);
 				USTPUTC('\\', out);
+			}
 			if (SQSYNTAX[c] == CCTL || SQSYNTAX[c] == CSBACK)
 				USTPUTC(CTLESC, out);
 			else if (!magicq) {

Reply via email to