Module Name: src
Committed By: christos
Date: Wed Apr 26 17:43:33 UTC 2017
Modified Files:
src/bin/sh: expand.c expand.h
Log Message:
Convert the pattern matcher from recursive to backtracking (from FreeBSD).
To generate a diff of this commit:
cvs rdiff -u -r1.104 -r1.105 src/bin/sh/expand.c
cvs rdiff -u -r1.20 -r1.21 src/bin/sh/expand.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/bin/sh/expand.c
diff -u src/bin/sh/expand.c:1.104 src/bin/sh/expand.c:1.105
--- src/bin/sh/expand.c:1.104 Mon Mar 20 07:48:01 2017
+++ src/bin/sh/expand.c Wed Apr 26 13:43:33 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: expand.c,v 1.104 2017/03/20 11:48:01 kre Exp $ */
+/* $NetBSD: expand.c,v 1.105 2017/04/26 17:43:33 christos Exp $ */
/*-
* Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
#if 0
static char sccsid[] = "@(#)expand.c 8.5 (Berkeley) 5/15/95";
#else
-__RCSID("$NetBSD: expand.c,v 1.104 2017/03/20 11:48:01 kre Exp $");
+__RCSID("$NetBSD: expand.c,v 1.105 2017/04/26 17:43:33 christos Exp $");
#endif
#endif /* not lint */
@@ -52,6 +52,7 @@ __RCSID("$NetBSD: expand.c,v 1.104 2017/
#include <stdlib.h>
#include <stdio.h>
#include <wctype.h>
+#include <wchar.h>
/*
* Routines to expand arguments to commands. We have to deal with
@@ -112,8 +113,9 @@ STATIC void expmeta(char *, char *);
STATIC void addfname(char *);
STATIC struct strlist *expsort(struct strlist *);
STATIC struct strlist *msort(struct strlist *, int);
-STATIC int pmatch(char *, char *, int);
+STATIC int patmatch(const char *, const char *, int);
STATIC char *cvtnum(int, char *);
+static int collate_range_cmp(wchar_t, wchar_t);
/*
* Expand shell variables and backquotes inside a here document.
@@ -129,6 +131,18 @@ expandhere(union node *arg, int fd)
}
+static int
+collate_range_cmp(wchar_t c1, wchar_t c2)
+{
+ wchar_t s1[2], s2[2];
+
+ s1[0] = c1;
+ s1[1] = L'\0';
+ s2[0] = c2;
+ s2[1] = L'\0';
+ return (wcscoll(s1, s2));
+}
+
/*
* Perform variable substitution and command substitution on an argument,
* placing the resulting list of arguments in arglist. If EXP_FULL is true,
@@ -1399,7 +1413,7 @@ msort(struct strlist *list, int len)
* pointer is stored into *end.
*/
static int
-match_charclass(char *p, wchar_t chr, char **end)
+match_charclass(const char *p, wchar_t chr, const char **end)
{
char name[20];
char *nameend;
@@ -1427,35 +1441,29 @@ match_charclass(char *p, wchar_t chr, ch
* Returns true if the pattern matches the string.
*/
-int
-patmatch(char *pattern, char *string, int squoted)
-{
-#ifdef notdef
- if (pattern[0] == '!' && pattern[1] == '!')
- return 1 - pmatch(pattern + 2, string);
- else
-#endif
- return pmatch(pattern, string, squoted);
-}
-
-
STATIC int
-pmatch(char *pattern, char *string, int squoted)
+patmatch(const char *pattern, const char *string, int squoted)
{
- char *p, *q, *end;
+ const char *p, *q, *end;
+ const char *bt_p, *bt_q;
char c;
+ wchar_t wc, wc2;
p = pattern;
q = string;
+ bt_p = NULL;
+ bt_q = NULL;
for (;;) {
switch (c = *p++) {
case '\0':
- goto breakloop;
+ if (*q != '\0')
+ goto backtrack;
+ return 1;
case CTLESC:
if (squoted && *q == CTLESC)
q++;
if (*q++ != *p++)
- return 0;
+ goto backtrack;
break;
case CTLQUOTEMARK:
continue;
@@ -1482,17 +1490,19 @@ pmatch(char *pattern, char *string, int
q++;
}
}
- do {
- if (pmatch(p, q, squoted))
- return 1;
- if (squoted && *q == CTLESC)
- q++;
- } while (*q++ != '\0');
- return 0;
+ /*
+ * First try the shortest match for the '*' that
+ * could work. We can forget any earlier '*' since
+ * there is no way having it match more characters
+ * can help us, given that we are already here.
+ */
+ bt_p = p;
+ bt_q = q;
+ break;
case '[': {
- char *endp;
+ const char *savep, *saveq, *endp;
int invert, found;
- char chr;
+ unsigned char chr;
endp = p;
if (*endp == '!')
@@ -1508,20 +1518,25 @@ pmatch(char *pattern, char *string, int
break;
}
invert = 0;
- if (*p == '!') {
+ savep = p, saveq = q;
+ invert = 0;
+ if (*p == '!' || *p == '^') {
invert++;
p++;
}
found = 0;
- chr = *q++;
- if (squoted && chr == CTLESC)
- chr = *q++;
- if (chr == '\0')
+ if (*q == '\0')
return 0;
+ chr = (unsigned char)*q++;
c = *p++;
do {
if (c == CTLQUOTEMARK)
continue;
+ if (c == '\0') {
+ p = savep, q = saveq;
+ c = '[';
+ goto dft;
+ }
if (c == '[' && *p == ':') {
found |= match_charclass(p, chr, &end);
if (end != NULL)
@@ -1529,36 +1544,46 @@ pmatch(char *pattern, char *string, int
}
if (c == CTLESC)
c = *p++;
+ wc = (unsigned char)c;
if (*p == '-' && p[1] != ']') {
p++;
- while (*p == CTLQUOTEMARK)
- p++;
if (*p == CTLESC)
p++;
- if (chr >= c && chr <= *p)
+ wc2 = (unsigned char)*p++;
+ if ( collate_range_cmp(chr, wc) >= 0
+ && collate_range_cmp(chr, wc2) <= 0
+ )
found = 1;
- p++;
} else {
- if (chr == c)
+ if (chr == wc)
found = 1;
}
} while ((c = *p++) != ']');
if (found == invert)
- return 0;
+ goto backtrack;
break;
}
dft: default:
if (squoted && *q == CTLESC)
q++;
- if (*q++ != c)
+ if (*q++ == c)
+ break;
+backtrack:
+ /*
+ * If we have a mismatch (other than hitting the end
+ * of the string), go back to the last '*' seen and
+ * have it match one additional character.
+ */
+ if (bt_p == NULL)
return 0;
+ if (*bt_q == '\0')
+ return 0;
+ bt_q++;
+ p = bt_p;
+ q = bt_q;
break;
}
}
-breakloop:
- if (*q != '\0')
- return 0;
- return 1;
}
Index: src/bin/sh/expand.h
diff -u src/bin/sh/expand.h:1.20 src/bin/sh/expand.h:1.21
--- src/bin/sh/expand.h:1.20 Mon Mar 20 07:26:07 2017
+++ src/bin/sh/expand.h Wed Apr 26 13:43:33 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: expand.h,v 1.20 2017/03/20 11:26:07 kre Exp $ */
+/* $NetBSD: expand.h,v 1.21 2017/04/26 17:43:33 christos Exp $ */
/*-
* Copyright (c) 1991, 1993
@@ -63,6 +63,5 @@ union node;
void expandhere(union node *, int);
void expandarg(union node *, struct arglist *, int);
void expari(int);
-int patmatch(char *, char *, int);
void rmescapes(char *);
int casematch(union node *, char *);