Module Name: src Committed By: christos Date: Wed Apr 26 17:43:33 UTC 2017
Modified Files: src/bin/sh: expand.c expand.h Log Message: Convert the pattern matcher from recursive to backtracking (from FreeBSD). To generate a diff of this commit: cvs rdiff -u -r1.104 -r1.105 src/bin/sh/expand.c cvs rdiff -u -r1.20 -r1.21 src/bin/sh/expand.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/bin/sh/expand.c diff -u src/bin/sh/expand.c:1.104 src/bin/sh/expand.c:1.105 --- src/bin/sh/expand.c:1.104 Mon Mar 20 07:48:01 2017 +++ src/bin/sh/expand.c Wed Apr 26 13:43:33 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: expand.c,v 1.104 2017/03/20 11:48:01 kre Exp $ */ +/* $NetBSD: expand.c,v 1.105 2017/04/26 17:43:33 christos Exp $ */ /*- * Copyright (c) 1991, 1993 @@ -37,7 +37,7 @@ #if 0 static char sccsid[] = "@(#)expand.c 8.5 (Berkeley) 5/15/95"; #else -__RCSID("$NetBSD: expand.c,v 1.104 2017/03/20 11:48:01 kre Exp $"); +__RCSID("$NetBSD: expand.c,v 1.105 2017/04/26 17:43:33 christos Exp $"); #endif #endif /* not lint */ @@ -52,6 +52,7 @@ __RCSID("$NetBSD: expand.c,v 1.104 2017/ #include <stdlib.h> #include <stdio.h> #include <wctype.h> +#include <wchar.h> /* * Routines to expand arguments to commands. We have to deal with @@ -112,8 +113,9 @@ STATIC void expmeta(char *, char *); STATIC void addfname(char *); STATIC struct strlist *expsort(struct strlist *); STATIC struct strlist *msort(struct strlist *, int); -STATIC int pmatch(char *, char *, int); +STATIC int patmatch(const char *, const char *, int); STATIC char *cvtnum(int, char *); +static int collate_range_cmp(wchar_t, wchar_t); /* * Expand shell variables and backquotes inside a here document. @@ -129,6 +131,18 @@ expandhere(union node *arg, int fd) } +static int +collate_range_cmp(wchar_t c1, wchar_t c2) +{ + wchar_t s1[2], s2[2]; + + s1[0] = c1; + s1[1] = L'\0'; + s2[0] = c2; + s2[1] = L'\0'; + return (wcscoll(s1, s2)); +} + /* * Perform variable substitution and command substitution on an argument, * placing the resulting list of arguments in arglist. If EXP_FULL is true, @@ -1399,7 +1413,7 @@ msort(struct strlist *list, int len) * pointer is stored into *end. */ static int -match_charclass(char *p, wchar_t chr, char **end) +match_charclass(const char *p, wchar_t chr, const char **end) { char name[20]; char *nameend; @@ -1427,35 +1441,29 @@ match_charclass(char *p, wchar_t chr, ch * Returns true if the pattern matches the string. */ -int -patmatch(char *pattern, char *string, int squoted) -{ -#ifdef notdef - if (pattern[0] == '!' && pattern[1] == '!') - return 1 - pmatch(pattern + 2, string); - else -#endif - return pmatch(pattern, string, squoted); -} - - STATIC int -pmatch(char *pattern, char *string, int squoted) +patmatch(const char *pattern, const char *string, int squoted) { - char *p, *q, *end; + const char *p, *q, *end; + const char *bt_p, *bt_q; char c; + wchar_t wc, wc2; p = pattern; q = string; + bt_p = NULL; + bt_q = NULL; for (;;) { switch (c = *p++) { case '\0': - goto breakloop; + if (*q != '\0') + goto backtrack; + return 1; case CTLESC: if (squoted && *q == CTLESC) q++; if (*q++ != *p++) - return 0; + goto backtrack; break; case CTLQUOTEMARK: continue; @@ -1482,17 +1490,19 @@ pmatch(char *pattern, char *string, int q++; } } - do { - if (pmatch(p, q, squoted)) - return 1; - if (squoted && *q == CTLESC) - q++; - } while (*q++ != '\0'); - return 0; + /* + * First try the shortest match for the '*' that + * could work. We can forget any earlier '*' since + * there is no way having it match more characters + * can help us, given that we are already here. + */ + bt_p = p; + bt_q = q; + break; case '[': { - char *endp; + const char *savep, *saveq, *endp; int invert, found; - char chr; + unsigned char chr; endp = p; if (*endp == '!') @@ -1508,20 +1518,25 @@ pmatch(char *pattern, char *string, int break; } invert = 0; - if (*p == '!') { + savep = p, saveq = q; + invert = 0; + if (*p == '!' || *p == '^') { invert++; p++; } found = 0; - chr = *q++; - if (squoted && chr == CTLESC) - chr = *q++; - if (chr == '\0') + if (*q == '\0') return 0; + chr = (unsigned char)*q++; c = *p++; do { if (c == CTLQUOTEMARK) continue; + if (c == '\0') { + p = savep, q = saveq; + c = '['; + goto dft; + } if (c == '[' && *p == ':') { found |= match_charclass(p, chr, &end); if (end != NULL) @@ -1529,36 +1544,46 @@ pmatch(char *pattern, char *string, int } if (c == CTLESC) c = *p++; + wc = (unsigned char)c; if (*p == '-' && p[1] != ']') { p++; - while (*p == CTLQUOTEMARK) - p++; if (*p == CTLESC) p++; - if (chr >= c && chr <= *p) + wc2 = (unsigned char)*p++; + if ( collate_range_cmp(chr, wc) >= 0 + && collate_range_cmp(chr, wc2) <= 0 + ) found = 1; - p++; } else { - if (chr == c) + if (chr == wc) found = 1; } } while ((c = *p++) != ']'); if (found == invert) - return 0; + goto backtrack; break; } dft: default: if (squoted && *q == CTLESC) q++; - if (*q++ != c) + if (*q++ == c) + break; +backtrack: + /* + * If we have a mismatch (other than hitting the end + * of the string), go back to the last '*' seen and + * have it match one additional character. + */ + if (bt_p == NULL) return 0; + if (*bt_q == '\0') + return 0; + bt_q++; + p = bt_p; + q = bt_q; break; } } -breakloop: - if (*q != '\0') - return 0; - return 1; } Index: src/bin/sh/expand.h diff -u src/bin/sh/expand.h:1.20 src/bin/sh/expand.h:1.21 --- src/bin/sh/expand.h:1.20 Mon Mar 20 07:26:07 2017 +++ src/bin/sh/expand.h Wed Apr 26 13:43:33 2017 @@ -1,4 +1,4 @@ -/* $NetBSD: expand.h,v 1.20 2017/03/20 11:26:07 kre Exp $ */ +/* $NetBSD: expand.h,v 1.21 2017/04/26 17:43:33 christos Exp $ */ /*- * Copyright (c) 1991, 1993 @@ -63,6 +63,5 @@ union node; void expandhere(union node *, int); void expandarg(union node *, struct arglist *, int); void expari(int); -int patmatch(char *, char *, int); void rmescapes(char *); int casematch(union node *, char *);