As reported in https://research.swtch.com/glob, matching
aaaaaaaaaaaaaaaaaaaaaaa with a*a*a*a*a*a*...etc takes exponential time.
Tested with rsc's program: https://news.ycombinator.com/item?id=14187948
I copied the glob implementation from one of the BSDs - I believe
FreeBSD - into a standalone C program and ran that program on the same
Linux system as the rest of the tests. Here's the version that tests the
system glob. If you run it on your FooBSD systems you can see whether it
runs quickly or not. The program assumes that you've already done:
rm -rf /tmp/glob
mkdir /tmp/glob
cd /tmp/glob
touch $(perl -e 'print "a"x100')
And here's the program:
#include <stdio.h>
#include <glob.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <dirent.h>
#include <time.h>
int
main(void)
{
glob_t g;
char pattern[1000], *p;
struct timespec t, t2;
double dt;
int i, j, k;
chdir("/tmp/glob");
setlinebuf(stdout);
int mul = 1000;
for(i = 0; i < 100; i++) {
p = pattern;
for (k = 0; k < i; k++) {
*p++ = 'a';
*p++ = '*';
}
*p++ = 'b';
*p = '\0';
printf("# %d %s\n", i, pattern);
clock_gettime(CLOCK_REALTIME, &t);
for (j = 0; j < mul; j++) {
memset(&g, 0, sizeof g);
if(glob(pattern, 0, 0, &g) != GLOB_NOMATCH) {
fprintf(stderr, "error: found matches\n");
exit(2);
}
globfree(&g);
}
clock_gettime(CLOCK_REALTIME, &t2);
t2.tv_sec -= t.tv_sec;
t2.tv_nsec -= t.tv_nsec;
dt = t2.tv_sec + (double)t2.tv_nsec/1e9;
printf("%d %.9f\n", i, dt/mul);
fflush(stdout);
if(dt/mul >= 0.0001)
mul = 1;
if(i >= 8 && dt/mul >= 10)
break;
}
}
I won't be filing any specific bugs myself. I mailed oss-security@ this
morning, which should reach relevant BSD maintainers, but more bug
filing can't hurt.
Fix ported from
https://perl5.git.perl.org/perl.git/commit/33252c318625f3c6c89b816ee88481940e3e6f95
diff --git a/lib/libc/gen/glob.c b/lib/libc/gen/glob.c
index e521dcd098d..ee9ab78ef88 100644
--- a/lib/libc/gen/glob.c
+++ b/lib/libc/gen/glob.c
@@ -126,9 +126,6 @@ typedef char Char;
#define GLOB_LIMIT_STAT 2048
#define GLOB_LIMIT_READDIR 16384
-/* Limit of recursion during matching attempts. */
-#define GLOB_LIMIT_RECUR 64
-
struct glob_lim {
size_t glim_malloc;
size_t glim_stat;
@@ -161,7 +158,7 @@ static const Char *
static int globexp1(const Char *, glob_t *, struct glob_lim *);
static int globexp2(const Char *, const Char *, glob_t *,
struct glob_lim *);
-static int match(Char *, Char *, Char *, int);
+static int match(Char *, Char *, Char *);
#ifdef DEBUG
static void qprintf(const char *, Char *);
#endif
@@ -753,7 +750,7 @@ glob3(Char *pathbuf, Char *pathbuf_last, Char
*pathend, Char *pathend_last,
break;
}
- if (!match(pathend, pattern, restpattern, GLOB_LIMIT_RECUR)) {
+ if (!match(pathend, pattern, restpattern)) {
*pathend = EOS;
continue;
}
@@ -883,17 +880,25 @@ globextend(const Char *path, glob_t *pglob, struct
glob_lim *limitp,
/*
* pattern matching function for filenames. Each occurrence of the *
- * pattern causes a recursion level.
+ * pattern causes an iteration.
+ *
+ * Note, this function differs from the original as per the discussion
+ * here: https://research.swtch.com/glob
+ *
+ * Basically we removed the recursion and made it use the algorithm
+ * from Russ Cox to not go quadratic on cases like a file called ("a" x
100) . "x"
+ * matched against a pattern like "a*a*a*a*a*a*a*y".
+ *
*/
static int
-match(Char *name, Char *pat, Char *patend, int recur)
+match(Char *name, Char *pat, Char *patend)
{
int ok, negate_range;
Char c, k;
+ Char *nextp = NULL;
+ Char *nextn = NULL;
- if (recur-- == 0)
- return(GLOB_NOSPACE);
-
+loop:
while (pat < patend) {
c = *pat++;
switch (c & M_MASK) {
@@ -902,19 +907,19 @@ match(Char *name, Char *pat, Char *patend, int recur)
pat++; /* eat consecutive '*' */
if (pat == patend)
return(1);
- do {
- if (match(name, pat, patend, recur))
- return(1);
- } while (*name++ != EOS);
- return(0);
+ if (*name == EOS)
+ return 0;
+ nextn = name + 1;
+ nextp = pat - 1;
+ break;
case M_ONE:
if (*name++ == EOS)
- return(0);
+ goto fail;
break;
case M_SET:
ok = 0;
if ((k = *name++) == EOS)
- return(0);
+ goto fail;
if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
++pat;
while (((c = *pat++) & M_MASK) != M_END) {
@@ -933,15 +938,25 @@ match(Char *name, Char *pat, Char *patend, int recur)
ok = 1;
}
if (ok == negate_range)
- return(0);
+ goto fail;
break;
default:
if (*name++ != c)
- return(0);
+ goto fail;
break;
}
}
return(*name == EOS);
+ if (*name == EOS)
+ return 1;
+
+fail:
+ if (nextn) {
+ pat = nextp;
+ name = nextn;
+ goto loop;
+ }
+ return 0;
}
/* Free allocated data belonging to a glob_t structure. */