As reported in https://research.swtch.com/glob, matching aaaaaaaaaaaaaaaaaaaaaaa with a*a*a*a*a*a*...etc takes exponential time.

Tested with rsc's program: https://news.ycombinator.com/item?id=14187948


I copied the glob implementation from one of the BSDs - I believe FreeBSD - into a standalone C program and ran that program on the same Linux system as the rest of the tests. Here's the version that tests the system glob. If you run it on your FooBSD systems you can see whether it runs quickly or not. The program assumes that you've already done:
  rm -rf /tmp/glob
  mkdir /tmp/glob
  cd /tmp/glob
  touch $(perl -e 'print "a"x100')
And here's the program:
  #include <stdio.h>
  #include <glob.h>
  #include <unistd.h>
  #include <string.h>
  #include <stdlib.h>
  #include <dirent.h>
  #include <time.h>

  int
  main(void)
  {
      glob_t g;
      char pattern[1000], *p;
      struct timespec t, t2;
      double dt;
      int i, j, k;

      chdir("/tmp/glob");
      setlinebuf(stdout);

      int mul = 1000;
      for(i = 0; i < 100; i++) {
          p = pattern;
          for (k = 0; k < i; k++) {
              *p++ = 'a';
              *p++ = '*';
          }
          *p++ = 'b';
          *p = '\0';
          printf("# %d %s\n", i, pattern);
          clock_gettime(CLOCK_REALTIME, &t);
          for (j = 0; j < mul; j++) {
              memset(&g, 0, sizeof g);
              if(glob(pattern, 0, 0, &g) != GLOB_NOMATCH) {
                  fprintf(stderr, "error: found matches\n");
                  exit(2);
              }
              globfree(&g);
          }
          clock_gettime(CLOCK_REALTIME, &t2);
          t2.tv_sec -= t.tv_sec;
          t2.tv_nsec -= t.tv_nsec;
          dt = t2.tv_sec + (double)t2.tv_nsec/1e9;
          printf("%d %.9f\n", i, dt/mul);
          fflush(stdout);
          if(dt/mul >= 0.0001)
              mul = 1;
          if(i >= 8 && dt/mul >= 10)
              break;
      }
  }
I won't be filing any specific bugs myself. I mailed oss-security@ this morning, which should reach relevant BSD maintainers, but more bug filing can't hurt.


Fix ported from https://perl5.git.perl.org/perl.git/commit/33252c318625f3c6c89b816ee88481940e3e6f95


diff --git a/lib/libc/gen/glob.c b/lib/libc/gen/glob.c
index e521dcd098d..ee9ab78ef88 100644
--- a/lib/libc/gen/glob.c
+++ b/lib/libc/gen/glob.c
@@ -126,9 +126,6 @@ typedef char Char;
 #define    GLOB_LIMIT_STAT        2048
 #define    GLOB_LIMIT_READDIR    16384

-/* Limit of recursion during matching attempts. */
-#define GLOB_LIMIT_RECUR    64
-
 struct glob_lim {
     size_t    glim_malloc;
     size_t    glim_stat;
@@ -161,7 +158,7 @@ static const Char *
 static int     globexp1(const Char *, glob_t *, struct glob_lim *);
 static int     globexp2(const Char *, const Char *, glob_t *,
             struct glob_lim *);
-static int     match(Char *, Char *, Char *, int);
+static int     match(Char *, Char *, Char *);
 #ifdef DEBUG
 static void     qprintf(const char *, Char *);
 #endif
@@ -753,7 +750,7 @@ glob3(Char *pathbuf, Char *pathbuf_last, Char *pathend, Char *pathend_last,
             break;
         }

-        if (!match(pathend, pattern, restpattern, GLOB_LIMIT_RECUR)) {
+        if (!match(pathend, pattern, restpattern)) {
             *pathend = EOS;
             continue;
         }
@@ -883,17 +880,25 @@ globextend(const Char *path, glob_t *pglob, struct glob_lim *limitp,

 /*
  * pattern matching function for filenames.  Each occurrence of the *
- * pattern causes a recursion level.
+ * pattern causes an iteration.
+ *
+ * Note, this function differs from the original as per the discussion
+ * here: https://research.swtch.com/glob
+ *
+ * Basically we removed the recursion and made it use the algorithm
+ * from Russ Cox to not go quadratic on cases like a file called ("a" x 100) . "x"
+ * matched against a pattern like "a*a*a*a*a*a*a*y".
+ *
  */
 static int
-match(Char *name, Char *pat, Char *patend, int recur)
+match(Char *name, Char *pat, Char *patend)
 {
     int ok, negate_range;
     Char c, k;
+    Char *nextp = NULL;
+    Char *nextn = NULL;

-    if (recur-- == 0)
-        return(GLOB_NOSPACE);
-
+loop:
     while (pat < patend) {
         c = *pat++;
         switch (c & M_MASK) {
@@ -902,19 +907,19 @@ match(Char *name, Char *pat, Char *patend, int recur)
                 pat++;    /* eat consecutive '*' */
             if (pat == patend)
                 return(1);
-            do {
-                if (match(name, pat, patend, recur))
-                    return(1);
-            } while (*name++ != EOS);
-            return(0);
+            if (*name == EOS)
+                return 0;
+            nextn = name + 1;
+            nextp = pat - 1;
+            break;
         case M_ONE:
             if (*name++ == EOS)
-                return(0);
+                goto fail;
             break;
         case M_SET:
             ok = 0;
             if ((k = *name++) == EOS)
-                return(0);
+                goto fail;
             if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
                 ++pat;
             while (((c = *pat++) & M_MASK) != M_END) {
@@ -933,15 +938,25 @@ match(Char *name, Char *pat, Char *patend, int recur)
                     ok = 1;
             }
             if (ok == negate_range)
-                return(0);
+                goto fail;
             break;
         default:
             if (*name++ != c)
-                return(0);
+                goto fail;
             break;
         }
     }
     return(*name == EOS);
+    if (*name == EOS)
+        return 1;
+
+fail:
+    if (nextn) {
+        pat = nextp;
+        name = nextn;
+        goto loop;
+    }
+    return 0;
 }

 /* Free allocated data belonging to a glob_t structure. */

Reply via email to