From 034407d34217437325edc82a8ef7fd0100e8b912 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppi...@redhat.com>
Date: Fri, 16 Jun 2017 13:46:55 +0200
Subject: Make File::Glob more resistant against degenerative matching

---
 ...31211-fixup-File-Glob-degenerate-matching.patch | 258 +++++++++++++++++++++
 perl.spec                                          |  11 +-
 2 files changed, 268 insertions(+), 1 deletion(-)
 create mode 100644 
perl-5.27.0-perl-131211-fixup-File-Glob-degenerate-matching.patch

diff --git a/perl-5.27.0-perl-131211-fixup-File-Glob-degenerate-matching.patch 
b/perl-5.27.0-perl-131211-fixup-File-Glob-degenerate-matching.patch
new file mode 100644
index 0000000..c52d790
--- /dev/null
+++ b/perl-5.27.0-perl-131211-fixup-File-Glob-degenerate-matching.patch
@@ -0,0 +1,258 @@
+From 0db967b2e6a4093a6a5f649190159767e5d005e0 Mon Sep 17 00:00:00 2001
+From: Yves Orton <demer...@gmail.com>
+Date: Tue, 25 Apr 2017 15:17:06 +0200
+Subject: [PATCH] [perl #131211] fixup File::Glob degenerate matching
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The old code would go quadratic with recursion and backtracking
+when doing patterns like "a*a*a*a*a*a*a*x" on a file like
+"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".
+
+This patch changes the code to not recurse, and to not backtrack,
+as per this article from Russ Cox: https://research.swtch.com/glob
+
+It also adds a micro-optimisation for M_ONE and M_SET under the new code.
+
+Thanks to Avar and Russ Cox for helping with this patch, along with
+Jilles Tjoelker and the rest of the FreeBSD community.
+
+Signed-off-by: Petr Písař <ppi...@redhat.com>
+---
+ MANIFEST                   |  1 +
+ ext/File-Glob/bsd_glob.c   | 64 +++++++++++++++++++++++--------
+ ext/File-Glob/t/rt131211.t | 94 ++++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 144 insertions(+), 15 deletions(-)
+ create mode 100644 ext/File-Glob/t/rt131211.t
+
+diff --git a/MANIFEST b/MANIFEST
+index b7b6e74..af0da6c 100644
+--- a/MANIFEST
++++ b/MANIFEST
+@@ -3948,6 +3948,7 @@ ext/File-Glob/t/basic.t          See if File::Glob works
+ ext/File-Glob/t/case.t                See if File::Glob works
+ ext/File-Glob/t/global.t      See if File::Glob works
+ ext/File-Glob/t/rt114984.t    See if File::Glob works
++ext/File-Glob/t/rt131211.t    See if File::Glob works
+ ext/File-Glob/t/taint.t               See if File::Glob works
+ ext/File-Glob/t/threads.t     See if File::Glob + threads works
+ ext/File-Glob/TODO            File::Glob extension todo list
+diff --git a/ext/File-Glob/bsd_glob.c b/ext/File-Glob/bsd_glob.c
+index 821ef20..e96fb73 100644
+--- a/ext/File-Glob/bsd_glob.c
++++ b/ext/File-Glob/bsd_glob.c
+@@ -563,8 +563,12 @@ glob0(const Char *pattern, glob_t *pglob)
+                       break;
+               case BG_STAR:
+                       pglob->gl_flags |= GLOB_MAGCHAR;
+-                      /* collapse adjacent stars to one,
+-                       * to avoid exponential behavior
++                        /* Collapse adjacent stars to one.
++                         * This is required to ensure that a pattern like
++                         * "a**" matches a name like "a", as without this
++                         * check when the first star matched everything it 
would
++                         * cause the second star to return a match fail.
++                         * As long ** is folded here this does not happen.
+                        */
+                       if (bufnext == patbuf || bufnext[-1] != M_ALL)
+                               *bufnext++ = M_ALL;
+@@ -909,35 +913,56 @@ globextend(const Char *path, glob_t *pglob, size_t 
*limitp)
+ 
+ 
+ /*
+- * pattern matching function for filenames.  Each occurrence of the *
+- * pattern causes a recursion level.
++ * pattern matching function for filenames using state machine to avoid
++ * recursion. We maintain a "nextp" and "nextn" to allow us to backtrack
++ * without additional callframes, and to do cleanly prune the backtracking
++ * state when multiple '*' (start) matches are included in the patter.
++ *
++ * Thanks to Russ Cox for the improved state machine logic to avoid quadratic
++ * matching on failure.
++ *
++ * https://research.swtch.com/glob
++ *
++ * An example would be a pattern
++ *  ("a*" x 100) . "y"
++ * against a file name like
++ *  ("a" x 100) . "x"
++ *
+  */
+ static int
+ match(Char *name, Char *pat, Char *patend, int nocase)
+ {
+       int ok, negate_range;
+       Char c, k;
++      Char *nextp = NULL;
++      Char *nextn = NULL;
+ 
++    loop:
+       while (pat < patend) {
+               c = *pat++;
+               switch (c & M_MASK) {
+               case M_ALL:
+                       if (pat == patend)
+                               return(1);
+-                      do
+-                          if (match(name, pat, patend, nocase))
+-                                  return(1);
+-                      while (*name++ != BG_EOS)
+-                              ;
+-                      return(0);
++                      if (*name == BG_EOS)
++                              return 0;
++                      nextn = name + 1;
++                      nextp = pat - 1;
++                      break;
+               case M_ONE:
++                        /* since * matches leftmost-shortest first   *
++                         * if we encounter the EOS then backtracking *
++                         * will not help, so we can exit early here. */
+                       if (*name++ == BG_EOS)
+-                              return(0);
++                                return 0;
+                       break;
+               case M_SET:
+                       ok = 0;
++                        /* since * matches leftmost-shortest first   *
++                         * if we encounter the EOS then backtracking *
++                         * will not help, so we can exit early here. */
+                       if ((k = *name++) == BG_EOS)
+-                              return(0);
++                                return 0;
+                       if ((negate_range = ((*pat & M_MASK) == M_NOT)) != 
BG_EOS)
+                               ++pat;
+                       while (((c = *pat++) & M_MASK) != M_END)
+@@ -953,16 +978,25 @@ match(Char *name, Char *pat, Char *patend, int nocase)
+                               } else if (nocase ? (tolower(c) == tolower(k)) 
: (c == k))
+                                       ok = 1;
+                       if (ok == negate_range)
+-                              return(0);
++                              goto fail;
+                       break;
+               default:
+                       k = *name++;
+                       if (nocase ? (tolower(k) != tolower(c)) : (k != c))
+-                              return(0);
++                              goto fail;
+                       break;
+               }
+       }
+-      return(*name == BG_EOS);
++      if (*name == BG_EOS)
++              return 1;
++
++    fail:
++      if (nextn) {
++              pat = nextp;
++              name = nextn;
++              goto loop;
++      }
++      return 0;
+ }
+ 
+ /* Free allocated data belonging to a glob_t structure. */
+diff --git a/ext/File-Glob/t/rt131211.t b/ext/File-Glob/t/rt131211.t
+new file mode 100644
+index 0000000..c1bcbe0
+--- /dev/null
++++ b/ext/File-Glob/t/rt131211.t
+@@ -0,0 +1,94 @@
++use strict;
++use warnings;
++use v5.16.0;
++use File::Temp 'tempdir';
++use File::Spec::Functions;
++use Test::More;
++use Time::HiRes qw(time);
++
++plan tests => 13;
++
++my $path = tempdir uc cleanup => 1;
++my @files= (
++    "x".("a" x 50)."b", # 0
++    "abbbbbbbbbbbbc",   # 1
++    "abbbbbbbbbbbbd",   # 2
++    "aaabaaaabaaaabc",  # 3
++    "pq",               # 4
++    "r",                # 5
++    "rttiiiiiii",       # 6
++    "wewewewewewe",     # 7
++    "weeeweeeweee",     # 8
++    "weewweewweew",     # 9
++    "wewewewewewewewewewewewewewewewewq", # 10
++    "wtttttttetttttttwr", # 11
++);
++
++
++foreach (@files) {
++    open(my $f, ">", catfile $path, $_);
++}
++
++my $elapsed_fail= 0;
++my $elapsed_match= 0;
++my @got_files;
++my @no_files;
++my $count = 0;
++
++while (++$count < 10) {
++    $elapsed_match -= time;
++    @got_files= glob catfile $path, "x".("a*" x $count) . "b";
++    $elapsed_match += time;
++
++    $elapsed_fail -= time;
++    @no_files= glob catfile $path, "x".("a*" x $count) . "c";
++    $elapsed_fail += time;
++    last if $elapsed_fail > $elapsed_match * 100;
++}
++
++is $count,10,
++    "tried all the patterns without bailing out";
++
++cmp_ok $elapsed_fail/$elapsed_match,"<",2,
++    "time to fail less than twice the time to match";
++is "@got_files", catfile($path, $files[0]),
++    "only got the expected file for xa*..b";
++is "@no_files", "", "shouldnt have files for xa*..c";
++
++
++@got_files= glob catfile $path, "a*b*b*b*bc";
++is "@got_files", catfile($path, $files[1]),
++    "only got the expected file for a*b*b*b*bc";
++
++@got_files= sort glob catfile $path, "a*b*b*bc";
++is "@got_files", catfile($path, $files[3])." ".catfile($path,$files[1]),
++    "got the expected two files for a*b*b*bc";
++
++@got_files= sort glob catfile $path, "p*";
++is "@got_files", catfile($path, $files[4]),
++    "p* matches pq";
++
++@got_files= sort glob catfile $path, "r*???????";
++is "@got_files", catfile($path, $files[6]),
++    "r*??????? works as expected";
++
++@got_files= sort glob catfile $path, "w*e*w??e";
++is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (7,8)),
++    "w*e*w??e works as expected";
++
++@got_files= sort glob catfile $path, "w*e*we??";
++is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } 
(7,8,9,10)),
++    "w*e*we?? works as expected";
++
++@got_files= sort glob catfile $path, "w**e**w";
++is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (9)),
++    "w**e**w works as expected";
++
++@got_files= sort glob catfile $path, "*wee*";
++is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } (8,9)),
++    "*wee* works as expected";
++
++@got_files= sort glob catfile $path, "we*";
++is "@got_files", join(" ", sort map { catfile($path, $files[$_]) } 
(7,8,9,10)),
++    "we* works as expected";
++
+-- 
+2.9.4
+
diff --git a/perl.spec b/perl.spec
index 04b51d9..11c5b98 100644
--- a/perl.spec
+++ b/perl.spec
@@ -37,7 +37,7 @@
 Name:           perl
 Version:        %{perl_version}
 # release number must be even higher, because dual-lived modules will be 
broken otherwise
-Release:        393%{?dist}
+Release:        394%{?dist}
 Epoch:          %{perl_epoch}
 Summary:        Practical Extraction and Report Language
 Group:          Development/Languages
@@ -137,6 +137,10 @@ Patch26:        
perl-5.18.2-Destroy-GDBM-NDBM-ODBM-SDBM-_File-objects-only-from-
 # This allows not to require perl-devel. Bug #1129443
 Patch30:        
perl-5.22.1-Replace-EU-MM-dependnecy-with-EU-MM-Utils-in-IPC-Cmd.patch
 
+# Make File::Glob more resistant against degenerative matching, RT#131211,
+# in upstream after 5.27.0
+Patch31:        
perl-5.27.0-perl-131211-fixup-File-Glob-degenerate-matching.patch
+
 # Link XS modules to libperl.so with EU::CBuilder on Linux, bug #960048
 Patch200:       
perl-5.16.3-Link-XS-modules-to-libperl.so-with-EU-CBuilder-on-Li.patch
 
@@ -2785,6 +2789,7 @@ Perl extension for Version Objects
 %patch22 -p1
 %patch26 -p1
 %patch30 -p1
+%patch31 -p1
 %patch200 -p1
 %patch201 -p1
 
@@ -2805,6 +2810,7 @@ perl -x patchlevel.h \
     'Fedora Patch26: Make *DBM_File desctructors thread-safe (RT#61912)' \
     'Fedora Patch27: Make PadlistNAMES() lvalue again (CPAN RT#101063)' \
     'Fedora Patch30: Replace EU::MakeMaker dependency with EU::MM::Utils in 
IPC::Cmd (bug #1129443)' \
+    'Fedora Patch31: Make File::Glob more resistant against degenerative 
matching (RT#131211)' \
     'Fedora Patch200: Link XS modules to libperl.so with EU::CBuilder on 
Linux' \
     'Fedora Patch201: Link XS modules to libperl.so with EU::MM on Linux' \
     %{nil}
@@ -5087,6 +5093,9 @@ popd
 
 # Old changelog entries are preserved in CVS.
 %changelog
+* Fri Jun 16 2017 Petr Pisar <ppi...@redhat.com> - 4:5.26.0-394
+- Make File::Glob more resistant against degenerative matching (RT#131211)
+
 * Tue Jun 06 2017 Jitka Plesnikova <jples...@redhat.com> - 4:5.26.0-393
 - Stop providing old perl(MODULE_COMPAT_5.24.*)
 
-- 
cgit v1.1


        
https://src.fedoraproject.org/cgit/perl.git/commit/?h=master&id=034407d34217437325edc82a8ef7fd0100e8b912
_______________________________________________
perl-devel mailing list -- perl-devel@lists.fedoraproject.org
To unsubscribe send an email to perl-devel-le...@lists.fedoraproject.org

Reply via email to