Package: perl
Version: 5.18.1-4
Severity: serious
Tags: patch

Hi,

in #723913 (see http://bugs.debian.org/723913) it turned out that FTBFS of
latex2html and some packages FTBFSing (condor, starlink-ast) could be traced
back to perl.

https://bugzilla.redhat.com/show_bug.cgi?id=978233

suggests that perl's upstream commit f1e1b256c5c1773d90e828cca6323c53fa23391b
fixes this.

I tested this and it works. :-)

Attaching the adjusted patch that applies to Debian's 5.18.1-4.

Thanks to Tim Theisen for digging this out.

Roland


-- System Information:
Debian Release: 7.0
  APT prefers unreleased
  APT policy: (500, 'unreleased'), (500, 'unstable')
Architecture: powerpcspe (ppc)

Kernel: Linux 3.9.0-dirty (SMP w/2 CPU cores)
Locale: LANG=en_GB.UTF-8, LC_CTYPE=en_GB.UTF-8 (charmap=UTF-8) (ignored: LC_ALL 
set to en_GB.UTF-8)
Shell: /bin/sh linked to /bin/dash
--- perl-5.18.1.orig/regcomp.c
+++ perl-5.18.1/regcomp.c
@@ -10659,7 +10659,7 @@ tryagain:
                     if (num < 1)
                         vFAIL("Reference to nonexistent or unclosed group");
                 }
-		if (!isg && num > 9 && num >= RExC_npar)
+                if (!isg && num > 9 && num >= RExC_npar && *RExC_parse != '8' && *RExC_parse != '9')
                     /* Probably a character specified in octal, e.g. \35 */
 		    goto defchar;
 		else {
@@ -10936,10 +10936,28 @@ tryagain:
 			p++;
 			ender = grok_bslash_c(*p++, UTF, SIZE_ONLY);
 			break;
-		    case '0': case '1': case '2': case '3':case '4':
+                    case '8': case '9': /* must be a backreference */
+                        --p;
+                        goto loopdone;
+                    case '1': case '2': case '3':case '4':
 		    case '5': case '6': case '7':
-			if (*p == '0' ||
-			    (isDIGIT(p[1]) && atoi(p) >= RExC_npar))
+                        /* When we parse backslash escapes there is ambiguity between
+                         * backreferences and octal escapes. Any escape from \1 - \9 is
+                         * a backreference, any multi-digit escape which does not start with
+                         * 0 and which when evaluated as decimal could refer to an already
+                         * parsed capture buffer is a backslash. Anything else is octal.
+                         *
+                         * Note this implies that \118 could be interpreted as 118 OR as
+                         * "\11" . "8" depending on whether there were 118 capture buffers
+                         * defined already in the pattern.
+                         */
+                        if ( !isDIGIT(p[1]) || atoi(p) <= RExC_npar )
+                        {  /* Not to be treated as an octal constant, go
+                                   find backref */
+                            --p;
+                            goto loopdone;
+                        }
+                    case '0':
 			{
 			    I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
 			    STRLEN numlen = 3;
@@ -10958,11 +10976,6 @@ tryagain:
                                          form_short_octal_warning(p, numlen));
                             }
 			}
-                        else {  /* Not to be treated as an octal constant, go
-                                   find backref */
-			    --p;
-			    goto loopdone;
-			}
 			if (PL_encoding && ender < 0x100)
 			    goto recode_encoding;
 			break;
--- perl-5.18.1.orig/t/re/pat.t
+++ perl-5.18.1/t/re/pat.t
@@ -20,7 +20,7 @@ BEGIN {
     require './test.pl';
 }
 
-plan tests => 472;  # Update this when adding/deleting tests.
+plan tests => 572;  # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -1380,6 +1380,23 @@ EOP
 	is ($s, 'XXcdXXX&', 'RT #119125 with /x');
     }
 
+    {
+        # if we have 87 capture buffers defined then \87 should refer to the 87th.
+        # test that this is true for 1..100
+        my $str= "aa";
+        for my $i (1..100) {
+            my $pat= "a";
+            $pat= "($pat)" for 1 .. $i;
+            $pat.="\\$i";
+            eval {
+                ok($str=~/$pat/,"\\$i works with $i buffers");
+                1;
+            } or do {
+                ok(0,"\\$i works with $i buffers");
+            };
+        }
+    }
+
 } # End of sub run_tests
 
 1;
--- perl-5.18.1.orig/t/re/re_tests
+++ perl-5.18.1/t/re/re_tests
@@ -1487,10 +1487,9 @@ abc\N{def	-	c	-	\\N{NAME} must be resolv
 [a\o{1000}]	\x{200}	y	$&	\x{200}
 
 # The below were inserting a NULL
-\87	87	y	$&	87
-a\87	a87	y	$&	a87
-a\97	a97	y	$&	a97
-
+\87	87	c	-	Reference to nonexistent group in regex
+a\87	a87	c	-	Reference to nonexistent group in regex
+a\97	a97	c	-	Reference to nonexistent group in regex
 
 # The below was inserting a NULL into the character class.
 [\8\9]	\000	Sn	-	-
--- perl-5.18.1.orig/t/re/reg_mesg.t
+++ perl-5.18.1/t/re/reg_mesg.t
@@ -174,6 +174,9 @@ my @death =
  'm/[\o]/' => 'Missing braces on \o{} {#} m/[\o{#}]/',
  'm/[\o{}]/' => 'Number with no digits {#} m/[\o{}{#}]/',
  'm/(?^-i:foo)/' => 'Sequence (?^-...) not recognized {#} m/(?^-{#}i:foo)/',
+ 'm/\87/' => 'Reference to nonexistent group {#} m/\87{#}/',
+ 'm/a\87/' => 'Reference to nonexistent group {#} m/a\87{#}/',
+ 'm/a\97/' => 'Reference to nonexistent group {#} m/a\97{#}/',
 );
 # Tests involving a user-defined charnames translator are in pat_advanced.t
 
@@ -200,9 +203,6 @@ my @warning = (
     '/\018/' => '\'\018\' resolved to \'\o{1}8\' {#} m/\018{#}/',
     '/[\08]/' => '\'\08\' resolved to \'\o{0}8\' {#} m/[\08{#}]/',
     '/[\018]/' => '\'\018\' resolved to \'\o{1}8\' {#} m/[\018{#}]/',
-    '/\87/' => 'Unrecognized escape \8 passed through {#} m/\8{#}7/',
-    '/a\87/' => 'Unrecognized escape \8 passed through {#} m/a\8{#}7/',
-    '/a\97/' => 'Unrecognized escape \9 passed through {#} m/a\9{#}7/',
     '/(?=a)*/' => '(?=a)* matches null string many times {#} m/(?=a)*{#}/',
     'my $x = \'\m\'; qr/a$x/' => 'Unrecognized escape \m passed through {#} m/a\m{#}/',
     '/\q/' => 'Unrecognized escape \q passed through {#} m/\q{#}/',

Reply via email to