In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/871e132ced1b7ede7c74b978b6ed8187f64b268c?hp=a4ea1387b8d394336c13c7517f4e2b3c83b62f0b>

- Log -----------------------------------------------------------------
commit 871e132ced1b7ede7c74b978b6ed8187f64b268c
Author: Hugo van der Sanden <[email protected]>
Date:   Mon Jan 9 16:46:11 2017 +0000

    [perl #130522] test cases for len(STCLASS) > len(target)

M       t/re/pat.t

commit dda01918af6d12338c1e93d6ff79df676c11d43a
Author: Hugo van der Sanden <[email protected]>
Date:   Sun Jan 8 14:59:36 2017 +0000

    [perl #130522] don't try to find_by_class outside the string
    
    If the calculated latest start position to try is before our current
    start position, stop immediately: else we can read out of bounds,
    and end up doing unnecessary work.

M       regexec.c

commit 67853908828c6be05781083bf27d19b72bfe2ade
Author: Hugo van der Sanden <[email protected]>
Date:   Sun Jan 8 14:54:57 2017 +0000

    [perl #130522] do not allow endpos to exceed strend
    
    Check substrings can come from lookaheads, so their length can exceed
    minlen. Use a clamped variant of HOP3c to avoid a bad endpos in this case.

M       regexec.c
-----------------------------------------------------------------------

Summary of changes:
 regexec.c  | 15 +++++----------
 t/re/pat.t | 21 ++++++++++++++++++++-
 2 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/regexec.c b/regexec.c
index 056a993945..7d2a3ac1e1 100644
--- a/regexec.c
+++ b/regexec.c
@@ -149,6 +149,7 @@ static const char* const non_utf8_target_but_utf8_required
 #define HOP3lim(pos,off,lim) (reginfo->is_utf8_target \
     ? reghop3((U8*)(pos), off, (U8*)(lim)) \
     : (U8*)((pos + off) > lim ? lim : (pos + off)))
+#define HOP3clim(pos,off,lim) ((char*)HOP3lim(pos,off,lim))
 
 #define HOP4(pos,off,llim, rlim) (reginfo->is_utf8_target \
     ? reghop4((U8*)(pos), off, (U8*)(llim), (U8*)(rlim)) \
@@ -1291,10 +1292,10 @@ Perl_re_intuit_start(pTHX_
          */
 
        if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
-            endpos= HOP3c(rx_origin, (prog->minlen ? cl_l : 0), strend);
+            endpos = HOP3clim(rx_origin, (prog->minlen ? cl_l : 0), strend);
         else if (prog->float_substr || prog->float_utf8) {
            rx_max_float = HOP3c(check_at, -start_shift, strbeg);
-           endpos= HOP3c(rx_max_float, cl_l, strend);
+           endpos = HOP3clim(rx_max_float, cl_l, strend);
         }
         else 
             endpos= strend;
@@ -1973,10 +1974,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, 
char *s,
          * trying that it will fail; so don't start a match past the
          * required minimum number from the far end */
         e = HOP3c(strend, -((SSize_t)ln), s);
-
-        if (reginfo->intuit && e < s) {
-            e = s;                     /* Due to minlen logic of intuit() */
-        }
+        if (e < s)
+            break;
 
         c1 = *pat_string;
         c2 = fold_array[c1];
@@ -2020,10 +2019,6 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, 
char *s,
          */
         e = HOP3c(strend, -((SSize_t)lnc), s);
 
-        if (reginfo->intuit && e < s) {
-            e = s;                     /* Due to minlen logic of intuit() */
-        }
-
         /* XXX Note that we could recalculate e to stop the loop earlier,
          * as the worst case expansion above will rarely be met, and as we
          * go along we would usually find that e moves further to the left.
diff --git a/t/re/pat.t b/t/re/pat.t
index c5de2cda07..d8315c4276 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -23,7 +23,7 @@ BEGIN {
     skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
     skip_all_without_unicode_tables();
 
-plan tests => 828;  # Update this when adding/deleting tests.
+plan tests => 834;  # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -1868,6 +1868,25 @@ EOF_CODE
             '[perl #130495] utf-8 character at end of /x comment should not 
misparse',
         );
     }
+    {
+        # [perl #130522] causes out-of-bounds read detected by clang with
+        # address=sanitized when length of the STCLASS string is greater than
+        # length of target string.
+        my $re = qr{(?=\0z)\0?z?$}i;
+        my($yes, $no) = (1, "");
+        for my $test (
+            [ $no,  undef,   '<undef>' ],
+            [ $no,  '',      '' ],
+            [ $no,  "\0",    '\0' ],
+            [ $yes, "\0z",   '\0z' ],
+            [ $no,  "\0z\0", '\0z\0' ],
+            [ $yes, "\0z\n", '\0z\n' ],
+        ) {
+            my($result, $target, $disp) = @$test;
+            no warnings qw/uninitialized/;
+            is($target =~ $re, $result, "[perl #130522] with target '$disp'");
+        }
+    }
 } # End of sub run_tests
 
 1;

--
Perl5 Master Repository

Reply via email to