[perl.git] branch blead, updated. v5.19.8-533-g63baef5

Karl Williamson Wed, 19 Feb 2014 13:34:26 -0800

In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/63baef57e83f77e202ae14ef902a6615cf69c8a2?hp=ee1ec05fa88c0444e7d8e506b018b9b80be61dd0>


- Log -----------------------------------------------------------------
commit 63baef57e83f77e202ae14ef902a6615cf69c8a2
Author: Karl Williamson <pub...@khwilliamson.com>
Date:   Tue Feb 18 12:59:26 2014 -0700

    Make taint checking regex compile time instead of runtime
    
    See discussion at https://rt.perl.org/Ticket/Display.html?id=120675
    There are several unresolved  items in this discussion, but we did agree
    that tainting should be dependent only on the regex pattern, and not the
    particular input string being matched against:
    
    "The bottom line is we are moving to the policy that tainting is based
    on the operation being in locale, without regard to the particular
    operand's contents passed this time to the operation. This means simpler
    core code and more consistent tainting results. And it lessens the
    likelihood that there are paths in the core that should taint but don't"
    
    This commit does the minimal work to change regex pattern matching to
    determine tainting at pattern compilation time.  Simply put, if a
    pattern contains a regnode whose match/not match depends on the run-time
    locale, any attempt to match against that pattern will taint, regardless
    of the actual target string or runtime locale in effect.  Given this
    change, there are optimizations that can be made to avoid runtime work,
    but these are deferred until later.
    
    Note that just because a regular expression is compiled under locale
    doesn't mean that the generated pattern will be tainted.  It depends on
    the actual pattern.  For example, the pattern /(.)/ doesn't taint
    because it will match exactly one character of the input, regardless of
    locale settings.

M       lib/locale.t
M       pod/perldelta.pod
M       pod/perllocale.pod
M       pp_hot.c
M       regcomp.c
M       regexec.c

commit fdf73a7f7fb994c00e17a01f146018fcb3c47ffb
Author: Karl Williamson <pub...@khwilliamson.com>
Date:   Tue Feb 18 11:37:10 2014 -0700

    lib/locale.t: Add some test names

M       lib/locale.t

commit 9486279ceb5a3e105081b7470810dca288230535
Author: Karl Williamson <pub...@khwilliamson.com>
Date:   Tue Feb 18 11:45:48 2014 -0700

    lib/locale.t: Untaint before checking if next thing taints
    
    The tests weren't testing what they purported to, as we should be sure
    to start with untained values to see if the operation taints.

M       lib/locale.t
-----------------------------------------------------------------------

Summary of changes:
 lib/locale.t       | 258 +++++++++++++++++++++++++++++++++++------------------
 pod/perldelta.pod  |  27 ++++--
 pod/perllocale.pod |  23 +++--
 pp_hot.c           |   7 +-
 regcomp.c          |  32 +++++--
 regexec.c          |  18 ----
 6 files changed, 237 insertions(+), 128 deletions(-)

diff --git a/lib/locale.t b/lib/locale.t
index b195394..4708b58 100644
--- a/lib/locale.t
+++ b/lib/locale.t
@@ -110,7 +110,7 @@ check_taint_not   $&, "not tainted outside 'use locale'";
 
 use locale;    # engage locale and therefore locale taint.
 
-check_taint_not   $a;
+check_taint_not   $a, "\t\$a";
 
 check_taint       uc($a);
 check_taint       "\U$a";
@@ -133,139 +133,227 @@ $_ = $a;        # untaint $_
 
 $_ = uc($a);   # taint $_
 
-check_taint      $_;
+check_taint      $_, "\t\$_";
 
 /(\w)/;        # taint $&, $`, $', $+, $1.
-check_taint      $&;
-check_taint      $`;
-check_taint      $';
-check_taint      $+;
-check_taint      $1;
-check_taint_not  $2;
+check_taint      $&, "\t/(\\w)/ \$&";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
 
 /(.)/; # untaint $&, $`, $', $+, $1.
-check_taint_not  $&;
-check_taint_not  $`;
-check_taint_not  $';
-check_taint_not  $+;
-check_taint_not  $1;
-check_taint_not  $2;
+check_taint_not  $&, "\t/(.)/ \$&";
+check_taint_not  $`, "\t\$`";
+check_taint_not  $', "\t\$'";
+check_taint_not  $+, "\t\$+";
+check_taint_not  $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
 
 /(\W)/;        # taint $&, $`, $', $+, $1.
-check_taint      $&;
-check_taint      $`;
-check_taint      $';
-check_taint      $+;
-check_taint      $1;
-check_taint_not  $2;
+check_taint      $&, "\t/(\\W)/ \$&";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
+
+/(.)/; # untaint $&, $`, $', $+, $1.
+check_taint_not  $&, "\t/(.)/ \$&";
+check_taint_not  $`, "\t\$`";
+check_taint_not  $', "\t\$'";
+check_taint_not  $+, "\t\$+";
+check_taint_not  $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
 
 /(\s)/;        # taint $&, $`, $', $+, $1.
-check_taint      $&;
-check_taint      $`;
-check_taint      $';
-check_taint      $+;
-check_taint      $1;
-check_taint_not  $2;
+check_taint      $&, "\t/(\\s)/ \$&";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
+
+/(.)/; # untaint $&, $`, $', $+, $1.
+check_taint_not  $&, "\t/(.)/ \$&";
 
 /(\S)/;        # taint $&, $`, $', $+, $1.
-check_taint      $&;
-check_taint      $`;
-check_taint      $';
-check_taint      $+;
-check_taint      $1;
-check_taint_not  $2;
+check_taint      $&, "\t/(\\S)/ \$&";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
+
+/(.)/; # untaint $&, $`, $', $+, $1.
+check_taint_not  $&, "\t/(.)/ \$&";
+
+"a" =~ /(a)|(\w)/;     # taint $&, $`, $', $+, $1.
+check_taint      $&, "\t/(a)|(\\w)/ \$&";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+ok($1 eq 'a', ("\t" x 4) . "\$1 is 'a'");
+ok(! defined $2, ("\t" x 4) . "\$2 is undefined");
+check_taint_not  $2, "\t\$2";
+check_taint_not  $3, "\t\$3";
+
+/(.)/; # untaint $&, $`, $', $+, $1.
+check_taint_not  $&, "\t/(.)/ \$&";
+
+"\N{CYRILLIC SMALL LETTER A}" =~ /(\N{CYRILLIC CAPITAL LETTER A})/i;   # no 
tainting because no locale dependence
+check_taint_not      $&, "\t/(\\N{CYRILLIC CAPITAL LETTER A})/i \$&";
+check_taint_not      $`, "\t\$`";
+check_taint_not      $', "\t\$'";
+check_taint_not      $+, "\t\$+";
+check_taint_not      $1, "\t\$1";
+ok($1 eq "\N{CYRILLIC SMALL LETTER A}", ("\t" x 4) . "\$1 is 'small cyrillic 
a'");
+check_taint_not      $2, "\t\$2";
+
+/(.)/; # untaint $&, $`, $', $+, $1.
+check_taint_not  $&, "\t/./ \$&";
+
+"k" =~ /(\N{KELVIN SIGN})/i;   # taints because depends on locale
+check_taint      $&, "\t/(\\N{KELVIN SIGN})/i \$&";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+ok($1 eq 'k', ("\t" x 4) . "\$1 is 'k'");
+check_taint_not      $2, "\t\$2";
+
+/(.)/; # untaint $&, $`, $', $+, $1.
+check_taint_not  $&, "\t/(.)/ \$&";
+
+"a:" =~ /(.)\b(.)/;    # taint $&, $`, $', $+, $1.
+check_taint      $&, "\t/(.)\\b(.)/ \$&";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+check_taint      $2, "\t\$2";
+check_taint_not  $3, "\t\$3";
+
+/(.)/; # untaint $&, $`, $', $+, $1.
+check_taint_not  $&, "\t/./ \$&";
+
+"aa" =~ /(.)\B(.)/;    # taint $&, $`, $', $+, $1.
+check_taint      $&, "\t/(.)\\B(.)/ \$&";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+check_taint      $2, "\t\$2";
+check_taint_not  $3, "\t\$3";
+
+/(.)/; # untaint $&, $`, $', $+, $1.
+check_taint_not  $&, "\t/./ \$&";
+
+"aaa" =~ /(.).(\1)/i;  # notaint because not locale dependent
+check_taint_not      $&, "\t/(.).(\\1)/ \$&";
+check_taint_not      $`, "\t\$`";
+check_taint_not      $', "\t\$'";
+check_taint_not      $+, "\t\$+";
+check_taint_not      $1, "\t\$1";
+check_taint_not      $2, "\t\$2";
+check_taint_not  $3, "\t\$3";
+
+/(.)/; # untaint $&, $`, $', $+, $1.
+check_taint_not  $&, "\t/./ \$&";
 
 $_ = $a;       # untaint $_
 
-check_taint_not  $_;
+check_taint_not  $_, "\t\$_";
 
 /(b)/;         # this must not taint
-check_taint_not  $&;
-check_taint_not  $`;
-check_taint_not  $';
-check_taint_not  $+;
-check_taint_not  $1;
-check_taint_not  $2;
+check_taint_not  $&, "\t/(b)/ \$&";
+check_taint_not  $`, "\t\$`";
+check_taint_not  $', "\t\$'";
+check_taint_not  $+, "\t\$+";
+check_taint_not  $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
 
 $_ = $a;       # untaint $_
 
-check_taint_not  $_;
+check_taint_not  $_, "\t\$_";
 
 $b = uc($a);   # taint $b
 s/(.+)/$b/;    # this must taint only the $_
 
-check_taint      $_;
-check_taint_not  $&;
-check_taint_not  $`;
-check_taint_not  $';
-check_taint_not  $+;
-check_taint_not  $1;
-check_taint_not  $2;
+check_taint      $_, "\t\$_";
+check_taint_not  $&, "\t\$&";
+check_taint_not  $`, "\t\$`";
+check_taint_not  $', "\t\$'";
+check_taint_not  $+, "\t\$+";
+check_taint_not  $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
 
 $_ = $a;       # untaint $_
 
 s/(.+)/b/;     # this must not taint
-check_taint_not  $_;
-check_taint_not  $&;
-check_taint_not  $`;
-check_taint_not  $';
-check_taint_not  $+;
-check_taint_not  $1;
-check_taint_not  $2;
+check_taint_not  $_, "\t\$_";
+check_taint_not  $&, "\t\$&";
+check_taint_not  $`, "\t\$`";
+check_taint_not  $', "\t\$'";
+check_taint_not  $+, "\t\$+";
+check_taint_not  $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
 
 $b = $a;       # untaint $b
 
 ($b = $a) =~ s/\w/$&/;
-check_taint      $b;   # $b should be tainted.
-check_taint_not  $a;   # $a should be not.
+check_taint      $b, "\t\$b";  # $b should be tainted.
+check_taint_not  $a, "\t\$a";  # $a should be not.
 
 $_ = $a;       # untaint $_
 
 s/(\w)/\l$1/;  # this must taint
-check_taint      $_;
-check_taint      $&;
-check_taint      $`;
-check_taint      $';
-check_taint      $+;
-check_taint      $1;
-check_taint_not  $2;
+check_taint      $_, "\t\$_";
+check_taint      $&, "\t\$&";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
 
 $_ = $a;       # untaint $_
 
 s/(\w)/\L$1/;  # this must taint
-check_taint      $_;
-check_taint      $&;
-check_taint      $`;
-check_taint      $';
-check_taint      $+;
-check_taint      $1;
-check_taint_not  $2;
+check_taint      $_, "\t\$_";
+check_taint      $&, "\t\$&";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
 
 $_ = $a;       # untaint $_
 
 s/(\w)/\u$1/;  # this must taint
-check_taint      $_;
-check_taint      $&;
-check_taint      $`;
-check_taint      $';
-check_taint      $+;
-check_taint      $1;
-check_taint_not  $2;
+check_taint      $_, "\t\$_";
+check_taint      $&, "\t\$&";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
 
 $_ = $a;       # untaint $_
 
 s/(\w)/\U$1/;  # this must taint
-check_taint      $_;
-check_taint      $&;
-check_taint      $`;
-check_taint      $';
-check_taint      $+;
-check_taint      $1;
-check_taint_not  $2;
+check_taint      $_, "\t\$_";
+check_taint      $&, "\t\$&";
+check_taint      $`, "\t\$`";
+check_taint      $', "\t\$'";
+check_taint      $+, "\t\$+";
+check_taint      $1, "\t\$1";
+check_taint_not  $2, "\t\$2";
 
 # After all this tainting $a should be cool.
 
-check_taint_not  $a;
+check_taint_not  $a, "\t\$a";
 
 "a" =~ /([a-z])/;
 check_taint_not $1, '"a" =~ /([a-z])/';
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index a5578d4..0c36067 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -94,14 +94,27 @@ L</Selected Bug Fixes> section.
 
 =head2 Tainting happens under more circumstances; now conforms to documentation
 
-When changing the case of a string (C<lc>, C<"\U">, I<etc>.), within the
-scope of C<use locale>, the result is now tainted no matter what the
+This affects regular expression matching and changing the case of a
+string (C<lc>, C<"\U">, I<etc>.) within the scope of C<use locale>.
+The result is now tainted based on the operation, no matter what the
 contents of the string were, as the documentation (L<perlsec>,
-L<perllocale/SECURITY>) indicates it should.   Previously, if the string
-contained no characters whose case change could be affected by the
-locale, the result would not be tainted.  For example, the result of
-C<uc()> on an empty string or one containing only above-Latin1 code
-points is now tainted.  This leads to more consistent tainting results.
+L<perllocale/SECURITY>) indicates it should.  Previously, for the case
+change operation, if the string contained no characters whose case
+change could be affected by the locale, the result would not be tainted.
+For example, the result of C<uc()> on an empty string or one containing
+only above-Latin1 code points is now tainted, and wasn't before.  This
+leads to more consistent tainting results.  Regular expression patterns
+taint their non-binary results (like C<$&>, C<$2>) if and only if the
+pattern contains elements whose matching depends on the current
+(potentially tainted) locale.  Like the case changing functions, the
+actual contents of the string being matched now do not matter, whereas
+formerly it did.  For example, if the pattern contains a C<\w>, the
+results will be tainted even if the match did not have to use that
+portion of the pattern to succeed or fail, because what a C<\w> matches
+depends on locale.  However, for example, a C<.> in a pattern will not
+enable tainting, because the dot matches any single character, and what
+the current locale is doesn't change in any way what matches and what
+doesn't.
 
 =head2 Quote-like escape changes
 
diff --git a/pod/perllocale.pod b/pod/perllocale.pod
index 62a2d8b..c7546f8 100644
--- a/pod/perllocale.pod
+++ b/pod/perllocale.pod
@@ -1011,16 +1011,23 @@ Scalar true/false result never tainted.
 All subpatterns, either delivered as a list-context result or as C<$1>
 I<etc>., are tainted if C<use locale> (but not
 S<C<use locale ':not_characters'>>) is in effect, and the subpattern
-regular expression is matched case-insensitively (C</i>) or contains a
-locale-dependent construct.  These constructs include C<\w>
-(to match an alphanumeric character), C<\W> (non-alphanumeric
-character), C<\s> (whitespace character), C<\S> (non whitespace
-character), and the POSIX character classes, such as C<[:alpha:]> (see
-L<perlrecharclass/POSIX Character Classes>).
+regular expression contains a locale-dependent construct.  These
+constructs include C<\w> (to match an alphanumeric character), C<\W>
+(non-alphanumeric character), C<\b> and C<\B> (word-boundary and
+non-boundardy, which depend on what C<\w> and C<\W> match), C<\s>
+(whitespace character), C<\S> (non whitespace character), C<\d> and
+C<\D> (digits and non-digits), and the POSIX character classes, such as
+C<[:alpha:]> (see L<perlrecharclass/POSIX Character Classes>).
+
+Tainting is also likely if the pattern is to be matched
+case-insensitively (via C</i>).  The exception is if all the code points
+to be matched this way are above 255 and do not have folds under Unicode
+rules to below 256.  Tainting is not done for these because Perl
+only uses Unicode rules for such code points, and those rules are the
+same no matter what the current locale.
+
 The matched-pattern variables, C<$&>, C<$`> (pre-match), C<$'>
 (post-match), and C<$+> (last match) also are tainted.
-(Note that currently there are some bugs where not everything that
-should be tainted gets tainted in all circumstances.)
 
 =item  *
 
diff --git a/pp_hot.c b/pp_hot.c
index 79b77ab..fb22b38 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -1951,17 +1951,14 @@ While the pattern is being assembled/concatenated and 
then compiled,
 PL_tainted will get set (via TAINT_set) if any component of the pattern
 is tainted, e.g. /.*$tainted/.  At the end of pattern compilation,
 the RXf_TAINTED flag is set on the pattern if PL_tainted is set (via
-TAINT_get).
+TAINT_get).  Also, if any component of the pattern matches based on
+locale-dependent behavior, the RXf_TAINTED_SEEN flag is set.
 
 When the pattern is copied, e.g. $r = qr/..../, the SV holding the ref to
 the pattern is marked as tainted. This means that subsequent usage, such
 as /x$r/, will set PL_tainted using TAINT_set, and thus RXf_TAINTED,
 on the new pattern too.
 
-At the start of execution of a pattern, the RXf_TAINTED_SEEN flag on the
-regex is cleared; during execution, locale-variant ops such as POSIXL may
-set RXf_TAINTED_SEEN.
-
 RXf_TAINTED_SEEN is used post-execution by the get magic code
 of $1 et al to indicate whether the returned value should be tainted.
 It is the responsibility of the caller of the pattern (i.e. pp_match,
diff --git a/regcomp.c b/regcomp.c
index 803a79b..b3a4845 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6330,10 +6330,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int 
pat_count,
     if (rx_flags & PMf_FOLD) {
         RExC_contains_i = 1;
     }
-    if (initial_charset == REGEX_LOCALE_CHARSET) {
-       RExC_contains_locale = 1;
-    }
-    else if (RExC_utf8 && initial_charset == REGEX_DEPENDS_CHARSET) {
+    if (RExC_utf8 && initial_charset == REGEX_DEPENDS_CHARSET) {
 
        /* Set to use unicode semantics if the pattern is in utf8 and has the
         * 'depends' charset specified, as it means unicode when utf8  */
@@ -7072,6 +7069,11 @@ reStudy:
             r->extflags |= (RXf_SKIPWHITE|RXf_WHITE);
 
     }
+
+    if (RExC_contains_locale) {
+        RXp_EXTFLAGS(r) |= RXf_TAINTED_SEEN;
+    }
+
 #ifdef DEBUGGING
     if (RExC_paren_names) {
         ri->name_list_idx = add_data( pRExC_state, STR_WITH_LEN("a"));
@@ -9159,7 +9161,6 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t 
*pRExC_state)
                 }
                 cs = REGEX_LOCALE_CHARSET;
                 has_charset_modifier = LOCALE_PAT_MOD;
-                RExC_contains_locale = 1;
                 break;
             case UNICODE_PAT_MOD:
                 if (has_charset_modifier) {
@@ -11018,6 +11019,10 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t 
*pRExC_state,
     {
         *flagp |= SIMPLE;
     }
+
+    if (OP(node) == EXACTFL) {
+        RExC_contains_locale = 1;
+    }
 }
 
 
@@ -11289,6 +11294,9 @@ tryagain:
             if (op > BOUNDA) {  /* /aa is same as /a */
                 op = BOUNDA;
             }
+            else if (op == BOUNDL) {
+                RExC_contains_locale = 1;
+            }
            ret = reg_node(pRExC_state, op);
            FLAGS(ret) = get_regex_charset(RExC_flags);
            *flagp |= SIMPLE;
@@ -11304,6 +11312,9 @@ tryagain:
             if (op > NBOUNDA) { /* /aa is same as /a */
                 op = NBOUNDA;
             }
+            else if (op == NBOUNDL) {
+                RExC_contains_locale = 1;
+            }
            ret = reg_node(pRExC_state, op);
            FLAGS(ret) = get_regex_charset(RExC_flags);
            *flagp |= SIMPLE;
@@ -11353,6 +11364,9 @@ tryagain:
             if (op > POSIXA) {  /* /aa is same as /a */
                 op = POSIXA;
             }
+            else if (op == POSIXL) {
+                RExC_contains_locale = 1;
+            }
 
         join_posix_op_known:
 
@@ -14194,6 +14208,9 @@ parseit:
             else {
                 RExC_emit = (regnode *)orig_emit;
                 if (PL_regkind[op] == POSIXD) {
+                    if (op == POSIXL) {
+                        RExC_contains_locale = 1;
+                    }
                     if (invert) {
                         op += NPOSIXD - POSIXD;
                     }
@@ -14789,6 +14806,11 @@ parseit:
                   swash, has_user_defined_property);
 
     *flagp |= HASWIDTH|SIMPLE;
+
+    if (ANYOF_FLAGS(ret) & ANYOF_LOCALE_FLAGS) {
+        RExC_contains_locale = 1;
+    }
+
     return ret;
 }
 
diff --git a/regexec.c b/regexec.c
index fd00cc6..489a10b 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1778,13 +1778,11 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, 
char *s,
         break;
     }
     case BOUNDL:
-        RXp_MATCH_TAINTED_on(prog);
         FBC_BOUND(isWORDCHAR_LC,
                   isWORDCHAR_LC_uvchr(tmp),
                   isWORDCHAR_LC_utf8((U8*)s));
         break;
     case NBOUNDL:
-        RXp_MATCH_TAINTED_on(prog);
         FBC_NBOUND(isWORDCHAR_LC,
                    isWORDCHAR_LC_uvchr(tmp),
                    isWORDCHAR_LC_utf8((U8*)s));
@@ -1833,7 +1831,6 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, 
char *s,
         /* FALLTHROUGH */
 
     case POSIXL:
-        RXp_MATCH_TAINTED_on(prog);
         REXEC_FBC_CSCAN(to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) 
s)),
                         to_complement ^ cBOOL(isFOO_lc(FLAGS(c), *s)));
         break;
@@ -2519,8 +2516,6 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char 
*stringarg, char *strend,
        Perl_croak(aTHX_ "corrupted regexp program");
     }
 
-    RX_MATCH_TAINTED_off(rx);
-
     reginfo->prog = rx;         /* Yes, sorry that this is confusing.  */
     reginfo->intuit = 0;
     reginfo->is_utf8_target = cBOOL(utf8_target);
@@ -4422,7 +4417,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, 
regnode *prog)
            const char * s;
            U32 fold_utf8_flags;
 
-            RX_MATCH_TAINTED_on(reginfo->prog);
             folder = foldEQ_locale;
             fold_array = PL_fold_locale;
            fold_utf8_flags = FOLDEQ_LOCALE;
@@ -4495,8 +4489,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, 
regnode *prog)
         * have to set the FLAGS fields of these */
        case BOUNDL:  /*  /\b/l  */
        case NBOUNDL: /*  /\B/l  */
-            RX_MATCH_TAINTED_on(reginfo->prog);
-           /* FALL THROUGH */
        case BOUND:   /*  /\b/   */
        case BOUNDU:  /*  /\b/u  */
        case BOUNDA:  /*  /\b/a  */
@@ -4603,10 +4595,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, 
regnode *prog)
             if (NEXTCHR_IS_EOS)
                 sayNO;
 
-            /* The locale hasn't influenced the outcome before this, so defer
-             * tainting until now */
-            RX_MATCH_TAINTED_on(reginfo->prog);
-
             /* Use isFOO_lc() for characters within Latin1.  (Note that
              * UTF8_IS_INVARIANT works even on non-UTF-8 strings, or else
              * wouldn't be invariant) */
@@ -4980,7 +4968,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, 
regnode *prog)
            const U8 *fold_array;
            UV utf8_fold_flags;
 
-            RX_MATCH_TAINTED_on(reginfo->prog);
            folder = foldEQ_locale;
            fold_array = PL_fold_locale;
            type = REFFL;
@@ -5025,7 +5012,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, 
regnode *prog)
            goto do_nref_ref_common;
 
        case REFFL:  /*  /\1/il  */
-            RX_MATCH_TAINTED_on(reginfo->prog);
            folder = foldEQ_locale;
            fold_array = PL_fold_locale;
            utf8_fold_flags = FOLDEQ_LOCALE;
@@ -7131,7 +7117,6 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const 
regnode *p,
        goto do_exactf;
 
     case EXACTFL:
-        RXp_MATCH_TAINTED_on(prog);
        utf8_flags = FOLDEQ_LOCALE;
        goto do_exactf;
 
@@ -7225,7 +7210,6 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const 
regnode *p,
         /* FALLTHROUGH */
 
     case POSIXL:
-        RXp_MATCH_TAINTED_on(prog);
        if (! utf8_target) {
            while (scan < loceol && to_complement ^ cBOOL(isFOO_lc(FLAGS(p),
                                                                    *scan)))
@@ -7673,7 +7657,6 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * 
const n, const U8* const
        }
        else if (flags & ANYOF_LOCALE_FLAGS) {
            if (flags & ANYOF_LOC_FOLD) {
-                RXp_MATCH_TAINTED_on(prog);
                 if (ANYOF_BITMAP_TEST(n, PL_fold_locale[c])) {
                     match = TRUE;
                 }
@@ -7713,7 +7696,6 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * 
const n, const U8* const
                 int count = 0;
                 int to_complement = 0;
 
-                RXp_MATCH_TAINTED_on(prog);
                 while (count < ANYOF_MAX) {
                     if (ANYOF_POSIXL_TEST(n, count)
                         && to_complement ^ cBOOL(isFOO_lc(count/2, (U8) c)))

--
Perl5 Master Repository

[perl.git] branch blead, updated. v5.19.8-533-g63baef5

Reply via email to