[perl.git] branch blead, updated. v5.25.6-251-g894226f

Matthew Horsfall Mon, 14 Nov 2016 05:24:00 -0800

In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/894226fa42af3bb6f50857abf7febd95b6fcd2bc?hp=ee16020279bc895096981c490d3477b7a8deebae>


- Log -----------------------------------------------------------------
commit 894226fa42af3bb6f50857abf7febd95b6fcd2bc
Author: Matthew Horsfall <wolfs...@gmail.com>
Date:   Mon Nov 14 08:03:33 2016 -0500

    Add perldelta for Indented Here-docs

M       pod/perldelta.pod

commit 47eb44116b9aa0f7f4c7c732af48fc3dad2a7aa2
Author: Matthew Horsfall <wolfs...@gmail.com>
Date:   Fri Nov 11 06:35:32 2016 -0500

    Document Indented Here-docs

M       pod/perlop.pod

commit cdd6375d6dbc0eea3676faa4e615af1ae28bb103
Author: Matthew Horsfall <wolfs...@gmail.com>
Date:   Fri Nov 11 04:58:18 2016 -0500

    Add indented here-docs.
    
    This adds a new modifier '~' to here-docs that tells the parser
    that it should look for /^\s*$DELIM\n/ as the closing delimiter.
    
    These syntaxes are all supported:
    
      <<~EOF;
      <<~\EOF;
      <<~'EOF';
      <<~"EOF";
      <<~`EOF`;
      <<~ 'EOF';
      <<~ "EOF";
      <<~ `EOF`;
    
    The '~' modifier will strip, from each line in the here-doc, the
    same whitespace that appears before the delimiter.
    
    Newlines will be copied as is, and lines that don't include the
    proper beginning whitespace will cause perl to croak.
    
    Some examples:
    
      if (1) {
        print <<~EOF;
          Hello there
          EOF
      }
    
    prints "Hello there\n";
    
    The following:
    
      if (1) {
        print <<~EOF;
         Hello There
          EOF
      }
    
    croaks with:
    
      Indentation on line 1 of here-doc doesn't match delimiter at - line 2.

M       pod/perldiag.pod
M       t/lib/warnings/toke
M       t/op/exec.t
M       t/op/heredoc.t
M       toke.c
-----------------------------------------------------------------------

Summary of changes:
 pod/perldelta.pod   |  32 +++++++++++
 pod/perldiag.pod    |  20 +++++++
 pod/perlop.pod      |  52 ++++++++++++++++++
 t/lib/warnings/toke |   9 +++
 t/op/exec.t         |   6 +-
 t/op/heredoc.t      | 104 ++++++++++++++++++++++++++++++++++-
 toke.c              | 154 ++++++++++++++++++++++++++++++++++++++++++++++------
 7 files changed, 357 insertions(+), 20 deletions(-)

diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index d686561..4c15c0a 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -27,6 +27,38 @@ here, but most should go in the L</Performance Enhancements> 
section.
 
 [ List each enhancement as a =head2 entry ]
 
+=head2 Indented Here-documents
+
+This adds a new modifier '~' to here-docs that tells the parser
+that it should look for /^\s*$DELIM\n/ as the closing delimiter.
+
+These syntaxes are all supported:
+
+    <<~EOF;
+    <<~\EOF;
+    <<~'EOF';
+    <<~"EOF";
+    <<~`EOF`;
+    <<~ 'EOF';
+    <<~ "EOF";
+    <<~ `EOF`;
+
+The '~' modifier will strip, from each line in the here-doc, the
+same whitespace that appears before the delimiter.
+
+Newlines will be copied as is, and lines that don't include the
+proper beginning whitespace will cause perl to croak.
+
+For example:
+
+    if (1) {
+      print <<~EOF;
+        Hello there
+        EOF
+    }
+
+prints "Hello there\n" with no leading whitespace.
+
 =head1 Security
 
 XXX Any security-related notices go here.  In particular, any security
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 5740059..89ad147 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -2733,6 +2733,26 @@ parent '%s'
 C3-consistent, and you have enabled the C3 MRO for this class.  See the C3
 documentation in L<mro> for more information.
 
+=item Indentation on line %d of here-doc doesn't match delimiter
+
+(F) You have an indented here-document where one or more of its lines
+have whitespace at the beginning that does not match the closing
+delimiter.
+
+For example, line 2 below is wrong because it does not have at least
+2 spaces, but lines 1 and 3 are fine because they have at least 2:
+
+    if ($something) {
+      print <<~EOF;
+        Line 1
+       Line 2 not
+          Line 3
+        EOF
+    }
+
+Note that tabs and spaces are compared strictly, meaning 1 tab will
+not match 8 spaces.
+
 =item Infinite recursion in regex
 
 (F) You used a pattern that references itself without consuming any input
diff --git a/pod/perlop.pod b/pod/perlop.pod
index 75f266c..8fefc4e 100644
--- a/pod/perlop.pod
+++ b/pod/perlop.pod
@@ -2511,6 +2511,9 @@ syntax.  Following a C<< << >> you specify a string to 
terminate
 the quoted material, and all lines following the current line down to
 the terminating string are the value of the item.
 
+Prefixing the terminating string with a C<~> specifies that you
+want to use L</Indented Here-docs> (see below).
+
 The terminating string may be either an identifier (a word), or some
 quoted text.  An unquoted identifier works like double quotes.
 There may not be a space between the C<< << >> and the identifier,
@@ -2574,6 +2577,55 @@ the results of the execution returned.
 
 =back
 
+=over 4
+
+=item Indented Here-docs
+
+The here-doc modifier C<~> allows you to indent your here-docs to make
+the code more readable:
+
+    if ($some_var) {
+      print <<~EOF;
+        This is a here-doc
+        EOF
+    }
+
+This will print...
+
+    This is a here-doc
+
+...with no leading whitespace.
+
+The delimiter is used to determine the B<exact> whitespace to
+remove from the beginning of each line.  All lines B<must> have
+at least the same starting whitespace (except lines only
+containing a newline) or perl will croak.  Tabs and spaces can
+be mixed, but are matched exactly.  One tab will not be equal to
+8 spaces!
+
+Additional beginning whitespace (beyond what preceded the
+delimiter) will be preserved:
+
+    print <<~EOF;
+      This text is not indented
+        This text is indented with two spaces
+               This text is indented with two tabs
+      EOF
+
+Finally, the modifier may be used with all of the forms
+mentioned above:
+
+    <<~\EOF;
+    <<~'EOF'
+    <<~"EOF"
+    <<~`EOF`
+
+And whitespace may be used between the C<~> and quoted delimiters:
+
+    <<~ 'EOF'; # ... "EOF", `EOF`
+
+=back
+
 It is possible to stack multiple here-docs in a row:
 
        print <<"foo", <<"bar"; # you can stack them
diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke
index 3e829c7..1eb9f2e 100644
--- a/t/lib/warnings/toke
+++ b/t/lib/warnings/toke
@@ -150,6 +150,15 @@ EXPECT
 Use of bare << to mean <<"" is deprecated at - line 2.
 ########
 # toke.c
+$a = <<~;
+
+$a = <<~ ;
+
+EXPECT
+Use of bare << to mean <<"" is deprecated at - line 2.
+Use of bare << to mean <<"" is deprecated at - line 4.
+########
+# toke.c
 $a =~ m/$foo/eq;
 $a =~ s/$foo/fool/seq;
 
diff --git a/t/op/exec.t b/t/op/exec.t
index 886c323..1155439 100644
--- a/t/op/exec.t
+++ b/t/op/exec.t
@@ -36,7 +36,7 @@ $ENV{LANGUAGE} = 'C';         # Ditto in GNU.
 my $Is_VMS   = $^O eq 'VMS';
 my $Is_Win32 = $^O eq 'MSWin32';
 
-plan(tests => 24);
+plan(tests => 25);
 
 my $Perl = which_perl();
 
@@ -123,6 +123,10 @@ is( <<`END`,                    "ok\n",     '<<`HEREDOC`' 
);
 $Perl -le "print 'ok'"
 END
 
+is( <<~`END`,                   "ok\n",     '<<~`HEREDOC`' );
+  $Perl -le "print 'ok'"
+  END
+
 {
     local $_ = qq($Perl -le "print 'ok'");
     is( readpipe, "ok\n", 'readpipe default argument' );
diff --git a/t/op/heredoc.t b/t/op/heredoc.t
index 13d1074..b7e4c7d 100644
--- a/t/op/heredoc.t
+++ b/t/op/heredoc.t
@@ -7,8 +7,7 @@ BEGIN {
 }
 
 use strict;
-plan(tests => 43);
-
+plan(tests => 136);
 
 # heredoc without newline (#65838)
 {
@@ -124,5 +123,106 @@ HEREDOC
         {},
         "delimcpy(): handle last char being backslash properly"
     );
+}
+
+
+# indented here-docs
+{
+    my $string = 'some data';
+
+    my %delimiters = (
+        q{EOF}     => "EOF",
+        q{'EOF'}   => "EOF",
+        q{"EOF"}   => "EOF",
+        q{\EOF}    => "EOF",
+        q{' EOF'}  => " EOF",
+        q{'EOF '}  => "EOF ",
+        q{' EOF '} => " EOF ",
+        q{" EOF"}  => " EOF",
+        q{"EOF "}  => "EOF ",
+        q{" EOF "} => " EOF ",
+        q{''}      => "",
+        q{""}      => "",
+    );
 
+    my @modifiers = ("~", "~ ");
+
+    my @script_ends = ("", "\n");
+
+    my @tests;
+
+    for my $start_delim (sort keys %delimiters) {
+        my $end_delim = $delimiters{$start_delim};
+
+        for my $modifier (@modifiers) {
+            # For now, "<<~ EOF" and "<<~ \EOF" aren't allowed
+            next if $modifier =~ /\s/ && $start_delim !~ /('|")/n;
+
+            for my $script_end (@script_ends) {
+                # Normal heredoc
+                my $test =   "print <<$modifier$start_delim\n  $string\n"
+                           . "  $end_delim$script_end";
+                unshift @tests, [
+                    $test,
+                    $string,
+                    "Indented here-doc:  $test",
+                ];
+
+                # Eval'd heredoc
+                my $safe_start_delim = $start_delim =~ s/'/\\'/gr;
+                my $eval = "
+                    \$_ = '';
+                    eval 's//<<$modifier$safe_start_delim.\"\"/e; print
+                        $string
+                        $end_delim$script_end'
+                    or die \$\@
+                ";
+                push @tests, [
+                    $eval,
+                    $string,
+                    "Eval'd Indented here-doc: $eval",
+                ];
+            }
+        }
+    }
+
+    push @tests, [
+        "print <<~EOF;\n\t \t$string\n\t \tEOF\n",
+        $string,
+        "indented here-doc with tabs and spaces",
+    ];
+
+    push @tests, [
+        "print <<~EOF;\n\t \tx EOF\n\t \t$string\n\t \tEOF\n",
+         "x EOF\n$string",
+        "Embedded delimiter ignored",
+    ];
+
+    push @tests, [
+        "print <<~EOF;\n\t \t$string\n\t \tTEOF",
+        "Can't find string terminator \"EOF\" anywhere before EOF at - line 
1.",
+        "indented here-doc missing terminator error is correct"
+    ];
+
+    push @tests, [
+        "print <<~EOF;\n $string\n$string\n   $string\n $string\n   EOF",
+        "Indentation on line 1 of here-doc doesn't match delimiter at - line 
1.\n",
+        "indented here-doc with bad indentation"
+    ];
+
+    # If our delim is " EOF ", make sure other spaced version don't match
+    push @tests, [
+        "print <<~' EOF ';\n $string\n EOF\nEOF \n  EOF  \n EOF \n",
+        " $string\n EOF\nEOF \n  EOF  \n",
+        "intented here-doc matches final delimiter correctly"
+    ];
+
+    for my $test (@tests) {
+        fresh_perl_is(
+            $test->[0],
+            $test->[1],
+            { switches => ['-w'], stderr => 1 },
+            $test->[2],
+        );
+    }
 }
diff --git a/toke.c b/toke.c
index 1131063..524a999 100644
--- a/toke.c
+++ b/toke.c
@@ -9560,6 +9560,9 @@ S_scan_heredoc(pTHX_ char *s)
     char *d;
     char *e;
     char *peek;
+    char *indent = 0;
+    I32 indent_len = 0;
+    bool indented = FALSE;
     const bool infile = PL_rsfp || PL_parser->filtered;
     const line_t origline = CopLINE(PL_curcop);
     LEXSHARED *shared = PL_parser->lex_shared;
@@ -9571,6 +9574,10 @@ S_scan_heredoc(pTHX_ char *s)
     e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
     *PL_tokenbuf = '\n';
     peek = s;
+    if (*peek == '~') {
+       indented = TRUE;
+       peek++; s++;
+    }
     while (SPACE_OR_TAB(*peek))
        peek++;
     if (*peek == '`' || *peek == '\'' || *peek =='"') {
@@ -9693,12 +9700,45 @@ S_scan_heredoc(pTHX_ char *s)
        linestr = shared->ls_linestr;
        bufend = SvEND(linestr);
        d = s;
-       while (s < bufend - len + 1
-               && memNE(s,PL_tokenbuf,len) )
-        {
-           if (*s++ == '\n')
-               ++PL_parser->herelines;
+       if (indented) {
+           char *myolds = s;
+
+           while (s < bufend - len + 1) {
+               if (*s++ == '\n')
+                   ++PL_parser->herelines;
+
+               if (memEQ(s, PL_tokenbuf + 1, len - 1)) {
+                   char *backup = s;
+                   indent_len = 0;
+
+                   /* Only valid if it's preceded by whitespace only */
+                   while (backup != myolds && --backup >= myolds) {
+                       if (*backup != ' ' && *backup != '\t') {
+                           break;
+                       }
+
+                       indent_len++;
+                   }
+
+                   /* No whitespace or all! */
+                   if (backup == s || *backup == '\n') {
+                       Newxz(indent, indent_len + 1, char);
+                       memcpy(indent, backup + 1, indent_len);
+                       s--; /* before our delimiter */
+                       PL_parser->herelines--; /* this line doesn't count */
+                       break;
+                   }
+               }
+           }
+       } else {
+           while (s < bufend - len + 1
+                  && memNE(s,PL_tokenbuf,len) )
+           {
+               if (*s++ == '\n')
+                   ++PL_parser->herelines;
+           }
        }
+
        if (s >= bufend - len + 1) {
            goto interminable;
        }
@@ -9800,23 +9840,103 @@ S_scan_heredoc(pTHX_ char *s)
        else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
            PL_bufend[-1] = '\n';
 #endif
-       if (*s == term && PL_bufend-s >= len
-        && memEQ(s,PL_tokenbuf + 1,len)) {
-           SvREFCNT_dec(PL_linestr);
-           PL_linestr = linestr_save;
-           PL_linestart = SvPVX(linestr_save);
-           PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
-            PL_oldbufptr = oldbufptr_save;
-            PL_oldoldbufptr = oldoldbufptr_save;
-           s = d;
-           break;
-       }
-       else {
+       if (indented && (PL_bufend-s) >= len) {
+           char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf 
+1 + len));
+
+           if (found) {
+               char *backup = found;
+               indent_len = 0;
+
+               /* Only valid if it's preceded by whitespace only */
+               while (backup != s && --backup >= s) {
+                   if (*backup != ' ' && *backup != '\t') {
+                       break;
+                   }
+                   indent_len++;
+               }
+
+               /* All whitespace or none! */
+               if (backup == found || *backup == ' ' || *backup == '\t') {
+                   Newxz(indent, indent_len + 1, char);
+                   memcpy(indent, backup, indent_len);
+                   SvREFCNT_dec(PL_linestr);
+                   PL_linestr = linestr_save;
+                   PL_linestart = SvPVX(linestr_save);
+                   PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+                   PL_oldbufptr = oldbufptr_save;
+                   PL_oldoldbufptr = oldoldbufptr_save;
+                   s = d;
+                   break;
+               }
+           }
+
+           /* Didn't find it */
            sv_catsv(tmpstr,PL_linestr);
+       } else {
+           if (*s == term && PL_bufend-s >= len
+               && memEQ(s,PL_tokenbuf + 1,len))
+           {
+               SvREFCNT_dec(PL_linestr);
+               PL_linestr = linestr_save;
+               PL_linestart = SvPVX(linestr_save);
+               PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+               PL_oldbufptr = oldbufptr_save;
+               PL_oldoldbufptr = oldoldbufptr_save;
+               s = d;
+               break;
+           } else {
+               sv_catsv(tmpstr,PL_linestr);
+           }
        }
       }
     }
     PL_multi_end = origline + PL_parser->herelines;
+    if (indented && indent) {
+       STRLEN linecount = 1;
+       STRLEN herelen = SvCUR(tmpstr);
+       char *ss = SvPVX(tmpstr);
+       char *se = ss + herelen;
+       SV *newstr = newSVpvs("");
+       SvGROW(newstr, herelen);
+
+       /* Trim leading whitespace */
+       while (ss < se) {
+           /* newline only? Copy and move on */
+           if (*ss == '\n') {
+               sv_catpv(newstr,"\n");
+               ss++;
+
+           /* Found our indentation? Strip it */
+           } else if (se - ss >= indent_len
+                      && memEQ(ss, indent, indent_len))
+           {
+               STRLEN le = 0;
+
+               ss += indent_len;
+
+               while ((ss + le) < se && *(ss + le) != '\n')
+                   le++;
+
+               sv_catpvn(newstr, ss, le);
+
+               ss += le;
+
+           /* Line doesn't begin with our indentation? Croak */
+           } else {
+               Perl_croak(aTHX_
+                   "Indentation on line %d of here-doc doesn't match 
delimiter",
+                   (int)linecount
+               );
+           }
+
+           linecount++;
+       }
+
+       sv_setsv(tmpstr,newstr);
+
+       Safefree(indent);
+       SvREFCNT_dec_NN(newstr);
+    }
     if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
        SvPV_shrink_to_cur(tmpstr);
     }

--
Perl5 Master Repository

[perl.git] branch blead, updated. v5.25.6-251-g894226f

Reply via email to