In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/894226fa42af3bb6f50857abf7febd95b6fcd2bc?hp=ee16020279bc895096981c490d3477b7a8deebae>
- Log ----------------------------------------------------------------- commit 894226fa42af3bb6f50857abf7febd95b6fcd2bc Author: Matthew Horsfall <wolfs...@gmail.com> Date: Mon Nov 14 08:03:33 2016 -0500 Add perldelta for Indented Here-docs M pod/perldelta.pod commit 47eb44116b9aa0f7f4c7c732af48fc3dad2a7aa2 Author: Matthew Horsfall <wolfs...@gmail.com> Date: Fri Nov 11 06:35:32 2016 -0500 Document Indented Here-docs M pod/perlop.pod commit cdd6375d6dbc0eea3676faa4e615af1ae28bb103 Author: Matthew Horsfall <wolfs...@gmail.com> Date: Fri Nov 11 04:58:18 2016 -0500 Add indented here-docs. This adds a new modifier '~' to here-docs that tells the parser that it should look for /^\s*$DELIM\n/ as the closing delimiter. These syntaxes are all supported: <<~EOF; <<~\EOF; <<~'EOF'; <<~"EOF"; <<~`EOF`; <<~ 'EOF'; <<~ "EOF"; <<~ `EOF`; The '~' modifier will strip, from each line in the here-doc, the same whitespace that appears before the delimiter. Newlines will be copied as is, and lines that don't include the proper beginning whitespace will cause perl to croak. Some examples: if (1) { print <<~EOF; Hello there EOF } prints "Hello there\n"; The following: if (1) { print <<~EOF; Hello There EOF } croaks with: Indentation on line 1 of here-doc doesn't match delimiter at - line 2. M pod/perldiag.pod M t/lib/warnings/toke M t/op/exec.t M t/op/heredoc.t M toke.c ----------------------------------------------------------------------- Summary of changes: pod/perldelta.pod | 32 +++++++++++ pod/perldiag.pod | 20 +++++++ pod/perlop.pod | 52 ++++++++++++++++++ t/lib/warnings/toke | 9 +++ t/op/exec.t | 6 +- t/op/heredoc.t | 104 ++++++++++++++++++++++++++++++++++- toke.c | 154 ++++++++++++++++++++++++++++++++++++++++++++++------ 7 files changed, 357 insertions(+), 20 deletions(-) diff --git a/pod/perldelta.pod b/pod/perldelta.pod index d686561..4c15c0a 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -27,6 +27,38 @@ here, but most should go in the L</Performance Enhancements> section. [ List each enhancement as a =head2 entry ] +=head2 Indented Here-documents + +This adds a new modifier '~' to here-docs that tells the parser +that it should look for /^\s*$DELIM\n/ as the closing delimiter. + +These syntaxes are all supported: + + <<~EOF; + <<~\EOF; + <<~'EOF'; + <<~"EOF"; + <<~`EOF`; + <<~ 'EOF'; + <<~ "EOF"; + <<~ `EOF`; + +The '~' modifier will strip, from each line in the here-doc, the +same whitespace that appears before the delimiter. + +Newlines will be copied as is, and lines that don't include the +proper beginning whitespace will cause perl to croak. + +For example: + + if (1) { + print <<~EOF; + Hello there + EOF + } + +prints "Hello there\n" with no leading whitespace. + =head1 Security XXX Any security-related notices go here. In particular, any security diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 5740059..89ad147 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -2733,6 +2733,26 @@ parent '%s' C3-consistent, and you have enabled the C3 MRO for this class. See the C3 documentation in L<mro> for more information. +=item Indentation on line %d of here-doc doesn't match delimiter + +(F) You have an indented here-document where one or more of its lines +have whitespace at the beginning that does not match the closing +delimiter. + +For example, line 2 below is wrong because it does not have at least +2 spaces, but lines 1 and 3 are fine because they have at least 2: + + if ($something) { + print <<~EOF; + Line 1 + Line 2 not + Line 3 + EOF + } + +Note that tabs and spaces are compared strictly, meaning 1 tab will +not match 8 spaces. + =item Infinite recursion in regex (F) You used a pattern that references itself without consuming any input diff --git a/pod/perlop.pod b/pod/perlop.pod index 75f266c..8fefc4e 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -2511,6 +2511,9 @@ syntax. Following a C<< << >> you specify a string to terminate the quoted material, and all lines following the current line down to the terminating string are the value of the item. +Prefixing the terminating string with a C<~> specifies that you +want to use L</Indented Here-docs> (see below). + The terminating string may be either an identifier (a word), or some quoted text. An unquoted identifier works like double quotes. There may not be a space between the C<< << >> and the identifier, @@ -2574,6 +2577,55 @@ the results of the execution returned. =back +=over 4 + +=item Indented Here-docs + +The here-doc modifier C<~> allows you to indent your here-docs to make +the code more readable: + + if ($some_var) { + print <<~EOF; + This is a here-doc + EOF + } + +This will print... + + This is a here-doc + +...with no leading whitespace. + +The delimiter is used to determine the B<exact> whitespace to +remove from the beginning of each line. All lines B<must> have +at least the same starting whitespace (except lines only +containing a newline) or perl will croak. Tabs and spaces can +be mixed, but are matched exactly. One tab will not be equal to +8 spaces! + +Additional beginning whitespace (beyond what preceded the +delimiter) will be preserved: + + print <<~EOF; + This text is not indented + This text is indented with two spaces + This text is indented with two tabs + EOF + +Finally, the modifier may be used with all of the forms +mentioned above: + + <<~\EOF; + <<~'EOF' + <<~"EOF" + <<~`EOF` + +And whitespace may be used between the C<~> and quoted delimiters: + + <<~ 'EOF'; # ... "EOF", `EOF` + +=back + It is possible to stack multiple here-docs in a row: print <<"foo", <<"bar"; # you can stack them diff --git a/t/lib/warnings/toke b/t/lib/warnings/toke index 3e829c7..1eb9f2e 100644 --- a/t/lib/warnings/toke +++ b/t/lib/warnings/toke @@ -150,6 +150,15 @@ EXPECT Use of bare << to mean <<"" is deprecated at - line 2. ######## # toke.c +$a = <<~; + +$a = <<~ ; + +EXPECT +Use of bare << to mean <<"" is deprecated at - line 2. +Use of bare << to mean <<"" is deprecated at - line 4. +######## +# toke.c $a =~ m/$foo/eq; $a =~ s/$foo/fool/seq; diff --git a/t/op/exec.t b/t/op/exec.t index 886c323..1155439 100644 --- a/t/op/exec.t +++ b/t/op/exec.t @@ -36,7 +36,7 @@ $ENV{LANGUAGE} = 'C'; # Ditto in GNU. my $Is_VMS = $^O eq 'VMS'; my $Is_Win32 = $^O eq 'MSWin32'; -plan(tests => 24); +plan(tests => 25); my $Perl = which_perl(); @@ -123,6 +123,10 @@ is( <<`END`, "ok\n", '<<`HEREDOC`' ); $Perl -le "print 'ok'" END +is( <<~`END`, "ok\n", '<<~`HEREDOC`' ); + $Perl -le "print 'ok'" + END + { local $_ = qq($Perl -le "print 'ok'"); is( readpipe, "ok\n", 'readpipe default argument' ); diff --git a/t/op/heredoc.t b/t/op/heredoc.t index 13d1074..b7e4c7d 100644 --- a/t/op/heredoc.t +++ b/t/op/heredoc.t @@ -7,8 +7,7 @@ BEGIN { } use strict; -plan(tests => 43); - +plan(tests => 136); # heredoc without newline (#65838) { @@ -124,5 +123,106 @@ HEREDOC {}, "delimcpy(): handle last char being backslash properly" ); +} + + +# indented here-docs +{ + my $string = 'some data'; + + my %delimiters = ( + q{EOF} => "EOF", + q{'EOF'} => "EOF", + q{"EOF"} => "EOF", + q{\EOF} => "EOF", + q{' EOF'} => " EOF", + q{'EOF '} => "EOF ", + q{' EOF '} => " EOF ", + q{" EOF"} => " EOF", + q{"EOF "} => "EOF ", + q{" EOF "} => " EOF ", + q{''} => "", + q{""} => "", + ); + my @modifiers = ("~", "~ "); + + my @script_ends = ("", "\n"); + + my @tests; + + for my $start_delim (sort keys %delimiters) { + my $end_delim = $delimiters{$start_delim}; + + for my $modifier (@modifiers) { + # For now, "<<~ EOF" and "<<~ \EOF" aren't allowed + next if $modifier =~ /\s/ && $start_delim !~ /('|")/n; + + for my $script_end (@script_ends) { + # Normal heredoc + my $test = "print <<$modifier$start_delim\n $string\n" + . " $end_delim$script_end"; + unshift @tests, [ + $test, + $string, + "Indented here-doc: $test", + ]; + + # Eval'd heredoc + my $safe_start_delim = $start_delim =~ s/'/\\'/gr; + my $eval = " + \$_ = ''; + eval 's//<<$modifier$safe_start_delim.\"\"/e; print + $string + $end_delim$script_end' + or die \$\@ + "; + push @tests, [ + $eval, + $string, + "Eval'd Indented here-doc: $eval", + ]; + } + } + } + + push @tests, [ + "print <<~EOF;\n\t \t$string\n\t \tEOF\n", + $string, + "indented here-doc with tabs and spaces", + ]; + + push @tests, [ + "print <<~EOF;\n\t \tx EOF\n\t \t$string\n\t \tEOF\n", + "x EOF\n$string", + "Embedded delimiter ignored", + ]; + + push @tests, [ + "print <<~EOF;\n\t \t$string\n\t \tTEOF", + "Can't find string terminator \"EOF\" anywhere before EOF at - line 1.", + "indented here-doc missing terminator error is correct" + ]; + + push @tests, [ + "print <<~EOF;\n $string\n$string\n $string\n $string\n EOF", + "Indentation on line 1 of here-doc doesn't match delimiter at - line 1.\n", + "indented here-doc with bad indentation" + ]; + + # If our delim is " EOF ", make sure other spaced version don't match + push @tests, [ + "print <<~' EOF ';\n $string\n EOF\nEOF \n EOF \n EOF \n", + " $string\n EOF\nEOF \n EOF \n", + "intented here-doc matches final delimiter correctly" + ]; + + for my $test (@tests) { + fresh_perl_is( + $test->[0], + $test->[1], + { switches => ['-w'], stderr => 1 }, + $test->[2], + ); + } } diff --git a/toke.c b/toke.c index 1131063..524a999 100644 --- a/toke.c +++ b/toke.c @@ -9560,6 +9560,9 @@ S_scan_heredoc(pTHX_ char *s) char *d; char *e; char *peek; + char *indent = 0; + I32 indent_len = 0; + bool indented = FALSE; const bool infile = PL_rsfp || PL_parser->filtered; const line_t origline = CopLINE(PL_curcop); LEXSHARED *shared = PL_parser->lex_shared; @@ -9571,6 +9574,10 @@ S_scan_heredoc(pTHX_ char *s) e = PL_tokenbuf + sizeof PL_tokenbuf - 1; *PL_tokenbuf = '\n'; peek = s; + if (*peek == '~') { + indented = TRUE; + peek++; s++; + } while (SPACE_OR_TAB(*peek)) peek++; if (*peek == '`' || *peek == '\'' || *peek =='"') { @@ -9693,12 +9700,45 @@ S_scan_heredoc(pTHX_ char *s) linestr = shared->ls_linestr; bufend = SvEND(linestr); d = s; - while (s < bufend - len + 1 - && memNE(s,PL_tokenbuf,len) ) - { - if (*s++ == '\n') - ++PL_parser->herelines; + if (indented) { + char *myolds = s; + + while (s < bufend - len + 1) { + if (*s++ == '\n') + ++PL_parser->herelines; + + if (memEQ(s, PL_tokenbuf + 1, len - 1)) { + char *backup = s; + indent_len = 0; + + /* Only valid if it's preceded by whitespace only */ + while (backup != myolds && --backup >= myolds) { + if (*backup != ' ' && *backup != '\t') { + break; + } + + indent_len++; + } + + /* No whitespace or all! */ + if (backup == s || *backup == '\n') { + Newxz(indent, indent_len + 1, char); + memcpy(indent, backup + 1, indent_len); + s--; /* before our delimiter */ + PL_parser->herelines--; /* this line doesn't count */ + break; + } + } + } + } else { + while (s < bufend - len + 1 + && memNE(s,PL_tokenbuf,len) ) + { + if (*s++ == '\n') + ++PL_parser->herelines; + } } + if (s >= bufend - len + 1) { goto interminable; } @@ -9800,23 +9840,103 @@ S_scan_heredoc(pTHX_ char *s) else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r') PL_bufend[-1] = '\n'; #endif - if (*s == term && PL_bufend-s >= len - && memEQ(s,PL_tokenbuf + 1,len)) { - SvREFCNT_dec(PL_linestr); - PL_linestr = linestr_save; - PL_linestart = SvPVX(linestr_save); - PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); - PL_oldbufptr = oldbufptr_save; - PL_oldoldbufptr = oldoldbufptr_save; - s = d; - break; - } - else { + if (indented && (PL_bufend-s) >= len) { + char * found = ninstr(s, PL_bufend, (PL_tokenbuf + 1), (PL_tokenbuf +1 + len)); + + if (found) { + char *backup = found; + indent_len = 0; + + /* Only valid if it's preceded by whitespace only */ + while (backup != s && --backup >= s) { + if (*backup != ' ' && *backup != '\t') { + break; + } + indent_len++; + } + + /* All whitespace or none! */ + if (backup == found || *backup == ' ' || *backup == '\t') { + Newxz(indent, indent_len + 1, char); + memcpy(indent, backup, indent_len); + SvREFCNT_dec(PL_linestr); + PL_linestr = linestr_save; + PL_linestart = SvPVX(linestr_save); + PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_oldbufptr = oldbufptr_save; + PL_oldoldbufptr = oldoldbufptr_save; + s = d; + break; + } + } + + /* Didn't find it */ sv_catsv(tmpstr,PL_linestr); + } else { + if (*s == term && PL_bufend-s >= len + && memEQ(s,PL_tokenbuf + 1,len)) + { + SvREFCNT_dec(PL_linestr); + PL_linestr = linestr_save; + PL_linestart = SvPVX(linestr_save); + PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr); + PL_oldbufptr = oldbufptr_save; + PL_oldoldbufptr = oldoldbufptr_save; + s = d; + break; + } else { + sv_catsv(tmpstr,PL_linestr); + } } } } PL_multi_end = origline + PL_parser->herelines; + if (indented && indent) { + STRLEN linecount = 1; + STRLEN herelen = SvCUR(tmpstr); + char *ss = SvPVX(tmpstr); + char *se = ss + herelen; + SV *newstr = newSVpvs(""); + SvGROW(newstr, herelen); + + /* Trim leading whitespace */ + while (ss < se) { + /* newline only? Copy and move on */ + if (*ss == '\n') { + sv_catpv(newstr,"\n"); + ss++; + + /* Found our indentation? Strip it */ + } else if (se - ss >= indent_len + && memEQ(ss, indent, indent_len)) + { + STRLEN le = 0; + + ss += indent_len; + + while ((ss + le) < se && *(ss + le) != '\n') + le++; + + sv_catpvn(newstr, ss, le); + + ss += le; + + /* Line doesn't begin with our indentation? Croak */ + } else { + Perl_croak(aTHX_ + "Indentation on line %d of here-doc doesn't match delimiter", + (int)linecount + ); + } + + linecount++; + } + + sv_setsv(tmpstr,newstr); + + Safefree(indent); + SvREFCNT_dec_NN(newstr); + } if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { SvPV_shrink_to_cur(tmpstr); } -- Perl5 Master Repository