In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/be332ba06074217fe64db4a8ba1152985b21428a?hp=99111b894a3d385cc1a6057abf20c35e849caaa8>
- Log ----------------------------------------------------------------- commit be332ba06074217fe64db4a8ba1152985b21428a Author: Karl Williamson <k...@cpan.org> Date: Thu Mar 2 11:31:36 2017 -0700 Fatalize \N{} This has been deprecated, scheduled to be fatal now. M pod/perldelta.pod M pod/perldeprecation.pod M pod/perldiag.pod M t/lib/warnings/regcomp M t/re/pat_advanced.t M toke.c commit 37ee551862cea8d768e64169c7e6a464282d3f90 Author: Karl Williamson <k...@cpan.org> Date: Thu Mar 2 11:26:22 2017 -0700 t/re/pat_advanced.t: Prepare for fatal \N{} The next commit will make \N{} fatal, but we still allow a custom charnames handler to evaluate a name to the empty string, and that still needs to be tested. This changes to do that. M t/re/pat_advanced.t commit 90c172854f0c6789f3c87c398725be2f833a1004 Author: Karl Williamson <k...@cpan.org> Date: Thu Mar 2 11:19:39 2017 -0700 Move tests to pat_advanced.t The next commit will change these tests to require the infrastructure already available in pat_advanced.t M t/re/pat.t M t/re/pat_advanced.t M t/re/re_tests commit 73157762e2382837c0c047c77aca4bb0f5377216 Author: Karl Williamson <k...@cpan.org> Date: Wed Mar 1 21:52:22 2017 -0700 regcomp.c: Simplify expression Here, there is no advantage to assigning a variable within an 'if', and it is somewhat harder to read, so don't do it. M regcomp.c commit b7ad6c3e180a5e9028b5b3b88da3a2b0b45486fb Author: Karl Williamson <k...@cpan.org> Date: Tue Feb 14 12:04:28 2017 -0700 re/pat_advanced.t: Convert ok to like 'like' gives better diagnostics than 'ok'. This converts the ones it is straight forward to do. M t/re/pat_advanced.t ----------------------------------------------------------------------- Summary of changes: pod/perldelta.pod | 4 + pod/perldeprecation.pod | 5 +- pod/perldiag.pod | 6 - regcomp.c | 3 +- t/lib/warnings/regcomp | 31 +---- t/re/pat.t | 7 +- t/re/pat_advanced.t | 293 ++++++++++++++++++++++++------------------------ t/re/re_tests | 1 - toke.c | 5 +- 9 files changed, 161 insertions(+), 194 deletions(-) diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 584474d729..1a83f22830 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -55,6 +55,10 @@ allowed. This has been deprecated since perl 5.000. These have been no-ops and deprecated since perl 5.12 and 5.10, respectively. +=head2 C<\N{}> with nothing between the braces is now illegal. + +This has been deprecated since Perl 5.24. + =head1 Deprecations XXX Any deprecated features, syntax, modules etc. should be listed here. diff --git a/pod/perldeprecation.pod b/pod/perldeprecation.pod index c203655efe..b13483e30f 100644 --- a/pod/perldeprecation.pod +++ b/pod/perldeprecation.pod @@ -323,17 +323,14 @@ It's allowed to use a list of variables in a format, without separating them with commas. This usage has been deprecated for a long time, and it will be a fatal error in Perl 5.28. - - =head3 Use of C<\N{}> Use of C<\N{}> with nothing between the braces was deprecated in -Perl 5.24, and will throw a fatal error in Perl 5.28. +Perl 5.24, and throws a fatal error as of Perl 5.28. Since such a construct is equivalent to using an empty string, you are recommended to remove such C<\N{}> constructs. - =head3 Using the same symbol to open a filehandle and a dirhandle It used to be legal to use C<open()> to associate both a diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 5652b10b6b..876833338f 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -6483,12 +6483,6 @@ exactly, regardless of whether C<:loose> is used or not.) This error may also happen if the C<\N{}> is not in the scope of the corresponding C<S<use charnames>>. -=item Unknown charname '' is deprecated. Its use will be fatal in Perl 5.28 - -(D deprecated) You had a C<\N{}> with nothing between the braces. This -usage was deprecated in Perl 5.24, and will be made a syntax error in -in Perl 5.28. - =item Unknown error (P) Perl was about to print an error message in C<$@>, but the C<$@> variable diff --git a/regcomp.c b/regcomp.c index 6fc3716951..97c5949257 100644 --- a/regcomp.c +++ b/regcomp.c @@ -12017,7 +12017,8 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, RExC_parse++; /* Skip past the '{' */ - if (! (endbrace = strchr(RExC_parse, '}'))) { /* no trailing brace */ + endbrace = strchr(RExC_parse, '}'); + if (! endbrace) { /* no trailing brace */ vFAIL2("Missing right brace on \\%c{}", 'N'); } else if(!(endbrace == RExC_parse /* nothing between the {} */ diff --git a/t/lib/warnings/regcomp b/t/lib/warnings/regcomp index 2b084c59b0..129b24ac04 100644 --- a/t/lib/warnings/regcomp +++ b/t/lib/warnings/regcomp @@ -83,38 +83,15 @@ EXPECT ]]]]][\ ==> Unmatched [ in regex; marked by <-- HERE in m/]]]]][\ <-- HERE / at - line 2. ######## # NAME [perl #123417] -use warnings 'regexp'; -qr/[\N{}]/; -qr/\N{}/; -no warnings 'regexp'; -qr/[\N{}]/; -qr/\N{}/; -no warnings 'deprecated'; -qr/[\N{}]/; -qr/\N{}/; -EXPECT -Unknown charname '' is deprecated. Its use will be fatal in Perl 5.28 at - line 2. -Ignoring zero length \N{} in character class in regex; marked by <-- HERE in m/[\N{} <-- HERE ]/ at - line 2. -Unknown charname '' is deprecated. Its use will be fatal in Perl 5.28 at - line 3. -Unknown charname '' is deprecated. Its use will be fatal in Perl 5.28 at - line 5. -Unknown charname '' is deprecated. Its use will be fatal in Perl 5.28 at - line 6. -######## -# NAME [perl #123417] # OPTION fatal -use warnings 'regexp'; -no warnings 'experimental::re_strict'; -use re 'strict'; qr/[\N{}]/; EXPECT -Unknown charname '' is deprecated. Its use will be fatal in Perl 5.28 at - line 5. -Zero length \N{} in regex; marked by <-- HERE in m/[\N{} <-- HERE ]/ at - line 5. +Unknown charname '' at - line 2, within pattern +Execution of - aborted due to compilation errors. ######## # NAME [perl #123417] # OPTION fatal -use warnings 'regexp'; -no warnings 'experimental::re_strict'; -use re 'strict'; qr/\N{}/; EXPECT -Unknown charname '' is deprecated. Its use will be fatal in Perl 5.28 at - line 5. -Zero length \N{} in regex; marked by <-- HERE in m/\N{} <-- HERE / at - line 5. +Unknown charname '' at - line 2, within pattern +Execution of - aborted due to compilation errors. diff --git a/t/re/pat.t b/t/re/pat.t index 16bfc8e773..3ac95e91cf 100644 --- a/t/re/pat.t +++ b/t/re/pat.t @@ -23,7 +23,7 @@ BEGIN { skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader; skip_all_without_unicode_tables(); -plan tests => 837; # Update this when adding/deleting tests. +plan tests => 836; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1814,11 +1814,6 @@ EOP ok($AE =~ $re, '/[\xE6\s]/i matches \xC6 when in UTF-8'); } - { # [perl #126606 crashed the interpreter - no warnings 'deprecated'; - like("sS", qr/\N{}Ss|/i, "\N{} with empty branch alternation works"); - } - { is(0+("\n" =~ m'\n'), 1, q|m'\n' should interpolate escapes|); } diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t index 5e5cc1f1fa..7f0859cf8e 100644 --- a/t/re/pat_advanced.t +++ b/t/re/pat_advanced.t @@ -40,9 +40,9 @@ sub run_tests { # The trick is that in EBCDIC the explicit numeric range should # match (as also in non-EBCDIC) but the explicit alphabetic range # should not match. - ok "\x8e" =~ /[\x89-\x91]/, '"\x8e" =~ /[\x89-\x91]/'; - ok "\xce" =~ /[\xc9-\xd1]/, '"\xce" =~ /[\xc9-\xd1]/'; - ok "\xd0" =~ /[\xc9-\xd1]/, '"\xd0" =~ /[\xc9-\xd1]/'; + like "\x8e", qr/[\x89-\x91]/, '"\x8e" =~ /[\x89-\x91]/'; + like "\xce", qr/[\xc9-\xd1]/, '"\xce" =~ /[\xc9-\xd1]/'; + like "\xd0", qr/[\xc9-\xd1]/, '"\xd0" =~ /[\xc9-\xd1]/'; skip "Not an EBCDIC platform", 2 unless ord ('i') == 0x89 && ord ('J') == 0xd1; @@ -57,8 +57,8 @@ sub run_tests { } { - ok "\x{ab}" =~ /\x{ab}/, '"\x{ab}" =~ /\x{ab}/ '; - ok "\x{abcd}" =~ /\x{abcd}/, '"\x{abcd}" =~ /\x{abcd}/'; + like "\x{ab}", qr/\x{ab}/, '"\x{ab}" =~ /\x{ab}/ '; + like "\x{abcd}", qr/\x{abcd}/, '"\x{abcd}" =~ /\x{abcd}/'; } { @@ -241,107 +241,107 @@ sub run_tests { # Check that \x## works. 5.6.1 and 5.005_03 fail some of these. my $x; $x = "\x4e" . "E"; - ok ($x =~ /^\x4EE$/, "Check only 2 bytes of hex are matched."); + like ($x, qr/^\x4EE$/, "Check only 2 bytes of hex are matched."); $x = "\x4e" . "i"; - ok ($x =~ /^\x4Ei$/, "Check that invalid hex digit stops it (2)"); + like ($x, qr/^\x4Ei$/, "Check that invalid hex digit stops it (2)"); $x = "\x4" . "j"; - ok ($x =~ /^\x4j$/, "Check that invalid hex digit stops it (1)"); + like ($x, qr/^\x4j$/, "Check that invalid hex digit stops it (1)"); $x = "\x0" . "k"; - ok ($x =~ /^\xk$/, "Check that invalid hex digit stops it (0)"); + like ($x, qr/^\xk$/, "Check that invalid hex digit stops it (0)"); $x = "\x0" . "x"; - ok ($x =~ /^\xx$/, "\\xx isn't to be treated as \\0"); + like ($x, qr/^\xx$/, "\\xx isn't to be treated as \\0"); $x = "\x0" . "xa"; - ok ($x =~ /^\xxa$/, "\\xxa isn't to be treated as \\xa"); + like ($x, qr/^\xxa$/, "\\xxa isn't to be treated as \\xa"); $x = "\x9" . "_b"; - ok ($x =~ /^\x9_b$/, "\\x9_b isn't to be treated as \\x9b"); + like ($x, qr/^\x9_b$/, "\\x9_b isn't to be treated as \\x9b"); # and now again in [] ranges $x = "\x4e" . "E"; - ok ($x =~ /^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched."); + like ($x, qr/^[\x4EE]{2}$/, "Check only 2 bytes of hex are matched."); $x = "\x4e" . "i"; - ok ($x =~ /^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)"); + like ($x, qr/^[\x4Ei]{2}$/, "Check that invalid hex digit stops it (2)"); $x = "\x4" . "j"; - ok ($x =~ /^[\x4j]{2}$/, "Check that invalid hex digit stops it (1)"); + like ($x, qr/^[\x4j]{2}$/, "Check that invalid hex digit stops it (1)"); $x = "\x0" . "k"; - ok ($x =~ /^[\xk]{2}$/, "Check that invalid hex digit stops it (0)"); + like ($x, qr/^[\xk]{2}$/, "Check that invalid hex digit stops it (0)"); $x = "\x0" . "x"; - ok ($x =~ /^[\xx]{2}$/, "\\xx isn't to be treated as \\0"); + like ($x, qr/^[\xx]{2}$/, "\\xx isn't to be treated as \\0"); $x = "\x0" . "xa"; - ok ($x =~ /^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa"); + like ($x, qr/^[\xxa]{3}$/, "\\xxa isn't to be treated as \\xa"); $x = "\x9" . "_b"; - ok ($x =~ /^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b"); + like ($x, qr/^[\x9_b]{3}$/, "\\x9_b isn't to be treated as \\x9b"); # Check that \x{##} works. 5.6.1 fails quite a few of these. $x = "\x9b"; - ok ($x =~ /^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b"); + like ($x, qr/^\x{9_b}$/, "\\x{9_b} is to be treated as \\x9b"); $x = "\x9b" . "y"; - ok ($x =~ /^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)"); + like ($x, qr/^\x{9_b}y$/, "\\x{9_b} is to be treated as \\x9b (again)"); $x = "\x9b" . "y"; - ok ($x =~ /^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b"); + like ($x, qr/^\x{9b_}y$/, "\\x{9b_} is to be treated as \\x9b"); $x = "\x9b" . "y"; - ok ($x =~ /^\x{9_bq}y$/, "\\x{9_bc} is to be treated as \\x9b"); + like ($x, qr/^\x{9_bq}y$/, "\\x{9_bc} is to be treated as \\x9b"); $x = "\x0" . "y"; - ok ($x =~ /^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0"); + like ($x, qr/^\x{x9b}y$/, "\\x{x9b} is to be treated as \\x0"); $x = "\x0" . "y"; - ok ($x =~ /^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0"); + like ($x, qr/^\x{0x9b}y$/, "\\x{0x9b} is to be treated as \\x0"); $x = "\x9b" . "y"; - ok ($x =~ /^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b"); + like ($x, qr/^\x{09b}y$/, "\\x{09b} is to be treated as \\x9b"); $x = "\x9b"; - ok ($x =~ /^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b"); + like ($x, qr/^[\x{9_b}]$/, "\\x{9_b} is to be treated as \\x9b"); $x = "\x9b" . "y"; - ok ($x =~ /^[\x{9_b}y]{2}$/, + like ($x, qr/^[\x{9_b}y]{2}$/, "\\x{9_b} is to be treated as \\x9b (again)"); $x = "\x9b" . "y"; - ok ($x =~ /^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b"); + like ($x, qr/^[\x{9b_}y]{2}$/, "\\x{9b_} is to be treated as \\x9b"); $x = "\x9b" . "y"; - ok ($x =~ /^[\x{9_bq}y]{2}$/, "\\x{9_bc} is to be treated as \\x9b"); + like ($x, qr/^[\x{9_bq}y]{2}$/, "\\x{9_bc} is to be treated as \\x9b"); $x = "\x0" . "y"; - ok ($x =~ /^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0"); + like ($x, qr/^[\x{x9b}y]{2}$/, "\\x{x9b} is to be treated as \\x0"); $x = "\x0" . "y"; - ok ($x =~ /^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0"); + like ($x, qr/^[\x{0x9b}y]{2}$/, "\\x{0x9b} is to be treated as \\x0"); $x = "\x9b" . "y"; - ok ($x =~ /^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b"); + like ($x, qr/^[\x{09b}y]{2}$/, "\\x{09b} is to be treated as \\x9b"); } { # High bit bug -- japhy my $x = "ab\200d"; - ok $x =~ /.*?\200/, "High bit fine"; + like $x, qr/.*?\200/, "High bit fine"; } { # The basic character classes and Unicode - ok "\x{0100}" =~ /\w/, 'LATIN CAPITAL LETTER A WITH MACRON in /\w/'; - ok "\x{0660}" =~ /\d/, 'ARABIC-INDIC DIGIT ZERO in /\d/'; - ok "\x{1680}" =~ /\s/, 'OGHAM SPACE MARK in /\s/'; + like "\x{0100}", qr/\w/, 'LATIN CAPITAL LETTER A WITH MACRON in /\w/'; + like "\x{0660}", qr/\d/, 'ARABIC-INDIC DIGIT ZERO in /\d/'; + like "\x{1680}", qr/\s/, 'OGHAM SPACE MARK in /\s/'; } { @@ -562,10 +562,10 @@ sub run_tests { # More whitespace: U+0085, U+2028, U+2029\n"; # U+0085, U+00A0 need to be forced to be Unicode, the \x{100} does that. - ok "<\x{100}" . uni_to_native("\x{0085}") . ">" =~ /<\x{100}\s>/, '\x{0085} in \s'; - ok "<" . uni_to_native("\x{0085}") . ">" =~ /<\v>/, '\x{0085} in \v'; - ok "<\x{100}" . uni_to_native("\x{00A0}") . ">" =~ /<\x{100}\s>/, '\x{00A0} in \s'; - ok "<" . uni_to_native("\x{00A0}") . ">" =~ /<\h>/, '\x{00A0} in \h'; + like "<\x{100}" . uni_to_native("\x{0085}") . ">", qr/<\x{100}\s>/, '\x{0085} in \s'; + like "<" . uni_to_native("\x{0085}") . ">", qr/<\v>/, '\x{0085} in \v'; + like "<\x{100}" . uni_to_native("\x{00A0}") . ">", qr/<\x{100}\s>/, '\x{00A0} in \s'; + like "<" . uni_to_native("\x{00A0}") . ">", qr/<\h>/, '\x{00A0} in \h'; my @h = map {sprintf "%05x" => $_} 0x01680, 0x02000 .. 0x0200A, 0x0202F, 0x0205F, 0x03000; my @v = map {sprintf "%05x" => $_} 0x02028, 0x02029; @@ -577,28 +577,28 @@ sub run_tests { for my $hex (@h) { my $str = eval qq ["<\\x{$hex}>"]; - ok $str =~ /<\s>/, "\\x{$hex} in \\s"; - ok $str =~ /<\h>/, "\\x{$hex} in \\h"; - ok $str !~ /<\v>/, "\\x{$hex} not in \\v"; + like $str, qr/<\s>/, "\\x{$hex} in \\s"; + like $str, qr/<\h>/, "\\x{$hex} in \\h"; + unlike $str, qr/<\v>/, "\\x{$hex} not in \\v"; } for my $hex (@v) { my $str = eval qq ["<\\x{$hex}>"]; - ok $str =~ /<\s>/, "\\x{$hex} in \\s"; - ok $str =~ /<\v>/, "\\x{$hex} in \\v"; - ok $str !~ /<\h>/, "\\x{$hex} not in \\h"; + like $str, qr/<\s>/, "\\x{$hex} in \\s"; + like $str, qr/<\v>/, "\\x{$hex} in \\v"; + unlike $str, qr/<\h>/, "\\x{$hex} not in \\h"; } for my $hex (@H) { my $str = eval qq ["<\\x{$hex}>"]; - ok $str =~ /<\S>/, "\\x{$hex} in \\S"; - ok $str =~ /<\H>/, "\\x{$hex} in \\H"; + like $str, qr/<\S>/, "\\x{$hex} in \\S"; + like $str, qr/<\H>/, "\\x{$hex} in \\H"; } for my $hex (@V) { my $str = eval qq ["<\\x{$hex}>"]; - ok $str =~ /<\S>/, "\\x{$hex} in \\S"; - ok $str =~ /<\V>/, "\\x{$hex} in \\V"; + like $str, qr/<\S>/, "\\x{$hex} in \\S"; + like $str, qr/<\V>/, "\\x{$hex} in \\V"; } } @@ -637,10 +637,10 @@ sub run_tests { { my $message = "Unicode lookbehind"; - like("A\x{100}B" , qr/(?<=A.)B/, $message); + like("A\x{100}B", qr/(?<=A.)B/, $message); like("A\x{200}\x{300}B", qr/(?<=A..)B/, $message); - like("\x{400}AB" , qr/(?<=\x{400}.)B/, $message); - like("\x{500}\x{600}B" , qr/(?<=\x{500}.)B/, $message); + like("\x{400}AB", qr/(?<=\x{400}.)B/, $message); + like("\x{500}\x{600}B", qr/(?<=\x{500}.)B/, $message); # Original code also contained: # ok "\x{500\x{600}}B" =~ /(?<=\x{500}.)B/; @@ -767,7 +767,7 @@ sub run_tests { } { - ok "\x{100}\n" =~ /\x{100}\n$/, "UTF-8 length cache and fbm_compile"; + like "\x{100}\n", qr/\x{100}\n$/, "UTF-8 length cache and fbm_compile"; } { @@ -791,7 +791,7 @@ sub run_tests { } { - ok "123\x{100}" =~ /^.*1.*23\x{100}$/, + like "123\x{100}", qr/^.*1.*23\x{100}$/, 'UTF-8 + multiple floating substr'; } @@ -844,7 +844,7 @@ sub run_tests { $re = qr/\b$re\b/; foreach (@nums) { - ok $_ =~ /$re/, "Trie nums"; + like $_, qr/$re/, "Trie nums"; } $_ = join " ", @nums; @@ -909,7 +909,7 @@ sub run_tests { } use Cname; - ok 'fooB' =~ /\N{foo}[\N{B}\N{b}]/, "Passthrough charname"; + like 'fooB', qr/\N{foo}[\N{B}\N{b}]/, "Passthrough charname"; my $name = "foo\xDF"; my $result = eval "'A${name}B' =~ /^A\\N{$name}B\$/"; ok !$@ && $result, "Passthrough charname of non-ASCII, Latin1"; @@ -922,32 +922,33 @@ sub run_tests { ok !$@ && $result && ! $w, '\N{} returning multi-char works'; undef $w; - eval q [ok "\0" !~ /[\N{EMPTY-STR}XY]/, + eval q [unlike "\0", qr/[\N{EMPTY-STR}XY]/, "Zerolength charname in charclass doesn't match \\\\0"]; ok $w && $w =~ /Ignoring zero length/, 'Ignoring zero length \N{} in character class warning'; undef $w; - eval q [ok 'xy' =~ /x[\N{EMPTY-STR} y]/x, + eval q [like 'xy', qr/x[\N{EMPTY-STR} y]/x, 'Empty string charname in [] is ignored; finds a following character']; ok $w && $w =~ /Ignoring zero length/, 'Ignoring zero length \N{} in character class warning'; undef $w; - eval q [ok 'x ' =~ /x[\N{EMPTY-STR} y]/, + eval q [like 'x ', qr/x[\N{EMPTY-STR} y]/, 'Empty string charname in [] is ignored; finds a following blank under /x']; - ok $w && $w =~ /Ignoring zero length/, + like $w, qr/Ignoring zero length/, 'Ignoring zero length \N{} in character class warning'; ok 'AB' =~ /(\N{EVIL})/ && $1 eq 'A', 'Charname caching $1'; - ok 'ABC' =~ /(\N{EVIL})/, 'Charname caching $1'; - ok 'xy' =~ /x\N{EMPTY-STR}y/, + like 'ABC', qr/(\N{EVIL})/, 'Charname caching $1'; + like 'xy', qr/x\N{EMPTY-STR}y/, 'Empty string charname produces NOTHING node'; - ok '' =~ /\N{EMPTY-STR}/, + like '', qr/\N{EMPTY-STR}/, 'Empty string charname produces NOTHING node'; - ok "\N{LONG-STR}" =~ /^\N{LONG-STR}$/, 'Verify that long string works'; - ok "\N{LONG-STR}" =~ /^\N{LONG-STR}$/i, 'Verify under folding that long string works'; + like "\N{LONG-STR}", qr/^\N{LONG-STR}$/, 'Verify that long string works'; + like "\N{LONG-STR}", qr/^\N{LONG-STR}$/i, 'Verify under folding that long string works'; + like "\xc4", qr/\N{EMPTY-STR}\xe4/i, 'Empty \N{} should change /d to /u'; eval '/(?[[\N{EMPTY-STR}]])/'; - ok $@ && $@ =~ /Zero length \\N\{\}/; + like $@, qr/Zero length \\N\{\}/, 'Verify zero-length return from \N{} correctly fails'; undef $w; { @@ -1008,7 +1009,7 @@ sub run_tests { # If remove the limitation in regcomp code these should work # differently undef $w; - eval q [ok "\N{TOO-LONG-STR}" =~ /^\N{TOO-LONG-STR}$/, 'Verify that what once was too long a string works']; + eval q [like "\N{TOO-LONG-STR}" =~ /^\N{TOO-LONG-STR}$/, 'Verify that what once was too long a string works']; eval 'q() =~ /\N{4F}/'; ok $@ && $@ =~ /Invalid character/, 'Verify that leading digit in name gives error'; eval 'q() =~ /\N{COM,MA}/'; @@ -1047,24 +1048,24 @@ sub run_tests { my $r = eval "qr/\\N{\x{100}\x{100}}/"; isnt $r, undef, "Generated regex for multi-char UTF-8 charname" or diag($@); - ok "\x{100}\x{100}" =~ $r, "which matches"; + like "\x{100}\x{100}", $r, "which matches"; } { use charnames ':full'; - ok 'aabc' !~ /a\N{PLUS SIGN}b/, '/a\N{PLUS SIGN}b/ against aabc'; - ok 'a+bc' =~ /a\N{PLUS SIGN}b/, '/a\N{PLUS SIGN}b/ against a+bc'; + unlike 'aabc', qr/a\N{PLUS SIGN}b/, '/a\N{PLUS SIGN}b/ against aabc'; + like 'a+bc', qr/a\N{PLUS SIGN}b/, '/a\N{PLUS SIGN}b/ against a+bc'; - ok ' A B' =~ /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, + like ' A B', qr/\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, 'Intermixed named and unicode escapes'; - ok "\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}" =~ - /\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, + like "\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}", + qr/\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}/, 'Intermixed named and unicode escapes'; - ok "\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}" =~ - /[\N{SPACE}\N{U+0041}][\N{SPACE}\N{U+0042}]/, + like "\N{SPACE}\N{U+0041}\N{SPACE}\N{U+0042}", + qr/[\N{SPACE}\N{U+0041}][\N{SPACE}\N{U+0042}]/, 'Intermixed named and unicode escapes'; - ok "\0" =~ /^\N{NULL}$/, 'Verify that \N{NULL} works; is not confused with an error'; + like "\0", qr/^\N{NULL}$/, 'Verify that \N{NULL} works; is not confused with an error'; } { @@ -1073,7 +1074,7 @@ sub run_tests { { (?> [^{}]+ | (??{ $brackets }) )* } }x; - ok "{b{c}d" !~ m/^((??{ $brackets }))/, "Bracket mismatch"; + unlike "{b{c}d", qr/^((??{ $brackets }))/, "Bracket mismatch"; SKIP: { our @stack = (); @@ -1160,8 +1161,8 @@ sub run_tests { { # \c\ followed by _ - ok "x\c_y" !~ /x\c\_y/, '\_ in a pattern'; - ok "x\c\_y" =~ /x\c\_y/, '\_ in a pattern'; + unlike "x\c_y", qr/x\c\_y/, '\_ in a pattern'; + like "x\c\_y", qr/x\c\_y/, '\_ in a pattern'; # \c\ followed by other characters for my $c ("z", "\0", "!", chr(254), chr(256)) { @@ -1252,7 +1253,7 @@ sub run_tests { is($count, 1, "Expect 1 with (*COMMIT)"); is("@res", "aaab", "Adjacent (*COMMIT) works as expected"); - ok("1\n2a\n" !~ /^\d+(*COMMIT)\w+/m, "COMMIT and anchors"); + unlike("1\n2a\n", qr/^\d+(*COMMIT)\w+/m, "COMMIT and anchors"); } { @@ -1408,62 +1409,62 @@ sub run_tests { { use charnames ":full"; - ok "\N{ROMAN NUMERAL ONE}" =~ /\p{Alphabetic}/, "I =~ Alphabetic"; - ok "\N{ROMAN NUMERAL ONE}" =~ /\p{Uppercase}/, "I =~ Uppercase"; - ok "\N{ROMAN NUMERAL ONE}" !~ /\p{Lowercase}/, "I !~ Lowercase"; - ok "\N{ROMAN NUMERAL ONE}" =~ /\p{IDStart}/, "I =~ ID_Start"; - ok "\N{ROMAN NUMERAL ONE}" =~ /\p{IDContinue}/, "I =~ ID_Continue"; - ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Alphabetic}/, "i =~ Alphabetic"; - ok "\N{SMALL ROMAN NUMERAL ONE}" !~ /\p{Uppercase}/, "i !~ Uppercase"; - ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Uppercase}/i, "i =~ Uppercase under /i"; - ok "\N{SMALL ROMAN NUMERAL ONE}" !~ /\p{Titlecase}/, "i !~ Titlecase"; - ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Titlecase}/i, "i =~ Titlecase under /i"; - ok "\N{ROMAN NUMERAL ONE}" =~ /\p{Lowercase}/i, "I =~ Lowercase under /i"; - - ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{Lowercase}/, "i =~ Lowercase"; - ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{IDStart}/, "i =~ ID_Start"; - ok "\N{SMALL ROMAN NUMERAL ONE}" =~ /\p{IDContinue}/, "i =~ ID_Continue" + like "\N{ROMAN NUMERAL ONE}", qr/\p{Alphabetic}/, "I =~ Alphabetic"; + like "\N{ROMAN NUMERAL ONE}", qr/\p{Uppercase}/, "I =~ Uppercase"; + unlike "\N{ROMAN NUMERAL ONE}", qr/\p{Lowercase}/, "I !~ Lowercase"; + like "\N{ROMAN NUMERAL ONE}", qr/\p{IDStart}/, "I =~ ID_Start"; + like "\N{ROMAN NUMERAL ONE}", qr/\p{IDContinue}/, "I =~ ID_Continue"; + like "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{Alphabetic}/, "i =~ Alphabetic"; + unlike "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{Uppercase}/, "i !~ Uppercase"; + like "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{Uppercase}/i, "i =~ Uppercase under /i"; + unlike "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{Titlecase}/, "i !~ Titlecase"; + like "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{Titlecase}/i, "i =~ Titlecase under /i"; + like "\N{ROMAN NUMERAL ONE}", qr/\p{Lowercase}/i, "I =~ Lowercase under /i"; + + like "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{Lowercase}/, "i =~ Lowercase"; + like "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{IDStart}/, "i =~ ID_Start"; + like "\N{SMALL ROMAN NUMERAL ONE}", qr/\p{IDContinue}/, "i =~ ID_Continue" } { # More checking that /i works on the few properties that it makes a # difference. Uppercase, Lowercase, and Titlecase were done in the # block above - ok "A" =~ /\p{PosixUpper}/, "A =~ PosixUpper"; - ok "A" =~ /\p{PosixUpper}/i, "A =~ PosixUpper under /i"; - ok "A" !~ /\p{PosixLower}/, "A !~ PosixLower"; - ok "A" =~ /\p{PosixLower}/i, "A =~ PosixLower under /i"; - ok "a" !~ /\p{PosixUpper}/, "a !~ PosixUpper"; - ok "a" =~ /\p{PosixUpper}/i, "a =~ PosixUpper under /i"; - ok "a" =~ /\p{PosixLower}/, "a =~ PosixLower"; - ok "a" =~ /\p{PosixLower}/i, "a =~ PosixLower under /i"; - - ok uni_to_native("\xC0") =~ /\p{XPosixUpper}/, "\\xC0 =~ XPosixUpper"; - ok uni_to_native("\xC0") =~ /\p{XPosixUpper}/i, "\\xC0 =~ XPosixUpper under /i"; - ok uni_to_native("\xC0") !~ /\p{XPosixLower}/, "\\xC0 !~ XPosixLower"; - ok uni_to_native("\xC0") =~ /\p{XPosixLower}/i, "\\xC0 =~ XPosixLower under /i"; - ok uni_to_native("\xE0") !~ /\p{XPosixUpper}/, "\\xE0 !~ XPosixUpper"; - ok uni_to_native("\xE0") =~ /\p{XPosixUpper}/i, "\\xE0 =~ XPosixUpper under /i"; - ok uni_to_native("\xE0") =~ /\p{XPosixLower}/, "\\xE0 =~ XPosixLower"; - ok uni_to_native("\xE0") =~ /\p{XPosixLower}/i, "\\xE0 =~ XPosixLower under /i"; - - ok uni_to_native("\xC0") =~ /\p{UppercaseLetter}/, "\\xC0 =~ UppercaseLetter"; - ok uni_to_native("\xC0") =~ /\p{UppercaseLetter}/i, "\\xC0 =~ UppercaseLetter under /i"; - ok uni_to_native("\xC0") !~ /\p{LowercaseLetter}/, "\\xC0 !~ LowercaseLetter"; - ok uni_to_native("\xC0") =~ /\p{LowercaseLetter}/i, "\\xC0 =~ LowercaseLetter under /i"; - ok uni_to_native("\xC0") !~ /\p{TitlecaseLetter}/, "\\xC0 !~ TitlecaseLetter"; - ok uni_to_native("\xC0") =~ /\p{TitlecaseLetter}/i, "\\xC0 =~ TitlecaseLetter under /i"; - ok uni_to_native("\xE0") !~ /\p{UppercaseLetter}/, "\\xE0 !~ UppercaseLetter"; - ok uni_to_native("\xE0") =~ /\p{UppercaseLetter}/i, "\\xE0 =~ UppercaseLetter under /i"; - ok uni_to_native("\xE0") =~ /\p{LowercaseLetter}/, "\\xE0 =~ LowercaseLetter"; - ok uni_to_native("\xE0") =~ /\p{LowercaseLetter}/i, "\\xE0 =~ LowercaseLetter under /i"; - ok uni_to_native("\xE0") !~ /\p{TitlecaseLetter}/, "\\xE0 !~ TitlecaseLetter"; - ok uni_to_native("\xE0") =~ /\p{TitlecaseLetter}/i, "\\xE0 =~ TitlecaseLetter under /i"; - ok "\x{1C5}" !~ /\p{UppercaseLetter}/, "\\x{1C5} !~ UppercaseLetter"; - ok "\x{1C5}" =~ /\p{UppercaseLetter}/i, "\\x{1C5} =~ UppercaseLetter under /i"; - ok "\x{1C5}" !~ /\p{LowercaseLetter}/, "\\x{1C5} !~ LowercaseLetter"; - ok "\x{1C5}" =~ /\p{LowercaseLetter}/i, "\\x{1C5} =~ LowercaseLetter under /i"; - ok "\x{1C5}" =~ /\p{TitlecaseLetter}/, "\\x{1C5} =~ TitlecaseLetter"; - ok "\x{1C5}" =~ /\p{TitlecaseLetter}/i, "\\x{1C5} =~ TitlecaseLetter under /i"; + like "A", qr/\p{PosixUpper}/, "A =~ PosixUpper"; + like "A", qr/\p{PosixUpper}/i, "A =~ PosixUpper under /i"; + unlike "A", qr/\p{PosixLower}/, "A !~ PosixLower"; + like "A", qr/\p{PosixLower}/i, "A =~ PosixLower under /i"; + unlike "a", qr/\p{PosixUpper}/, "a !~ PosixUpper"; + like "a", qr/\p{PosixUpper}/i, "a =~ PosixUpper under /i"; + like "a", qr/\p{PosixLower}/, "a =~ PosixLower"; + like "a", qr/\p{PosixLower}/i, "a =~ PosixLower under /i"; + + like uni_to_native("\xC0"), qr/\p{XPosixUpper}/, "\\xC0 =~ XPosixUpper"; + like uni_to_native("\xC0"), qr/\p{XPosixUpper}/i, "\\xC0 =~ XPosixUpper under /i"; + unlike uni_to_native("\xC0"), qr/\p{XPosixLower}/, "\\xC0 !~ XPosixLower"; + like uni_to_native("\xC0"), qr/\p{XPosixLower}/i, "\\xC0 =~ XPosixLower under /i"; + unlike uni_to_native("\xE0"), qr/\p{XPosixUpper}/, "\\xE0 !~ XPosixUpper"; + like uni_to_native("\xE0"), qr/\p{XPosixUpper}/i, "\\xE0 =~ XPosixUpper under /i"; + like uni_to_native("\xE0"), qr/\p{XPosixLower}/, "\\xE0 =~ XPosixLower"; + like uni_to_native("\xE0"), qr/\p{XPosixLower}/i, "\\xE0 =~ XPosixLower under /i"; + + like uni_to_native("\xC0"), qr/\p{UppercaseLetter}/, "\\xC0 =~ UppercaseLetter"; + like uni_to_native("\xC0"), qr/\p{UppercaseLetter}/i, "\\xC0 =~ UppercaseLetter under /i"; + unlike uni_to_native("\xC0"), qr/\p{LowercaseLetter}/, "\\xC0 !~ LowercaseLetter"; + like uni_to_native("\xC0"), qr/\p{LowercaseLetter}/i, "\\xC0 =~ LowercaseLetter under /i"; + unlike uni_to_native("\xC0"), qr/\p{TitlecaseLetter}/, "\\xC0 !~ TitlecaseLetter"; + like uni_to_native("\xC0"), qr/\p{TitlecaseLetter}/i, "\\xC0 =~ TitlecaseLetter under /i"; + unlike uni_to_native("\xE0"), qr/\p{UppercaseLetter}/, "\\xE0 !~ UppercaseLetter"; + like uni_to_native("\xE0"), qr/\p{UppercaseLetter}/i, "\\xE0 =~ UppercaseLetter under /i"; + like uni_to_native("\xE0"), qr/\p{LowercaseLetter}/, "\\xE0 =~ LowercaseLetter"; + like uni_to_native("\xE0"), qr/\p{LowercaseLetter}/i, "\\xE0 =~ LowercaseLetter under /i"; + unlike uni_to_native("\xE0"), qr/\p{TitlecaseLetter}/, "\\xE0 !~ TitlecaseLetter"; + like uni_to_native("\xE0"), qr/\p{TitlecaseLetter}/i, "\\xE0 =~ TitlecaseLetter under /i"; + unlike "\x{1C5}", qr/\p{UppercaseLetter}/, "\\x{1C5} !~ UppercaseLetter"; + like "\x{1C5}", qr/\p{UppercaseLetter}/i, "\\x{1C5} =~ UppercaseLetter under /i"; + unlike "\x{1C5}", qr/\p{LowercaseLetter}/, "\\x{1C5} !~ LowercaseLetter"; + like "\x{1C5}", qr/\p{LowercaseLetter}/i, "\\x{1C5} =~ LowercaseLetter under /i"; + like "\x{1C5}", qr/\p{TitlecaseLetter}/, "\\x{1C5} =~ TitlecaseLetter"; + like "\x{1C5}", qr/\p{TitlecaseLetter}/i, "\\x{1C5} =~ TitlecaseLetter under /i"; } { @@ -1473,7 +1474,7 @@ sub run_tests { no warnings 'utf8'; # oops my $c = chr $u; my $x = sprintf '%04X', $u; - ok "A${c}B" =~ /A[\0-\x{10000}]B/, "Unicode range - $x"; + like "A${c}B", qr/A[\0-\x{10000}]B/, "Unicode range - $x"; } } @@ -1737,7 +1738,7 @@ EOP my $chr_byte = chr($chr); my $chr_utf8 = chr($chr); utf8::upgrade($chr_utf8); my $rx = qr{$chr_byte|X}i; - ok($chr_utf8 =~ $rx, "utf8/latin, codepoint $chr"); + like($chr_utf8, $rx, "utf8/latin, codepoint $chr"); } } @@ -2113,7 +2114,7 @@ EOP like(uni_to_native("\xC0"), qr/$p/, "Verify \"\\xC0\" =~ /[\\xE0_]/i; pattern in utf8"); } - ok "x" =~ /\A(?>(?:(?:)A|B|C?x))\z/, + like "x", qr/\A(?>(?:(?:)A|B|C?x))\z/, "Check TRIE does not overwrite EXACT following NOTHING at start - RT #111842"; { @@ -2279,13 +2280,6 @@ EOF "afoot" =~ eval "qr/$qr/"; is "$1" || $@, "foo", 'multichar \N{...} stringified and retoked'; } - { # empty \N{...} tripping roundly - no warnings 'deprecated'; - BEGIN { $^H{charnames} = sub { "" } } - my $qr = qr$(a\N{foo}t)$; - "at" =~ eval "qr/$qr/"; - is "$1" || $@, "at", 'empty \N{...} stringified and retoked'; - } is (scalar split(/\b{sb}/, "Don't think twice. It's all right."), 2, '\b{wb} splits sentences correctly'); @@ -2296,7 +2290,7 @@ EOF print "# Tests that follow may crash perl\n"; { eval '/\k/'; - ok $@ =~ /\QSequence \k... not terminated in regex;\E/, + like $@, qr/\QSequence \k... not terminated in regex;\E/, 'Lone \k not allowed'; } @@ -2400,11 +2394,16 @@ EOF utf8::downgrade($string); utf8::downgrade($folded_string); + use Cname; like($string, qr/$string/i, "LATIN SMALL SHARP S matches itself under /id"); unlike($folded_string, qr/$string/i, "LATIN SMALL SHARP S doesn't match 'ss' under /di"); - no warnings 'deprecated'; - like($folded_string, qr/\N{}$string/i, "\\N{} earlier than LATIN SMALL SHARP S transforms /di into /ui, matches 'ss'"); - like($folded_string, qr/$string\N{}/i, "\\N{} after LATIN SMALL SHARP S transforms /di into /ui, matches 'ss'"); + like($folded_string, qr/\N{EMPTY-STR}$string/i, "\\N{} earlier than LATIN SMALL SHARP S transforms /di into /ui, matches 'ss'"); + like($folded_string, qr/$string\N{EMPTY-STR}/i, "\\N{} after LATIN SMALL SHARP S transforms /di into /ui, matches 'ss'"); + } + + { # [perl #126606 crashed the interpreter + use Cname; + like("sS", qr/\N{EMPTY-STR}Ss|/i, '\N{} with empty branch alternation works'); } { # Regexp:Grammars was broken: diff --git a/t/re/re_tests b/t/re/re_tests index 410fceadac..0bd9b5541f 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -1443,7 +1443,6 @@ foo(\h)bar foo\tbar y $1 \t /\N{U+41}\x{c1}/i a\x{e1} y $& a\x{e1} /[\N{U+41}\x{c1}]/i \x{e1} y $& \x{e1} '\N{U+41}' A y $& A # Even for single quoted patterns -/\N{}\xe4/i \xc4 y $& \xc4 # Empty \N{} should change /d to /u [\s][\S] \x{a0}\x{a0} n - - # Unicode complements should not match same character diff --git a/toke.c b/toke.c index ce6fe22162..e9d3979879 100644 --- a/toke.c +++ b/toke.c @@ -2588,8 +2588,9 @@ S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e) PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; if (!SvCUR(res)) { - deprecate_fatal_in("5.28", "Unknown charname '' is deprecated"); - return res; + /* diag_listed_as: Unknown charname '%s' */ + yyerror("Unknown charname ''"); + return NULL; } res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr, -- Perl5 Master Repository