> On Aug 5, 2021, at 3:15 PM, Tom Lane <[email protected]> wrote:
>
> I don't immediately see what's different about your failing case
> versus the not-failing ones.
I have now found lots of cases of this failure. I *believe* the backreference
is always greater than 1, and it is always in a capture group which then has
the {0} or {0,0} applied to it.
You can find lots of cases using the attached regex generating script I whipped
up for testing your work. (Note this is just a quick and dirty tool for
hacking, not anything refined.)
#!/usr/bin/perl
use strict;
use warnings;
# Pool of characters that rand_char() picks from.
our @alphabet = ('a'..'z');
# Return a random non-negative integer.
#
# The counter starts at zero and is bumped once for every consecutive draw of
# int(rand(3)) that is non-zero; the first zero draw ends the count. Small
# results (including 0) are therefore the most likely.
sub rand_num
{
    my $count = 0;
    until (int(rand(3)) == 0)
    {
        $count += 1;
    }
    return $count;
}
# Return one element of the global @alphabet, chosen uniformly at random.
sub rand_char
{
    my $index = int(rand(scalar(@alphabet)));
    return $alphabet[$index];
}
# Strings previously produced by rand_string() that it may hand out again;
# rand_string() also adds to and removes from this list.
our @strings;
# Return a short random string, sometimes reusing one generated earlier.
#
# When the global @strings pool is non-empty, a three-way roll decides whether
# to return a random pool entry as-is, or to discard the oldest or the newest
# entry before generating a fresh string. A fresh string is rand_num()
# characters long (possibly empty) and is remembered in the pool half of the
# time.
sub rand_string
{
    if (@strings)
    {
        my $action = int(rand(3));
        if ($action == 0)
        {
            # Reuse an existing string unchanged.
            return $strings[int(rand(scalar(@strings)))];
        }
        elsif ($action == 1)
        {
            shift(@strings);
        }
        else
        {
            pop(@strings);
        }
    }

    # The length is drawn first, then one character per position.
    my $length = rand_num();
    my $fresh = '';
    foreach my $position (1 .. $length)
    {
        $fresh .= rand_char();
    }

    if (int(rand(2)))
    {
        push(@strings, $fresh);
    }
    return $fresh;
}
# Return the concatenation of a random number of rand_string() results.
#
# Before each piece a ten-sided roll is made; a roll of zero stops the loop, so
# the result may be the empty string.
sub rand_long_string
{
    my @pieces;
    until (int(rand(10)) == 0)
    {
        push(@pieces, scalar(rand_string()));
    }
    return join('', @pieces);
}
# Return a random regex quantifier, picked uniformly from twelve forms:
#   *  +  ?  *?  +?  ??  {m}  {m,}  {m}?  {m,}?  {m,n}  {m,n}?
#
# The bounds come from rand_num(), so m may be 0 and n (computed as m plus
# another rand_num() draw) may equal m. rand_num() is only consulted for the
# forms that actually need a bound.
sub rand_quantifier
{
    my @plain = ('*', '+', '?', '*?', '+?', '??');
    my $pick = int(rand(12));
    return $plain[$pick] if ($pick < scalar(@plain));

    # Bounded forms; picks 8, 9 and 11 are the non-greedy variants.
    my $min = rand_num();
    my $lazy = ($pick == 8 || $pick == 9 || $pick == 11) ? '?' : '';
    return '{' . $min . '}' . $lazy if ($pick == 6 || $pick == 8);
    return '{' . $min . ',}' . $lazy if ($pick == 7 || $pick == 9);

    my $max = $min + rand_num();
    return '{' . $min . ',' . $max . '}' . $lazy if ($pick == 10 || $pick == 11);

    # Not reachable for picks in 0..11; kept as a safe default.
    return "";
}
# Return a backslash followed by a randomly chosen suffix. The five equally
# likely suffixes are: the digit 0, a random character, an upper-cased random
# character, a random string, or an upper-cased random string. Because
# rand_string() can return an empty string, the result can be a lone backslash.
sub rand_escape
{
    my $choice = int(rand(5));
    my $suffix;

    if ($choice == 0)
    {
        $suffix = '0';
    }
    elsif ($choice == 1)
    {
        $suffix = rand_char();
    }
    elsif ($choice == 2)
    {
        $suffix = uc(rand_char());
    }
    elsif ($choice == 3)
    {
        $suffix = rand_string();
    }
    elsif ($choice == 4)
    {
        $suffix = uc(rand_string());
    }
    else
    {
        # Not reachable for choices in 0..4; kept as a safe default.
        return "";
    }

    return '\\' . $suffix;
}
# Count of capturing groups emitted so far for the regex being built.
# rand_regex() resets it, and rand_rgx() increments it and uses it as the
# upper bound when choosing a back-reference number.
our $max_capture = 0;
# Recursively build one random regular-expression fragment and return it.
#
# $depth (optional, treated as 0 when not given) is the current recursion
# depth. Below depth 5 a 100-sided roll selects among all constructs; at depth
# 5 or more the roll is 20-sided, which only reaches the non-recursive
# choices, so the recursion is bounded.
#
# The global $max_capture is incremented each time a capturing group is
# completed, and limits the group number used in back-references.
sub rand_rgx
{
    my $level  = defined($_[0]) ? $_[0] : 0;
    my $deeper = $level + 1;
    my $faces  = ($level < 5) ? 100 : 20;
    my $roll   = int(rand($faces));

    # --- Non-recursive choices (rolls 0..19) ---
    if ($roll == 0)
    {
        return "";
    }
    if ($roll == 2)
    {
        return rand_escape();
    }
    if ($roll < 5)
    {
        # Rolls 1, 3 and 4.
        return rand_char();
    }
    if ($roll < 10 && $max_capture)
    {
        # Back-reference to one of the groups counted so far. When no group
        # exists yet, these rolls fall through to the "." case below.
        my $group_number = 1 + int(rand($max_capture));
        return '\\' . $group_number;
    }
    if ($roll < 20)
    {
        return ".";
    }

    # --- Bracket expressions (rolls 20..23) ---
    if ($roll <= 23)
    {
        # Odd rolls are negated; 20/21 hold an escape, 22/23 a plain string.
        my $opener = ($roll == 20 || $roll == 22) ? '[' : '[^';
        my $inside = ($roll <= 21) ? rand_escape() : rand_string();
        return $opener . $inside . ']';
    }

    # --- Recursive choices ---
    if ($roll < 60)
    {
        # Capturing group; it is counted only after its contents are built.
        my $contents = rand_rgx($deeper);
        $max_capture += 1;
        return '(' . $contents . ')';
    }
    if ($roll < 70)
    {
        return '(?:' . rand_rgx($deeper) . ')';
    }

    # Lookahead / lookbehind constraints. Roll 70 has no entry here and ends
    # up in the concatenation case at the bottom.
    my %lookaround_opener = (
        71 => '(?=',
        72 => '(?!',
        73 => '(?<=',
        74 => '(?<!',
    );
    if (exists($lookaround_opener{$roll}))
    {
        return $lookaround_opener{$roll} . rand_rgx($deeper) . ')';
    }

    if ($roll == 75)
    {
        # Quantified sub-expression: the operand is generated first.
        my $operand = rand_rgx($deeper);
        return $operand . rand_quantifier();
    }

    # Every remaining roll: two sub-expressions side by side, left one first.
    my $left  = rand_rgx($deeper);
    my $right = rand_rgx($deeper);
    return $left . $right;
}
# Generate a complete random regex: reset the global capture-group counter,
# then build the pattern with rand_rgx() starting at its default depth.
sub rand_regex
{
    $max_capture = 0;
    my $pattern = rand_rgx();
    return $pattern;
}
# Return a random string of regex flag letters.
#
# Each of the twelve candidate letters is considered in order and kept when a
# twelve-sided roll comes up 0 or 1, so the result preserves the candidate
# order and may be empty.
sub rand_flags
{
    my @flag_letters = qw(b c e i m n p q s t w x);
    my $chosen = '';
    foreach my $letter (@flag_letters)
    {
        $chosen .= $letter if (int(rand(scalar(@flag_letters))) < 2);
    }
    return $chosen;
}
# Main driver: print one million rounds of eight SQL statements to stdout.
# Each statement pairs a freshly generated subject string with a freshly
# generated regex (and, for some functions, random flags or a replacement
# string), covering the ~ and !~ operators plus regexp_match, regexp_matches,
# regexp_split_to_array and regexp_replace.
#
# The generator calls appear in the same left-to-right order as the
# placeholders they fill.
foreach my $round (1..1000000)
{
    printf("select '%s' ~ '%s';\n", rand_long_string(), rand_regex());
    printf("select '%s' !~ '%s';\n", rand_long_string(), rand_regex());
    printf("select regexp_match('%s', '%s');\n", rand_long_string(), rand_regex());
    printf("select regexp_matches('%s', '%s');\n", rand_long_string(), rand_regex());
    printf("select regexp_matches('%s', '%s', '%s');\n", rand_long_string(), rand_regex(), rand_flags());
    printf("select regexp_split_to_array('%s', '%s');\n", rand_long_string(), rand_regex());
    printf("select regexp_split_to_array('%s', '%s', '%s');\n", rand_long_string(), rand_regex(), rand_flags());
    printf("select regexp_replace('%s', '%s', '%s', '%s');\n", rand_long_string(), rand_regex(), rand_string(), rand_flags());
}
—
Mark Dilger
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company