Am Don, 2002-02-14 um 17.18 schrieb Daniel R. Allen:
> finding the longest common (consecutive) substring
Here is a very straightforward approach (C-ish). All found substrings
are saved in a hash (this part could be optimized/replaced easily)...
sub common($$) {
my($x, $y) = @_;
my %common = ();
for my $xpos (0 .. length($x) - 1) { # go through first string
my $xc = substr($x,$xpos, 1); # get 1 char at the position
next if $xc eq ' ';
# find occurances of this character in the other string
my $ypos = 0;
while(($ypos = index($y, $xc, $ypos)) > -1) {
my $l = 2; # try 2 characters now
# find out how long the common substring is
MAX: while(substr($x, $xpos, $l) eq substr($y, $ypos, $l)) {
$l++;
last MAX if $xpos+$l > length $x;
last MAX if $ypos+$l > length $y;
}
$common{$xpos}{$ypos} = $l-1; # common length found
$ypos++;
}
}
# Find largest substring in the hash we just filled
my($max, $maxx, $maxy) = (0,0,0);
for my $xpos (keys %common) {
for my $ypos (keys %{$common{$xpos}}) {
next if $common{$xpos}{$ypos} < $max;
$max = $common{$xpos}{$ypos};
$maxx = $xpos;
$maxy = $ypos;
}
}
return "nothing in common\n" if $max == 0;
return "max $max at x $maxx y $maxy: '" . substr($x,$maxx,$max) .
"'\n";
}
-Sven