Am Don, 2002-02-14 um 17.18 schrieb Daniel R. Allen: > finding the longest common (consecutive) substring
Here is a very straightforward approach (C-ish). All found substrings are saved in a hash (this part could be optimized/replaced easily)... sub common($$) { my($x, $y) = @_; my %common = (); for my $xpos (0 .. length($x) - 1) { # go through first string my $xc = substr($x,$xpos, 1); # get 1 char at the position next if $xc eq ' '; # find occurances of this character in the other string my $ypos = 0; while(($ypos = index($y, $xc, $ypos)) > -1) { my $l = 2; # try 2 characters now # find out how long the common substring is MAX: while(substr($x, $xpos, $l) eq substr($y, $ypos, $l)) { $l++; last MAX if $xpos+$l > length $x; last MAX if $ypos+$l > length $y; } $common{$xpos}{$ypos} = $l-1; # common length found $ypos++; } } # Find largest substring in the hash we just filled my($max, $maxx, $maxy) = (0,0,0); for my $xpos (keys %common) { for my $ypos (keys %{$common{$xpos}}) { next if $common{$xpos}{$ypos} < $max; $max = $common{$xpos}{$ypos}; $maxx = $xpos; $maxy = $ypos; } } return "nothing in common\n" if $max == 0; return "max $max at x $maxx y $maxy: '" . substr($x,$maxx,$max) . "'\n"; } -Sven