Am Don, 2002-02-14 um 17.18 schrieb Daniel R. Allen:
> finding the longest common (consecutive) substring

Here is a very straightforward approach (C-ish). All found substrings
are saved in a hash (this part could be optimized/replaced easily)...

sub common($$) {
    my($x, $y) = @_;
    my %common = ();
    for my $xpos (0 .. length($x) - 1) { # go through first string
        my $xc = substr($x,$xpos, 1); # get 1 char at the position
        next if $xc eq ' ';
        # find occurances of this character in the other string
        my $ypos = 0;
        while(($ypos = index($y, $xc, $ypos)) > -1) {
            my $l = 2; # try 2 characters now
            # find out how long the common substring is
            MAX: while(substr($x, $xpos, $l) eq substr($y, $ypos, $l)) {
                $l++;
                last MAX if $xpos+$l > length $x;
                last MAX if $ypos+$l > length $y;
            }
            $common{$xpos}{$ypos} = $l-1; # common length found
            $ypos++;
        }
    }
    # Find largest substring in the hash we just filled
    my($max, $maxx, $maxy) = (0,0,0);
    for my $xpos (keys %common) {
        for my $ypos (keys %{$common{$xpos}}) {
            next if $common{$xpos}{$ypos} < $max;
            $max = $common{$xpos}{$ypos};
            $maxx = $xpos;
            $maxy = $ypos;
        }
    }
    return "nothing in common\n" if $max == 0;
    return "max $max at x $maxx y $maxy: '" . substr($x,$maxx,$max) .
"'\n";

}

-Sven


Reply via email to