extract concordance line s from HTML file

Sunthari Mon, 15 Oct 2001 21:14:29 -0700

Hi, 

Can anyone help me with this codes ? I have tried
extracting HTML to plain text and then use a pattern
matching to extract the concordance lines.


Really hope someone can show what's wrong with the
attached code.

Thanks in advance,


#!usr\perl\bin -w


use strict;
use HTML::FormatText;
use HTML::TreeBuilder;
use CGI qw/:standard/;

$string = param('query');
chomp($string);
my $file = 'C:\folder\backends.htm';

print header()
  ,start_html ('WebConcord')
  ,h1({-align=>center},'Web Concordance Search
Results')
  ,h2({-align=>center},"for search term '$string'")
  ,h4({-align=>center},"Producing output....\n");
  print h3('Examples of Usage : '),


sub get_html {
    my $file = shift;
    my $html;
    open(FILE, $file) or die "open: $!";
    $html .= $_ while <FILE>;
    close(FILE)       or die "close: $!";
    return $html;
}

my $html = get_html($file);
my $form = HTML::FormatText->new();
my $tree = HTML::TreeBuilder->new();
   $tree->parse($html);
my $text = $form->format($tree);
#print $text;

 my $text = "@text";
 @splittext = split(/$string/,$text);

#To extract concordance lines from text
 for (my $i=1; $i < @splittext; $i++)
    {
    my $before = substr(('
'x10).$splittext[$i-1],-20,20);
      my $after = substr($splittext[$i].' 'x10,0,20);
      print p($before, strong($string), $after,"\n"),
      }

end_html;






-- 
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

extract concordance line s from HTML file

Reply via email to