Hi,

I need to parse an HTML file [0] and pull out all the form elements and
put them into a data structure. What I can't seem to do is when I have
found a <select> tag is then parse the associated <option> tags!

So far I have the following...

<perl>
use strict;
use Data::Dumper;
while (<IN>) {
    $p->parse($_);
}
$p->eof;
  
print Dumper($p->htmltree);


package FormParser;
use base "HTML::Parser";

my $HTMLTREE = {};

sub start {
    my ($self, $tag, $attr, $attrseq, $origtext) = @_;
  
    if ($tag =~ /^form|input|texarea$/) {
      
        my $name = $attr->{name};
        foreach my $i (@$attrseq) {
            next if ($i =~ /name/);
            $HTMLTREE->{$name}->{$i} = $attr->{$i};
        }
        
    } elsif ($tag =~ /^select$/) {
        
        my $name = $attr->{name};
        warn "FOUND: $tag : $name\n";
        foreach my $i (@$attrseq) {
            next if ($i =~ /name/);
            $HTMLTREE->{$name}->{$i} = $attr->{$i};
        }
        
        # Now need to assign the options to each select....
        
        
    }
}
  
sub text {
    my ($self, $text) = @_;
    #print $text;
}
  
sub end {
    my ($self, $tag, $origtext) = @_;
    #print $origtext;
}
  
sub htmltree {
    return $HTMLTREE;
}
</perl>


Andy

[0] - here's the HTML
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html
lang="en">
    <head>
        <title>Form Test</title>
    </head>
    <body>
        <form action="/cgi-bin/test.pl" method="post">
            <input type="text" name="forename"><br>
            <input type="text" name="surname"><br>
            <input type="hidden" name="cid" value="1234"><br>
            <input type="radio" name="rad_1" value="Y">Yes<input
type="radio" name="rad_1" value="N">No<br>
            <input type="radio" name="rad_2" value="1">One<input
type="radio" name="rad_2" value="2">Two<br>
            <input type="checkbox" name="check_1" value="1">Check 1
            <input type="checkbox" name="check_2" value="2">Check 2
            <input type="checkbox" name="check_3" value="3">Check 3<br>
            Password:<input type="password" name="password"><br>
            <textarea name="text_area"></textarea>
            <select name="single_select">
                <option name="1" value="1">1</option>
                <option name="2" value="2">2</option>
            </select>
            <select name="multiple_select" multiple>
                <option name="1" value="1">1</option>
                <option name="2" value="2">2</option>
                <option name="3" value="3">3</option>
                <option name="4" value="4">4</option>
            </select>
            <br>
            <input type="button" name="button">
            <input type="submit" name="submit_btn">
            <input type="reset" name="reset">

        </form>
    </body>
</html>


Reply via email to