I'm trying to dereference the $links array reference that
WWW::SimpleRobot passes to VISIT_CALLBACK and am having a heck of
a time getting it done.  Can anybody help?
You can see some of the things I have tried
below.

I know I can do this link extraction myself with
HTML::LinkExtor, or at least I think I can, but
I'd like to know how to dereference $links in this script.


Mike Flannigan



#
#
#
#!/usr/local/bin/perl
#
# Crawl http://www.portofhouston.com/ with WWW::SimpleRobot and print every
# link reported for each visited page to STDERR.
#
use strict;
use warnings;
use WWW::SimpleRobot;

my $robot = WWW::SimpleRobot->new(
    URLS            => [ 'http://www.portofhouston.com/' ],

    # Follow only links on this host.  The dots are escaped so they match a
    # literal '.', and the pattern ends in a single '/': the previous value
    # ended in '//' (which no ordinary link on the site matches) and was
    # followed by a stray ';' that made the whole constructor call a syntax
    # error.
    FOLLOW_REGEX    => '^http://www\.portofhouston\.com/',
    DEPTH           => 1,
    TRAVERSAL       => 'depth',

    # Called once per visited page with the page URL, its depth, the raw
    # HTML, and a reference to the list of links found on that page.
    VISIT_CALLBACK  => sub {
        my ( $url, $depth, $html, $links ) = @_;

        print STDERR "Visiting $url\n\n";

        # $links is an array *reference*.  Any of these reach its elements:
        #     @{$links}          the whole list
        #     $links->[$i]       one element
        #     ${$links}[$i]      same thing, older spelling
        # ${$linkder}[$i] does not work after "my @linkder = @{$links}":
        # it looks for a scalar $linkder, which was never declared.
        #
        # "|| []" keeps the loop from dying under strict refs if the robot
        # ever hands us an undefined $links.
        for my $link ( @{ $links || [] } ) {

            # NOTE(review): depending on the WWW::SimpleRobot version, each
            # element may itself be an array reference (e.g. an
            # HTML::LinkExtor-style [ tag, attr => url ] record) instead of a
            # plain URL -- confirm against the installed module.  Flattening
            # such records avoids printing "ARRAY(0x...)".
            my $text = ref($link) eq 'ARRAY' ? join( ' ', @{$link} ) : $link;
            print STDERR "$text\n";
        }
    },

    # Called for each link that could not be fetched.
    BROKEN_LINK_CALLBACK => sub {
        my ( $url, $linked_from, $depth ) = @_;

        print STDERR "$url looks like a broken link on $linked_from\n";
        print STDERR "Depth = $depth\n";
    },
);

$robot->traverse;

# After the traversal the robot exposes what it visited as array references;
# @{ ... } turns each one back into a list.
my @urls  = @{ $robot->urls };
my @pages = @{ $robot->pages };

# Placeholder: the per-page fields are extracted here but not used yet.
for my $page (@pages) {
    my $url               = $page->{url};
    my $depth             = $page->{depth};
    my $modification_time = $page->{modification_time};
}

print "\nAll done.\n";


__END__



--
To unsubscribe, e-mail: beginners-unsubscr...@perl.org
For additional commands, e-mail: beginners-h...@perl.org
http://learn.perl.org/


Reply via email to