I've got a script designed to do mass HTTP grabs, but after ~2000 requests it
runs into the following code:

sub poco_weeble_connect_error {
  my ($kernel, $heap, $operation, $errnum, $errstr, $wheel_id) =
    @_[KERNEL, HEAP, ARG0..ARG3];

  DEBUG and
    warn "wheel $wheel_id encountered $operation error $errnum: $errstr\n";

  # Drop the wheel and its cross-references.
  my $request_id = delete $heap->{wheel_to_request}->{$wheel_id};
  die "expected a request ID, but there is none" unless defined $request_id;

and dies. However, I can't figure out where the request ID has been lost. My
code follows:

#!/usr/local/bin/perl

use strict;
$| = 1;

use POE qw/Component::Client::HTTP/;
use HTTP::Request;
use HTML::HeadParser;

# Command line: $ARGV[0] is the number of requests to start up front,
# $ARGV[1] is the file to read host names from (one per line).
die "usage: $0 <parallel-requests> <host-list-file>\n" unless @ARGV >= 2;

# IN remains a bareword handle because start_next_request() reads from it.
# Three-argument open keeps odd characters in the file name from being
# interpreted as an open mode, and a failure is reported instead of silently
# producing an empty run.
open(IN, '<', $ARGV[1]) or die "cannot open $ARGV[1]: $!\n";

# Create the output directory with the built-in mkdir so the file name is
# never handed to a shell.  An already existing directory is reused.
unless (-d "out.$ARGV[1]") {
        mkdir "out.$ARGV[1]" or die "cannot create out.$ARGV[1]: $!\n";
}

# Spawn the HTTP client component.  'ua' is the alias that
# start_next_request() posts its requests to.
POE::Component::Client::HTTP->spawn(
        Timeout => 5,
        Agent => 'Mozilla/4.0 (compatible; MSIE 5.5; Windows 98)',
        Alias => 'ua'
);

# _start handler: kicks off the initial batch of requests.  The batch size is
# taken from the first command-line argument; each call pulls one host from
# the input file and posts a request for it.
sub handler_start {
        my $kernel = $_[KERNEL];
        start_next_request($kernel) for 1 .. $ARGV[0];
}

# Read the next host name from IN and post a GET request for "http://HOST/"
# to the 'ua' component, asking for the answer to be delivered to this
# session's 'response' state.
#
# Parameters:
#   $kernel - the POE kernel used to post the request.
#
# Returns nothing (and posts nothing) once the input file is exhausted.
sub start_next_request {
        my ($kernel) = @_;

        # Skip empty lines: they would otherwise become a request for
        # "http:///".  Looping here (rather than returning) ensures the caller
        # still gets a request posted whenever another host is available.
        # End of file is detected with defined(), not truthiness.
        my $host;
        while (defined($host = <IN>)) {
                chomp $host;
                last if length $host;
        }
        return unless defined $host;

        my $req = HTTP::Request->new(GET => "http://$host/");
        $kernel->post( 'ua', 'request', 'response', $req );
}

# 'response' handler: store the body of a successful response in
# "out.<input-file>/<host>" and then start the next request, so that every
# finished request is replaced by a new one for as long as input remains.
#
# Event arguments:
#   ARG0 - array reference whose first element is the request object
#   ARG1 - array reference whose first element is the response object
#
# Problems while saving are reported with warn() and do not stop the run.
sub handler_response {
        my ($kernel, $req_p, $res_p) = @_[ KERNEL, ARG0, ARG1 ];
        my ($req, $res) = ($req_p->[0], $res_p->[0]);

        if ($res->is_success) {
                my $url = $req->url();

                # Capture only the host part (everything up to the first '/')
                # so the result is always a single file name inside the output
                # directory, and only use the capture when the match succeeded.
                if ($url =~ m{\Ahttp://([^/]+)/}) {
                        my $path = "out.$ARGV[1]/$1";
                        if (open my $out, '>', $path) {
                                print {$out} $res->content;
                                close $out
                                        or warn "cannot finish writing $path: $!\n";
                        }
                        else {
                                warn "cannot write $path: $!\n";
                        }
                }
                else {
                        warn "cannot derive a file name from $url\n";
                }
        }

        # Always queue the next host, whether or not this one was saved.
        start_next_request($kernel);
}

# Create the session that drives the crawl: '_start' launches the initial
# batch of requests and 'response' handles each answer.
POE::Session->create(
        inline_states => {
                '_start'   => \&handler_start,
                'response' => \&handler_response,
        },
);

# Enter the POE event loop; the script exits once run() returns.
$poe_kernel->run();

exit 0;

-- 
Bring me my etherkiller; Oh clouds unfold! / Bring me the magic smoke of desire
I shall not cease from mental fight / Nor shall my LART rest in my hand
Till we have buried the bodies / Of all the lusers in all this land
  -- rpg, ASR                        [ My homepage is http://www.trout.me.uk/ ]

Reply via email to