As I was setting up this RSS feed the other day
http://www.speech.cs.cmu.edu/~sburke/new_rfcs.rss
I had occasion to want to request the last N bytes of a particular URL.
And this is how I did it, with LWP:
use strict;
use warnings;
{
    # Demo driver: print the trailing bytes of the rfc-dist mbox URL,
    # fetched through the range-aware user agent subclass.
    my $mbox_url  = 'http://mailman.rfc-editor.org/pipermail/rfc-dist.mbox/rfc-dist.mbox';
    my $tail_size = 3_000;

    my $agent = LWP::UserAgent::Taily->new;
    print $agent->last_n_bytes($mbox_url, $tail_size);
}
{
    package LWP::UserAgent::Taily;
    use base qw(LWP::UserAgent);

    # In an ideal world, we would return an error object
    # instead of throwing an exception.  But this is proof
    # of concept, okay?

    # last_n_bytes($url, $length_to_get)
    #
    # Returns the content of a ranged GET covering the final
    # $length_to_get bytes of the resource at $url.
    #
    # How it works: a HEAD request supplies the resource's
    # Content-Length; from that the byte offsets of the tail are
    # computed and requested with a "Range: bytes=START-END" header.
    #
    # Dies (message includes the response headers where a response
    # exists) when:
    #   * $length_to_get is not a positive integer;
    #   * the HEAD request does not come back as a plain 200;
    #   * the HEAD response carries no Content-Length;
    #   * the resource is shorter than $length_to_get;
    #   * the ranged GET is not successful;
    #   * the ranged GET delivers more than $length_to_get bytes
    #     (e.g. the server ignored the Range header).
    #
    # NOTE(review): HEAD and GET are two separate requests; if the
    # resource changes size in between, the requested range may no
    # longer be its tail.  That case is not detected here.
    sub last_n_bytes {
        my($browser, $url, $length_to_get) = @_;

        # Reject undef, zero, negative and non-integer lengths up front:
        # they would otherwise yield a nonsensical Range header.
        die "Length to get must be a positive integer\n"
            unless defined $length_to_get
               and $length_to_get =~ /\A[0-9]+\z/
               and $length_to_get > 0;

        my $r = $browser->head( $url );
        die "Can't HEAD-get $url\n" . $r->headers_as_string
          . "\n Aborting" unless $r->is_success and $r->code == 200;

        # Without a Content-Length the tail offsets cannot be computed;
        # report that explicitly instead of comparing against undef.
        my $available_length = $r->content_length;
        die "No Content-Length for $url\n" . $r->headers_as_string
          . "\n\n Aborting" unless defined $available_length;

        # A resource exactly as long as the request is fine (range 0..N-1),
        # hence >= rather than >.
        die "Too short?\n" . $r->headers_as_string
          . "\n\n Aborting" unless $available_length >= $length_to_get;

        # Byte ranges are zero-based and inclusive at both ends.
        my($start, $end) = (
            $available_length - $length_to_get,
            $available_length - 1,
        );

        $r = $browser->get( $url,
            'Range' => sprintf( "bytes=%s-%s", $start, $end),
        );
        die "Can't get $url ($start-$end)\n" . $r->headers_as_string .
          "\n\n Aborting" unless $r->is_success;

        # Guard against a server that ignored the Range header and sent
        # more than was asked for.  The Content-Length header may be
        # absent, so the actual body length is checked as well.
        my $declared_length = $r->content_length;
        my $actual_length   = length $r->content;
        die "Wrong length!?\n" . $r->headers_as_string . "\n\n Aborting"
            if ( defined $declared_length
                 and $declared_length > $length_to_get )
            or $actual_length > $length_to_get;

        return $r->content;
    }
}
--
Sean M. Burke http://search.cpan.org/~sburke/