Author: sparky
Date: Sun Jun  7 17:58:05 2009
New Revision: 10374

Modified:
   toys/fun/rsget.pl
Log:
- handle cases where we get an HTML page instead of the file
  to be downloaded
- added filefactory.com


Modified: toys/fun/rsget.pl
==============================================================================
--- toys/fun/rsget.pl   (original)
+++ toys/fun/rsget.pl   Sun Jun  7 17:58:05 2009
@@ -16,11 +16,12 @@
 
 =item Status:
 - RS: 2009-06-07 OK
-- NL: 2009-06-07 OK
+- NL: 2009-06-07 OK, captcha works
 - OS: not working, captcha not supported
 - MU: not working, new captcha not supported
 - UT: 2009-06-07 OK
 - HF: 2009-06-07 OK
+- FF: 2009-06-07 OK
 
 =item Wishlist:
 - handle multiple alternatives for same file
@@ -97,10 +98,10 @@
 use WWW::Curl::Multi;
 
 my $curl_headers = [
-       'User-Agent: Mozilla/5.0 (X11; U; Linux ppc; ca-AD; rv:1.8.1.17) 
Gecko/20080926 PLD/3.0 (Th) Iceape/1.1.12',
+       'User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; 
rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10',
        'Accept: 
text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
        'Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7',
-       'Accept-Language: ca,en-us;q=0.7,en;q=0.3',
+       'Accept-Language: en-us,en;q=0.5',
        ];
 
 sub file_init
@@ -108,14 +109,24 @@
        my $self = shift;
        my $curl = $self->{curl};
 
+       $self->{time_start} = time;
+
+       {
+               my $mime = $curl->getinfo(      CURLINFO_CONTENT_TYPE );
+               if ( $mime =~ m#^text/html# ) {
+                       $self->{is_html} = 1;
+                       $self->{size_total} = 0;
+                       $self->{size_got} = 0;
+                       return;
+               }
+       }
+
        {
                my $f_len = $curl->getinfo( CURLINFO_CONTENT_LENGTH_DOWNLOAD );
                $self->{size_total} = $f_len || 0;
                $self->{size_got} = 0;
        }
 
-       $self->{time_start} = time;
-
        if ( $self->{head} =~ 
/^Content-Disposition:\s*attachment;\s*filename\s*=\s*"?(.*?)"?\s*$/im ) {
                $self->{file_name} = $1;
        } else {
@@ -155,8 +166,12 @@
        my $len = length $chunk;
        $self->{size_got} += $len;
 
-       my $file = $self->{file};
-       print $file $chunk;
+       if ( $self->{file} ) {
+               my $file = $self->{file};
+               print $file $chunk;
+       } else {
+               $self->{body} .= $chunk;
+       }
 
        return $len;
 }
@@ -222,6 +237,7 @@
                $curl->setopt( CURLOPT_WRITEFUNCTION, \&body_file );
                $curl->setopt( CURLOPT_WRITEDATA, $ecurl );
        } else {
+               $ecurl->{is_html} = 1;
                $curl->setopt( CURLOPT_WRITEFUNCTION, \&body_scalar );
                $curl->setopt( CURLOPT_WRITEDATA, \$ecurl->{body} );
        }
@@ -251,7 +267,7 @@
 
        if ( $err ) {
                my $error = $curl->errbuf;
-               $obj->print( "error: $err" );
+               $obj->print( "error($err): $error" );
                $obj->problem();
                return undef;
        }
@@ -264,7 +280,7 @@
                        : $ecurl->{body};
                my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
                
-               &$func( $obj, $body, $eurl );
+               &$func( $obj, $body, $eurl, $ecurl->{is_html} );
        }
 }
 
@@ -485,6 +501,7 @@
 {
        my $self = shift;
        $self->print("starting download");
+       $self->{file_html} = \&start unless defined $self->{file_html};
        $self->curl( $self->{file_url}, \&finish, save => 1, @_ );
 }
 
@@ -505,10 +522,19 @@
 sub finish
 {
        my $self = shift;
-       my $reason = shift;
+       my $body = shift;
+       my $url = shift;
+       my $is_html = shift;
 
-       my $url = $self->{url};
-       $gotlist{ $url } = $reason;
+       if ( $is_html ) {
+               if ( my $func = $self->{file_html} ) {
+                       delete $self->{file_url};
+                       delete $self->{file_html};
+                       return &$func( $self, $body, $url );
+               }
+       }
+
+       $gotlist{ $self->{url} } = $body;
 
        my $net = $self->{net};
        my $id = $self->{id};
@@ -629,7 +655,6 @@
 
        $body =~ /form name="dlf" action="(.*?)"/m;
        $self->{file_url} = $1;
-       $self->{file_referer} = $url;
 
        $self->wait( $wait, \&stage4, "starting download in" );
 }
@@ -663,7 +688,7 @@
        my $url = shift;
 
        ++$nlcookie;
-       my $cookie = ".nl.$nlcookie.txt";
+       my $cookie = ".cookie.nl.$nlcookie.txt";
        unlink $cookie if -e $cookie;
 
        Get::makenew( "NL", $class, $url, cookies => $cookie );
@@ -981,7 +1006,7 @@
        my $url = shift;
 
        ++$oscookie;
-       my $cookie = ".os.$oscookie.txt";
+       my $cookie = ".cookie.os.$oscookie.txt";
        unlink $cookie if -e $cookie;
 
        Get::makenew( "OS", $class, $url, slots => 16, cookies => $cookie );
@@ -1042,7 +1067,7 @@
        my $url = shift;
 
        ++$mucookie;
-       my $cookie = ".mu.$nlcookie.txt";
+       my $cookie = ".cookie.mu.$nlcookie.txt";
        unlink $cookie if -e $cookie;
 
        Get::makenew( "MU", $class, $url, cookies => $cookie );
@@ -1364,6 +1389,93 @@
 };
 
 # }}}
+package Get::FileFactory; # {{{
+
+BEGIN {
+       our @ISA;
+       @ISA = qw(Get);
+}
+
+sub new
+{
+       my $proto = shift;
+       my $class = ref $proto || $proto;
+       my $url = shift;
+       Get::makenew( "FF", $class, $url );
+}
+
+sub stage1
+{
+       my $self = shift;
+       delete $self->{referer};
+
+       $self->print("starting...");
+       $self->curl( $self->{url}, \&stage2 );
+}
+
+sub stage2
+{
+       my ($self, $body, $url) = @_;
+       $self->print("starting......");
+       $self->{referer} = $url;
+
+       my $link;
+       if ( $body =~ /You are currently downloading/ ) {
+               return $self->error( "multi-download not allowed" );
+       } elsif ( $body =~ /starthtimer[\s\S]*timerend=d\.getTime\(\)\+(\d+);/m 
and $1 > 0 ) {
+               return $self->wait( 1 + int ( $1 / 1000 ), \&stage1, "free 
limit reached, waiting" );
+       } elsif ( $body =~ m#<form action="(.*)" method="post">\s*<input 
type="submit" value="Free#m ) {
+               $link = $1;
+       } else {
+               return $self->problem( "link", $body );
+       }
+
+       $self->curl( $link, \&stage3, post => "freeBtn=Free%20Download" );
+}
+
+sub stage3
+{
+       my ($self, $body, $url) = @_;
+       $self->{referer} = $url;
+       $self->print("starting.........");
+       if ( $body =~ m#<a href="(.*?)">Click here to begin your download</a># 
) {
+               $self->{file_url} = $1;
+       } else {
+               return $self->problem( "file url", $body );
+       }
+       
+       $self->wait( 30, \&stage4, "starting download in" );
+}
+
+sub stage4
+{
+       my $self = shift;
+       $self->print("downloading");
+       $self->{file_html} = \&stage5;
+
+       $self->download();
+}
+
+sub stage5
+{
+       my ($self, $body, $url) = @_;
+       # file turned out to be html, means we need to wait
+       if ( $body =~ /You are currently downloading too many files at once/ ) {
+               return $self->error( "multi-download not allowed" );
+       } elsif ( $body =~ /Please wait (\d+) minutes to download more files/ ) 
{
+               return $self->wait( $1 * 60 - 30, \&stage1, "free limit 
reached, waiting" );
+       } elsif ( $body =~ /Please wait (\d+) seconds to download more files/ ) 
{
+               return $self->wait( $1, \&stage1, "free limit reached, waiting" 
);
+       }
+       return $self->problem( undef, $body );
+}
+
+$getters{FF} = {
+       uri => qr{(www.)?filefactory\.com/.*?},
+       add => sub { Get::FileFactory->new( @_ ) },
+};
+
+# }}}
 package main; # {{{
 
 my $get_list = 'get.list';
_______________________________________________
pld-cvs-commit mailing list
[email protected]
http://lists.pld-linux.org/mailman/listinfo/pld-cvs-commit

Reply via email to