Author: sparky
Date: Mon Jun  8 03:45:43 2009
New Revision: 10382

Modified:
   toys/fun/rsget.pl
Log:
- updated MegaUpload, including captcha (requires Image::Magick and db.png file)


Modified: toys/fun/rsget.pl
==============================================================================
--- toys/fun/rsget.pl   (original)
+++ toys/fun/rsget.pl   Mon Jun  8 03:45:43 2009
@@ -11,17 +11,17 @@
 - check all the URIs just after finding them in the list
   (catch non-existing files quickly)
 - restart download if same URI has been added second time
-- MegaUpload: update, there is new page, and new captcha
 - OdSiebie: there is a captcha now
 
 =item Status:
 - RS: 2009-06-07 OK
 - NL: 2009-06-07 OK, captcha works
 - OS: not working, captcha not supported
-- MU: not working, new captcha not supported
+- MU: 2009-06-08 OK, captcha works, requires db.png
 - UT: 2009-06-07 OK
 - HF: 2009-06-07 OK
 - FF: 2009-06-07 OK
+- DF: 2009-06-07 OK
 
 =item Wishlist:
 - handle multiple alternatives for same file
@@ -32,6 +32,8 @@
 use warnings;
 use Time::HiRes;
 
+our $data_path = $ENV{PWD};
+
 my $checklist = 1;
 my %gotlist;
 $SIG{CHLD} = "IGNORE";
@@ -96,6 +98,7 @@
 package Curl; # {{{
 use WWW::Curl::Easy;
 use WWW::Curl::Multi;
+use URI::Escape;
 
 my $curl_headers = [
        'User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; 
rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10',
@@ -132,7 +135,7 @@
        } else {
                my $eurl = $curl->getinfo( CURLINFO_EFFECTIVE_URL );
                $eurl =~ s#^.*/##;
-               $self->{file_name} = $eurl;
+               $self->{file_name} = uri_unescape( $eurl );
        }
 
        {
@@ -1088,18 +1091,15 @@
        $self->print("starting......");
        $self->{referer} = $url;
 
-       if ( $body =~ /The file you are trying to access is temporarily 
unavailable/ ) {
-               return $self->error( "file temporarily unavailable" );
-       }
-       if ( $body =~ /Unfortunately, the link you have clicked is not 
available./ ) {
+       if ( $body =~ /The file you are trying to access is temporarily 
unavailable/
+                       or $body =~ /Unfortunately, the link you have clicked 
is not available/
+                       or $body =~ /This file has expired due to inactivity/ ) 
{
                return $self->error( "file not found" );
        }
        my %search = (
-               captcha_img => qr#<img src="(/capgen\.php\?[0-9a-f]+)"#,
-               action => qr#<form method="POST" action="(.*?)"#,
-               s2id => qr#<input type="hidden" name="d" value="(.*?)"#,
-               s2icode => qr#<input type="hidden" name="imagecode" 
value="(.*?)"#,
-               s2mevagar => qr#<input type="hidden" name="megavar" 
value="(.*?)"#,
+               captcha_img => qr#<img 
src="(http://.*/gencap\.php\?[0-9a-f]+\.gif)"#,
+               s2icode => qr#<INPUT type="hidden" name="captchacode" 
value="(.*?)"#,
+               s2mevagar => qr#<INPUT type="hidden" name="megavar" 
value="(.*?)"#,
        );
 
        foreach my $name ( keys %search ) {
@@ -1119,13 +1119,13 @@
        my ($self, $body, $url) = @_;
        $self->print("reading captcha");
 
-       my $captcha = Get::MegaUpload::Captcha::resolve( $body );
+       my $captcha = Get::MegaUpload::Captcha::resolve( \$body );
 
        unless ( defined $captcha ) {
                return $self->stage1();
        }
 
-       my $post = 
"d=$self->{s2id}&imagecode=$self->{s2icode}&megavar=$self->{s2mevagar}&imagestring=$captcha";
+       my $post = 
"captchacode=$self->{s2icode}&megavar=$self->{s2mevagar}&captcha=$captcha";
 
        $self->curl( $self->{action}, \&stage4, post => $post );
 }
@@ -1136,34 +1136,21 @@
        $self->print("starting.........");
        $self->{referer} = $url;
 
-       my %search = (
-               s4wait => qr#x[0-9]+=([0-9]+);#,
-               s4g => qr#var . = 
String\.fromCharCode\(Math.abs\(-?([0-9]+)\)\);#,
-               s4j1 => qr#var . = '(.)' \+ 
String\.fromCharCode\(Math\.sqrt\([0-9]+\)\);#,
-               s4j2 => qr#var . = '.' \+ 
String\.fromCharCode\(Math\.sqrt\(([0-9]+)\)\);#,
-               s4href => qr#document\.getElementById\("dlbutton"\)\.innerHTML 
= '<a href="(.*?)"#,
-       );
-
-       foreach my $name ( keys %search ) {
-               my $search = $search{$name};
-               if ( $body =~ m/$search/ ) {
-                       $self->{$name} = $1;
-               } else {
-                       return $self->problem( $name, $body )
-               }
+       if ( $body =~ /id="captchaform"/ ) {
+               return $self->stage1( @_ );
        }
 
-       my $furl = $self->{s4href};
-
-       my $g = chr $self->{s4g};
-       my $j2 = chr sqrt $self->{s4j2};
-       my $jg = $self->{s4j1} . $j2 . $g;
-
-       $furl =~ s/' \+ . \+ . \+ '/$jg/;
-
-       $self->{file_url} = $furl;
+       my $wait;
+       if ( $body =~ /count=([0-9]+);/ ) {
+               $wait = $1;
+       }
+       if ( $body =~ /<a href="(.*?)".*IMG SRC=".*?but_dnld_regular.gif/ ) {
+               $self->{file_url} = $1;
+       } else {
+               return $self->problem( "link", $body )
+       }
 
-       $self->wait( $self->{s4wait}, \&stage5, "starting in" );
+       $self->wait( $wait, \&stage5, "starting in" );
 }
 
 sub stage5
@@ -1188,60 +1175,93 @@
 # }}}
 package Get::MegaUpload::Captcha; # {{{
 
-sub resolve
+my %size = (
+       A => 28, B => 22, C => 21, D => 27, E => 16,
+       F => 16, G => 26, H => 26, K => 20, M => 38,
+       N => 28, P => 21, Q => 30, R => 22, S => 18,
+       T => 19, U => 26, V => 22, W => 40, X => 23,
+       Y => 18, Z => 18
+);
+
+my @db;
+
+sub read_db()
+{
+       my $dbf = new Image::Magick;
+       $dbf->Read( $main::data_path . "/MU-captcha/db.png" );
+       foreach my $pos ( 0..3 ) {
+               my @list = sort keys %size;
+               @list = (1..9) if $pos == 3;
+
+               my $height = 32;
+               my $width = 40;
+               my $left = $width * $pos;
+               $width = 22 if $pos == 3;
+               my $top = 0;
+       
+               my %db;
+               foreach my $char ( @list ) {
+                       my $db = $dbf->Clone();
+                       $db->Crop( width => $width, height => $height, x => 
$left, y => $top );
+                       $db{$char} = $db;
+                       $top += 32;
+               }
+               push @db, \%db;
+       }
+}
+
+sub get_char
 {
-       my $capdata = shift;
-       require GD;
+       my ($src, $db, $width, $x) = @_;
 
-       my $img = GD::Image->new( $capdata );
+       my $img = $src->Clone();
+       $img->Crop( width => $width, height => 32, x => $x, y => 0 );
+       $img->Extent( width => $width, height => 32, x => 0, y => 0 );
 
-       my @img;
-       foreach ( 0..2 ) {
-               my $img = GD::Image->newPalette( 70, 32 );
-               my $w = $img->colorAllocate( 255, 255, 255 );
-               my $b = $img->colorAllocate( 0, 0, 0 );
-               push @img, { img => $img, w => $w, b => $b };
+       my $min = 1;
+       my $min_char = undef;
+       foreach my $n ( keys %$db ) {
+               my $x = $img->Compare( image => $db->{$n} );
+               my ($e, $em) = $img->Get( 'error', 'mean-error' );
+               if ( $em < $min ) {
+                       $min = $em;
+                       $min_char = $n;
+               }
        }
+       return $min_char;
+}
 
-       my $bg = $img->getPixel( 0, 0 );
+sub resolve
+{
+       my $data_ref = shift;
 
-       foreach my $y ( 1..30 ) {
-               foreach my $x ( 1..68 ) {
-                       my $ci = $img->getPixel( $x, $y );
-                       next if $ci == $bg;
-                       my ($r, $g, $b ) = $img->rgb( $ci );
+       require Image::Magick;
 
-                       next if $g < 0x60 or $g > 0x80 or $b < 0x60 or $g > 
0x80;
-                       if ( $r > 110 and $r < 130 ) {
-                               $img[ 0 ]->{img}->setPixel( $x, $y, $img[ 0 
]->{b} );
-                       }
-                       if ( $r > 160 and $r < 180 ) {
-                               $img[ 1 ]->{img}->setPixel( $x, $y, $img[ 1 
]->{b} );
-                       }
-                       if ( $r > 210 and $r < 230 ) {
-                               $img[ 2 ]->{img}->setPixel( $x, $y, $img[ 2 
]->{b} );
-                       }
-               }
-       }
+       read_db() unless @db;
 
-       my @l;
-       require IPC::Open2;
-       foreach ( 0..2 ) {
-               my $img = $img[$_]->{img};
-               IPC::Open2::open2( *READ, *WRITE, "pngtopnm | gocr -f ASCII -m 
56 -C A-Z - 2>/dev/null" );
-               print WRITE $img->png;
-               close WRITE;
-               my $out = <READ> || "";
-               close READ;
+       open IMAGE, '>', '.captcha.gif';
+       print IMAGE $$data_ref;
+       close IMAGE;
 
-               if ( $out =~ /^([A-Z])/ ) {
-                       push @l, $1;
-               } else {
-                       return undef;
-               }
-       }
+       my $img = new Image::Magick;
+       my $x = $img->Read( '.captcha.gif' );
+       unlink '.captcha.gif';
+       return if length $x;
+
+       my ($width, $height) = $img->Get( 'columns', 'rows' );
+
+       my $bg = new Image::Magick;
+       $bg->Set( size => $width."x32" );
+       $bg->Read( "xc:white" );
+       $bg->Composite( image => $img );
+
+       my @cap;
+       push @cap, get_char( $bg, $db[0], 40, 0 );
+       push @cap, get_char( $bg, $db[1], 40, $size{$cap[0]} - 6 );
+       push @cap, get_char( $bg, $db[2], 40, $width - 56 );
+       push @cap, get_char( $bg, $db[3], 22, $width - 22 );
 
-       return join "", @l;
+       return join "", @cap;
 }
 
 # }}}
_______________________________________________
pld-cvs-commit mailing list
[email protected]
http://lists.pld-linux.org/mailman/listinfo/pld-cvs-commit

Reply via email to