Revision: 51740
Author:   werdna
Date:     2009-06-11 12:15:36 +0000 (Thu, 11 Jun 2009)

Log Message:
-----------
Fix HoneypotIntegration extension to use hashes instead of FSS. Now takes ~8m 
to load a day's worth of data, and 2ms or so to look up a particular IP.

Modified Paths:
--------------
    trunk/extensions/HoneypotIntegration/HoneypotIntegration.class.php
    trunk/extensions/HoneypotIntegration/HoneypotIntegration.php

Modified: trunk/extensions/HoneypotIntegration/HoneypotIntegration.class.php
===================================================================
--- trunk/extensions/HoneypotIntegration/HoneypotIntegration.class.php  2009-06-11 11:34:42 UTC (rev 51739)
+++ trunk/extensions/HoneypotIntegration/HoneypotIntegration.class.php  2009-06-11 12:15:36 UTC (rev 51740)
@@ -33,12 +33,41 @@
                $out->addHTML( self::generateHoneypotLink( $randomText ) );
                return 1;
        }
+       
+       public static function getHoneypotURLs() {
+               $key = wfMemcKey( 'honeypot-integration-urls' );
+               
+               global $wgMemc;
+               $urls = $wgMemc->get( $key );
+               
+               if ( is_array($urls) ) {
+                       return $urls;
+               }
+               
+               global $wgHoneypotAutoLoad;
+               if (!$wgHoneypotAutoLoad)
+                       return array( 'http://www.google.com' ); // Dummy URL
+                       
+               global $wgHoneypotURLSource;
+               // Curl opt is a hack because the honeypot folks don't seem to have a valid
+               //  certificate.
+               $data = Http::get( $wgHoneypotURLSource, 'default',
+                                               array( CURLOPT_SSL_VERIFYHOST => 1 ) );
+               
+               $urls = explode( "\n", $data );
+               
+               $wgMemc->set( $key, $urls, 86400 );
+               
+               return $urls;
+       }
 
        public static function generateHoneypotLink( $randomText = null ) {
-               global $wgHoneypotURLs, $wgHoneypotTemplates;
+               global $wgHoneypotTemplates;
+               
+               $urls = self::getHoneypotURLs();
 
-               $index = rand( 0, count( $wgHoneypotURLs ) - 1 );
-               $url = $wgHoneypotURLs[$index];
+               $index = rand( 0, count( $urls ) - 1 );
+               $url = $urls[$index];
                $index = rand( 0, count( $wgHoneypotTemplates ) - 1 );
                $template = $wgHoneypotTemplates[$index];
 
@@ -59,9 +88,8 @@
        public static function isIPListed( $ip ) {
                $subnet = substr( IP::toHex( $ip ), 0, -6 );
                $subnet_ips = self::getHoneypotIPs( $subnet );
-               
-               $fss = fss_prep_search( "[$ip]" );
-               return false !== fss_exec_search( $fss, $subnet_ips );
+
+               return !empty($subnet_ips[$ip]);
        }
        
        // Gets data from memcached
@@ -124,6 +152,7 @@
                
                global $wgMemc;
                foreach ( $ips as $subnet => $ipData ) {
+                       wfDebugLog( 'HoneypotDebug', "Inserting data for subnet $subnet" );
                        $wgMemc->set( wfMemcKey( 'honeypot-data', $subnet ), $data[$subnet], 86400 );
                        $wgMemc->set( wfMemcKey( 'honeypot-ips', $subnet ), $ips[$subnet], 86400 );
                }
@@ -139,6 +168,8 @@
                $save_data = array();
                $ips = array();
                
+               $count = 0;
+               
                while ( !feof($fh) ) {
                        $line = trim( fgets( $fh ) );
                        $data = preg_split( '/\s/', $line, 3 );
@@ -147,12 +178,18 @@
                                $subnet = substr( IP::toHex( $data[0] ), 0, -6 );
                                
                                if ( !isset($ips[$subnet]) )
-                                       $ips[$subnet] = '';
+                                       $ips[$subnet] = array();
                                if ( !isset( $save_data[$subnet] ) )
                                        $save_data[$subnet] = array();
                                
                                $save_data[$subnet][$data[0]] = $data;
-                               $ips[$subnet] .= '['.$data[0]."]\n";
+                               $ips[$subnet][$data[0]] = true;
+                               
+                               $count++;
+                               
+                               if ( $count % 100 == 0) {
+                                       wfDebugLog( 'HoneypotDebug', "Done $count IPs -- $data[0]" );
+                               }
                        }
                }
                

Modified: trunk/extensions/HoneypotIntegration/HoneypotIntegration.php
===================================================================
--- trunk/extensions/HoneypotIntegration/HoneypotIntegration.php        2009-06-11 11:34:42 UTC (rev 51739)
+++ trunk/extensions/HoneypotIntegration/HoneypotIntegration.php        2009-06-11 12:15:36 UTC (rev 51740)
@@ -31,7 +31,8 @@
 $wgHooks['GetUserPermissionsErrorsExpensive'][] =
        'HoneypotIntegration::onGetUserPermissionsErrorsExpensive';
 
-$wgHoneypotURLs = array( 'http://www.google.com' );
+$wgHoneypotURLSource = '';
+
 $wgHoneypotTemplates = array(
        '<a href="honeypoturl"><!-- randomtext --></a>',
 );
@@ -40,6 +41,3 @@
 
 $wgHoneypotDataFile = false;
 
-if ( !extension_loaded( 'fss' ) ) {
-       die( "FastStringSearch is required for Project Honeypot Integration" );
-}



_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to