Revision: 51740 Author: werdna Date: 2009-06-11 12:15:36 +0000 (Thu, 11 Jun 2009)
Log Message: ----------- Fix HoneypotIntegration extension to use hashes instead of FSS. Now takes ~8m to load a day's worth of data, and 2ms or so to look up a particular IP. Modified Paths: -------------- trunk/extensions/HoneypotIntegration/HoneypotIntegration.class.php trunk/extensions/HoneypotIntegration/HoneypotIntegration.php Modified: trunk/extensions/HoneypotIntegration/HoneypotIntegration.class.php =================================================================== --- trunk/extensions/HoneypotIntegration/HoneypotIntegration.class.php 2009-06-11 11:34:42 UTC (rev 51739) +++ trunk/extensions/HoneypotIntegration/HoneypotIntegration.class.php 2009-06-11 12:15:36 UTC (rev 51740) @@ -33,12 +33,41 @@ $out->addHTML( self::generateHoneypotLink( $randomText ) ); return 1; } + + public static function getHoneypotURLs() { + $key = wfMemcKey( 'honeypot-integration-urls' ); + + global $wgMemc; + $urls = $wgMemc->get( $key ); + + if ( is_array($urls) ) { + return $urls; + } + + global $wgHoneypotAutoLoad; + if (!$wgHoneypotAutoLoad) + return array( 'http://www.google.com' ); // Dummy URL + + global $wgHoneypotURLSource; + // Curl opt is a hack because the honeypot folks don't seem to have a valid + // certificate. + $data = Http::get( $wgHoneypotURLSource, 'default', + array( CURLOPT_SSL_VERIFYHOST => 1 ) ); + + $urls = explode( "\n", $data ); + + $wgMemc->set( $key, $urls, 86400 ); + + return $urls; + } public static function generateHoneypotLink( $randomText = null ) { - global $wgHoneypotURLs, $wgHoneypotTemplates; + global $wgHoneypotTemplates; + + $urls = self::getHoneypotURLs(); - $index = rand( 0, count( $wgHoneypotURLs ) - 1 ); - $url = $wgHoneypotURLs[$index]; + $index = rand( 0, count( $urls ) - 1 ); + $url = $urls[$index]; $index = rand( 0, count( $wgHoneypotTemplates ) - 1 ); $template = $wgHoneypotTemplates[$index]; @@ -59,9 +88,8 @@ public static function isIPListed( $ip ) { $subnet = substr( IP::toHex( $ip ), 0, -6 ); $subnet_ips = self::getHoneypotIPs( $subnet ); - - $fss = fss_prep_search( "[$ip]" ); - return false !== fss_exec_search( $fss, $subnet_ips ); + + return !empty($subnet_ips[$ip]); } // Gets data from memcached @@ -124,6 +152,7 @@ global $wgMemc; foreach ( $ips as $subnet => $ipData ) { + wfDebugLog( 'HoneypotDebug', "Inserting data for subnet $subnet" ); $wgMemc->set( wfMemcKey( 'honeypot-data', $subnet ), $data[$subnet], 86400 ); $wgMemc->set( wfMemcKey( 'honeypot-ips', $subnet ), $ips[$subnet], 86400 ); } @@ -139,6 +168,8 @@ $save_data = array(); $ips = array(); + $count = 0; + while ( !feof($fh) ) { $line = trim( fgets( $fh ) ); $data = preg_split( '/\s/', $line, 3 ); @@ -147,12 +178,18 @@ $subnet = substr( IP::toHex( $data[0] ), 0, -6 ); if ( !isset($ips[$subnet]) ) - $ips[$subnet] = ''; + $ips[$subnet] = array(); if ( !isset( $save_data[$subnet] ) ) $save_data[$subnet] = array(); $save_data[$subnet][$data[0]] = $data; - $ips[$subnet] .= '['.$data[0]."]\n"; + $ips[$subnet][$data[0]] = true; + + $count++; + + if ( $count % 100 == 0) { + wfDebugLog( 'HoneypotDebug', "Done $count IPs -- $data[0]" ); + } } } Modified: trunk/extensions/HoneypotIntegration/HoneypotIntegration.php =================================================================== --- trunk/extensions/HoneypotIntegration/HoneypotIntegration.php 2009-06-11 11:34:42 UTC (rev 51739) +++ trunk/extensions/HoneypotIntegration/HoneypotIntegration.php 2009-06-11 12:15:36 UTC (rev 51740) @@ -31,7 +31,8 @@ $wgHooks['GetUserPermissionsErrorsExpensive'][] = 'HoneypotIntegration::onGetUserPermissionsErrorsExpensive'; -$wgHoneypotURLs = array( 'http://www.google.com' ); +$wgHoneypotURLSource = ''; + $wgHoneypotTemplates = array( '<a href="honeypoturl"><!-- randomtext --></a>', ); @@ -40,6 +41,3 @@ $wgHoneypotDataFile = false; -if ( !extension_loaded( 'fss' ) ) { - die( "FastStringSearch is required for Project Honeypot Integration" ); -} _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs