https://www.mediawiki.org/wiki/Special:Code/MediaWiki/102951
Revision: 102951 Author: catrope Date: 2011-11-14 09:13:58 +0000 (Mon, 14 Nov 2011) Log Message: ----------- (bug 29854) Store protocol-relative links twice in the externallinks table, once with http: in el_index and once with https:. Modified patch by Brad Jorsch Modified Paths: -------------- trunk/phase3/includes/AutoLoader.php trunk/phase3/includes/GlobalFunctions.php trunk/phase3/includes/LinksUpdate.php trunk/phase3/includes/api/ApiQueryExternalLinks.php trunk/phase3/includes/installer/DatabaseUpdater.php trunk/phase3/tests/phpunit/includes/GlobalFunctions/GlobalTest.php Added Paths: ----------- trunk/phase3/maintenance/fixExtLinksProtocolRelative.php Modified: trunk/phase3/includes/AutoLoader.php =================================================================== --- trunk/phase3/includes/AutoLoader.php 2011-11-14 08:41:21 UTC (rev 102950) +++ trunk/phase3/includes/AutoLoader.php 2011-11-14 09:13:58 UTC (rev 102951) @@ -861,6 +861,7 @@ 'FakeMaintenance' => 'maintenance/Maintenance.php', 'LoggedUpdateMaintenance' => 'maintenance/Maintenance.php', 'Maintenance' => 'maintenance/Maintenance.php', + 'FixExtLinksProtocolRelative' => 'maintenance/fixExtLinksProtocolRelative.php', 'PopulateCategory' => 'maintenance/populateCategory.php', 'PopulateImageSha1' => 'maintenance/populateImageSha1.php', 'PopulateLogSearch' => 'maintenance/populateLogSearch.php', Modified: trunk/phase3/includes/GlobalFunctions.php =================================================================== --- trunk/phase3/includes/GlobalFunctions.php 2011-11-14 08:41:21 UTC (rev 102950) +++ trunk/phase3/includes/GlobalFunctions.php 2011-11-14 09:13:58 UTC (rev 102951) @@ -647,12 +647,12 @@ } /** - * Make a URL index, appropriate for the el_index field of externallinks. + * Make URL indexes, appropriate for the el_index field of externallinks. 
* * @param $url String - * @return String + * @return array */ -function wfMakeUrlIndex( $url ) { +function wfMakeUrlIndexes( $url ) { $bits = wfParseUrl( $url ); // Reverse the labels in the hostname, convert to lower case @@ -692,7 +692,12 @@ if ( isset( $bits['fragment'] ) ) { $index .= '#' . $bits['fragment']; } - return $index; + + if ( $prot == '' ) { + return array( "http:$index", "https:$index" ); + } else { + return array( $index ); + } } /** Modified: trunk/phase3/includes/LinksUpdate.php =================================================================== --- trunk/phase3/includes/LinksUpdate.php 2011-11-14 08:41:21 UTC (rev 102950) +++ trunk/phase3/includes/LinksUpdate.php 2011-11-14 09:13:58 UTC (rev 102951) @@ -456,11 +456,13 @@ $arr = array(); $diffs = array_diff_key( $this->mExternals, $existing ); foreach( $diffs as $url => $dummy ) { - $arr[] = array( - 'el_from' => $this->mId, - 'el_to' => $url, - 'el_index' => wfMakeUrlIndex( $url ), - ); + foreach( wfMakeUrlIndexes( $url ) as $index ) { + $arr[] = array( + 'el_from' => $this->mId, + 'el_to' => $url, + 'el_index' => $index, + ); + } } return $arr; } Modified: trunk/phase3/includes/api/ApiQueryExternalLinks.php =================================================================== --- trunk/phase3/includes/api/ApiQueryExternalLinks.php 2011-11-14 08:41:21 UTC (rev 102950) +++ trunk/phase3/includes/api/ApiQueryExternalLinks.php 2011-11-14 09:13:58 UTC (rev 102951) @@ -69,6 +69,11 @@ $this->addOption( 'ORDER BY', 'el_from' ); } + // If we're querying all protocols, use DISTINCT to avoid repeating protocol-relative links twice + if ( $protocol === null ) { + $this->addOption( 'DISTINCT' ); + } + $this->addOption( 'LIMIT', $params['limit'] + 1 ); $offset = isset( $params['offset'] ) ? 
$params['offset'] : 0; if ( $offset ) { Modified: trunk/phase3/includes/installer/DatabaseUpdater.php =================================================================== --- trunk/phase3/includes/installer/DatabaseUpdater.php 2011-11-14 08:41:21 UTC (rev 102950) +++ trunk/phase3/includes/installer/DatabaseUpdater.php 2011-11-14 09:13:58 UTC (rev 102951) @@ -43,7 +43,8 @@ 'DeleteDefaultMessages', 'PopulateRevisionLength', 'PopulateRevisionSha1', - 'PopulateImageSha1' + 'PopulateImageSha1', + 'FixExtLinksProtocolRelative', ); /** Added: trunk/phase3/maintenance/fixExtLinksProtocolRelative.php =================================================================== --- trunk/phase3/maintenance/fixExtLinksProtocolRelative.php (rev 0) +++ trunk/phase3/maintenance/fixExtLinksProtocolRelative.php 2011-11-14 09:13:58 UTC (rev 102951) @@ -0,0 +1,81 @@ +<?php +/** + * Fixes any entries for protocol-relative URLs in the externallinks table, + * replacing each protocol-relative entry with two entries, one for http + * and one for https. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @ingroup Maintenance + */ + +require_once( dirname( __FILE__ ) . 
'/Maintenance.php' ); + +class FixExtLinksProtocolRelative extends LoggedUpdateMaintenance { + public function __construct() { + parent::__construct(); + $this->mDescription = "Fixes any entries in the externallinks table containing protocol-relative URLs"; + } + + protected function getUpdateKey() { + return 'fix protocol-relative URLs in externallinks'; + } + + protected function updateSkippedMessage() { + return 'protocol-relative URLs in externallinks table already fixed.'; + } + + protected function doDBUpdates() { + $db = wfGetDB( DB_MASTER ); + if ( !$db->tableExists( 'externallinks' ) ) { + $this->error( "externallinks table does not exist" ); + return false; + } + $this->output( "Fixing protocol-relative entries in the externallinks table...\n" ); + $res = $db->select( 'externallinks', array( 'el_from', 'el_to', 'el_index' ), + array( 'el_index' . $db->buildLike( '//', $db->anyString() ) ), + __METHOD__ + ); + $count = 0; + foreach ( $res as $row ) { + $count++; + if ( $count % 100 == 0 ) { + $this->output( $count ); + wfWaitForSlaves(); + } + $db->insert( 'externallinks', + array( + array( + 'el_from' => $row->el_from, + 'el_to' => $row->el_to, + 'el_index' => "http:{$row->el_index}", + ), + array( + 'el_from' => $row->el_from, + 'el_to' => $row->el_to, + 'el_index' => "https:{$row->el_index}", + ) + ), __METHOD__, array( 'IGNORE' ) + ); + $db->delete( 'externallinks', array( 'el_index' => $row->el_index ), __METHOD__ ); + } + $this->output( "Done, $count rows updated.\n" ); + return true; + } +} + +$maintClass = "FixExtLinksProtocolRelative"; +require_once( RUN_MAINTENANCE_IF_MAIN ); Property changes on: trunk/phase3/maintenance/fixExtLinksProtocolRelative.php ___________________________________________________________________ Added: svn:eol-style + native Modified: trunk/phase3/tests/phpunit/includes/GlobalFunctions/GlobalTest.php =================================================================== --- 
trunk/phase3/tests/phpunit/includes/GlobalFunctions/GlobalTest.php 2011-11-14 08:41:21 UTC (rev 102950) +++ trunk/phase3/tests/phpunit/includes/GlobalFunctions/GlobalTest.php 2011-11-14 09:13:58 UTC (rev 102951) @@ -831,42 +831,42 @@ } /** - * @dataProvider provideMakeUrlIndex() + * @dataProvider provideMakeUrlIndexes() */ - function testMakeUrlIndex( $url, $expected ) { - $index = wfMakeUrlIndex( $url ); - $this->assertEquals( $expected, $index, "wfMakeUrlIndex(\"$url\")" ); + function testMakeUrlIndexes( $url, $expected ) { + $index = wfMakeUrlIndexes( $url ); + $this->assertEquals( $expected, $index, "wfMakeUrlIndexes(\"$url\")" ); } - function provideMakeUrlIndex() { + function provideMakeUrlIndexes() { return array( array( // just a regular :) 'https://bugzilla.wikimedia.org/show_bug.cgi?id=28627', - 'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627' + array( 'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627' ) ), array( // mailtos are handled special // is this really right though? that final . probably belongs earlier? 'mailto:wiki@wikimedia.org', - 'mailto:org.wikimedia@wiki.', + array( 'mailto:org.wikimedia@wiki.' ) ), // file URL cases per bug 28627... array( // three slashes: local filesystem path Unix-style 'file:///whatever/you/like.txt', - 'file://./whatever/you/like.txt' + array( 'file://./whatever/you/like.txt' ) ), array( // three slashes: local filesystem path Windows-style 'file:///c:/whatever/you/like.txt', - 'file://./c:/whatever/you/like.txt' + array( 'file://./c:/whatever/you/like.txt' ) ), array( // two slashes: UNC filesystem path Windows-style 'file://intranet/whatever/you/like.txt', - 'file://intranet./whatever/you/like.txt' + array( 'file://intranet./whatever/you/like.txt' ) ), // Multiple-slash cases that can sorta work on Mozilla // if you hack it just right are kinda pathological, @@ -875,6 +875,15 @@ // // Those will survive the algorithm but with results that // are less consistent. 
+ + // protocol-relative URL cases per bug 29854... + array( + '//bugzilla.wikimedia.org/show_bug.cgi?id=28627', + array( + 'http://org.wikimedia.bugzilla./show_bug.cgi?id=28627', + 'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627' + ) + ), ); } _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs