https://www.mediawiki.org/wiki/Special:Code/MediaWiki/102951

Revision: 102951
Author:   catrope
Date:     2011-11-14 09:13:58 +0000 (Mon, 14 Nov 2011)
Log Message:
-----------
(bug 29854) Store protocol-relative links twice in the externallinks table, once
with http: in el_index and once with https:. Modified patch by Brad Jorsch

Modified Paths:
--------------
    trunk/phase3/includes/AutoLoader.php
    trunk/phase3/includes/GlobalFunctions.php
    trunk/phase3/includes/LinksUpdate.php
    trunk/phase3/includes/api/ApiQueryExternalLinks.php
    trunk/phase3/includes/installer/DatabaseUpdater.php
    trunk/phase3/tests/phpunit/includes/GlobalFunctions/GlobalTest.php

Added Paths:
-----------
    trunk/phase3/maintenance/fixExtLinksProtocolRelative.php

Modified: trunk/phase3/includes/AutoLoader.php
===================================================================
--- trunk/phase3/includes/AutoLoader.php        2011-11-14 08:41:21 UTC (rev 
102950)
+++ trunk/phase3/includes/AutoLoader.php        2011-11-14 09:13:58 UTC (rev 
102951)
@@ -861,6 +861,7 @@
        'FakeMaintenance' => 'maintenance/Maintenance.php',
        'LoggedUpdateMaintenance' => 'maintenance/Maintenance.php',
        'Maintenance' => 'maintenance/Maintenance.php',
+       'FixExtLinksProtocolRelative' => 
'maintenance/fixExtLinksProtocolRelative.php',
        'PopulateCategory' => 'maintenance/populateCategory.php',
        'PopulateImageSha1' => 'maintenance/populateImageSha1.php',
        'PopulateLogSearch' => 'maintenance/populateLogSearch.php',

Modified: trunk/phase3/includes/GlobalFunctions.php
===================================================================
--- trunk/phase3/includes/GlobalFunctions.php   2011-11-14 08:41:21 UTC (rev 
102950)
+++ trunk/phase3/includes/GlobalFunctions.php   2011-11-14 09:13:58 UTC (rev 
102951)
@@ -647,12 +647,12 @@
 }
 
 /**
- * Make a URL index, appropriate for the el_index field of externallinks.
+ * Make URL indexes, appropriate for the el_index field of externallinks.
  *
  * @param $url String
- * @return String
+ * @return array
  */
-function wfMakeUrlIndex( $url ) {
+function wfMakeUrlIndexes( $url ) {
        $bits = wfParseUrl( $url );
 
        // Reverse the labels in the hostname, convert to lower case
@@ -692,7 +692,12 @@
        if ( isset( $bits['fragment'] ) ) {
                $index .= '#' . $bits['fragment'];
        }
-       return $index;
+
+       if ( $prot == '' ) {
+               return array( "http:$index", "https:$index" );
+       } else {
+               return array( $index );
+       }
 }
 
 /**

Modified: trunk/phase3/includes/LinksUpdate.php
===================================================================
--- trunk/phase3/includes/LinksUpdate.php       2011-11-14 08:41:21 UTC (rev 
102950)
+++ trunk/phase3/includes/LinksUpdate.php       2011-11-14 09:13:58 UTC (rev 
102951)
@@ -456,11 +456,13 @@
                $arr = array();
                $diffs = array_diff_key( $this->mExternals, $existing );
                foreach( $diffs as $url => $dummy ) {
-                       $arr[] = array(
-                               'el_from'   => $this->mId,
-                               'el_to'     => $url,
-                               'el_index'  => wfMakeUrlIndex( $url ),
-                       );
+                       foreach( wfMakeUrlIndexes( $url ) as $index ) {
+                               $arr[] = array(
+                                       'el_from'   => $this->mId,
+                                       'el_to'     => $url,
+                                       'el_index'  => $index,
+                               );
+                       }
                }
                return $arr;
        }

Modified: trunk/phase3/includes/api/ApiQueryExternalLinks.php
===================================================================
--- trunk/phase3/includes/api/ApiQueryExternalLinks.php 2011-11-14 08:41:21 UTC 
(rev 102950)
+++ trunk/phase3/includes/api/ApiQueryExternalLinks.php 2011-11-14 09:13:58 UTC 
(rev 102951)
@@ -69,6 +69,11 @@
                        $this->addOption( 'ORDER BY', 'el_from' );
                }
 
+               // If we're querying all protocols, use DISTINCT to avoid repeating protocol-relative links twice
+               if ( $protocol === null ) {
+                       $this->addOption( 'DISTINCT' );
+               }
+
                $this->addOption( 'LIMIT', $params['limit'] + 1 );
                $offset = isset( $params['offset'] ) ? $params['offset'] : 0;
                if ( $offset ) {

Modified: trunk/phase3/includes/installer/DatabaseUpdater.php
===================================================================
--- trunk/phase3/includes/installer/DatabaseUpdater.php 2011-11-14 08:41:21 UTC 
(rev 102950)
+++ trunk/phase3/includes/installer/DatabaseUpdater.php 2011-11-14 09:13:58 UTC 
(rev 102951)
@@ -43,7 +43,8 @@
                'DeleteDefaultMessages',
                'PopulateRevisionLength',
                'PopulateRevisionSha1',
-               'PopulateImageSha1'
+               'PopulateImageSha1',
+               'FixExtLinksProtocolRelative',
        );
 
        /**

Added: trunk/phase3/maintenance/fixExtLinksProtocolRelative.php
===================================================================
--- trunk/phase3/maintenance/fixExtLinksProtocolRelative.php                    
        (rev 0)
+++ trunk/phase3/maintenance/fixExtLinksProtocolRelative.php    2011-11-14 
09:13:58 UTC (rev 102951)
@@ -0,0 +1,81 @@
+<?php
+/**
+ * Fixes any entries for protocol-relative URLs in the externallinks table,
+ * replacing each protocol-relative entry with two entries, one for http
+ * and one for https.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @ingroup Maintenance
+ */
+
+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
+
+class FixExtLinksProtocolRelative extends LoggedUpdateMaintenance {
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription = "Fixes any entries in the externallinks table containing protocol-relative URLs";
+       }
+
+       protected function getUpdateKey() {
+               return 'fix protocol-relative URLs in externallinks';
+       }
+
+       protected function updateSkippedMessage() {
+               return 'protocol-relative URLs in externallinks table already fixed.';
+       }
+       
+       protected function doDBUpdates() {
+               $db = wfGetDB( DB_MASTER );
+               if ( !$db->tableExists( 'externallinks' ) ) {
+                       $this->error( "externallinks table does not exist" );
+                       return false;
+               }
+               $this->output( "Fixing protocol-relative entries in the externallinks table...\n" );
+               $res = $db->select( 'externallinks', array( 'el_from', 'el_to', 
'el_index' ),
+                       array( 'el_index' . $db->buildLike( '//', 
$db->anyString() ) ),
+                       __METHOD__
+               );
+               $count = 0;
+               foreach ( $res as $row ) {
+                       $count++;
+                       if ( $count % 100 == 0 ) {
+                               $this->output( $count );
+                               wfWaitForSlaves();
+                       }
+                       $db->insert( 'externallinks',
+                               array(
+                                       array(
+                                               'el_from' => $row->el_from,
+                                               'el_to' => $row->el_to,
+                                               'el_index' => 
"http:{$row->el_index}",
+                                       ),
+                                       array(
+                                               'el_from' => $row->el_from,
+                                               'el_to' => $row->el_to,
+                                               'el_index' => 
"https:{$row->el_index}",
+                                       )
+                               ), __METHOD__, array( 'IGNORE' )
+                       );
+                       $db->delete( 'externallinks', array( 'el_index' => 
$row->el_index ), __METHOD__ );
+               }
+               $this->output( "Done, $count rows updated.\n" );
+               return true;
+       }
+}
+
+$maintClass = "FixExtLinksProtocolRelative";
+require_once( RUN_MAINTENANCE_IF_MAIN );


Property changes on: trunk/phase3/maintenance/fixExtLinksProtocolRelative.php
___________________________________________________________________
Added: svn:eol-style
   + native

Modified: trunk/phase3/tests/phpunit/includes/GlobalFunctions/GlobalTest.php
===================================================================
--- trunk/phase3/tests/phpunit/includes/GlobalFunctions/GlobalTest.php  
2011-11-14 08:41:21 UTC (rev 102950)
+++ trunk/phase3/tests/phpunit/includes/GlobalFunctions/GlobalTest.php  
2011-11-14 09:13:58 UTC (rev 102951)
@@ -831,42 +831,42 @@
        }
 
        /**
-        * @dataProvider provideMakeUrlIndex()
+        * @dataProvider provideMakeUrlIndexes()
         */
-       function testMakeUrlIndex( $url, $expected ) {
-               $index = wfMakeUrlIndex( $url );
-               $this->assertEquals( $expected, $index, 
"wfMakeUrlIndex(\"$url\")" );
+       function testMakeUrlIndexes( $url, $expected ) {
+               $index = wfMakeUrlIndexes( $url );
+               $this->assertEquals( $expected, $index, 
"wfMakeUrlIndexes(\"$url\")" );
        }
 
-       function provideMakeUrlIndex() {
+       function provideMakeUrlIndexes() {
                return array(
                        array(
                                // just a regular :)
                                
'https://bugzilla.wikimedia.org/show_bug.cgi?id=28627',
-                               
'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627'
+                               array( 
'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627' )
                        ),
                        array(
                                // mailtos are handled special
                                // is this really right though? that final . probably belongs earlier?
                                'mailto:wiki@wikimedia.org',
-                               'mailto:org.wikimedia@wiki.',
+                               array( 'mailto:org.wikimedia@wiki.' )
                        ),
 
                        // file URL cases per bug 28627...
                        array(
                                // three slashes: local filesystem path Unix-style
                                'file:///whatever/you/like.txt',
-                               'file://./whatever/you/like.txt'
+                               array( 'file://./whatever/you/like.txt' )
                        ),
                        array(
                                // three slashes: local filesystem path Windows-style
                                'file:///c:/whatever/you/like.txt',
-                               'file://./c:/whatever/you/like.txt'
+                               array( 'file://./c:/whatever/you/like.txt' )
                        ),
                        array(
                                // two slashes: UNC filesystem path Windows-style
                                'file://intranet/whatever/you/like.txt',
-                               'file://intranet./whatever/you/like.txt'
+                               array( 'file://intranet./whatever/you/like.txt' 
)
                        ),
                        // Multiple-slash cases that can sorta work on Mozilla
                        // if you hack it just right are kinda pathological,
@@ -875,6 +875,15 @@
                        //
                        // Those will survive the algorithm but with results that
                        // are less consistent.
+
+                       // protocol-relative URL cases per bug 29854...
+                       array(
+                               
'//bugzilla.wikimedia.org/show_bug.cgi?id=28627',
+                               array(
+                                       
'http://org.wikimedia.bugzilla./show_bug.cgi?id=28627',
+                                       
'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627'
+                               )
+                       ),
                );
        }
        


_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to