Aaron Schulz has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/117373

Change subject: Redid WhatLinksHere query and added a _from_namespace field
......................................................................

Redid WhatLinksHere query and added a _from_namespace field

* Also tweaked the query so MySQL avoids doing a page_name
  index scan when it should start with the link table index
* Added population script (triggered by update.php)
* Also removed uniqueness from some indexes where it is redundant

bug: 60618
Change-Id: Icca99b6ae0ef76cb77695faf82c615516191da36
---
M includes/deferred/LinksUpdate.php
M includes/installer/DatabaseUpdater.php
M includes/installer/MysqlUpdater.php
M includes/installer/PostgresUpdater.php
M includes/installer/SqliteUpdater.php
M includes/specials/SpecialMergeHistory.php
M includes/specials/SpecialWhatlinkshere.php
A maintenance/archives/patch-il_from_namespace.sql
A maintenance/archives/patch-pl_from_namespace.sql
A maintenance/archives/patch-tl_from_namespace.sql
M maintenance/tables.sql
11 files changed, 100 insertions(+), 51 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/73/117373/1

diff --git a/includes/deferred/LinksUpdate.php 
b/includes/deferred/LinksUpdate.php
index 79232e5..6082542 100644
--- a/includes/deferred/LinksUpdate.php
+++ b/includes/deferred/LinksUpdate.php
@@ -358,6 +358,7 @@
                        foreach ( $diffs as $dbk => $id ) {
                                $arr[] = array(
                                        'pl_from' => $this->mId,
+                                       'pl_from_namespace' => 
$this->mTitle->getNamespace(),
                                        'pl_namespace' => $ns,
                                        'pl_title' => $dbk
                                );
@@ -379,6 +380,7 @@
                        foreach ( $diffs as $dbk => $id ) {
                                $arr[] = array(
                                        'tl_from' => $this->mId,
+                                       'tl_from_namespace' => 
$this->mTitle->getNamespace(),
                                        'tl_namespace' => $ns,
                                        'tl_title' => $dbk
                                );
@@ -400,6 +402,7 @@
                foreach ( $diffs as $iname => $dummy ) {
                        $arr[] = array(
                                'il_from' => $this->mId,
+                               'il_from_namespace' => 
$this->mTitle->getNamespace(),
                                'il_to' => $iname
                        );
                }
diff --git a/includes/installer/DatabaseUpdater.php 
b/includes/installer/DatabaseUpdater.php
index 3f2e2cb..7078c02 100644
--- a/includes/installer/DatabaseUpdater.php
+++ b/includes/installer/DatabaseUpdater.php
@@ -1063,4 +1063,12 @@
                        $wgContentHandlerUseDB = $this->holdContentHandlerUseDB;
                }
        }
+
+       /**
+        * Fill *_from_namespace fields in links tables
+        */
+       protected function populateBacklinkNamespaces() {
+               $task = $this->maintenance->runChild( 
'PopulateBacklinkNamespaces', 'populateBacklinkNamespaces.php' );
+               $task->execute();
+       }
 }
diff --git a/includes/installer/MysqlUpdater.php 
b/includes/installer/MysqlUpdater.php
index bcd50b3..197d487 100644
--- a/includes/installer/MysqlUpdater.php
+++ b/includes/installer/MysqlUpdater.php
@@ -250,6 +250,10 @@
                        array( 'addIndex', 'logging', 'log_user_text_time', 
'patch-logging_user_text_time_index.sql' ),
                        array( 'addField', 'page', 'page_links_updated', 
'patch-page_links_updated.sql' ),
                        array( 'addField', 'user', 'user_password_expires', 
'patch-user_password_expire.sql' ),
+                       array( 'addField', 'pagelinks', 'pl_from_namespace', 
'patch-pl_from_namespace.sql' ),
+                       array( 'addField', 'templatelinks', 
'tl_from_namespace', 'patch-tl_from_namespace.sql' ),
+                       array( 'addField', 'imagelinks', 'il_from_namespace', 
'patch-il_from_namespace.sql' ),
+                       array( 'populateBacklinkNamespaces' )
                );
        }
 
diff --git a/includes/installer/PostgresUpdater.php 
b/includes/installer/PostgresUpdater.php
index c5c10b3..e906372 100644
--- a/includes/installer/PostgresUpdater.php
+++ b/includes/installer/PostgresUpdater.php
@@ -405,6 +405,10 @@
                        array( 'addPgField', 'recentchanges', 'rc_source', 
"TEXT NOT NULL DEFAULT ''" ),
                        array( 'addPgField', 'page', 'page_links_updated', 
"TIMESTAMPTZ NULL" ),
                        array( 'addPgField', 'mwuser', 'user_password_expires', 
'TIMESTAMPTZ NULL' ),
+                       array( 'addPgField', 'pagelinks', 'pl_from_namespace', 
'INTEGER NOT NULL DEFAULT 0' ),
+                       array( 'addPgField', 'templatelinks', 
'tl_from_namespace', 'INTEGER NOT NULL DEFAULT 0' ),
+                       array( 'addPgField', 'imagelinks', 'il_from_namespace', 
'INTEGER NOT NULL DEFAULT 0' ),
+                       array( 'populateBacklinkNamespaces' )
                );
        }
 
diff --git a/includes/installer/SqliteUpdater.php 
b/includes/installer/SqliteUpdater.php
index ea19efa..8a19de0 100644
--- a/includes/installer/SqliteUpdater.php
+++ b/includes/installer/SqliteUpdater.php
@@ -128,6 +128,10 @@
                        array( 'addIndex', 'logging', 'log_user_text_time', 
'patch-logging_user_text_time_index.sql' ),
                        array( 'addField', 'page', 'page_links_updated', 
'patch-page_links_updated.sql' ),
                        array( 'addField', 'user', 'user_password_expires', 
'patch-user_password_expire.sql' ),
+                       array( 'addField', 'pagelinks', 'pl_from_namespace', 
'patch-pl_from_namespace.sql' ),
+                       array( 'addField', 'templatelinks', 
'tl_from_namespace', 'patch-tl_from_namespace.sql' ),
+                       array( 'addField', 'imagelinks', 'il_from_namespace', 
'patch-il_from_namespace.sql' ),
+                       array( 'populateBacklinkNamespaces' )
                );
        }
 
diff --git a/includes/specials/SpecialMergeHistory.php 
b/includes/specials/SpecialMergeHistory.php
index f799fc5..f9fe69b 100644
--- a/includes/specials/SpecialMergeHistory.php
+++ b/includes/specials/SpecialMergeHistory.php
@@ -396,6 +396,7 @@
                                $dbw->insert( 'pagelinks',
                                        array(
                                                'pl_from' => $this->mDestID,
+                                               'pf_from_namespace' => 
$destTitle->getNamespace(),
                                                'pl_namespace' => 
$destTitle->getNamespace(),
                                                'pl_title' => 
$destTitle->getDBkey() ),
                                        __METHOD__
diff --git a/includes/specials/SpecialWhatlinkshere.php 
b/includes/specials/SpecialWhatlinkshere.php
index 789ea76..baa0c57 100644
--- a/includes/specials/SpecialWhatlinkshere.php
+++ b/includes/specials/SpecialWhatlinkshere.php
@@ -102,9 +102,9 @@
         */
        function showIndirectLinks( $level, $target, $limit, $from = 0, $back = 
0 ) {
                global $wgMaxRedirectLinksRetrieved;
+
                $out = $this->getOutput();
                $dbr = wfGetDB( DB_SLAVE );
-               $options = array();
 
                $hidelinks = $this->opts->getValue( 'hidelinks' );
                $hideredirs = $this->opts->getValue( 'hideredirs' );
@@ -113,80 +113,84 @@
 
                $fetchlinks = ( !$hidelinks || !$hideredirs );
 
-               // Make the query
-               $plConds = array(
-                       'page_id=pl_from',
+               // Build query conds in concert for all three tables...
+               $conds['pagelinks'] = array(
                        'pl_namespace' => $target->getNamespace(),
                        'pl_title' => $target->getDBkey(),
                );
-               if ( $hideredirs ) {
-                       $plConds['rd_from'] = null;
-               } elseif ( $hidelinks ) {
-                       $plConds[] = 'rd_from is NOT NULL';
-               }
-
-               $tlConds = array(
-                       'page_id=tl_from',
+               $conds['templatelinks'] = array(
                        'tl_namespace' => $target->getNamespace(),
                        'tl_title' => $target->getDBkey(),
                );
-
-               $ilConds = array(
-                       'page_id=il_from',
+               $conds['imagelinks'] = array(
                        'il_to' => $target->getDBkey(),
                );
 
                $namespace = $this->opts->getValue( 'namespace' );
                if ( is_int( $namespace ) ) {
-                       $plConds['page_namespace'] = $namespace;
-                       $tlConds['page_namespace'] = $namespace;
-                       $ilConds['page_namespace'] = $namespace;
+                       $conds['pagelinks']['pl_from_namespace'] = $namespace;
+                       $conds['templatelinks']['tl_from_namespace'] = 
$namespace;
+                       $conds['imagelinks']['il_from_namespace'] = $namespace;
                }
 
                if ( $from ) {
-                       $tlConds[] = "tl_from >= $from";
-                       $plConds[] = "pl_from >= $from";
-                       $ilConds[] = "il_from >= $from";
+                       $conds['templatelinks'][] = "tl_from >= $from";
+                       $conds['pagelinks'][] = "pl_from >= $from";
+                       $conds['imagelinks'][] = "il_from >= $from";
                }
 
-               // Read an extra row as an at-end check
-               $queryLimit = $limit + 1;
+               if ( $hideredirs ) {
+                       $conds['pagelinks']['rd_from'] = null;
+                       $conds['templatelinks']['rd_from'] = null;
+                       $conds['imagelinks']['rd_from'] = null;
+               } elseif ( $hidelinks ) {
+                       $conds['pagelinks'][] = 'rd_from is NOT NULL';
+                       $conds['templatelinks'][] = 'rd_from is NOT NULL';
+                       $conds['imagelinks'][] = 'rd_from is NOT NULL';
+               }
 
-               $options['LIMIT'] = $queryLimit;
-               $fields = array( 'page_id', 'page_namespace', 'page_title', 
'rd_from' );
-
-               $joinConds = array( 'redirect' => array( 'LEFT JOIN', array(
-                       'rd_from = page_id',
-                       'rd_namespace' => $target->getNamespace(),
-                       'rd_title' => $target->getDBkey(),
-                       'rd_interwiki = ' . $dbr->addQuotes( '' ) . ' OR 
rd_interwiki IS NULL'
-               )));
+               $queryFunc = function( $dbr, $table, $fromCol ) use ( $conds, 
$target, $limit ) {
+                       // Read an extra row as an at-end check
+                       $queryLimit = $limit + 1;
+                       // Inner LIMIT is 2X in case of stale backlinks with no 
page
+                       $subQuery = $dbr->selectSqlText(
+                               array( $table, 'redirect' ),
+                               array( $fromCol, 'rd_from' ),
+                               $conds[$table],
+                               __CLASS__ . '::showIndirectLinks',
+                               array( 'ORDER BY' => $fromCol, 'LIMIT' => 2 * 
$queryLimit ),
+                               array( 'redirect' => array( 'LEFT JOIN', array(
+                                       "rd_from = $fromCol",
+                                       'rd_namespace' => 
$target->getNamespace(),
+                                       'rd_title' => $target->getDBkey(),
+                                       'rd_interwiki = ' . $dbr->addQuotes( '' 
) . ' OR rd_interwiki IS NULL'
+                               ) ) )
+                       );
+                       return $dbr->select(
+                               array( 'page', 'temp_backlink_range' => 
"($subQuery)" ),
+                               array( 'page_id', 'page_namespace', 
'page_title', 'rd_from' ),
+                               array(),
+                               __CLASS__ . '::showIndirectLinks',
+                               array( 'ORDER BY' => 'page_id', 'LIMIT' => 
$queryLimit ),
+                               array( 'page' => array( 'INNER JOIN', "$fromCol 
= page_id" ) )
+                       );
+               };
 
                if ( $fetchlinks ) {
-                       $options['ORDER BY'] = 'pl_from';
-                       $plRes = $dbr->select( array( 'pagelinks', 'page', 
'redirect' ), $fields,
-                               $plConds, __METHOD__, $options,
-                               $joinConds
-                       );
+                       $plRes = $queryFunc( $dbr, 'pagelinks', 'pl_from' );
                }
 
                if ( !$hidetrans ) {
-                       $options['ORDER BY'] = 'tl_from';
-                       $tlRes = $dbr->select( array( 'templatelinks', 'page', 
'redirect' ), $fields,
-                               $tlConds, __METHOD__, $options,
-                               $joinConds
-                       );
+                       $tlRes = $queryFunc( $dbr, 'templatelinks', 'tl_from' );
                }
 
                if ( !$hideimages ) {
-                       $options['ORDER BY'] = 'il_from';
-                       $ilRes = $dbr->select( array( 'imagelinks', 'page', 
'redirect' ), $fields,
-                               $ilConds, __METHOD__, $options,
-                               $joinConds
-                       );
+                       $ilRes = $queryFunc( $dbr, 'imagelinks', 'il_from' );
                }
 
-               if ( ( !$fetchlinks || !$plRes->numRows() ) && ( $hidetrans || 
!$tlRes->numRows() ) && ( $hideimages || !$ilRes->numRows() ) ) {
+               if ( ( !$fetchlinks || !$plRes->numRows() )
+                       && ( $hidetrans || !$tlRes->numRows() ) && ( 
$hideimages || !$ilRes->numRows() )
+               ) {
                        if ( 0 == $level ) {
                                $out->addHTML( $this->whatlinkshereForm() );
 
diff --git a/maintenance/archives/patch-il_from_namespace.sql 
b/maintenance/archives/patch-il_from_namespace.sql
new file mode 100644
index 0000000..6b67032
--- /dev/null
+++ b/maintenance/archives/patch-il_from_namespace.sql
@@ -0,0 +1,4 @@
+ALTER TABLE /*_*/imagelinks
+       ADD COLUMN il_from_namespace int NOT NULL default 0; 
+
+CREATE INDEX /*i*/il_backlinks_namespace ON /*_*/imagelinks 
(il_to,il_from_namespace,il_from);
\ No newline at end of file
diff --git a/maintenance/archives/patch-pl_from_namespace.sql 
b/maintenance/archives/patch-pl_from_namespace.sql
new file mode 100644
index 0000000..2f7ff04
--- /dev/null
+++ b/maintenance/archives/patch-pl_from_namespace.sql
@@ -0,0 +1,4 @@
+ALTER TABLE /*_*/pagelinks
+       ADD COLUMN pl_from_namespace int NOT NULL default 0;
+
+CREATE INDEX /*i*/pl_backlinks_namespace ON /*_*/pagelinks 
(pl_namespace,pl_title,pl_from_namespace,pl_from);
diff --git a/maintenance/archives/patch-tl_from_namespace.sql 
b/maintenance/archives/patch-tl_from_namespace.sql
new file mode 100644
index 0000000..8d6c76b
--- /dev/null
+++ b/maintenance/archives/patch-tl_from_namespace.sql
@@ -0,0 +1,4 @@
+ALTER TABLE /*_*/templatelinks
+       ADD COLUMN tl_from_namespace int NOT NULL default 0;
+
+CREATE INDEX /*i*/tl_backlinks_namespace ON /*_*/templatelinks 
(tl_namespace,tl_title,tl_from_namespace,tl_from);
diff --git a/maintenance/tables.sql b/maintenance/tables.sql
index ca610fc..c889966 100644
--- a/maintenance/tables.sql
+++ b/maintenance/tables.sql
@@ -470,6 +470,8 @@
 CREATE TABLE /*_*/pagelinks (
   -- Key to the page_id of the page containing the link.
   pl_from int unsigned NOT NULL default 0,
+  -- Namespace for this page
+  pl_from_namespace int NOT NULL default 0,
 
   -- Key to page_namespace/page_title of the target page.
   -- The target page may or may not exist, and due to renames
@@ -480,7 +482,8 @@
 ) /*$wgDBTableOptions*/;
 
 CREATE UNIQUE INDEX /*i*/pl_from ON /*_*/pagelinks 
(pl_from,pl_namespace,pl_title);
-CREATE UNIQUE INDEX /*i*/pl_namespace ON /*_*/pagelinks 
(pl_namespace,pl_title,pl_from);
+CREATE INDEX /*i*/pl_namespace ON /*_*/pagelinks 
(pl_namespace,pl_title,pl_from);
+CREATE INDEX /*i*/pl_backlinks_namespace ON /*_*/pagelinks 
(pl_namespace,pl_title,pl_from_namespace,pl_from);
 
 
 --
@@ -489,6 +492,8 @@
 CREATE TABLE /*_*/templatelinks (
   -- Key to the page_id of the page containing the link.
   tl_from int unsigned NOT NULL default 0,
+  -- Namespace for this page
+  tl_from_namespace int NOT NULL default 0,
 
   -- Key to page_namespace/page_title of the target page.
   -- The target page may or may not exist, and due to renames
@@ -499,7 +504,8 @@
 ) /*$wgDBTableOptions*/;
 
 CREATE UNIQUE INDEX /*i*/tl_from ON /*_*/templatelinks 
(tl_from,tl_namespace,tl_title);
-CREATE UNIQUE INDEX /*i*/tl_namespace ON /*_*/templatelinks 
(tl_namespace,tl_title,tl_from);
+CREATE INDEX /*i*/tl_namespace ON /*_*/templatelinks 
(tl_namespace,tl_title,tl_from);
+CREATE INDEX /*i*/tl_backlinks_namespace ON /*_*/templatelinks 
(tl_namespace,tl_title,tl_from_namespace,tl_from);
 
 
 --
@@ -510,6 +516,8 @@
 CREATE TABLE /*_*/imagelinks (
   -- Key to page_id of the page containing the image / media link.
   il_from int unsigned NOT NULL default 0,
+  -- Namespace for this page
+  il_from_namespace int NOT NULL default 0,
 
   -- Filename of target image.
   -- This is also the page_title of the file's description page;
@@ -518,7 +526,8 @@
 ) /*$wgDBTableOptions*/;
 
 CREATE UNIQUE INDEX /*i*/il_from ON /*_*/imagelinks (il_from,il_to);
-CREATE UNIQUE INDEX /*i*/il_to ON /*_*/imagelinks (il_to,il_from);
+CREATE INDEX /*i*/il_to ON /*_*/imagelinks (il_to,il_from);
+CREATE INDEX /*i*/il_backlinks_namespace ON /*_*/imagelinks 
(il_to,il_from_namespace,il_from);
 
 
 --

-- 
To view, visit https://gerrit.wikimedia.org/r/117373
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Icca99b6ae0ef76cb77695faf82c615516191da36
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Aaron Schulz <asch...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to