Cenarium has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/334337 )

Change subject: Add change_tag_statistics table to speed up tag hitcount 
retrieval
......................................................................

Add change_tag_statistics table to speed up tag hitcount retrieval

This adds a change_tag_statistics table giving the hitcount
of each tag used on the wiki. This avoids querying change_tag
directly for hitcounts, which is extremely expansive.
The table is used only if requested in config, and requires a
maintenance script to be populated.

Bug: T91535
Change-Id: Ib20a2bdd2f7156f153271f8aeee433f9a7e26f49
---
M includes/DefaultSettings.php
M includes/changetags/ChangeTags.php
A maintenance/archives/patch_change_tag_statistics.sql
A maintenance/populateTagUsageStatistics.php
M maintenance/tables.sql
5 files changed, 154 insertions(+), 10 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/37/334337/1

diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php
index 086b615..a9f7e53 100644
--- a/includes/DefaultSettings.php
+++ b/includes/DefaultSettings.php
@@ -6819,6 +6819,13 @@
 $wgUseTagFilter = true;
 
 /**
+ * Uses the change_tag_statistics table instead of change_tag to retrieve
+ * tag hitcounts, requires populateTagUsageStatistics to have been run,
+ * which requires patch_change_tag_statistics applied
+ */
+$wgUseChangeTagStatisticsTable = false;
+
+/**
  * If set to an integer, pages that are watched by this many users or more
  * will not require the unwatchedpages permission to view the number of
  * watchers.
diff --git a/includes/changetags/ChangeTags.php 
b/includes/changetags/ChangeTags.php
index d2239eb..3e4ed99 100644
--- a/includes/changetags/ChangeTags.php
+++ b/includes/changetags/ChangeTags.php
@@ -172,6 +172,7 @@
                &$rev_id = null, &$log_id = null, $params = null, RecentChange 
$rc = null,
                User $user = null
        ) {
+               global $wgUseChangeTagStatisticsTable;
 
                $tagsToAdd = array_filter( (array)$tagsToAdd ); // Make sure 
we're submitting all tags...
                $tagsToRemove = array_filter( (array)$tagsToRemove );
@@ -272,6 +273,14 @@
                        }
 
                        $dbw->insert( 'change_tag', $tagsRows, __METHOD__, [ 
'IGNORE' ] );
+                       if ( $wgUseChangeTagStatisticsTable ) {
+                               $dbw->update(
+                                       'change_tag_statistics',
+                                       [ 'cts_count = cts_count + 1' ],
+                                       [ 'cts_tag' => $tagsToAdd ],
+                                       __METHOD__
+                               );
+                       }
                }
 
                // delete from change_tag
@@ -286,6 +295,14 @@
                                        ]
                                );
                                $dbw->delete( 'change_tag', $conds, __METHOD__ 
);
+                       }
+                       if ( $wgUseChangeTagStatisticsTable ) {
+                               $dbw->update(
+                                       'change_tag_statistics',
+                                       [ 'cts_count = cts_count - 1' ],
+                                       [ 'cts_tag' => $tagsToRemove ],
+                                       __METHOD__
+                               );
                        }
                }
 
@@ -1018,6 +1035,7 @@
         * @since 1.25
         */
        public static function deleteTagEverywhere( $tag ) {
+               global $wgUseChangeTagStatisticsTable;
                $dbw = wfGetDB( DB_MASTER );
                $dbw->startAtomic( __METHOD__ );
 
@@ -1039,6 +1057,10 @@
 
                // delete from change_tag
                $dbw->delete( 'change_tag', [ 'ct_tag' => $tag ], __METHOD__ );
+
+               if ( $wgUseChangeTagStatisticsTable ) {
+                       $dbw->delete( 'change_tag_statistics', [ 'cts_tag' => 
$tag ], __METHOD__ );
+               }
 
                $dbw->endAtomic( __METHOD__ );
 
@@ -1314,21 +1336,39 @@
                        wfMemcKey( 'change-tag-statistics' ),
                        WANObjectCache::TTL_MINUTE * 5,
                        function ( $oldValue, &$ttl, array &$setOpts ) use ( 
$fname ) {
+                               global $wgUseChangeTagStatisticsTable;
                                $dbr = wfGetDB( DB_REPLICA, 'vslow' );
 
                                $setOpts += Database::getCacheSetOptions( $dbr 
);
 
-                               $res = $dbr->select(
-                                       'change_tag',
-                                       [ 'ct_tag', 'hitcount' => 'count(*)' ],
-                                       [],
-                                       $fname,
-                                       [ 'GROUP BY' => 'ct_tag', 'ORDER BY' => 
'hitcount DESC' ]
-                               );
+                               if ( $wgUseChangeTagStatisticsTable ) {
+                                       $res = $dbr->select(
+                                               'change_tag_statistics',
+                                               [ 'cts_tag', 'cts_count' ],
+                                               [],
+                                               $fname,
+                                               [ 'ORDER BY' => 'cts_count 
DESC' ]
+                                       );
 
-                               $out = [];
-                               foreach ( $res as $row ) {
-                                       $out[$row->ct_tag] = $row->hitcount;
+                                       $out = [];
+                                       foreach ( $res as $row ) {
+                                               if ( $row->cts_count ) {
+                                                       $out[$row->cts_tag] = 
$row->cts_count;
+                                               }
+                                       }
+                               } else {
+                                       $res = $dbr->select(
+                                               'change_tag',
+                                               [ 'ct_tag', 'hitcount' => 
'count(*)' ],
+                                               [],
+                                               $fname,
+                                               [ 'GROUP BY' => 'ct_tag', 
'ORDER BY' => 'hitcount DESC' ]
+                                       );
+
+                                       $out = [];
+                                       foreach ( $res as $row ) {
+                                               $out[$row->ct_tag] = 
$row->hitcount;
+                                       }
                                }
 
                                return $out;
diff --git a/maintenance/archives/patch_change_tag_statistics.sql 
b/maintenance/archives/patch_change_tag_statistics.sql
new file mode 100644
index 0000000..995b12d
--- /dev/null
+++ b/maintenance/archives/patch_change_tag_statistics.sql
@@ -0,0 +1,7 @@
+--
+-- This table contains change tags hitcounts extracted from the change_tag 
table.
+--
+CREATE TABLE /*_*/change_tag_statistics (
+  cts_tag varchar(255) NOT NULL PRIMARY KEY,
+  cts_count bigint unsigned default 0
+) /*$wgDBTableOptions*/;
diff --git a/maintenance/populateTagUsageStatistics.php 
b/maintenance/populateTagUsageStatistics.php
new file mode 100644
index 0000000..f18626a
--- /dev/null
+++ b/maintenance/populateTagUsageStatistics.php
@@ -0,0 +1,84 @@
+<?php
+/**
+ * Pouplates change_tag_statistics table
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script that makes the required database updates for 
change_tag_statistics
+ * table to be of any use.
+ *
+ * @ingroup Maintenance
+ */
+class PopulateTagUsageStatistics extends LoggedUpdateMaintenance {
+       public function __construct() {
+               parent::__construct();
+               $this->addDescription( 'Populates change_tag_statistics' );
+       }
+
+       protected function getUpdateKey() {
+               return 'populate change_tag_statistics';
+       }
+
+       protected function updateSkippedMessage() {
+               return 'change_tag_statistics table already populated.';
+       }
+
+       protected function doDBUpdates() {
+               $db = $this->getDB( DB_MASTER );
+               if ( !$db->tableExists( 'change_tag_statistics' ) ) {
+                       $this->error( "change_tag_statistics table does not 
exist" );
+
+                       return false;
+               }
+               $this->output( "Populating change_tag_statistics table\n" );
+
+               // clear table
+               $db->delete( 'change_tag_statistics', [ '1=1' ], __METHOD__ );
+
+               // get hitcounts
+               $res = $db->select(
+                       'change_tag',
+                       [ 'ct_tag', 'hitcount' => 'count(*)' ],
+                       [],
+                       __METHOD__,
+                       [ 'GROUP BY' => 'ct_tag', 'ORDER BY' => 'hitcount DESC' 
]
+               );
+
+               $count = 0;
+               $rows = [];
+               foreach ( $res as $row ) {
+                       $count++;
+                       $rows[] = [ 'cts_tag' => $row->ct_tag, 'cts_count' => 
$row->hitcount ];
+               }
+
+               // record hitcounts
+               $db->insert( 'change_tag_statistics', $rows, __METHOD__, [] );
+
+               $this->output( "change_tag_statistics population complete ... 
hitcounts added for {$count} tags\n" );
+
+               return true;
+       }
+}
+
+$maintClass = "PopulateTagUsageStatistics";
+require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/maintenance/tables.sql b/maintenance/tables.sql
index 2b6ea03..04e8e43 100644
--- a/maintenance/tables.sql
+++ b/maintenance/tables.sql
@@ -1498,6 +1498,12 @@
 -- Covering index, so we can pull all the info only out of the index.
 CREATE INDEX /*i*/change_tag_tag_id ON /*_*/change_tag 
(ct_tag,ct_rc_id,ct_rev_id,ct_log_id);
 
+-- Table giving hit counts for tags, to avoid expansive queries on change_tag.
+CREATE TABLE /*_*/change_tag_statistics (
+  cts_tag varchar(255) NOT NULL PRIMARY KEY,
+  cts_count bigint unsigned default 0
+) /*$wgDBTableOptions*/;
+
 
 -- Rollup table to pull a LIST of tags simply without ugly GROUP_CONCAT
 -- that only works on MySQL 4.1+

-- 
To view, visit https://gerrit.wikimedia.org/r/334337
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib20a2bdd2f7156f153271f8aeee433f9a7e26f49
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Cenarium <cenarium.sy...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to