Cenarium has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/334337 )
Change subject: Add change_tag_statistics table to speed up tag hitcount retrieval ...................................................................... Add change_tag_statistics table to speed up tag hitcount retrieval This adds a change_tag_statistics table giving the hitcount of each tag used on the wiki. This avoids querying change_tag directly for hitcounts, which is extremely expansive. The table is used only if requested in config, and requires a maintenance script to be populated. Bug: T91535 Change-Id: Ib20a2bdd2f7156f153271f8aeee433f9a7e26f49 --- M includes/DefaultSettings.php M includes/changetags/ChangeTags.php A maintenance/archives/patch_change_tag_statistics.sql A maintenance/populateTagUsageStatistics.php M maintenance/tables.sql 5 files changed, 154 insertions(+), 10 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/37/334337/1 diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 086b615..a9f7e53 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -6819,6 +6819,13 @@ $wgUseTagFilter = true; /** + * Uses the change_tag_statistics table instead of change_tag to retrieve + * tag hitcounts, requires populateTagUsageStatistics to have been run, + * which requires patch_change_tag_statistics applied + */ +$wgUseChangeTagStatisticsTable = false; + +/** * If set to an integer, pages that are watched by this many users or more * will not require the unwatchedpages permission to view the number of * watchers. diff --git a/includes/changetags/ChangeTags.php b/includes/changetags/ChangeTags.php index d2239eb..3e4ed99 100644 --- a/includes/changetags/ChangeTags.php +++ b/includes/changetags/ChangeTags.php @@ -172,6 +172,7 @@ &$rev_id = null, &$log_id = null, $params = null, RecentChange $rc = null, User $user = null ) { + global $wgUseChangeTagStatisticsTable; $tagsToAdd = array_filter( (array)$tagsToAdd ); // Make sure we're submitting all tags... $tagsToRemove = array_filter( (array)$tagsToRemove ); @@ -272,6 +273,14 @@ } $dbw->insert( 'change_tag', $tagsRows, __METHOD__, [ 'IGNORE' ] ); + if ( $wgUseChangeTagStatisticsTable ) { + $dbw->update( + 'change_tag_statistics', + [ 'cts_count = cts_count + 1' ], + [ 'cts_tag' => $tagsToAdd ], + __METHOD__ + ); + } } // delete from change_tag @@ -286,6 +295,14 @@ ] ); $dbw->delete( 'change_tag', $conds, __METHOD__ ); + } + if ( $wgUseChangeTagStatisticsTable ) { + $dbw->update( + 'change_tag_statistics', + [ 'cts_count = cts_count - 1' ], + [ 'cts_tag' => $tagsToRemove ], + __METHOD__ + ); } } @@ -1018,6 +1035,7 @@ * @since 1.25 */ public static function deleteTagEverywhere( $tag ) { + global $wgUseChangeTagStatisticsTable; $dbw = wfGetDB( DB_MASTER ); $dbw->startAtomic( __METHOD__ ); @@ -1039,6 +1057,10 @@ // delete from change_tag $dbw->delete( 'change_tag', [ 'ct_tag' => $tag ], __METHOD__ ); + + if ( $wgUseChangeTagStatisticsTable ) { + $dbw->delete( 'change_tag_statistics', [ 'cts_tag' => $tag ], __METHOD__ ); + } $dbw->endAtomic( __METHOD__ ); @@ -1314,21 +1336,39 @@ wfMemcKey( 'change-tag-statistics' ), WANObjectCache::TTL_MINUTE * 5, function ( $oldValue, &$ttl, array &$setOpts ) use ( $fname ) { + global $wgUseChangeTagStatisticsTable; $dbr = wfGetDB( DB_REPLICA, 'vslow' ); $setOpts += Database::getCacheSetOptions( $dbr ); - $res = $dbr->select( - 'change_tag', - [ 'ct_tag', 'hitcount' => 'count(*)' ], - [], - $fname, - [ 'GROUP BY' => 'ct_tag', 'ORDER BY' => 'hitcount DESC' ] - ); + if ( $wgUseChangeTagStatisticsTable ) { + $res = $dbr->select( + 'change_tag_statistics', + [ 'cts_tag', 'cts_count' ], + [], + $fname, + [ 'ORDER BY' => 'cts_count DESC' ] + ); - $out = []; - foreach ( $res as $row ) { - $out[$row->ct_tag] = $row->hitcount; + $out = []; + foreach ( $res as $row ) { + if ( $row->cts_count ) { + $out[$row->cts_tag] = $row->cts_count; + } + } + } else { + $res = $dbr->select( + 'change_tag', + [ 'ct_tag', 'hitcount' => 'count(*)' ], + [], + $fname, + [ 'GROUP BY' => 'ct_tag', 'ORDER BY' => 'hitcount DESC' ] + ); + + $out = []; + foreach ( $res as $row ) { + $out[$row->ct_tag] = $row->hitcount; + } } return $out; diff --git a/maintenance/archives/patch_change_tag_statistics.sql b/maintenance/archives/patch_change_tag_statistics.sql new file mode 100644 index 0000000..995b12d --- /dev/null +++ b/maintenance/archives/patch_change_tag_statistics.sql @@ -0,0 +1,7 @@ +-- +-- This table contains change tags hitcounts extracted from the change_tag table. +-- +CREATE TABLE /*_*/change_tag_statistics ( + cts_tag varchar(255) NOT NULL PRIMARY KEY, + cts_count bigint unsigned default 0 +) /*$wgDBTableOptions*/; diff --git a/maintenance/populateTagUsageStatistics.php b/maintenance/populateTagUsageStatistics.php new file mode 100644 index 0000000..f18626a --- /dev/null +++ b/maintenance/populateTagUsageStatistics.php @@ -0,0 +1,84 @@ +<?php +/** + * Pouplates change_tag_statistics table + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Maintenance + */ + +require_once __DIR__ . '/Maintenance.php'; + +/** + * Maintenance script that makes the required database updates for change_tag_statistics + * table to be of any use. + * + * @ingroup Maintenance + */ +class PopulateTagUsageStatistics extends LoggedUpdateMaintenance { + public function __construct() { + parent::__construct(); + $this->addDescription( 'Populates change_tag_statistics' ); + } + + protected function getUpdateKey() { + return 'populate change_tag_statistics'; + } + + protected function updateSkippedMessage() { + return 'change_tag_statistics table already populated.'; + } + + protected function doDBUpdates() { + $db = $this->getDB( DB_MASTER ); + if ( !$db->tableExists( 'change_tag_statistics' ) ) { + $this->error( "change_tag_statistics table does not exist" ); + + return false; + } + $this->output( "Populating change_tag_statistics table\n" ); + + // clear table + $db->delete( 'change_tag_statistics', [ '1=1' ], __METHOD__ ); + + // get hitcounts + $res = $db->select( + 'change_tag', + [ 'ct_tag', 'hitcount' => 'count(*)' ], + [], + __METHOD__, + [ 'GROUP BY' => 'ct_tag', 'ORDER BY' => 'hitcount DESC' ] + ); + + $count = 0; + $rows = []; + foreach ( $res as $row ) { + $count++; + $rows[] = [ 'cts_tag' => $row->ct_tag, 'cts_count' => $row->hitcount ]; + } + + // record hitcounts + $db->insert( 'change_tag_statistics', $rows, __METHOD__, [] ); + + $this->output( "change_tag_statistics population complete ... hitcounts added for {$count} tags\n" ); + + return true; + } +} + +$maintClass = "PopulateTagUsageStatistics"; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/maintenance/tables.sql b/maintenance/tables.sql index 2b6ea03..04e8e43 100644 --- a/maintenance/tables.sql +++ b/maintenance/tables.sql @@ -1498,6 +1498,12 @@ -- Covering index, so we can pull all the info only out of the index. CREATE INDEX /*i*/change_tag_tag_id ON /*_*/change_tag (ct_tag,ct_rc_id,ct_rev_id,ct_log_id); +-- Table giving hit counts for tags, to avoid expansive queries on change_tag. +CREATE TABLE /*_*/change_tag_statistics ( + cts_tag varchar(255) NOT NULL PRIMARY KEY, + cts_count bigint unsigned default 0 +) /*$wgDBTableOptions*/; + -- Rollup table to pull a LIST of tags simply without ugly GROUP_CONCAT -- that only works on MySQL 4.1+ -- To view, visit https://gerrit.wikimedia.org/r/334337 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ib20a2bdd2f7156f153271f8aeee433f9a7e26f49 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: Cenarium <cenarium.sy...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits