Adamw has submitted this change and it was merged.

Change subject: Clean up CentralNotice Translation Metadata
......................................................................
Clean up CentralNotice Translation Metadata Right now the translation metadata job is running very slowly -- part of this problem is that there is a lot of junk metadata that we have to process. So... the maintenance script cleans things up. And I'm now putting the banner id in with the revtag object so that we can eventually look to see if the banner is archived or attached only to archived campaigns or something. E.g. this patch alone will take 800 objects on meta down to ~250. Bug: 53769 Bug: 53792 Change-Id: If9ae70977ee0f008f65ef8f742acafb991b4d221 --- M includes/Banner.php A maintenance/CleanCNTranslateMetadata.php 2 files changed, 142 insertions(+), 5 deletions(-) Approvals: Adamw: Looks good to me, approved Nikerabbit: Looks good to me, but someone else must approve Mwalker: Looks good to me, but someone else must approve diff --git a/includes/Banner.php b/includes/Banner.php index 5cdbe59..9319485 100644 --- a/includes/Banner.php +++ b/includes/Banner.php @@ -842,7 +842,7 @@ $fields = $this->extractMessageFields( $this->bodyContent ); if ( count( $fields ) > 0 ) { // Tag the banner for translation - Banner::addTag( 'banner:translate', $revisionId, $pageId ); + Banner::addTag( 'banner:translate', $revisionId, $pageId, $this->getId() ); $this->runTranslateJob = true; } } @@ -1147,25 +1147,27 @@ * @param string $tag The name of the tag * @param integer $revisionId ID of the revision * @param integer $pageId ID of the MediaWiki page for the banner - * @param string $value Value to store for the tag + * @param string $bannerId ID of banner this revtag belongs to * @throws MWException */ - static function addTag( $tag, $revisionId, $pageId, $value = null ) { + static function addTag( $tag, $revisionId, $pageId, $bannerId ) { $dbw = CNDatabase::getDb(); if ( is_object( $revisionId ) ) { throw new MWException( 'Got object, excepted id' ); } + // There should only ever be one tag applied to a banner object + Banner::removeTag( $tag, $pageId ); + $conds = array( 
'rt_page' => $pageId, 'rt_type' => RevTag::getType( $tag ), 'rt_revision' => $revisionId ); - $dbw->delete( 'revtag', $conds, __METHOD__ ); if ( $value !== null ) { - $conds['rt_value'] = serialize( implode( '|', $value ) ); + $conds['rt_value'] = $bannerId; } $dbw->insert( 'revtag', $conds, __METHOD__ ); diff --git a/maintenance/CleanCNTranslateMetadata.php b/maintenance/CleanCNTranslateMetadata.php new file mode 100644 index 0000000..b93dbed --- /dev/null +++ b/maintenance/CleanCNTranslateMetadata.php @@ -0,0 +1,135 @@ +<?php + +$IP = getenv( 'MW_INSTALL_PATH' ); +if ( $IP === false ) { + $IP = __DIR__ . '/../../..'; +} +require_once "$IP/maintenance/Maintenance.php"; + +/** + * Cleans up the Revision Tag table which is where CentralNotice stores + * metadata required for the Translate extension. + * + * So far this class: + * * Removes duplicate revision entries (there should be only one per banner) + * * Associates entries with a banner by name + * * Removes entries that have no banner object + * + * Class CleanCNTranslateMetadata + */ +class CleanCNTranslateMetadata extends Maintenance { + protected $ttag; + + public function execute() { + $this->ttag = RevTag::getType( 'banner:translate' ); + + $this->cleanDuplicates(); + $this->populateIDs(); + $this->deleteOrphans(); + } + + /** + * Remove duplicated revtags + */ + protected function cleanDuplicates() { + $this->output( "Cleaning duplicates\n" ); + + $db = CNDatabase::getDb( DB_MASTER ); + + $res = $db->select( + 'revtag', + array( + 'rt_page', + 'maxrev' => 'max(rt_revision)', + 'count' => 'count(*)' + ), + array( 'rt_type' => $this->ttag ), + __METHOD__, + array( 'GROUP BY' => 'rt_page' ) + ); + + foreach ( $res as $row ) { + if ( (int)$row->count === 1 ) continue; + + $db->delete( + 'revtag', + array( + 'rt_type' => $this->ttag, + 'rt_page' => $row->rt_page, + "rt_revision != {$row->maxrev}" + ), + __METHOD__ + ); + $numRows = $db->affectedRows(); + $this->output( " -- Deleted {$numRows} rows for banner 
with page id {$row->rt_page}\n" ); + } + } + + /** + * Attach a banner ID with a orphan metadata line + */ + protected function populateIDs() { + $this->output( "Associating metadata with banner ids\n" ); + + $db = CNDatabase::getDb( DB_MASTER ); + + $res = $db->select( + array( 'revtag' => 'revtag', 'page' => 'page', 'cn_templates' => 'cn_templates' ), + array( 'rt_page', 'rt_revision', 'page_title', 'tmp_id' ), + array( + 'rt_type' => $this->ttag, + 'rt_page=page_id', + 'rt_value is null', + # Length of "centralnotice-template-" + 'tmp_name=substr(page_title, 24)' + ), + __METHOD__ + ); + + foreach( $res as $row ) { + $this->output( " -- Associating banner id {$row->tmp_id} with revtag with page id {$row->rt_page}\n" ); + $db->update( + 'revtag', + array( 'rt_value' => $row->tmp_id ), + array( + 'rt_type' => $this->ttag, + 'rt_page' => $row->rt_page, + 'rt_value is null' + ), + __METHOD__ + ); + } + } + + /** + * Delete rows that have no banner ID associated with them + */ + protected function deleteOrphans() { + $db = CNDatabase::getDb( DB_MASTER ); + $this->output( "Preparing to delete orphaned rows\n" ); + + $res = $db->select( + 'revtag', + array( 'rt_page', 'rt_revision' ), + array( 'rt_type' => $this->ttag, 'rt_value is null' ), + __METHOD__ + ); + + foreach ( $res as $row ) { + $this->output( " -- Deleting orphan row {$row->rt_page}:{$row->rt_revision}\n" ); + $db->delete( + 'revtag', + array( + 'rt_type' => $this->ttag, + 'rt_page' => $row->rt_page, + 'rt_revision' => $row->rt_revision, + 'rt_value is null' // Just in case something updated it + ), + __METHOD__ + ); + } + } +} + +$maintClass = 'CleanCNTranslateMetadata'; +require_once RUN_MAINTENANCE_IF_MAIN; -- To view, visit https://gerrit.wikimedia.org/r/83980 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: If9ae70977ee0f008f65ef8f742acafb991b4d221 Gerrit-PatchSet: 4 Gerrit-Project: mediawiki/extensions/CentralNotice Gerrit-Branch: master 
Gerrit-Owner: Mwalker <mwal...@wikimedia.org>
Gerrit-Reviewer: Adamw <awi...@wikimedia.org>
Gerrit-Reviewer: Kaldari <rkald...@wikimedia.org>
Gerrit-Reviewer: Mwalker <mwal...@wikimedia.org>
Gerrit-Reviewer: Nikerabbit <niklas.laxst...@gmail.com>
Gerrit-Reviewer: Reedy <re...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits