Adamw has submitted this change and it was merged.

Change subject: Clean up CentralNotice Translation Metadata
......................................................................


Clean up CentralNotice Translation Metadata

Right now the translation metadata job is running very slowly;
part of the problem is that there is a lot of junk metadata
that we have to process.

So this patch adds a maintenance script that cleans things up. It
also stores the banner id with the revtag object so that we can
eventually check whether the banner is archived, or attached only
to archived campaigns.

E.g. this patch alone will take 800 objects on meta down to ~250.

Bug: 53769
Bug: 53792
Change-Id: If9ae70977ee0f008f65ef8f742acafb991b4d221
---
M includes/Banner.php
A maintenance/CleanCNTranslateMetadata.php
2 files changed, 142 insertions(+), 5 deletions(-)

Approvals:
  Adamw: Looks good to me, approved
  Nikerabbit: Looks good to me, but someone else must approve
  Mwalker: Looks good to me, but someone else must approve



diff --git a/includes/Banner.php b/includes/Banner.php
index 5cdbe59..9319485 100644
--- a/includes/Banner.php
+++ b/includes/Banner.php
@@ -842,7 +842,7 @@
                                        $fields = $this->extractMessageFields( 
$this->bodyContent );
                                        if ( count( $fields ) > 0 ) {
                                                // Tag the banner for 
translation
-                                               Banner::addTag( 
'banner:translate', $revisionId, $pageId );
+                                               Banner::addTag( 
'banner:translate', $revisionId, $pageId, $this->getId() );
                                                $this->runTranslateJob = true;
                                        }
                                }
@@ -1147,25 +1147,27 @@
         * @param string $tag The name of the tag
         * @param integer $revisionId ID of the revision
         * @param integer $pageId ID of the MediaWiki page for the banner
-        * @param string $value Value to store for the tag
+        * @param string $bannerId ID of banner this revtag belongs to
         * @throws MWException
         */
-       static function addTag( $tag, $revisionId, $pageId, $value = null ) {
+       static function addTag( $tag, $revisionId, $pageId, $bannerId ) {
                $dbw = CNDatabase::getDb();
 
                if ( is_object( $revisionId ) ) {
                        throw new MWException( 'Got object, excepted id' );
                }
 
+               // There should only ever be one tag applied to a banner object
+               Banner::removeTag( $tag, $pageId );
+
                $conds = array(
                        'rt_page' => $pageId,
                        'rt_type' => RevTag::getType( $tag ),
                        'rt_revision' => $revisionId
                );
-               $dbw->delete( 'revtag', $conds, __METHOD__ );
 
                if ( $value !== null ) {
-                       $conds['rt_value'] = serialize( implode( '|', $value ) 
);
+                       $conds['rt_value'] = $bannerId;
                }
 
                $dbw->insert( 'revtag', $conds, __METHOD__ );
diff --git a/maintenance/CleanCNTranslateMetadata.php 
b/maintenance/CleanCNTranslateMetadata.php
new file mode 100644
index 0000000..b93dbed
--- /dev/null
+++ b/maintenance/CleanCNTranslateMetadata.php
@@ -0,0 +1,135 @@
+<?php
+
+$IP = getenv( 'MW_INSTALL_PATH' ); // prefer the install path given in the environment
+if ( $IP === false ) {
+       $IP = __DIR__ . '/../../..'; // not set: fall back to three directories above this script
+}
+require_once "$IP/maintenance/Maintenance.php"; // pulls in Maintenance.php from the resolved install path
+
+/**
+ * Cleans up the Revision Tag table which is where CentralNotice stores
+ * metadata required for the Translate extension.
+ *
+ * So far this class, for revtag rows of the 'banner:translate' type:
+ * * Removes duplicate revision entries, keeping the highest revision per page (there should be only one per banner)
+ * * Associates entries that have no rt_value with a banner ID, matching the banner by name
+ * * Removes entries that still have no banner ID after that step
+ *
+ * Class CleanCNTranslateMetadata
+ */
+class CleanCNTranslateMetadata extends Maintenance {
+       protected $ttag;
+
+       public function execute() {
+               $this->ttag = RevTag::getType( 'banner:translate' );
+
+               $this->cleanDuplicates();
+               $this->populateIDs();
+               $this->deleteOrphans();
+       }
+
+       /**
+        * Remove duplicated revtags, keeping only the row with the highest rt_revision for each rt_page
+        */
+       protected function cleanDuplicates() {
+               $this->output( "Cleaning duplicates\n" );
+
+               $db = CNDatabase::getDb( DB_MASTER );
+
+               $res = $db->select(
+                       'revtag',
+                       array(
+                               'rt_page',
+                               'maxrev' => 'max(rt_revision)',
+                               'count' => 'count(*)'
+                       ),
+                       array( 'rt_type' => $this->ttag ),
+                       __METHOD__,
+                       array( 'GROUP BY' => 'rt_page' )
+               );
+
+               foreach ( $res as $row ) {
+                       if ( (int)$row->count === 1 ) continue; // a single row for this page: nothing to clean
+
+                       $db->delete(
+                               'revtag',
+                               array(
+                                       'rt_type' => $this->ttag,
+                                       'rt_page' => $row->rt_page,
+                                       "rt_revision != {$row->maxrev}"
+                               ),
+                               __METHOD__
+                       );
+                       $numRows = $db->affectedRows();
+                       $this->output( " -- Deleted {$numRows} rows for banner 
with page id {$row->rt_page}\n" );
+               }
+       }
+
+       /**
+        * Attach a banner ID (cn_templates.tmp_id) to every metadata row whose rt_value is still null
+        */
+       protected function populateIDs() {
+               $this->output( "Associating metadata with banner ids\n" );
+
+               $db = CNDatabase::getDb( DB_MASTER );
+
+               $res = $db->select(
+                       array( 'revtag' => 'revtag', 'page' => 'page', 
'cn_templates' => 'cn_templates' ),
+                       array( 'rt_page', 'rt_revision', 'page_title', 'tmp_id' 
),
+                       array(
+                               'rt_type' => $this->ttag,
+                               'rt_page=page_id',
+                               'rt_value is null',
+                               # Skip the 23-character "centralnotice-template-" prefix; 24 is the 1-based start of the banner name
+                               'tmp_name=substr(page_title, 24)'
+                       ),
+                       __METHOD__
+               );
+
+               foreach( $res as $row ) {
+                       $this->output( " -- Associating banner id 
{$row->tmp_id} with revtag with page id {$row->rt_page}\n" );
+                       $db->update(
+                               'revtag',
+                               array( 'rt_value' => $row->tmp_id ),
+                               array(
+                                       'rt_type' => $this->ttag,
+                                       'rt_page' => $row->rt_page,
+                                       'rt_value is null'
+                               ),
+                               __METHOD__
+                       );
+               }
+       }
+
+       /**
+        * Delete rows of the translate tag type whose rt_value is null, i.e. rows that have no banner ID associated with them
+        */
+       protected function deleteOrphans() {
+               $db = CNDatabase::getDb( DB_MASTER );
+               $this->output( "Preparing to delete orphaned rows\n" );
+
+               $res = $db->select(
+                       'revtag',
+                       array( 'rt_page', 'rt_revision' ),
+                       array( 'rt_type' => $this->ttag, 'rt_value is null' ),
+                       __METHOD__
+               );
+
+               foreach ( $res as $row ) {
+                       $this->output( " -- Deleting orphan row 
{$row->rt_page}:{$row->rt_revision}\n" );
+                       $db->delete(
+                               'revtag',
+                               array(
+                                       'rt_type' => $this->ttag,
+                                       'rt_page' => $row->rt_page,
+                                       'rt_revision' => $row->rt_revision,
+                                       'rt_value is null' // Just in case 
something updated it
+                               ),
+                               __METHOD__
+                       );
+               }
+       }
+}
+
+$maintClass = 'CleanCNTranslateMetadata'; // name of the Maintenance subclass defined in this file
+require_once RUN_MAINTENANCE_IF_MAIN; // NOTE(review): constant is defined outside this file; presumably runs $maintClass when this script is the entry point — confirm

-- 
To view, visit https://gerrit.wikimedia.org/r/83980
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: If9ae70977ee0f008f65ef8f742acafb991b4d221
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/extensions/CentralNotice
Gerrit-Branch: master
Gerrit-Owner: Mwalker <mwal...@wikimedia.org>
Gerrit-Reviewer: Adamw <awi...@wikimedia.org>
Gerrit-Reviewer: Kaldari <rkald...@wikimedia.org>
Gerrit-Reviewer: Mwalker <mwal...@wikimedia.org>
Gerrit-Reviewer: Nikerabbit <niklas.laxst...@gmail.com>
Gerrit-Reviewer: Reedy <re...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to