Brion VIBBER has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/365894 )

Change subject: [WIP] Early versions of 'actor' column migration
......................................................................

[WIP] Early versions of 'actor' column migration

Creates actor table. Very incomplete.

Bug: T167246
Change-Id: I98210ea8e45a0e22ca3e3300e16df2bf69066f74
---
M RELEASE-NOTES-1.30
A includes/Actor.php
M includes/DefaultSettings.php
M includes/Revision.php
M includes/actions/HistoryAction.php
M includes/actions/InfoAction.php
M includes/installer/DatabaseUpdater.php
M includes/installer/MysqlUpdater.php
M includes/installer/SqliteUpdater.php
A maintenance/archives/patch-actor-table.sql
A maintenance/migrateActor.php
11 files changed, 366 insertions(+), 14 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/94/365894/1

diff --git a/RELEASE-NOTES-1.30 b/RELEASE-NOTES-1.30
index 87e6ce5..bd725b9 100644
--- a/RELEASE-NOTES-1.30
+++ b/RELEASE-NOTES-1.30
@@ -41,6 +41,10 @@
   enabled by setting $wgUsePigLatinVariant to true.
 * Added RecentChangesPurgeRows hook to allow extensions to purge data that
   depends on the recentchanges table.
+* Column pairs (*_user, *_user_text) are being replaced with references
+  to a shared 'actor' table in the database.
+** Large wikis may want to use $wgActorTableSchemaMigrationStage to perform
+   the update at leisure.
 
 === Languages updated in 1.30 ===
 
diff --git a/includes/Actor.php b/includes/Actor.php
new file mode 100644
index 0000000..debad65
--- /dev/null
+++ b/includes/Actor.php
@@ -0,0 +1,192 @@
+<?php
+
+use Wikimedia\Rdbms\IDatabase;
+
+class Actor {
+       /**
+        * The actor_id row id for the actor table.
+        * @var int
+        */
+       protected $mId;
+
+       /**
+        * The actor_user relation to user_id, or null if not a local user.
+        * @var int|null
+        */
+       protected $mUserId;
+
+       /**
+        * The actor_text text form of the name.
+        * @var string
+        */
+       protected $mText;
+
+       /**
+        * Do not construct Actor instances manually; use the ActorStore 
service.
+        * @param int $id row id in the actor table
+        * @param int|null $userId matching user_id in the actor table, or null
+        * @param string $text text-form name of the user/ip/import
+        */
+       public function __construct( $id, $userId, $text ) {
+               $this->mId = intval( $id );
+               $this->mUserId = is_null( $userId ) ? null : intval( $userId );
+               $this->mText = strval( $text );
+       }
+
+       /**
+        * @return int the row id for the actor table
+        */
+       public function getId() {
+               return $this->mId;
+       }
+
+       /**
+        * @return int|null user_id of the matching user, or null
+        */
+       public function getUserId() {
+               return $this->mUserId;
+       }
+
+       /**
+        * @return string text form of the username/IP/etc
+        */
+       public function getText() {
+               return $this->mText;
+       }
+
+       /**
+        * @return User|bool matching User object or nothing
+        */
+       public function getUser() {
+               if ( $this->mId ) {
+                       return User::newFromId( $this->mId );
+               } else {
+                       return User::newFromName( $this->mText, false );
+               }
+       }
+}
+
+class ActorStore {
+       protected $mDb;
+
+       public function __construct( IDatabase $db ) {
+               $this->mDb = $db;
+       }
+
+       /**
+        * Store an actor record if necessary given an ID and text,
+        * returning the record with id.
+        *
+        * @param int|null $userId the user id
+        * @param string $text
+        * @return Actor
+        * @throws Exception may be thrown on database error
+        */
+       public function store( $userId, $text ) {
+               if ( $userId ) {
+                       $conds = [ 'actor_user' => $userId ];
+               } else {
+                       $conds = [ 'actor_text' => $text ];
+               }
+               $row = $this->mDb->selectRow( 'actor',
+                       [ 'actor_id', 'actor_user', 'actor_text' ],
+                       $conds,
+                       __METHOD__
+               );
+               if ( $row ) {
+                       return new Actor( $row->actor_id, $row->actor_user, 
$row->actor_text );
+               } else {
+                       $id = $this->mDb->nextSequenceValue( 'actor_id_seq' );
+                       $ok = $this->mDb->insert( 'actor',
+                               [
+                                       'actor_id' => $id,
+                                       'actor_user' => $userId,
+                                       'actor_text' => $text
+                               ],
+                               __METHOD__
+                       );
+                       if ( $ok ) {
+                               $id = $this->mDb->insertId();
+                               return new Actor( $id, $userId, $text );
+                       } else {
+                               throw new MWException( 'unexpected failure 
inserting actor row' );
+                       }
+               }
+       }
+
+       /**
+        * Provide the columns that would be needed to look up an actor 
reference
+        * when manually doing a join to actor table.
+        *
+        * @return array of string column names
+        */
+       public function selectFields() {
+               return [ 'actor_id', 'actor_user', 'actor_text' ];
+       }
+
+       /**
+        * Create an Actor instance from a database row object.
+        * @param object $row a DB result row containing necessary fields
+        * @return Actor
+        */
+       public function fetchFromRow( $row ) {
+               return new Actor( $row->actor_id, $row->actor_user, 
$row->actor_text );
+       }
+
+       /**
+        * Internal wrapper for fetching individual actor references.
+        *
+        * @param array $conds
+        * @return Actor|bool matching Actor instance or false if none found
+        */
+       protected function fetchByConds( array $conds ) {
+               $row = $this->mDb->selectRow( 'actor',
+                       $this->selectFields(),
+                       $conds,
+                       __METHOD__
+               );
+               if ( $row ) {
+                       return $this->fetchFromRow( $row )
+               } else {
+                       return false;
+               }
+       }
+
+       /**
+        * Fetch an individual actor reference by id.
+        *
+        * @param array $conds
+        * @return Actor|bool matching Actor instance or false if none found
+        */
+       public function fetchById( $id ) {
+               return $this->fetchByConds( [ 'actor_id' => $id ] );
+       }
+
+       /**
+        * Fetch an individual actor reference by user id.
+        *
+        * @param array $conds
+        * @return Actor|bool matching Actor instance or false if none found
+        */
+       public function fetchByUserId( $userId ) {
+               return $this->fetchByConds( [ 'actor_user' => $userId ] );
+       }
+
+       /**
+        * Fetch an individual actor reference by text (name/IP/etc).
+        *
+        * @param array $conds
+        * @return Actor|bool matching Actor instance or false if none found
+        */
+       public function fetchByText( $text ) {
+               return $this->fetchByConds( [ 'actor_text' => $text ] );
+       }
+
+       public function fetchByPrefix( $prefix ) {
+               return $this->fetchByConds( [ 'actor_text ' .
+                       $this->mDb->buildLike( $prefix, $this->mDb->anyString() 
) ] );
+       }
+}
+
+class ActorHelper {
+}
diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php
index 6ce9a66..b555cc4 100644
--- a/includes/DefaultSettings.php
+++ b/includes/DefaultSettings.php
@@ -8688,6 +8688,13 @@
 $wgInterwikiPrefixDisplayTypes = [];
 
 /**
+ * Actor table schema migration stage.
+ * @since 1.30
+ * @var int One of the MIGRATION_* constants
+ */
+$wgActorTableSchemaMigrationStage = MIGRATION_OLD;
+
+/**
  * For really cool vim folding this needs to be at the end:
  * vim: foldmarker=@{,@} foldmethod=marker
  * @}
diff --git a/includes/Revision.php b/includes/Revision.php
index c3782ba..023d581 100644
--- a/includes/Revision.php
+++ b/includes/Revision.php
@@ -431,13 +431,25 @@
        }
 
        /**
-        * Return the value of a select() page conds array for the page table.
+        * Return the value of a select() JOIN conds array for the page table.
         * This will assure that the revision(s) are not orphaned from live 
pages.
         * @since 1.19
         * @return array
         */
        public static function pageJoinCond() {
                return [ 'INNER JOIN', [ 'page_id = rev_page' ] ];
+       }
+
+       /**
+        * Return the value of a select() JOIN conds array for the actor table.
+        * This will get actor table rows for all users and other actor types.
+        * @since 1.30
+        * @return array
+        */
+       public static function actorJoinCond() {
+               return [ 'LEFT JOIN',
+                       [ '(rev_user = actor_user) OR ((rev_user = 0) AND 
rev_user_text=actor_text)' ] ];
+               //return [ 'LEFT JOIN', [ 'actor_id = rev_actor' ] ];
        }
 
        /**
@@ -625,10 +637,14 @@
 
                        // Use user_name for users and rev_user_text for IPs...
                        $this->mUserText = null; // lazy load if left null
-                       if ( $this->mUser == 0 ) {
-                               $this->mUserText = $row->rev_user_text; // IP 
user
-                       } elseif ( isset( $row->user_name ) ) {
-                               $this->mUserText = $row->user_name; // 
logged-in user
+                       if ( isset( $row->actor_text ) ) {
+                               $this->mUserText = $row->actor_text;
+                       } else {
+                               if ( $this->mUser == 0 ) {
+                                       $this->mUserText = $row->rev_user_text; 
// IP user
+                               } elseif ( isset( $row->user_name ) ) {
+                                       $this->mUserText = $row->user_name; // 
logged-in user
+                               }
                        }
                        $this->mOrigUserText = $row->rev_user_text;
                } elseif ( is_array( $row ) ) {
diff --git a/includes/actions/HistoryAction.php 
b/includes/actions/HistoryAction.php
index 7460340..d498509 100644
--- a/includes/actions/HistoryAction.php
+++ b/includes/actions/HistoryAction.php
@@ -479,15 +479,15 @@
                $batch = new LinkBatch();
                $revIds = [];
                foreach ( $this->mResult as $row ) {
-                       if ( $row->rev_parent_id ) {
-                               $revIds[] = $row->rev_parent_id;
+                       $rev = Revision::newFromRow( $row );
+                       $parentId = $rev->getParentId();
+                       if ( $parentId ) {
+                               $revIds[] = $parentId;
                        }
-                       if ( !is_null( $row->user_name ) ) {
-                               $batch->add( NS_USER, $row->user_name );
-                               $batch->add( NS_USER_TALK, $row->user_name );
-                       } else { # for anons or usernames of imported revisions
-                               $batch->add( NS_USER, $row->rev_user_text );
-                               $batch->add( NS_USER_TALK, $row->rev_user_text 
);
+                       $name = $rev->getUserText( Revision::FOR_THIS_USER, 
$this->getUser() );
+                       if ( $name !== '' ) {
+                               $batch->add( NS_USER, $name );
+                               $batch->add( NS_USER_TALK, $name );
                        }
                }
                $this->parentLens = Revision::getParentLengths( $this->mDb, 
$revIds );
diff --git a/includes/actions/InfoAction.php b/includes/actions/InfoAction.php
index baec944..8465d1e 100644
--- a/includes/actions/InfoAction.php
+++ b/includes/actions/InfoAction.php
@@ -704,6 +704,8 @@
                        self::getCacheKey( $cache, $page->getTitle(), 
$page->getLatest() ),
                        WANObjectCache::TTL_WEEK,
                        function ( $oldValue, &$ttl, &$setOpts ) use ( $page, 
$config, $fname ) {
+                               global $wgActorTableSchemaMigrationStage;
+
                                $title = $page->getTitle();
                                $id = $title->getArticleID();
 
@@ -737,9 +739,20 @@
                                if ( $config->get( 'MiserMode' ) ) {
                                        $result['authors'] = 0;
                                } else {
+                                       switch ( 
$wgActorTableSchemaMigrationStage ) {
+                                               case MIGRATION_OLD:
+                                               case MIGRATION_WRITE_BOTH:
+                                                       $field = 
'rev_user_text';
+                                                       break;
+                                               case MIGRATION_WRITE_NEW:
+                                               case MIGRATION_NEW:
+                                               default:
+                                                       $field = 'rev_actor';
+                                       }
+
                                        $result['authors'] = 
(int)$dbr->selectField(
                                                'revision',
-                                               'COUNT(DISTINCT rev_user_text)',
+                                               "COUNT(DISTINCT $field)",
                                                [ 'rev_page' => $id ],
                                                $fname
                                        );
diff --git a/includes/installer/DatabaseUpdater.php 
b/includes/installer/DatabaseUpdater.php
index e5cbb7c..0a626c2 100644
--- a/includes/installer/DatabaseUpdater.php
+++ b/includes/installer/DatabaseUpdater.php
@@ -1191,4 +1191,18 @@
                        $wgContentHandlerUseDB = $this->holdContentHandlerUseDB;
                }
        }
+
+       protected function migrateActor() {
+               if ( !$this->updateRowExists( 'MigrateActor' ) ) {
+                       $this->output(
+                               "Migrating actor refs to the 'actor' table, 
printing progress markers. For large\n" .
+                               "databases, you may want to hit Ctrl-C and do 
this manually with\n" .
+                               "maintenance/migrateActor.php.\n"
+                       );
+                       $task = $this->maintenance->runChild( 'MigrateActor', 
'migrateActor.php' );
+                       $task->execute();
+                       $this->output( "done.\n" );
+               }
+       }
+
 }
diff --git a/includes/installer/MysqlUpdater.php 
b/includes/installer/MysqlUpdater.php
index adfe2f6..7f4aeac 100644
--- a/includes/installer/MysqlUpdater.php
+++ b/includes/installer/MysqlUpdater.php
@@ -302,6 +302,10 @@
                        [ 'addField', 'user_groups', 'ug_expiry', 
'patch-user_groups-ug_expiry.sql' ],
                        [ 'addIndex', 'image', 'img_user_timestamp', 
'patch-image-user-index-2.sql' ],
                        [ 'modifyField', 'image', 'img_media_type', 
'patch-add-3d.sql' ],
+
+                       // 1.30
+                       [ 'addTable', 'actor', 'patch-actor-table.sql' ],
+                       [ 'migrateActor' ],
                ];
        }
 
diff --git a/includes/installer/SqliteUpdater.php 
b/includes/installer/SqliteUpdater.php
index 9c90283..5ee48b2 100644
--- a/includes/installer/SqliteUpdater.php
+++ b/includes/installer/SqliteUpdater.php
@@ -165,6 +165,10 @@
                        [ 'addField', 'externallinks', 'el_index_60', 
'patch-externallinks-el_index_60.sql' ],
                        [ 'addField', 'user_groups', 'ug_expiry', 
'patch-user_groups-ug_expiry.sql' ],
                        [ 'addIndex', 'image', 'img_user_timestamp', 
'patch-image-user-index-2.sql' ],
+
+                       // 1.30
+                       [ 'addTable', 'actor', 'patch-actor-table.sql' ],
+                       [ 'migrateActor' ],
                ];
        }
 
diff --git a/maintenance/archives/patch-actor-table.sql 
b/maintenance/archives/patch-actor-table.sql
new file mode 100644
index 0000000..500f88c
--- /dev/null
+++ b/maintenance/archives/patch-actor-table.sql
@@ -0,0 +1,42 @@
+--
+-- Revision authorship is represented by an actor row.
+-- This avoids duplicating common usernames/IP addresses
+-- in the revision table, and makes rename processing quicker.
+--
+CREATE TABLE /*_*/actor (
+  -- Unique ID to identify each entry
+  actor_id bigint unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
+
+  -- Key to user.user_id of the user who made this edit.
+  -- Stores NULL for anonymous edits and for some mass imports.
+  actor_user int unsigned,
+
+  -- Text username or IP address of the editor.
+  actor_text varchar(255) binary NOT NULL default ''
+) /*$wgDBTableOptions*/;
+
+
+CREATE UNIQUE INDEX /*i*/actor_user ON /*_*/actor (actor_user);
+CREATE UNIQUE INDEX /*i*/actor_text ON /*_*/actor (actor_text);
+
+
+
+
+-- TODO: maybe do these two as additions with temp tables?
+
+-- Add new fields to revision
+--ALTER TABLE revision
+--  ADD COLUMN rev_actor bigint unsigned NOT NULL;
+--CREATE INDEX /*i*/actor_timestamp ON /*_*/revision (rev_actor,rev_timestamp);
+--CREATE INDEX /*i*/page_actor_timestamp ON /*_*/revision 
(rev_page,rev_actor,rev_timestamp);
+
+-- Add new fields to archive and update some bigints
+--ALTER TABLE archive
+--  ADD COLUMN ar_actor bigint unsigned
+--    AFTER ar_comment;
+
+-- TODO: image
+-- TODO: oldimage
+-- TODO: filearchive
+-- TODO: recentchanges
+-- TODO: logging
diff --git a/maintenance/migrateActor.php b/maintenance/migrateActor.php
new file mode 100644
index 0000000..2a730c7
--- /dev/null
+++ b/maintenance/migrateActor.php
@@ -0,0 +1,56 @@
+<?php
+/**
+ * Migrate actor refs from pre-1.30 columns to the 'actor' table
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+use Wikimedia\Rdbms\IDatabase;
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script that migrates actor refs from pre-1.30 columns to the
+ * 'actor' table
+ *
+ * @ingroup Maintenance
+ */
+class MigrateActor extends LoggedUpdateMaintenance {
+       public function __construct() {
+               parent::__construct();
+               $this->addDescription( 'Migrates actor refs from pre-1.30 
columns to the \'actor\' table' );
+               $this->setBatchSize( 100 );
+       }
+
+       protected function getUpdateKey() {
+               return __CLASS__;
+       }
+
+       protected function updateSkippedMessage() {
+               return 'actor already migrated.';
+       }
+
+       protected function doDBUpdates() {
+               // TODO: do stuff
+               return true;
+       }
+}
+
+$maintClass = "MigrateComments";
+require_once RUN_MAINTENANCE_IF_MAIN;

-- 
To view, visit https://gerrit.wikimedia.org/r/365894
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I98210ea8e45a0e22ca3e3300e16df2bf69066f74
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Brion VIBBER <br...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to