Brion VIBBER has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/365894 )
Change subject: [WIP] Early versions of 'actor' column migration ...................................................................... [WIP] Early versions of 'actor' column migration Creates actor table. Very incomplete. Bug: T167246 Change-Id: I98210ea8e45a0e22ca3e3300e16df2bf69066f74 --- M RELEASE-NOTES-1.30 A includes/Actor.php M includes/DefaultSettings.php M includes/Revision.php M includes/actions/HistoryAction.php M includes/actions/InfoAction.php M includes/installer/DatabaseUpdater.php M includes/installer/MysqlUpdater.php M includes/installer/SqliteUpdater.php A maintenance/archives/patch-actor-table.sql A maintenance/migrateActor.php 11 files changed, 366 insertions(+), 14 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/94/365894/1 diff --git a/RELEASE-NOTES-1.30 b/RELEASE-NOTES-1.30 index 87e6ce5..bd725b9 100644 --- a/RELEASE-NOTES-1.30 +++ b/RELEASE-NOTES-1.30 @@ -41,6 +41,10 @@ enabled by setting $wgUsePigLatinVariant to true. * Added RecentChangesPurgeRows hook to allow extensions to purge data that depends on the recentchanges table. +* Column pairs (*_user, *_user_text) are being replaced with references + to a shared 'actor' table in the database. +** Large wikis may want to use $wgActorTableSchemaMigrationStage to perform + the update at leisure. === Languages updated in 1.30 === diff --git a/includes/Actor.php b/includes/Actor.php new file mode 100644 index 0000000..debad65 --- /dev/null +++ b/includes/Actor.php @@ -0,0 +1,192 @@ +<?php + +use Wikimedia\Rdbms\IDatabase; + +class Actor { + /** + * The actor_id row id for the actor table. + * @var int + */ + protected $mId; + + /** + * The actor_user relation to user_id, or null if not a local user. + * @var int|null + */ + protected $mUserId; + + /** + * The actor_text text form of the name. + * @var string + */ + protected $mText; + + /** + * Do not construct Actor instances manually; use the ActorStore service. + * @param int $id row id in the actor table + * @param int|null $userId matching user_id in the actor table, or null + * @param string $text text-form name of the user/ip/import + */ + public function __construct( $id, $userId, $text ) { + $this->mId = intval( $id ); + $this->mUserId = is_null( $userId ) ? null : intval( $userId ); + $this->mText = strval( $text ); + } + + /** + * @return int the row id for the actor table + */ + public function getId() { + return $this->mId; + } + + /** + * @return int|null user_id of the matching user, or null + */ + public function getUserId() { + return $this->mUserId; + } + + /** + * @return string text form of the username/IP/etc + */ + public function getText() { + return $this->mText; + } + + /** + * @return User|bool matching User object or nothing + */ + public function getUser() { + if ( $this->mId ) { + return User::newFromId( $this->mId ); + } else { + return User::newFromName( $this->mText, false ); + } + } +} + +class ActorStore { + protected $mDb; + + public function __construct( IDatabase $db ) { + $this->mDb = $db; + } + + /** + * Store an actor record if necessary given an ID and text, + * returning the record with id. + * + * @param int|null $userId the user id + * @param string $text + * @return Actor + * @throws Exception may be thrown on database error + */ + public function store( $userId, $text ) { + if ( $userId ) { + $conds = [ 'actor_user' => $userId ]; + } else { + $conds = [ 'actor_text' => $text ]; + } + $row = $this->mDb->selectRow( 'actor', + [ 'actor_id', 'actor_user', 'actor_text' ], + $conds, + __METHOD__ + ); + if ( $row ) { + return new Actor( $row->actor_id, $row->actor_user, $row->actor_text ); + } else { + $id = $this->mDb->nextSequenceValue( 'actor_id_seq' ); + $ok = $this->mDb->insert( 'actor', + [ + 'actor_id' => $id, + 'actor_user' => $userId, + 'actor_text' => $text + ], + __METHOD__ + ); + if ( $ok ) { + $id = $this->mDb->insertId(); + return new Actor( $id, $userId, $text ); + } else { + throw new MWException( 'unexpected failure inserting actor row' ); + } + } + } + + /** + * Provide the columns that would be needed to look up an actor reference + * when manually doing a join to actor table. + * + * @return array of string column names + */ + public function selectFields() { + return [ 'actor_id', 'actor_user', 'actor_text' ]; + } + + /** + * Create an Actor instance from a database row object. + * @param object $row a DB result row containing necessary fields + * @return Actor + */ + public function fetchFromRow( $row ) { + return new Actor( $row->actor_id, $row->actor_user, $row->actor_text ); + } + + /** + * Internal wrapper for fetching individual actor references. + * + * @param array $conds + * @return Actor|bool matching Actor instance or false if none found + */ + protected function fetchByConds( array $conds ) { + $row = $this->mDb->selectRow( 'actor', + $this->selectFields(), + $conds, + __METHOD__ + ); + if ( $row ) { + return $this->fetchFromRow( $row ) + } else { + return false; + } + } + + /** + * Fetch an individual actor reference by id. + * + * @param array $conds + * @return Actor|bool matching Actor instance or false if none found + */ + public function fetchById( $id ) { + return $this->fetchByConds( [ 'actor_id' => $id ] ); + } + + /** + * Fetch an individual actor reference by user id. + * + * @param array $conds + * @return Actor|bool matching Actor instance or false if none found + */ + public function fetchByUserId( $userId ) { + return $this->fetchByConds( [ 'actor_user' => $userId ] ); + } + + /** + * Fetch an individual actor reference by text (name/IP/etc). + * + * @param array $conds + * @return Actor|bool matching Actor instance or false if none found + */ + public function fetchByText( $text ) { + return $this->fetchByConds( [ 'actor_text' => $text ] ); + } + + public function fetchByPrefix( $prefix ) { + return $this->fetchByConds( [ 'actor_text ' . + $this->mDb->buildLike( $prefix, $this->mDb->anyString() ) ] ); + } +} + +class ActorHelper { +} diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 6ce9a66..b555cc4 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -8688,6 +8688,13 @@ $wgInterwikiPrefixDisplayTypes = []; /** + * Actor table schema migration stage. + * @since 1.30 + * @var int One of the MIGRATION_* constants + */ +$wgActorTableSchemaMigrationStage = MIGRATION_OLD; + +/** * For really cool vim folding this needs to be at the end: * vim: foldmarker=@{,@} foldmethod=marker * @} diff --git a/includes/Revision.php b/includes/Revision.php index c3782ba..023d581 100644 --- a/includes/Revision.php +++ b/includes/Revision.php @@ -431,13 +431,25 @@ } /** - * Return the value of a select() page conds array for the page table. + * Return the value of a select() JOIN conds array for the page table. * This will assure that the revision(s) are not orphaned from live pages. * @since 1.19 * @return array */ public static function pageJoinCond() { return [ 'INNER JOIN', [ 'page_id = rev_page' ] ]; + } + + /** + * Return the value of a select() JOIN conds array for the actor table. + * This will get actor table rows for all users and other actor types. + * @since 1.30 + * @return array + */ + public static function actorJoinCond() { + return [ 'LEFT JOIN', + [ '(rev_user = actor_user) OR ((rev_user = 0) AND rev_user_text=actor_text)' ] ]; + //return [ 'LEFT JOIN', [ 'actor_id = rev_actor' ] ]; } /** @@ -625,10 +637,14 @@ // Use user_name for users and rev_user_text for IPs... $this->mUserText = null; // lazy load if left null - if ( $this->mUser == 0 ) { - $this->mUserText = $row->rev_user_text; // IP user - } elseif ( isset( $row->user_name ) ) { - $this->mUserText = $row->user_name; // logged-in user + if ( isset( $row->actor_text ) ) { + $this->mUserText = $row->actor_text; + } else { + if ( $this->mUser == 0 ) { + $this->mUserText = $row->rev_user_text; // IP user + } elseif ( isset( $row->user_name ) ) { + $this->mUserText = $row->user_name; // logged-in user + } } $this->mOrigUserText = $row->rev_user_text; } elseif ( is_array( $row ) ) { diff --git a/includes/actions/HistoryAction.php b/includes/actions/HistoryAction.php index 7460340..d498509 100644 --- a/includes/actions/HistoryAction.php +++ b/includes/actions/HistoryAction.php @@ -479,15 +479,15 @@ $batch = new LinkBatch(); $revIds = []; foreach ( $this->mResult as $row ) { - if ( $row->rev_parent_id ) { - $revIds[] = $row->rev_parent_id; + $rev = Revision::newFromRow( $row ); + $parentId = $rev->getParentId(); + if ( $parentId ) { + $revIds[] = $parentId; } - if ( !is_null( $row->user_name ) ) { - $batch->add( NS_USER, $row->user_name ); - $batch->add( NS_USER_TALK, $row->user_name ); - } else { # for anons or usernames of imported revisions - $batch->add( NS_USER, $row->rev_user_text ); - $batch->add( NS_USER_TALK, $row->rev_user_text ); + $name = $rev->getUserText( Revision::FOR_THIS_USER, $this->getUser() ); + if ( $name !== '' ) { + $batch->add( NS_USER, $name ); + $batch->add( NS_USER_TALK, $name ); } } $this->parentLens = Revision::getParentLengths( $this->mDb, $revIds ); diff --git a/includes/actions/InfoAction.php b/includes/actions/InfoAction.php index baec944..8465d1e 100644 --- a/includes/actions/InfoAction.php +++ b/includes/actions/InfoAction.php @@ -704,6 +704,8 @@ self::getCacheKey( $cache, $page->getTitle(), $page->getLatest() ), WANObjectCache::TTL_WEEK, function ( $oldValue, &$ttl, &$setOpts ) use ( $page, $config, $fname ) { + global $wgActorTableSchemaMigrationStage; + $title = $page->getTitle(); $id = $title->getArticleID(); @@ -737,9 +739,20 @@ if ( $config->get( 'MiserMode' ) ) { $result['authors'] = 0; } else { + switch ( $wgActorTableSchemaMigrationStage ) { + case MIGRATION_OLD: + case MIGRATION_WRITE_BOTH: + $field = 'rev_user_text'; + break; + case MIGRATION_WRITE_NEW: + case MIGRATION_NEW: + default: + $field = 'rev_actor'; + } + $result['authors'] = (int)$dbr->selectField( 'revision', - 'COUNT(DISTINCT rev_user_text)', + "COUNT(DISTINCT $field)", [ 'rev_page' => $id ], $fname ); diff --git a/includes/installer/DatabaseUpdater.php b/includes/installer/DatabaseUpdater.php index e5cbb7c..0a626c2 100644 --- a/includes/installer/DatabaseUpdater.php +++ b/includes/installer/DatabaseUpdater.php @@ -1191,4 +1191,18 @@ $wgContentHandlerUseDB = $this->holdContentHandlerUseDB; } } + + protected function migrateActor() { + if ( !$this->updateRowExists( 'MigrateActor' ) ) { + $this->output( + "Migrating actor refs to the 'actor' table, printing progress markers. For large\n" . + "databases, you may want to hit Ctrl-C and do this manually with\n" . + "maintenance/migrateActor.php.\n" + ); + $task = $this->maintenance->runChild( 'MigrateActor', 'migrateActor.php' ); + $task->execute(); + $this->output( "done.\n" ); + } + } + } diff --git a/includes/installer/MysqlUpdater.php b/includes/installer/MysqlUpdater.php index adfe2f6..7f4aeac 100644 --- a/includes/installer/MysqlUpdater.php +++ b/includes/installer/MysqlUpdater.php @@ -302,6 +302,10 @@ [ 'addField', 'user_groups', 'ug_expiry', 'patch-user_groups-ug_expiry.sql' ], [ 'addIndex', 'image', 'img_user_timestamp', 'patch-image-user-index-2.sql' ], [ 'modifyField', 'image', 'img_media_type', 'patch-add-3d.sql' ], + + // 1.30 + [ 'addTable', 'actor', 'patch-actor-table.sql' ], + [ 'migrateActor' ], ]; } diff --git a/includes/installer/SqliteUpdater.php b/includes/installer/SqliteUpdater.php index 9c90283..5ee48b2 100644 --- a/includes/installer/SqliteUpdater.php +++ b/includes/installer/SqliteUpdater.php @@ -165,6 +165,10 @@ [ 'addField', 'externallinks', 'el_index_60', 'patch-externallinks-el_index_60.sql' ], [ 'addField', 'user_groups', 'ug_expiry', 'patch-user_groups-ug_expiry.sql' ], [ 'addIndex', 'image', 'img_user_timestamp', 'patch-image-user-index-2.sql' ], + + // 1.30 + [ 'addTable', 'actor', 'patch-actor-table.sql' ], + [ 'migrateActor' ], ]; } diff --git a/maintenance/archives/patch-actor-table.sql b/maintenance/archives/patch-actor-table.sql new file mode 100644 index 0000000..500f88c --- /dev/null +++ b/maintenance/archives/patch-actor-table.sql @@ -0,0 +1,42 @@ +-- +-- Revision authorship is represented by an actor row. +-- This avoids duplicating common usernames/IP addresses +-- in the revision table, and makes rename processing quicker. +-- +CREATE TABLE /*_*/actor ( + -- Unique ID to identify each entry + actor_id bigint unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT, + + -- Key to user.user_id of the user who made this edit. + -- Stores NULL for anonymous edits and for some mass imports. + actor_user int unsigned, + + -- Text username or IP address of the editor. + actor_text varchar(255) binary NOT NULL default '' +) /*$wgDBTableOptions*/; + + +CREATE UNIQUE INDEX /*i*/actor_user ON /*_*/actor (actor_user); +CREATE UNIQUE INDEX /*i*/actor_text ON /*_*/actor (actor_text); + + + + +-- TODO: maybe do these two as additions with temp tables? + +-- Add new fields to revision +--ALTER TABLE revision +-- ADD COLUMN rev_actor bigint unsigned NOT NULL; +--CREATE INDEX /*i*/actor_timestamp ON /*_*/revision (rev_actor,rev_timestamp); +--CREATE INDEX /*i*/page_actor_timestamp ON /*_*/revision (rev_page,rev_actor,rev_timestamp); + +-- Add new fields to archive and update some bigints +--ALTER TABLE archive +-- ADD COLUMN ar_actor bigint unsigned +-- AFTER ar_comment; + +-- TODO: image +-- TODO: oldimage +-- TODO: filearchive +-- TODO: recentchanges +-- TODO: logging diff --git a/maintenance/migrateActor.php b/maintenance/migrateActor.php new file mode 100644 index 0000000..2a730c7 --- /dev/null +++ b/maintenance/migrateActor.php @@ -0,0 +1,56 @@ +<?php +/** + * Migrate actor refs from pre-1.30 columns to the 'actor' table + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Maintenance + */ + +use Wikimedia\Rdbms\IDatabase; + +require_once __DIR__ . '/Maintenance.php'; + +/** + * Maintenance script that migrates actor refs from pre-1.30 columns to the + * 'actor' table + * + * @ingroup Maintenance + */ +class MigrateActor extends LoggedUpdateMaintenance { + public function __construct() { + parent::__construct(); + $this->addDescription( 'Migrates actor refs from pre-1.30 columns to the \'actor\' table' ); + $this->setBatchSize( 100 ); + } + + protected function getUpdateKey() { + return __CLASS__; + } + + protected function updateSkippedMessage() { + return 'actor already migrated.'; + } + + protected function doDBUpdates() { + // TODO: do stuff + return true; + } +} + +$maintClass = "MigrateComments"; +require_once RUN_MAINTENANCE_IF_MAIN; -- To view, visit https://gerrit.wikimedia.org/r/365894 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I98210ea8e45a0e22ca3e3300e16df2bf69066f74 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: Brion VIBBER <br...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits