Anomie has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/357892 )
Change subject: WIP: Add `comment` table and code to start using it ...................................................................... WIP: Add `comment` table and code to start using it A subsequent patch will remove the old columns. DONE: * Schema change for MySQL and (untested) sqlite * update.php integration for MySQL and (untested) sqlite * CommentStore class TODO: * Update everything in core that currently accesses comment columns to use CommentStore * Update extensions or file bugs for them to be updated * Test sqlite * Write schema change for other DBs Bug: T166732 Change-Id: Ic3a434c061ed6e443ea072bc62dda09acbeeed7f --- M RELEASE-NOTES-1.30 M autoload.php A includes/CommentStore.php M includes/DefaultSettings.php M includes/installer/DatabaseUpdater.php M includes/installer/MysqlUpdater.php M includes/installer/SqliteUpdater.php A maintenance/archives/patch-comment-table.sql A maintenance/migrateComments.php M maintenance/tables.sql A tests/phpunit/includes/CommentStoreTest.php 11 files changed, 1,433 insertions(+), 13 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/92/357892/1 diff --git a/RELEASE-NOTES-1.30 b/RELEASE-NOTES-1.30 index fa5c280..c5e0027 100644 --- a/RELEASE-NOTES-1.30 +++ b/RELEASE-NOTES-1.30 @@ -39,6 +39,10 @@ as page ranges keyed by dimensions. * Added a 'ParserOptionsRegister' hook to allow extensions to register additional parser options. +* Edit summaries, block reasons, and other "comments" are now stored in a + separate database table. Use the CommentFormatter class to access them. +** Large wikis may want to use $wgCommentTableSchemaMigrationStage to perform + the update at leisure. === Languages updated in 1.30 === diff --git a/autoload.php b/autoload.php index 0264435..2251ef9 100644 --- a/autoload.php +++ b/autoload.php @@ -276,6 +276,7 @@ 'CollationFa' => __DIR__ . '/includes/collation/CollationFa.php', 'CommandLineInc' => __DIR__ . '/maintenance/commandLine.inc', 'CommandLineInstaller' => __DIR__ . '/maintenance/install.php', + 'CommentStore' => __DIR__ . '/includes/CommentStore.php', 'CompareParserCache' => __DIR__ . '/maintenance/compareParserCache.php', 'CompareParsers' => __DIR__ . '/maintenance/compareParsers.php', 'ComposerHookHandler' => __DIR__ . '/includes/composer/ComposerHookHandler.php', @@ -980,6 +981,7 @@ 'MessageCache' => __DIR__ . '/includes/cache/MessageCache.php', 'MessageContent' => __DIR__ . '/includes/content/MessageContent.php', 'MessageSpecifier' => __DIR__ . '/includes/libs/MessageSpecifier.php', + 'MigrateComments' => __DIR__ . '/maintenance/migrateComments.php', 'MigrateFileRepoLayout' => __DIR__ . '/maintenance/migrateFileRepoLayout.php', 'MigrateUserGroup' => __DIR__ . '/maintenance/migrateUserGroup.php', 'MimeAnalyzer' => __DIR__ . '/includes/libs/mime/MimeAnalyzer.php', diff --git a/includes/CommentStore.php b/includes/CommentStore.php new file mode 100644 index 0000000..4d2017d --- /dev/null +++ b/includes/CommentStore.php @@ -0,0 +1,457 @@ +<?php +/** + * Manage storage of comments in the database + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +/** + * CommentStore handles storage of comments (edit summaries, log reasons, etc) + * in the database. + * @since 1.30 + */ +class CommentStore { + + /** + * Define fields that use temporary tables for transitional purposes + * @var array Keys are '$key', values are arrays with four fields: + * - table: Temporary table name + * - pk: Temporary table column referring to the main table's primary key + * - field: Temporary table column referring comment.comment_id + * - joinPK: Main table's primary key + */ + protected static $tempTables = [ + 'rev_comment' => [ + 'table' => 'revision_comment_temp', + 'pk' => 'revcomment_rev', + 'field' => 'revcomment_comment_id', + 'joinPK' => 'rev_id', + ], + 'img_description' => [ + 'table' => 'image_comment_temp', + 'pk' => 'imgcomment_name', + 'field' => 'imgcomment_description_id', + 'joinPK' => 'img_name', + ], + ]; + + /** + * Fields that formerly used $tempTables + * @var array Key is '$key', value is the MediaWiki version in which it was + * removed from $tempTables. + */ + protected static $formerTempTables = []; + + /** @var IDatabase */ + protected $db; + + /** @var int One of the MIGRATION_* constants */ + protected $stage; + + /** @var array Cache for `self::getJoin()` */ + protected $joinCache; + + /** + * @param IDatabase $db Database handle + */ + public function __construct( IDatabase $db ) { + global $wgCommentTableSchemaMigrationStage; + + $this->db = $db; + $this->stage = $wgCommentTableSchemaMigrationStage; + } + + /** + * Get SELECT fields for a comment key + * + * Each resulting row should be passed to `self::getComment()` to get the + * actual comment. + * + * @note Use of this method may require a subsequent database query to + * actually fetch the comment. If possible, use `self::getJoin()` instead. + * @param string $key A key such as "rev_comment" identifying the comment + * field being fetched. + * @return string[] to include in the `$vars` to `IDatabase->select()`. All + * fields are aliased, so `+` is safe to use. + */ + public function getFields( $key ) { + $fields = []; + if ( $this->stage === MIGRATION_OLD ) { + $fields["{$key}_text"] = $key; + $fields["{$key}_data"] = 'NULL'; + } else { + if ( $this->stage < MIGRATION_NEW ) { + $fields["{$key}_old"] = $key; + } + if ( isset( self::$tempTables[$key] ) ) { + $fields["{$key}_pk"] = self::$tempTables[$key]['joinPK']; + } else { + $fields["{$key}_id"] = "{$key}_id"; + } + } + return $fields; + } + + /** + * Get SELECT fields and joins for a comment key + * + * Each resulting row should be passed to `self::getComment()` to get the + * actual comment. + * + * @param string $key A key such as "rev_comment" identifying the comment + * field being fetched. + * @return array With three keys: + * - tables: (string[]) to include in the `$table` to `IDatabase->select()` + * - fields: (string[]) to include in the `$vars` to `IDatabase->select()` + * - joins: (array) to include in the `$join_conds` to `IDatabase->select()` + * All tables, fields, and joins are aliased, so `+` is safe to use. + */ + public function getJoin( $key ) { + if ( !isset( $this->joinCache[$key] ) ) { + $tables = []; + $fields = []; + $joins = []; + + if ( $this->stage === MIGRATION_OLD ) { + $fields["{$key}_text"] = $key; + $fields["{$key}_data"] = 'NULL'; + } else { + $join = $this->stage === MIGRATION_NEW ? 'JOIN' : 'LEFT JOIN'; + + if ( isset( self::$tempTables[$key] ) ) { + $t = self::$tempTables[$key]; + $alias = "temp_$key"; + $tables[$alias] = $t['table']; + $joins[$alias] = [ $join, [ "{$alias}.{$t['pk']} = {$t['joinPK']}" ] ]; + $joinField = "{$alias}.{$t['field']}"; + } else { + $joinField = "{$key}_id"; + } + + $alias = "comment_$key"; + $tables[$alias] = 'comment'; + $joins[$alias] = [ $join, [ "{$alias}.comment_id = {$joinField}" ] ]; + + if ( $this->stage === MIGRATION_NEW ) { + $fields["{$key}_text"] = "{$alias}.comment_text"; + } else { + $fields["{$key}_text"] = "COALESCE( {$alias}.comment_text, $key )"; + } + $fields["{$key}_data"] = "{$alias}.comment_data"; + } + + $this->joinCache[$key] = [ + 'tables' => $tables, + 'fields' => $fields, + 'joins' => $joins, + ]; + } + + return $this->joinCache[$key]; + } + + /** + * Extract the comment from a row + * + * Use `self::getJoin()` (preferred) or `self::getFields()` to ensure the + * row contains the needed data. + * + * @param string $key A key such as "rev_comment" identifying the comment + * field being fetched. + * @param object|array $row Result row. + * @return object with the following properties + * - text: (string) Text version of the comment + * - message: (Message) Message version of the comment. Might be a RawMessage. + * - data: (object|null) Structured data of the comment + */ + public function getComment( $key, $row ) { + $row = (array)$row; + if ( array_key_exists( "{$key}_text", $row ) && array_key_exists( "{$key}_data", $row ) ) { + $text = $row["{$key}_text"]; + $data = $row["{$key}_data"]; + } elseif ( $this->stage === MIGRATION_OLD ) { + // @codeCoverageIgnoreStart + wfLogWarning( "Missing {$key}_text and {$key}_data fields in row with MIGRATION_OLD" ); + $text = ''; + $data = null; + // @codeCoverageIgnoreEnd + } else { + if ( isset( self::$tempTables[$key] ) ) { + // @codeCoverageIgnoreStart + if ( !array_key_exists( "{$key}_pk", $row ) ) { + throw new InvalidArgumentException( "\$row does not contain fields needed for comment $key" ); + } + // @codeCoverageIgnoreEnd + + $t = self::$tempTables[$key]; + $id = $row["{$key}_pk"]; + $row2 = $this->db->selectRow( + [ $t['table'], 'comment' ], + [ 'comment_text', 'comment_data' ], + [ $t['pk'] => $id ], + __METHOD__, + [], + [ 'comment' => [ 'JOIN', [ "comment_id = {$t['field']}" ] ] ] + ); + } else { + // @codeCoverageIgnoreStart + if ( !array_key_exists( "{$key}_id", $row ) ) { + throw new InvalidArgumentException( "\$row does not contain fields needed for comment $key" ); + } + // @codeCoverageIgnoreEnd + + $id = $row["{$key}_id"]; + $row2 = $this->db->selectRow( + 'comment', + [ 'comment_text', 'comment_data' ], + [ 'comment_id' => $id ], + __METHOD__ + ); + } + + if ( $row2 ) { + $text = $row2->comment_text; + $data = $row2->comment_data; + } elseif ( $this->stage < MIGRATION_NEW && array_key_exists( "{$key}_old", $row ) ) { + $text = $row["{$key}_old"]; + $data = null; + } else { + // @codeCoverageIgnoreStart + wfLogWarning( "Missing comment row for $key, id=$id" ); + $text = ''; + $data = null; + // @codeCoverageIgnoreEnd + } + } + + $msg = null; + if ( $data !== null ) { + $data = FormatJson::decode( $data ); + if ( !is_object( $data ) ) { + // @codeCoverageIgnoreStart + wfLogWarning( "Invalid JSON object in comment: $data" ); + $data = null; + // @codeCoverageIgnoreEnd + } else { + $data = (array)$data; + if ( isset( $data['_message'] ) ) { + $msg = self::decodeMessage( $data['_message'] ) + ->setInterfaceMessageFlag( true ); + } + if ( !empty( $data['_null'] ) ) { + $data = null; + } else { + foreach ( $data as $k => $v ) { + if ( substr( $k, 0, 1 ) === '_' ) { + unset( $data[$k] ); + } + } + } + } + } + + return (object)[ + 'text' => $text, + 'message' => $msg ?: new RawMessage( '$1', [ $text ] ), + 'data' => $data, + ]; + } + + /** + * @param string $key + * @param string|Message $text + * @param array|null $data + * @return array [ array $fields, callable $callback ] + */ + private function insertInternal( $key, $text, $data ) { + global $wgContLang; + + $fields = []; + $callback = null; + + if ( $data !== null ) { + if ( !is_array( $data ) ) { + throw new InvalidArgumentException( '$data must be null or an array' ); + } + foreach ( $data as $k => $v ) { + if ( substr( $k, 0, 1 ) === '_' ) { + throw new InvalidArgumentException( 'Keys in $data beginning with "_" are reserved' ); + } + } + } + + if ( $text instanceof Message ) { + if ( $data === null ) { + $data = [ '_null' => true ]; + } + $data['_message'] = self::encodeMessage( $text ); + $text = $text + ->inLanguage( $wgContLang ) // Avoid $wgForceUIMsgAsContentMsg + ->setInterfaceMessageFlag( true ) + ->text(); + } + if ( $data !== null ) { + $data = FormatJson::encode( (object)$data, false, FormatJson::ALL_OK ); + } + + if ( $this->stage <= MIGRATION_WRITE_BOTH ) { + $fields[$key] = $text; + } + + if ( $this->stage >= MIGRATION_WRITE_BOTH ) { + $this->db->startAtomic( __METHOD__ ); + $commentId = $this->db->selectField( + 'comment', + 'comment_id', + [ + 'comment_text' => $text, + 'comment_data' => $data, + ], + __METHOD__, + [ 'FOR UPDATE' ] + ); + if ( !$commentId ) { + $commentId = $this->db->nextSequenceValue( 'comment_comment_id_seq' ); + $this->db->insert( + 'comment', + [ + 'comment_id' => $commentId, + 'comment_text' => $text, + 'comment_data' => $data, + ], + __METHOD__ + ); + $commentId = $this->db->insertId(); + } + $this->db->endAtomic( __METHOD__ ); + + if ( isset( self::$tempTables[$key] ) ) { + $t = self::$tempTables[$key]; + $func = __METHOD__; + $callback = function ( $id ) use ( $commentId, $t, $func ) { + $this->db->insert( + $t['table'], + [ + $t['pk'] => $id, + $t['field'] => $commentId, + ], + $func + ); + }; + } else { + $fields["{$key}_id"] = $commentId; + } + } + + return [ $fields, $callback ]; + } + + /** + * Prepare for the insertion of a row with a comment + * + * @note This may insert into the database. It's recommended to include + * both the call to this method and the actual insert in the same + * transaction. + * @param string $key A key such as "rev_comment" identifying the comment + * field being set. + * @param string|Message $comment Comment text or Message object + * @param array|null $data Structured data to store. The key 'message' is reserved. + * @return array Fields for the insert or update + */ + public function insert( $key, $text, $data = null ) { + if ( isset( self::$tempTables[$key] ) ) { + throw new InvalidArgumentException( "Must use insertWithTempTable() for $key" ); + } + + list( $fields ) = $this->insertInternal( $key, $text, $data ); + return $fields; + } + + /** + * Prepare for the insertion of a row with a comment and temporary table + * + * This is currently needed for "rev_comment" and "img_description". In the + * future that requirement will be removed. + * + * @note This may insert into the database. It's recommended to include + * both the call to this method and the actual insert in the same + * transaction. + * @param string $key A key such as "rev_comment" identifying the comment + * field being set. + * @param string|Message $comment Comment text or Message object + * @param array|null $data Structured data to store. Keys beginning with + * '_' are reserved. + * @return array Two values: + * - array Fields for the insert or update + * - callable Function to call when the primary key of the row being + * inserted/updated is known. Pass it that primary key. + */ + public function insertWithTempTable( $key, $text, $data = null ) { + if ( isset( self::$formerTempTables[$key] ) ) { + wfDeprecated( __METHOD__ . " for $key", self::$formerTempTables[$key] ); + } elseif ( !isset( self::$tempTables[$key] ) ) { + throw new InvalidArgumentException( "Must use insert() for $key" ); + } + + list( $fields, $callback ) = $this->insertInternal( $key, $text, $data ); + if ( !$callback ) { + $callback = function () { + // Do nothing. + }; + } + return [ $fields, $callback ]; + } + + /** + * Encode a Message as a PHP data structure + * @param Message $msg + * @return array + */ + protected static function encodeMessage( Message $msg ) { + $key = count( $msg->getKeysToTry() ) > 1 ? $msg->getKeysToTry() : $msg->getKey(); + $params = $msg->getParams(); + foreach ( $params as &$param ) { + if ( $param instanceof Message ) { + $param = [ + 'message' => self::encodeMessage( $param ) + ]; + } + } + array_unshift( $params, $key ); + return $params; + } + + /** + * Decode a message that was encoded by self::encodeMessage() + * @param array $data + * @return Message + */ + protected static function decodeMessage( $data ) { + $key = array_shift( $data ); + foreach ( $data as &$param ) { + if ( is_object( $param ) ) { + $param = (array)$param; + } + if ( is_array( $param ) && count( $param ) === 1 && isset( $param['message'] ) ) { + $param = self::decodeMessage( $param['message'] ); + } + } + return new Message( $key, $data ); + } + +} diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 5b7ca3e..01b1a9f 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -8659,6 +8659,13 @@ $wgInterwikiPrefixDisplayTypes = []; /** + * Comment table schema migration stage. + * @since 1.30 + * @var int One of the MIGRATION_* constants + */ +$wgCommentTableSchemaMigrationStage = MIGRATION_NEW; + +/** * For really cool vim folding this needs to be at the end: * vim: foldmarker=@{,@} foldmethod=marker * @} diff --git a/includes/installer/DatabaseUpdater.php b/includes/installer/DatabaseUpdater.php index e5cbb7c..b3acd8e 100644 --- a/includes/installer/DatabaseUpdater.php +++ b/includes/installer/DatabaseUpdater.php @@ -1191,4 +1191,22 @@ $wgContentHandlerUseDB = $this->holdContentHandlerUseDB; } } + + /** + * Populates the externallinks.el_index_60 field + * @since 1.30 + */ + protected function migrateComments() { + if ( !$this->updateRowExists( 'MigrateComments' ) ) { + $this->output( + "Migrating comments to the 'comments' table, printing progress markers. For large\n" . + "databases, you may want to hit Ctrl-C and do this manually with\n" . + "maintenance/migrateComments.php.\n" + ); + $task = $this->maintenance->runChild( 'MigrateComments', 'migrateComments.php' ); + $task->execute(); + $this->output( "done.\n" ); + } + } + } diff --git a/includes/installer/MysqlUpdater.php b/includes/installer/MysqlUpdater.php index 70e790c..6536dec 100644 --- a/includes/installer/MysqlUpdater.php +++ b/includes/installer/MysqlUpdater.php @@ -301,6 +301,10 @@ [ 'dropIndex', 'user_groups', 'ug_user_group', 'patch-user_groups-primary-key.sql' ], [ 'addField', 'user_groups', 'ug_expiry', 'patch-user_groups-ug_expiry.sql' ], [ 'addIndex', 'image', 'img_user_timestamp', 'patch-image-user-index-2.sql' ], + + // 1.30 + [ 'addTable', 'comment', 'patch-comment-table.sql' ], + [ 'migrateComments' ], ]; } diff --git a/includes/installer/SqliteUpdater.php b/includes/installer/SqliteUpdater.php index 9c90283..c9b41a3 100644 --- a/includes/installer/SqliteUpdater.php +++ b/includes/installer/SqliteUpdater.php @@ -165,6 +165,10 @@ [ 'addField', 'externallinks', 'el_index_60', 'patch-externallinks-el_index_60.sql' ], [ 'addField', 'user_groups', 'ug_expiry', 'patch-user_groups-ug_expiry.sql' ], [ 'addIndex', 'image', 'img_user_timestamp', 'patch-image-user-index-2.sql' ], + + // 1.30 + [ 'addTable', 'comment', 'patch-comment-table.sql' ], + [ 'migrateComments' ], ]; } diff --git a/maintenance/archives/patch-comment-table.sql b/maintenance/archives/patch-comment-table.sql new file mode 100644 index 0000000..2830386 --- /dev/null +++ b/maintenance/archives/patch-comment-table.sql @@ -0,0 +1,58 @@ +-- +-- patch-comment-table.sql +-- +-- T166732. Add a `comment` table and various columns (and temporary tables) to reference it. + +CREATE TABLE /*_*/comment ( + comment_id bigint unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT, + comment_text BLOB NOT NULL, + comment_data BLOB +) /*$wgDBTableOptions*/; +CREATE INDEX /*i*/comment_text ON comment (comment_text(100)); + +CREATE TABLE /*_*/revision_comment_temp ( + revcomment_rev int unsigned NOT NULL, + revcomment_comment_id bigint unsigned NOT NULL, + PRIMARY KEY (revcomment_rev, revcomment_comment_id) +) /*$wgDBTableOptions*/; +CREATE UNIQUE INDEX /*i*/ revcomment_rev ON /*_*/revision_comment_temp (revcomment_rev); + +CREATE TABLE /*_*/image_comment_temp ( + imgcomment_name varchar(255) binary NOT NULL, + imgcomment_description_id bigint unsigned NOT NULL, + PRIMARY KEY (imgcomment_name, imgcomment_description_id) +) /*$wgDBTableOptions*/; +CREATE UNIQUE INDEX /*i*/ imgcomment_name ON /*_*/image_comment_temp (imgcomment_name); + +ALTER TABLE /*_*/revision + ALTER COLUMN rev_comment SET DEFAULT ''; + +ALTER TABLE /*_*/archive + ALTER COLUMN ar_comment SET DEFAULT '', + ADD COLUMN ar_comment_id bigint unsigned NOT NULL DEFAULT 0 AFTER ar_comment; + +ALTER TABLE /*_*/ipblocks + ALTER COLUMN ipb_reason SET DEFAULT '', + ADD COLUMN ipb_reason_id bigint unsigned NOT NULL DEFAULT 0 AFTER ipb_reason; + +ALTER TABLE /*_*/image + ALTER COLUMN img_description SET DEFAULT ''; + +ALTER TABLE /*_*/oldimage + ALTER COLUMN oi_description SET DEFAULT '', + ADD COLUMN oi_description_id bigint unsigned NOT NULL DEFAULT 0 AFTER oi_description; + +ALTER TABLE /*_*/filearchive + ADD COLUMN fa_deleted_reason_id bigint unsigned NOT NULL DEFAULT 0 AFTER fa_deleted_reason, + ALTER COLUMN fa_description SET DEFAULT '', + ADD COLUMN fa_description_id bigint unsigned NOT NULL DEFAULT 0 AFTER fa_description; + +ALTER TABLE /*_*/recentchanges + ADD COLUMN rc_comment_id bigint unsigned NOT NULL DEFAULT 0 AFTER rc_comment; + +ALTER TABLE /*_*/logging + ADD COLUMN log_comment_id bigint unsigned NOT NULL DEFAULT 0 AFTER log_comment; + +ALTER TABLE /*_*/protected_titles + ALTER COLUMN pt_reason SET DEFAULT '', + ADD COLUMN pt_reason_id bigint unsigned NOT NULL DEFAULT 0 AFTER pt_reason; diff --git a/maintenance/migrateComments.php b/maintenance/migrateComments.php new file mode 100644 index 0000000..c322ca8 --- /dev/null +++ b/maintenance/migrateComments.php @@ -0,0 +1,268 @@ +<?php +/** + * Migrate comments from pre-1.30 columns to the 'comment' table + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Maintenance + */ + +require_once __DIR__ . '/Maintenance.php'; + +/** + * Maintenance script that migrates comments from pre-1.30 columns to the + * 'comment' table + * + * @ingroup Maintenance + */ +class MigrateComments extends LoggedUpdateMaintenance { + public function __construct() { + parent::__construct(); + $this->addDescription( 'Migrates comments from pre-1.30 columns to the \'comment\' table' ); + $this->setBatchSize( 100 ); + } + + protected function getUpdateKey() { + return __CLASS__; + } + + protected function updateSkippedMessage() { + return 'comments already migrated.'; + } + + protected function doDBUpdates() { + $this->migrateToTemp( + 'revision', 'rev_id', 'rev_comment', 'revcomment_rev', 'revcomment_comment_id' + ); + $this->migrate( 'archive', 'ar_id', 'ar_comment' ); + $this->migrate( 'ipblocks', 'ipb_id', 'ipb_reason' ); + $this->migrateToTemp( + 'image', 'img_name', 'img_description', 'imgcomment_name', 'imgcomment_description_id' + ); + $this->migrate( 'oldimage', [ 'oi_name', 'oi_timestamp' ], 'oi_description' ); + $this->migrate( 'filearchive', 'fa_id', 'fa_deleted_reason' ); + $this->migrate( 'filearchive', 'fa_id', 'fa_description' ); + $this->migrate( 'recentchanges', 'rc_id', 'rc_comment' ); + $this->migrate( 'logging', 'log_id', 'log_comment' ); + $this->migrate( 'protected_titles', [ 'pt_namespace', 'pt_title' ], 'pt_reason' ); + return true; + } + + /** + * Fetch comment IDs for a set of comments + * @param IDatabase $dbw + * @param array &$comments Keys are comment names, values will be set to IDs. + * @return int Count of added comments + */ + private function loadCommentIDs( IDatabase $dbw, array &$comments ) { + $count = 0; + $needComments = $comments; + + // We need a transaction and FOR UPDATE here to avoid races where a + // comment gets inserted twice. + $this->beginTransaction( $dbw, __METHOD__ ); + while ( true ) { + $res = $dbw->select( + 'comment', + [ 'comment_id', 'comment_text' ], + [ 'comment_text' => array_keys( $needComments ) ], + __METHOD__, + [ 'FOR UPDATE' ] + ); + foreach ( $res as $row ) { + $comments[$row->comment_text] = $row->comment_id; + unset( $needComments[$row->comment_text] ); + } + + if ( !$needComments ) { + break; + } + + $dbw->insert( + 'comment', + array_map( function ( $v ) { + return [ 'comment_text' => $v ]; + }, array_keys( $needComments ) ), + __METHOD__ + ); + $count += $dbw->affectedRows(); + } + $this->commitTransaction( $dbw, __METHOD__ ); + return $count; + } + + /** + * Migrate comments in a table. + * + * Assumes any row with the ID field non-zero have already been migrated. + * Assumes the new field name is the same as the old with '_id' appended. + * Blanks the old fields while migrating. + * + * @param string $table Table to migrate + * @param string|string[] $primaryKey Primary key of the table. + * @param string $oldField Old comment field name + */ + protected function migrate( $table, $primaryKey, $oldField ) { + $newField = $oldField . '_id'; + $primaryKey = (array)$primaryKey; + $pkFilter = array_flip( $primaryKey ); + $this->output( "Beginning migration of $table.$oldField to $table.$newField\n" ); + + $dbw = $this->getDB( DB_MASTER ); + $next = '1=1'; + $countUpdated = 0; + $countComments = 0; + while ( true ) { + // Fetch the rows needing update + $res = $dbw->select( + $table, + array_merge( $primaryKey, [ $oldField ] ), + [ + $newField => 0, + $next, + ], + __METHOD__, + [ + 'ORDER BY' => $primaryKey, + 'LIMIT' => $this->mBatchSize, + ] + ); + if ( !$res->numRows() ) { + break; + } + + // Collect the distinct comments from those rows + $comments = []; + foreach ( $res as $row ) { + $comments[$row->$oldField] = 0; + } + $countComments += $this->loadCommentIDs( $dbw, $comments ); + + // Update the existing rows + foreach ( $res as $row ) { + $dbw->update( + $table, + [ + $newField => $comments[$row->$oldField], + $oldField => '', + ], + array_intersect_key( (array)$row, $pkFilter ) + [ + $newField => 0 + ], + __METHOD__ + ); + $countUpdated += $dbw->affectedRows(); + } + + // Calculate the "next" condition + $next = ''; + $prompt = []; + for ( $i = count( $primaryKey ) - 1; $i >= 0; $i-- ) { + $field = $primaryKey[$i]; + $prompt[] = $row->$field; + $value = $dbw->addQuotes( $row->$field ); + if ( $next === '' ) { + $next = "$field > $value"; + } else { + $next = "$field > $value OR $field = $value AND ($next)"; + } + } + $prompt = join( ' ', array_reverse( $prompt ) ); + $this->output( "... $prompt\n" ); + } + + $this->output( + "Completed migration, updated $countUpdated row(s) with $countComments new comment(s)\n" + ); + } + + /** + * Migrate comments in a table to a temporary table. + * + * Assumes any row with the ID field non-zero have already been migrated. + * Assumes the new table is named "{$table}_comment_temp", and it has two + * columns, in order, being the primary key of the original table and the + * comment ID field. + * Blanks the old fields while migrating. + * + * @param string $oldTable Table to migrate + * @param string $primaryKey Primary key of the table. + * @param string $oldField Old comment field name + * @param string $newPrimaryKey Primary key of the new table. + * @param string $newField New comment field name + */ + protected function migrateToTemp( $table, $primaryKey, $oldField, $newPrimaryKey, $newField ) { + $newTable = $table . '_comment_temp'; + $this->output( "Beginning migration of $table.$oldField to $newTable.$newField\n" ); + + $dbw = $this->getDB( DB_MASTER ); + $next = []; + $countUpdated = 0; + $countComments = 0; + while ( true ) { + // Fetch the rows needing update + $res = $dbw->select( + [ $table, $newTable ], + [ $primaryKey, $oldField ], + [ $newPrimaryKey => null ] + $next, + __METHOD__, + [ + 'ORDER BY' => $primaryKey, + 'LIMIT' => $this->mBatchSize, + ], + [ $newTable => [ 'LEFT JOIN', "{$primaryKey}={$newPrimaryKey}" ] ] + ); + if ( !$res->numRows() ) { + break; + } + + // Collect the distinct comments from those rows + $comments = []; + foreach ( $res as $row ) { + $comments[$row->$oldField] = 0; + } + $countComments += $this->loadCommentIDs( $dbw, $comments ); + + // Update rows + $inserts = []; + $updates = []; + foreach ( $res as $row ) { + $inserts[] = [ + $newPrimaryKey => $row->$primaryKey, + $newField => $comments[$row->$oldField] + ]; + $updates[] = $row->$primaryKey; + } + $this->beginTransaction( $dbw, __METHOD__ ); + $dbw->insert( $newTable, $inserts, __METHOD__ ); + $dbw->update( $table, [ $oldField => '' ], [ $primaryKey => $updates ], __METHOD__ ); + $countUpdated += $dbw->affectedRows(); + $this->commitTransaction( $dbw, __METHOD__ ); + + // Calculate the "next" condition + $next = [ $primaryKey . ' > ' . $dbw->addQuotes( $row->$primaryKey ) ]; + $this->output( "... {$row->$primaryKey}\n" ); + } + + $this->output( + "Completed migration, updated $countUpdated row(s) with $countComments new comment(s)\n" + ); + } +} + +$maintClass = "MigrateComments"; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/maintenance/tables.sql b/maintenance/tables.sql index d4147f4..6302469 100644 --- a/maintenance/tables.sql +++ b/maintenance/tables.sql @@ -346,10 +346,9 @@ -- or a rollback to a previous version. rev_text_id int unsigned NOT NULL, - -- Text comment summarizing the change. - -- This text is shown in the history and other changes lists, - -- rendered in a subset of wiki markup by Linker::formatComment() - rev_comment varbinary(767) NOT NULL, + -- Text comment summarizing the change. Deprecated in favor of + -- revision_comment_temp.revcomment_comment_id. + rev_comment varbinary(767) NOT NULL default '', -- Key to user.user_id of the user who made this edit. -- Stores 0 for anonymous edits and for some mass imports. @@ -411,6 +410,23 @@ CREATE INDEX /*i*/page_user_timestamp ON /*_*/revision (rev_page,rev_user,rev_timestamp); -- +-- Temporary table to avoid blocking on an alter of revision. +-- +-- On large wikis like the English Wikipedia, altering the revision table is a +-- months-long process. This table is being created to avoid such an alter, and +-- will be merged back into revision in the future. +-- +CREATE TABLE /*_*/revision_comment_temp ( + -- Key to rev_id + revcomment_rev int unsigned NOT NULL, + -- Key to comment_id + revcomment_comment_id bigint unsigned NOT NULL, + PRIMARY KEY (revcomment_rev, revcomment_comment_id) +) /*$wgDBTableOptions*/; +-- Ensure uniqueness +CREATE UNIQUE INDEX /*i*/ revcomment_rev ON /*_*/revision_comment_temp (revcomment_rev); + +-- -- Holds text of individual page revisions. -- -- Field names are a holdover from the 'old' revisions table in @@ -452,6 +468,34 @@ -- +-- Edits, blocks, and other actions typically have a textual comment describing +-- the action. They are stored here to reduce the size of the main tables, and +-- to allow for deduplication. +-- +CREATE TABLE /*_*/comment ( + -- Unique ID to identify each comment + comment_id bigint unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT, + + -- Text comment summarizing the change. + -- This text is shown in the history and other changes lists, + -- rendered in a subset of wiki markup by Linker::formatComment() + -- Size limits are enforced at the application level, and should + -- take care to crop UTF-8 strings appropriately. + comment_text BLOB NOT NULL, + + -- JSON data, intended for localizing auto-generated comments. + -- This holds structured data that is intended to be used to provide + -- localized versions of automatically-generated comments. When not empty, + -- comment_text should be the generated comment localized using the wiki's + -- content language. + comment_data BLOB +) /*$wgDBTableOptions*/; +-- Index used for deduplication. The first 100 bytes should be enough to limit +-- the number of rows that need to be scanned for an exact match. +CREATE INDEX /*i*/comment_text ON comment (comment_text(100)); + + +-- -- Holding area for deleted articles, which may be viewed -- or restored by admins through the Special:Undelete interface. -- The fields generally correspond to the page, revision, and text @@ -472,7 +516,8 @@ ar_text mediumblob NOT NULL, -- Basic revision stuff... - ar_comment varbinary(767) NOT NULL, + ar_comment varbinary(767) NOT NULL default '', -- Deprecated in favor of ar_comment_id + ar_comment_id bigint unsigned NOT NULL DEFAULT 0, -- ("DEFAULT 0" is temporary, signaling that ar_comment should be used) ar_user int unsigned NOT NULL default 0, ar_user_text varchar(255) binary NOT NULL, ar_timestamp binary(14) NOT NULL default '', @@ -851,8 +896,12 @@ -- User name of blocker ipb_by_text varchar(255) binary NOT NULL default '', - -- Text comment made by blocker. - ipb_reason varbinary(767) NOT NULL, + -- Text comment made by blocker. Deprecated in favor of ipb_reason_id + ipb_reason varbinary(767) NOT NULL default '', + + -- Key to comment_id. Text comment made by blocker. + -- ("DEFAULT 0" is temporary, signaling that ipb_reason should be used) + ipb_reason_id bigint unsigned NOT NULL DEFAULT 0, -- Creation (or refresh) date in standard YMDHMS form. -- IP blocks expire automatically. @@ -959,7 +1008,8 @@ -- Description field as entered by the uploader. -- This is displayed in image upload history and logs. - img_description varbinary(767) NOT NULL, + -- Deprecated in favor of image_comment_temp.imgcomment_description_id. + img_description varbinary(767) NOT NULL default '', -- user_id and user_name of uploader. img_user int unsigned NOT NULL default 0, @@ -984,6 +1034,23 @@ -- Used to get media of one type CREATE INDEX /*i*/img_media_mime ON /*_*/image (img_media_type,img_major_mime,img_minor_mime); +-- +-- Temporary table to avoid blocking on an alter of image. +-- +-- On large wikis like Wikimedia Commons, altering the image table is a +-- months-long process. This table is being created to avoid such an alter, and +-- will be merged back into image in the future. +-- +CREATE TABLE /*_*/image_comment_temp ( + -- Key to img_name (ugh) + imgcomment_name varchar(255) binary NOT NULL, + -- Key to comment_id + imgcomment_description_id bigint unsigned NOT NULL, + PRIMARY KEY (imgcomment_name, imgcomment_description_id) +) /*$wgDBTableOptions*/; +-- Ensure uniqueness +CREATE UNIQUE INDEX /*i*/ imgcomment_name ON /*_*/image_comment_temp (imgcomment_name); + -- -- Previous revisions of uploaded files. @@ -1003,7 +1070,8 @@ oi_width int NOT NULL default 0, oi_height int NOT NULL default 0, oi_bits int NOT NULL default 0, - oi_description varbinary(767) NOT NULL, + oi_description varbinary(767) NOT NULL default '', -- Deprecated. + oi_description_id bigint unsigned NOT NULL DEFAULT 0, -- ("DEFAULT 0" is temporary, signaling that oi_description should be used) oi_user int unsigned NOT NULL default 0, oi_user_text varchar(255) binary NOT NULL, oi_timestamp binary(14) NOT NULL default '', @@ -1051,7 +1119,8 @@ -- Deletion information, if this file is deleted. fa_deleted_user int, fa_deleted_timestamp binary(14) default '', - fa_deleted_reason varbinary(767) default '', + fa_deleted_reason varbinary(767) default '', -- Deprecated + fa_deleted_reason_id bigint unsigned NOT NULL DEFAULT 0, -- ("DEFAULT 0" is temporary, signaling that fa_deleted_reason should be used) -- Duped fields from image fa_size int unsigned default 0, @@ -1062,7 +1131,8 @@ fa_media_type ENUM("UNKNOWN", "BITMAP", "DRAWING", "AUDIO", "VIDEO", "MULTIMEDIA", "OFFICE", "TEXT", "EXECUTABLE", "ARCHIVE") default NULL, fa_major_mime ENUM("unknown", "application", "audio", "image", "text", "video", "message", "model", "multipart", "chemical") default "unknown", fa_minor_mime varbinary(100) default "unknown", - fa_description varbinary(767), + fa_description varbinary(767) default '', -- Deprecated + fa_description_id bigint unsigned NOT NULL DEFAULT 0, -- ("DEFAULT 0" is temporary, signaling that fa_description should be used) fa_user int unsigned default 0, fa_user_text varchar(255) binary, fa_timestamp binary(14) default '', @@ -1160,7 +1230,8 @@ rc_title varchar(255) binary NOT NULL default '', -- as in revision... - rc_comment varbinary(767) NOT NULL default '', + rc_comment varbinary(767) NOT NULL default '', -- Deprecated. + rc_comment_id bigint unsigned NOT NULL DEFAULT 0, -- ("DEFAULT 0" is temporary, signaling that rc_comment should be used) rc_minor tinyint unsigned NOT NULL default 0, -- Edits by user accounts with the 'bot' rights key are @@ -1391,7 +1462,12 @@ log_page int unsigned NULL, -- Freeform text. Interpreted as edit history comments. + -- Deprecated in favor of log_comment_id. log_comment varbinary(767) NOT NULL default '', + + -- Key to comment_id. Comment summarizing the change. + -- ("DEFAULT 0" is temporary, signaling that log_comment should be used) + log_comment_id bigint unsigned NOT NULL DEFAULT 0, -- miscellaneous parameters: -- LF separated list (old system) or serialized PHP array (new system) @@ -1568,7 +1644,8 @@ pt_namespace int NOT NULL, pt_title varchar(255) binary NOT NULL, pt_user int unsigned NOT NULL, - pt_reason varbinary(767), + pt_reason varbinary(767) default '', -- Deprecated. + pt_reason_id bigint unsigned NOT NULL DEFAULT 0, -- ("DEFAULT 0" is temporary, signaling that pt_reason should be used) pt_timestamp binary(14) NOT NULL, pt_expiry varbinary(14) NOT NULL default '', pt_create_perm varbinary(60) NOT NULL diff --git a/tests/phpunit/includes/CommentStoreTest.php b/tests/phpunit/includes/CommentStoreTest.php new file mode 100644 index 0000000..07f79b7 --- /dev/null +++ b/tests/phpunit/includes/CommentStoreTest.php @@ -0,0 +1,521 @@ +<?php + +use Wikimedia\TestingAccessWrapper; + +/** + * @group Database + * @covers CommentStore + */ +class CommentStoreTest extends MediaWikiLangTestCase { + + protected $tablesUsed = [ + 'revision', + 'revision_comment_temp', + 'ipblocks', + 'comment', + ]; + + /** + * Create a store for a particular stage + * @param int $stage + * @return CommentStore + */ + protected function makeStore( $stage ) { + $store = new CommentStore( $this->db ); + TestingAccessWrapper::newFromObject( $store )->stage = $stage; + return $store; + } + + /** + * @dataProvider provideGetFields + * @param int $stage + * @param string $key + * @param array $expect + */ + public function testGetFields( $stage, $key, $expect ) { + $store = $this->makeStore( $stage ); + $result = $store->getFields( $key ); + $this->assertEquals( $expect, $result ); + } + + public static function provideGetFields() { + return [ + 'Simple table, old' => [ + MIGRATION_OLD, 'ipb_reason', + [ 'ipb_reason_text' => 'ipb_reason', 'ipb_reason_data' => 'NULL' ], + ], + 'Simple table, write-both' => [ + MIGRATION_WRITE_BOTH, 'ipb_reason', + [ 'ipb_reason_old' => 'ipb_reason', 'ipb_reason_id' => 'ipb_reason_id' ], + ], + 'Simple table, write-new' => [ + MIGRATION_WRITE_NEW, 'ipb_reason', + [ 'ipb_reason_old' => 'ipb_reason', 'ipb_reason_id' => 'ipb_reason_id' ], + ], + 'Simple table, new' => [ + MIGRATION_NEW, 'ipb_reason', + [ 'ipb_reason_id' => 'ipb_reason_id' ], + ], + + 'Revision, old' => [ + MIGRATION_OLD, 'rev_comment', + [ 'rev_comment_text' => 'rev_comment', 'rev_comment_data' => 'NULL' ], + ], + 'Revision, write-both' => [ + MIGRATION_WRITE_BOTH, 'rev_comment', + [ 'rev_comment_old' => 'rev_comment', 'rev_comment_pk' => 'rev_id' ], + ], + 'Revision, write-new' => [ + MIGRATION_WRITE_NEW, 'rev_comment', + [ 'rev_comment_old' => 'rev_comment', 'rev_comment_pk' => 'rev_id' ], + ], + 'Revision, new' => [ + MIGRATION_NEW, 'rev_comment', + [ 'rev_comment_pk' => 'rev_id' ], + ], + + 'Image, old' => [ + MIGRATION_OLD, 'img_description', + [ 'img_description_text' => 'img_description', 'img_description_data' => 'NULL' ], + ], + 'Image, write-both' => [ + MIGRATION_WRITE_BOTH, 'img_description', + [ 'img_description_old' => 'img_description', 'img_description_pk' => 'img_name' ], + ], + 'Image, write-new' => [ + MIGRATION_WRITE_NEW, 'img_description', + [ 'img_description_old' => 'img_description', 'img_description_pk' => 'img_name' ], + ], + 'Image, new' => [ + MIGRATION_NEW, 'img_description', + [ 'img_description_pk' => 'img_name' ], + ], + ]; + } + + /** + * @dataProvider provideGetJoin + * @param int $stage + * @param string $key + * @param array $expect + */ + public function testGetJoin( $stage, $key, $expect ) { + $store = $this->makeStore( $stage ); + $result = $store->getJoin( $key ); + $this->assertEquals( $expect, $result ); + } + + public static function provideGetJoin() { + return [ + 'Simple table, old' => [ + MIGRATION_OLD, 'ipb_reason', [ + 'tables' => [], + 'fields' => [ 'ipb_reason_text' => 'ipb_reason', 'ipb_reason_data' => 'NULL' ], + 'joins' => [], + ], + ], + 'Simple table, write-both' => [ + MIGRATION_WRITE_BOTH, 'ipb_reason', [ + 'tables' => [ 'comment_ipb_reason' => 'comment' ], + 'fields' => [ + 'ipb_reason_text' => 'COALESCE( comment_ipb_reason.comment_text, ipb_reason )', + 'ipb_reason_data' => 'comment_ipb_reason.comment_data', + ], + 'joins' => [ + 'comment_ipb_reason' => [ 'LEFT JOIN', [ 'comment_ipb_reason.comment_id = ipb_reason_id' ] ], + ], + ], + ], + 'Simple table, write-new' => [ + MIGRATION_WRITE_NEW, 'ipb_reason', [ + 'tables' => [ 'comment_ipb_reason' => 'comment' ], + 'fields' => [ + 'ipb_reason_text' => 'COALESCE( comment_ipb_reason.comment_text, ipb_reason )', + 'ipb_reason_data' => 'comment_ipb_reason.comment_data', + ], + 'joins' => [ + 'comment_ipb_reason' => [ 'LEFT JOIN', [ 'comment_ipb_reason.comment_id = ipb_reason_id' ] ], + ], + ], + ], + 'Simple table, new' => [ + MIGRATION_NEW, 'ipb_reason', [ + 'tables' => [ 'comment_ipb_reason' => 'comment' ], + 'fields' => [ + 'ipb_reason_text' => 'comment_ipb_reason.comment_text', + 'ipb_reason_data' => 'comment_ipb_reason.comment_data', + ], + 'joins' => [ + 'comment_ipb_reason' => [ 'JOIN', [ 'comment_ipb_reason.comment_id = ipb_reason_id' ] ], + ], + ], + ], + + 'Revision, old' => [ + MIGRATION_OLD, 'rev_comment', [ + 'tables' => [], + 'fields' => [ 'rev_comment_text' => 'rev_comment', 'rev_comment_data' => 'NULL' ], + 'joins' => [], + ], + ], + 'Revision, write-both' => [ + MIGRATION_WRITE_BOTH, 'rev_comment', [ + 'tables' => [ + 'temp_rev_comment' => 'revision_comment_temp', + 'comment_rev_comment' => 'comment', + ], + 'fields' => [ + 'rev_comment_text' => 'COALESCE( comment_rev_comment.comment_text, rev_comment )', + 'rev_comment_data' => 'comment_rev_comment.comment_data', + ], + 'joins' => [ + 'temp_rev_comment' => [ 'LEFT JOIN', [ 'temp_rev_comment.revcomment_rev = rev_id' ] ], + 'comment_rev_comment' => [ 'LEFT JOIN', + [ 'comment_rev_comment.comment_id = temp_rev_comment.revcomment_comment_id' ] ], + ], + ], + ], + 'Revision, write-new' => [ + MIGRATION_WRITE_NEW, 'rev_comment', [ + 'tables' => [ + 'temp_rev_comment' => 'revision_comment_temp', + 'comment_rev_comment' => 'comment', + ], + 'fields' => [ + 'rev_comment_text' => 'COALESCE( comment_rev_comment.comment_text, rev_comment )', + 'rev_comment_data' => 'comment_rev_comment.comment_data', + ], + 'joins' => [ + 'temp_rev_comment' => [ 'LEFT JOIN', [ 'temp_rev_comment.revcomment_rev = rev_id' ] ], + 'comment_rev_comment' => [ 'LEFT JOIN', + [ 'comment_rev_comment.comment_id = temp_rev_comment.revcomment_comment_id' ] ], + ], + ], + ], + 'Revision, new' => [ + MIGRATION_NEW, 'rev_comment', [ + 'tables' => [ + 'temp_rev_comment' => 'revision_comment_temp', + 'comment_rev_comment' => 'comment', + ], + 'fields' => [ + 'rev_comment_text' => 'comment_rev_comment.comment_text', + 'rev_comment_data' => 'comment_rev_comment.comment_data', + ], + 'joins' => [ + 'temp_rev_comment' => [ 'JOIN', [ 'temp_rev_comment.revcomment_rev = rev_id' ] ], + 'comment_rev_comment' => [ 'JOIN', + [ 'comment_rev_comment.comment_id = temp_rev_comment.revcomment_comment_id' ] ], + ], + ], + ], + + 'Image, old' => [ + MIGRATION_OLD, 'img_description', [ + 'tables' => [], + 'fields' => [ 'img_description_text' => 'img_description', 'img_description_data' => 'NULL' ], + 'joins' => [], + ], + ], + 'Image, write-both' => [ + MIGRATION_WRITE_BOTH, 'img_description', [ + 'tables' => [ + 'temp_img_description' => 'image_comment_temp', + 'comment_img_description' => 'comment', + ], + 'fields' => [ + 'img_description_text' => 'COALESCE( comment_img_description.comment_text, img_description )', + 'img_description_data' => 'comment_img_description.comment_data', + ], + 'joins' => [ + 'temp_img_description' => [ 'LEFT JOIN', + [ 'temp_img_description.imgcomment_name = img_name' ] ], + 'comment_img_description' => [ 'LEFT JOIN', + [ 'comment_img_description.comment_id = temp_img_description.imgcomment_description_id' ] ], + ], + ], + ], + 'Image, write-new' => [ + MIGRATION_WRITE_NEW, 'img_description', [ + 'tables' => [ + 'temp_img_description' => 'image_comment_temp', + 'comment_img_description' => 'comment', + ], + 'fields' => [ + 'img_description_text' => 'COALESCE( comment_img_description.comment_text, img_description )', + 'img_description_data' => 'comment_img_description.comment_data', + ], + 'joins' => [ + 'temp_img_description' => [ 'LEFT JOIN', + [ 'temp_img_description.imgcomment_name = img_name' ] ], + 'comment_img_description' => [ 'LEFT JOIN', + [ 'comment_img_description.comment_id = temp_img_description.imgcomment_description_id' ] ], + ], + ], + ], + 'Image, new' => [ + MIGRATION_NEW, 'img_description', [ + 'tables' => [ + 'temp_img_description' => 'image_comment_temp', + 'comment_img_description' => 'comment', + ], + 'fields' => [ + 'img_description_text' => 'comment_img_description.comment_text', + 'img_description_data' => 'comment_img_description.comment_data', + ], + 'joins' => [ + 'temp_img_description' => [ 'JOIN', [ 'temp_img_description.imgcomment_name = img_name' ] ], + 'comment_img_description' => [ 'JOIN', + [ 'comment_img_description.comment_id = temp_img_description.imgcomment_description_id' ] ], + ], + ], + ], + ]; + } + + private function assertComment( $expect, $actual, $from ) { + $this->assertSame( $expect['text'], $actual->text, "text $from" ); + $this->assertInstanceOf( get_class( $expect['message'] ), $actual->message, + "message class $from" ); + $this->assertSame( $expect['message']->getKeysToTry(), $actual->message->getKeysToTry(), + "message keys $from" ); + $this->assertEquals( $expect['message']->text(), $actual->message->text(), + "message rendering $from" ); + $this->assertEquals( $expect['data'], $actual->data, "data $from" ); + } + + /** + * @dataProvider provideInsertRoundTrip + * @param string $table + * @param string $key + * @param string $pk + * @param string $extraFields + * @param string|Message $comment + * @param array|null $data + * @param array $expect + */ + public function testInsertRoundTrip( $table, $key, $pk, $extraFields, $comment, $data, $expect ) { + $expectOld = [ + 'text' => $expect['text'], + 'message' => new RawMessage( '$1', [ $expect['text'] ] ), + 'data' => null, + ]; + + $stages = [ + MIGRATION_OLD => [ MIGRATION_OLD, MIGRATION_WRITE_NEW ], + MIGRATION_WRITE_BOTH => [ MIGRATION_OLD, MIGRATION_NEW ], + MIGRATION_WRITE_NEW => [ MIGRATION_WRITE_BOTH, MIGRATION_NEW ], + MIGRATION_NEW => [ MIGRATION_WRITE_BOTH, MIGRATION_NEW ], + ]; + + foreach ( $stages as $writeStage => $readRange ) { + if ( $key === 'ipb_reason' ) { + $extraFields['ipb_address'] = __CLASS__ . "#$writeStage"; + } + + $wstore = $this->makeStore( $writeStage ); + $usesTemp = $key === 'rev_comment'; + + if ( $usesTemp ) { + list( $fields, $callback ) = $wstore->insertWithTempTable( $key, $comment, $data ); + } else { + $fields = $wstore->insert( $key, $comment, $data ); + } + + if ( $writeStage <= MIGRATION_WRITE_BOTH ) { + $this->assertSame( $expect['text'], $fields[$key], "old field, stage=$writeStage" ); + } else { + $this->assertArrayNotHasKey( $key, $fields, "old field, stage=$writeStage" ); + } + if ( $writeStage >= MIGRATION_WRITE_BOTH && !$usesTemp ) { + $this->assertArrayHasKey( "{$key}_id", $fields, "new field, stage=$writeStage" ); + } else { + $this->assertArrayNotHasKey( "{$key}_id", $fields, "new field, stage=$writeStage" ); + } + + $this->db->insert( $table, $extraFields + $fields, __METHOD__ ); + $id = $this->db->insertId(); + if ( $usesTemp ) { + $callback( $id ); + } + + for ( $readStage = $readRange[0]; $readStage <= $readRange[1]; $readStage++ ) { + $rstore = $this->makeStore( $readStage ); + + $fieldRow = $this->db->selectRow( + $table, + $rstore->getFields( $key ), + [ $pk => $id ], + __METHOD__ + ); + + list( $tables, $fields, $joins ) = array_values( $rstore->getJoin( $key ) ); + $joinRow = $this->db->selectRow( + [ $table ] + $tables, + $fields, + [ $pk => $id ], + __METHOD__, + [], + $joins + ); + + $this->assertComment( + $writeStage === MIGRATION_OLD || $readStage === MIGRATION_OLD ? $expectOld : $expect, + $rstore->getComment( $key, $fieldRow ), + "w=$writeStage, r=$readStage, from getFields()" + ); + $this->assertComment( + $writeStage === MIGRATION_OLD || $readStage === MIGRATION_OLD ? $expectOld : $expect, + $rstore->getComment( $key, $joinRow ), + "w=$writeStage, r=$readStage, from getJoin()" + ); + } + } + } + + public static function provideInsertRoundTrip() { + $msgComment = new Message( 'parentheses', [ 'message comment' ] ); + $textCommentMsg = new RawMessage( '$1', [ 'text comment' ] ); + $nestedMsgComment = new Message( [ 'parentheses', 'rawmessage' ], [ new Message( 'mainpage' ) ] ); + $ipbfields = [ + 'ipb_range_start' => '', + 'ipb_range_end' => '', + ]; + $revfields = [ + 'rev_page' => 42, + 'rev_text_id' => 42, + 'rev_len' => 0, + ]; + + return [ + 'Simple table, text comment' => [ + 'ipblocks', 'ipb_reason', 'ipb_id', $ipbfields, 'text comment', null, [ + 'text' => 'text comment', + 'message' => $textCommentMsg, + 'data' => null, + ] + ], + 'Simple table, text comment with data' => [ + 'ipblocks', 'ipb_reason', 'ipb_id', $ipbfields, 'text comment', [ 'message' => 42 ], [ + 'text' => 'text comment', + 'message' => $textCommentMsg, + 'data' => [ 'message' => 42 ], + ] + ], + 'Simple table, message comment' => [ + 'ipblocks', 'ipb_reason', 'ipb_id', $ipbfields, $msgComment, null, [ + 'text' => '(message comment)', + 'message' => $msgComment, + 'data' => null, + ] + ], + 'Simple table, message comment with data' => [ + 'ipblocks', 'ipb_reason', 'ipb_id', $ipbfields, $msgComment, [ 'message' => 42 ], [ + 'text' => '(message comment)', + 'message' => $msgComment, + 'data' => [ 'message' => 42 ], + ] + ], + 'Simple table, nested message comment' => [ + 'ipblocks', 'ipb_reason', 'ipb_id', $ipbfields, $nestedMsgComment, null, [ + 'text' => '(Main Page)', + 'message' => $nestedMsgComment, + 'data' => null, + ] + ], + + 'Revision, text comment' => [ + 'revision', 'rev_comment', 'rev_id', $revfields, 'text comment', null, [ + 'text' => 'text comment', + 'message' => $textCommentMsg, + 'data' => null, + ] + ], + 'Revision, text comment with data' => [ + 'revision', 'rev_comment', 'rev_id', $revfields, 'text comment', [ 'message' => 42 ], [ + 'text' => 'text comment', + 'message' => $textCommentMsg, + 'data' => [ 'message' => 42 ], + ] + ], + 'Revision, message comment' => [ + 'revision', 'rev_comment', 'rev_id', $revfields, $msgComment, null, [ + 'text' => '(message comment)', + 'message' => $msgComment, + 'data' => null, + ] + ], + 'Revision, message comment with data' => [ + 'revision', 'rev_comment', 'rev_id', $revfields, $msgComment, [ 'message' => 42 ], [ + 'text' => '(message comment)', + 'message' => $msgComment, + 'data' => [ 'message' => 42 ], + ] + ], + 'Revision, nested message comment' => [ + 'revision', 'rev_comment', 'rev_id', $revfields, $nestedMsgComment, null, [ + 'text' => '(Main Page)', + 'message' => $nestedMsgComment, + 'data' => null, + ] + ], + ]; + } + + public static function provideStages() { + return [ + 'MIGRATION_OLD' => [ MIGRATION_OLD ], + 'MIGRATION_WRITE_BOTH' => [ MIGRATION_WRITE_BOTH ], + 'MIGRATION_WRITE_NEW' => [ MIGRATION_WRITE_NEW ], + 'MIGRATION_NEW' => [ MIGRATION_NEW ], + ]; + } + + /** + * @dataProvider provideStages + * @param int $stage + * @expectedException InvalidArgumentException + * @expectedExceptionMessage Must use insertWithTempTable() for rev_comment + */ + public function testInsertWrong( $stage ) { + $store = $this->makeStore( $stage ); + $store->insert( 'rev_comment', 'foo' ); + } + + /** + * @dataProvider provideStages + * @param int $stage + * @expectedException InvalidArgumentException + * @expectedExceptionMessage Must use insert() for ipb_reason + */ + public function testInsertWithTempTableWrong( $stage ) { + $store = $this->makeStore( $stage ); + $store->insertWithTempTable( 'ipb_reason', 'foo' ); + } + + /** + * @dataProvider provideStages + * @param int $stage + * @expectedException InvalidArgumentException + * @expectedExceptionMessage $data must be null or an array + */ + public function testInsertBadData( $stage ) { + $store = $this->makeStore( $stage ); + $store->insert( 'ipb_reason', 'foo', 'bar' ); + } + + /** + * @dataProvider provideStages + * @param int $stage + */ + public function testInsertWithTempTableDeprecated( $stage ) { + $wrap = TestingAccessWrapper::newFromClass( CommentStore::class ); + $wrap->formerTempTables += [ 'ipb_reason' => '1.30' ]; + + $this->hideDeprecated( 'CommentStore::insertWithTempTable for ipb_reason' ); + $store = $this->makeStore( $stage ); + list( $fields, $callback ) = $store->insertWithTempTable( 'ipb_reason', 'foo' ); + $this->assertTrue( is_callable( $callback ) ); + } + +} -- To view, visit https://gerrit.wikimedia.org/r/357892 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ic3a434c061ed6e443ea072bc62dda09acbeeed7f Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: Anomie <bjor...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits