jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/394816 )
Change subject: Reattribute edits to imported users. ...................................................................... Reattribute edits to imported users. Edits and log entries which previously had user id 0 but the username of the imported user would stay not-reattributed, which meant it wouldn't show up in Special:Contributions. Now we (optionally) attribute edits as well. A maintenance script is provided which can reattribute all edits, and does not require this extension to be installed to run. Additionally, a new config option dictates whether or not to import the watchlist. Change-Id: Ifd26850a9fdf36deaf3c4408375c1dbd9f80eb86 --- M ExternalWikiPrimaryAuthenticationProvider.php M PopulateImportedWatchlistJob.php A ReattributeImportedEditsJob.php M extension.json A maintenance/reattributeImportedEdits.php 5 files changed, 228 insertions(+), 27 deletions(-) Approvals: Skizzerz: Looks good to me, approved jenkins-bot: Verified diff --git a/ExternalWikiPrimaryAuthenticationProvider.php b/ExternalWikiPrimaryAuthenticationProvider.php index 45761f5..4de21dd 100644 --- a/ExternalWikiPrimaryAuthenticationProvider.php +++ b/ExternalWikiPrimaryAuthenticationProvider.php @@ -160,35 +160,81 @@ ]; $watchlist = []; - - while ( true ) { - $resp = $this->apiRequest( 'GET', $wrquery, [], __METHOD__ ); - $watchlist = array_merge( $watchlist, $resp->watchlistraw ); - - if ( !isset( $resp->{'query-continue'} ) ) { - break; - } - - $wrquery['wrcontinue'] = $resp->{'query-continue'}->watchlistraw->wrcontinue; - } - - // enqueue jobs to actually add the watchlist pages to the user, since there might be a lot of them $pagesPerJob = (int)$this->config->get( 'UpdateRowsPerJob' ); - if ( $pagesPerJob <= 0 ) { - $this->logger->warning( '$wgUpdateRowsPerJob is set to 0 or a negative value; importing watchlist in batches of 300 instead.' ); - $pagesPerJob = 300; + if ( $pagesPerJob < 100 ) { + $this->logger->warning( '$wgUpdateRowsPerJob is set to less than 100; performing jobs in batches of 100 instead.' ); + $pagesPerJob = 100; } + $dbw = wfGetDB( DB_MASTER ); + $dbr = wfGetDB( DB_REPLICA ); $jobs = []; $title = $user->getUserPage(); // not used by us, but Job constructor needs a valid Title - while ( $watchlist ) { - // array_splice reduces the size of $watchlist and returns the removed elements. - // This avoids memory bloat so that we only keep the watchlist resident in memory one time. - $slice = array_splice( $watchlist, 0, $pagesPerJob ); - $jobs[] = new PopulateImportedWatchlistJob( $title, [ 'username' => $user->getName(), 'pages' => $slice ] ); + + // enqueue jobs to actually add watchlist items and to reattribute already-existing edits (if enabled) + if ( $this->config->get( 'MediaWikiAuthImportWatchlist' ) ) { + while ( true ) { + $resp = $this->apiRequest( 'GET', $wrquery, [], __METHOD__ ); + $watchlist = array_merge( $watchlist, $resp->watchlistraw ); + + if ( !isset( $resp->{'query-continue'} ) ) { + break; + } + + $wrquery['wrcontinue'] = $resp->{'query-continue'}->watchlistraw->wrcontinue; + } + + while ( $watchlist ) { + // array_splice reduces the size of $watchlist and returns the removed elements. + // This avoids memory bloat so that we only keep the watchlist resident in memory one time. + $slice = array_splice( $watchlist, 0, $pagesPerJob ); + $jobs[] = new PopulateImportedWatchlistJob( $title, [ 'username' => $user->getName(), 'pages' => $slice ] ); + } } - \JobQueueGroup::singleton()->push( $jobs ); + if ( $this->config->get( 'MediaWikiAuthReattributeEdits' ) ) { + foreach ( ReattributeImportedEdits::getTableMetadata() as $table => $metadata ) { + $idKey = $metadata[0]; + + foreach ( $metadata[1] as $nameKey => $fields ) { + $idEnd = true; // so next loop doesn't terminate immediately + + for ( $offset = 0; $idEnd !== false; $offset += $pagesPerJob ) { + // this is being thrown in the job queue anyway, so up-to-date data isn't required + // any newly-imported revs/logs will see our new user and attribute properly anyway + $idStart = $dbr->selectField( + $table, + $idKey, + '', // no WHERE clause + __METHOD__ . ':idStart', + [ 'ORDER BY' => $idKey, 'OFFSET' => $offset ] + ); + + $idEnd = $dbr->selectField( + $table, + $idKey, + '', // no WHERE clause + __METHOD__ . ':idEnd', + [ 'ORDER BY' => $idKey, 'OFFSET' => $offset + $pagesPerJob - 1 ] + ); + + $jobs[] = new ReattributeImportedEditsJob( $title, [ + 'username' => $user->getName(), + 'id_start' => $idStart, + 'id_end' => $idEnd, + 'table' => $table, + 'idkey' => $idKey, + 'namekey' => $nameKey, + 'fields' => $fields + ] ); + } + } + } + } + + if ( $jobs !== [] ) { + \JobQueueGroup::singleton()->push( $jobs ); + } // groupmemberships contains groups and expiries, but is only present in recent versions of MW. Fall back to groups if it doesn't exist. $validGroups = array_diff( array_keys( $this->config->get( 'GroupPermissions' ) ), $this->config->get( 'ImplicitGroups' ) ); @@ -246,7 +292,6 @@ } // editcount and registrationdate cannot be set via methods on User - $dbw = wfGetDB( DB_MASTER ); $dbw->update( 'user', [ diff --git a/PopulateImportedWatchlistJob.php b/PopulateImportedWatchlistJob.php index 05a8984..37d7c62 100644 --- a/PopulateImportedWatchlistJob.php +++ b/PopulateImportedWatchlistJob.php @@ -39,5 +39,7 @@ // we may wish to import changed as well (as wl_notificationtimestamp in the db). Note that $page->changed may not exists, // need to test with if ( isset( $page->changed ) ) before doing anything with it. } + + return true; } } diff --git a/ReattributeImportedEditsJob.php b/ReattributeImportedEditsJob.php new file mode 100644 index 0000000..32813e0 --- /dev/null +++ b/ReattributeImportedEditsJob.php @@ -0,0 +1,52 @@ +<?php + +namespace MediaWikiAuth; + +use User; + +class ReattributeImportedEditsJob extends \Job { + /** + * Construct a new edit reattribution job. + * + * @param $title Title unused + * @param $params Array of the format [ + * 'username' => string username of the user whose edits we are reattributing + * 'id_start' => mixed id of the revision/log we're starting at to reattribute + * 'id_end' => mixed id of the revision/log we're ending at (inclusive) + * 'table' => string table name to operate on (without prefix) + * 'idkey' => string field containing table id + * 'namekey' => string field containing username to look up + * 'fields' => array of string fields containing user ids to modify + * ] + */ + public function __construct( $title, $params ) { + parent::__construct( 'reattributeImportedEdits', $title, $params ); + } + + public function run() { + $user = User::newFromName( $this->params['username'] ); + if ( $user === null || $user->getId() === 0 ) { + throw new \BadMethodCallException( "Attempting to reattribute edits for nonexistent user {$this->params['username']}." ); + } + + $updateFields = array_fill_keys( $this->params['fields'], $user->getId() ); + + $dbw = wfGetDB( DB_MASTER ); + $conds = [ $this->params['namekey'] => $user->getName() ]; + $id1 = $dbw->addQuotes( $this->params['id_start'] ); + $id2 = $dbw->addQuotes( $this->params['id_end'] ); + + if ( $this->params['id_start'] === false && $this->params['id_end'] !== false ) { + $conds[] = "{$this->params['idkey']} <= {$id2}"; + } elseif ( $this->params['id_start'] !== false && $this->params['id_end'] === false ) { + $conds[] = "{$this->params['idkey']} >= {$id1}"; + } elseif ( $this->params['id_start'] !== false && $this->params['id_end'] !== false ) { + $conds[] = "{$this->params['idkey']} BETWEEN {$id1} AND {$id2}"; + } + + $dbw->update( $this->params['table'], $updateFields, $conds, __METHOD__ ); + + return true; + } + +} diff --git a/extension.json b/extension.json index 73591cc..d62130b 100644 --- a/extension.json +++ b/extension.json @@ -1,6 +1,6 @@ { "name": "MediaWikiAuth", - "version": "0.10.0", + "version": "1.0.0", "author": [ "Laurence Parry", "Jack Phoenix", @@ -14,7 +14,9 @@ "config": { "MediaWikiAuthAllowPasswordChange": false, "MediaWikiAuthApiUrl": "", - "MediaWikiAuthImportGroups": true + "MediaWikiAuthImportGroups": true, + "MediaWikiAuthImportWatchlist": true, + "MediaWikiAuthReattributeEdits": true }, "MessagesDirs": { "MediaWikiAuth": [ @@ -23,13 +25,16 @@ }, "AutoloadClasses": { "MediaWikiAuth\\ExternalWikiPrimaryAuthenticationProvider": "ExternalWikiPrimaryAuthenticationProvider.php", - "MediaWikiAuth\\PopulateImportedWatchlistJob": "PopulateImportedWatchlistJob.php" + "MediaWikiAuth\\PopulateImportedWatchlistJob": "PopulateImportedWatchlistJob.php", + "MediaWikiAuth\\ReattributeImportedEdits": "maintenance/reattributeImportedEdits.php", + "MediaWikiAuth\\ReattributeImportedEditsJob": "ReattributeImportedEditsJob.php" }, "AvailableRights": [ "mwa-createlocalaccount" ], "JobClasses": { - "populateImportedWatchlist": "MediaWikiAuth\\PopulateImportedWatchlistJob" + "populateImportedWatchlist": "MediaWikiAuth\\PopulateImportedWatchlistJob", + "reattributeImportedEdits": "MediaWikiAuth\\ReattributeImportedEditsJob" }, "AuthManagerAutoConfig": { "primaryauth": { diff --git a/maintenance/reattributeImportedEdits.php b/maintenance/reattributeImportedEdits.php new file mode 100644 index 0000000..7fb8c7e --- /dev/null +++ b/maintenance/reattributeImportedEdits.php @@ -0,0 +1,97 @@ +<?php + +namespace MediaWikiAuth; + +use Wikimedia\Rdbms\Database; + +if ( getenv( 'MW_INSTALL_PATH' ) ) { + $IP = getenv( 'MW_INSTALL_PATH' ); +} else { + $IP = __DIR__ . '/../../..'; +} + +require_once "$IP/maintenance/Maintenance.php"; + +class ReattributeImportedEdits extends \Maintenance { + const OPT_USER = 'user'; + + public function __construct() { + parent::__construct(); + + $this->addOption( + self::OPT_USER, + 'Username to update. If not specified, all users will be updated.', + false, // not required + true // requires argument + ); + } + + public function execute() { + $dbw = wfGetDB( DB_MASTER ); + $singleUser = false; + + if ( $this->hasOption( self::OPT_USER ) ) { + $user = \User::newFromName( $this->getOption( self::OPT_USER ) ); + + if ( $user === null || $user->getId() === 0 ) { + $this->error( "User {$user} does not exist.\n", 1 ); + return; // never actually get here; error() calls die() + } + + $singleUser = $user->getName(); + } + + foreach ( self::getTableMetadata() as $table => $metadata ) { + foreach ( $metadata[1] as $nameKey => $fields ) { + // not every DMBS supports joins on update, and those that do all + // do it different ways. Subqueries are therefore more portable. + $conds = array_fill_keys( $fields, 0 ); + $setList = []; + + $subquery = $dbw->selectSQLText( + 'user', + 'user_id', + "user_name = $nameKey", + __METHOD__ . ':subquery' + ); + + if ( $singleUser !== false ) { + $conds[$nameKey] = $singleUser; + } else { + $conds[] = "EXISTS($subquery)"; + } + + foreach ( $fields as $field ) { + $setList[] = "$field = ($subquery)"; + } + + $this->output( "Updating {$table} (this may take a few minutes)...\n" ); + $success = $dbw->update( $table, $setList, $conds, __METHOD__ . ':update' ); + + if ( $success ) { + $rows = $dbw->affectedRows(); + $this->output( "Updated {$rows} records on {$table}.\n" ); + } else { + $this->error( "Unable to update table {$table}.\n" ); + } + } + } + } + + public static function getTableMetadata() { + // Note that only tables which are used in the XML dump import process (plus recentchanges) are updated. + return [ + 'archive' => [ 'ar_id', [ 'ar_user_text' => [ 'ar_user' ] ] ], + 'filearchive' => [ 'fa_id', [ 'fa_user_text' => [ 'fa_user' ] ] ], + // img_name is the PK, and PKs are clustered on InnoDB, so we can sensibly use BETWEEN + 'image' => [ 'img_name', [ 'img_user_text' => [ 'img_user' ] ] ], + 'logging' => [ 'log_id', [ 'log_user_text' => [ 'log_user' ] ] ], + 'oldimage' => [ 'oi_name', [ 'oi_user_text' => [ 'oi_user' ] ] ], + 'recentchanges' => [ 'rc_id', [ 'rc_user_text' => [ 'rc_user' ] ] ], + 'revision' => [ 'rev_id', [ 'rev_user_text' => [ 'rev_user' ] ] ] + ]; + } +} + +$maintClass = 'MediaWikiAuth\ReattributeImportedEdits'; +require_once RUN_MAINTENANCE_IF_MAIN; -- To view, visit https://gerrit.wikimedia.org/r/394816 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ifd26850a9fdf36deaf3c4408375c1dbd9f80eb86 Gerrit-PatchSet: 3 Gerrit-Project: mediawiki/extensions/MediaWikiAuth Gerrit-Branch: master Gerrit-Owner: Skizzerz <skizz...@skizzerz.net> Gerrit-Reviewer: Daniel Friesen <dan...@nadir-seen-fire.com> Gerrit-Reviewer: Legoktm <lego...@member.fsf.org> Gerrit-Reviewer: Skizzerz <skizz...@skizzerz.net> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits