Skizzerz has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/394816 )

Change subject: Reattribute edits to imported users.
......................................................................

Reattribute edits to imported users.

Edits and log entries which had user id 0 but carried the username of
the imported user previously stayed unattributed, which meant they did
not show up in Special:Contributions. Now we (optionally) reattribute
those edits as well. A maintenance script is provided which can reattribute
all edits; it does not require this extension to be installed to run.

Additionally, a new config option dictates whether or not to import the
watchlist.

Change-Id: Ifd26850a9fdf36deaf3c4408375c1dbd9f80eb86
---
M ExternalWikiPrimaryAuthenticationProvider.php
M PopulateImportedWatchlistJob.php
A ReattributeImportedEditsJob.php
M extension.json
A maintenance/reattributeImportedEdits.php
5 files changed, 228 insertions(+), 27 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/MediaWikiAuth 
refs/changes/16/394816/1

diff --git a/ExternalWikiPrimaryAuthenticationProvider.php 
b/ExternalWikiPrimaryAuthenticationProvider.php
index 45761f5..4de21dd 100644
--- a/ExternalWikiPrimaryAuthenticationProvider.php
+++ b/ExternalWikiPrimaryAuthenticationProvider.php
@@ -160,35 +160,81 @@
                ];
 
                $watchlist = [];
-
-               while ( true ) {
-                       $resp = $this->apiRequest( 'GET', $wrquery, [], 
__METHOD__ );
-                       $watchlist = array_merge( $watchlist, 
$resp->watchlistraw );
-
-                       if ( !isset( $resp->{'query-continue'} ) ) {
-                               break;
-                       }
-
-                       $wrquery['wrcontinue'] = 
$resp->{'query-continue'}->watchlistraw->wrcontinue;
-               }
-
-               // enqueue jobs to actually add the watchlist pages to the 
user, since there might be a lot of them
                $pagesPerJob = (int)$this->config->get( 'UpdateRowsPerJob' );
-               if ( $pagesPerJob <= 0 ) {
-                       $this->logger->warning( '$wgUpdateRowsPerJob is set to 
0 or a negative value; importing watchlist in batches of 300 instead.' );
-                       $pagesPerJob = 300;
+               if ( $pagesPerJob < 100 ) {
+                       $this->logger->warning( '$wgUpdateRowsPerJob is set to 
less than 100; performing jobs in batches of 100 instead.' );
+                       $pagesPerJob = 100;
                }
 
+               $dbw = wfGetDB( DB_MASTER );
+               $dbr = wfGetDB( DB_REPLICA );
                $jobs = [];
                $title = $user->getUserPage(); // not used by us, but Job 
constructor needs a valid Title
-               while ( $watchlist ) {
-                       // array_splice reduces the size of $watchlist and 
returns the removed elements.
-                       // This avoids memory bloat so that we only keep the 
watchlist resident in memory one time.
-                       $slice = array_splice( $watchlist, 0, $pagesPerJob );
-                       $jobs[] = new PopulateImportedWatchlistJob( $title, [ 
'username' => $user->getName(), 'pages' => $slice ] );
+
+               // enqueue jobs to actually add watchlist items and to 
reattribute already-existing edits (if enabled)
+               if ( $this->config->get( 'MediaWikiAuthImportWatchlist' ) ) {
+                       while ( true ) {
+                               $resp = $this->apiRequest( 'GET', $wrquery, [], 
__METHOD__ );
+                               $watchlist = array_merge( $watchlist, 
$resp->watchlistraw );
+
+                               if ( !isset( $resp->{'query-continue'} ) ) {
+                                       break;
+                               }
+
+                               $wrquery['wrcontinue'] = 
$resp->{'query-continue'}->watchlistraw->wrcontinue;
+                       }
+
+                       while ( $watchlist ) {
+                               // array_splice reduces the size of $watchlist 
and returns the removed elements.
+                               // This avoids memory bloat so that we only 
keep the watchlist resident in memory one time.
+                               $slice = array_splice( $watchlist, 0, 
$pagesPerJob );
+                               $jobs[] = new PopulateImportedWatchlistJob( 
$title, [ 'username' => $user->getName(), 'pages' => $slice ] );
+                       }
                }
 
-               \JobQueueGroup::singleton()->push( $jobs );
+               if ( $this->config->get( 'MediaWikiAuthReattributeEdits' ) ) {
+                       foreach ( ReattributeImportedEdits::getTableMetadata() 
as $table => $metadata ) {
+                               $idKey = $metadata[0];
+
+                               foreach ( $metadata[1] as $nameKey => $fields ) 
{
+                                       $idEnd = true; // so next loop doesn't 
terminate immediately
+
+                                       for ( $offset = 0; $idEnd !== false; 
$offset += $pagesPerJob ) {
+                                               // this is being thrown in the 
job queue anyway, so up-to-date data isn't required
+                                               // any newly-imported revs/logs 
will see our new user and attribute properly anyway
+                                               $idStart = $dbr->selectField(
+                                                       $table,
+                                                       $idKey,
+                                                       '', // no WHERE clause
+                                                       __METHOD__ . ':idStart',
+                                                       [ 'ORDER BY' => $idKey, 
'OFFSET' => $offset ]
+                                               );
+
+                                               $idEnd = $dbr->selectField(
+                                                       $table,
+                                                       $idKey,
+                                                       '', // no WHERE clause
+                                                       __METHOD__ . ':idEnd',
+                                                       [ 'ORDER BY' => $idKey, 
'OFFSET' => $offset + $pagesPerJob - 1 ]
+                                               );
+
+                                               $jobs[] = new 
ReattributeImportedEditsJob( $title, [
+                                                       'username' => 
$user->getName(),
+                                                       'id_start' => $idStart,
+                                                       'id_end' => $idEnd,
+                                                       'table' => $table,
+                                                       'idkey' => $idKey,
+                                                       'namekey' => $nameKey,
+                                                       'fields' => $fields
+                                               ] );
+                                       }
+                               }
+                       }
+               }
+
+               if ( $jobs !== [] ) {
+                       \JobQueueGroup::singleton()->push( $jobs );
+               }
 
                // groupmemberships contains groups and expiries, but is only 
present in recent versions of MW. Fall back to groups if it doesn't exist.
                $validGroups = array_diff( array_keys( $this->config->get( 
'GroupPermissions' ) ), $this->config->get( 'ImplicitGroups' ) );
@@ -246,7 +292,6 @@
                }
 
                // editcount and registrationdate cannot be set via methods on 
User
-               $dbw = wfGetDB( DB_MASTER );
                $dbw->update(
                        'user',
                        [
diff --git a/PopulateImportedWatchlistJob.php b/PopulateImportedWatchlistJob.php
index 05a8984..37d7c62 100644
--- a/PopulateImportedWatchlistJob.php
+++ b/PopulateImportedWatchlistJob.php
@@ -39,5 +39,7 @@
                        // we may wish to import changed as well (as 
wl_notificationtimestamp in the db). Note that $page->changed may not exists,
                        // need to test with if ( isset( $page->changed ) ) 
before doing anything with it.
                }
+
+               return true;
        }
 }
diff --git a/ReattributeImportedEditsJob.php b/ReattributeImportedEditsJob.php
new file mode 100644
index 0000000..dec71b5
--- /dev/null
+++ b/ReattributeImportedEditsJob.php
@@ -0,0 +1,52 @@
+<?php
+
+namespace MediaWikiAuth;
+
+use User;
+
+class ReattributeImportedEditsJob extends \Job {
+       /**
+        * Construct a new edit reattribution job.
+        *
+        * @param $title Title unused
+        * @param $params Array of the format [
+        *     'username' => string username of the user whose edits we are 
reattributing
+        *     'id_start' => mixed id of the revision/log we're starting at to 
reattribute
+        *     'id_end' => mixed id of the revision/log we're ending at 
(inclusive)
+        *     'table' => string table name to operate on (without prefix)
+        *     'idkey' => string field containing table id
+        *     'namekey' => string field containing username to look up
+        *     'fields' => array of string fields containing user ids to modify
+        * ]
+        */
+       public function __construct( $title, $params ) {
+               parent::__construct( 'reattributeImportedEdits', $title, 
$params );
+       }
+
+       public function run() {
+               $user = User::newFromName( $this->params['username'] );
+               if ( $user === null || $user->getId() === 0 ) {
+                       throw new \BadMethodCallException( "Attempting to 
reattribute edits for nonexistent user {$this->params['username']}." );
+               }
+
+               $updateFields = array_fill_keys( $this->params['fields'], 
$user->getId() );
+
+               $dbw = wfGetDB( DB_MASTER );
+               $conds = [ $this->params['namekey'] => $user->getName() ];
+               $id1 = $dbw->addQuotes( $this->params['id_start'] );
+               $id2 = $dbw->addQuotes( $this->params['id_end'] );
+
+               if ( $this->params['id_start'] === false && 
$this->params['id_end'] !== false ) {
+                       $conds[] = "{$this->params['idkey']} <= {$id2}"; 
+               } elseif ( $this->params['id_start'] !== false && 
$this->params['id_end'] === false ) {
+                       $conds[] = "{$this->params['idkey']} >= {$id1}";
+               } elseif ( $this->params['id_start'] !== false && 
$this->params['id_end'] !== false ) {
+                       $conds[] = "{$this->params['idkey']} BETWEEN {$id1} AND 
{$id2}";
+               }
+
+               $dbw->update( $this->params['table'], $updateFields, $conds, 
__METHOD__ );
+
+               return true;
+       }
+
+}
diff --git a/extension.json b/extension.json
index 73591cc..d62130b 100644
--- a/extension.json
+++ b/extension.json
@@ -1,6 +1,6 @@
 {
        "name": "MediaWikiAuth",
-       "version": "0.10.0",
+       "version": "1.0.0",
        "author": [
                "Laurence Parry",
                "Jack Phoenix",
@@ -14,7 +14,9 @@
        "config": {
                "MediaWikiAuthAllowPasswordChange": false,
                "MediaWikiAuthApiUrl": "",
-               "MediaWikiAuthImportGroups": true
+               "MediaWikiAuthImportGroups": true,
+               "MediaWikiAuthImportWatchlist": true,
+               "MediaWikiAuthReattributeEdits": true
        },
        "MessagesDirs": {
                "MediaWikiAuth": [
@@ -23,13 +25,16 @@
        },
        "AutoloadClasses": {
                "MediaWikiAuth\\ExternalWikiPrimaryAuthenticationProvider": 
"ExternalWikiPrimaryAuthenticationProvider.php",
-               "MediaWikiAuth\\PopulateImportedWatchlistJob": 
"PopulateImportedWatchlistJob.php"
+               "MediaWikiAuth\\PopulateImportedWatchlistJob": 
"PopulateImportedWatchlistJob.php",
+               "MediaWikiAuth\\ReattributeImportedEdits": 
"maintenance/reattributeImportedEdits.php",
+               "MediaWikiAuth\\ReattributeImportedEditsJob": 
"ReattributeImportedEditsJob.php"
        },
        "AvailableRights": [
                "mwa-createlocalaccount"
        ],
        "JobClasses": {
-               "populateImportedWatchlist": 
"MediaWikiAuth\\PopulateImportedWatchlistJob"
+               "populateImportedWatchlist": 
"MediaWikiAuth\\PopulateImportedWatchlistJob",
+               "reattributeImportedEdits": 
"MediaWikiAuth\\ReattributeImportedEditsJob"
        },
        "AuthManagerAutoConfig": {
                "primaryauth": {
diff --git a/maintenance/reattributeImportedEdits.php 
b/maintenance/reattributeImportedEdits.php
new file mode 100644
index 0000000..78f9d09
--- /dev/null
+++ b/maintenance/reattributeImportedEdits.php
@@ -0,0 +1,97 @@
+<?php
+
+namespace MediaWikiAuth;
+
+use Wikimedia\Rdbms\Database;
+
+if ( getenv( 'MW_INSTALL_PATH' ) ) {
+       $IP = getenv( 'MW_INSTALL_PATH' );
+} else {
+       $IP = __DIR__ . '/../../..';
+}
+
+require_once "$IP/maintenance/Maintenance.php";
+
+class ReattributeImportedEdits extends \Maintenance {
+       private const OPT_USER = 'user';
+
+       public function __construct() {
+               parent::__construct();
+
+               $this->addOption(
+                       self::OPT_USER,
+                       'Username to update. If not specified, all users will 
be updated.',
+                       false, // not required
+                       true // requires argument
+               );
+       }
+
+       public function execute() {
+               $dbw = wfGetDB( DB_MASTER );
+               $singleUser = false;
+
+               if ( $this->hasOption( self::OPT_USER ) ) {
+                       $user = \User::newFromName( $this->getOption( 
self::OPT_USER ) );
+
+                       if ( $user === null || $user->getId() === 0 ) {
+                               $this->error( "User {$user} does not exist.\n", 
1 );
+                               return; // never actually get here; error() 
calls die()
+                       }
+
+                       $singleUser = $user->getName();
+               }
+
+               foreach ( self::getTableMetadata() as $table => $metadata ) {
+                       foreach ( $metadata[1] as $nameKey => $fields ) {
+                               // not every DBMS supports joins on update, and 
those that do all
+                               // do it in different ways. Subqueries are 
therefore more portable.
+                               $conds = array_fill_keys( $fields, 0 );
+                               $setList = [];
+
+                               $subquery = $dbw->selectSQLText(
+                                       'user',
+                                       'user_id',
+                                       "user_name = $nameKey",
+                                       __METHOD__ . ':subquery'
+                               );
+
+                               if ( $singleUser !== false ) {
+                                       $conds[$nameKey] = $singleUser;
+                               } else {
+                                       $conds[] = "EXISTS($subquery)";
+                               }
+
+                               foreach ( $fields as $field ) {
+                                       $setList[] = "$field = ($subquery)";
+                               }
+
+                               $this->output( "Updating {$table} (this may 
take a few minutes)...\n" );
+                               $success = $dbw->update( $table, $setList, 
$conds, __METHOD__ . ':update' );
+
+                               if ( $success ) {
+                                       $rows = $dbw->affectedRows();
+                                       $this->output( "Updated {$rows} records 
on {$table}.\n" );
+                               } else {
+                                       $this->error( "Unable to update table 
{$table}.\n" );
+                               }
+                       }
+               }
+       }
+
+       public static function getTableMetadata() {
+               // Note that only tables which are used in the XML dump import 
process (plus recentchanges) are updated.
+               return [
+                       'archive' => [ 'ar_id', [ 'ar_user_text' => [ 'ar_user' 
] ] ],
+                       'filearchive' => [ 'fa_id', [ 'fa_user_text' => [ 
'fa_user' ] ] ],
+                       // img_name is the PK, and PKs are clustered on InnoDB, 
so we can sensibly use BETWEEN
+                       'image' => [ 'img_name', [ 'img_user_text' => [ 
'img_user' ] ] ],
+                       'logging' => [ 'log_id', [ 'log_user_text' => [ 
'log_user' ] ] ],
+                       'oldimage' => [ 'oi_name', [ 'oi_user_text' => [ 
'oi_user' ] ] ],
+                       'recentchanges' => [ 'rc_id', [ 'rc_user_text' => [ 
'rc_user' ] ] ],
+                       'revision' => [ 'rev_id', [ 'rev_user_text' => [ 
'rev_user' ] ] ]
+               ];
+       }
+}
+
+$maintClass = 'MediaWikiAuth\ReattributeImportedEdits';
+require_once RUN_MAINTENANCE_IF_MAIN;

-- 
To view, visit https://gerrit.wikimedia.org/r/394816
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ifd26850a9fdf36deaf3c4408375c1dbd9f80eb86
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/MediaWikiAuth
Gerrit-Branch: master
Gerrit-Owner: Skizzerz <skizz...@skizzerz.net>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to