Gergő Tisza has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/349946 )
Change subject: Add ability to purge old rows to PurgeScoreCache
......................................................................
Add ability to purge old rows to PurgeScoreCache
This allows PurgeScoreCache to be used as a cron job to limit
scores to those revisions which are still in recentchanges.
Each deletion batch does a left join on recentchanges, which is
probably not the most efficient approach but seems good enough for
a maintenance script (P5319; at 30ms per batch of 1000 rows, 90M
rows would take roughly an hour).
Also add some logging per the TODO comment.
Bug: T159753
Change-Id: Id35bca820822dc46caa5adf6450c81871465abc9
---
M includes/Cache.php
M maintenance/PurgeScoreCache.php
2 files changed, 69 insertions(+), 11 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ORES
refs/changes/46/349946/1
diff --git a/includes/Cache.php b/includes/Cache.php
index 4513902..de299e5 100644
--- a/includes/Cache.php
+++ b/includes/Cache.php
@@ -49,25 +49,63 @@
*
* @param string $model Model name.
* @param bool $isEverything When true, delete scores with the
up-to-date
- * model version as well. This can be used in cases where the old data
is
- * bad, but no new model has been released yet.
+ * model version as well. This can be used in cases where the old
data is
+ * bad, but no new model has been released yet.
* @param integer $batchSize Maximum number of records to delete per
loop.
- * Note that this function runs multiple batches, until all records are
deleted.
+ * Note that this function runs multiple batches, until all records
are deleted.
+ * @return int The number of deleted rows
*/
public function purge( $model, $isEverything, $batchSize = 1000 ) {
- $dbr = \wfGetDB( DB_REPLICA );
- $dbw = \wfGetDB( DB_MASTER );
-
$tables = [ 'ores_classification', 'ores_model' ];
-
- $join_conds = [ 'ores_model' =>
- [ 'LEFT JOIN', 'oresm_id = oresc_model' ] ];
+ $join_conds = [
+ 'ores_model' => [ 'LEFT JOIN', 'oresm_id = oresc_model'
],
+ ];
$conditions = [
'oresm_name' => [ $model, null ],
];
if ( !$isEverything ) {
$conditions[] = '(oresm_is_current != 1 OR
oresm_is_current IS NULL)';
}
+ return $this->deleteRows( $tables, $conditions, $join_conds );
+ }
+
+ /**
+ * Delete old cached scores.
+ * A score is old if the corresponding revision is not in the
recentchanges table.
+ * @param string $model Model name.
+ * @param integer $batchSize Maximum number of records to delete per
loop.
+ * Note that this function runs multiple batches, until all records
are deleted.
+ * @return int The number of deleted rows
+ */
+ public function purgeOld( $model, $batchSize = 1000 ) {
+ $tables = [ 'ores_classification', 'ores_model',
'recentchanges' ];
+ $join_conds = [
+ 'ores_model' => [ 'LEFT JOIN', 'oresm_id = oresc_model'
],
+ 'recentchanges' => [ 'LEFT JOIN', 'oresc_rev =
rc_this_oldid' ],
+ ];
+ $conditions = [
+ 'oresm_name' => [ $model, null ],
+ 'rc_this_oldid' => null,
+ ];
+ return $this->deleteRows( $tables, $conditions, $join_conds );
+ }
+
+ /**
+ * Delete cached scores. Which rows to delete is given by
Database::select parameters.
+ *
+ * @param array $tables
+ * @param array $conditions
+ * @param array $join_conds
+ * @param integer $batchSize Maximum number of records to delete per
loop.
+ * Note that this function runs multiple batches, until all records
are deleted.
+ * @return int The number of deleted rows
+ * @see Database::select
+ */
+ protected function deleteRows( $tables, $conditions, $join_conds,
$batchSize = 1000 ) {
+ $dbr = \wfGetDB( DB_REPLICA );
+ $dbw = \wfGetDB( DB_MASTER );
+
+ $deletedRows = 0;
do {
$ids = $dbr->selectFieldValues( $tables,
@@ -82,9 +120,12 @@
[ 'oresc_id' => $ids ],
__METHOD__
);
+ $deletedRows += $dbw->affectedRows();
MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
}
} while ( $ids );
+
+ return $deletedRows;
}
/**
diff --git a/maintenance/PurgeScoreCache.php b/maintenance/PurgeScoreCache.php
index ce318ee..a3dca27 100644
--- a/maintenance/PurgeScoreCache.php
+++ b/maintenance/PurgeScoreCache.php
@@ -21,6 +21,8 @@
$this->addOption( 'model', 'Model name (optional)', false, true
);
$this->addOption( 'all', 'Flag to indicate that we want to
clear all data, ' .
'even those from the most recent model', false, false );
+ $this->addOption( 'old', 'Flag to indicate that we only want to
clear old data ' .
+ 'that is not in recent changes anymore. Implicitly
assumes --all.', false, false );
}
public function execute() {
@@ -30,10 +32,25 @@
$models = Cache::instance()->getModels();
}
+ $this->output( "Purging ORES scores:\n" );
foreach ( $models as $model ) {
- Cache::instance()->purge( $model, $this->hasOption(
'all' ) );
+ if ( $this->hasOption( 'old' ) ) {
+ $deletedRows = Cache::instance()->purgeOld(
$model );
+ $description = 'old rows';
+ } elseif ( $this->hasOption( 'all' ) ) {
+ $deletedRows = Cache::instance()->purge(
$model, true );
+ $description = 'old model versions';
+ } else {
+ $deletedRows = Cache::instance()->purge(
$model, false );
+ $description = 'all rows';
+ }
+ if ( $deletedRows ) {
+ $this->output( " ...purging $description from
'$model' model': deleted $deletedRows rows\n" );
+ } else {
+ $this->output( " ...skipping '$model' model,
no action needed\n" );
+ }
}
- // @todo this script needs some output
+ $this->output( " done.\n" );
}
}
--
To view, visit https://gerrit.wikimedia.org/r/349946
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Id35bca820822dc46caa5adf6450c81871465abc9
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ORES
Gerrit-Branch: master
Gerrit-Owner: Gergő Tisza <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits