MaxSem has uploaded a new change for review. https://gerrit.wikimedia.org/r/324347
Change subject: refreshLinks.php: allow refreshing by categories, tracking or not ...................................................................... refreshLinks.php: allow refreshing by categories, tracking or not Needed for selective updates of pages using a particular feature. Intended to be run in production, so needs to scale. Bug: T149723 Change-Id: If20fb1f91de8d4227def5b07d6d52b91161ed3fd --- M RELEASE-NOTES-1.29 M includes/specials/SpecialTrackingCategories.php M maintenance/refreshLinks.php 3 files changed, 98 insertions(+), 8 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/47/324347/1 diff --git a/RELEASE-NOTES-1.29 b/RELEASE-NOTES-1.29 index 5b5640f..c85e6b8 100644 --- a/RELEASE-NOTES-1.29 +++ b/RELEASE-NOTES-1.29 @@ -54,6 +54,8 @@ === Other changes in 1.29 === * Database::getSearchEngine() (deprecated in 1.28) was removed. Use SearchEngineFactory::getSearchEngineClass() instead. +* refreshLinks.php now can be limited to a particular category with --category=... + or a tracking category with --tracking-category=.... == Compatibility == diff --git a/includes/specials/SpecialTrackingCategories.php b/includes/specials/SpecialTrackingCategories.php index 4c6a345..787db03 100644 --- a/includes/specials/SpecialTrackingCategories.php +++ b/includes/specials/SpecialTrackingCategories.php @@ -76,7 +76,7 @@ </tr></thead>" ); - $trackingCategories = $this->prepareTrackingCategoriesData(); + $trackingCategories = $this->getTrackingCategories(); $batch = new LinkBatch(); foreach ( $trackingCategories as $catMsg => $data ) { @@ -145,7 +145,7 @@ * Read the global and extract title objects from the corresponding messages * @return array Array( 'msg' => Title, 'cats' => Title[] ) */ - private function prepareTrackingCategoriesData() { + public function getTrackingCategories() { $categories = array_merge( self::$coreTrackingCategories, ExtensionRegistry::getInstance()->getAttribute( 'TrackingCategories' ), diff --git a/maintenance/refreshLinks.php b/maintenance/refreshLinks.php index e7a4d06..9d54901 100644 --- a/maintenance/refreshLinks.php +++ b/maintenance/refreshLinks.php @@ -29,6 +29,8 @@ * @ingroup Maintenance */ class RefreshLinks extends Maintenance { + const REPORTING_INTERVAL = 100; + /** @var int|bool */ protected $namespace = false; @@ -43,6 +45,8 @@ $this->addOption( 'dfn-chunk-size', 'Maximum number of existent IDs to check per ' . 'query, default 100000', false, true ); $this->addOption( 'namespace', 'Only fix pages in this namespace', false, true ); + $this->addOption( 'category', 'Only fix pages in this category', false, true ); + $this->addOption( 'tracking-category', 'Only fix pages in this tracking category', false, true ); $this->addArg( 'start', 'Page_id to start from, default 1', false ); $this->setBatchSize( 100 ); } @@ -61,7 +65,15 @@ } else { $this->namespace = (int)$ns; } - if ( !$this->hasOption( 'dfn-only' ) ) { + if ( ( $category = $this->getOption( 'category', false ) ) !== false ) { + $title = Title::makeTitle( NS_CATEGORY, $category ); + if ( !$title ) { + $this->error( "'$category' is an invalid category name!\n", true ); + } + $this->refreshCategory( $category ); + } elseif ( ( $category = $this->getOption( 'tracking-category', false ) ) !== false ) { + $this->refreshTrackingCategory( $category ); + } elseif ( !$this->hasOption( 'dfn-only' ) ) { $new = $this->getOption( 'new-only', false ); $redir = $this->getOption( 'redirects-only', false ); $oldRedir = $this->getOption( 'old-redirects-only', false ); @@ -89,7 +101,6 @@ private function doRefreshLinks( $start, $newOnly = false, $end = null, $redirectsOnly = false, $oldRedirectsOnly = false ) { - $reportingInterval = 100; $dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] ); if ( $start === null ) { @@ -124,7 +135,7 @@ $i = 0; foreach ( $res as $row ) { - if ( !( ++$i % $reportingInterval ) ) { + if ( !( ++$i % self::REPORTING_INTERVAL ) ) { $this->output( "$i\n" ); wfWaitForSlaves(); } @@ -145,7 +156,7 @@ $i = 0; foreach ( $res as $row ) { - if ( !( ++$i % $reportingInterval ) ) { + if ( !( ++$i % self::REPORTING_INTERVAL ) ) { $this->output( "$i\n" ); wfWaitForSlaves(); } @@ -166,7 +177,7 @@ for ( $id = $start; $id <= $end; $id++ ) { - if ( !( $id % $reportingInterval ) ) { + if ( !( $id % self::REPORTING_INTERVAL ) ) { $this->output( "$id\n" ); wfWaitForSlaves(); } @@ -179,7 +190,7 @@ for ( $id = $start; $id <= $end; $id++ ) { - if ( !( $id % $reportingInterval ) ) { + if ( !( $id % self::REPORTING_INTERVAL ) ) { $this->output( "$id\n" ); wfWaitForSlaves(); } @@ -379,6 +390,7 @@ * @param string $var Field name * @param mixed $start First value to include or null * @param mixed $end Last value to include or null + * @return string */ private static function intervalCond( IDatabase $db, $var, $start, $end ) { if ( $start === null && $end === null ) { @@ -391,6 +403,82 @@ return "$var BETWEEN {$db->addQuotes( $start )} AND {$db->addQuotes( $end )}"; } } + + /** + * Refershes links for pages in a tracking category + * + * @param string $category Category key + */ + private function refreshTrackingCategory( $category ) { + $cats = $this->getPossibleCategories( $category ); + + foreach ( $cats as $cat ) { + $this->refreshCategory( $cat ); + } + } + + /** + * Refreshes links to a category + * + * @param Title $category + */ + private function refreshCategory( Title $category ) { + $this->output( "Refreshing pages in category '{$category->getText()}'...\n" ); + + $dbr = $this->getDB( DB_REPLICA ); + $conds = [ + 'page_id=cl_from', + 'cl_to' => $category->getDBkey(), + ]; + if ( $this->namespace !== false ) { + $conds['page_namespace'] = $this->namespace; + } + + $i = 0; + $timestamp = ''; + $lastId = 0; + do { + $finalConds = $conds; + $timestamp = $dbr->addQuotes( $timestamp ); + $finalConds []= "(cl_timestamp > $timestamp OR (cl_timestamp = $timestamp AND cl_from > $lastId))"; + $res = $dbr->select( [ 'page', 'categorylinks' ], + [ 'page_id', 'cl_timestamp' ], + $finalConds, + __METHOD__, + [ + 'ORDER BY' => [ 'cl_timestamp', 'cl_from' ], + 'LIMIT' => $this->mBatchSize, + ] + ); + + foreach ( $res as $row ) { + if ( !( ++$i % self::REPORTING_INTERVAL ) ) { + $this->output( "$id\n" ); + wfWaitForSlaves(); + } + $lastId = $row->page_id; + $timestamp = $row->cl_timestamp; + self::fixLinksFromArticle( $row->page_id ); + } + + } while ( $res->numRows() == $this->mBatchSize ); + } + + /** + * Returns a list of possible categories for a given tracking category key + * + * @param string $categoryKey + * @return Title[] + */ + private function getPossibleCategories( $categoryKey ) { + /** @var SpecialTrackingCategories $specialTrackingCategories */ + $specialTrackingCategories = SpecialPageFactory::getPage( 'TrackingCategories' ); + $cats = $specialTrackingCategories->getTrackingCategories(); + if ( isset( $cats[$categoryKey] ) ) { + return $cats[$categoryKey]['cats']; + } + $this->error( "Unknown tracking category {$categoryKey}\n", true ); + } } $maintClass = 'RefreshLinks'; -- To view, visit https://gerrit.wikimedia.org/r/324347 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: If20fb1f91de8d4227def5b07d6d52b91161ed3fd Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: MaxSem <maxsem.w...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits