Umherirrender has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/364484 )
Change subject: Break very long lines ...................................................................... Break very long lines The default line-length limit in mediawiki/codesniffer is 100. Set it to 170 as a first step, because 88 files contain 293 lines longer than 100. Change-Id: I707253515e5903690d14d6d2987dfbdefdcb0f30 --- M includes/DataSender.php M includes/Maintenance/MetaStoreIndex.php M includes/Maintenance/Reindexer.php M includes/Maintenance/Validators/MappingValidator.php M includes/Maintenance/Validators/SpecificAliasValidator.php M includes/Sanity/Checker.php M maintenance/forceSearchIndex.php M maintenance/updateSuggesterIndex.php M phpcs.xml M tests/unit/SuggestScoringTest.php 10 files changed, 362 insertions(+), 155 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/84/364484/1 diff --git a/includes/DataSender.php b/includes/DataSender.php index b1d0290..921fad7 100644 --- a/includes/DataSender.php +++ b/includes/DataSender.php @@ -135,7 +135,9 @@ * already fully qualified elasticsearch index names. 
* @return bool */ - public function areIndexesAvailableForWrites( array $indexes, $areIndexesFullyQualified = false ) { + public function areIndexesAvailableForWrites( + array $indexes, $areIndexesFullyQualified = false + ) { if ( count( $indexes ) === 0 ) { return true; } @@ -176,7 +178,9 @@ $responseSet = null; $justDocumentMissing = false; try { - $pageType = $this->connection->getIndexType( $this->indexBaseName, $indexType, $elasticType ); + $pageType = $this->connection->getIndexType( + $this->indexBaseName, $indexType, $elasticType + ); $this->start( new BulkUpdateRequestLog( $this->connection->getClient(), 'sending {numBulk} documents to the {index} index(s)', @@ -226,7 +230,9 @@ * @param string $indexType * @param int $sent */ - private function reportUpdateMetrics( \Elastica\Bulk\ResponseSet $responseSet, $indexType, $sent ) { + private function reportUpdateMetrics( + \Elastica\Bulk\ResponseSet $responseSet, $indexType, $sent + ) { $updateStats = [ 'sent' => $sent, ]; @@ -250,7 +256,9 @@ $cluster = $this->connection->getClusterName(); $metricsPrefix = "CirrusSearch.$cluster.updates"; foreach ( $updateStats as $what => $num ) { - $stats->updateCount( "$metricsPrefix.details.{$this->indexBaseName}.$indexType.$what", $num ); + $stats->updateCount( + "$metricsPrefix.details.{$this->indexBaseName}.$indexType.$what", $num + ); $stats->updateCount( "$metricsPrefix.all.$what", $num ); } } @@ -282,11 +290,14 @@ if ( $idCount !== 0 ) { try { foreach ( $indexes as $indexType ) { - $this->startNewLog( 'deleting {numIds} from {indexType}/{elasticType}', 'send_deletes', [ - 'numIds' => $idCount, - 'indexType' => $indexType, - 'elasticType' => $elasticType, - ] ); + $this->startNewLog( + 'deleting {numIds} from {indexType}/{elasticType}', + 'send_deletes', [ + 'numIds' => $idCount, + 'indexType' => $indexType, + 'elasticType' => $elasticType, + ] + ); $this->connection ->getIndexType( $this->indexBaseName, $indexType, $elasticType ) ->deleteIds( $docIds ); @@ -308,7 
+319,8 @@ /** * @param string $localSite The wikiId to add/remove from local_sites_with_dupe * @param string $indexName The name of the index to perform updates to - * @param array $otherActions A list of arrays each containing the id within elasticsearch ('docId') and the article namespace ('ns') and DB key ('dbKey') at the within $localSite + * @param array $otherActions A list of arrays each containing the id within elasticsearch + * ('docId') and the article namespace ('ns') and DB key ('dbKey') at the within $localSite * @return Status */ public function sendOtherIndexUpdates( $localSite, $indexName, array $otherActions ) { @@ -399,7 +411,9 @@ * Callback will be passed the id of the missing document. * @return bool */ - protected function bulkResponseExceptionIsJustDocumentMissing( ResponseException $exception, $logCallback = null ) { + protected function bulkResponseExceptionIsJustDocumentMissing( + ResponseException $exception, $logCallback = null + ) { $justDocumentMissing = true; foreach ( $exception->getResponseSet()->getBulkResponses() as $bulkResponse ) { if ( !$bulkResponse->hasError() ) { diff --git a/includes/Maintenance/MetaStoreIndex.php b/includes/Maintenance/MetaStoreIndex.php index bcfa205..21b4775 100644 --- a/includes/Maintenance/MetaStoreIndex.php +++ b/includes/Maintenance/MetaStoreIndex.php @@ -106,7 +106,9 @@ * @param Maintenance $out * @param string $masterTimeout */ - public function __construct( Connection $connection, Maintenance $out, $masterTimeout = '10000s' ) { + public function __construct( + Connection $connection, Maintenance $out, $masterTimeout = '10000s' + ) { $this->connection = $connection; $this->client = $connection->getClient(); $this->configUtils = new ConfigUtils( $this->client, $out ); @@ -145,11 +147,20 @@ if ( $major < self::METASTORE_MAJOR_VERSION ) { $this->log( self::INDEX_NAME . 
" major version mismatch upgrading.\n" ); $this->majorUpgrade(); - } elseif ( $major == self::METASTORE_MAJOR_VERSION && $minor < self::METASTORE_MINOR_VERSION ) { - $this->log( self::INDEX_NAME . " minor version mismatch trying to upgrade mapping.\n" ); + } elseif ( $major == self::METASTORE_MAJOR_VERSION && + $minor < self::METASTORE_MINOR_VERSION + ) { + $this->log( + self::INDEX_NAME . " minor version mismatch trying to upgrade mapping.\n" + ); $this->minorUpgrade(); - } elseif ( $major > self::METASTORE_MAJOR_VERSION || $minor > self::METASTORE_MINOR_VERSION ) { - throw new \Exception( "Metastore version $major.$minor found, cannot upgrade to a lower version: " . self::METASTORE_MAJOR_VERSION . "." . self::METASTORE_MINOR_VERSION ); + } elseif ( $major > self::METASTORE_MAJOR_VERSION || + $minor > self::METASTORE_MINOR_VERSION + ) { + throw new \Exception( + "Metastore version $major.$minor found, cannot upgrade to a lower version: " . + self::METASTORE_MAJOR_VERSION . "." . self::METASTORE_MINOR_VERSION + ); } } } @@ -274,7 +285,9 @@ } if ( $oldIndexName == $name ) { - throw new \Exception( "Cannot switch aliases old and new index names are identical: $name" ); + throw new \Exception( + "Cannot switch aliases old and new index names are identical: $name" + ); } // Create the alias $path = '_aliases'; @@ -319,7 +332,8 @@ foreach ( $resp->getData() as $index => $aliases ) { if ( isset( $aliases['aliases'][self::INDEX_NAME] ) ) { if ( $indexName !== null ) { - throw new \Exception( "Multiple indices are aliased with " . self::INDEX_NAME . ", please fix manually." ); + throw new \Exception( "Multiple indices are aliased with " . self::INDEX_NAME . + ", please fix manually." 
); } $indexName = $index; } @@ -515,7 +529,8 @@ */ public static function getMetastoreVersion( Connection $connection ) { try { - $doc = self::getInternalType( $connection )->getDocument( self::METASTORE_VERSION_DOCID ); + $doc = self::getInternalType( $connection ) + ->getDocument( self::METASTORE_VERSION_DOCID ); } catch ( \Elastica\Exception\NotFoundException $e ) { return [ 0, 0 ]; } catch ( \Elastica\Exception\ResponseException $e ) { diff --git a/includes/Maintenance/Reindexer.php b/includes/Maintenance/Reindexer.php index bae65de..50ba6f5 100644 --- a/includes/Maintenance/Reindexer.php +++ b/includes/Maintenance/Reindexer.php @@ -111,7 +111,18 @@ * @param string[] $fieldsToDelete * @throws \Exception */ - public function __construct( SearchConfig $searchConfig, Connection $source, Connection $target, array $types, array $oldTypes, $shardCount, $replicaCount, array $mergeSettings, Maintenance $out = null, $fieldsToDelete = [] ) { + public function __construct( + SearchConfig $searchConfig, + Connection $source, + Connection $target, + array $types, + array $oldTypes, + $shardCount, + $replicaCount, + array $mergeSettings, + Maintenance $out = null, + $fieldsToDelete = [] + ) { // @todo: this constructor has too many arguments - refactor! $this->searchConfig = $searchConfig; $this->oldConnection = $source; @@ -140,7 +151,12 @@ * @param int $chunkSize * @param float $acceptableCountDeviation */ - public function reindex( $slices = null, $refreshInterval = 1, $chunkSize = 100, $acceptableCountDeviation = .05 ) { + public function reindex( + $slices = null, + $refreshInterval = 1, + $chunkSize = 100, + $acceptableCountDeviation = .05 + ) { // Set some settings that should help io load during bulk indexing. We'll have to // optimize after this to consolidate down to a proper number of segments but that is // is worth the price. total_shards_per_node will help to make sure that each shard @@ -191,8 +207,8 @@ } $this->outputIndented( "Verifying counts..." 
); - // We can't verify counts are exactly equal because they won't be - we still push updates into - // the old index while reindexing the new one. + // We can't verify counts are exactly equal because they won't be - we still push updates + // into the old index while reindexing the new one. foreach ( $this->types as $i => $type ) { $oldType = $this->oldTypes[$i]; $oldCount = (float)$oldType->count(); @@ -200,7 +216,9 @@ $newCount = (float)$type->count(); $difference = $oldCount > 0 ? abs( $oldCount - $newCount ) / $oldCount : 0; if ( $difference > $acceptableCountDeviation ) { - $this->output( "Not close enough! old=$oldCount new=$newCount difference=$difference\n" ); + $this->output( + "Not close enough! old=$oldCount new=$newCount difference=$difference\n" + ); $this->error( 'Failed to load index - counts not close enough. ' . "old=$oldCount new=$newCount difference=$difference. " . 'Check for warnings above.', 1 ); @@ -261,8 +279,8 @@ $expectedReplicas = min( max( $nodes - 1, $lower ), $upper ); $expectedActive = $this->shardCount * ( 1 + $expectedReplicas ); if ( $each === 0 || $active === $expectedActive ) { - $this->outputIndented( "\t\tactive:$active/$expectedActive relocating:$relocating " . - "initializing:$initializing unassigned:$unassigned\n" ); + $this->outputIndented( "\t\tactive:$active/$expectedActive relocating:$relocating" . + " initializing:$initializing unassigned:$unassigned\n" ); if ( $active === $expectedActive ) { break; } @@ -283,7 +301,8 @@ $path = "_cluster/health/$indexName"; $response = $this->index->getClient()->request( $path ); if ( $response->hasError() ) { - $this->error( 'Error fetching index health but going to retry. Message: ' . $response->getError() ); + $this->error( 'Error fetching index health but going to retry. Message: ' . 
+ $response->getError() ); sleep( 1 ); continue; } @@ -410,11 +429,13 @@ $innerConnection = $source->getClient()->getConnection(); $transport = $innerConnection->getTransportObject(); if ( !$transport instanceof Http ) { - throw new \RuntimeException( 'Remote reindex not implemented for transport: ' . get_class( $transport ) ); + throw new \RuntimeException( + 'Remote reindex not implemented for transport: ' . get_class( $transport ) + ); } - // We make some pretty bold assumptions that classes extending from \Elastica\Transport\Http don't - // change how any of this works. + // We make some pretty bold assumptions that classes extending from \Elastica\Transport\Http + // don't change how any of this works. $url = $innerConnection->hasConfig( 'url' ) ? $innerConnection->getConfig( 'url' ) : ''; @@ -422,7 +443,8 @@ $scheme = ( $transport instanceof Https || $transport instanceof PooledHttps ) ? 'https' : 'http'; - $url = $scheme . '://' . $innerConnection->getHost() . ':' . $innerConnection->getPort() . '/' . $innerConnection->getPath(); + $url = $scheme . '://' . $innerConnection->getHost() . ':' . + $innerConnection->getPort() . '/' . $innerConnection->getPath(); } if ( $innerConnection->getUsername() && $innerConnection->getPassword() ) { @@ -451,8 +473,9 @@ $this->out->outputIndented( "\n" ); $this->error( "$e\n\n" . - "Lost connection to elasticsearch cluster. The reindex task {$task->getId()} is still running.\n" - . "The task should be manually canceled, and the index {$target->getIndex()->getName()}\n" + "Lost connection to elasticsearch cluster. The reindex task " + . "{$task->getId()} is still running.\nThe task should be manually " + . "canceled, and the index {$target->getIndex()->getName()}\n" . "should be removed.\n" . $e->getMessage(), 1 @@ -498,7 +521,9 @@ // In theory this should never happen, we will get a ResponseException if the index doesn't // exist and every index must have a number_of_shards settings. But better safe than sorry. 
if ( !isset( $data[$realIndexName]['settings']['index']['number_of_shards'] ) ) { - throw new \RuntimeException( "Couldn't detect number of shards in {$index->getName()}" ); + throw new \RuntimeException( + "Couldn't detect number of shards in {$index->getName()}" + ); } return $data[$realIndexName]['settings']['index']['number_of_shards']; } diff --git a/includes/Maintenance/Validators/MappingValidator.php b/includes/Maintenance/Validators/MappingValidator.php index 3aee0ae..3e2ecaf 100644 --- a/includes/Maintenance/Validators/MappingValidator.php +++ b/includes/Maintenance/Validators/MappingValidator.php @@ -54,7 +54,15 @@ * @param Type[] $types Array with type names as key & type object as value * @param Maintenance $out */ - public function __construct( Index $index, $masterTimeout, $optimizeIndexForExperimentalHighlighter, array $availablePlugins, array $mappingConfig, array $types, Maintenance $out = null ) { + public function __construct( + Index $index, + $masterTimeout, + $optimizeIndexForExperimentalHighlighter, + array $availablePlugins, + array $mappingConfig, + array $types, + Maintenance $out = null + ) { parent::__construct( $out ); $this->index = $index; diff --git a/includes/Maintenance/Validators/SpecificAliasValidator.php b/includes/Maintenance/Validators/SpecificAliasValidator.php index c1612bd..82e75ee 100644 --- a/includes/Maintenance/Validators/SpecificAliasValidator.php +++ b/includes/Maintenance/Validators/SpecificAliasValidator.php @@ -46,7 +46,18 @@ * @param bool $tooFewReplicas * @param Maintenance $out */ - public function __construct( Client $client, $aliasName, $specificIndexName, $startOver, Reindexer $reindexer, array $reindexParams, array $reindexValidators, $reindexAndRemoveOk, $tooFewReplicas, Maintenance $out = null ) { + public function __construct( + Client $client, + $aliasName, + $specificIndexName, + $startOver, + Reindexer $reindexer, + array $reindexParams, + array $reindexValidators, + $reindexAndRemoveOk, + 
$tooFewReplicas, + Maintenance $out = null + ) { // @todo: this constructor takes too many arguments - refactor! parent::__construct( $client, $aliasName, $specificIndexName, $startOver, $out ); diff --git a/includes/Sanity/Checker.php b/includes/Sanity/Checker.php index 43791a8..a2e2097 100644 --- a/includes/Sanity/Checker.php +++ b/includes/Sanity/Checker.php @@ -79,7 +79,15 @@ * @param bool $fastRedirectCheck fast but inconsistent redirect check * @param ArrayObject|null $pageCache cache for WikiPage loaded from db */ - public function __construct( SearchConfig $config, Connection $connection, Remediator $remediator, Searcher $searcher, $logSane, $fastRedirectCheck, ArrayObject $pageCache = null ) { + public function __construct( + SearchConfig $config, + Connection $connection, + Remediator $remediator, + Searcher $searcher, + $logSane, + $fastRedirectCheck, + ArrayObject $pageCache = null + ) { $this->searchConfig = $config; $this->connection = $connection; $this->remediator = $remediator; @@ -231,7 +239,9 @@ */ private function checkIndexMismatch( $docId, $pageId, WikiPage $page, array $fromIndex ) { $foundInsanityInIndex = false; - $expectedType = $this->connection->getIndexSuffixForNamespace( $page->getTitle()->getNamespace() ); + $expectedType = $this->connection->getIndexSuffixForNamespace( + $page->getTitle()->getNamespace() + ); foreach ( $fromIndex as $indexInfo ) { $type = $this->connection->extractIndexSuffix( $indexInfo->getIndex() ); if ( $type !== $expectedType ) { @@ -262,7 +272,9 @@ $latest = $page->getLatest(); $foundInsanityInIndex = false; foreach ( $fromIndex as $indexInfo ) { - $version = isset( $indexInfo->getSource()['version'] ) ? $indexInfo->getSource()['version'] : -1; + $version = isset( $indexInfo->getSource()['version'] ) + ? 
$indexInfo->getSource()['version'] + : -1; if ( $version < $latest ) { $type = $this->connection->extractIndexSuffix( $indexInfo->getIndex() ); $this->remediator->oldVersionInIndex( $docId, $page, $type ); diff --git a/maintenance/forceSearchIndex.php b/maintenance/forceSearchIndex.php index d4e617a..7adffc7 100644 --- a/maintenance/forceSearchIndex.php +++ b/maintenance/forceSearchIndex.php @@ -67,43 +67,65 @@ public function __construct() { parent::__construct(); - $this->mDescription = "Force indexing some pages. Setting --from or --to will switch from page id based indexing to " + $this->mDescription = "Force indexing some pages. Setting --from or --to will switch " + . "from page id based indexing to " . "date based indexing which uses less efficient queries and follows redirects.\n\n" . "Note: All froms are _exclusive_ and all tos are _inclusive_.\n" . "Note 2: Setting fromId and toId use the efficient query so those are ok.\n" . "Note 3: Operates on all clusters unless --cluster is provided.\n"; $this->setBatchSize( 10 ); - $this->addOption( 'from', 'Start date of reindex in YYYY-mm-ddTHH:mm:ssZ (exc. Defaults to 0 epoch.', false, true ); - $this->addOption( 'to', 'Stop date of reindex in YYYY-mm-ddTHH:mm:ssZ. Defaults to now.', false, true ); - $this->addOption( 'fromId', 'Start indexing at a specific page_id. Not useful with --deletes.', false, true ); - $this->addOption( 'toId', 'Stop indexing at a specific page_id. Not useful with --deletes or --from or --to.', false, true ); - $this->addOption( 'ids', 'List of page ids (comma separated) to reindex. Not allowed with deletes/from/to/fromId/toId/limit.', false, true ); - $this->addOption( 'deletes', 'If this is set then just index deletes, not updates or creates.', false ); - $this->addOption( 'archive', 'Don\'t delete pages, only index them into the archive.', false, false ); - $this->addOption( 'limit', 'Maximum number of pages to process before exiting the script. 
Default to unlimited.', false, true ); - $this->addOption( 'buildChunks', 'Instead of running the script spit out commands that can be farmed out to ' . - 'different processes or machines to rebuild the index. Works with fromId and toId, not from and to. ' . - 'If specified as a number then chunks no larger than that size are spat out. If specified as a number ' . - 'followed by the word "total" without a space between them then that many chunks will be spat out sized to ' . - 'cover the entire wiki.', false, true ); - $this->addOption( 'queue', 'Rather than perform the indexes in process add them to the job queue. Ignored for delete.' ); - $this->addOption( 'maxJobs', 'If there are more than this many index jobs in the queue then pause before adding ' . - 'more. This is only checked every ' . self::SECONDS_BETWEEN_JOB_QUEUE_LENGTH_CHECKS . ' seconds. Not meaningful ' . - 'without --queue.', false, true ); - $this->addOption( 'pauseForJobs', 'If paused adding jobs then wait for there to be less than this many before ' . - 'starting again. Defaults to the value specified for --maxJobs. Not meaningful without --queue.', false, true ); - $this->addOption( 'indexOnSkip', 'When skipping either parsing or links send the document as an index. ' . - 'This replaces the contents of the index for that entry with the entry built from a skipped process.' . - 'Without this if the entry does not exist then it will be skipped entirely. Only set this when running ' . - 'the first pass of building the index. Otherwise, don\'t tempt fate by indexing half complete documents.' ); - $this->addOption( 'forceParse', 'Bypass ParserCache and do a fresh parse of pages from the Content.' ); - $this->addOption( 'skipParse', 'Skip parsing the page. This is really only good for running the second half ' . - 'of the two phase index build. If this is specified then the default batch size is actually 50.' 
); - $this->addOption( 'skipLinks', 'Skip looking for links to the page (counting and finding redirects). Use ' . + $this->addOption( 'from', 'Start date of reindex in YYYY-mm-ddTHH:mm:ssZ (exc. Defaults ' . + 'to 0 epoch.', false, true ); + $this->addOption( 'to', 'Stop date of reindex in YYYY-mm-ddTHH:mm:ssZ. Defaults to now.', + false, true ); + $this->addOption( 'fromId', 'Start indexing at a specific page_id. ' . + 'Not useful with --deletes.', false, true ); + $this->addOption( 'toId', 'Stop indexing at a specific page_id. ' . + 'Not useful with --deletes or --from or --to.', false, true ); + $this->addOption( 'ids', 'List of page ids (comma separated) to reindex. ' . + 'Not allowed with deletes/from/to/fromId/toId/limit.', false, true ); + $this->addOption( 'deletes', + 'If this is set then just index deletes, not updates or creates.', false ); + $this->addOption( 'archive', + 'Don\'t delete pages, only index them into the archive.', false, false ); + $this->addOption( 'limit', + 'Maximum number of pages to process before exiting the script. Default to unlimited.', + false, true ); + $this->addOption( 'buildChunks', 'Instead of running the script spit out commands that ' . + 'can be farmed out to different processes or machines to rebuild the index. Works ' . + 'with fromId and toId, not from and to. If specified as a number then chunks no ' . + 'larger than that size are spat out. If specified as a number followed by the word ' . + '"total" without a space between them then that many chunks will be spat out sized ' . + 'to cover the entire wiki.', false, true ); + $this->addOption( 'queue', 'Rather than perform the indexes in process add them to the ' . + 'job queue. Ignored for delete.' ); + $this->addOption( 'maxJobs', 'If there are more than this many index jobs in the queue ' . + 'then pause before adding more. This is only checked every ' . + self::SECONDS_BETWEEN_JOB_QUEUE_LENGTH_CHECKS . + ' seconds. 
Not meaningful without --queue.', false, true ); + $this->addOption( 'pauseForJobs', 'If paused adding jobs then wait for there to be less ' . + 'than this many before starting again. Defaults to the value specified for ' . + '--maxJobs. Not meaningful without --queue.', false, true ); + $this->addOption( 'indexOnSkip', 'When skipping either parsing or links send the document' . + ' as an index. This replaces the contents of the index for that entry with the entry' . + ' built from a skipped process. Without this if the entry does not exist then it will' . + ' be skipped entirely. Only set this when running the first pass of building the' . + ' index. Otherwise, don\'t tempt fate by indexing half complete documents.' ); + $this->addOption( 'forceParse', + 'Bypass ParserCache and do a fresh parse of pages from the Content.' ); + $this->addOption( 'skipParse', + 'Skip parsing the page. This is really only good for running the second half ' . + 'of the two phase index build. If this is specified then the default batch size ' . + 'is actually 50.' ); + $this->addOption( 'skipLinks', + 'Skip looking for links to the page (counting and finding redirects). Use ' . 'this with --indexOnSkip for the first half of the two phase index build.' ); $this->addOption( 'namespace', 'Only index pages in this given namespace', false, true ); - $this->addOption( 'excludeContentTypes', 'Exclude pages of the specified content types. These must be a comma separated list of strings such as "wikitext" or "json" matching the CONTENT_MODEL_* constants.', false, true, false ); - $this->addOption( 'useDbIndex', 'Use specific index when fetching IDs from the database.', false, true, false ); + $this->addOption( 'excludeContentTypes', 'Exclude pages of the specified content types. ' . + 'These must be a comma separated list of strings such as "wikitext" or "json" ' . 
+ 'matching the CONTENT_MODEL_* constants.', false, true, false ); + $this->addOption( 'useDbIndex', + 'Use specific index when fetching IDs from the database.', false, true, false ); } public function execute() { @@ -112,7 +134,9 @@ // Make sure we've actually got indices to populate if ( !$this->simpleCheckIndexes() ) { - $this->error( "$wiki index(es) do not exist. Did you forget to run updateSearchIndexConfig?", 1 ); + $this->error( + "$wiki index(es) do not exist. Did you forget to run updateSearchIndexConfig?", 1 + ); } // We need to check ids options early otherwise hasOption may return @@ -141,7 +165,9 @@ return; } $this->queue = $this->getOption( 'queue' ); - $this->maxJobs = $this->getOption( 'maxJobs' ) ? intval( $this->getOption( 'maxJobs' ) ) : null; + $this->maxJobs = $this->getOption( 'maxJobs' ) + ? intval( $this->getOption( 'maxJobs' ) ) + : null; $this->pauseForJobs = $this->getOption( 'pauseForJobs' ) ? intval( $this->getOption( 'pauseForJobs' ) ) : $this->maxJobs; $updateFlags = $this->buildUpdateFlags(); @@ -184,9 +210,9 @@ $updates = array_filter( $batch['updates'] ); if ( $this->queue ) { $this->waitForQueueToShrink( $wiki ); - JobQueueGroup::singleton()->push( - Job\MassIndex::build( $updates, $updateFlags, $this->getOption( 'cluster' ) ) - ); + JobQueueGroup::singleton()->push( Job\MassIndex::build( + $updates, $updateFlags, $this->getOption( 'cluster' ) + ) ); } else { // Update size with the actual number of updated documents. 
$updater = $this->createUpdater(); @@ -204,7 +230,9 @@ $completed += $size; $rate = $this->calculateIndexingRate( $completed, $operationStartTime ); - $this->output( "$wiki $operationName $size pages ending at {$batch['endingAt']} at $rate/second\n" ); + $this->output( + "$wiki $operationName $size pages ending at {$batch['endingAt']} at $rate/second\n" + ); if ( !is_null( $this->limit ) && $completed > $this->limit ) { break; } @@ -218,14 +246,17 @@ || $this->hasOption( 'from' ) || $this->hasOption( 'to' ) || $this->hasOption( 'fromId' ) || $this->hasOption( 'toId' ) ) { - $this->error( '--ids cannot be used with deletes/archive/from/to/fromId/toId/limit', 1 ); + $this->error( + '--ids cannot be used with deletes/archive/from/to/fromId/toId/limit', 1 + ); } $pageIds = array_map( function ( $pageId ) { $pageId = trim( $pageId ); if ( !ctype_digit( $pageId ) ) { - $this->error( "Invalid page id provided in --ids, got '$pageId', expected a positive integer", 1 ); + $this->error( "Invalid page id provided in --ids, got '$pageId', " . + "expected a positive integer", 1 ); } return intval( $pageId ); }, @@ -258,7 +289,9 @@ private function waitForQueueToShrink( $wiki ) { $now = microtime( true ); - if ( $now - $this->lastJobQueueCheckTime <= self::SECONDS_BETWEEN_JOB_QUEUE_LENGTH_CHECKS ) { + if ( $now - $this->lastJobQueueCheckTime <= + self::SECONDS_BETWEEN_JOB_QUEUE_LENGTH_CHECKS + ) { return; } @@ -269,7 +302,9 @@ } do { - $this->output( "$wiki Waiting while job queue shrinks: $this->pauseForJobs > $queueSize\n" ); + $this->output( + "$wiki Waiting while job queue shrinks: $this->pauseForJobs > $queueSize\n" + ); usleep( self::SECONDS_BETWEEN_JOB_QUEUE_LENGTH_CHECKS * 1000000 ); $queueSize = $this->getUpdatesInQueue(); } while ( $this->pauseForJobs < $queueSize ); @@ -380,8 +415,10 @@ 'titlesToDelete' => $titlesToDelete, 'docIdsToDelete' => $docIdsToDelete, 'archive' => $archive, - 'endingAt' => isset( $title ) ? - substr( preg_replace( '/[^' . Title::legalChars() . 
']/', '_', $title->getPrefixedDBkey() ), 0, 30 ) + 'endingAt' => isset( $title ) + ? substr( preg_replace( + '/[^' . Title::legalChars() . ']/', '_', $title->getPrefixedDBkey() + ), 0, 30 ) : 'unknown', ]; } ); @@ -437,13 +474,17 @@ return $this->wrapDecodeResults( $it, 'page_id' ); } - private function attachTimestampConditions( IDatabase $dbr, BatchRowIterator $it, $columnPrefix ) { + private function attachTimestampConditions( + IDatabase $dbr, BatchRowIterator $it, $columnPrefix + ) { // When initializing we guarantee that if either fromDate or toDate are provided // the other has a sane default value. if ( $this->fromDate ) { $it->addConditions( [ - "{$columnPrefix}_timestamp >= " . $dbr->addQuotes( $dbr->timestamp( $this->fromDate ) ), - "{$columnPrefix}_timestamp <= " . $dbr->addQuotes( $dbr->timestamp( $this->toDate ) ), + "{$columnPrefix}_timestamp >= " . + $dbr->addQuotes( $dbr->timestamp( $this->fromDate ) ), + "{$columnPrefix}_timestamp <= " . + $dbr->addQuotes( $dbr->timestamp( $this->toDate ) ), ] ); } } @@ -476,14 +517,14 @@ */ private function wrapDecodeResults( BatchRowIterator $it, $endingAtColumn ) { return new CallbackIterator( $it, function ( $batch ) use ( $endingAtColumn ) { - // Build the updater outside the loop because it stores the redirects it hits. Don't build it at the top - // level so those are stored when it is freed. + // Build the updater outside the loop because it stores the redirects it hits. + // Don't build it at the top level so those are stored when it is freed. $updater = $this->createUpdater(); $pages = []; foreach ( $batch as $row ) { - // No need to call Updater::traceRedirects here because we know this is a valid page because - // it is in the database. + // No need to call Updater::traceRedirects here because we know this is a valid page + // because it is in the database. $page = WikiPage::newFromRow( $row, WikiPage::READ_LATEST ); // null pages still get attached to keep the counts the same. 
They will be filtered @@ -533,7 +574,9 @@ if ( $content === null ) { // Skip pages without content. Pages have no content because their latest revision // as loaded by the query above doesn't exist. - $this->output( 'Skipping page with no content: ' . $page->getTitle()->getArticleID() . "\n" ); + $this->output( + 'Skipping page with no content: ' . $page->getTitle()->getArticleID() . "\n" + ); return null; } @@ -542,14 +585,14 @@ } if ( $this->toDate === null ) { - // Looks like we accidentally picked up a redirect when we were indexing by id and thus trying to - // ignore redirects! Just ignore it! We would filter them out at the db level but that is slow - // for large wikis. + // Looks like we accidentally picked up a redirect when we were indexing by id and thus + // trying to ignore redirects! Just ignore it! We would filter them out at the db + // level but that is slow for large wikis. return null; } - // We found a redirect. Great. Since we can't index special pages and redirects to special pages - // are totally possible, as well as fun stuff like redirect loops, we need to use + // We found a redirect. Great. Since we can't index special pages and redirects to special + // pages are totally possible, as well as fun stuff like redirect loops, we need to use // Updater's redirect tracing logic which is very complete. Also, it returns null on // self redirects. Great! list( $page, ) = $updater->traceRedirects( $page->getTitle() ); @@ -579,7 +622,9 @@ } } if ( $fromId === $this->toId ) { - $this->error( "Couldn't find any pages to index. fromId = $fromId = $this->toId = toId.", 1 ); + $this->error( + "Couldn't find any pages to index. 
fromId = $fromId = $this->toId = toId.", 1 + ); } $builder = new \CirrusSearch\Maintenance\ChunkBuilder(); $builder->build( $this->mSelf, $this->mOptions, $buildChunks, $fromId, $this->toId ); diff --git a/maintenance/updateSuggesterIndex.php b/maintenance/updateSuggesterIndex.php index f7047da..7df908a 100644 --- a/maintenance/updateSuggesterIndex.php +++ b/maintenance/updateSuggesterIndex.php @@ -157,18 +157,24 @@ 'per failure. Note that failures are not common but if Elasticsearch is in the process ' . 'of moving a shard this can time out. This will retry the attempt after some backoff ' . 'rather than failing the whole reindex process. Defaults to 5.', false, true ); - $this->addOption( 'optimize', 'Optimize the index to 1 segment. Defaults to false.', false, false ); - $this->addOption( 'scoringMethod', 'The scoring method to use when computing suggestion weights. ' . + $this->addOption( 'optimize', + 'Optimize the index to 1 segment. Defaults to false.', false, false ); + $this->addOption( 'scoringMethod', + 'The scoring method to use when computing suggestion weights. ' . 'Defaults to $wgCirrusSearchCompletionDefaultScore or quality if unset.', false, true ); - $this->addOption( 'masterTimeout', 'The amount of time to wait for the master to respond to mapping ' . + $this->addOption( 'masterTimeout', + 'The amount of time to wait for the master to respond to mapping ' . 'updates before failing. Defaults to $wgCirrusSearchMasterTimeout.', false, true ); - $this->addOption( 'replicationTimeout', 'The amount of time (seconds) to wait for the replica shards to initialize. ' . + $this->addOption( 'replicationTimeout', + 'The amount of time (seconds) to wait for the replica shards to initialize. ' . 'Defaults to 3600 seconds.', false, true ); - $this->addOption( 'allocationIncludeTag', 'Set index.routing.allocation.include.tag on the created index. ' . 
- 'Useful if you want to force the suggester index not to be allocated on a specific set of nodes.', + $this->addOption( 'allocationIncludeTag', + 'Set index.routing.allocation.include.tag on the created index. Useful if you want to ' . + 'force the suggester index not to be allocated on a specific set of nodes.', false, true ); - $this->addOption( 'allocationExcludeTag', 'Set index.routing.allocation.exclude.tag on the created index. ' . - 'Useful if you want to force the suggester index not to be allocated on a specific set of nodes.', + $this->addOption( 'allocationExcludeTag', + 'Set index.routing.allocation.exclude.tag on the created index. Useful if you want ' . + 'to force the suggester index not to be allocated on a specific set of nodes.', false, true ); } @@ -188,10 +194,14 @@ $this->getShardCount(); $this->getReplicaCount(); } catch ( \Exception $e ) { - $this->error( "Failed to get shard count and replica count information: {$e->getMessage()}", 1 ); + $this->error( + "Failed to get shard count and replica count information: {$e->getMessage()}", 1 + ); } - $this->indexBaseName = $this->getOption( 'baseName', $this->getSearchConfig()->get( SearchConfig::INDEX_BASE_NAME ) ); + $this->indexBaseName = $this->getOption( + 'baseName', $this->getSearchConfig()->get( SearchConfig::INDEX_BASE_NAME ) + ); $this->indexChunkSize = $this->getOption( 'indexChunkSize', 500 ); $this->indexRetryAttempts = $this->getOption( 'reindexRetryAttempts', 5 ); @@ -203,20 +213,27 @@ $this->bannedPlugins = $wgCirrusSearchBannedPlugins; $this->availablePlugins = $this->utils->scanAvailablePlugins( $this->bannedPlugins ); - $this->analysisConfigBuilder = $this->pickAnalyzer( $this->langCode, $this->availablePlugins ); + $this->analysisConfigBuilder = $this->pickAnalyzer( + $this->langCode, $this->availablePlugins + ); $this->utils->checkElasticsearchVersion(); - $this->maxShardsPerNode = isset( $wgCirrusSearchMaxShardsPerNode[ $this->indexTypeName ] ) ? 
$wgCirrusSearchMaxShardsPerNode[ $this->indexTypeName ] : 'unlimited'; + $this->maxShardsPerNode = isset( $wgCirrusSearchMaxShardsPerNode[ $this->indexTypeName ] ) + ? $wgCirrusSearchMaxShardsPerNode[ $this->indexTypeName ] + : 'unlimited'; - $this->scoreMethodName = $this->getOption( 'scoringMethod', $wgCirrusSearchCompletionDefaultScore ); + $this->scoreMethodName = $this->getOption( + 'scoringMethod', $wgCirrusSearchCompletionDefaultScore + ); $this->scoreMethod = SuggestScoringMethodFactory::getScoringMethod( $this->scoreMethodName ); $extraBuilders = []; if ( $this->getSearchConfig()->get( 'CirrusSearchCompletionSuggesterUseDefaultSort' ) ) { $extraBuilders[] = new DefaultSortSuggestionsBuilder(); } - $subPhrasesConfig = $this->getSearchConfig()->get( 'CirrusSearchCompletionSuggesterSubphrases' ); + $subPhrasesConfig = $this->getSearchConfig() + ->get( 'CirrusSearchCompletionSuggesterSubphrases' ); if ( $subPhrasesConfig['build'] ) { $extraBuilders[] = NaiveSubphrasesSuggestionsBuilder::create( $subPhrasesConfig ); } @@ -225,7 +242,9 @@ try { // If the version does not exist it's certainly because nothing has been indexed. if ( !MetaStoreIndex::cirrusReady( $this->getConnection() ) ) { - throw new \Exception( "Cirrus meta store does not exist, you must index your data first" ); + throw new \Exception( + "Cirrus meta store does not exist, you must index your data first" + ); } if ( !$this->canWrite() ) { @@ -250,7 +269,8 @@ /** @suppress PhanUndeclaredMethod ExceptionInterface has no methods */ $trace = $e->getTraceAsString(); $this->log( "\nUnexpected Elasticsearch failure.\n" ); - $this->error( "Elasticsearch failed in an unexpected way. This is always a bug in CirrusSearch.\n" . + $this->error( "Elasticsearch failed in an unexpected way. " . + "This is always a bug in CirrusSearch.\n" . "Error type: $type\n" . "Message: $message\n" . "Trace:\n" . 
$trace, 1 ); @@ -295,7 +315,8 @@ $this->log( "Deleting broken index {$index->getName()}\n" ); $this->deleteIndex( $index ); } else { - $this->log( "Broken index {$index->getName()} appears to be in use, please check and delete.\n" ); + $this->log( "Broken index {$index->getName()} appears to be in use, " . + "please check and delete.\n" ); } } @@ -303,9 +324,15 @@ } private function rebuild() { - $oldIndexIdentifier = $this->utils->pickIndexIdentifierFromOption( 'current', $this->getIndexTypeName() ); - $this->oldIndex = $this->getConnection()->getIndex( $this->indexBaseName, $this->indexTypeName, $oldIndexIdentifier ); - $this->indexIdentifier = $this->utils->pickIndexIdentifierFromOption( 'now', $this->getIndexTypeName() ); + $oldIndexIdentifier = $this->utils->pickIndexIdentifierFromOption( + 'current', $this->getIndexTypeName() + ); + $this->oldIndex = $this->getConnection()->getIndex( + $this->indexBaseName, $this->indexTypeName, $oldIndexIdentifier + ); + $this->indexIdentifier = $this->utils->pickIndexIdentifierFromOption( + 'now', $this->getIndexTypeName() + ); $this->createIndex(); $this->indexData(); @@ -325,8 +352,12 @@ if ( !$wgCirrusSearchRecycleCompletionSuggesterIndex ) { return false; } - $oldIndexIdentifier = $this->utils->pickIndexIdentifierFromOption( 'current', $this->getIndexTypeName() ); - $oldIndex = $this->getConnection()->getIndex( $this->indexBaseName, $this->indexTypeName, $oldIndexIdentifier ); + $oldIndexIdentifier = $this->utils->pickIndexIdentifierFromOption( + 'current', $this->getIndexTypeName() + ); + $oldIndex = $this->getConnection()->getIndex( + $this->indexBaseName, $this->indexTypeName, $oldIndexIdentifier + ); if ( ! $oldIndex->exists() ) { $this->error( 'Index does not exist yet cannot recycle.' 
); return false; @@ -344,11 +375,14 @@ return false; } - list( $mMaj ) = explode( '.', \CirrusSearch\Maintenance\SuggesterMappingConfigBuilder::VERSION ); - list( $aMaj ) = explode( '.', \CirrusSearch\Maintenance\SuggesterAnalysisConfigBuilder::VERSION ); + list( $mMaj ) = explode( '.', + \CirrusSearch\Maintenance\SuggesterMappingConfigBuilder::VERSION ); + list( $aMaj ) = explode( '.', + \CirrusSearch\Maintenance\SuggesterAnalysisConfigBuilder::VERSION ); try { - $versionDoc = MetaStoreIndex::getVersionType( $this->getConnection() )->getDocument( $this->getIndexTypeName() ); + $versionDoc = MetaStoreIndex::getVersionType( $this->getConnection() ) + ->getDocument( $this->getIndexTypeName() ); } catch ( \Elastica\Exception\NotFoundException $nfe ) { $this->error( 'Index missing in mw_cirrus_metastore::version, cannot recycle.' ); return false; @@ -535,7 +569,9 @@ $mSearch = new MultiSearch( $this->getClient() ); foreach ( $countIndices as $sourceIndexType ) { $search = new \Elastica\Search( $this->getClient() ); - $search->addIndex( $this->getConnection()->getIndex( $this->indexBaseName, $sourceIndexType ) ); + $search->addIndex( + $this->getConnection()->getIndex( $this->indexBaseName, $sourceIndexType ) + ); $search->getQuery()->setSize( 0 ); $mSearch->addSearch( $search ); } @@ -563,7 +599,9 @@ foreach ( $scroll as $results ) { if ( $totalDocsToDump === -1 ) { $totalDocsToDump = $results->getTotalHits(); - $this->log( "Indexing $totalDocsToDump documents from $sourceIndexType with batchId: {$this->builder->getBatchId()} and scoring method: {$this->scoreMethodName}\n" ); + $this->log( "Indexing $totalDocsToDump documents from $sourceIndexType with " . + "batchId: {$this->builder->getBatchId()} and scoring method: " . 
+ "{$this->scoreMethodName}\n" ); } $inputDocs = []; foreach ( $results as $result ) { @@ -602,7 +640,9 @@ $data['actions'][] = [ 'add' => [ 'index' => $index->getName(), 'alias' => $name ] ]; - $index->getClient()->request( $path, Request::POST, $data, [ 'master_timeout' => $this->masterTimeout ] ); + $index->getClient()->request( + $path, Request::POST, $data, [ 'master_timeout' => $this->masterTimeout ] + ); } /** @@ -636,7 +676,9 @@ * @return AnalysisConfigBuilder */ private function pickAnalyzer( $langCode, array $availablePlugins = [] ) { - $analysisConfigBuilder = new \CirrusSearch\Maintenance\SuggesterAnalysisConfigBuilder( $langCode, $availablePlugins ); + $analysisConfigBuilder = new \CirrusSearch\Maintenance\SuggesterAnalysisConfigBuilder( + $langCode, $availablePlugins + ); $this->outputIndented( 'Picking analyzer...' . $analysisConfigBuilder->getDefaultTextAnalyzerType() . "\n" ); return $analysisConfigBuilder; @@ -665,14 +707,16 @@ ]; if ( $this->hasOption( 'allocationIncludeTag' ) ) { - $this->output( "Using routing.allocation.include.tag: {$this->getOption( 'allocationIncludeTag' )}, " . - "the index might be stuck in red if the cluster is not properly configured.\n" ); + $this->output( "Using routing.allocation.include.tag: " . + "{$this->getOption( 'allocationIncludeTag' )}, the index might be stuck in red " . + "if the cluster is not properly configured.\n" ); $settings['routing.allocation.include.tag'] = $this->getOption( 'allocationIncludeTag' ); } if ( $this->hasOption( 'allocationExcludeTag' ) ) { - $this->output( "Using routing.allocation.exclude.tag: {$this->getOption( 'allocationExcludeTag' )}, " . - "the index might be stuck in red if the cluster is not properly configured.\n" ); + $this->output( "Using routing.allocation.exclude.tag: " . + "{$this->getOption( 'allocationExcludeTag' )}, the index might be stuck in red " . 
+ "if the cluster is not properly configured.\n" ); $settings['routing.allocation.exclude.tag'] = $this->getOption( 'allocationExcludeTag' ); } @@ -722,7 +766,8 @@ $this->log( "Waiting for the index to go green...\n" ); // Wait for the index to go green ( default 10 min) if ( !$this->utils->waitForGreen( $this->getIndex()->getName(), $timeout ) ) { - $this->error( "Failed to wait for green... please check config and delete the {$this->getIndex()->getName()} index if it was created.", 1 ); + $this->error( "Failed to wait for green... please check config and " . + "delete the {$this->getIndex()->getName()} index if it was created.", 1 ); } } @@ -748,7 +793,9 @@ * @return \Elastica\Index being updated */ public function getIndex() { - return $this->getConnection()->getIndex( $this->indexBaseName, $this->indexTypeName, $this->indexIdentifier ); + return $this->getConnection()->getIndex( + $this->indexBaseName, $this->indexTypeName, $this->indexIdentifier + ); } /** diff --git a/phpcs.xml b/phpcs.xml index 92edc4b..631fadf 100644 --- a/phpcs.xml +++ b/phpcs.xml @@ -3,7 +3,6 @@ <rule ref="./vendor/mediawiki/mediawiki-codesniffer/MediaWiki"> <exclude name="PSR2.Methods.MethodDeclaration.Underscore"/> <exclude name="PSR2.Classes.PropertyDeclaration.Underscore"/> - <exclude name="Generic.Files.LineLength"/> <exclude name="MediaWiki.ControlStructures.AssignmentInControlStructures"/> <exclude name="MediaWiki.NamingConventions.LowerCamelFunctionsName.FunctionName"/> <exclude name="MediaWiki.WhiteSpace.SpaceBeforeSingleLineComment.NewLineComment"/> @@ -21,6 +20,12 @@ <property name="ignoreList" type="array" value="main,curl_init_pooled" /> </properties> </rule> + <rule ref="Generic.Files.LineLength"> + <properties> + <property name="lineLimit" value="170" /> + </properties> + <exclude-pattern>tests/unit/resources/wmf</exclude-pattern> + </rule> <file>.</file> <arg name="extensions" value="php,php5,inc"/> <arg name="encoding" value="UTF-8"/> diff --git 
a/tests/unit/SuggestScoringTest.php b/tests/unit/SuggestScoringTest.php index 5b054bd..6b7c6ec 100644 --- a/tests/unit/SuggestScoringTest.php +++ b/tests/unit/SuggestScoringTest.php @@ -34,30 +34,42 @@ $value = mt_rand( 0, 1000000 ); $norm = mt_rand( 1, 1000000 ); $score = $qs->scoreNorm( $value, $norm ); - $this->assertLessThanOrEqual( 1, $score, "scoreNorm cannot produce a score greater than 1" ); - $this->assertGreaterThanOrEqual( 0, $score, "scoreNorm cannot produce a score lower than 0" ); + $this->assertLessThanOrEqual( 1, $score, + "scoreNorm cannot produce a score greater than 1" ); + $this->assertGreaterThanOrEqual( 0, $score, + "scoreNorm cannot produce a score lower than 0" ); $score = $qs->scoreNormL2( $value, $norm ); - $this->assertLessThanOrEqual( 1, $score, "scoreNormL2 cannot produce a score greater than 1" ); - $this->assertGreaterThanOrEqual( 0, $score, "scoreNormL2 cannot produce a score lower than 0" ); + $this->assertLessThanOrEqual( 1, $score, + "scoreNormL2 cannot produce a score greater than 1" ); + $this->assertGreaterThanOrEqual( 0, $score, + "scoreNormL2 cannot produce a score lower than 0" ); } // Edges $score = $qs->scoreNorm( 1, 1 ); - $this->assertLessThanOrEqual( 1, $score, "scoreNorm cannot produce a score greater than 1" ); - $this->assertGreaterThanOrEqual( 0, $score, "scoreNorm cannot produce a score lower than 0" ); + $this->assertLessThanOrEqual( 1, $score, + "scoreNorm cannot produce a score greater than 1" ); + $this->assertGreaterThanOrEqual( 0, $score, + "scoreNorm cannot produce a score lower than 0" ); $score = $qs->scoreNorm( 0, 1 ); - $this->assertLessThanOrEqual( 1, $score, "scoreNorm cannot produce a score greater than 1" ); - $this->assertGreaterThanOrEqual( 0, $score, "scoreNorm cannot produce a score lower than 0" ); + $this->assertLessThanOrEqual( 1, $score, + "scoreNorm cannot produce a score greater than 1" ); + $this->assertGreaterThanOrEqual( 0, $score, + "scoreNorm cannot produce a score lower than 0" ); 
$score = $qs->scoreNormL2( 1, 1 ); - $this->assertLessThanOrEqual( 1, $score, "scoreNormL2 cannot produce a score greater than 1" ); - $this->assertGreaterThanOrEqual( 0, $score, "scoreNormL2 cannot produce a score lower than 0" ); + $this->assertLessThanOrEqual( 1, $score, + "scoreNormL2 cannot produce a score greater than 1" ); + $this->assertGreaterThanOrEqual( 0, $score, + "scoreNormL2 cannot produce a score lower than 0" ); $score = $qs->scoreNormL2( 0, 1 ); - $this->assertLessThanOrEqual( 1, $score, "scoreNormL2 cannot produce a score greater than 1" ); - $this->assertGreaterThanOrEqual( 0, $score, "scoreNormL2 cannot produce a score lower than 0" ); + $this->assertLessThanOrEqual( 1, $score, + "scoreNormL2 cannot produce a score greater than 1" ); + $this->assertGreaterThanOrEqual( 0, $score, + "scoreNormL2 cannot produce a score lower than 0" ); } public function testQualityScoreBoostFunction() { @@ -66,12 +78,16 @@ $score = (float)mt_rand() / (float)mt_getrandmax(); $boost = (float)mt_rand( 0, 10000 ) / mt_rand( 1, 10000 ); $res = $qs->boost( $score, $boost ); - $this->assertLessThanOrEqual( 1, $score, "boost cannot produce a score greater than 1" ); - $this->assertGreaterThanOrEqual( 0, $score, "boost cannot produce a score lower than 0" ); + $this->assertLessThanOrEqual( 1, $score, + "boost cannot produce a score greater than 1" ); + $this->assertGreaterThanOrEqual( 0, $score, + "boost cannot produce a score lower than 0" ); if ( $boost > 1 ) { - $this->assertGreaterThan( $score, $res, "With a boost ($boost) greater than 1 the boosted score must be greater than the original." ); + $this->assertGreaterThan( $score, $res, "With a boost ($boost) greater than 1 the" . + " boosted score must be greater than the original." ); } elseif ( $boost < 1 ) { - $this->assertLessThan( $score, $res, "With a boost ($boost) less than 1 the boosted score must be less than the original." ); + $this->assertLessThan( $score, $res, "With a boost ($boost) less than 1 the " . 
+ "boosted score must be less than the original." ); } else { $this->assertEquals( $score, $res, "When boost is 1 the score remains unchanged." ); } @@ -98,7 +114,8 @@ $res = $qs->boost( 1, 0 ); $this->assertEquals( $res, 0.5, "When boost is 0 the score is divided by 2." ); $res = $qs->boost( 1, 2^31-1 ); - $this->assertEquals( $res, 1, "When score is 1 and boost is very high the score is still 1." ); + $this->assertEquals( $res, 1, + "When score is 1 and boost is very high the score is still 1." ); $res = $qs->boost( 0, 0 ); $this->assertEquals( $res, 0, "When score is 0 and boost is 0 the score is still 0." ); } @@ -229,7 +246,8 @@ 'template' => mt_rand( 0, 1 ) == 1 ? [ 'Good' ] : [ 'Bad' ] ]; $this->assertGreaterThan( 0, $qs->score( $page ), "Score is always greater than 0" ); - $this->assertLessThan( QualityScore::SCORE_RANGE, $qs->score( $page ), "Score is always lower than " . QualityScore::SCORE_RANGE ); + $this->assertLessThan( QualityScore::SCORE_RANGE, $qs->score( $page ), + "Score is always lower than " . QualityScore::SCORE_RANGE ); } // Edges @@ -241,7 +259,8 @@ 'redirect' => array_fill( 0, QualityScore::REDIRECT_NORM, null ), 'template' => [] ]; - $this->assertEquals( QualityScore::SCORE_RANGE, $qs->score( $page ), "Highest score is " . QualityScore::SCORE_RANGE ); + $this->assertEquals( QualityScore::SCORE_RANGE, $qs->score( $page ), + "Highest score is " . QualityScore::SCORE_RANGE ); $page = [ 'incoming_links' => 0, @@ -267,7 +286,8 @@ 'redirect' => array_fill( 0, QualityScore::REDIRECT_NORM, null ), 'template' => [] ]; - $this->assertEquals( QualityScore::SCORE_RANGE, $qs->score( $page ), "With very small wiki the highest score is also " . QualityScore::SCORE_RANGE ); + $this->assertEquals( QualityScore::SCORE_RANGE, $qs->score( $page ), + "With very small wiki the highest score is also " . 
QualityScore::SCORE_RANGE ); // The scoring function should not fail with 0 page $qs = new QualityScore(); @@ -279,7 +299,8 @@ 'redirect' => array_fill( 0, QualityScore::REDIRECT_NORM, null ), 'template' => [] ]; - $this->assertEquals( QualityScore::SCORE_RANGE, $qs->score( $page ), "With a zero page wiki the highest score is also " . QualityScore::SCORE_RANGE ); + $this->assertEquals( QualityScore::SCORE_RANGE, $qs->score( $page ), + "With a zero page wiki the highest score is also " . QualityScore::SCORE_RANGE ); } public function testRobustness() { @@ -310,9 +331,13 @@ $score = $scorer->score( $page ); $pagedebug = print_r( $page, true ); - $this->assertTrue( is_int( $score ), "Score is always an integer for " . get_class( $scorer ) . " with these values $pagedebug" ); - $this->assertTrue( $score >= 0, "Score is always positive " . get_class( $scorer ) . " with these values $pagedebug" ); - $this->assertTrue( $score <= QualityScore::SCORE_RANGE, "Score is always lower than QualityScore::SCORE_RANGE " . get_class( $scorer ) . " with these values $pagedebug" ); + $this->assertTrue( is_int( $score ), "Score is always an integer for " . + get_class( $scorer ) . " with these values $pagedebug" ); + $this->assertTrue( $score >= 0, "Score is always positive " . + get_class( $scorer ) . " with these values $pagedebug" ); + $this->assertTrue( $score <= QualityScore::SCORE_RANGE, + "Score is always lower than QualityScore::SCORE_RANGE " . get_class( $scorer ) . 
+ " with these values $pagedebug" ); } } } -- To view, visit https://gerrit.wikimedia.org/r/364484 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I707253515e5903690d14d6d2987dfbdefdcb0f30 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: Umherirrender <umherirrender_de...@web.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits