EBernhardson has uploaded a new change for review. https://gerrit.wikimedia.org/r/313931
Change subject: Initial RequestLogger tests ...................................................................... Initial RequestLogger tests These tests don't seem particularly amazing, but they should provide some ability to see that the refactor of request logging works as expected and still generates the same output for some sets of inputs. These test cases obviously don't workout all the various options that could exist, failure cases, etc, but its better than nothing. Testing everything from this high of level would be incredibly verbose. Maybe this should be re-worked to use fixtures like the searchText instead of trying to spell out the full response/log cycle in the code? Change-Id: If15f280d1bf0b8f8df30b9c25f2bcd4e860aee0a --- M includes/CompletionSuggester.php M includes/Connection.php M includes/RequestLogger.php A tests/unit/RequestLogger.php 4 files changed, 278 insertions(+), 12 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/31/313931/1 diff --git a/includes/CompletionSuggester.php b/includes/CompletionSuggester.php index 1ff38d5..d232693 100644 --- a/includes/CompletionSuggester.php +++ b/includes/CompletionSuggester.php @@ -222,14 +222,11 @@ $profiles, $text ) { $description = "{queryType} search for '{query}'"; $this->start( $description, $logContext ); - $this->logContext['timing']['start'] = microtime( true ); try { $result = $index->request( "_suggest", Request::POST, $suggest, $queryOptions ); - $this->logContext['timing']['end-comp-req'] = microtime( true ); if( $result->isOk() ) { $result = $this->postProcessSuggest( $result, $profiles ); } - $this->logContext['timing']['done'] = microtime( true ); return $this->success( $result ); } catch ( \Elastica\Exception\ExceptionInterface $e ) { return $this->failure( $e ); @@ -428,7 +425,6 @@ $data = $response->getData(); unset( $data['_shards'] ); - $this->logContext['timing']['mark1'] = microtime( true ); $limit = $this->getHardLimit(); $suggestionsByDocId = []; $suggestionProfileByDocId = []; @@ -462,7 +458,6 @@ } } } - $this->logContext['timing']['mark2'] = microtime( true ); // simply sort by existing scores uasort( $suggestionsByDocId, function ( SearchSuggestion $a, SearchSuggestion $b ) { @@ -487,7 +482,6 @@ } if ( !empty ( $missingTextDocIds ) ) { - $this->logContext['timing']['mark3'] = microtime( true ); // Experimental. // // Second pass query to fetch redirects. @@ -505,7 +499,6 @@ $redirResponse = $type->request( '_mget', 'GET', [ 'ids' => $missingTextDocIds ], [ '_source_include' => 'redirect' ] ); - $this->logContext['timing']['mark4'] = microtime( true ); if ( $redirResponse->isOk() ) { $this->logContext['elasticTook2PassMs'] = intval( $redirResponse->getQueryTime() * 1000 ); $docs = $redirResponse->getData(); @@ -538,10 +531,8 @@ ] ); } - $this->logContext['timing']['mark5'] = microtime( true ); } - $this->logContext['timing']['mark6'] = microtime( true ); $finalResults = array_filter( $suggestionsByDocId, function ( SearchSuggestion $suggestion ) { @@ -570,7 +561,6 @@ ]; } $this->logContext['maxScore'] = $maxScore; - $this->logContext['timing']['mark7'] = microtime( true ); return new SearchSuggestionSet( $finalResults ); } diff --git a/includes/Connection.php b/includes/Connection.php index 035d534..af2a881 100644 --- a/includes/Connection.php +++ b/includes/Connection.php @@ -171,6 +171,7 @@ foreach ( $config as $idx => $server ) { if ( isset( $server['transport'] ) && + is_string( $server['transport'] ) && class_exists( $server['transport'] ) ) { $transportClass = $server['transport']; diff --git a/includes/RequestLogger.php b/includes/RequestLogger.php index c8175dd..c564375 100644 --- a/includes/RequestLogger.php +++ b/includes/RequestLogger.php @@ -63,12 +63,12 @@ // it's still doing "something" $finalContext = $this->buildLogContext( $context, $tookMs, $client ); if ( $wgCirrusSearchLogElasticRequests ) { - if ( count( $this->logContexts ) === 0 ) { + $this->logContexts[] = $finalContext; + if ( count( $this->logContexts ) === 1 ) { DeferredUpdates::addCallableUpdate( function () { $this->reportLogContexts(); } ); } - $this->logContexts[] = $finalContext; $logMessage = $this->buildLogMessage( $description, $finalContext ); LoggerFactory::getInstance( 'CirrusSearchRequests' )->debug( $logMessage, $finalContext ); diff --git a/tests/unit/RequestLogger.php b/tests/unit/RequestLogger.php new file mode 100644 index 0000000..3ce9d23 --- /dev/null +++ b/tests/unit/RequestLogger.php @@ -0,0 +1,275 @@ +<?php + +namespace CirrusSearch\Test; + +use CirrusSearch\CompletionSuggester; +use CirrusSearch\Connection; +use CirrusSearch\ElasticsearchIntermediary; +use CirrusSearch\RequestLogger; +use CirrusSearch\Searcher; +use CirrusSearch\Search\IdResultsType; +use Elastica\Response; +use Elastica\Transport\AbstractTransport; +use MediaWiki\MediaWikiServices; + +/** + * Tests full text and completion search request logging. Could be expanded for + * other request types if necessary, but these are mostly the two we care + * about. + */ +class RequestLoggerTest extends \MediaWikiTestCase { + public function fullTextProvider() { + return [ + 'foo' => [ + 'response' => new Response( [ + 'hits' => [ + 'hits' => [ + [ + '_score' => 0.9876, + '_id' => "1", + '_type' => 'page', + 'index' => 'enwiki_content_first', + '_source' => [ + 'title' => 'Main Page', + ], + ], + ], + 'max_score' => 1, + 'total' => 1, + ], + '_shards' => [ /* ... */ ], + 'timed_out' => false, + 'took' => 4, + ], 200 ), + 'expectedLogs' => [ + [ + 'level' => 'debug', + 'message' => '', + 'context' => [ + 'source' => 'cli', + 'ip' => '127.0.0.1', + 'userAgent' => '', + 'backendUserTests' => [], + 'hits' => [ + ], + 'payload' => [ + 'acceptLang' => '', + 'queryString' => '', + ], + 'requests' => [ + [ + 'query' => 'term', + 'queryType' => 'full_text', + 'indices' => [ 'wiki' ], + 'elasticTookMs' => 4, + 'limit' => 20, + 'hitsTotal' => 1, + 'hitsReturned' => 1, + 'hitsOffset' => 0, + 'namespaces' => [], + 'suggestion' => '', + 'suggestionRequested' => false, + 'maxScore' => 1, + 'payload' => [], + 'hits' => [], + ], + ], + ], + ] + ] + ], + ]; + } + + /** + * @dataProvider fullTextProvider + */ + public function testFullText( \Elastica\Response $response, $expectedLogs ) { + list ( $logger, $config, $connection ) = $this->buildDependencies( $response ); + + $searcher = new Searcher( $connection, 0, 20, $config ); + // avoid needing more complex source responses by using a simple + // result type + $searcher->setResultsType( new IdResultsType ); + + $searcher->searchText( 'term', false ); + + // Force the logger to flush + \DeferredUpdates::doUpdates(); + + $logs = $this->filterLogs( $logger->getLogs() ); + + // Finally check for the expected log + $this->assertEquals( $expectedLogs, $logs ); + } + + public function completionProvider() { + return [ + 'foo' => [ + 'response' => new Response( [ + 'plain_stop' => [ + [ + 'text' => 'term', + 'offset' => 0, + 'length'=> 4, + 'options' => [ + [ + 'text' => '1:t:Main Page', + 'score' => 123456 + ], + ], + ], + ], + 'plain_fuzzy_2' => [ + ], + 'plain_stop_fuzzy_2' => [ + ], + 'plain' => [ + ], + '_shards' => [ /* ... */ ], + ], 200 ), + 'expectedLogs' => [ + [ + 'level' => 'debug', + 'message' => '', + 'context' => [ + 'source' => 'cli', + 'ip' => '127.0.0.1', + 'userAgent' => '', + 'backendUserTests' => [], + 'hits' => [ + ], + 'payload' => [ + 'acceptLang' => '', + 'queryString' => '', + ], + 'requests' => [ + [ + 'query' => 'term', + 'queryType' => 'comp_suggest', + 'indices' => [ 'wiki_titlesuggest' ], + 'elasticTookMs' => 0, + 'limit' => -1, + 'hitsTotal' => 1, + 'hitsReturned' => 1, + 'hitsOffset' => 0, + 'namespaces' => [], + 'suggestion' => '', + 'suggestionRequested' => false, + 'maxScore' => 123.456, + 'payload' => [], + 'hits' => [ + [ + 'title' => 'Main Page', + 'index' => 'wiki_titlesuggest', + 'pageId' => 1, + 'profileName' => 'plain_stop', + 'score' => 123.456, + ], + ], + ], + ], + ], + ], + ], + ], + ]; + } + + /** + * @dataProvider completionProvider + */ + public function testCompletion( Response $response, $expectedLogs ) { + list ( $logger, $config, $connection ) = $this->buildDependencies( $response ); + $suggester = new CompletionSuggester( $connection, 5, 0, $config ); + $suggester->suggest( 'term' ); + + // Force the logger to flush + \DeferredUpdates::doUpdates(); + + $logs = $this->filterLogs( $logger->getLogs() ); + + $this->assertEquals( $expectedLogs, $logs ); + } + + private function buildDependencies( Response $response ) { + // Plugin in a request logger that we know is empty + $requestLogger = new RequestLogger; + $requestLoggerProp = new \ReflectionProperty( ElasticsearchIntermediary::class, 'requestLogger' ); + $requestLoggerProp->setAccessible( true ); + $requestLoggerProp->setValue( $requestLogger ); + + // Override the logging channel with our own so we can capture logs + $logger = new FakeLogger(); + $this->setLogger( 'CirrusSearchRequestSet', $logger ); + + // Build up an elastica transport that will return our faked response + $transport = $this->getMockBuilder( AbstractTransport::class ) + ->disableOriginalConstructor() + ->getMock(); + $transport->expects( $this->any() ) + ->method( 'exec' ) + ->will( $this->returnValue( $response ) ); + + // Setting everything expected for running a search request/response + // is a pain...just use the real deal and override the clusters config + // to provide our transport. + $config = MediaWikiServices::getInstance() + ->getConfigFactory() + ->makeConfig( 'CirrusSearch' ); + + $this->setMwGlobals( [ + 'wgCirrusSearchClusters' => [ + 'default' => [ + [ 'transport' => $transport ], + ] + ], + ]); + $connection = new Connection( $config, 'default' ); + + return [$logger, $config, $connection]; + } + + /** + * Filter dynamic data out of the logs that can't be statically + * referred to. Still asserts the keys exist, as otherwise avro + * would fail to encode them. + */ + private function filterLogs( array $logs ) { + foreach ( array_keys( $logs ) as $idx ) { + // we need to remove some quasi-random data. To be safe + // assert this exists before deleting it. + foreach ( [ 'id', 'ts', 'wikiId', 'identity', 'tookMs' ] as $key ) { + $this->assertArrayHasKey( $key, $logs[$idx]['context'] ); + unset( $logs[$idx]['context'][$key] ); + } + $this->assertArrayHasKey( 'host', $logs[$idx]['context']['payload'] ); + unset( $logs[$idx]['context']['payload']['host'] ); + + // Do same for the requests in the log + foreach ( array_keys( $logs[$idx]['context']['requests'] ) as $reqIdx ) { + $this->assertArrayHasKey( 'tookMs', $logs[$idx]['context']['requests'][$reqIdx] ); + unset( $logs[$idx]['context']['requests'][$reqIdx]['tookMs'] ); + } + } + + return $logs; + } + +} + +class FakeLogger extends \Psr\Log\AbstractLogger { + private $logs = []; + + public function log( $level, $message, array $context = array() ) { + $this->logs[] = [ + 'level' => $level, + 'message' => $message, + 'context' => $context, + ]; + } + + public function getLogs() { + return $this->logs; + } +} -- To view, visit https://gerrit.wikimedia.org/r/313931 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: If15f280d1bf0b8f8df30b9c25f2bcd4e860aee0a Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits