Manybubbles has uploaded a new change for review. https://gerrit.wikimedia.org/r/223327
Change subject: Make prefer-recent tests must more consistent ...................................................................... Make prefer-recent tests must more consistent These tests were slow and inconsistent because they dealt with the current time. They paused intentionally and failed when things took too long. No more! Now they hack the page's timestamp using syntax in the page's source that is only enabled during testing and they hack the current time used in the query using a url parameter. That url parameter is enabled at all times which should be perfectly safe. Change-Id: Ic7b081a29b7f35c1d31fc569e29460468fb881fd --- M includes/BuildDocument/PageDataBuilder.php M includes/CirrusSearch.php M includes/Searcher.php M tests/browser/features/prefer_recent_api.feature M tests/browser/features/step_definitions/search_steps.rb M tests/browser/features/support/hooks.rb M tests/jenkins/Jenkins.php 7 files changed, 82 insertions(+), 23 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/27/223327/1 diff --git a/includes/BuildDocument/PageDataBuilder.php b/includes/BuildDocument/PageDataBuilder.php index 0545854..6682919 100644 --- a/includes/BuildDocument/PageDataBuilder.php +++ b/includes/BuildDocument/PageDataBuilder.php @@ -50,6 +50,7 @@ // All content types have a language $this->doc->add( 'language', $this->title->getPageLanguage()->getCode() ); + $this->hackTimestamp(); return $this->doc; } @@ -144,4 +145,26 @@ $this->doc->add( 'wikibase_item', $wikibaseItem ); } } + + /** + * Check the page's text for timestamp hacking requests if they are enabled. Which should only be during testing. + */ + private function hackTimestamp() { + global $wgCirrusSearchAllowTimeHacking; + + if ( !isset( $wgCirrusSearchAllowTimeHacking ) || + !$wgCirrusSearchAllowTimeHacking ) { + return; + } + $text = $this->content->getTextForSearchIndex(); + if ( !$text ) { + return; + } + $matches = array(); + if ( !preg_match( '/timehack=(.+?)Z/', $text, $matches ) ) { + return; + } + $this->doc->set( 'timestamp', wfTimestamp( TS_ISO_8601, $matches[ 1 ] ) ); + wfDebugLog( 'CirrusSearch', "ASDFADSF " . $this->doc->get( 'timestamp' ) ); + } } diff --git a/includes/CirrusSearch.php b/includes/CirrusSearch.php index f16c47e..2c0767c 100644 --- a/includes/CirrusSearch.php +++ b/includes/CirrusSearch.php @@ -125,6 +125,10 @@ if ( $request->getVal( 'cirrusHighlightAltTitleWithPostings' ) === 'no' ) { $highlightingConfig ^= FullTextResultsType::HIGHLIGHT_ALT_TITLES_WITH_POSTINGS; } + $overrideNow = $request->getVal( 'now' ); + if ( $overrideNow ) { + $searcher->overrideNow( $overrideNow ); + } } if ( $this->namespaces && !in_array( NS_FILE, $this->namespaces ) ) { $highlightingConfig ^= FullTextResultsType::HIGHLIGHT_FILE_TEXT; diff --git a/includes/Searcher.php b/includes/Searcher.php index 5586ebf..aea1897 100644 --- a/includes/Searcher.php +++ b/includes/Searcher.php @@ -11,9 +11,11 @@ use \CirrusSearch\Search\ResultsType; use \Language; use \MWNamespace; +use \MWTimestamp; use \RequestContext; use \SearchResultSet; use \Status; +use \TimestampException; use \Title; use \UsageException; use User; @@ -196,6 +198,11 @@ * advanced highlighting (e.g. match_phrase_prefix for regular quoted strings). */ private $nonTextHighlightQueries = array(); + + /** + * @var false|string override of the current time + */ + private $overrideNow = false; /** * Constructor @@ -1500,18 +1507,22 @@ // Customize score by decaying a portion by time since last update if ( $this->preferRecentDecayPortion > 0 && $this->preferRecentHalfLife > 0 ) { - // Convert half life for time in days to decay constant for time in milliseconds. - $decayConstant = log( 2 ) / $this->preferRecentHalfLife / 86400000; + $params = array( + // Convert half life for time in days to decay constant for time in milliseconds. + 'decayConstant' => log( 2 ) / $this->preferRecentHalfLife / 86400000, + 'now' => $this->now(), + ); // e^ct - 1 where t is last modified time - now which is negative - $exponentialDecayGroovy = "Math.expm1($decayConstant * (doc['timestamp'].value - Instant.now().getMillis()))"; + $exponentialDecayGroovy = "Math.expm1(decayConstant * (doc['timestamp'].value - now))"; // p(e^ct - 1) if ( $this->preferRecentDecayPortion !== 1.0 ) { - $exponentialDecayGroovy = "$exponentialDecayGroovy * $this->preferRecentDecayPortion"; + $exponentialDecayGroovy = "$exponentialDecayGroovy * preferRecentDecayPortion"; + $params[ 'preferRecentDecayPortion' ] = $this->preferRecentDecayPortion; } // p(e^ct - 1) + 1 which is easier to calculate than, but reduces to 1 - p + pe^ct // Which breaks the score into an unscaled portion (1 - p) and a scaled portion (p) $exponentialDecayGroovy = "$exponentialDecayGroovy + 1"; - $functionScore->addScriptScoreFunction( new \Elastica\Script( $exponentialDecayGroovy, null, 'groovy' ) ); + $functionScore->addScriptScoreFunction( new \Elastica\Script( $exponentialDecayGroovy, $params, 'groovy' ) ); $useFunctionScore = true; } @@ -1698,4 +1709,25 @@ $query = substr( $query, $colon + 1 ); $this->namespaces = array( $foundNamespace->getId() ); } + + /** + * Now in millseconds since epoch. + */ + private function now() { + try { + $t = new MWTimestamp( $this->overrideNow ); + } catch ( TimestampException $e ) { + $t = new MWTimestamp(); + } + // MWTimestamp only has second precision which should be fine. + // But Elasticsearch wants to do everything in milliseconds. + return intval( $t->getTimestamp() ) * 1000; + } + + /** + * Override the current time used in prefer-recent:. + */ + public function overrideNow( $overrideNow ) { + $this->overrideNow = $overrideNow; + } } diff --git a/tests/browser/features/prefer_recent_api.feature b/tests/browser/features/prefer_recent_api.feature index 8885558..1da3401 100644 --- a/tests/browser/features/prefer_recent_api.feature +++ b/tests/browser/features/prefer_recent_api.feature @@ -1,22 +1,21 @@ @clean @api @prefer_recent Feature: Searches with prefer-recent - @expect_failure Scenario Outline: Recently updated articles are prefered if prefer-recent: is specified - When I api search for PreferRecent First OR Second OR Third - Then PreferRecent Second Second is the first api search result - When I api search for prefer-recent:<options> PreferRecent First OR Second OR Third + Given I api search for PreferRecent First OR Second OR Third + And PreferRecent Second Second is the first api search result + When I api search with now set to 1970-01-01T01:00:00Z for prefer-recent:<options> PreferRecent First OR Second OR Third Then PreferRecent Third is the first api search result Examples: | options | | 1,.001 | | 1,0.001 | - | 1,.0001 | - | .99,.0001 | - | .99,.001 | + | 1,.005 | + | .9,.01 | - @expect_failure Scenario Outline: You can specify prefer-recent: in such a way that being super recent isn't enough - When I api search for prefer-recent:<options> PreferRecent First OR Second OR Third + Given I api search for PreferRecent First OR Second OR Third + And PreferRecent Second Second is the first api search result + When I api search with now set to 1970-01-01T01:00:00Z for prefer-recent:<options> PreferRecent First OR Second OR Third Then PreferRecent Second Second is the first api search result Examples: | options | diff --git a/tests/browser/features/step_definitions/search_steps.rb b/tests/browser/features/step_definitions/search_steps.rb index a188ede..5842d92 100644 --- a/tests/browser/features/step_definitions/search_steps.rb +++ b/tests/browser/features/step_definitions/search_steps.rb @@ -18,7 +18,7 @@ @search_vars[varname] = page_id_of title end # rubocop:disable LineLength -When(/^I api search( with disabled incoming link weighting)?(?: with offset (\d+))?(?: in the (.*) language)?(?: in namespaces? (\d+(?: \d+)*))? for (.*)$/) do |incoming_links, offset, lang, namespaces, search| +When(/^I api search( with disabled incoming link weighting)?(?: with now set to (.+Z))?(?: with offset (\d+))?(?: in the (.*) language)?(?: in namespaces? (\d+(?: \d+)*))? for (.*)$/) do |incoming_links, now, offset, lang, namespaces, search| begin @api_result = search_for( search.gsub(/%[^ {]+%/, @search_vars) @@ -28,7 +28,8 @@ sroffset: offset, srnamespace: (namespaces || "0").split(/ /), uselang: lang, - cirrusBoostLinks: incoming_links ? "no" : "yes" + cirrusBoostLinks: incoming_links ? "no" : "yes", + now: now ) rescue MediawikiApi::ApiError => e @api_error = e diff --git a/tests/browser/features/support/hooks.rb b/tests/browser/features/support/hooks.rb index 1991b6d..13deaa6 100644 --- a/tests/browser/features/support/hooks.rb +++ b/tests/browser/features/support/hooks.rb @@ -297,14 +297,10 @@ prefer_recent = false Before("@prefer_recent") do unless prefer_recent - # These are updated per process instead of per test because of the 20 second wait - # Note that the scores have to be close together because 20 seconds doesn't mean a whole lot steps %( - Given a page named PreferRecent First exists with contents %{epoch} - And a page named PreferRecent Second Second exists with contents %{epoch} - And wait 20 seconds - And a page named PreferRecent Third exists with contents %{epoch} - And wait 10 seconds + Given a page named PreferRecent First exists with contents timehack=1970-01-01T00:00:00Z + And a page named PreferRecent Second Second exists with contents timehack=1970-01-01T00:00:00Z + And a page named PreferRecent Third exists with contents timehack=1970-01-01T00:10:00Z ) end prefer_recent = true diff --git a/tests/jenkins/Jenkins.php b/tests/jenkins/Jenkins.php index f75783c..b542b55 100644 --- a/tests/jenkins/Jenkins.php +++ b/tests/jenkins/Jenkins.php @@ -73,6 +73,7 @@ $wgCiteEnablePopups = true; $wgExtraNamespaces[ 760 ] = 'Mó'; + // Extra helpful configuration but not really required $wgShowExceptionDetails = true; @@ -80,6 +81,9 @@ $wgCirrusSearchLanguageWeight[ 'wiki' ] = 5.0; $wgCirrusSearchAllowLeadingWildcard = false; +// Just for testing, allow the page's text to override the timestamp. +$wgCirrusSearchAllowTimeHacking = true; + class Jenkins { /** * Installs maintenance scripts that provide a clean Elasticsearch index for testing. -- To view, visit https://gerrit.wikimedia.org/r/223327 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ic7b081a29b7f35c1d31fc569e29460468fb881fd Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: Manybubbles <never...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits