DCausse has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/402089 )
Change subject: Switch phrase suggester config to profile manager ...................................................................... Switch phrase suggester config to profile manager Bug: T183279 Change-Id: I2447b15c39677501cd8e738551b7028de7b8fb4c --- M CirrusSearch.php M autoload.php M docs/settings.txt A includes/Profile/PhraseSuggesterProfileRepoWrapper.php M includes/Profile/SearchProfileService.php M includes/Profile/SearchProfileServiceFactory.php M includes/Query/FullTextQueryStringQueryBuilder.php M profiles/PhraseSuggesterProfiles.config.php D profiles/PhraseSuggesterProfiles.php 9 files changed, 232 insertions(+), 166 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/89/402089/1 diff --git a/CirrusSearch.php b/CirrusSearch.php index 40f3408..a27a1ab 100644 --- a/CirrusSearch.php +++ b/CirrusSearch.php @@ -21,7 +21,6 @@ * http://www.gnu.org/copyleft/gpl.html */ -require_once __DIR__ . "/profiles/PhraseSuggesterProfiles.config.php"; require_once __DIR__ . "/profiles/SaneitizeProfiles.php"; require_once __DIR__ . "/profiles/FullTextQueryBuilderProfiles.config.php"; @@ -393,8 +392,13 @@ $wgCirrusSearchEnablePhraseSuggest = true; /** + * List of additional phrase suggester profiles + * see profiles/PhraseSuggesterProfiles.config.php + */ +$wgCirrusSearchPhraseSuggestProfiles = []; + +/** * Set the Phrase suggester settings using the default profile. - * see profiles/PhraseSuggesterProfiles.php */ $wgCirrusSearchPhraseSuggestSettings = 'default'; diff --git a/autoload.php b/autoload.php index 923d938..2a707cb 100644 --- a/autoload.php +++ b/autoload.php @@ -109,11 +109,11 @@ 'CirrusSearch\\MultiSearchRequestLog' => __DIR__ . '/includes/MultiSearchRequestLog.php', 'CirrusSearch\\NearMatchPicker' => __DIR__ . '/includes/NearMatchPicker.php', 'CirrusSearch\\OtherIndexes' => __DIR__ . '/includes/OtherIndexes.php', - 'CirrusSearch\\PhraseSuggesterProfiles' => __DIR__ . '/profiles/PhraseSuggesterProfiles.php', 'CirrusSearch\\Profile\\ArrayProfileRepository' => __DIR__ . '/includes/Profile/ArrayProfileRepository.php', 'CirrusSearch\\Profile\\CompletionSearchProfileNameResolver' => __DIR__ . '/includes/Profile/CompletionSearchProfileNameResolver.php', 'CirrusSearch\\Profile\\CompletionSearchProfileRepository' => __DIR__ . '/includes/Profile/CompletionSearchProfileRepository.php', 'CirrusSearch\\Profile\\ConfigProfileRepository' => __DIR__ . '/includes/Profile/ConfigProfileRepository.php', + 'CirrusSearch\\Profile\\PhraseSuggesterProfileRepoWrapper' => __DIR__ . '/includes/Profile/PhraseSuggesterProfileRepoWrapper.php', 'CirrusSearch\\Profile\\SearchProfileNameResolver' => __DIR__ . '/includes/Profile/SearchProfileNameResolver.php', 'CirrusSearch\\Profile\\SearchProfileRepository' => __DIR__ . '/includes/Profile/SearchProfileRepository.php', 'CirrusSearch\\Profile\\SearchProfileService' => __DIR__ . '/includes/Profile/SearchProfileService.php', diff --git a/docs/settings.txt b/docs/settings.txt index f00900b..5f3874c 100644 --- a/docs/settings.txt +++ b/docs/settings.txt @@ -438,7 +438,14 @@ $wgCirrusSearchPhraseSuggestSettings = 'default'; Set the Phrase suggester settings using the default profile. -see profiles/PhraseSuggesterProfiles.php + +; $wgCirrusSearchPhraseSuggestProfiles + +Default: + $wgCirrusSearchPhraseSuggestProfiles = [] + +Set additional phrase suggester profiles +(see profiles/PhraseSuggesterProfiles.config.php) ; $wgCirrusSearchPhraseSuggestReverseField diff --git a/includes/Profile/PhraseSuggesterProfileRepoWrapper.php b/includes/Profile/PhraseSuggesterProfileRepoWrapper.php new file mode 100644 index 0000000..ac7e701 --- /dev/null +++ b/includes/Profile/PhraseSuggesterProfileRepoWrapper.php @@ -0,0 +1,187 @@ +<?php + +namespace CirrusSearch\Profile; +use CirrusSearch\Util; +use MediaWiki\MediaWikiServices; +use RuntimeException; + +/** + * Wrapper to augment the phrase suggester profile settings + * with customization on-wiki using system messages. + */ +class PhraseSuggesterProfileRepoWrapper implements SearchProfileRepository { + + const MAX_ERRORS_HARD_LIMIT = 2; + const MAX_TERM_FREQ_HARD_LIMIT = 0.6; + const PREFIX_LENGTH_HARD_LIMIT = 2; + + /** + * @var string[] + */ + private static $ALLOWED_MODE = [ 'missing', 'popular', 'always' ]; + + /** + * @var SearchProfileRepository + */ + private $wrapped; + + /** + * PhraseSuggesterProfileRepoWrapper constructor. + * @param SearchProfileRepository $wrapped + */ + private function __construct( SearchProfileRepository $wrapped ) { + $this->wrapped = $wrapped; + } + + /** + * Wrap this repository + * @return SearchProfileRepository + */ + public static function wrap( SearchProfileRepository $repo ) { + if ( $repo->repositoryType() !== SearchProfileService::PHRASE_SUGGESTER ) { + throw new RuntimeException( self::class . " only supports repositories of type " . SearchProfileService::PHRASE_SUGGESTER ); + } + return new self( $repo ); + } + + /** + * The repository type + * @return string + */ + public function repositoryType() { + return $this->wrapped->repositoryType(); + } + + /** + * The repository name + * @return string + */ + public function repositoryName() { + return $this->wrapped->repositoryName(); + } + + /** + * Load a profile named $name + * @param string $name + * @return array|null the profile data or null if not found + */ + public function getProfile( $name ) { + $settings = $this->wrapped->getProfile( $name ); + if ( $settings === null ) { + return null; + } + $cache = MediaWikiServices::getInstance()->getLocalServerObjectCache(); + $lines = $cache->getWithSetCallback( + $cache->makeKey( 'cirrussearch-didyoumean-settings' ), + 600, + function () { + $source = wfMessage( 'cirrussearch-didyoumean-settings' )->inContentLanguage(); + if ( !$source || $source->isDisabled() ) { + return []; + } + return Util::parseSettingsInMessage( $source->plain() ); + } + ); + + $laplaceAlpha = null; + $stupidBackoffDiscount = null; + foreach ( $lines as $line ) { + $linePieces = explode( ':', $line, 2 ); + if ( count( $linePieces ) ) { + // Skip improperly formatted lines without a key:value + continue; + } + $k = $linePieces[0]; + $v = $linePieces[1]; + + switch ( $k ) { + case 'max_errors' : + if ( is_numeric( $v ) && $v >= 1 && $v <= self::MAX_ERRORS_HARD_LIMIT ) { + $settings['max_errors'] = floatval( $v ); + } + break; + case 'confidence' : + if ( is_numeric( $v ) && $v >= 0 ) { + $settings['confidence'] = floatval( $v ); + } + break; + case 'max_term_freq' : + if ( is_numeric( $v ) && $v >= 0 && $v <= self::MAX_TERM_FREQ_HARD_LIMIT ) { + $settings['max_term_freq'] = floatval( $v ); + } + break; + case 'min_doc_freq' : + if ( is_numeric( $v ) && $v >= 0 && $v < 1 ) { + $settings['min_doc_freq'] = floatval( $v ); + } + break; + case 'prefix_length' : + if ( is_numeric( $v ) && $v >= 0 && $v <= self::PREFIX_LENGTH_HARD_LIMIT ) { + $settings['prefix_length'] = intval( $v ); + } + break; + case 'suggest_mode' : + if ( in_array( $v, self::$ALLOWED_MODE ) ) { + $settings['mode'] = $v; + } + break; + case 'collate' : + if ( $v === 'true' ) { + $settings['collate'] = true; + } elseif ( $v === 'false' ) { + $settings['collate'] = false; + } + break; + case 'smoothing' : + if ( $v === 'laplace' ) { + $settings['smoothing_model'] = [ + 'laplace' => [ + 'alpha' => 0.5 + ] + ]; + } elseif ( $v === 'stupid_backoff' ) { + $settings['smoothing_model'] = [ + 'stupid_backoff' => [ + 'discount' => 0.4 + ] + ]; + } + break; + case 'laplace_alpha' : + if ( is_numeric( $v ) && $v >= 0 && $v <= 1 ) { + $laplaceAlpha = floatval( $v ); + } + break; + case 'stupid_backoff_discount' : + if ( is_numeric( $v ) && $v >= 0 && $v <= 1 ) { + $stupidBackoffDiscount = floatval( $v ); + } + break; + } + } + + // Apply smoothing model options, if none provided we'll use elasticsearch defaults + if ( isset( $settings['smoothing_model']['laplace'] ) && + isset( $laplaceAlpha ) ) { + $settings['smoothing_model']['laplace'] = [ + 'alpha' => $laplaceAlpha + ]; + } + if ( isset( $settings['smoothing_model']['stupid_backoff'] ) && + isset( $stupidBackoffDiscount ) ) { + $settings['smoothing_model']['stupid_backoff'] = [ + 'discount' => $stupidBackoffDiscount + ]; + } + return $settings; + } + + /** + * Get the list of profiles that we want to expose to the user. + * + * @return array list of profiles index by name + */ + public function listExposedProfiles() { + return $this->wrapped->listExposedProfiles(); + } +} diff --git a/includes/Profile/SearchProfileService.php b/includes/Profile/SearchProfileService.php index 9b68045..dc37d63 100644 --- a/includes/Profile/SearchProfileService.php +++ b/includes/Profile/SearchProfileService.php @@ -32,6 +32,7 @@ const RESCORE = 'rescore'; const RESCORE_FUNCTION_CHAINS = 'rescore_function_chains'; const COMPLETION = 'completion'; + const PHRASE_SUGGESTER = 'phrase_suggester'; const CONTEXT_FULLTEXT = 'fulltext'; const CONTEXT_PREFIXSEARCH = 'prefixsearch'; diff --git a/includes/Profile/SearchProfileServiceFactory.php b/includes/Profile/SearchProfileServiceFactory.php index 07a3c50..1e15821 100644 --- a/includes/Profile/SearchProfileServiceFactory.php +++ b/includes/Profile/SearchProfileServiceFactory.php @@ -3,6 +3,7 @@ namespace CirrusSearch\Profile; use CirrusSearch\SearchConfig; +use Elastica\Exception\RuntimeException; /** * Factory to build and prepare search profiles. @@ -26,6 +27,7 @@ $this->loadSimilarityProfiles( $service, $config ); $this->loadRescoreProfiles( $service, $config ); $this->loadCompletionProfiles( $service, $config ); + $this->loadPhraseSuggesterProfiles( $service, $config ); if ( $config->isLocalWiki() ) { \Hooks::run( 'CirrusSearchProfileService', [ $service ] ); @@ -118,4 +120,27 @@ $defaultProfile ) ); } } + + /** + * @param SearchProfileService $service + * @param SearchConfig $config + */ + private function loadPhraseSuggesterProfiles( SearchProfileService $service, SearchConfig $config ) { + $service->registerRepository( PhraseSuggesterProfileRepoWrapper::wrap( new ArrayProfileRepository( + SearchProfileService::PHRASE_SUGGESTER, self::CIRRUS_BASE, + require __DIR__ . '/../../profiles/PhraseSuggesterProfiles.config.php' ) ) ); + + $service->registerRepository( PhraseSuggesterProfileRepoWrapper::wrap( new ConfigProfileRepository( + SearchProfileService::PHRASE_SUGGESTER, self::CIRRUS_CONFIG, + 'CirrusSearchPhraseSuggestProfiles', $config ) ) ); + + $defaultProfile = $config->get( 'CirrusSearchPhraseSuggestSettings' ); + if ( is_array( $defaultProfile ) ) { + throw new RuntimeException( "CirrusSearch no longer supports setting CirrusSearchPhraseSuggesterSettings as an array. " . + "please use a string to reference the profile you want." ); + } + if ( $defaultProfile ) { + $service->registerSimpleNameResolver( SearchProfileService::PHRASE_SUGGESTER, $defaultProfile ); + } + } } diff --git a/includes/Query/FullTextQueryStringQueryBuilder.php b/includes/Query/FullTextQueryStringQueryBuilder.php index 72798c9..2e1c98e 100644 --- a/includes/Query/FullTextQueryStringQueryBuilder.php +++ b/includes/Query/FullTextQueryStringQueryBuilder.php @@ -2,12 +2,11 @@ namespace CirrusSearch\Query; -use CirrusSearch\PhraseSuggesterProfiles; +use CirrusSearch\Profile\SearchProfileService; use CirrusSearch\SearchConfig; use CirrusSearch\Searcher; use CirrusSearch\Search\SearchContext; use CirrusSearch\Extra\Query\TokenCountRouter; -use Elastica\Exception\RuntimeException; use MediaWiki\Logger\LoggerFactory; /** @@ -277,18 +276,8 @@ */ private function buildSuggestConfig( $field, $searchContext ) { // check deprecated settings - $profile = $this->config->get( 'CirrusSearchPhraseSuggestSettings' ); - if ( is_array( $profile ) ) { - // BC code to support profiles set as array in this config var - $suggestSettings = $profile; - } else { - $suggestSettings = $this->config->getElement( 'CirrusSearchPhraseSuggestProfiles', $profile ); - if ( !$suggestSettings ) { - throw new RuntimeException( "Unkown phrase_suggest profile $profile" ); - } - } - - $suggestSettings = PhraseSuggesterProfiles::overrideOptionsFromMessage( $suggestSettings ); + $suggestSettings = $this->config->getProfileService() + ->loadProfileForContext( SearchProfileService::PHRASE_SUGGESTER ); $settings = [ 'phrase' => [ 'field' => $field, diff --git a/profiles/PhraseSuggesterProfiles.config.php b/profiles/PhraseSuggesterProfiles.config.php index 617ad42..fc835e0 100644 --- a/profiles/PhraseSuggesterProfiles.config.php +++ b/profiles/PhraseSuggesterProfiles.config.php @@ -1,7 +1,5 @@ <?php -namespace CirrusSearch; - /** * CirrusSearch - List of profiles for "Did you mean" suggestions * @@ -21,7 +19,7 @@ * http://www.gnu.org/copyleft/gpl.html */ -$wgCirrusSearchPhraseSuggestProfiles = [ +return [ // This is the default settings 'default' => [ // The suggest mode used by the phrase suggester diff --git a/profiles/PhraseSuggesterProfiles.php b/profiles/PhraseSuggesterProfiles.php deleted file mode 100644 index 117ac4b..0000000 --- a/profiles/PhraseSuggesterProfiles.php +++ /dev/null @@ -1,145 +0,0 @@ -<?php - -namespace CirrusSearch; - -use MediaWiki\MediaWikiServices; - -/** - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - */ - -class PhraseSuggesterProfiles { - const MAX_ERRORS_HARD_LIMIT = 2; - const MAX_TERM_FREQ_HARD_LIMIT = 0.6; - /** - * @var string[] - */ - private static $ALLOWED_MODE = [ 'missing', 'popular', 'always' ]; - - const PREFIX_LENGTH_HARD_LIMIT = 2; - - /** - * Override Phrase suggester options ("Did you mean?" suggestions) - * @param array $settings - * @return array - */ - public static function overrideOptionsFromMessage( $settings ) { - $cache = MediaWikiServices::getInstance()->getLocalServerObjectCache(); - $lines = $cache->getWithSetCallback( - $cache->makeKey( 'cirrussearch-didyoumean-settings' ), - 600, - function () { - $source = wfMessage( 'cirrussearch-didyoumean-settings' )->inContentLanguage(); - if ( !$source || $source->isDisabled() ) { - return []; - } - return Util::parseSettingsInMessage( $source->plain() ); - } - ); - - $laplaceAlpha = null; - $stupidBackoffDiscount = null; - foreach ( $lines as $line ) { - $linePieces = explode( ':', $line, 2 ); - if ( count( $linePieces ) ) { - // Skip improperly formatted lines without a key:value - continue; - } - $k = $linePieces[0]; - $v = $linePieces[1]; - - switch ( $k ) { - case 'max_errors' : - if ( is_numeric( $v ) && $v >= 1 && $v <= self::MAX_ERRORS_HARD_LIMIT ) { - $settings['max_errors'] = floatval( $v ); - } - break; - case 'confidence' : - if ( is_numeric( $v ) && $v >= 0 ) { - $settings['confidence'] = floatval( $v ); - } - break; - case 'max_term_freq' : - if ( is_numeric( $v ) && $v >= 0 && $v <= self::MAX_TERM_FREQ_HARD_LIMIT ) { - $settings['max_term_freq'] = floatval( $v ); - } - break; - case 'min_doc_freq' : - if ( is_numeric( $v ) && $v >= 0 && $v < 1 ) { - $settings['min_doc_freq'] = floatval( $v ); - } - break; - case 'prefix_length' : - if ( is_numeric( $v ) && $v >= 0 && $v <= self::PREFIX_LENGTH_HARD_LIMIT ) { - $settings['prefix_length'] = intval( $v ); - } - break; - case 'suggest_mode' : - if ( in_array( $v, self::$ALLOWED_MODE ) ) { - $settings['mode'] = $v; - } - break; - case 'collate' : - if ( $v === 'true' ) { - $settings['collate'] = true; - } elseif ( $v === 'false' ) { - $settings['collate'] = false; - } - break; - case 'smoothing' : - if ( $v === 'laplace' ) { - $settings['smoothing_model'] = [ - 'laplace' => [ - 'alpha' => 0.5 - ] - ]; - } elseif ( $v === 'stupid_backoff' ) { - $settings['smoothing_model'] = [ - 'stupid_backoff' => [ - 'discount' => 0.4 - ] - ]; - } - break; - case 'laplace_alpha' : - if ( is_numeric( $v ) && $v >= 0 && $v <= 1 ) { - $laplaceAlpha = floatval( $v ); - } - break; - case 'stupid_backoff_discount' : - if ( is_numeric( $v ) && $v >= 0 && $v <= 1 ) { - $stupidBackoffDiscount = floatval( $v ); - } - break; - } - } - - // Apply smoothing model options, if none provided we'll use elasticsearch defaults - if ( isset( $settings['smoothing_model']['laplace'] ) && - isset( $laplaceAlpha ) ) { - $settings['smoothing_model']['laplace'] = [ - 'alpha' => $laplaceAlpha - ]; - } - if ( isset( $settings['smoothing_model']['stupid_backoff'] ) && - isset( $stupidBackoffDiscount ) ) { - $settings['smoothing_model']['stupid_backoff'] = [ - 'discount' => $stupidBackoffDiscount - ]; - } - return $settings; - } -} -- To view, visit https://gerrit.wikimedia.org/r/402089 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I2447b15c39677501cd8e738551b7028de7b8fb4c Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: DCausse <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
