DCausse has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/402071 )

Change subject: [cleanup] Simplify phrase suggester settings
......................................................................

[cleanup] Simplify phrase suggester settings

- remove code to override settings with URI params
- switch profile setting to a string instead of the profile array
- remove deprecated settings
- moved safeguard settings to class constant instead of config vars

Change-Id: Id76389c5c1a640bad7175f5507b7abd7f7be4e7f
---
M CirrusSearch.php
M docs/settings.txt
M includes/Api/ConfigDump.php
M includes/Hooks.php
M includes/Query/FullTextQueryStringQueryBuilder.php
M profiles/PhraseSuggesterProfiles.php
6 files changed, 40 insertions(+), 230 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch 
refs/changes/71/402071/1

diff --git a/CirrusSearch.php b/CirrusSearch.php
index 5c85acc..189a1e8 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -397,53 +397,10 @@
 $wgCirrusSearchEnablePhraseSuggest = true;
 
 /**
- * NOTE: This settings is deprecated: update or create your own 
PhraseSuggester profile.
- * Maximum number of terms that we ask phrase suggest to correct.
- * See max_errors on 
http://www.elasticsearch.org/guide/reference/api/search/suggest/
- * $wgCirrusSearchPhraseSuggestMaxErrors = 2;
- */
-
-/**
- * NOTE: This settings is deprecated: update or create your own 
PhraseSuggester profile.
- * Confidence level required to suggest new phrases.
- * See confidence on 
http://www.elasticsearch.org/guide/reference/api/search/suggest/
- * $wgCirrusSearchPhraseSuggestConfidence = 2.0;
- */
-
-/**
- * Set the hard limit for $wgCirrusSearchPhraseSuggestMaxErrors. This prevents 
customizing
- * this setting in a way that could hurt the system performances.
- */
-$wgCirrusSearchPhraseSuggestMaxErrorsHardLimit = 2;
-
-/**
- * Set the hard limit for $wgCirrusSearchPhraseMaxTermFreq. This prevents 
customizing
- * this setting in a way that could hurt the system performances.
- */
-$wgCirrusSearchPhraseSuggestMaxTermFreqHardLimit = 0.6;
-
-/**
- * List of allowed values for the suggest mode
- */
-$wgCirrusSearchPhraseSuggestAllowedMode = [ 'missing', 'popular', 'always' ];
-
-/**
- * List of allowed smoothing models
- */
-$wgCirrusSearchPhraseSuggestAllowedSmoothingModel = [ 'stupid_backoff', 
'laplace', 'linear' ];
-
-/**
- * Set the hard limit for $wgCirrusSearchPhraseSuggestPrefixLength. This 
prevents customizing
- * this setting in a way that could hurt the system performances.
- * (This is the minimal value)
- */
-$wgCirrusSearchPhraseSuggestPrefixLengthHardLimit = 2;
-
-/**
  * Set the Phrase suggester settings using the default profile.
  * see profiles/PhraseSuggesterProfiles.php
  */
-$wgCirrusSearchPhraseSuggestSettings = 
$wgCirrusSearchPhraseSuggestProfiles['default'];
+$wgCirrusSearchPhraseSuggestSettings = 'default';
 
 /**
  * Use a reverse field to build the did you mean suggestions.
diff --git a/docs/settings.txt b/docs/settings.txt
index 131ad0c..f00900b 100644
--- a/docs/settings.txt
+++ b/docs/settings.txt
@@ -432,49 +432,10 @@
 
 Should the phrase suggester (did you mean) be enabled?
 
-; $wgCirrusSearchPhraseSuggestMaxErrorsHardLimit
-
-Default:
-    $wgCirrusSearchPhraseSuggestMaxErrorsHardLimit = 2;
-
-Set the hard limit for $wgCirrusSearchPhraseSuggestMaxErrors. This prevents 
customizing
-this setting in a way that could hurt the system performances.
-
-; $wgCirrusSearchPhraseSuggestMaxTermFreqHardLimit
-
-Default:
-    $wgCirrusSearchPhraseSuggestMaxTermFreqHardLimit = 0.6;
-
-Set the hard limit for $wgCirrusSearchPhraseMaxTermFreq. This prevents 
customizing
-this setting in a way that could hurt the system performances.
-
-; $wgCirrusSearchPhraseSuggestAllowedMode
-
-Default:
-    $wgCirrusSearchPhraseSuggestAllowedMode = [ 'missing', 'popular', 'always' 
];
-
-List of allowed values for the suggest mode.
-
-; $wgCirrusSearchPhraseSuggestAllowedSmoothingModel
-
-Default:
-    $wgCirrusSearchPhraseSuggestAllowedSmoothingModel = [ 'stupid_backoff', 
'laplace', 'linear' ];
-
-List of allowed smoothing models.
-
-; $wgCirrusSearchPhraseSuggestPrefixLengthHardLimit
-
-Default:
-    $wgCirrusSearchPhraseSuggestPrefixLengthHardLimit = 2;
-
-Set the hard limit for $wgCirrusSearchPhraseSuggestPrefixLength. This prevents 
customizing
-this setting in a way that could hurt the system performances.
-(This is the minimal value)
-
 ; $wgCirrusSearchPhraseSuggestSettings
 
 Default:
-    $wgCirrusSearchPhraseSuggestSettings = 
$wgCirrusSearchPhraseSuggestProfiles['default'];
+    $wgCirrusSearchPhraseSuggestSettings = 'default';
 
 Set the Phrase suggester settings using the default profile.
 see profiles/PhraseSuggesterProfiles.php
@@ -1476,28 +1437,6 @@
 Customize certain fields with a specific implementation.
 Useful to apply CirrusSearch specific config to fields
 controlled by MediaWiki core.
-
-=== Deprecated settings ===
-
-; $wgCirrusSearchPhraseSuggestMaxErrors
-
-Default:
-    $wgCirrusSearchPhraseSuggestMaxErrors = 2;
-
-NOTE: This setting is deprecated: update or create your own PhraseSuggester 
profile.
-
-Maximum number of terms that we ask phrase suggest to correct.
-See max_errors on 
http://www.elasticsearch.org/guide/reference/api/search/suggest/
-
-; $wgCirrusSearchPhraseSuggestConfidence
-
-Default:
-    $wgCirrusSearchPhraseSuggestConfidence = 2.0;
-
-NOTE: This setting is deprecated: update or create your own PhraseSuggester 
profile.
-
-Confidence level required to suggest new phrases.
-See confidence on 
http://www.elasticsearch.org/guide/reference/api/search/suggest/
 
 ; $wgCirrusSearchExtraIndexSettings
 
diff --git a/includes/Api/ConfigDump.php b/includes/Api/ConfigDump.php
index e3d3c1f..c14db45 100644
--- a/includes/Api/ConfigDump.php
+++ b/includes/Api/ConfigDump.php
@@ -81,6 +81,7 @@
                'CirrusSearchEnableArchive',
                'CirrusSearchUseIcuFolding',
                'CirrusSearchUseIcuTokenizer',
+               'CirrusSearchPhraseSuggestProfiles',
                // All the config below was added when moving this data
                // from CirrusSearch config to a static array in this class
                'CirrusSearchDevelOptions',
diff --git a/includes/Hooks.php b/includes/Hooks.php
index 42950dc..18d20fd 100644
--- a/includes/Hooks.php
+++ b/includes/Hooks.php
@@ -102,7 +102,6 @@
                }
 
                self::overrideMoreLikeThisOptionsFromMessage();
-               PhraseSuggesterProfiles::overrideOptionsFromMessage();
 
                if ( $request ) {
                        // Engage the experimental highlighter if a url 
parameter requests it
@@ -119,7 +118,6 @@
                        self::overrideYesNo( 
$wgCirrusSearchAllFieldsForRescore, $request, 'cirrusUseAllFieldsForRescore' );
                        self::overrideUseExtraPluginForRegex( $request );
                        self::overrideMoreLikeThisOptions( $request );
-                       PhraseSuggesterProfiles::overrideOptions( $request );
                        RescoreProfiles::overrideOptions( $request );
                        FullTextQueryBuilderProfiles::overrideOptions( $request 
);
                        self::overrideSecret( 
$wgCirrusSearchLogElasticRequests, $wgCirrusSearchLogElasticRequestsSecret, 
$request, 'cirrusLogElasticRequests', false );
diff --git a/includes/Query/FullTextQueryStringQueryBuilder.php 
b/includes/Query/FullTextQueryStringQueryBuilder.php
index 6a0458d..4f1fea8 100644
--- a/includes/Query/FullTextQueryStringQueryBuilder.php
+++ b/includes/Query/FullTextQueryStringQueryBuilder.php
@@ -2,10 +2,12 @@
 
 namespace CirrusSearch\Query;
 
+use CirrusSearch\PhraseSuggesterProfiles;
 use CirrusSearch\SearchConfig;
 use CirrusSearch\Searcher;
 use CirrusSearch\Search\SearchContext;
 use CirrusSearch\Extra\Query\TokenCountRouter;
+use Elastica\Exception\RuntimeException;
 use MediaWiki\Logger\LoggerFactory;
 
 /**
@@ -275,16 +277,18 @@
         */
        private function buildSuggestConfig( $field, $searchContext ) {
                // check deprecated settings
-               $suggestSettings = $this->config->get( 
'CirrusSearchPhraseSuggestSettings' );
-               $maxErrors = $this->config->get( 
'CirrusSearchPhraseSuggestMaxErrors' );
-               if ( isset( $maxErrors ) ) {
-                       $suggestSettings['max_errors'] = $maxErrors;
-               }
-               $confidence = $this->config->get( 
'CirrusSearchPhraseSuggestMaxErrors' );
-               if ( isset( $confidence ) ) {
-                       $suggestSettings['confidence'] = $confidence;
+               $profile = $this->config->get( 
'CirrusSearchPhraseSuggestSettings' );
+               if ( is_array( $profile ) ) {
+                       // BC code to support profiles set as array in this 
config var
+                       $suggestSettings = $profile;
+               } else {
+                       $suggestSettings = $this->config->getElement( 
'CirrusSearchPhraseSuggestProfiles', $profile );
+                       if ( !$suggestSettings ) {
+                               throw new RuntimeException( "Unkown 
phrase_suggest profile $profile" );
+                       }
                }
 
+               PhraseSuggesterProfiles::overrideOptionsFromMessage( 
$suggestSettings );
                $settings = [
                        'phrase' => [
                                'field' => $field,
diff --git a/profiles/PhraseSuggesterProfiles.php 
b/profiles/PhraseSuggesterProfiles.php
index 09ee5ff..6a5d5cc 100644
--- a/profiles/PhraseSuggesterProfiles.php
+++ b/profiles/PhraseSuggesterProfiles.php
@@ -22,102 +22,20 @@
  */
 
 class PhraseSuggesterProfiles {
+       const MAX_ERRORS_HARD_LIMIT = 2;
+       const MAX_TERM_FREQ_HARD_LIMIT = 0.6;
        /**
-        * Override Phrase suggester options ("Did you mean?" suggestions)
-        *
-        * @param \WebRequest $request
+        * @var string[]
         */
-       public static function overrideOptions( $request ) {
-               global $wgCirrusSearchPhraseSuggestMaxErrors,
-                       $wgCirrusSearchPhraseSuggestConfidence,
-                       $wgCirrusSearchPhraseSuggestSettings,
-                       $wgCirrusSearchPhraseSuggestMaxTermFreqHardLimit,
-                       $wgCirrusSearchPhraseSuggestMaxErrorsHardLimit,
-                       $wgCirrusSearchPhraseSuggestPrefixLengthHardLimit,
-                       $wgCirrusSearchPhraseSuggestAllowedMode,
-                       $wgCirrusSearchPhraseSuggestAllowedSmoothingModel,
-                       $wgCirrusSearchPhraseSuggestReverseField;
+       private static $ALLOWED_MODE = [ 'missing', 'popular', 'always' ];
 
-               Util::overrideYesNo( 
$wgCirrusSearchPhraseSuggestReverseField['use'], $request,
-                       'cirrusSuggUseReverse' );
-               Util::overrideNumeric( 
$wgCirrusSearchPhraseSuggestSettings['max_errors'], $request,
-                       'cirrusSuggMaxErrors', 
$wgCirrusSearchPhraseSuggestMaxErrorsHardLimit );
-               Util::overrideNumeric( 
$wgCirrusSearchPhraseSuggestSettings['confidence'], $request,
-                       'cirrusSuggConfidence' );
-               Util::overrideNumeric( 
$wgCirrusSearchPhraseSuggestSettings['max_term_freq'], $request,
-                       'cirrusSuggMaxTermFreq', 
$wgCirrusSearchPhraseSuggestMaxTermFreqHardLimit );
-               Util::overrideNumeric( 
$wgCirrusSearchPhraseSuggestSettings['min_doc_freq'], $request,
-                       'cirrusSuggMinDocFreq' );
-               Util::overrideNumeric( 
$wgCirrusSearchPhraseSuggestSettings['prefix_length'], $request,
-                       'cirrusSuggPrefixLength', 
$wgCirrusSearchPhraseSuggestPrefixLengthHardLimit, false );
-               $mode = $request->getVal( 'cirrusSuggMode' );
-               if ( isset( $mode ) && in_array( $mode, 
$wgCirrusSearchPhraseSuggestAllowedMode ) ) {
-                       $wgCirrusSearchPhraseSuggestSettings['mode'] = $mode;
-               }
-
-               // NOTE: we do not allow collate_minimum_should_match to be 
customized, it'd be hard to parse.
-               Util::overrideYesNo( 
$wgCirrusSearchPhraseSuggestSettings['collate'], $request, 'cirrusSuggCollate' 
);
-
-               $smoothing = $request->getVal( 'cirrusSuggSmoothing' );
-               if ( isset( $smoothing ) && in_array( $smoothing, 
$wgCirrusSearchPhraseSuggestAllowedSmoothingModel ) ) {
-                       // We do not support linear_interpolation customization 
yet, should be added
-                       // later if proven useful.
-                       switch ( $smoothing ) {
-                       case 'laplace' :
-                               
$wgCirrusSearchPhraseSuggestSettings['smoothing_model'] = [
-                                       'laplace' => [
-                                               'alpha' => 0.5
-                                       ]
-                               ];
-                               break;
-                       case 'stupid_backoff' :
-                               
$wgCirrusSearchPhraseSuggestSettings['smoothing_model'] = [
-                                       'stupid_backoff' => [
-                                               'discount' => 0.4
-                                       ]
-                               ];
-                               break;
-                       }
-               }
-
-               // Custom discount for stupid_backoff smoothing model
-               if ( isset( 
$wgCirrusSearchPhraseSuggestSettings['smoothing_model']['stupid_backoff'] ) ) {
-                       $discount = $request->getVal( 'cirrusSuggDiscount' );
-                       if ( is_numeric( $discount ) && $discount <= 1 && 
$discount >= 0 ) {
-                               
$wgCirrusSearchPhraseSuggestSettings['smoothing_model']['stupid_backoff']['discount']
 = floatval( $discount );
-                       }
-               }
-
-               // Custom alpha for laplace smoothing model
-               if ( isset( 
$wgCirrusSearchPhraseSuggestSettings['smoothing_model']['laplace'] ) ) {
-                       $alpha = $request->getVal( 'cirrusSuggAlpha' );
-                       if ( is_numeric( $alpha ) && $alpha <= 1 && $alpha >= 0 
) {
-                               
$wgCirrusSearchPhraseSuggestSettings['smoothing_model']['laplace']['alpha'] = 
floatval( $alpha );
-                       }
-               }
-
-               // Support deprecated settings
-               if ( isset( $wgCirrusSearchPhraseSuggestConfidence ) ) {
-                       Util::overrideNumeric( 
$wgCirrusSearchPhraseSuggestConfidence, $request, 'cirrusSuggConfidence' );
-               }
-               if ( isset( $wgCirrusSearchPhraseSuggestMaxErrors ) ) {
-                       Util::overrideNumeric( 
$wgCirrusSearchPhraseSuggestMaxErrors, $request, 'cirrusSuggMaxErrors',
-                               $wgCirrusSearchPhraseSuggestMaxErrorsHardLimit 
);
-               }
-       }
+       const PREFIX_LENGTH_HARD_LIMIT = 2;
 
        /**
         * Override Phrase suggester options ("Did you mean?" suggestions)
+        * @param array &$settings
         */
-       public static function overrideOptionsFromMessage() {
-               global $wgCirrusSearchPhraseSuggestMaxErrors,
-                       $wgCirrusSearchPhraseSuggestConfidence,
-                       $wgCirrusSearchPhraseSuggestSettings,
-                       $wgCirrusSearchPhraseSuggestMaxTermFreqHardLimit,
-                       $wgCirrusSearchPhraseSuggestMaxErrorsHardLimit,
-                       $wgCirrusSearchPhraseSuggestPrefixLengthHardLimit,
-                       $wgCirrusSearchPhraseSuggestAllowedMode;
-
+       public static function overrideOptionsFromMessage( &$settings ) {
                $cache = 
MediaWikiServices::getInstance()->getLocalServerObjectCache();
                $lines = $cache->getWithSetCallback(
                        $cache->makeKey( 'cirrussearch-didyoumean-settings' ),
@@ -144,58 +62,51 @@
 
                        switch ( $k ) {
                        case 'max_errors' :
-                               if ( is_numeric( $v ) && $v >= 1 && $v <= 
$wgCirrusSearchPhraseSuggestMaxErrorsHardLimit ) {
-                                       
$wgCirrusSearchPhraseSuggestSettings['max_errors'] = floatval( $v );
-                                       // Support deprecated settings
-                                       if ( isset( 
$wgCirrusSearchPhraseSuggestMaxErrors ) ) {
-                                               
$wgCirrusSearchPhraseSuggestMaxErrors = floatval( $v );
-                                       }
+                               if ( is_numeric( $v ) && $v >= 1 && $v <= 
self::MAX_ERRORS_HARD_LIMIT ) {
+                                       $settings['max_errors'] = floatval( $v 
);
                                }
                                break;
                        case 'confidence' :
                                if ( is_numeric( $v ) && $v >= 0 ) {
-                                       
$wgCirrusSearchPhraseSuggestSettings['confidence'] = floatval( $v );
-                                       if ( isset( 
$wgCirrusSearchPhraseSuggestConfidence ) ) {
-                                               
$wgCirrusSearchPhraseSuggestConfidence = floatval( $v );
-                                       }
+                                       $settings['confidence'] = floatval( $v 
);
                                }
                                break;
                        case 'max_term_freq' :
-                               if ( is_numeric( $v ) && $v >= 0 && $v <= 
$wgCirrusSearchPhraseSuggestMaxTermFreqHardLimit ) {
-                                       
$wgCirrusSearchPhraseSuggestSettings['max_term_freq'] = floatval( $v );
+                               if ( is_numeric( $v ) && $v >= 0 && $v <= 
self::MAX_TERM_FREQ_HARD_LIMIT ) {
+                                       $settings['max_term_freq'] = floatval( 
$v );
                                }
                                break;
                        case 'min_doc_freq' :
                                if ( is_numeric( $v ) && $v >= 0 && $v < 1 ) {
-                                       
$wgCirrusSearchPhraseSuggestSettings['min_doc_freq'] = floatval( $v );
+                                       $settings['min_doc_freq'] = floatval( 
$v );
                                }
                                break;
                        case 'prefix_length' :
-                               if ( is_numeric( $v ) && $v >= 0 && $v <= 
$wgCirrusSearchPhraseSuggestPrefixLengthHardLimit ) {
-                                       
$wgCirrusSearchPhraseSuggestSettings['prefix_length'] = intval( $v );
+                               if ( is_numeric( $v ) && $v >= 0 && $v <= 
self::PREFIX_LENGTH_HARD_LIMIT ) {
+                                       $settings['prefix_length'] = intval( $v 
);
                                }
                                break;
                        case 'suggest_mode' :
-                               if ( in_array( $v, 
$wgCirrusSearchPhraseSuggestAllowedMode ) ) {
-                                       
$wgCirrusSearchPhraseSuggestSettings['mode'] = $v;
+                               if ( in_array( $v, self::$ALLOWED_MODE ) ) {
+                                       $settings['mode'] = $v;
                                }
                                break;
                        case 'collate' :
                                if ( $v === 'true' ) {
-                                       
$wgCirrusSearchPhraseSuggestSettings['collate'] = true;
+                                       $settings['collate'] = true;
                                } elseif ( $v === 'false' ) {
-                                       
$wgCirrusSearchPhraseSuggestSettings['collate'] = false;
+                                       $settings['collate'] = false;
                                }
                                break;
                        case 'smoothing' :
                                if ( $v === 'laplace' ) {
-                                       
$wgCirrusSearchPhraseSuggestSettings['smoothing_model'] = [
+                                       $settings['smoothing_model'] = [
                                                'laplace' => [
                                                        'alpha' => 0.5
                                                ]
                                        ];
                                } elseif ( $v === 'stupid_backoff' ) {
-                                       
$wgCirrusSearchPhraseSuggestSettings['smoothing_model'] = [
+                                       $settings['smoothing_model'] = [
                                                'stupid_backoff' => [
                                                        'discount' => 0.4
                                                ]
@@ -216,15 +127,15 @@
                }
 
                // Apply smoothing model options, if none provided we'll use 
elasticsearch defaults
-               if ( isset( 
$wgCirrusSearchPhraseSuggestSettings['smoothing_model']['laplace'] ) &&
+               if ( isset( $settings['smoothing_model']['laplace'] ) &&
                        isset( $laplaceAlpha ) ) {
-                       
$wgCirrusSearchPhraseSuggestSettings['smoothing_model']['laplace'] = [
+                       $settings['smoothing_model']['laplace'] = [
                                'alpha' => $laplaceAlpha
                        ];
                }
-               if ( isset( 
$wgCirrusSearchPhraseSuggestSettings['smoothing_model']['stupid_backoff'] ) &&
+               if ( isset( $settings['smoothing_model']['stupid_backoff'] ) &&
                        isset( $stupidBackoffDiscount ) ) {
-                       
$wgCirrusSearchPhraseSuggestSettings['smoothing_model']['stupid_backoff'] = [
+                       $settings['smoothing_model']['stupid_backoff'] = [
                                'discount' => $stupidBackoffDiscount
                        ];
                }

-- 
To view, visit https://gerrit.wikimedia.org/r/402071
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id76389c5c1a640bad7175f5507b7abd7f7be4e7f
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: DCausse <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to