DCausse has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/402089 )

Change subject: Switch phrase suggester config to profile manager
......................................................................

Switch phrase suggester config to profile manager

Bug: T183279
Change-Id: I2447b15c39677501cd8e738551b7028de7b8fb4c
---
M CirrusSearch.php
M autoload.php
M docs/settings.txt
A includes/Profile/PhraseSuggesterProfileRepoWrapper.php
M includes/Profile/SearchProfileService.php
M includes/Profile/SearchProfileServiceFactory.php
M includes/Query/FullTextQueryStringQueryBuilder.php
M profiles/PhraseSuggesterProfiles.config.php
D profiles/PhraseSuggesterProfiles.php
9 files changed, 232 insertions(+), 166 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch 
refs/changes/89/402089/1

diff --git a/CirrusSearch.php b/CirrusSearch.php
index 40f3408..a27a1ab 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -21,7 +21,6 @@
  * http://www.gnu.org/copyleft/gpl.html
  */
 
-require_once __DIR__ . "/profiles/PhraseSuggesterProfiles.config.php";
 require_once __DIR__ . "/profiles/SaneitizeProfiles.php";
 require_once __DIR__ . "/profiles/FullTextQueryBuilderProfiles.config.php";
 
@@ -393,8 +392,13 @@
 $wgCirrusSearchEnablePhraseSuggest = true;
 
 /**
+ * List of additional phrase suggester profiles
+ * see profiles/PhraseSuggesterProfiles.config.php
+ */
+$wgCirrusSearchPhraseSuggestProfiles = [];
+
+/**
  * Set the Phrase suggester settings using the default profile.
- * see profiles/PhraseSuggesterProfiles.php
  */
 $wgCirrusSearchPhraseSuggestSettings = 'default';
 
diff --git a/autoload.php b/autoload.php
index 923d938..2a707cb 100644
--- a/autoload.php
+++ b/autoload.php
@@ -109,11 +109,11 @@
        'CirrusSearch\\MultiSearchRequestLog' => __DIR__ . 
'/includes/MultiSearchRequestLog.php',
        'CirrusSearch\\NearMatchPicker' => __DIR__ . 
'/includes/NearMatchPicker.php',
        'CirrusSearch\\OtherIndexes' => __DIR__ . '/includes/OtherIndexes.php',
-       'CirrusSearch\\PhraseSuggesterProfiles' => __DIR__ . 
'/profiles/PhraseSuggesterProfiles.php',
        'CirrusSearch\\Profile\\ArrayProfileRepository' => __DIR__ . 
'/includes/Profile/ArrayProfileRepository.php',
        'CirrusSearch\\Profile\\CompletionSearchProfileNameResolver' => __DIR__ 
. '/includes/Profile/CompletionSearchProfileNameResolver.php',
        'CirrusSearch\\Profile\\CompletionSearchProfileRepository' => __DIR__ . 
'/includes/Profile/CompletionSearchProfileRepository.php',
        'CirrusSearch\\Profile\\ConfigProfileRepository' => __DIR__ . 
'/includes/Profile/ConfigProfileRepository.php',
+       'CirrusSearch\\Profile\\PhraseSuggesterProfileRepoWrapper' => __DIR__ . 
'/includes/Profile/PhraseSuggesterProfileRepoWrapper.php',
        'CirrusSearch\\Profile\\SearchProfileNameResolver' => __DIR__ . 
'/includes/Profile/SearchProfileNameResolver.php',
        'CirrusSearch\\Profile\\SearchProfileRepository' => __DIR__ . 
'/includes/Profile/SearchProfileRepository.php',
        'CirrusSearch\\Profile\\SearchProfileService' => __DIR__ . 
'/includes/Profile/SearchProfileService.php',
diff --git a/docs/settings.txt b/docs/settings.txt
index f00900b..5f3874c 100644
--- a/docs/settings.txt
+++ b/docs/settings.txt
@@ -438,7 +438,14 @@
     $wgCirrusSearchPhraseSuggestSettings = 'default';
 
 Set the Phrase suggester settings using the default profile.
-see profiles/PhraseSuggesterProfiles.php
+
+; $wgCirrusSearchPhraseSuggestProfiles
+
+Default:
+    $wgCirrusSearchPhraseSuggestProfiles = []
+
+Set additional phrase suggester profiles
+(see profiles/PhraseSuggesterProfiles.config.php)
 
 ; $wgCirrusSearchPhraseSuggestReverseField
 
diff --git a/includes/Profile/PhraseSuggesterProfileRepoWrapper.php 
b/includes/Profile/PhraseSuggesterProfileRepoWrapper.php
new file mode 100644
index 0000000..ac7e701
--- /dev/null
+++ b/includes/Profile/PhraseSuggesterProfileRepoWrapper.php
@@ -0,0 +1,187 @@
+<?php
+
+namespace CirrusSearch\Profile;
+use CirrusSearch\Util;
+use MediaWiki\MediaWikiServices;
+use RuntimeException;
+
+/**
+ * Wrapper to augment the phrase suggester profile settings
+ * with customization on-wiki using system messages.
+ */
+class PhraseSuggesterProfileRepoWrapper implements SearchProfileRepository {
+
+       const MAX_ERRORS_HARD_LIMIT = 2;
+       const MAX_TERM_FREQ_HARD_LIMIT = 0.6;
+       const PREFIX_LENGTH_HARD_LIMIT = 2;
+
+       /**
+        * @var string[]
+        */
+       private static $ALLOWED_MODE = [ 'missing', 'popular', 'always' ];
+
+       /**
+        * @var SearchProfileRepository
+        */
+       private $wrapped;
+
+       /**
+        * PhraseSuggesterProfileRepoWrapper constructor.
+        * @param SearchProfileRepository $wrapped
+        */
+       private function __construct( SearchProfileRepository $wrapped ) {
+               $this->wrapped = $wrapped;
+       }
+
+       /**
+        * Wrap this repository
+        * @return SearchProfileRepository
+        */
+       public static function wrap( SearchProfileRepository $repo ) {
+               if ( $repo->repositoryType() !== 
SearchProfileService::PHRASE_SUGGESTER ) {
+                       throw new RuntimeException( self::class . " only 
supports repositories of type " . SearchProfileService::PHRASE_SUGGESTER );
+               }
+               return new self( $repo );
+       }
+
+       /**
+        * The repository type
+        * @return string
+        */
+       public function repositoryType() {
+               return $this->wrapped->repositoryType();
+       }
+
+       /**
+        * The repository name
+        * @return string
+        */
+       public function repositoryName() {
+               return $this->wrapped->repositoryName();
+       }
+
+       /**
+        * Load a profile named $name
+        * @param string $name
+        * @return array|null the profile data or null if not found
+        */
+       public function getProfile( $name ) {
+               $settings = $this->wrapped->getProfile( $name );
+               if ( $settings === null ) {
+                       return null;
+               }
+               $cache = 
MediaWikiServices::getInstance()->getLocalServerObjectCache();
+               $lines = $cache->getWithSetCallback(
+                       $cache->makeKey( 'cirrussearch-didyoumean-settings' ),
+                       600,
+                       function () {
+                               $source = wfMessage( 
'cirrussearch-didyoumean-settings' )->inContentLanguage();
+                               if ( !$source || $source->isDisabled() ) {
+                                       return [];
+                               }
+                               return Util::parseSettingsInMessage( 
$source->plain() );
+                       }
+               );
+
+               $laplaceAlpha = null;
+               $stupidBackoffDiscount = null;
+               foreach ( $lines as $line ) {
+                       $linePieces = explode( ':', $line, 2 );
+                       if ( count( $linePieces ) ) {
+                               // Skip improperly formatted lines without a 
key:value
+                               continue;
+                       }
+                       $k = $linePieces[0];
+                       $v = $linePieces[1];
+
+                       switch ( $k ) {
+                               case 'max_errors' :
+                                       if ( is_numeric( $v ) && $v >= 1 && $v 
<= self::MAX_ERRORS_HARD_LIMIT ) {
+                                               $settings['max_errors'] = 
floatval( $v );
+                                       }
+                                       break;
+                               case 'confidence' :
+                                       if ( is_numeric( $v ) && $v >= 0 ) {
+                                               $settings['confidence'] = 
floatval( $v );
+                                       }
+                                       break;
+                               case 'max_term_freq' :
+                                       if ( is_numeric( $v ) && $v >= 0 && $v 
<= self::MAX_TERM_FREQ_HARD_LIMIT ) {
+                                               $settings['max_term_freq'] = 
floatval( $v );
+                                       }
+                                       break;
+                               case 'min_doc_freq' :
+                                       if ( is_numeric( $v ) && $v >= 0 && $v 
< 1 ) {
+                                               $settings['min_doc_freq'] = 
floatval( $v );
+                                       }
+                                       break;
+                               case 'prefix_length' :
+                                       if ( is_numeric( $v ) && $v >= 0 && $v 
<= self::PREFIX_LENGTH_HARD_LIMIT ) {
+                                               $settings['prefix_length'] = 
intval( $v );
+                                       }
+                                       break;
+                               case 'suggest_mode' :
+                                       if ( in_array( $v, self::$ALLOWED_MODE 
) ) {
+                                               $settings['mode'] = $v;
+                                       }
+                                       break;
+                               case 'collate' :
+                                       if ( $v === 'true' ) {
+                                               $settings['collate'] = true;
+                                       } elseif ( $v === 'false' ) {
+                                               $settings['collate'] = false;
+                                       }
+                                       break;
+                               case 'smoothing' :
+                                       if ( $v === 'laplace' ) {
+                                               $settings['smoothing_model'] = [
+                                                       'laplace' => [
+                                                               'alpha' => 0.5
+                                                       ]
+                                               ];
+                                       } elseif ( $v === 'stupid_backoff' ) {
+                                               $settings['smoothing_model'] = [
+                                                       'stupid_backoff' => [
+                                                               'discount' => 
0.4
+                                                       ]
+                                               ];
+                                       }
+                                       break;
+                               case 'laplace_alpha' :
+                                       if ( is_numeric( $v ) && $v >= 0 && $v 
<= 1 ) {
+                                               $laplaceAlpha = floatval( $v );
+                                       }
+                                       break;
+                               case 'stupid_backoff_discount' :
+                                       if ( is_numeric( $v ) && $v >= 0 && $v 
<= 1 ) {
+                                               $stupidBackoffDiscount = 
floatval( $v );
+                                       }
+                                       break;
+                       }
+               }
+
+               // Apply smoothing model options, if none provided we'll use 
elasticsearch defaults
+               if ( isset( $settings['smoothing_model']['laplace'] ) &&
+                        isset( $laplaceAlpha ) ) {
+                       $settings['smoothing_model']['laplace'] = [
+                               'alpha' => $laplaceAlpha
+                       ];
+               }
+               if ( isset( $settings['smoothing_model']['stupid_backoff'] ) &&
+                        isset( $stupidBackoffDiscount ) ) {
+                       $settings['smoothing_model']['stupid_backoff'] = [
+                               'discount' => $stupidBackoffDiscount
+                       ];
+               }
+               return $settings;
+       }
+
+       /**
+        * Get the list of profiles that we want to expose to the user.
+        *
+        * @return array list of profiles index by name
+        */
+       public function listExposedProfiles() {
+               return $this->wrapped->listExposedProfiles();
+       }
+}
diff --git a/includes/Profile/SearchProfileService.php 
b/includes/Profile/SearchProfileService.php
index 9b68045..dc37d63 100644
--- a/includes/Profile/SearchProfileService.php
+++ b/includes/Profile/SearchProfileService.php
@@ -32,6 +32,7 @@
        const RESCORE = 'rescore';
        const RESCORE_FUNCTION_CHAINS = 'rescore_function_chains';
        const COMPLETION = 'completion';
+       const PHRASE_SUGGESTER = 'phrase_suggester';
 
        const CONTEXT_FULLTEXT = 'fulltext';
        const CONTEXT_PREFIXSEARCH = 'prefixsearch';
diff --git a/includes/Profile/SearchProfileServiceFactory.php 
b/includes/Profile/SearchProfileServiceFactory.php
index 07a3c50..1e15821 100644
--- a/includes/Profile/SearchProfileServiceFactory.php
+++ b/includes/Profile/SearchProfileServiceFactory.php
@@ -3,6 +3,7 @@
 namespace CirrusSearch\Profile;
 
 use CirrusSearch\SearchConfig;
+use Elastica\Exception\RuntimeException;
 
 /**
  * Factory to build and prepare search profiles.
@@ -26,6 +27,7 @@
                $this->loadSimilarityProfiles( $service, $config );
                $this->loadRescoreProfiles( $service, $config );
                $this->loadCompletionProfiles( $service, $config );
+               $this->loadPhraseSuggesterProfiles( $service, $config );
 
                if ( $config->isLocalWiki() ) {
                        \Hooks::run( 'CirrusSearchProfileService', [ $service ] 
);
@@ -118,4 +120,27 @@
                                $defaultProfile ) );
                }
        }
+
+       /**
+        * @param SearchProfileService $service
+        * @param SearchConfig $config
+        */
+       private function loadPhraseSuggesterProfiles( SearchProfileService 
$service, SearchConfig $config ) {
+               $service->registerRepository( 
PhraseSuggesterProfileRepoWrapper::wrap( new ArrayProfileRepository(
+                       SearchProfileService::PHRASE_SUGGESTER, 
self::CIRRUS_BASE,
+                       require __DIR__ . 
'/../../profiles/PhraseSuggesterProfiles.config.php' ) ) );
+
+               $service->registerRepository( 
PhraseSuggesterProfileRepoWrapper::wrap( new ConfigProfileRepository(
+                       SearchProfileService::PHRASE_SUGGESTER, 
self::CIRRUS_CONFIG,
+                       'CirrusSearchPhraseSuggestProfiles', $config ) ) );
+
+               $defaultProfile = $config->get( 
'CirrusSearchPhraseSuggestSettings' );
+               if ( is_array( $defaultProfile ) ) {
+                       throw new RuntimeException( "CirrusSearch no longer 
supports setting CirrusSearchPhraseSuggesterSettings as an array. " .
+                               "please use a string to reference the profile 
you want." );
+               }
+               if ( $defaultProfile ) {
+                       $service->registerSimpleNameResolver( 
SearchProfileService::PHRASE_SUGGESTER, $defaultProfile );
+               }
+       }
 }
diff --git a/includes/Query/FullTextQueryStringQueryBuilder.php 
b/includes/Query/FullTextQueryStringQueryBuilder.php
index 72798c9..2e1c98e 100644
--- a/includes/Query/FullTextQueryStringQueryBuilder.php
+++ b/includes/Query/FullTextQueryStringQueryBuilder.php
@@ -2,12 +2,11 @@
 
 namespace CirrusSearch\Query;
 
-use CirrusSearch\PhraseSuggesterProfiles;
+use CirrusSearch\Profile\SearchProfileService;
 use CirrusSearch\SearchConfig;
 use CirrusSearch\Searcher;
 use CirrusSearch\Search\SearchContext;
 use CirrusSearch\Extra\Query\TokenCountRouter;
-use Elastica\Exception\RuntimeException;
 use MediaWiki\Logger\LoggerFactory;
 
 /**
@@ -277,18 +276,8 @@
         */
        private function buildSuggestConfig( $field, $searchContext ) {
                // check deprecated settings
-               $profile = $this->config->get( 
'CirrusSearchPhraseSuggestSettings' );
-               if ( is_array( $profile ) ) {
-                       // BC code to support profiles set as array in this 
config var
-                       $suggestSettings = $profile;
-               } else {
-                       $suggestSettings = $this->config->getElement( 
'CirrusSearchPhraseSuggestProfiles', $profile );
-                       if ( !$suggestSettings ) {
-                               throw new RuntimeException( "Unkown 
phrase_suggest profile $profile" );
-                       }
-               }
-
-               $suggestSettings = 
PhraseSuggesterProfiles::overrideOptionsFromMessage( $suggestSettings );
+               $suggestSettings = $this->config->getProfileService()
+                       ->loadProfileForContext( 
SearchProfileService::PHRASE_SUGGESTER );
                $settings = [
                        'phrase' => [
                                'field' => $field,
diff --git a/profiles/PhraseSuggesterProfiles.config.php 
b/profiles/PhraseSuggesterProfiles.config.php
index 617ad42..fc835e0 100644
--- a/profiles/PhraseSuggesterProfiles.config.php
+++ b/profiles/PhraseSuggesterProfiles.config.php
@@ -1,7 +1,5 @@
 <?php
 
-namespace CirrusSearch;
-
 /**
  * CirrusSearch - List of profiles for "Did you mean" suggestions
  *
@@ -21,7 +19,7 @@
  * http://www.gnu.org/copyleft/gpl.html
  */
 
-$wgCirrusSearchPhraseSuggestProfiles = [
+return [
        // This is the default settings
        'default' => [
                // The suggest mode used by the phrase suggester
diff --git a/profiles/PhraseSuggesterProfiles.php 
b/profiles/PhraseSuggesterProfiles.php
deleted file mode 100644
index 117ac4b..0000000
--- a/profiles/PhraseSuggesterProfiles.php
+++ /dev/null
@@ -1,145 +0,0 @@
-<?php
-
-namespace CirrusSearch;
-
-use MediaWiki\MediaWikiServices;
-
-/**
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- */
-
-class PhraseSuggesterProfiles {
-       const MAX_ERRORS_HARD_LIMIT = 2;
-       const MAX_TERM_FREQ_HARD_LIMIT = 0.6;
-       /**
-        * @var string[]
-        */
-       private static $ALLOWED_MODE = [ 'missing', 'popular', 'always' ];
-
-       const PREFIX_LENGTH_HARD_LIMIT = 2;
-
-       /**
-        * Override Phrase suggester options ("Did you mean?" suggestions)
-        * @param array $settings
-        * @return array
-        */
-       public static function overrideOptionsFromMessage( $settings ) {
-               $cache = 
MediaWikiServices::getInstance()->getLocalServerObjectCache();
-               $lines = $cache->getWithSetCallback(
-                       $cache->makeKey( 'cirrussearch-didyoumean-settings' ),
-                       600,
-                       function () {
-                               $source = wfMessage( 
'cirrussearch-didyoumean-settings' )->inContentLanguage();
-                               if ( !$source || $source->isDisabled() ) {
-                                       return [];
-                               }
-                               return Util::parseSettingsInMessage( 
$source->plain() );
-                       }
-               );
-
-               $laplaceAlpha = null;
-               $stupidBackoffDiscount = null;
-               foreach ( $lines as $line ) {
-                       $linePieces = explode( ':', $line, 2 );
-                       if ( count( $linePieces ) ) {
-                               // Skip improperly formatted lines without a 
key:value
-                               continue;
-                       }
-                       $k = $linePieces[0];
-                       $v = $linePieces[1];
-
-                       switch ( $k ) {
-                       case 'max_errors' :
-                               if ( is_numeric( $v ) && $v >= 1 && $v <= 
self::MAX_ERRORS_HARD_LIMIT ) {
-                                       $settings['max_errors'] = floatval( $v 
);
-                               }
-                               break;
-                       case 'confidence' :
-                               if ( is_numeric( $v ) && $v >= 0 ) {
-                                       $settings['confidence'] = floatval( $v 
);
-                               }
-                               break;
-                       case 'max_term_freq' :
-                               if ( is_numeric( $v ) && $v >= 0 && $v <= 
self::MAX_TERM_FREQ_HARD_LIMIT ) {
-                                       $settings['max_term_freq'] = floatval( 
$v );
-                               }
-                               break;
-                       case 'min_doc_freq' :
-                               if ( is_numeric( $v ) && $v >= 0 && $v < 1 ) {
-                                       $settings['min_doc_freq'] = floatval( 
$v );
-                               }
-                               break;
-                       case 'prefix_length' :
-                               if ( is_numeric( $v ) && $v >= 0 && $v <= 
self::PREFIX_LENGTH_HARD_LIMIT ) {
-                                       $settings['prefix_length'] = intval( $v 
);
-                               }
-                               break;
-                       case 'suggest_mode' :
-                               if ( in_array( $v, self::$ALLOWED_MODE ) ) {
-                                       $settings['mode'] = $v;
-                               }
-                               break;
-                       case 'collate' :
-                               if ( $v === 'true' ) {
-                                       $settings['collate'] = true;
-                               } elseif ( $v === 'false' ) {
-                                       $settings['collate'] = false;
-                               }
-                               break;
-                       case 'smoothing' :
-                               if ( $v === 'laplace' ) {
-                                       $settings['smoothing_model'] = [
-                                               'laplace' => [
-                                                       'alpha' => 0.5
-                                               ]
-                                       ];
-                               } elseif ( $v === 'stupid_backoff' ) {
-                                       $settings['smoothing_model'] = [
-                                               'stupid_backoff' => [
-                                                       'discount' => 0.4
-                                               ]
-                                       ];
-                               }
-                               break;
-                       case 'laplace_alpha' :
-                               if ( is_numeric( $v ) && $v >= 0 && $v <= 1 ) {
-                                       $laplaceAlpha = floatval( $v );
-                               }
-                               break;
-                       case 'stupid_backoff_discount' :
-                               if ( is_numeric( $v ) && $v >= 0 && $v <= 1 ) {
-                                       $stupidBackoffDiscount = floatval( $v );
-                               }
-                               break;
-                       }
-               }
-
-               // Apply smoothing model options, if none provided we'll use 
elasticsearch defaults
-               if ( isset( $settings['smoothing_model']['laplace'] ) &&
-                       isset( $laplaceAlpha ) ) {
-                       $settings['smoothing_model']['laplace'] = [
-                               'alpha' => $laplaceAlpha
-                       ];
-               }
-               if ( isset( $settings['smoothing_model']['stupid_backoff'] ) &&
-                       isset( $stupidBackoffDiscount ) ) {
-                       $settings['smoothing_model']['stupid_backoff'] = [
-                               'discount' => $stupidBackoffDiscount
-                       ];
-               }
-               return $settings;
-       }
-}

-- 
To view, visit https://gerrit.wikimedia.org/r/402089
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I2447b15c39677501cd8e738551b7028de7b8fb4c
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: DCausse <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to