jenkins-bot has submitted this change and it was merged.

Change subject: Use unicode plugin when present
......................................................................


Use unicode plugin when present

When the analysis-icu plugin is installed, we replace the lowercase filter
with ICU Unicode case folding (the nfkc_cf normalizer), which should do the
same thing _and_ fold together similar characters.

Change-Id: Ied112d1c942045e357f9e7ad5f97400377ce164c
---
M includes/AnalysisConfigBuilder.php
M tests/browser/features/full_text.feature
M tests/browser/features/step_definitions/search_steps.rb
M tests/browser/features/support/hooks.rb
4 files changed, 56 insertions(+), 2 deletions(-)

Approvals:
  Chad: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/includes/AnalysisConfigBuilder.php 
b/includes/AnalysisConfigBuilder.php
index 52ac4ee..41a42d4 100644
--- a/includes/AnalysisConfigBuilder.php
+++ b/includes/AnalysisConfigBuilder.php
@@ -29,13 +29,18 @@
         * and change the minor version when it changes but isn't
         * incompatible
         */
-       const VERSION = '0.6';
+       const VERSION = '0.7';
 
        /**
         * Language code we're building analysis for
         * @var string
         */
        private $language;
+
+       /**
+        * @var boolean is the icu plugin available?
+        */
+       private $icu;
 
        /**
         * Constructor
@@ -49,6 +54,7 @@
                                $this->elasticsearchLanguageAnalyzers = 
array_merge( $this->elasticsearchLanguageAnalyzers, $extra );
                        }
                }
+               $this->icu = in_array( 'analysis-icu', $plugins );
        }
 
        /**
@@ -65,7 +71,7 @@
         * Build an analysis config with sane defaults.
         */
        private function defaults() {
-               return array(
+               $defaults = array(
                        'analyzer' => array(
                                'text' => array(
                                        'type' => 
$this->getDefaultTextAnalyzerType(),
@@ -165,6 +171,22 @@
                                ),
                        ),
                );
+               foreach ( $defaults[ 'analyzer' ] as &$analyzer ) {
+                       if ( $analyzer[ 'type' ] === 'default' ) {
+                               $analyzer = array(
+                                       'type' => 'custom',
+                                       'tokenizer' => 'standard',
+                                       'filter' => array( 'standard', 
'lowercase' ),
+                               );
+                       }
+               }
+               if ( $this->icu ) {
+                       $defaults[ 'filter' ][ 'icu_normalizer' ] = array(
+                               'type' => 'icu_normalizer',
+                               'name' => 'nfkc_cf',
+                       );
+               }
+               return $defaults;
        }
 
        /**
@@ -215,6 +237,20 @@
                        if ( $config[ 'analyzer' ][ 'text_search' ][ 'type' ] 
=== 'hebrew' ) {
                                $config[ 'analyzer' ][ 'text_search' ][ 'type' 
] = 'hebrew_exact';
                        }
+                       break;
+               }
+               if ( $this->icu ) {
+                       foreach ( $config[ 'analyzer' ] as &$analyzer ) {
+                               if ( !isset( $analyzer[ 'filter'  ] ) ) {
+                                       continue;
+                               }
+                               $analyzer[ 'filter' ] = array_map( function( 
$filter ) {
+                                       if ( $filter === 'lowercase' ) {
+                                               return 'icu_normalizer';
+                                       }
+                                       return $filter;
+                               }, $analyzer[ 'filter' ] );
+                       }
                }
                return $config;
        }
diff --git a/tests/browser/features/full_text.feature 
b/tests/browser/features/full_text.feature
index 8f20239..df3dcef 100644
--- a/tests/browser/features/full_text.feature
+++ b/tests/browser/features/full_text.feature
@@ -225,3 +225,14 @@
     | Africa                 | África                 |
     | AlphaBeta              | AlphaBeta              |
     | ÁlphaBeta              | none                   |
+
+  @unicode_normalization
+  Scenario Outline: Searching for similar unicode characters finds all variants
+    When I search for <term>
+    Then there are 4 search results
+  Examples:
+    | term |
+    | वाङ्मय |
+    | वाङ्‍मय |
+    | वाङ‍्मय |
+    | वाङ्‌मय |
diff --git a/tests/browser/features/step_definitions/search_steps.rb 
b/tests/browser/features/step_definitions/search_steps.rb
index c386647..ccca030 100644
--- a/tests/browser/features/step_definitions/search_steps.rb
+++ b/tests/browser/features/step_definitions/search_steps.rb
@@ -171,6 +171,9 @@
 Then(/^there are no search results$/) do
   on(SearchResultsPage).first_result_element.should_not exist
 end
+Then(/^there are (\d+) search results$/) do |results|
+  on(SearchResultsPage).search_results_element.items.should == results.to_i
+end
 Then(/^within (\d+) seconds searching for (.*) yields (.*) as the first 
result$/) do |seconds, term, title|
   within(seconds) do
     step("I search for " + term)
diff --git a/tests/browser/features/support/hooks.rb 
b/tests/browser/features/support/hooks.rb
index f2a0335..1e2c8d9 100644
--- a/tests/browser/features/support/hooks.rb
+++ b/tests/browser/features/support/hooks.rb
@@ -11,6 +11,10 @@
       And a page named Two Words exists with contents ffnonesenseword catapult 
{{Template_Test}} anotherword [[Category:TwoWords]] [[Category:Categorywith 
Twowords]]
       And a page named AlphaBeta exists with contents [[Category:Alpha]] 
[[Category:Beta]]
       And a page named IHaveATwoWordCategory exists with contents 
[[Category:CategoryWith ASpace]]
+      And a page named वाङ्मय exists
+      And a page named वाङ्‍मय exists
+      And a page named वाङ‍्मय exists
+      And a page named वाङ्‌मय exists
     }
     $setup_main = true
   end

-- 
To view, visit https://gerrit.wikimedia.org/r/132226
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ied112d1c942045e357f9e7ad5f97400377ce164c
Gerrit-PatchSet: 3
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <never...@wikimedia.org>
Gerrit-Reviewer: Chad <ch...@wikimedia.org>
Gerrit-Reviewer: Manybubbles <never...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to