Manybubbles has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/157972

Change subject: Fix negated phrase search
......................................................................

Fix negated phrase search

Also fixes a nasty bug with browser tests that was causing the tests not
to fail before the work was done.

Bug: 70301
Change-Id: I5b0a99ababd234f81a530e2deef6be46d8426227
---
M includes/Searcher.php
M tests/browser/features/exact_quotes.feature
M tests/browser/features/step_definitions/search_steps.rb
M tests/browser/features/step_definitions/transformers.rb
4 files changed, 29 insertions(+), 9 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch 
refs/changes/72/157972/1

diff --git a/includes/Searcher.php b/includes/Searcher.php
index ec25a09..9eff90e 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -544,10 +544,11 @@
                // Match quoted phrases including those containing escaped 
quotes
                // Those phrases can optionally be followed by ~ then a number 
(this is the phrase slop)
                // That can optionally be followed by a ~ (this matches stemmed 
words in phrases)
-               // The following all match: "a", "a boat", "a\"boat", "a 
boat"~, "a boat"~9, "a boat"~9~
-               $query = self::replacePartsOfQuery( $this->term, 
'/(?<![\]])(?<main>"((?:[^"]|(?:\"))+)"(?<slop>~[0-9]+)?)(?<fuzzy>~)?/',
+               // The following all match: "a", "a boat", "a\"boat", "a 
boat"~, "a boat"~9, "a boat"~9~, -"a boat", -"a boat"~9~
+               $query = self::replacePartsOfQuery( $this->term, 
'/(?<![\]])(?<negate>-|!)?(?<main>"((?:[^"]|(?:\"))+)"(?<slop>~[0-9]+)?)(?<fuzzy>~)?/',
                        function ( $matches ) use ( $searcher, $escaper, 
&$phrases ) {
                                global $wgCirrusSearchPhraseSlop;
+                               $negate = $matches[ 'negate' ][ 0 ] ? 'NOT ' : 
'';
                                $main = $escaper->fixupQueryStringPart( 
$matches[ 'main' ][ 0 ] );
                                if ( !isset( $matches[ 'fuzzy' ] ) ) {
                                        if ( !isset( $matches[ 'slop' ] ) ) {
@@ -557,11 +558,11 @@
                                        // The highlighter locks phrases to the 
fields that specify them.  It doesn't do
                                        // that with terms.
                                        return array(
-                                               'escaped' => 
$searcher->switchSearchToExact( $main, true ),
-                                               'nonAll' => 
$searcher->switchSearchToExact( $main, false ),
+                                               'escaped' => $negate . 
$searcher->switchSearchToExact( $main, true ),
+                                               'nonAll' => $negate . 
$searcher->switchSearchToExact( $main, false ),
                                        );
                                }
-                               return array( 'escaped' => $main );
+                               return array( 'escaped' => $negate . $main );
                        } );
                wfProfileOut( __METHOD__ . '-find-phrase-queries' );
                wfProfileIn( __METHOD__ . '-switch-prefix-to-plain' );
diff --git a/tests/browser/features/exact_quotes.feature 
b/tests/browser/features/exact_quotes.feature
index af346a0..39c069b 100644
--- a/tests/browser/features/exact_quotes.feature
+++ b/tests/browser/features/exact_quotes.feature
@@ -67,3 +67,22 @@
     | 2         | Two Words |
     | 3         | Two Words |
     | 77        | Two Words |
+
+  Scenario Outline: Prefixing a quoted phrase with - or ! or NOT negates it
+    When I search for catapult <negation>"two words"<suffix>
+    Then Catapult is in the search results
+    And Two Words is not in the search results
+  Examples:
+    |    negation    | suffix |
+    | -              |        |
+    | !              |        |
+    | %{exact:NOT }  |        |
+    | -              | ~      |
+    | !              | ~      |
+    | %{exact:NOT }  | ~      |
+    | -              | ~1     |
+    | !              | ~1     |
+    | %{exact:NOT }  | ~1     |
+    | -              | ~7~    |
+    | !              | ~7~    |
+    | %{exact:NOT }  | ~7~    |
diff --git a/tests/browser/features/step_definitions/search_steps.rb 
b/tests/browser/features/step_definitions/search_steps.rb
index 8bc16f3..3fb6c6f 100644
--- a/tests/browser/features/step_definitions/search_steps.rb
+++ b/tests/browser/features/step_definitions/search_steps.rb
@@ -258,9 +258,9 @@
   found = on(SearchResultsPage).results.any? do |result|
     begin
       check_search_result(result.parent, result, title, in_ok)
-      return true
+      true
     rescue
-      return false
+      false
     end
   end
   found.should == !not_searching
diff --git a/tests/browser/features/step_definitions/transformers.rb 
b/tests/browser/features/step_definitions/transformers.rb
index 8a67b4b..59b6636 100644
--- a/tests/browser/features/step_definitions/transformers.rb
+++ b/tests/browser/features/step_definitions/transformers.rb
@@ -5,6 +5,6 @@
 end
 
 # Allow sending strings with trailing spaces
-Transform(/%{exact:[^}]*}/) do |param|
-  param[8..-2]
+Transform(/%{exact:[^}]*}.*/) do |param|
+  param.gsub(/%{exact:([^}]*)}/, "\\1")
 end

-- 
To view, visit https://gerrit.wikimedia.org/r/157972
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I5b0a99ababd234f81a530e2deef6be46d8426227
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to