jenkins-bot has submitted this change and it was merged.

Change subject: Support ORs of incategory
......................................................................


Support ORs of incategory

This supports incategory as names of the category or as the page id of the
category's wikipage.  The latter is to support integration with CatGraph.

This patch has a non-strict dependency (nothing will break without it) on
I7b7be24f in core. Until that patch is merged users searching for a
non-existent category by id will be offered to create the page matching the
search query when they should not.

Bug: T89823
Change-Id: Iad60c33d6a582a1ad005bd45165ffa198637add0
---
M CirrusSearch.php
M includes/Searcher.php
M tests/browser/features/incategory_api.feature
M tests/browser/features/step_definitions/search_steps.rb
M tests/browser/features/support/cirrus_search_api_helper.rb
M tests/browser/features/support/hooks.rb
6 files changed, 89 insertions(+), 5 deletions(-)

Approvals:
  Manybubbles: Looks good to me, approved
  Daniel Kinzler: Looks good to me, but someone else must approve
  jenkins-bot: Verified



diff --git a/CirrusSearch.php b/CirrusSearch.php
index 3f9a99b..cffc871 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -529,6 +529,8 @@
  */
 $wgCirrusSearchEnableSearchLogging = false;
 
+// The maximum number of incategory:a|b|c items to OR together.
+$wgCirrusSearchMaxIncategoryOptions = 100;
 
 $includes = __DIR__ . "/includes/";
 $apiDir = $includes . 'Api/';
diff --git a/includes/Searcher.php b/includes/Searcher.php
index b077bfc..f067b9d 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -2,6 +2,7 @@
 
 namespace CirrusSearch;
 use Elastica;
+use \Category;
 use \CirrusSearch;
 use \CirrusSearch\Extra\Filter\SourceRegex;
 use \CirrusSearch\Search\Escaper;
@@ -497,10 +498,12 @@
                // The {7,15} keeps this from having horrible performance on 
big strings
                $escaper = $this->escaper;
                $fuzzyQuery = $this->fuzzyQuery;
+               $isEmptyQuery = false;
                $this->extractSpecialSyntaxFromTerm(
                        
'/(?<key>[a-z\\-]{7,15}):\s*(?<value>"(?:[^"]|(?:(?<=\\\)"))+"|[^ "]+) ?/',
                        function ( $matches ) use ( $searcher, $escaper, 
&$filters, &$notFilters, &$boostTemplates,
-                                       &$searchContainedSyntax, &$fuzzyQuery, 
&$highlightSource ) {
+                                       &$searchContainedSyntax, &$fuzzyQuery, 
&$highlightSource, &$isEmptyQuery ) {
+                               global $wgCirrusSearchMaxIncategoryOptions;
                                $key = $matches['key'];
                                $value = $matches['value'];  // Note that if 
the user supplied quotes they are not removed
                                $value = str_replace( '\"', '"', $value );
@@ -543,7 +546,13 @@
                                                $searchContainedSyntax = true;
                                                return '';
                                        case 'incategory':
-                                               $filterDestination[] = 
$searcher->matchPage( 'category.lowercase_keyword', $value );
+                                               $categories = array_slice( 
explode( '|', $value ), 0, $wgCirrusSearchMaxIncategoryOptions );
+                                               $categoryFilters = 
$searcher->matchPageCategories( $categories );
+                                               if ( $categoryFilters === null 
) {
+                                                       $isEmptyQuery = true;
+                                               } else {
+                                                       $filterDestination[] = 
$categoryFilters;
+                                               }
                                                $searchContainedSyntax = true;
                                                return '';
                                        case 'insource':
@@ -559,6 +568,9 @@
                                }
                        }
                );
+               if ( $isEmptyQuery ) {
+                       return Status::newGood( new SearchResultSet( true ) );
+               }
                $this->filters = $filters;
                $this->notFilters = $notFilters;
                $this->boostTemplates = $boostTemplates;
@@ -754,6 +766,38 @@
        }
 
        /**
+        * Builds an or between many categories that the page could be in.
+        * @param string[] $categories categories to match
+        * @return \Elastica\Filter\Bool|null A null return value means all 
values are filtered
+        *  and an empty result set should be returned.
+        */
+       public function matchPageCategories( $categories ) {
+               $filter = new \Elastica\Filter\Bool();
+               $ids = array();
+               $names = array();
+               foreach ( $categories as $category ) {
+                       if ( substr( $category, 0, 3 ) === 'id:' ) {
+                               $id = substr( $category, 3 );
+                               if ( ctype_digit( $id ) ) {
+                                       $ids[] = $id;
+                               }
+                       } else {
+                               $names[] = $category;
+                       }
+               }
+               foreach ( Title::newFromIds( $ids ) as $title ) {
+                       $names[] = $title->getText();
+               }
+               if ( !$names ) {
+                       return null;
+               }
+               foreach( $names as $name ) {
+                       $filter->addShould( $this->matchPage( 
'category.lowercase_keyword', $name ) );
+               }
+               return $filter;
+       }
+
+       /**
         * Find articles that contain similar text to the provided title array.
         * @param Title[] $titles array of titles of articles to search for
         * @param int $options bitset of options:
diff --git a/tests/browser/features/incategory_api.feature 
b/tests/browser/features/incategory_api.feature
index 296b18a..20b68e4 100644
--- a/tests/browser/features/incategory_api.feature
+++ b/tests/browser/features/incategory_api.feature
@@ -7,6 +7,27 @@
       And Amazing Catapult is in the api search results
       But Two Words is not in the api search results
 
+  Scenario: incategory: splits on | to create an OR query
+    When I api search for incategory:weaponry|nothing
+    Then Catapult is in the api search results
+      And Amazing Catapult is in the api search results
+      But Two Words is not in the api search results
+
+  Scenario Outline: incategory: does not fail when the category is unknown
+    When I api search for incategory:<category>
+    Then there are no api search results
+  Examples:
+    |          category           |
+    | doesnotexistatleastihopenot |
+    | id:2147483600               |
+
+  Scenario: incategory: finds categories by page id
+    When I locate the page id of Category:Weaponry and store it as 
%weaponry_id%
+     And I api search for incategory:id:%weaponry_id%
+    Then Catapult is in the api search results
+      And Amazing Catapult is in the api search results
+      But Two Words is not in the api search results
+
   Scenario: incategory: works on categories from templates
     When I api search for incategory:templatetagged incategory:twowords
     Then Two Words is the first api search result
@@ -42,11 +63,11 @@
   Scenario: incategory: can be combined with other text
     When I api search for incategory:weaponry amazing
     Then Amazing Catapult is the first api search result
-  
+
   Scenario: -incategory: excludes pages with the category
     When I api search for -incategory:weaponry incategory:twowords
     Then Two Words is the first api search result
-    
+
   Scenario: incategory: can handle a space after the :
     When I api search for incategory: weaponry
     Then Catapult is in the api search results
diff --git a/tests/browser/features/step_definitions/search_steps.rb 
b/tests/browser/features/step_definitions/search_steps.rb
index 01f6eef..7eefe4c 100644
--- a/tests/browser/features/step_definitions/search_steps.rb
+++ b/tests/browser/features/step_definitions/search_steps.rb
@@ -9,11 +9,19 @@
 When(/^I go search for (.*)$/) do |search|
   visit(SearchResultsPage, using_params: { search: search })
 end
+Before do
+  @search_vars = {
+    "idiographic_whitspace" => "u\3000".force_encoding("utf-8")
+  }
+end
+When(/^I locate the page id of (.*) and store it as (%.*%)$/) do |title, 
varname|
+  @search_vars[varname] = page_id_of title
+end
 # rubocop:disable LineLength
 When(/^I api search( with disabled incoming link weighting)?(?: with offset 
(\d+))?(?: in the (.*) language)?(?: in namespaces? (\d+(?: \d+)*))? for 
(.*)$/) do |incoming_links, offset, lang, namespaces, search|
   begin
     @api_result = search_for(
-      search.gsub("%idiographic_whitespace%", 
"\u3000".force_encoding("utf-8")),
+      search.gsub(/%[^ ]+%/, @search_vars),
       sroffset: offset,
       srnamespace: (namespaces || "0").split(/ /),
       uselang: lang,
diff --git a/tests/browser/features/support/cirrus_search_api_helper.rb 
b/tests/browser/features/support/cirrus_search_api_helper.rb
index 400d091..630988a 100644
--- a/tests/browser/features/support/cirrus_search_api_helper.rb
+++ b/tests/browser/features/support/cirrus_search_api_helper.rb
@@ -93,4 +93,12 @@
       token_type: "edit"
     )
   end
+
+  # Locate a category id
+  def page_id_of(title)
+    res = api.query(
+      titles: title
+    )
+    res["query"]["pages"].first[1]["pageid"]
+  end
 end
diff --git a/tests/browser/features/support/hooks.rb 
b/tests/browser/features/support/hooks.rb
index e1b37c5..1991b6d 100644
--- a/tests/browser/features/support/hooks.rb
+++ b/tests/browser/features/support/hooks.rb
@@ -11,6 +11,7 @@
       And a page named Links To Catapult exists with contents [[Catapult]]
       And a page named Catapult exists with contents ♙ asdf 
[[Category:Weaponry]]
       And a page named Amazing Catapult exists with contents test [[Catapult]] 
[[Category:Weaponry]]
+      And a page named Category:Weaponry exists with contents Weaponry refers 
to any items designed or used to attack and kill or destroy other people and 
property.
       And a page named Two Words exists with contents ffnonesenseword catapult 
{{Template_Test}} anotherword [[Category:TwoWords]] [[Category:Categorywith 
Twowords]] [[Category:Categorywith " Quote]]
       And a page named AlphaBeta exists with contents [[Category:Alpha]] 
[[Category:Beta]]
       And a page named IHaveATwoWordCategory exists with contents 
[[Category:CategoryWith ASpace]]

-- 
To view, visit https://gerrit.wikimedia.org/r/191622
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Iad60c33d6a582a1ad005bd45165ffa198637add0
Gerrit-PatchSet: 6
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <never...@wikimedia.org>
Gerrit-Reviewer: Aude <aude.w...@gmail.com>
Gerrit-Reviewer: Daniel Kinzler <daniel.kinz...@wikimedia.de>
Gerrit-Reviewer: EBernhardson <ebernhard...@wikimedia.org>
Gerrit-Reviewer: Jkroll <jkr...@toolserver.org>
Gerrit-Reviewer: Manybubbles <never...@wikimedia.org>
Gerrit-Reviewer: TMg <mr.h...@gmx.de>
Gerrit-Reviewer: Thiemo Mättig (WMDE) <thiemo.maet...@wikimedia.de>
Gerrit-Reviewer: Tobias Gritschacher <tobias.gritschac...@wikimedia.de>
Gerrit-Reviewer: WMDE-Fisch <christoph.fisc...@wikimedia.de>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to