EBernhardson has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/341026 )
Change subject: Rename deprecated more like this fields ...................................................................... Rename deprecated more like this fields Bug:T158266 Change-Id: I7e5874e069f07381c7e0417cad71f123aa43cdc8 --- M CirrusSearch.php M includes/Hooks.php M tests/unit/Query/MoreLikeFeatureTest.php A tests/unit/fixtures/searchText/keyword_edge_case_001.default.expected A tests/unit/fixtures/searchText/keyword_edge_case_001.fullyfeatured-interwiki.expected A tests/unit/fixtures/searchText/keyword_edge_case_001.fullyfeatured.expected A tests/unit/fixtures/searchText/keyword_edge_case_001.query M tests/unit/fixtures/searchText/morelike_001.default.expected M tests/unit/fixtures/searchText/morelike_001.fullyfeatured-interwiki.expected M tests/unit/fixtures/searchText/morelike_001.fullyfeatured.expected M tests/unit/fixtures/searchText/morelike_002.default.expected M tests/unit/fixtures/searchText/morelike_002.fullyfeatured-interwiki.expected M tests/unit/fixtures/searchText/morelike_002.fullyfeatured.expected 13 files changed, 2,048 insertions(+), 26 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/26/341026/1 diff --git a/CirrusSearch.php b/CirrusSearch.php index 7fe4bbd..aaae55d 100644 --- a/CirrusSearch.php +++ b/CirrusSearch.php @@ -489,12 +489,12 @@ // Minimum length for a word to be considered // small words tend to be stop words. - 'min_word_len' => 0, + 'min_word_length' => 0, // Maximum length for a word to be considered // Very long "words" tend to be uncommon, excluding them can help recall but it // is highly dependent on the language. - 'max_word_len' => 0, + 'max_word_length' => 0, // Percent of terms to match // High value will increase precision but can prevent small docs to match against large ones diff --git a/includes/Hooks.php b/includes/Hooks.php index 1f3c569..74b5fcc 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -231,8 +231,8 @@ case 'max_doc_freq': case 'max_query_terms': case 'min_term_freq': - case 'min_word_len': - case 'max_word_len': + case 'min_word_length': + case 'max_word_length': if( is_numeric( $v ) && $v >= 0 ) { $wgCirrusSearchMoreLikeThisConfig[$k] = intval( $v ); } elseif ( $v === 'null' ) { @@ -305,8 +305,8 @@ $request, 'cirrusMltMaxQueryTerms', $wgCirrusSearchMoreLikeThisMaxQueryTermsLimit ); self::overrideNumeric( $wgCirrusSearchMoreLikeThisConfig['min_term_freq'], $request, 'cirrusMltMinTermFreq' ); self::overrideMinimumShouldMatch( $wgCirrusSearchMoreLikeThisConfig['minimum_should_match'], $request, 'cirrusMltMinimumShouldMatch' ); - self::overrideNumeric( $wgCirrusSearchMoreLikeThisConfig['min_word_len'], $request, 'cirrusMltMinWordLength' ); - self::overrideNumeric( $wgCirrusSearchMoreLikeThisConfig['max_word_len'], $request, 'cirrusMltMaxWordLength' ); + self::overrideNumeric( $wgCirrusSearchMoreLikeThisConfig['min_word_length'], $request, 'cirrusMltMinWordLength' ); + self::overrideNumeric( $wgCirrusSearchMoreLikeThisConfig['max_word_length'], $request, 'cirrusMltMaxWordLength' ); $fields = $request->getVal( 'cirrusMltFields' ); if( isset( $fields ) ) { $wgCirrusSearchMoreLikeThisFields = array_intersect( diff --git a/tests/unit/Query/MoreLikeFeatureTest.php b/tests/unit/Query/MoreLikeFeatureTest.php index d7484d3..7cfc1f7 100644 --- a/tests/unit/Query/MoreLikeFeatureTest.php +++ b/tests/unit/Query/MoreLikeFeatureTest.php @@ -48,8 +48,8 @@ 'max_doc_freq' => null, 'max_query_terms' => 25, 'min_term_freq' => 2, - 'min_word_len' => 0, - 'max_word_len' => 0, + 'min_word_length' => 0, + 'max_word_length' => 0, 'minimum_should_match' => '30%', ] ) ->setFields( ['text'] ) @@ -67,8 +67,8 @@ 'max_doc_freq' => null, 'max_query_terms' => 25, 'min_term_freq' => 2, - 'min_word_len' => 0, - 'max_word_len' => 0, + 'min_word_length' => 0, + 'max_word_length' => 0, 'minimum_should_match' => '30%', ] ) ->setFields( ['text'] ) @@ -85,8 +85,8 @@ 'max_doc_freq' => null, 'max_query_terms' => 25, 'min_term_freq' => 2, - 'min_word_len' => 0, - 'max_word_len' => 0, + 'min_word_length' => 0, + 'max_word_length' => 0, 'minimum_should_match' => '30%', ] ) ->setFields( ['text'] ) @@ -103,8 +103,8 @@ 'max_doc_freq' => null, 'max_query_terms' => 25, 'min_term_freq' => 2, - 'min_word_len' => 0, - 'max_word_len' => 0, + 'min_word_length' => 0, + 'max_word_length' => 0, 'minimum_should_match' => '30%', ] ) ->setFields( ['text'] ) diff --git a/tests/unit/fixtures/searchText/keyword_edge_case_001.default.expected b/tests/unit/fixtures/searchText/keyword_edge_case_001.default.expected new file mode 100644 index 0000000..35b1cd3 --- /dev/null +++ b/tests/unit/fixtures/searchText/keyword_edge_case_001.default.expected @@ -0,0 +1,308 @@ +{ + "description": "full_text search for 'Organization theory : a strategic approach'", + "options": { + "search_type": "dfs_query_then_fetch", + "timeout": "20s" + }, + "params": { + "search_type": "dfs_query_then_fetch", + "timeout": "20s" + }, + "query": { + "_source": [ + "namespace", + "namespace_text", + "redirect.*", + "text_bytes", + "timestamp", + "title", + "wiki" + ], + "highlight": { + "fields": { + "auxiliary_text": { + "fragment_size": 150, + "fragmenter": "scan", + "matched_fields": [ + "auxiliary_text", + "auxiliary_text.plain" + ], + "number_of_fragments": 1, + "options": { + "boost_before": { + "20": 2, + "50": 1.8, + "200": 1.5, + "1000": 1.2 + }, + "max_fragments_scored": 5000, + "skip_if_last_matched": true, + "top_scoring": true + }, + "type": "experimental" + }, + "category": { + "fragmenter": "none", + "matched_fields": [ + "category", + "category.plain" + ], + "number_of_fragments": 1, + "options": { + "skip_if_last_matched": true + }, + "order": "score", + "type": "experimental" + }, + "heading": { + "fragmenter": "none", + "matched_fields": [ + "heading", + "heading.plain" + ], + "number_of_fragments": 1, + "options": { + "skip_if_last_matched": true + }, + "order": "score", + "type": "experimental" + }, + "redirect.title": { + "fragmenter": "none", + "matched_fields": [ + "redirect.title", + "redirect.title.plain" + ], + "number_of_fragments": 1, + "options": { + "skip_if_last_matched": true + }, + "order": "score", + "type": "experimental" + }, + "text": { + "fragment_size": 150, + "fragmenter": "scan", + "matched_fields": [ + "text", + "text.plain" + ], + "no_match_size": 150, + "number_of_fragments": 1, + "options": { + "boost_before": { + "20": 2, + "50": 1.8, + "200": 1.5, + "1000": 1.2 + }, + "max_fragments_scored": 5000, + "top_scoring": true + }, + "type": "experimental" + }, + "title": { + "fragmenter": "none", + "matched_fields": [ + "title", + "title.plain" + ], + "number_of_fragments": 1, + "type": "experimental" + } + }, + "highlight_query": { + "query_string": { + "allow_leading_wildcard": true, + "auto_generate_phrase_queries": true, + "default_operator": "AND", + "fields": [ + "auxiliary_text.plain^0.5", + "auxiliary_text^0.25", + "category.plain^8", + "category^4", + "heading.plain^5", + "heading^2.5", + "opening_text.plain^3", + "opening_text^1.5", + "redirect.title.plain^15", + "redirect.title^7.5", + "text.plain^1", + "text^0.5", + "title.plain^20", + "title^10" + ], + "fuzzy_prefix_length": 2, + "max_determinized_states": 500, + "phrase_slop": 1, + "query": "Organization theory \\: a strategic approach", + "rewrite": "top_terms_boost_1024" + } + }, + "post_tags": [ + "<\/span>" + ], + "pre_tags": [ + "<span class=\"searchmatch\">" + ] + }, + "query": { + "bool": { + "filter": [ + { + "terms": { + "namespace": [ + 0, + 1, + 2, + 3 + ] + } + } + ], + "minimum_should_match": 1, + "should": [ + { + "multi_match": { + "fields": [ + "all_near_match^2" + ], + "query": "Organization theory : a strategic approach" + } + }, + { + "query_string": { + "allow_leading_wildcard": true, + "auto_generate_phrase_queries": true, + "default_operator": "AND", + "fields": [ + "all.plain^1", + "all^0.5" + ], + "fuzzy_prefix_length": 2, + "max_determinized_states": 500, + "phrase_slop": 0, + "query": "Organization theory \\: a strategic approach", + "rewrite": "top_terms_boost_1024" + } + } + ] + } + }, + "rescore": [ + { + "query": { + "query_weight": 1, + "rescore_query": { + "function_score": { + "functions": [ + { + "field_value_factor": { + "field": "incoming_links", + "missing": 0, + "modifier": "log2p" + } + }, + { + "filter": { + "terms": { + "namespace": [ + 1 + ] + } + }, + "weight": "0.25" + }, + { + "filter": { + "terms": { + "namespace": [ + 2 + ] + } + }, + "weight": "0.05" + }, + { + "filter": { + "terms": { + "namespace": [ + 3 + ] + } + }, + "weight": "0.0125" + } + ] + } + }, + "rescore_query_weight": 1, + "score_mode": "multiply" + }, + "window_size": 8192 + }, + { + "query": { + "query_weight": 1, + "rescore_query": { + "query_string": { + "allow_leading_wildcard": true, + "auto_generate_phrase_queries": true, + "default_operator": "AND", + "fields": [ + "all.plain^1", + "all^0.5" + ], + "fuzzy_prefix_length": 2, + "max_determinized_states": 500, + "phrase_slop": 1, + "query": "\"Organization theory \\: a strategic approach\"", + "rewrite": "top_terms_boost_1024" + } + }, + "rescore_query_weight": 10 + }, + "window_size": 512 + } + ], + "size": 20, + "stats": [ + "full_text", + "full_text_querystring", + "suggest" + ], + "stored_fields": [ + "text.word_count" + ], + "suggest": { + "suggest": { + "phrase": { + "confidence": 2, + "direct_generator": [ + { + "field": "suggest", + "max_term_freq": 0.5, + "min_doc_freq": 0, + "prefix_length": 2, + "suggest_mode": "always" + } + ], + "field": "suggest", + "highlight": { + "post_tag": "<\/em>", + "pre_tag": "<em>" + }, + "max_errors": 2, + "real_word_error_likelihood": 0.95, + "size": 1, + "smoothing": { + "stupid_backoff": { + "discount": 0.4 + } + } + } + }, + "text": "Organization theory : a strategic approach" + } + } +} \ No newline at end of file diff --git a/tests/unit/fixtures/searchText/keyword_edge_case_001.fullyfeatured-interwiki.expected b/tests/unit/fixtures/searchText/keyword_edge_case_001.fullyfeatured-interwiki.expected new file mode 100644 index 0000000..a6724ce --- /dev/null +++ b/tests/unit/fixtures/searchText/keyword_edge_case_001.fullyfeatured-interwiki.expected @@ -0,0 +1,1266 @@ +[ + { + "description": "full_text search for 'Organization theory : a strategic approach'", + "options": { + "search_type": "dfs_query_then_fetch", + "timeout": "20s" + }, + "params": { + "search_type": "dfs_query_then_fetch", + "timeout": "20s" + }, + "path": "itwikibooks\/page\/_search", + "query": { + "_source": [ + "namespace", + "namespace_text", + "redirect.*", + "text_bytes", + "timestamp", + "title", + "wiki" + ], + "highlight": { + "fields": { + "auxiliary_text": { + "fragment_size": 150, + "fragmenter": "scan", + "matched_fields": [ + "auxiliary_text", + "auxiliary_text.plain" + ], + "number_of_fragments": 1, + "options": { + "boost_before": { + "20": 2, + "50": 1.8, + "200": 1.5, + "1000": 1.2 + }, + "max_fragments_scored": 5000, + "skip_if_last_matched": true, + "top_scoring": true + }, + "type": "experimental" + }, + "category": { + "fragmenter": "none", + "matched_fields": [ + "category", + "category.plain" + ], + "number_of_fragments": 1, + "options": { + "skip_if_last_matched": true + }, + "order": "score", + "type": "experimental" + }, + "heading": { + "fragmenter": "none", + "matched_fields": [ + "heading", + "heading.plain" + ], + "number_of_fragments": 1, + "options": { + "skip_if_last_matched": true + }, + "order": "score", + "type": "experimental" + }, + "redirect.title": { + "fragmenter": "none", + "matched_fields": [ + "redirect.title", + "redirect.title.plain" + ], + "number_of_fragments": 1, + "options": { + "skip_if_last_matched": true + }, + "order": "score", + "type": "experimental" + }, + "text": { + "fragment_size": 150, + "fragmenter": "scan", + "matched_fields": [ + "text", + "text.plain" + ], + "no_match_size": 150, + "number_of_fragments": 1, + "options": { + "boost_before": { + "20": 2, + "50": 1.8, + "200": 1.5, + "1000": 1.2 + }, + "max_fragments_scored": 5000, + "top_scoring": true + }, + "type": "experimental" + }, + "title": { + "fragmenter": "none", + "matched_fields": [ + "title", + "title.plain" + ], + "number_of_fragments": 1, + "type": "experimental" + } + }, + "highlight_query": { + "query_string": { + "allow_leading_wildcard": true, + "auto_generate_phrase_queries": true, + "default_operator": "OR", + "fields": [ + "auxiliary_text.plain^0.5", + "auxiliary_text^0.25", + "category.plain^8", + "category^4", + "heading.plain^5", + "heading^2.5", + "opening_text.plain^3", + "opening_text^1.5", + "redirect.title.plain^15", + "redirect.title^7.5", + "text.plain^1", + "text^0.5", + "title.plain^20", + "title^10" + ], + "fuzzy_prefix_length": 2, + "max_determinized_states": 500, + "phrase_slop": 1, + "query": "Organization theory \\: a strategic approach", + "rewrite": "top_terms_blended_freqs_1024" + } + }, + "post_tags": [ + "<\/span>" + ], + "pre_tags": [ + "<span class=\"searchmatch\">" + ] + }, + "query": { + "bool": { + "filter": [ + { + "terms": { + "namespace": [ + 0, + 1, + 2, + 3 + ] + } + } + ], + "minimum_should_match": 1, + "should": [ + { + "bool": { + "disable_coord": true, + "filter": [ + { + "bool": { + "should": [ + { + "match": { + "all": { + "operator": "AND", + "query": "Organization theory \\: a strategic approach" + } + } + }, + { + "match": { + "all.plain": { + "operator": "AND", + "query": "Organization theory \\: a strategic approach" + } + } + } + ] + } + } + ], + "should": [ + { + "dis_max": { + "queries": [ + { + "multi_match": { + "boost": 0.4, + "fields": [ + "text.plain^1", + "text^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 0.5, + "fields": [ + "opening_text.plain^1", + "opening_text^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + } + ], + "tie_breaker": 0.2 + } + }, + { + "dis_max": { + "queries": [ + { + "multi_match": { + "boost": 1.05, + "fields": [ + "suggest" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 2, + "fields": [ + "redirect.title.plain^1", + "redirect.title^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + } + ] + } + }, + { + "multi_match": { + "boost": 0.2, + "fields": [ + "auxiliary_text.plain^1", + "auxiliary_text^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 0.2, + "fields": [ + "file_text.plain^1", + "file_text^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 1.3, + "fields": [ + "heading.plain^1", + "heading^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 1.8, + "fields": [ + "category.plain^1", + "category^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 2.3, + "fields": [ + "title.plain^1", + "title^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + } + ] + } + }, + { + "multi_match": { + "fields": [ + "all_near_match^2" + ], + "query": "Organization theory : a strategic approach" + } + } + ] + } + }, + "rescore": [ + { + "query": { + "query_weight": 1, + "rescore_query": { + "function_score": { + "functions": [ + { + "field_value_factor": { + "field": "incoming_links", + "missing": 0, + "modifier": "log2p" + } + }, + { + "filter": { + "terms": { + "namespace": [ + 1 + ] + } + }, + "weight": "0.25" + }, + { + "filter": { + "terms": { + "namespace": [ + 2 + ] + } + }, + "weight": "0.05" + }, + { + "filter": { + "terms": { + "namespace": [ + 3 + ] + } + }, + "weight": "0.0125" + } + ] + } + }, + "rescore_query_weight": 1, + "score_mode": "multiply" + }, + "window_size": 8192 + }, + { + "query": { + "query_weight": 1, + "rescore_query": { + "multi_match": { + "fields": [ + "text.plain^0.2", + "text^0.14" + ], + "query": "Organization theory \\: a strategic approach", + "slop": 1, + "type": "phrase" + } + }, + "rescore_query_weight": 10 + }, + "window_size": 512 + } + ], + "size": 5, + "stats": [ + "full_text", + "full_text_simple_match" + ], + "stored_fields": [ + "text.word_count" + ] + } + }, + { + "description": "full_text search for 'Organization theory : a strategic approach'", + "options": { + "search_type": "dfs_query_then_fetch", + "timeout": "20s" + }, + "params": { + "search_type": "dfs_query_then_fetch", + "timeout": "20s" + }, + "path": "itwiktionary\/page\/_search", + "query": { + "_source": [ + "namespace", + "namespace_text", + "redirect.*", + "text_bytes", + "timestamp", + "title", + "wiki" + ], + "highlight": { + "fields": { + "auxiliary_text": { + "fragment_size": 150, + "fragmenter": "scan", + "matched_fields": [ + "auxiliary_text", + "auxiliary_text.plain" + ], + "number_of_fragments": 1, + "options": { + "boost_before": { + "20": 2, + "50": 1.8, + "200": 1.5, + "1000": 1.2 + }, + "max_fragments_scored": 5000, + "skip_if_last_matched": true, + "top_scoring": true + }, + "type": "experimental" + }, + "category": { + "fragmenter": "none", + "matched_fields": [ + "category", + "category.plain" + ], + "number_of_fragments": 1, + "options": { + "skip_if_last_matched": true + }, + "order": "score", + "type": "experimental" + }, + "heading": { + "fragmenter": "none", + "matched_fields": [ + "heading", + "heading.plain" + ], + "number_of_fragments": 1, + "options": { + "skip_if_last_matched": true + }, + "order": "score", + "type": "experimental" + }, + "redirect.title": { + "fragmenter": "none", + "matched_fields": [ + "redirect.title", + "redirect.title.plain" + ], + "number_of_fragments": 1, + "options": { + "skip_if_last_matched": true + }, + "order": "score", + "type": "experimental" + }, + "text": { + "fragment_size": 150, + "fragmenter": "scan", + "matched_fields": [ + "text", + "text.plain" + ], + "no_match_size": 150, + "number_of_fragments": 1, + "options": { + "boost_before": { + "20": 2, + "50": 1.8, + "200": 1.5, + "1000": 1.2 + }, + "max_fragments_scored": 5000, + "top_scoring": true + }, + "type": "experimental" + }, + "title": { + "fragmenter": "none", + "matched_fields": [ + "title", + "title.plain" + ], + "number_of_fragments": 1, + "type": "experimental" + } + }, + "highlight_query": { + "query_string": { + "allow_leading_wildcard": true, + "auto_generate_phrase_queries": true, + "default_operator": "OR", + "fields": [ + "auxiliary_text.plain^0.5", + "auxiliary_text^0.25", + "category.plain^8", + "category^4", + "heading.plain^5", + "heading^2.5", + "opening_text.plain^3", + "opening_text^1.5", + "redirect.title.plain^15", + "redirect.title^7.5", + "text.plain^1", + "text^0.5", + "title.plain^20", + "title^10" + ], + "fuzzy_prefix_length": 2, + "max_determinized_states": 500, + "phrase_slop": 1, + "query": "Organization theory \\: a strategic approach", + "rewrite": "top_terms_blended_freqs_1024" + } + }, + "post_tags": [ + "<\/span>" + ], + "pre_tags": [ + "<span class=\"searchmatch\">" + ] + }, + "query": { + "bool": { + "filter": [ + { + "terms": { + "namespace": [ + 0, + 1, + 2, + 3 + ] + } + } + ], + "minimum_should_match": 1, + "should": [ + { + "bool": { + "disable_coord": true, + "filter": [ + { + "bool": { + "should": [ + { + "match": { + "all": { + "operator": "AND", + "query": "Organization theory \\: a strategic approach" + } + } + }, + { + "match": { + "all.plain": { + "operator": "AND", + "query": "Organization theory \\: a strategic approach" + } + } + } + ] + } + } + ], + "should": [ + { + "dis_max": { + "queries": [ + { + "multi_match": { + "boost": 0.4, + "fields": [ + "text.plain^1", + "text^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 0.5, + "fields": [ + "opening_text.plain^1", + "opening_text^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + } + ], + "tie_breaker": 0.2 + } + }, + { + "dis_max": { + "queries": [ + { + "multi_match": { + "boost": 1.05, + "fields": [ + "suggest" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 2, + "fields": [ + "redirect.title.plain^1", + "redirect.title^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + } + ] + } + }, + { + "multi_match": { + "boost": 0.2, + "fields": [ + "auxiliary_text.plain^1", + "auxiliary_text^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 0.2, + "fields": [ + "file_text.plain^1", + "file_text^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 1.3, + "fields": [ + "heading.plain^1", + "heading^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 1.8, + "fields": [ + "category.plain^1", + "category^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 2.3, + "fields": [ + "title.plain^1", + "title^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + } + ] + } + }, + { + "multi_match": { + "fields": [ + "all_near_match^2" + ], + "query": "Organization theory : a strategic approach" + } + } + ] + } + }, + "rescore": [ + { + "query": { + "query_weight": 1, + "rescore_query": { + "function_score": { + "functions": [ + { + "field_value_factor": { + "field": "incoming_links", + "missing": 0, + "modifier": "log2p" + } + }, + { + "filter": { + "terms": { + "namespace": [ + 1 + ] + } + }, + "weight": "0.25" + }, + { + "filter": { + "terms": { + "namespace": [ + 2 + ] + } + }, + "weight": "0.05" + }, + { + "filter": { + "terms": { + "namespace": [ + 3 + ] + } + }, + "weight": "0.0125" + } + ] + } + }, + "rescore_query_weight": 1, + "score_mode": "multiply" + }, + "window_size": 8192 + }, + { + "query": { + "query_weight": 1, + "rescore_query": { + "multi_match": { + "fields": [ + "text.plain^0.2", + "text^0.14" + ], + "query": "Organization theory \\: a strategic approach", + "slop": 1, + "type": "phrase" + } + }, + "rescore_query_weight": 10 + }, + "window_size": 512 + } + ], + "size": 5, + "stats": [ + "full_text", + "full_text_simple_match" + ], + "stored_fields": [ + "text.word_count" + ] + } + }, + { + "description": "full_text search for 'Organization theory : a strategic approach'", + "options": { + "search_type": "dfs_query_then_fetch", + "timeout": "20s" + }, + "params": { + "search_type": "dfs_query_then_fetch", + "timeout": "20s" + }, + "path": "wiki\/page\/_search", + "query": { + "_source": [ + "namespace", + "namespace_text", + "redirect.*", + "text_bytes", + "timestamp", + "title", + "wiki" + ], + "highlight": { + "fields": { + "auxiliary_text": { + "fragment_size": 150, + "fragmenter": "scan", + "matched_fields": [ + "auxiliary_text", + "auxiliary_text.plain" + ], + "number_of_fragments": 1, + "options": { + "boost_before": { + "20": 2, + "50": 1.8, + "200": 1.5, + "1000": 1.2 + }, + "max_fragments_scored": 5000, + "skip_if_last_matched": true, + "top_scoring": true + }, + "type": "experimental" + }, + "category": { + "fragmenter": "none", + "matched_fields": [ + "category", + "category.plain" + ], + "number_of_fragments": 1, + "options": { + "skip_if_last_matched": true + }, + "order": "score", + "type": "experimental" + }, + "heading": { + "fragmenter": "none", + "matched_fields": [ + "heading", + "heading.plain" + ], + "number_of_fragments": 1, + "options": { + "skip_if_last_matched": true + }, + "order": "score", + "type": "experimental" + }, + "redirect.title": { + "fragmenter": "none", + "matched_fields": [ + "redirect.title", + "redirect.title.plain" + ], + "number_of_fragments": 1, + "options": { + "skip_if_last_matched": true + }, + "order": "score", + "type": "experimental" + }, + "text": { + "fragment_size": 150, + "fragmenter": "scan", + "matched_fields": [ + "text", + "text.plain" + ], + "no_match_size": 150, + "number_of_fragments": 1, + "options": { + "boost_before": { + "20": 2, + "50": 1.8, + "200": 1.5, + "1000": 1.2 + }, + "max_fragments_scored": 5000, + "top_scoring": true + }, + "type": "experimental" + }, + "title": { + "fragmenter": "none", + "matched_fields": [ + "title", + "title.plain" + ], + "number_of_fragments": 1, + "type": "experimental" + } + }, + "highlight_query": { + "query_string": { + "allow_leading_wildcard": true, + "auto_generate_phrase_queries": true, + "default_operator": "OR", + "fields": [ + "auxiliary_text.plain^0.5", + "auxiliary_text^0.25", + "category.plain^8", + "category^4", + "heading.plain^5", + "heading^2.5", + "opening_text.plain^3", + "opening_text^1.5", + "redirect.title.plain^15", + "redirect.title^7.5", + "text.plain^1", + "text^0.5", + "title.plain^20", + "title^10" + ], + "fuzzy_prefix_length": 2, + "max_determinized_states": 500, + "phrase_slop": 1, + "query": "Organization theory \\: a strategic approach", + "rewrite": "top_terms_blended_freqs_1024" + } + }, + "post_tags": [ + "<\/span>" + ], + "pre_tags": [ + "<span class=\"searchmatch\">" + ] + }, + "query": { + "bool": { + "filter": [ + { + "terms": { + "namespace": [ + 0, + 1, + 2, + 3 + ] + } + } + ], + "minimum_should_match": 1, + "should": [ + { + "bool": { + "disable_coord": true, + "filter": [ + { + "bool": { + "should": [ + { + "match": { + "all": { + "operator": "AND", + "query": "Organization theory \\: a strategic approach" + } + } + }, + { + "match": { + "all.plain": { + "operator": "AND", + "query": "Organization theory \\: a strategic approach" + } + } + } + ] + } + } + ], + "should": [ + { + "dis_max": { + "queries": [ + { + "multi_match": { + "boost": 0.4, + "fields": [ + "text.plain^1", + "text^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 0.5, + "fields": [ + "opening_text.plain^1", + "opening_text^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + } + ], + "tie_breaker": 0.2 + } + }, + { + "dis_max": { + "queries": [ + { + "multi_match": { + "boost": 1.05, + "fields": [ + "suggest" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 2, + "fields": [ + "redirect.title.plain^1", + "redirect.title^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + } + ] + } + }, + { + "multi_match": { + "boost": 0.2, + "fields": [ + "auxiliary_text.plain^1", + "auxiliary_text^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 0.2, + "fields": [ + "file_text.plain^1", + "file_text^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 1.3, + "fields": [ + "heading.plain^1", + "heading^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 1.8, + "fields": [ + "category.plain^1", + "category^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 2.3, + "fields": [ + "title.plain^1", + "title^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + } + ] + } + }, + { + "multi_match": { + "fields": [ + "all_near_match^2" + ], + "query": "Organization theory : a strategic approach" + } + } + ] + } + }, + "rescore": [ + { + "query": { + "query_weight": 1, + "rescore_query": { + "function_score": { + "functions": [ + { + "field_value_factor": { + "field": "incoming_links", + "missing": 0, + "modifier": "log2p" + } + }, + { + "filter": { + "terms": { + "namespace": [ + 1 + ] + } + }, + "weight": "0.25" + }, + { + "filter": { + "terms": { + "namespace": [ + 2 + ] + } + }, + "weight": "0.05" + }, + { + "filter": { + "terms": { + "namespace": [ + 3 + ] + } + }, + "weight": "0.0125" + } + ] + } + }, + "rescore_query_weight": 1, + "score_mode": "multiply" + }, + "window_size": 8192 + }, + { + "query": { + "query_weight": 1, + "rescore_query": { + "multi_match": { + "fields": [ + "text.plain^0.2", + "text^0.14" + ], + "query": "Organization theory \\: a strategic approach", + "slop": 1, + "type": "phrase" + } + }, + "rescore_query_weight": 10 + }, + "window_size": 512 + } + ], + "size": 20, + "stats": [ + "full_text", + "full_text_simple_match", + "suggest" + ], + "stored_fields": [ + "text.word_count" + ], + "suggest": { + "suggest": { + "phrase": { + "confidence": 2, + "direct_generator": [ + { + "field": "suggest", + "max_term_freq": 0.5, + "min_doc_freq": 0, + "prefix_length": 2, + "suggest_mode": "always" + }, + { + "field": "suggest.reverse", + "max_term_freq": 0.5, + "min_doc_freq": 0, + "post_filter": "token_reverse", + "pre_filter": "token_reverse", + "prefix_length": 2, + "suggest_mode": "always" + } + ], + "field": "suggest", + "highlight": { + "post_tag": "<\/em>", + "pre_tag": "<em>" + }, + "max_errors": 2, + "real_word_error_likelihood": 0.95, + "size": 1, + "smoothing": { + "stupid_backoff": { + "discount": 0.4 + } + } + } + }, + "text": "Organization theory : a strategic approach" + } + } + } +] \ No newline at end of file diff --git a/tests/unit/fixtures/searchText/keyword_edge_case_001.fullyfeatured.expected b/tests/unit/fixtures/searchText/keyword_edge_case_001.fullyfeatured.expected new file mode 100644 index 0000000..f3927c6 --- /dev/null +++ b/tests/unit/fixtures/searchText/keyword_edge_case_001.fullyfeatured.expected @@ -0,0 +1,447 @@ +{ + "description": "full_text search for 'Organization theory : a strategic approach'", + "options": { + "search_type": "dfs_query_then_fetch", + "timeout": "20s" + }, + "params": { + "search_type": "dfs_query_then_fetch", + "timeout": "20s" + }, + "query": { + "_source": [ + "namespace", + "namespace_text", + "redirect.*", + "text_bytes", + "timestamp", + "title", + "wiki" + ], + "highlight": { + "fields": { + "auxiliary_text": { + "fragment_size": 150, + "fragmenter": "scan", + "matched_fields": [ + "auxiliary_text", + "auxiliary_text.plain" + ], + "number_of_fragments": 1, + "options": { + "boost_before": { + "20": 2, + "50": 1.8, + "200": 1.5, + "1000": 1.2 + }, + "max_fragments_scored": 5000, + "skip_if_last_matched": true, + "top_scoring": true + }, + "type": "experimental" + }, + "category": { + "fragmenter": "none", + "matched_fields": [ + "category", + "category.plain" + ], + "number_of_fragments": 1, + "options": { + "skip_if_last_matched": true + }, + "order": "score", + "type": "experimental" + }, + "heading": { + "fragmenter": "none", + "matched_fields": [ + "heading", + "heading.plain" + ], + "number_of_fragments": 1, + "options": { + "skip_if_last_matched": true + }, + "order": "score", + "type": "experimental" + }, + "redirect.title": { + "fragmenter": "none", + "matched_fields": [ + "redirect.title", + "redirect.title.plain" + ], + "number_of_fragments": 1, + "options": { + "skip_if_last_matched": true + }, + "order": "score", + "type": "experimental" + }, + "text": { + "fragment_size": 150, + "fragmenter": "scan", + "matched_fields": [ + "text", + "text.plain" + ], + "no_match_size": 150, + "number_of_fragments": 1, + "options": { + "boost_before": { + "20": 2, + "50": 1.8, + "200": 1.5, + "1000": 1.2 + }, + "max_fragments_scored": 5000, + "top_scoring": true + }, + "type": "experimental" + }, + "title": { + "fragmenter": "none", + "matched_fields": [ + "title", + "title.plain" + ], + "number_of_fragments": 1, + "type": "experimental" + } + }, + "highlight_query": { + "query_string": { + "allow_leading_wildcard": true, + "auto_generate_phrase_queries": true, + "default_operator": "OR", + "fields": [ + "auxiliary_text.plain^0.5", + "auxiliary_text^0.25", + "category.plain^8", + "category^4", + "heading.plain^5", + "heading^2.5", + "opening_text.plain^3", + "opening_text^1.5", + "redirect.title.plain^15", + "redirect.title^7.5", + "text.plain^1", + "text^0.5", + "title.plain^20", + "title^10" + ], + "fuzzy_prefix_length": 2, + "max_determinized_states": 500, + "phrase_slop": 1, + "query": "Organization theory \\: a strategic approach", + "rewrite": "top_terms_blended_freqs_1024" + } + }, + "post_tags": [ + "<\/span>" + ], + "pre_tags": [ + "<span class=\"searchmatch\">" + ] + }, + "query": { + "bool": { + "filter": [ + { + "terms": { + "namespace": [ + 0, + 1, + 2, + 3 + ] + } + } + ], + "minimum_should_match": 1, + "should": [ + { + "bool": { + "disable_coord": true, + "filter": [ + { + "bool": { + "should": [ + { + "match": { + "all": { + "operator": "AND", + "query": "Organization theory \\: a strategic approach" + } + } + }, + { + "match": { + "all.plain": { + "operator": "AND", + "query": "Organization theory \\: a strategic approach" + } + } + } + ] + } + } + ], + "should": [ + { + "dis_max": { + "queries": [ + { + "multi_match": { + "boost": 0.4, + "fields": [ + "text.plain^1", + "text^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 0.5, + "fields": [ + "opening_text.plain^1", + "opening_text^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + } + ], + "tie_breaker": 0.2 + } + }, + { + "dis_max": { + "queries": [ + { + "multi_match": { + "boost": 1.05, + "fields": [ + "suggest" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 2, + "fields": [ + "redirect.title.plain^1", + "redirect.title^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + } + ] + } + }, + { + "multi_match": { + "boost": 0.2, + "fields": [ + "auxiliary_text.plain^1", + "auxiliary_text^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 0.2, + "fields": [ + "file_text.plain^1", + "file_text^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 1.3, + "fields": [ + "heading.plain^1", + "heading^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 1.8, + "fields": [ + "category.plain^1", + "category^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + }, + { + "multi_match": { + "boost": 2.3, + "fields": [ + "title.plain^1", + "title^0.3" + ], + "minimum_should_match": "1", + "query": "Organization theory \\: a strategic approach", + "type": "most_fields" + } + } + ] + } + }, + { + "multi_match": { + "fields": [ + "all_near_match^2" + ], + "query": "Organization theory : a strategic approach" + } + } + ] + } + }, + "rescore": [ + { + "query": { + "query_weight": 1, + "rescore_query": { + "function_score": { + "functions": [ + { + "field_value_factor": { + "field": "incoming_links", + "missing": 0, + "modifier": "log2p" + } + }, + { + "filter": { + "terms": { + "namespace": [ + 1 + ] + } + }, + "weight": "0.25" + }, + { + "filter": { + "terms": { + "namespace": [ + 2 + ] + } + }, + "weight": "0.05" + }, + { + "filter": { + "terms": { + "namespace": [ + 3 + ] + } + }, + "weight": "0.0125" + } + ] + } + }, + "rescore_query_weight": 1, + "score_mode": "multiply" + }, + "window_size": 8192 + }, + { + "query": { + "query_weight": 1, + "rescore_query": { + "multi_match": { + "fields": [ + "text.plain^0.2", + "text^0.14" + ], + "query": "Organization theory \\: a strategic approach", + "slop": 1, + "type": "phrase" + } + }, + "rescore_query_weight": 10 + }, + "window_size": 512 + } + ], + "size": 20, + "stats": [ + "full_text", + "full_text_simple_match", + "suggest" + ], + "stored_fields": [ + "text.word_count" + ], + "suggest": { + "suggest": { + "phrase": { + "confidence": 2, + "direct_generator": [ + { + "field": "suggest", + "max_term_freq": 0.5, + "min_doc_freq": 0, + "prefix_length": 2, + "suggest_mode": "always" + }, + { + "field": "suggest.reverse", + "max_term_freq": 0.5, + "min_doc_freq": 0, + "post_filter": "token_reverse", + "pre_filter": "token_reverse", + "prefix_length": 2, + "suggest_mode": "always" + } + ], + "field": "suggest", + "highlight": { + "post_tag": "<\/em>", + "pre_tag": "<em>" + }, + "max_errors": 2, + "real_word_error_likelihood": 0.95, + "size": 1, + "smoothing": { + "stupid_backoff": { + "discount": 0.4 + } + } + } + }, + "text": "Organization theory : a strategic approach" + } + } +} \ No newline at end of file diff --git a/tests/unit/fixtures/searchText/keyword_edge_case_001.query b/tests/unit/fixtures/searchText/keyword_edge_case_001.query new file mode 100644 index 0000000..5e8b10d --- /dev/null +++ b/tests/unit/fixtures/searchText/keyword_edge_case_001.query @@ -0,0 +1 @@ +Organization theory : a strategic approach diff --git a/tests/unit/fixtures/searchText/morelike_001.default.expected b/tests/unit/fixtures/searchText/morelike_001.default.expected index e5348f9..59f090e 100644 --- a/tests/unit/fixtures/searchText/morelike_001.default.expected +++ b/tests/unit/fixtures/searchText/morelike_001.default.expected @@ -148,10 +148,10 @@ ], "max_doc_freq": null, "max_query_terms": 25, - "max_word_len": 0, + "max_word_length": 0, "min_doc_freq": 2, "min_term_freq": 2, - "min_word_len": 0, + "min_word_length": 0, "minimum_should_match": "30%" } } diff --git a/tests/unit/fixtures/searchText/morelike_001.fullyfeatured-interwiki.expected b/tests/unit/fixtures/searchText/morelike_001.fullyfeatured-interwiki.expected index e5348f9..59f090e 100644 --- a/tests/unit/fixtures/searchText/morelike_001.fullyfeatured-interwiki.expected +++ b/tests/unit/fixtures/searchText/morelike_001.fullyfeatured-interwiki.expected @@ -148,10 +148,10 @@ ], "max_doc_freq": null, "max_query_terms": 25, - "max_word_len": 0, + "max_word_length": 0, "min_doc_freq": 2, "min_term_freq": 2, - "min_word_len": 0, + "min_word_length": 0, "minimum_should_match": "30%" } } diff --git a/tests/unit/fixtures/searchText/morelike_001.fullyfeatured.expected b/tests/unit/fixtures/searchText/morelike_001.fullyfeatured.expected index e5348f9..59f090e 100644 --- a/tests/unit/fixtures/searchText/morelike_001.fullyfeatured.expected +++ b/tests/unit/fixtures/searchText/morelike_001.fullyfeatured.expected @@ -148,10 +148,10 @@ ], "max_doc_freq": null, "max_query_terms": 25, - "max_word_len": 0, + "max_word_length": 0, "min_doc_freq": 2, "min_term_freq": 2, - "min_word_len": 0, + "min_word_length": 0, "minimum_should_match": "30%" } } diff --git a/tests/unit/fixtures/searchText/morelike_002.default.expected b/tests/unit/fixtures/searchText/morelike_002.default.expected index 1227a41..dde8884 100644 --- a/tests/unit/fixtures/searchText/morelike_002.default.expected +++ b/tests/unit/fixtures/searchText/morelike_002.default.expected @@ -151,10 +151,10 @@ ], "max_doc_freq": null, "max_query_terms": 25, - "max_word_len": 0, + "max_word_length": 0, "min_doc_freq": 2, "min_term_freq": 2, - "min_word_len": 0, + "min_word_length": 0, "minimum_should_match": "30%" } } diff --git a/tests/unit/fixtures/searchText/morelike_002.fullyfeatured-interwiki.expected b/tests/unit/fixtures/searchText/morelike_002.fullyfeatured-interwiki.expected index 1227a41..dde8884 100644 --- a/tests/unit/fixtures/searchText/morelike_002.fullyfeatured-interwiki.expected +++ b/tests/unit/fixtures/searchText/morelike_002.fullyfeatured-interwiki.expected @@ -151,10 +151,10 @@ ], "max_doc_freq": null, "max_query_terms": 25, - "max_word_len": 0, + "max_word_length": 0, "min_doc_freq": 2, "min_term_freq": 2, - "min_word_len": 0, + "min_word_length": 0, "minimum_should_match": "30%" } } diff --git a/tests/unit/fixtures/searchText/morelike_002.fullyfeatured.expected b/tests/unit/fixtures/searchText/morelike_002.fullyfeatured.expected index 1227a41..dde8884 100644 --- a/tests/unit/fixtures/searchText/morelike_002.fullyfeatured.expected +++ b/tests/unit/fixtures/searchText/morelike_002.fullyfeatured.expected @@ -151,10 +151,10 @@ ], "max_doc_freq": null, "max_query_terms": 25, - "max_word_len": 0, + "max_word_length": 0, "min_doc_freq": 2, "min_term_freq": 2, - "min_word_len": 0, + "min_word_length": 0, "minimum_should_match": "30%" } } -- To view, visit https://gerrit.wikimedia.org/r/341026 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I7e5874e069f07381c7e0417cad71f123aa43cdc8 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: es5 Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits