Smalyshev has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/386544 )
Change subject: Make search for titles be always uppercase ...................................................................... Make search for titles be always uppercase Fortunately, wikidata titles are uppercase. We may need a better solution, but that may require a full reindex. Bug: T179045 Change-Id: I83259e34b49b18ae8d4bff0ccb8c7738c0ea0d05 --- M repo/includes/Search/Elastic/EntitySearchElastic.php M repo/tests/phpunit/data/entitySearch/search_de-ch-en.expected M repo/tests/phpunit/data/entitySearch/search_de-ch.expected M repo/tests/phpunit/data/entitySearch/search_de-ch_strict.expected M repo/tests/phpunit/data/entitySearch/search_en.expected M repo/tests/phpunit/data/entitySearch/search_en_strict.expected A repo/tests/phpunit/data/entitySearch/search_id.expected A repo/tests/phpunit/data/entitySearch/search_id.query 8 files changed, 187 insertions(+), 6 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/44/386544/1 diff --git a/repo/includes/Search/Elastic/EntitySearchElastic.php b/repo/includes/Search/Elastic/EntitySearchElastic.php index d9d8eec..9dd9755 100644 --- a/repo/includes/Search/Elastic/EntitySearchElastic.php +++ b/repo/includes/Search/Elastic/EntitySearchElastic.php @@ -190,7 +190,9 @@ $labelsQuery = new BoolQuery(); $labelsQuery->addFilter( $labelsFilter ); $labelsQuery->addMust( $dismax ); - $titleMatch = new Term( [ 'title.keyword' => $text ] ); + // TODO: this is a bit hacky, a better way would be to make the field case-insensitive + // or add new subfield which is case-insensitive + $titleMatch = new Term( [ 'title.keyword' => strtoupper( $text ) ] ); // Match either labels or exact match to title $query->addShould( $labelsQuery ); diff --git a/repo/tests/phpunit/data/entitySearch/search_de-ch-en.expected b/repo/tests/phpunit/data/entitySearch/search_de-ch-en.expected index 48d79ef..4a28724 100644 --- a/repo/tests/phpunit/data/entitySearch/search_de-ch-en.expected +++ 
b/repo/tests/phpunit/data/entitySearch/search_de-ch-en.expected @@ -129,7 +129,7 @@ }, { "term": { - "title.keyword": "Wien" + "title.keyword": "WIEN" } } ], diff --git a/repo/tests/phpunit/data/entitySearch/search_de-ch.expected b/repo/tests/phpunit/data/entitySearch/search_de-ch.expected index 48ef5ea..81b6ba1 100644 --- a/repo/tests/phpunit/data/entitySearch/search_de-ch.expected +++ b/repo/tests/phpunit/data/entitySearch/search_de-ch.expected @@ -129,7 +129,7 @@ }, { "term": { - "title.keyword": "Wien" + "title.keyword": "WIEN" } } ], diff --git a/repo/tests/phpunit/data/entitySearch/search_de-ch_strict.expected b/repo/tests/phpunit/data/entitySearch/search_de-ch_strict.expected index bc3b388..2163aca 100644 --- a/repo/tests/phpunit/data/entitySearch/search_de-ch_strict.expected +++ b/repo/tests/phpunit/data/entitySearch/search_de-ch_strict.expected @@ -59,7 +59,7 @@ }, { "term": { - "title.keyword": "Wien" + "title.keyword": "WIEN" } } ], diff --git a/repo/tests/phpunit/data/entitySearch/search_en.expected b/repo/tests/phpunit/data/entitySearch/search_en.expected index 857c6bf..e745659 100644 --- a/repo/tests/phpunit/data/entitySearch/search_en.expected +++ b/repo/tests/phpunit/data/entitySearch/search_en.expected @@ -69,7 +69,7 @@ }, { "term": { - "title.keyword": "Duck" + "title.keyword": "DUCK" } } ], diff --git a/repo/tests/phpunit/data/entitySearch/search_en_strict.expected b/repo/tests/phpunit/data/entitySearch/search_en_strict.expected index c6a9848..939a7d7 100644 --- a/repo/tests/phpunit/data/entitySearch/search_en_strict.expected +++ b/repo/tests/phpunit/data/entitySearch/search_en_strict.expected @@ -59,7 +59,7 @@ }, { "term": { - "title.keyword": "Duck" + "title.keyword": "DUCK" } } ], diff --git a/repo/tests/phpunit/data/entitySearch/search_id.expected b/repo/tests/phpunit/data/entitySearch/search_id.expected new file mode 100644 index 0000000..5cab8fb --- /dev/null +++ b/repo/tests/phpunit/data/entitySearch/search_id.expected @@ -0,0 +1,172 @@ +{ 
+ "description": "wikibase_prefix search for 'q42'", + "params": { + "timeout": "20s" + }, + "query": { + "query": { + "bool": { + "should": [ + { + "bool": { + "filter": [ + { + "match": { + "labels_all.prefix": "q42" + } + } + ], + "must": [ + { + "dis_max": { + "tie_breaker": 0, + "queries": [ + { + "constant_score": { + "filter": { + "match": { + "labels.en.near_match": "q42" + } + }, + "boost": 2 + } + }, + { + "constant_score": { + "filter": { + "match": { + "labels.en.near_match_folded": "q42" + } + }, + "boost": 1.8 + } + }, + { + "constant_score": { + "filter": { + "match": { + "labels.en.prefix": "q42" + } + }, + "boost": 1.1 + } + }, + { + "constant_score": { + "filter": { + "match": { + "labels_all.near_match_folded": "q42" + } + }, + "boost": 0.001 + } + } + ] + } + } + ] + } + }, + { + "term": { + "title.keyword": "Q42" + } + } + ], + "minimum_should_match": 1, + "filter": [ + { + "term": { + "content_model": "wikibase-item" + } + } + ] + } + }, + "_source": [ + "namespace", + "title", + "labels.en", + "descriptions.en" + ], + "stored_fields": [], + "highlight": { + "pre_tags": [ + "" + ], + "post_tags": [ + "" + ], + "fields": { + "title": { + "type": "experimental", + "fragmenter": "none", + "number_of_fragments": 0, + "matched_fields": [ + "title.keyword" + ] + }, + "labels.en.prefix": { + "type": "experimental", + "fragmenter": "none", + "number_of_fragments": 0, + "options": { + "skip_if_last_matched": true, + "return_snippets_and_offsets": true + } + }, + "labels.*.prefix": { + "type": "experimental", + "fragmenter": "none", + "number_of_fragments": 0, + "options": { + "skip_if_last_matched": true, + "return_snippets_and_offsets": true + } + } + } + }, + "size": 10, + "rescore": [ + { + "window_size": 8192, + "query": { + "query_weight": 1, + "rescore_query_weight": 1, + "score_mode": "total", + "rescore_query": { + "function_score": { + "score_mode": "sum", + "functions": [ + { + "script_score": { + "script": { + "inline": 
"pow(doc['incoming_links'].value , 2) \/ ( pow(doc['incoming_links'].value, 2) + pow(50,2))", + "lang": "expression" + } + }, + "weight": 0.6 + }, + { + "script_score": { + "script": { + "inline": "pow(doc['sitelink_count'].value , 2) \/ ( pow(doc['sitelink_count'].value, 2) + pow(20,2))", + "lang": "expression" + } + }, + "weight": 0.4 + } + ] + } + } + } + } + ], + "stats": [ + "wikibase-prefix" + ] + }, + "options": { + "timeout": "20s" + } +} \ No newline at end of file diff --git a/repo/tests/phpunit/data/entitySearch/search_id.query b/repo/tests/phpunit/data/entitySearch/search_id.query new file mode 100644 index 0000000..b9dac8b --- /dev/null +++ b/repo/tests/phpunit/data/entitySearch/search_id.query @@ -0,0 +1,7 @@ +{ + "search": "q42", + "language": "en", + "userLang": "en", + "type": "item", + "strictlanguage": false +} -- To view, visit https://gerrit.wikimedia.org/r/386544 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I83259e34b49b18ae8d4bff0ccb8c7738c0ea0d05 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Smalyshev <smalys...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits