Smalyshev has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/386548 )

Change subject: Make search for titles be always uppercase
......................................................................

Make search for titles be always uppercase

Fortunately, Wikidata titles are uppercase.
We may need a better solution, but that may require a full
reindex.

Bug: T179045
Change-Id: I83259e34b49b18ae8d4bff0ccb8c7738c0ea0d05
(cherry picked from commit 4f91ffb6dba082aee721a402f602e85add2c0107)
---
M repo/includes/Search/Elastic/EntitySearchElastic.php
M repo/tests/phpunit/data/entitySearch/search_de-ch-en.expected
M repo/tests/phpunit/data/entitySearch/search_de-ch.expected
M repo/tests/phpunit/data/entitySearch/search_de-ch_strict.expected
M repo/tests/phpunit/data/entitySearch/search_en.expected
M repo/tests/phpunit/data/entitySearch/search_en_strict.expected
A repo/tests/phpunit/data/entitySearch/search_id.expected
A repo/tests/phpunit/data/entitySearch/search_id.query
8 files changed, 187 insertions(+), 6 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase 
refs/changes/48/386548/1

diff --git a/repo/includes/Search/Elastic/EntitySearchElastic.php 
b/repo/includes/Search/Elastic/EntitySearchElastic.php
index 5e94101..b80bfcc 100644
--- a/repo/includes/Search/Elastic/EntitySearchElastic.php
+++ b/repo/includes/Search/Elastic/EntitySearchElastic.php
@@ -187,7 +187,9 @@
                $labelsQuery = new BoolQuery();
                $labelsQuery->addFilter( $labelsFilter );
                $labelsQuery->addMust( $dismax );
-               $titleMatch = new Term( [ 'title.keyword' => $text ] );
+               // TODO: this is a bit hacky; a better way would be to make the field case-insensitive
+               // or add a new subfield which is case-insensitive
+               $titleMatch = new Term( [ 'title.keyword' => strtoupper( $text 
) ] );
 
                // Match either labels or exact match to title
                $query->addShould( $labelsQuery );
diff --git a/repo/tests/phpunit/data/entitySearch/search_de-ch-en.expected 
b/repo/tests/phpunit/data/entitySearch/search_de-ch-en.expected
index 48d79ef..4a28724 100644
--- a/repo/tests/phpunit/data/entitySearch/search_de-ch-en.expected
+++ b/repo/tests/phpunit/data/entitySearch/search_de-ch-en.expected
@@ -129,7 +129,7 @@
                     },
                     {
                         "term": {
-                            "title.keyword": "Wien"
+                            "title.keyword": "WIEN"
                         }
                     }
                 ],
diff --git a/repo/tests/phpunit/data/entitySearch/search_de-ch.expected 
b/repo/tests/phpunit/data/entitySearch/search_de-ch.expected
index 48ef5ea..81b6ba1 100644
--- a/repo/tests/phpunit/data/entitySearch/search_de-ch.expected
+++ b/repo/tests/phpunit/data/entitySearch/search_de-ch.expected
@@ -129,7 +129,7 @@
                     },
                     {
                         "term": {
-                            "title.keyword": "Wien"
+                            "title.keyword": "WIEN"
                         }
                     }
                 ],
diff --git a/repo/tests/phpunit/data/entitySearch/search_de-ch_strict.expected 
b/repo/tests/phpunit/data/entitySearch/search_de-ch_strict.expected
index bc3b388..2163aca 100644
--- a/repo/tests/phpunit/data/entitySearch/search_de-ch_strict.expected
+++ b/repo/tests/phpunit/data/entitySearch/search_de-ch_strict.expected
@@ -59,7 +59,7 @@
                     },
                     {
                         "term": {
-                            "title.keyword": "Wien"
+                            "title.keyword": "WIEN"
                         }
                     }
                 ],
diff --git a/repo/tests/phpunit/data/entitySearch/search_en.expected 
b/repo/tests/phpunit/data/entitySearch/search_en.expected
index 857c6bf..e745659 100644
--- a/repo/tests/phpunit/data/entitySearch/search_en.expected
+++ b/repo/tests/phpunit/data/entitySearch/search_en.expected
@@ -69,7 +69,7 @@
                     },
                     {
                         "term": {
-                            "title.keyword": "Duck"
+                            "title.keyword": "DUCK"
                         }
                     }
                 ],
diff --git a/repo/tests/phpunit/data/entitySearch/search_en_strict.expected 
b/repo/tests/phpunit/data/entitySearch/search_en_strict.expected
index c6a9848..939a7d7 100644
--- a/repo/tests/phpunit/data/entitySearch/search_en_strict.expected
+++ b/repo/tests/phpunit/data/entitySearch/search_en_strict.expected
@@ -59,7 +59,7 @@
                     },
                     {
                         "term": {
-                            "title.keyword": "Duck"
+                            "title.keyword": "DUCK"
                         }
                     }
                 ],
diff --git a/repo/tests/phpunit/data/entitySearch/search_id.expected 
b/repo/tests/phpunit/data/entitySearch/search_id.expected
new file mode 100644
index 0000000..5cab8fb
--- /dev/null
+++ b/repo/tests/phpunit/data/entitySearch/search_id.expected
@@ -0,0 +1,172 @@
+{
+    "description": "wikibase_prefix search for 'q42'",
+    "params": {
+        "timeout": "20s"
+    },
+    "query": {
+        "query": {
+            "bool": {
+                "should": [
+                    {
+                        "bool": {
+                            "filter": [
+                                {
+                                    "match": {
+                                        "labels_all.prefix": "q42"
+                                    }
+                                }
+                            ],
+                            "must": [
+                                {
+                                    "dis_max": {
+                                        "tie_breaker": 0,
+                                        "queries": [
+                                            {
+                                                "constant_score": {
+                                                    "filter": {
+                                                        "match": {
+                                                            
"labels.en.near_match": "q42"
+                                                        }
+                                                    },
+                                                    "boost": 2
+                                                }
+                                            },
+                                            {
+                                                "constant_score": {
+                                                    "filter": {
+                                                        "match": {
+                                                            
"labels.en.near_match_folded": "q42"
+                                                        }
+                                                    },
+                                                    "boost": 1.8
+                                                }
+                                            },
+                                            {
+                                                "constant_score": {
+                                                    "filter": {
+                                                        "match": {
+                                                            
"labels.en.prefix": "q42"
+                                                        }
+                                                    },
+                                                    "boost": 1.1
+                                                }
+                                            },
+                                            {
+                                                "constant_score": {
+                                                    "filter": {
+                                                        "match": {
+                                                            
"labels_all.near_match_folded": "q42"
+                                                        }
+                                                    },
+                                                    "boost": 0.001
+                                                }
+                                            }
+                                        ]
+                                    }
+                                }
+                            ]
+                        }
+                    },
+                    {
+                        "term": {
+                            "title.keyword": "Q42"
+                        }
+                    }
+                ],
+                "minimum_should_match": 1,
+                "filter": [
+                    {
+                        "term": {
+                            "content_model": "wikibase-item"
+                        }
+                    }
+                ]
+            }
+        },
+        "_source": [
+            "namespace",
+            "title",
+            "labels.en",
+            "descriptions.en"
+        ],
+        "stored_fields": [],
+        "highlight": {
+            "pre_tags": [
+                ""
+            ],
+            "post_tags": [
+                ""
+            ],
+            "fields": {
+                "title": {
+                    "type": "experimental",
+                    "fragmenter": "none",
+                    "number_of_fragments": 0,
+                    "matched_fields": [
+                        "title.keyword"
+                    ]
+                },
+                "labels.en.prefix": {
+                    "type": "experimental",
+                    "fragmenter": "none",
+                    "number_of_fragments": 0,
+                    "options": {
+                        "skip_if_last_matched": true,
+                        "return_snippets_and_offsets": true
+                    }
+                },
+                "labels.*.prefix": {
+                    "type": "experimental",
+                    "fragmenter": "none",
+                    "number_of_fragments": 0,
+                    "options": {
+                        "skip_if_last_matched": true,
+                        "return_snippets_and_offsets": true
+                    }
+                }
+            }
+        },
+        "size": 10,
+        "rescore": [
+            {
+                "window_size": 8192,
+                "query": {
+                    "query_weight": 1,
+                    "rescore_query_weight": 1,
+                    "score_mode": "total",
+                    "rescore_query": {
+                        "function_score": {
+                            "score_mode": "sum",
+                            "functions": [
+                                {
+                                    "script_score": {
+                                        "script": {
+                                            "inline": 
"pow(doc['incoming_links'].value , 2) \/ ( pow(doc['incoming_links'].value, 2) 
+ pow(50,2))",
+                                            "lang": "expression"
+                                        }
+                                    },
+                                    "weight": 0.6
+                                },
+                                {
+                                    "script_score": {
+                                        "script": {
+                                            "inline": 
"pow(doc['sitelink_count'].value , 2) \/ ( pow(doc['sitelink_count'].value, 2) 
+ pow(20,2))",
+                                            "lang": "expression"
+                                        }
+                                    },
+                                    "weight": 0.4
+                                }
+                            ]
+                        }
+                    }
+                }
+            }
+        ],
+        "stats": [
+            "wikibase-prefix"
+        ]
+    },
+    "options": {
+        "timeout": "20s"
+    }
+}
\ No newline at end of file
diff --git a/repo/tests/phpunit/data/entitySearch/search_id.query 
b/repo/tests/phpunit/data/entitySearch/search_id.query
new file mode 100644
index 0000000..b9dac8b
--- /dev/null
+++ b/repo/tests/phpunit/data/entitySearch/search_id.query
@@ -0,0 +1,7 @@
+{
+       "search": "q42",
+       "language": "en",
+       "userLang": "en",
+       "type": "item",
+       "strictlanguage": false
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/386548
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I83259e34b49b18ae8d4bff0ccb8c7738c0ea0d05
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: wmf/1.31.0-wmf.5
Gerrit-Owner: Smalyshev <smalys...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to