Luis Felipe Schenone has uploaded a new change for review. https://gerrit.wikimedia.org/r/286580
Change subject: Further normalise keys to find titles with special characters ...................................................................... Further normalise keys to find titles with special characters Using iconv, all special characters are transliterated to ASCII. Then, all non-alphanumerical, non-whitespace characters are removed. This allows for many new kinds of relevant matches in languages such as Spanish where diacritics are common. For example, searching "avion" now matches "Avión". Bug: T22097 Change-Id: I80e8cf68b213a1593197cbfe6b9ab7de0184de0b --- M TitleKey_body.php 1 file changed, 6 insertions(+), 1 deletion(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/TitleKey refs/changes/80/286580/1 diff --git a/TitleKey_body.php b/TitleKey_body.php index 63cf769..319b7f4 100644 --- a/TitleKey_body.php +++ b/TitleKey_body.php @@ -37,10 +37,13 @@ static function setBatchKeys( $titles ) { $rows = array(); foreach( $titles as $id => $title ) { + $key = $title->getText(); + $key = iconv( 'UTF-8', 'ASCII//TRANSLIT', $key ); + $key = preg_replace( '#[^-\w\s]+#', '', $key ); // Remove unwanted chars $rows[] = array( 'tk_page' => $id, 'tk_namespace' => $title->getNamespace(), - 'tk_key' => self::normalize( $title->getText() ), + 'tk_key' => self::normalize( $key ), ); } $db = wfGetDB( DB_MASTER ); @@ -53,6 +56,8 @@ // Normalization... 
static function normalize( $text ) { global $wgContLang; + $text = iconv( 'UTF-8', 'ASCII//TRANSLIT', $text ); + $text = preg_replace( '#[^-\w\s]+#', '', $text ); // Remove unwanted chars return $wgContLang->caseFold( $text ); } -- To view, visit https://gerrit.wikimedia.org/r/286580 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I80e8cf68b213a1593197cbfe6b9ab7de0184de0b Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/TitleKey Gerrit-Branch: master Gerrit-Owner: Luis Felipe Schenone <scheno...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits