[MediaWiki-commits] [Gerrit] mediawiki...parsoid[master]: Update reverse interwiki map to prefer language prefixes over others
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/384171 ) Change subject: Update reverse interwiki map to prefer language prefixes over others .. Update reverse interwiki map to prefer language prefixes over others * Updated a bunch of parser tests to reflect the change. * For the T3636 parser test, added a html/parsoid section to eliminate a false wt2html failure. Bug: T177784 Change-Id: I5cf93950a6da69263fb9da59fba2b33cc2e8931f --- M lib/config/WikiConfig.js M tests/parserTests-blacklist.js M tests/parserTests.txt 3 files changed, 41 insertions(+), 21 deletions(-) Approvals: jenkins-bot: Verified Arlolra: Looks good to me, approved diff --git a/lib/config/WikiConfig.js b/lib/config/WikiConfig.js index e9f8e48..60dadf3 100644 --- a/lib/config/WikiConfig.js +++ b/lib/config/WikiConfig.js @@ -232,14 +232,12 @@ } }); - var cachedMatcher = null; - this.interWikiMatcher = function() { - if (cachedMatcher) { - return cachedMatcher; - } - var keys = []; - var patterns = []; + var updatePatterns = function(keys, patterns, filter) { conf.interwikiMap.forEach(function(val, key) { + if (!filter(val)) { + return; + } + var url = val.url; var protocolRelative = url.startsWith('//'); if (val.protorel !== undefined) { @@ -271,6 +269,20 @@ patterns.push('^' + val.prefix + '%3A(.*?)'); } }); + }; + + var cachedMatcher = null; + this.interWikiMatcher = function() { + if (cachedMatcher) { + return cachedMatcher; + } + var keys = []; + var patterns = []; + // For html -> wt reverse mapping, prefer language interwiki prefixes + // over other interwiki prefixes. So, use "en" instead of "wikipedia" + // for English wikipedia interwiki links. 
+ updatePatterns(keys, patterns, function(val) { return !!val.language; }); + updatePatterns(keys, patterns, function(val) { return !val.language; }); var reString = '^(?:' + patterns.join('|') + ')$'; var regExp = new RegExp(reString, 'i'); var matchFunc = function(s) { diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js index 0c418d9..acd73e1 100644 --- a/tests/parserTests-blacklist.js +++ b/tests/parserTests-blacklist.js @@ -652,7 +652,6 @@ add("html2wt", "Internal link with is link prefix", "Aðrir [[wiki/Söfnuður|mótmælendasöfnuðir]] og\n"); add("html2wt", "Internal link with is link trail and link prefix", "[[wiki/Mótmælendatrú|xxxar]]\n[[wiki/Mótmælendatrú|mótmælendatrúar]]\n[[wiki/Söfnuður|mótmælendasöfnuður]]\n[[wiki/Söfnuður|mótmælendasöfnuðir]]\n[[wiki/Söfnuður|mótmælendasöfnuðirxxx]]\n"); add("html2wt", "Parsoid-centric test: Whitespace in ext- and wiki-links should be preserved", "[[wiki/Foo| bar]]\n\n[[wiki/Foo| ''bar'']]\n\n[http://wp.org foo]\n\n[http://wp.org ''foo'']\n"); -add("html2wt", "Interwiki link encoding conversion (T3636)", "* [[wikipedia:ro:Olteniţa|Wikipedia:ro:Olteniţa]]\n* [[wikipedia:ro:Olteniţa|Wikipedia:ro:Olteniţa]]\n"); add("html2wt", "Interwiki link with fragment (T4130)", "[[meatball:SoftSecurity#foo|MeatBall:SoftSecurity#foo]]\n"); add("html2wt", "Escaping of interlanguage links (T129218, T156308)", "Blah blah blah\n[[:es:Spanish]]\n[[:zh:Chinese| zh : Chinese ]]\n"); add("html2wt", "Parsoid-specific test: Wikilinks with should RT properly", "[/index.php?title=WW_II&action=edit&redlink=1 WW II]\n"); @@ -1158,8 +1157,8 @@ add("selser", "External link containing double-single-quotes with no space separating the url from text in italics [[1,3,0]]", "[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm ''La muerte de Casagemas'' (1901) en el sitio de ]\n"); add("selser", "External link containing double-single-quotes with no space separating the url from text in italics [[4,0,3]]", "1jnda7a\n"); 
add("selser", "External link containing double-single-quotes with no space separating the url from text in italics [[[1,2],2,4]]", "[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm ''La muerte de Casagemas''1svf0oe (1901) en el sitio de ]mqtmyg6n94l9"); -add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped losslessly (T94723) [[[4]]]", "[[wikipedia:European_Robin|1rmduf6]]"); -add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped losslessly (T94723) [[[2]]]", "[[wikipedia:European_Robin|134iwocEuropean Robin]]"); +add("selser", "mw:ExtLink linking to a interwiki URL can b
[MediaWiki-commits] [Gerrit] mediawiki...parsoid[master]: Update reverse interwiki map to prefer language prefixes over others
Subramanya Sastry has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/384171 ) Change subject: Update reverse interwiki map to prefer language prefixes over others .. Update reverse interwiki map to prefer language prefixes over others * Updated a bunch of parser tests to reflect the change. Bug: T177784 Change-Id: I5cf93950a6da69263fb9da59fba2b33cc2e8931f --- M lib/config/WikiConfig.js M tests/parserTests-blacklist.js M tests/parserTests.txt 3 files changed, 38 insertions(+), 21 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/71/384171/1 diff --git a/lib/config/WikiConfig.js b/lib/config/WikiConfig.js index e9f8e48..52e1c4f 100644 --- a/lib/config/WikiConfig.js +++ b/lib/config/WikiConfig.js @@ -232,14 +232,12 @@ } }); - var cachedMatcher = null; - this.interWikiMatcher = function() { - if (cachedMatcher) { - return cachedMatcher; - } - var keys = []; - var patterns = []; + var updatePatterns = function(keys, patterns, filter) { conf.interwikiMap.forEach(function(val, key) { + if (!filter(val)) { + return; + } + var url = val.url; var protocolRelative = url.startsWith('//'); if (val.protorel !== undefined) { @@ -271,6 +269,20 @@ patterns.push('^' + val.prefix + '%3A(.*?)'); } }); + } + + var cachedMatcher = null; + this.interWikiMatcher = function() { + if (cachedMatcher) { + return cachedMatcher; + } + var keys = []; + var patterns = []; + // For html -> wt reverse mapping, prefer language interwiki prefixes + // over other interwiki prefixes. So, use "en" instead of "wikipedia" + // for English wikipedia interwiki links. 
+ updatePatterns(keys, patterns, function(val) { return !!val.language; }); + updatePatterns(keys, patterns, function(val) { return !val.language; }); var reString = '^(?:' + patterns.join('|') + ')$'; var regExp = new RegExp(reString, 'i'); var matchFunc = function(s) { diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js index 0c418d9..0cd0c61 100644 --- a/tests/parserTests-blacklist.js +++ b/tests/parserTests-blacklist.js @@ -358,6 +358,7 @@ add("html2html", "Internal link with is link prefix", "Aðrir mótmælendasöfnuðir og\n"); add("html2html", "Internal link with is link trail and link prefix", "xxxar\nmótmælendatrúar\nmótmælendasöfnuður\nmótmælendasöfnuðir\nmótmælendasöfnuðirxxx\n"); add("html2html", "Parsoid-centric test: Whitespace in ext- and wiki-links should be preserved", " bar\n\n bar\n\nhttp://wp.org\"; data-parsoid='{\"targetOff\":59,\"contentOffsets\":[59,62],\"dsr\":[44,63,15,1]}'>foo\n\nhttp://wp.org\"; data-parsoid='{\"targetOff\":80,\"contentOffsets\":[80,87],\"dsr\":[65,88,15,1]}'>foo\n"); +add("html2html", "Interwiki link encoding conversion (T3636)\n+!! 
options\n+parsoid=wt2html,wt2wt\n+## html2wt and html2html will fail because we will prefer the :en: interwiki prefix over wikipedia:", " Wikipedia:ro:Olteniţa\n Wikipedia:ro:Olteniţa\n"); add("html2html", "Space and question mark encoding in interlanguage links (T95473)", "Blah blah blah\nhttp://es.wikipedia.org/wiki/Foo_bar?\"; data-parsoid='{\"stx\":\"simple\",\"a\":{\"href\":\"http://es.wikipedia.org/wiki/Foo_bar?\"},\"sa\":{\"href\":\"es:Foo_bar?\"},\"dsr\":[15,30,null,null]}'/>"); add("html2html", "Parsoid-specific test: Wikilinks with should RT properly", "[/index.php?title=WW_II&action=edit&redlink=1 WW II]\n"); add("html2html", " to ", "1\n2\n3\n"); @@ -652,7 +653,7 @@ add("html2wt", "Internal link with is link prefix", "Aðrir [[wiki/Söfnuður|mótmælendasöfnuðir]] og\n"); add("html2wt", "Internal link with is link trail and link prefix", "[[wiki/Mótmælendatrú|xxxar]]\n[[wiki/Mótmælendatrú|mótmælendatrúar]]\n[[wiki/Söfnuður|mótmælendasöfnuður]]\n[[wiki/Söfnuður|mótmælendasöfnuðir]]\n[[wiki/Söfnuður|mótmælendasöfnuðirxxx]]\n"); add("html2wt", "Parsoid-centric test: Whitespace in ext- and wiki-links should be preserved", "[[wiki/Foo| bar]]\n\n[[wiki/Foo| ''bar'']]\n\n[http://wp.org foo]\n\n[http://wp.org ''foo'']\n"); -add("html2wt", "Interwiki link encoding conversion (T3636)", "* [[wikipedia:ro:Olteniţa|Wikipedia:ro:Olteniţa]]\n* [[wikipedia:ro:Olteniţa|Wikipedia:ro:Olteniţa]]\n"); +add("html2wt", "Interwiki link encoding conversion (T3636)\n+!! options\n+parsoid=wt2html,wt2wt\n+## html2wt and html2html will fail because we will prefer the :en: