jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/384171 )
Change subject: Update reverse interwiki map to prefer language prefixes over others ...................................................................... Update reverse interwiki map to prefer language prefixes over others * Updated a bunch of parser tests to reflect the change. * For the T3636 parser test, added a html/parsoid section to eliminate a false wt2html failure. Bug: T177784 Change-Id: I5cf93950a6da69263fb9da59fba2b33cc2e8931f --- M lib/config/WikiConfig.js M tests/parserTests-blacklist.js M tests/parserTests.txt 3 files changed, 41 insertions(+), 21 deletions(-) Approvals: jenkins-bot: Verified Arlolra: Looks good to me, approved diff --git a/lib/config/WikiConfig.js b/lib/config/WikiConfig.js index e9f8e48..60dadf3 100644 --- a/lib/config/WikiConfig.js +++ b/lib/config/WikiConfig.js @@ -232,14 +232,12 @@ } }); - var cachedMatcher = null; - this.interWikiMatcher = function() { - if (cachedMatcher) { - return cachedMatcher; - } - var keys = []; - var patterns = []; + var updatePatterns = function(keys, patterns, filter) { conf.interwikiMap.forEach(function(val, key) { + if (!filter(val)) { + return; + } + var url = val.url; var protocolRelative = url.startsWith('//'); if (val.protorel !== undefined) { @@ -271,6 +269,20 @@ patterns.push('^' + val.prefix + '%3A(.*?)'); } }); + }; + + var cachedMatcher = null; + this.interWikiMatcher = function() { + if (cachedMatcher) { + return cachedMatcher; + } + var keys = []; + var patterns = []; + // For html -> wt reverse mapping, prefer language interwiki prefixes + // over other interwiki prefixes. So, use "en" instead of "wikipedia" + // for English wikipedia interwiki links. 
+ updatePatterns(keys, patterns, function(val) { return !!val.language; }); + updatePatterns(keys, patterns, function(val) { return !val.language; }); var reString = '^(?:' + patterns.join('|') + ')$'; var regExp = new RegExp(reString, 'i'); var matchFunc = function(s) { diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js index 0c418d9..acd73e1 100644 --- a/tests/parserTests-blacklist.js +++ b/tests/parserTests-blacklist.js @@ -652,7 +652,6 @@ add("html2wt", "Internal link with is link prefix", "Aðrir [[wiki/Söfnuður|mótmælendasöfnuðir]] og\n"); add("html2wt", "Internal link with is link trail and link prefix", "[[wiki/Mótmælendatrú|xxxar]]\n[[wiki/Mótmælendatrú|mótmælendatrúar]]\n[[wiki/Söfnuður|mótmælendasöfnuður]]\n[[wiki/Söfnuður|mótmælendasöfnuðir]]\n[[wiki/Söfnuður|mótmælendasöfnuðirxxx]]\n"); add("html2wt", "Parsoid-centric test: Whitespace in ext- and wiki-links should be preserved", "[[wiki/Foo| bar]]\n\n[[wiki/Foo| ''bar'']]\n\n[http://wp.org foo]\n\n[http://wp.org ''foo'']\n"); -add("html2wt", "Interwiki link encoding conversion (T3636)", "* [[wikipedia:ro:Olteniţa|Wikipedia:ro:Olteniţa]]\n* [[wikipedia:ro:Olteniţa|Wikipedia:ro:Olteniţa]]\n"); add("html2wt", "Interwiki link with fragment (T4130)", "[[meatball:SoftSecurity#foo|MeatBall:SoftSecurity#foo]]\n"); add("html2wt", "Escaping of interlanguage links (T129218, T156308)", "Blah blah blah\n[[:es:Spanish]]\n[[:zh:Chinese| zh : Chinese ]]\n"); add("html2wt", "Parsoid-specific test: Wikilinks with should RT properly", "[/index.php?title=WW_II&action=edit&redlink=1 WW II]\n"); @@ -1158,8 +1157,8 @@ add("selser", "External link containing double-single-quotes with no space separating the url from text in italics [[1,3,0]]", "[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm ''La muerte de Casagemas'' (1901) en el sitio de ]\n"); add("selser", "External link containing double-single-quotes with no space separating the url from text in italics [[4,0,3]]", "1jnda7a\n"); 
add("selser", "External link containing double-single-quotes with no space separating the url from text in italics [[[1,2],2,4]]", "[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm ''La muerte de Casagemas''1svf0oe (1901) en el sitio de ]mqtmyg6n94l9"); -add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped losslessly (T94723) [[[4]]]", "[[wikipedia:European_Robin|1rmduf6]]"); -add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped losslessly (T94723) [[[2]]]", "[[wikipedia:European_Robin|134iwocEuropean Robin]]"); +add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped losslessly (T94723) [[[4]]]", "[[:en:European_Robin|1rmduf6]]"); +add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped losslessly (T94723) [[[2]]]", "[[:en:European_Robin|134iwocEuropean Robin]]"); add("selser", "Unclosed and unmatched quotes [[[0,0,4]],2,3,3,[1],0,4,0,1,2,[[2]],3,3,3,2,0,3,4,1,2,2]", "'''''Bold italic text '''with bold deactivatedmik1b''\n\n194mnir\n\n'''Bold text..'''\n\n1vh6b8p\n\n'''Bold tag left open\n\n118iayy\n\n''hxqm6fItalic tag left open''\n\n8zfmbl<!-- Unmatching number of opening, closing tags: -->\n\nv7yj6u\n\n''Tom'''s car is bigger than '''''<nowiki/>'''Susan'''s.\n\n1suii2h\n\n1qt3jiw\n\nPlain ''italic'''s plain"); add("selser", "Unclosed and unmatched quotes [[1],0,[1],0,[3],0,[2,2],0,4,0,4,2,4,0,3,0,4,0,[[0,[4]],0,3],0,4]", "'''''Bold italic text '''with bold deactivated''' in between.'''''\n\n'''''Bold italic text ''with italic deactivated'' in between.'''''\n\n1vbvxxl..spanning two paragraphs (should not work).6tygj0'''\n\n750fcg\n\n1ke2xol\n\nqpzby4\n\n1fztsq9\n\n1qf0akm\n\n''Tom'''3ftppf'''''<nowiki/>'''Susan'''\n\namwrge\n"); add("selser", "Unclosed and unmatched quotes [[[3,0,[3]]],0,[1],0,2,2,3,0,[4],4,[4],0,0,3,0,4,[2,3,3,4,3,0],0,4,2,[0,3,4]]", "''with bold deactivated'''<nowiki/>'''''\n\n'''''Bold italic text ''with italic deactivated'' in 
between.'''''\n\n1o7p7gt\n\n'''Bold text..\n\nypestg\n\n9sn2o4\n\n1clpd1j\n\n1933mb7\n\nNormal text.<!-- Unmatching number of opening, closing tags: -->\n\nr7rcfr\n\n9vn9he'''This year''''1bex21s.\n\n23segf\n\n15xu7jn\n\nPlain 1038n5m"); @@ -1311,13 +1310,11 @@ add("selser", "Parsoid-centric test: Whitespace in ext- and wiki-links should be preserved [3,4,2,2,2,0,4]", "1ft87cu\n\n13xu2qq\n\n[[Foo| ''bar'']]\n\n15lnhl6\n\n1djvh1q\n\n[http://wp.org foo]\n\n1rzfwwg\n"); add("selser", "Parsoid-centric test: Whitespace in ext- and wiki-links should be preserved [0,2,1,3,[[4]],3,0]", "[[Foo| bar]]\n\np3atih\n\n[[Foo| ''bar'']]\n\n[http://wp.org rop4jb]\n\n[http://wp.org ''foo'']"); add("selser", "Parsoid-centric test: Whitespace in ext- and wiki-links should be preserved [3,4,4,2,2,4,0]", "1bj9bbq\n\n11icu1m\n\n1f2hrph\n\n1d06nta\n\n[http://wp.org foo]\n\n1npgg6a\n\n[http://wp.org ''foo'']"); -add("selser", "Interwiki link encoding conversion (T3636) [[[3],2,[2]]]", "*\n* edo4is\n*1k978vr[[Wikipedia:ro:Olteniţa]]"); -add("selser", "Different interwiki prefixes mapping to the same URL [2,0,[4],0,2,4,3,0,3,3,[2],2,[4]]", "1cmr4k9\n\n[[:en:Foo]]\n\nhvdis9\n\na1dr3q\n\n[[wikipedia:Foo]]\n\ne49fuw\n\nnsemot[[wikipedia:en:Foo]]\n\nm67gy7\n\n1jea487"); -add("selser", "Different interwiki prefixes mapping to the same URL [3,4,0,0,[1],4,3,0,3,0,[2],0,3]", "1nj1lgj\n\n[[:en:Foo|Foo]]\n\n[[wikipedia:Foo]]\n\n1iwxfz6\n\n1v5pb3c[[wikipedia:en:Foo]]\n"); -add("selser", "Different interwiki prefixes mapping to the same URL [4,3,[4],4,2,0,[1],2,1,0,[2],0,3]", "1niy9um\n\n1rj1oh1\n\n17bubmt\n\nwa0uv2\n\n[[wikipedia:Foo]]\n\n[[:wikipedia:Foo|Foo]]\n\n1gcjzpc\n\n[[wikipedia:en:Foo]]\n\n1x968no[[wikipedia:en:Foo]]\n"); -add("selser", "Different interwiki prefixes mapping to the same URL [4,4,1,4,[3],4,[[4]],0,4,0,[1],3,[4]]", "1ts2wsm\n\n1ha756h\n\n[[:en:Foo|Foo]]\n\n1oy27y4\n\n1wctm47\n\n[[:wikipedia:Foo|3wyj0g]]\n\n13tzutw\n\n[[wikipedia:en:Foo]]\n\ndrkjrg"); -add("selser", "Different 
interwiki prefixes mapping to the same URL [[4],0,[3],0,0,2,[2],0,[4],0,[2],4,[[2]]]", "vp9v9k\n\n[[wikipedia:Foo]]\n\n1dr7otu\n\n1ws4mui[[:wikipedia:Foo|Foo]]\n\n1szk0ja\n\nkvaw0k[[wikipedia:en:Foo]]\n\n1cbqxcb\n\n[[ wikiPEdia :Foo|qzsy1a wikiPEdia :Foo]]"); -add("selser", "Different interwiki prefixes mapping to the same URL [1,3,1,4,[[4]],0,3,2,[3],0,1,0,3]", "[[:en:Foo]]\n\n[[:en:Foo|Foo]]\n\n1qaad10\n\n[[wikipedia:Foo|zc9a9s]]\n\n1kh1ssv\n\n[[wikipedia:en:Foo]]\n"); +add("selser", "Interwiki link encoding conversion (T3636) [[[3],2,[2]]]", "*\n* edo4is\n*1k978vr[[:Wikipedia:ro:Olteniţa]]"); +add("selser", "Different interwiki prefixes mapping to the same URL [[1],0,4,0,1,0,3,0,[1],0,[3],0,[1]]", "[[:en:Foo]]\n\nv7i85g\n\n[[wikipedia:Foo]]\n\n[[:wikipedia:en:Foo]]\n\n[[: wikiPEdia :Foo]]\n"); +add("selser", "Different interwiki prefixes mapping to the same URL [4,0,[[3]],0,1,0,[[4]],2,0,0,[1],4,1]", "1l14vkx\n\n[:en:Foo]\n\n[[wikipedia:Foo]]\n\n[[:wikipedia:Foo|nbuvsk]]\n\nk4ccf8\n\n[[wikipedia:en:Foo]]\n\n[[:wikipedia:en:Foo]]\n\n1gr9ugr\n\n[[: wikiPEdia :Foo]]"); +add("selser", "Different interwiki prefixes mapping to the same URL [2,0,[[4]],3,3,0,[[3]],0,1,3,0,3,[2]]", "1belm8p\n\n[[:en:Foo]]\n\n[[:en:Foo|jvi0fn]]\n\n[:wikipedia:Foo]\n\n[[:wikipedia:en:Foo]]\n\n[[:wikipedia:en:Foo]]\n\nrqndks[[ wikiPEdia :Foo]]"); +add("selser", "Different interwiki prefixes mapping to the same URL [4,3,[[2]],2,[2],0,4,4,[[4]],4,[[2]],3,[4]]", "1vnl7bc\n\n[[:en:Foo|1mo1igaFoo]]\n\n15p8av9\n\nt1tnk7[[:wikipedia:Foo]]\n\nik65u\n\ncp91v8\n\n[[wikipedia:en:Foo|vzo3uh]]\n\n1hyadw9\n\n[[:wikipedia:en:Foo|11hwcmxwikipedia:en:Foo]]\n\n8ja2ps"); add("selser", "Parsoid: recognize interwiki links without a target page [2,2,1]", "13h7yqp\n\n[[:es:]]\n\nn76wcf\n\n[[ko:]]"); add("selser", "Parsoid: recognize interwiki links without a target page [[2],2,1]", "1fz9jlc[[:es:]]\n\n2alzzr\n\n[[ko:]]"); add("selser", "Parsoid: recognize interwiki links without a target page [1,4,1]", 
"[[:es:]]\n\neviwhn\n\n[[ko:]]"); diff --git a/tests/parserTests.txt b/tests/parserTests.txt index 7cff659..d046a5c 100644 --- a/tests/parserTests.txt +++ b/tests/parserTests.txt @@ -4833,8 +4833,11 @@ </p> !! end +## html2wt and html2html will fail because we will prefer the :en: interwiki prefix over wikipedia: !! test External links: with no contents +!! options +parsoid=wt2html,wt2wt !! wikitext [http://en.wikipedia.org/wiki/Foo] @@ -5962,11 +5965,11 @@ !! wikitext [[Foo|Bar]] [[Foo|Bar]] -[[wikipedia:Foo|Bar]] -[[wikipedia:Foo|Bar]] +[[:en:Foo|Bar]] +[[:en:Foo|Bar]] -[[wikipedia:European_Robin|European Robin]] -[[wikipedia:European_Robin|European Robin]] +[[:en:European_Robin|European Robin]] +[[:en:European_Robin|European Robin]] !! end !! test @@ -8611,8 +8614,11 @@ <p><a rel="mw:ExtLink" href="http://www.usemod.com/cgi-bin/mb.pl?" title="meatball:">MeatBall:</a></p> !! end +## html2wt and html2html will fail because we will prefer the :en: interwiki prefix over wikipedia: !! test Interwiki link encoding conversion (T3636) +!! options +parsoid=wt2html,wt2wt !! wikitext *[[Wikipedia:ro:Olteniţa]] *[[Wikipedia:ro:Olteniţa]] @@ -8625,6 +8631,11 @@ <li><a href="http://en.wikipedia.org/wiki/ro:Olteni%C5%A3a" class="extiw" title="wikipedia:ro:Olteniţa">Wikipedia:ro:Olteniţa</a></li> <li><a href="http://en.wikipedia.org/wiki/ro:Olteni%C5%A3a" class="extiw" title="wikipedia:ro:Olteniţa">Wikipedia:ro:Olteniţa</a></li> </ul> +!! html/parsoid +<ul> +<li><a rel="mw:ExtLink" href="http://en.wikipedia.org/wiki/ro:Olteniţa" title="wikipedia:ro:Olteniţa">Wikipedia:ro:Olteniţa</a></li> +<li><a rel="mw:ExtLink" href="http://en.wikipedia.org/wiki/ro:Olteniţa" title="wikipedia:ro:Olteniţa">Wikipedia:ro:Olteniţa</a></li> +</ul> !! end !! 
test
--
To view, visit https://gerrit.wikimedia.org/r/384171
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I5cf93950a6da69263fb9da59fba2b33cc2e8931f
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org>
Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org>
Gerrit-Reviewer: C. Scott Ananian <canan...@wikimedia.org>
Gerrit-Reviewer: Sbailey <sbai...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits