jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/384171 )

Change subject: Update reverse interwiki map to prefer language prefixes over 
others
......................................................................


Update reverse interwiki map to prefer language prefixes over others

* Updated a bunch of parser tests to reflect the change.
* For the T3636 parser test, added an html/parsoid section to eliminate
  a false wt2html failure.

Bug: T177784
Change-Id: I5cf93950a6da69263fb9da59fba2b33cc2e8931f
---
M lib/config/WikiConfig.js
M tests/parserTests-blacklist.js
M tests/parserTests.txt
3 files changed, 41 insertions(+), 21 deletions(-)

Approvals:
  jenkins-bot: Verified
  Arlolra: Looks good to me, approved



diff --git a/lib/config/WikiConfig.js b/lib/config/WikiConfig.js
index e9f8e48..60dadf3 100644
--- a/lib/config/WikiConfig.js
+++ b/lib/config/WikiConfig.js
@@ -232,14 +232,12 @@
                }
        });
 
-       var cachedMatcher = null;
-       this.interWikiMatcher = function() {
-               if (cachedMatcher) {
-                       return cachedMatcher;
-               }
-               var keys = [];
-               var patterns = [];
+       var updatePatterns = function(keys, patterns, filter) {
                conf.interwikiMap.forEach(function(val, key) {
+                       if (!filter(val)) {
+                               return;
+                       }
+
                        var url = val.url;
                        var protocolRelative = url.startsWith('//');
                        if (val.protorel !== undefined) {
@@ -271,6 +269,20 @@
                                patterns.push('^' + val.prefix + '%3A(.*?)');
                        }
                });
+       };
+
+       var cachedMatcher = null;
+       this.interWikiMatcher = function() {
+               if (cachedMatcher) {
+                       return cachedMatcher;
+               }
+               var keys = [];
+               var patterns = [];
+               // For html -> wt reverse mapping, prefer language interwiki prefixes
+               // over other interwiki prefixes. So, use "en" instead of "wikipedia"
+               // for English wikipedia interwiki links.
+               updatePatterns(keys, patterns, function(val) { return !!val.language; });
+               updatePatterns(keys, patterns, function(val) { return !val.language; });
                var reString = '^(?:' + patterns.join('|') + ')$';
                var regExp = new RegExp(reString, 'i');
                var matchFunc = function(s) {
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 0c418d9..acd73e1 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -652,7 +652,6 @@
 add("html2wt", "Internal link with is link prefix", "Aðrir 
[[wiki/Söfnuður|mótmælendasöfnuðir]] og\n");
 add("html2wt", "Internal link with is link trail and link prefix", 
"[[wiki/Mótmælendatrú|xxxar]]\n[[wiki/Mótmælendatrú|mótmælendatrúar]]\n[[wiki/Söfnuður|mótmælendasöfnuður]]\n[[wiki/Söfnuður|mótmælendasöfnuðir]]\n[[wiki/Söfnuður|mótmælendasöfnuðirxxx]]\n");
 add("html2wt", "Parsoid-centric test: Whitespace in ext- and wiki-links should 
be preserved", "[[wiki/Foo|  bar]]\n\n[[wiki/Foo|  ''bar'']]\n\n[http://wp.org 
foo]\n\n[http://wp.org ''foo'']\n");
-add("html2wt", "Interwiki link encoding conversion (T3636)", "* 
[[wikipedia:ro:Olteniţa|Wikipedia:ro:Olteniţa]]\n* 
[[wikipedia:ro:Olteniţa|Wikipedia:ro:Olteniţa]]\n");
 add("html2wt", "Interwiki link with fragment (T4130)", 
"[[meatball:SoftSecurity#foo|MeatBall:SoftSecurity#foo]]\n");
 add("html2wt", "Escaping of interlanguage links (T129218, T156308)", "Blah 
blah blah\n[[:es:Spanish]]\n[[:zh:Chinese| zh : Chinese ]]\n");
 add("html2wt", "Parsoid-specific test: Wikilinks with   should RT 
properly", "[/index.php?title=WW_II&action=edit&redlink=1 WW II]\n");
@@ -1158,8 +1157,8 @@
 add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [[1,3,0]]", 
"[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm ''La muerte de 
Casagemas'' (1901) en el sitio de ]\n");
 add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [[4,0,3]]", "1jnda7a\n");
 add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [[[1,2],2,4]]", 
"[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm ''La muerte de 
Casagemas''1svf0oe (1901) en el sitio de ]mqtmyg6n94l9");
-add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped 
losslessly (T94723) [[[4]]]", "[[wikipedia:European_Robin|1rmduf6]]");
-add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped 
losslessly (T94723) [[[2]]]", "[[wikipedia:European_Robin|134iwocEuropean 
Robin]]");
+add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped 
losslessly (T94723) [[[4]]]", "[[:en:European_Robin|1rmduf6]]");
+add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped 
losslessly (T94723) [[[2]]]", "[[:en:European_Robin|134iwocEuropean Robin]]");
 add("selser", "Unclosed and unmatched quotes 
[[[0,0,4]],2,3,3,[1],0,4,0,1,2,[[2]],3,3,3,2,0,3,4,1,2,2]", "'''''Bold italic 
text '''with bold deactivatedmik1b''\n\n194mnir\n\n'''Bold 
text..'''\n\n1vh6b8p\n\n'''Bold tag left open\n\n118iayy\n\n''hxqm6fItalic tag 
left open''\n\n8zfmbl<!-- Unmatching number of opening, closing tags: 
-->\n\nv7yj6u\n\n''Tom'''s car is bigger than 
'''''<nowiki/>'''Susan'''s.\n\n1suii2h\n\n1qt3jiw\n\nPlain ''italic'''s plain");
 add("selser", "Unclosed and unmatched quotes 
[[1],0,[1],0,[3],0,[2,2],0,4,0,4,2,4,0,3,0,4,0,[[0,[4]],0,3],0,4]", "'''''Bold 
italic text '''with bold deactivated''' in between.'''''\n\n'''''Bold italic 
text ''with italic deactivated'' in between.'''''\n\n1vbvxxl..spanning two 
paragraphs (should not 
work).6tygj0'''\n\n750fcg\n\n1ke2xol\n\nqpzby4\n\n1fztsq9\n\n1qf0akm\n\n''Tom'''3ftppf'''''<nowiki/>'''Susan'''\n\namwrge\n");
 add("selser", "Unclosed and unmatched quotes 
[[[3,0,[3]]],0,[1],0,2,2,3,0,[4],4,[4],0,0,3,0,4,[2,3,3,4,3,0],0,4,2,[0,3,4]]", 
"''with bold deactivated'''<nowiki/>'''''\n\n'''''Bold italic text ''with 
italic deactivated'' in between.'''''\n\n1o7p7gt\n\n'''Bold 
text..\n\nypestg\n\n9sn2o4\n\n1clpd1j\n\n1933mb7\n\nNormal text.<!-- Unmatching 
number of opening, closing tags: -->\n\nr7rcfr\n\n9vn9he'''This 
year''''1bex21s.\n\n23segf\n\n15xu7jn\n\nPlain 1038n5m");
@@ -1311,13 +1310,11 @@
 add("selser", "Parsoid-centric test: Whitespace in ext- and wiki-links should 
be preserved [3,4,2,2,2,0,4]", "1ft87cu\n\n13xu2qq\n\n[[Foo|  
''bar'']]\n\n15lnhl6\n\n1djvh1q\n\n[http://wp.org   foo]\n\n1rzfwwg\n");
 add("selser", "Parsoid-centric test: Whitespace in ext- and wiki-links should 
be preserved [0,2,1,3,[[4]],3,0]", "[[Foo|  bar]]\n\np3atih\n\n[[Foo|  
''bar'']]\n\n[http://wp.org rop4jb]\n\n[http://wp.org   ''foo'']");
 add("selser", "Parsoid-centric test: Whitespace in ext- and wiki-links should 
be preserved [3,4,4,2,2,4,0]", 
"1bj9bbq\n\n11icu1m\n\n1f2hrph\n\n1d06nta\n\n[http://wp.org   
foo]\n\n1npgg6a\n\n[http://wp.org   ''foo'']");
-add("selser", "Interwiki link encoding conversion (T3636) [[[3],2,[2]]]", 
"*\n* edo4is\n*1k978vr[[Wikipedia:ro:Olteniţa]]");
-add("selser", "Different interwiki prefixes mapping to the same URL 
[2,0,[4],0,2,4,3,0,3,3,[2],2,[4]]", 
"1cmr4k9\n\n[[:en:Foo]]\n\nhvdis9\n\na1dr3q\n\n[[wikipedia:Foo]]\n\ne49fuw\n\nnsemot[[wikipedia:en:Foo]]\n\nm67gy7\n\n1jea487");
-add("selser", "Different interwiki prefixes mapping to the same URL 
[3,4,0,0,[1],4,3,0,3,0,[2],0,3]", 
"1nj1lgj\n\n[[:en:Foo|Foo]]\n\n[[wikipedia:Foo]]\n\n1iwxfz6\n\n1v5pb3c[[wikipedia:en:Foo]]\n");
-add("selser", "Different interwiki prefixes mapping to the same URL 
[4,3,[4],4,2,0,[1],2,1,0,[2],0,3]", 
"1niy9um\n\n1rj1oh1\n\n17bubmt\n\nwa0uv2\n\n[[wikipedia:Foo]]\n\n[[:wikipedia:Foo|Foo]]\n\n1gcjzpc\n\n[[wikipedia:en:Foo]]\n\n1x968no[[wikipedia:en:Foo]]\n");
-add("selser", "Different interwiki prefixes mapping to the same URL 
[4,4,1,4,[3],4,[[4]],0,4,0,[1],3,[4]]", 
"1ts2wsm\n\n1ha756h\n\n[[:en:Foo|Foo]]\n\n1oy27y4\n\n1wctm47\n\n[[:wikipedia:Foo|3wyj0g]]\n\n13tzutw\n\n[[wikipedia:en:Foo]]\n\ndrkjrg");
-add("selser", "Different interwiki prefixes mapping to the same URL 
[[4],0,[3],0,0,2,[2],0,[4],0,[2],4,[[2]]]", 
"vp9v9k\n\n[[wikipedia:Foo]]\n\n1dr7otu\n\n1ws4mui[[:wikipedia:Foo|Foo]]\n\n1szk0ja\n\nkvaw0k[[wikipedia:en:Foo]]\n\n1cbqxcb\n\n[[
  wikiPEdia :Foo|qzsy1a  wikiPEdia :Foo]]");
-add("selser", "Different interwiki prefixes mapping to the same URL 
[1,3,1,4,[[4]],0,3,2,[3],0,1,0,3]", 
"[[:en:Foo]]\n\n[[:en:Foo|Foo]]\n\n1qaad10\n\n[[wikipedia:Foo|zc9a9s]]\n\n1kh1ssv\n\n[[wikipedia:en:Foo]]\n");
+add("selser", "Interwiki link encoding conversion (T3636) [[[3],2,[2]]]", 
"*\n* edo4is\n*1k978vr[[:Wikipedia:ro:Olteniţa]]");
+add("selser", "Different interwiki prefixes mapping to the same URL 
[[1],0,4,0,1,0,3,0,[1],0,[3],0,[1]]", 
"[[:en:Foo]]\n\nv7i85g\n\n[[wikipedia:Foo]]\n\n[[:wikipedia:en:Foo]]\n\n[[:  
wikiPEdia :Foo]]\n");
+add("selser", "Different interwiki prefixes mapping to the same URL 
[4,0,[[3]],0,1,0,[[4]],2,0,0,[1],4,1]", 
"1l14vkx\n\n[:en:Foo]\n\n[[wikipedia:Foo]]\n\n[[:wikipedia:Foo|nbuvsk]]\n\nk4ccf8\n\n[[wikipedia:en:Foo]]\n\n[[:wikipedia:en:Foo]]\n\n1gr9ugr\n\n[[:
  wikiPEdia :Foo]]");
+add("selser", "Different interwiki prefixes mapping to the same URL 
[2,0,[[4]],3,3,0,[[3]],0,1,3,0,3,[2]]", 
"1belm8p\n\n[[:en:Foo]]\n\n[[:en:Foo|jvi0fn]]\n\n[:wikipedia:Foo]\n\n[[:wikipedia:en:Foo]]\n\n[[:wikipedia:en:Foo]]\n\nrqndks[[
  wikiPEdia :Foo]]");
+add("selser", "Different interwiki prefixes mapping to the same URL 
[4,3,[[2]],2,[2],0,4,4,[[4]],4,[[2]],3,[4]]", 
"1vnl7bc\n\n[[:en:Foo|1mo1igaFoo]]\n\n15p8av9\n\nt1tnk7[[:wikipedia:Foo]]\n\nik65u\n\ncp91v8\n\n[[wikipedia:en:Foo|vzo3uh]]\n\n1hyadw9\n\n[[:wikipedia:en:Foo|11hwcmxwikipedia:en:Foo]]\n\n8ja2ps");
 add("selser", "Parsoid: recognize interwiki links without a target page 
[2,2,1]", "13h7yqp\n\n[[:es:]]\n\nn76wcf\n\n[[ko:]]");
 add("selser", "Parsoid: recognize interwiki links without a target page 
[[2],2,1]", "1fz9jlc[[:es:]]\n\n2alzzr\n\n[[ko:]]");
 add("selser", "Parsoid: recognize interwiki links without a target page 
[1,4,1]", "[[:es:]]\n\neviwhn\n\n[[ko:]]");
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index 7cff659..d046a5c 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -4833,8 +4833,11 @@
 </p>
 !! end
 
+## html2wt and html2html will fail because we will prefer the :en: interwiki prefix over wikipedia:
 !! test
 External links: with no contents
+!! options
+parsoid=wt2html,wt2wt
 !! wikitext
 [http://en.wikipedia.org/wiki/Foo]
 
@@ -5962,11 +5965,11 @@
 !! wikitext
 [[Foo|Bar]]
 [[Foo|Bar]]
-[[wikipedia:Foo|Bar]]
-[[wikipedia:Foo|Bar]]
+[[:en:Foo|Bar]]
+[[:en:Foo|Bar]]
 
-[[wikipedia:European_Robin|European Robin]]
-[[wikipedia:European_Robin|European Robin]]
+[[:en:European_Robin|European Robin]]
+[[:en:European_Robin|European Robin]]
 !! end
 
 !! test
@@ -8611,8 +8614,11 @@
 <p><a rel="mw:ExtLink" href="http://www.usemod.com/cgi-bin/mb.pl?" title="meatball:">MeatBall:</a></p>
 !! end
 
+## html2wt and html2html will fail because we will prefer the :en: interwiki prefix over wikipedia:
 !! test
 Interwiki link encoding conversion (T3636)
+!! options
+parsoid=wt2html,wt2wt
 !! wikitext
 *[[Wikipedia:ro:Olteni&#0355;a]]
 *[[Wikipedia:ro:Olteni&#355;a]]
@@ -8625,6 +8631,11 @@
 <li><a href="http://en.wikipedia.org/wiki/ro:Olteni%C5%A3a" class="extiw" title="wikipedia:ro:Olteniţa">Wikipedia:ro:Olteniţa</a></li>
 <li><a href="http://en.wikipedia.org/wiki/ro:Olteni%C5%A3a" class="extiw" title="wikipedia:ro:Olteniţa">Wikipedia:ro:Olteniţa</a></li>
 </ul>
+!! html/parsoid
+<ul>
+<li><a rel="mw:ExtLink" href="http://en.wikipedia.org/wiki/ro:Olteniţa" title="wikipedia:ro:Olteniţa">Wikipedia:ro:Olteniţa</a></li>
+<li><a rel="mw:ExtLink" href="http://en.wikipedia.org/wiki/ro:Olteniţa" title="wikipedia:ro:Olteniţa">Wikipedia:ro:Olteniţa</a></li>
+</ul>
 !! end
 
 !! test

-- 
To view, visit https://gerrit.wikimedia.org/r/384171
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I5cf93950a6da69263fb9da59fba2b33cc2e8931f
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org>
Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org>
Gerrit-Reviewer: C. Scott Ananian <canan...@wikimedia.org>
Gerrit-Reviewer: Sbailey <sbai...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to