Subramanya Sastry has uploaded a new change for review. https://gerrit.wikimedia.org/r/292078
Change subject: WIP: Auto-detect interwiki links without needing data-parsoid info ...................................................................... WIP: Auto-detect interwiki links without needing data-parsoid info * This breaks the test "mw:ExtLink linking to a interwiki URL can be round-tripped losslessly (T94723)", but I am not sure that we need to preserve that behavior. It looks like there is some discussion around this in T102556. Bug: T71207 Change-Id: I50edd10c4ac931f7606bade9466678fbe301a271 --- M lib/html2wt/LinkHandler.js M tests/parserTests-blacklist.js 2 files changed, 12 insertions(+), 4 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/78/292078/1 diff --git a/lib/html2wt/LinkHandler.js b/lib/html2wt/LinkHandler.js index 78e0bbb..0a4e1f3 100644 --- a/lib/html2wt/LinkHandler.js +++ b/lib/html2wt/LinkHandler.js @@ -173,7 +173,7 @@ && !/^#|\?./.test(interWikiMatch[1]) // ExtLinks should have content to convert. && (rtData.type !== 'mw:ExtLink' || rtData.content.string || rtData.contentNode) - && (dp.isIW || target.modified || rtData.contentModified)) { + && (dp.isIW || !state.rtTestMode || target.modified || rtData.contentModified)) { // External link that is really an interwiki link. Convert it. 
if (rtData.type === 'mw:ExtLink') { rtData.type = 'mw:WikiLink'; diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js index db6c8d0..3a4b889 100644 --- a/tests/parserTests-blacklist.js +++ b/tests/parserTests-blacklist.js @@ -286,6 +286,7 @@ // Blacklist for wt2wt +add("wt2wt", "Parsing an URL", "[[:fr:🍺|http://fr.wikipedia.org/wiki/🍺]]\n<!-- EasterEgg we love beer, better be able be able to link to it -->"); add("wt2wt", "Parsoid only: Quote balancing context should be restricted to td/th cells on the same wikitext line\n(Requires tidy for PHP parser output to be fixed up)", "{|\n!''a''!!''b''\n|''a''||''b''\n|}"); add("wt2wt", "Non-word characters don't terminate tag names (bug 17663, 40670, 52022)", "<blockquote|>a\n\n<b→> doesn't terminate </b→>\n\n<bä> doesn't terminate </bä>\n\n<boo> doesn't terminate </boo>\n\n<s.foo> doesn't terminate </s.foo>\n\n<sub-ID#1>\n"); add("wt2wt", "Non-word characters don't terminate tag names + tidy", "<blockquote|>a\n\n<b→> doesn't terminate </b→>\n\n<bä> doesn't terminate </bä>\n\n<boo> doesn't terminate </boo>\n\n<s.foo> doesn't terminate </s.foo>\n\n<sub-ID#1>\n"); @@ -302,6 +303,7 @@ add("wt2wt", "External links: multiple legal whitespace is fine, Magnus. Don't break it please. 
(bug 5081)", "[http://www.example.com test]\n"); add("wt2wt", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid", "''[http://example.com text''<nowiki/>'']''\n[http://example.com '''text''']'''<nowiki/>'''\n''Something [http://example.com in italic''<nowiki/>'']''\n''Something [http://example.com mixed''''', even bold''''']'''\n'''''Now [http://example.com both'''''<nowiki/>''''']'''''\n"); add("wt2wt", "External link containing double-single-quotes with no space separating the url from text in italics", "[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm ''La muerte de Casagemas'' (1901) en el sitio de ][[Museo Picasso (París)|Museo Picasso]].\n"); +add("wt2wt", "mw:ExtLink linking to a interwiki URL can be round-tripped losslessly (T94723)", "[[wikipedia:European_Robin|European Robin]]\n"); add("wt2wt", "Unclosed and unmatched quotes", "'''''Bold italic text '''with bold deactivated''' in between.'''''\n\n'''''Bold italic text ''with italic deactivated'' in between.'''''\n\n'''Bold text..'''\n\n..spanning two paragraphs (should not work).'''<nowiki/>'''\n\n'''Bold tag left open'''\n\n''Italic tag left open''\n\nNormal text.\n\n<!-- Unmatching number of opening, closing tags: -->\n'''This year'<nowiki/>'''s election ''should'' beat '''last year''''s.\n\n''Tom'''s car is bigger than '''''<nowiki/>'''Susan'''s.\n\nPlain ''italic'''s plain\n"); add("wt2wt", "A table with captions with non-default spaced attributes and a table row", "{|\n|+ style=\"color: red;\" |caption2\n|+ style=\"color: red;\" | caption3\n|-\n| foo\n|}"); add("wt2wt", "Table td-cell syntax variations", "{|\n| foo bar | baz\n| foo bar foo || baz\n| style=\"color:red;\" | baz\n| style='color:red;' || baz\n|}"); @@ -1320,7 +1322,12 @@ // Blacklist for selser add("selser", "Extra newlines followed by heading [1,2,1,3,0,4,0,4,1,3,4]", "a\n\n40z1qdji2lfskyb9\n\n\n\n=b=\n9h4x5nzjh28rggb9\n\n[[a]]\n\nkup4bo01iztzkt9\n\n\n\nzlbbwe36wkrcnmi\n"); -add("selser", "Parsing an URL [[[2]],2,0]", 
"[[:fr:🍺|blv27e9oigmpwrk9http://fr.wikipedia.org/wiki/🍺]]\n\n7vcz2sjoo733ow29\n<!-- EasterEgg we love beer, better be able be able to link to it -->"); +add("selser", "Parsing an URL [2,0,2]", "bs8j6bi9cqh0k9\n\nhttp://fr.wikipedia.org/wiki/🍺\n\nk7365yy4w9418aor<!-- EasterEgg we love beer, better be able be able to link to it -->\n"); +add("selser", "Parsing an URL [1,0,4]", "http://fr.wikipedia.org/wiki/🍺\n\n45l8ib6o1shjv2t9\n"); +add("selser", "Parsing an URL [1,0,0]", "http://fr.wikipedia.org/wiki/🍺\n<!-- EasterEgg we love beer, better be able be able to link to it -->"); +add("selser", "Parsing an URL [0,4,3]", "http://fr.wikipedia.org/wiki/🍺\n\nxqtpipchumcxr\n"); +add("selser", "Parsing an URL [0,3,3]", "http://fr.wikipedia.org/wiki/🍺\n"); +add("selser", "Parsing an URL [1,0,2]", "http://fr.wikipedia.org/wiki/🍺\n\n2t2vp3lwj30ltyb9<!-- EasterEgg we love beer, better be able be able to link to it -->\n"); add("selser", "Italics and bold: 5-quote opening sequence: (5,6) [[[2]]]", "''7mymhlqo5nhlg14i'''foo''''''"); add("selser", "Parsoid only: Quote balancing context should be restricted to td/th cells on the same wikitext line\n(Requires tidy for PHP parser output to be fixed up) [1]", "{| data-foobar=\"pl7el6nksli8uxr\"\n!''a!!''b\n|''a||''b\n|}"); add("selser", "Parsoid only: Quote balancing context should be restricted to td/th cells on the same wikitext line\n(Requires tidy for PHP parser output to be fixed up) [[4,[[4,0,2,[1],0],4]]]", "{|<!--44rbogtlx8hncdi-->\n!5oylcgfssnfav2t9!!''b\n!qcinqswgq2r9t3xr\n|''a''||''b<!--b7ncx26rmqiu23xr-->\n|}"); @@ -1457,8 +1464,9 @@ add("selser", "External link containing double-single-quotes with no space separating the url from text in italics [[1,3,0]]", "[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm ''La muerte de Casagemas'' (1901) en el sitio de ]\n"); add("selser", "External link containing double-single-quotes with no space separating the url from text in italics [[4,0,3]]", "s7cmk1ocalpd5cdi\n"); 
add("selser", "External link containing double-single-quotes with no space separating the url from text in italics [[[1,2],2,4]]", "[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm ''La muerte de Casagemas''wvn1dm0gv694fgvi (1901) en el sitio de ]bizwk64e6yfogvi3d9bhvaaeq8jv2t9"); -add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped losslessly (T94723) [[[4]]]", "[[wikipedia:European_Robin|w8te4gwhsniv0a4i]]"); -add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped losslessly (T94723) [[[2]]]", "[[wikipedia:European_Robin|jtvcvll5n6x0f6rEuropean Robin]]"); +add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped losslessly (T94723) [2]", "w9k5s550u1xj38fr\n\n[http://en.wikipedia.org/wiki/European_Robin European Robin]"); +add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped losslessly (T94723) [[2]]", "etja73fm9wak0529[http://en.wikipedia.org/wiki/European_Robin European Robin]"); +add("selser", "mw:ExtLink linking to a interwiki URL can be round-tripped losslessly (T94723) [1]", "[http://en.wikipedia.org/wiki/European_Robin European Robin]"); add("selser", "Unclosed and unmatched quotes [[[0,0,4]],2,3,3,[1],0,4,0,1,2,[[2]],3,3,3,2,0,3,4,1,2,2]", "'''''Bold italic text '''with bold deactivated0bet2nehr3gl23xr''\n\nmveau9dj2krl766r\n\n'''Bold text..'''\n\ny75pk6fh55lba9k9\n\n'''Bold tag left open\n\nivecp7jd69h8semi\n\n''939vypv7nzalwhfrItalic tag left open''\n\n4jx8l4e2s9kymn29<!-- Unmatching number of opening, closing tags: -->\n\nftnac0njf7i1wcdi\n\n''Tom'''s car is bigger than '''''<nowiki/>'''Susan'''s.\n\nwv6k36cpnikbuik9\n\nvtz1rewhs7i7ldi\n\nPlain ''italic'''s plain"); add("selser", "Unclosed and unmatched quotes [[1],0,[1],0,[3],0,[2,2],0,4,0,4,2,4,0,3,0,4,0,[[0,[4]],0,3],0,4]", "'''''Bold italic text '''with bold deactivated''' in between.'''''\n\n'''''Bold italic text ''with italic deactivated'' in between.'''''\n\ny4h85lgdrtacerk9..spanning two paragraphs (should not 
work).3gnmqbeuuymbo6r'''\n\n3m9bvegcf03sor\n\nskw0gib981xde7b9\n\ndjjwoysazbvgqfr\n\nqcokvvv9z09kke29\n\nvmtyprts14m9rudi\n\n''Tom'''1qr9kmif90ox0f6r'''''<nowiki/>'''Susan'''\n\n5e2ez0in4um78pvi\n"); add("selser", "Unclosed and unmatched quotes [[[3,0,[3]]],0,[1],0,2,2,3,0,[4],4,[4],0,0,3,0,4,[2,3,3,4,3,0],0,4,2,[0,3,4]]", "''with bold deactivated'''<nowiki/>'''''\n\n'''''Bold italic text ''with italic deactivated'' in between.'''''\n\nuimy0isfyrykfbt9\n\n'''Bold text..\n\nhl888wghmewxw29\n\n4yq4z49x86ywrk9\n\nomsddcwsglihehfr\n\nmumepxtpar0vbo6r\n\nNormal text.<!-- Unmatching number of opening, closing tags: -->\n\ndska41gwjbwyu8fr\n\n508yy8ogfxd8ia4i'''This year''''0o13pli3vxojq0k9s.\n\n12et9k2znm5wb3xr\n\nl97w0g61j6rvbo6r\n\nPlain iahdy1xbdbjwz5mi"); -- To view, visit https://gerrit.wikimedia.org/r/292078 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I50edd10c4ac931f7606bade9466678fbe301a271 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits