Arlolra has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/230131

Change subject: Strip spans added for misnesting, just in case
......................................................................

Strip spans added for misnesting, just in case

 * Misnested content should have its source reused by selser but
   apparently sometimes slips through the cracks, as in T101768.

Change-Id: Ifd79f5824e6f32b1d085c46890483b0f1b598009
---
M lib/wts.TagHandlers.js
M tests/parserTests-blacklist.js
M tests/parserTests.txt
3 files changed, 28 insertions(+), 8 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/31/230131/1

diff --git a/lib/wts.TagHandlers.js b/lib/wts.TagHandlers.js
index d15d3e1..cbb02ce 100644
--- a/lib/wts.TagHandlers.js
+++ b/lib/wts.TagHandlers.js
@@ -1048,9 +1048,16 @@
                                        }
                                }
                        } else {
+                               var dp = DU.getDataParsoid(node);
                                // Fall back to plain HTML serialization for 
spans created
                                // by the editor
-                               state.serializer._htmlElementHandler(node, 
state, cb);
+                               if (dp.misnested && dp.stx !== 'html') {
+                                       state.env.log('warning',
+                                               'Serializing misnested content: 
' + node.outerHTML);
+                                       state.serializeChildren(node, cb);
+                               } else {
+                                       
state.serializer._htmlElementHandler(node, state, cb);
+                               }
                        }
                },
        },
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 6d56c79..6c60f80 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -331,9 +331,9 @@
 add("wt2wt", "BUG 289: literal \">\"-token in bracketed URL", 
"[http://www.example.com/ <b>html</b> stuff]\n");
 add("wt2wt", "BUG 289: literal double quote in bracketed URL", 
"[http://www.example.com/ \"hello\" stuff]\n");
 add("wt2wt", "External links: multiple legal whitespace is fine, Magnus. Don't 
break it please. (bug 5081)", "[http://www.example.com test]\n");
-add("wt2wt", "External links: wiki links within external link (Bug 3695)", 
"[http://example.com][[wikilink]]<span> embedded in ext link</span>\n");
+add("wt2wt", "External links: wiki links within external link (Bug 3695)", 
"[http://example.com][[wikilink]] embedded in ext link\n");
 add("wt2wt", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid", 
"''[http://example.com text''<nowiki/>'']''\n[http://example.com 
'''text''']'''<nowiki/>'''\n''Something [http://example.com in 
italic''<nowiki/>'']''\n''Something [http://example.com mixed''''', even 
bold''''']'''\n'''''Now [http://example.com both'''''<nowiki/>''''']'''''\n");
-add("wt2wt", "External link containing double-single-quotes with no space 
separating the url from text in italics", 
"[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm ''La muerte de 
Casagemas'' (1901) en el sitio de ][[Museo Picasso (París)|Museo 
Picasso]]<span>.</span>\n");
+add("wt2wt", "External link containing double-single-quotes with no space 
separating the url from text in italics", 
"[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm ''La muerte de 
Casagemas'' (1901) en el sitio de ][[Museo Picasso (París)|Museo Picasso]].\n");
 add("wt2wt", "Unclosed and unmatched quotes", "'''''Bold italic text '''with 
bold deactivated''' in between.'''''\n\n'''''Bold italic text ''with italic 
deactivated'' in between.'''''\n\n'''Bold text..'''\n\n..spanning two 
paragraphs (should not work).'''<nowiki/>'''\n\n'''Bold tag left 
open'''\n\n''Italic tag left open''\n\nNormal text.\n\n<!-- Unmatching number 
of opening, closing tags: -->\n'''This year'<nowiki/>'''s election ''should'' 
beat '''last year''''s.\n\n''Tom'''s car is bigger than 
'''''<nowiki/>'''Susan'''s.\n\nPlain ''italic'''s plain\n");
 add("wt2wt", "A table with captions with non-default spaced attributes and a 
table row", "{|\n|+ style=\"color: red;\" |caption2\n|+ style=\"color: red;\" | 
caption3\n|-\n| foo\n|}");
 add("wt2wt", "Table td-cell syntax variations", "{|\n| foo bar | baz\n| foo 
bar foo || baz\n| style=\"color:red;\" | baz\n| style='color:red;' || baz\n|}");
@@ -1549,7 +1549,7 @@
 add("selser", "External links: multiple legal whitespace is fine, Magnus. 
Don't break it please. (bug 5081) [[2]]", 
"u4ly6ay2j8257b9[http://www.example.com  test]");
 add("selser", "External links: wiki links within external link (Bug 3695) 
[[0,0,2]]", "[http://example.com [[wikilink]] embedded in ext 
link]56lgcoywrk9\n");
 add("selser", "External links: wiki links within external link (Bug 3695) 
[2]", "rfn3et25sei885mi\n\n[http://example.com [[wikilink]] embedded in ext 
link]");
-add("selser", "External links: wiki links within external link (Bug 3695) 
[[1,0,[3]]]", "[http://example.com]<span></span>\n");
+add("selser", "External links: wiki links within external link (Bug 3695) 
[[1,0,[3]]]", "[http://example.com]\n");
 add("selser", "External links: wiki links within external link (Bug 3695) 
[1]", "[http://example.com [[wikilink]] embedded in ext link]\n");
 add("selser", "External links: wiki links within external link (Bug 3695) 
[[0,[2],4]]", "[http://example.com [[wikilink]] embedded in ext 
link][[wikilink|tfm4924utjjg7gb9wikilink]]4pudhm2cxjchm2t9");
 add("selser", "External links: wiki links within external link (Bug 3695) 
[[0,1,0]]", "[http://example.com [[wikilink]] embedded in ext 
link][[wikilink]]\n");
@@ -1558,7 +1558,7 @@
 add("selser", "External links: wiki links within external link (Bug 3695) 
[[2,0,2]]", "293cbx0l3sg7gb9[http://example.com [[wikilink]] embedded in ext 
link]8w2h9t6x2k2o6r\n");
 add("selser", "External links: wiki links within external link (Bug 3695) 
[[0,2,4]]", "[http://example.com [[wikilink]] embedded in ext 
link]rc54l2ht7oh9qkt9<nowiki/>rdih6467tz6ko6r");
 add("selser", "External links: wiki links within external link (Bug 3695) 
[[0,0,4]]", "[http://example.com [[wikilink]] embedded in ext 
link]<nowiki/>sqtg3ksvvgta9k9");
-add("selser", "External links: wiki links within external link (Bug 3695) 
[[2,3,[4]]]", "ajav6f9s210dx6r[http://example.com [[wikilink]] embedded in ext 
link]<span>l42uxett0a1nhfr</span>\n");
+add("selser", "External links: wiki links within external link (Bug 3695) 
[[2,3,[4]]]", "ajav6f9s210dx6r[http://example.com [[wikilink]] embedded in ext 
link]l42uxett0a1nhfr\n");
 add("selser", "External links: wiki links within external link (Bug 3695) 
[[0,2,0]]", "[http://example.com [[wikilink]] embedded in ext 
link]pj9l2glwx6sf9a4i\n");
 add("selser", "External links: wiki links within external link (Bug 3695) 
[[0,4,0]]", "[http://example.com [[wikilink]] embedded in ext 
link]2zzejaf2y9nc23xr\n");
 add("selser", "External links: wiki links within external link (Bug 3695) 
[[3,2,0]]", "0uk611ztql8wu3di\n");
@@ -1580,12 +1580,12 @@
 add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid 
[[0,3,[4],2,0,[0,[0,1]],0,[3,[3,4],3],2,[[0,4]]]]", "''[http://example.com 
text''][http://example.com utcl2r0fq8lac3di]3qd60yxh5otakyb9'''\n''Something 
[http://example.com in italic''<nowiki/>'']''\n''[http://example.com 
dc0yjx5wecvaq0k9]''gbmw7avnratf0f6r\n'''''Now ozt1j409a2d42t9'''''");
 add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid 
[[2,4,0,4,0,1,2,1,0,2]]", "pv9ge46otv86w29''[http://example.com 
text'']t24xqdq7y0la0pb9[http://example.com '''text]10amjvk8ra1nhfr\n''Something 
[http://example.com in italic''<nowiki/>'']''l4tfdpdshpqilik9\n''Something 
[http://example.com mixed''''', even bold]'''\nziz3awxqv8lu9pb9'''''Now 
[http://example.com both''''']");
 add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid 
[[0,0,4,4,0,[3,4],0,[4,[4,0],0],2,3]]", "''[http://example.com 
text'']\nxh33ebkbpr639pb9ikqa3l7jkhrdlsor\n''h327flpc5pbsxlxr''\n''3icsd821v6de7b9[http://example.com
 2cukbor10uanhfr''''', even bold]'''3cf3j98c7sfnipb9\n");
-add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [[0,1,1]]", 
"[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm''La muerte de 
Casagemas'' (1901) en el sitio de [[Museo Picasso (París)|Museo 
Picasso]].][[Museo Picasso (París)|Museo Picasso]]<span 
data-foobar=\"w3fg9p5998n3tyb9\">.</span>\n");
+add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [[0,1,1]]", 
"[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm''La muerte de 
Casagemas'' (1901) en el sitio de [[Museo Picasso (París)|Museo 
Picasso]].][[Museo Picasso (París)|Museo Picasso]].\n");
 add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [2]", 
"pl3umz8dotsdobt9\n\n[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm''La
 muerte de Casagemas'' (1901) en el sitio de [[Museo Picasso (París)|Museo 
Picasso]].]");
 add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [1]", 
"[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm''La muerte de 
Casagemas'' (1901) en el sitio de [[Museo Picasso (París)|Museo Picasso]].]\n");
-add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [[2,0,[2]]]", 
"3owjotpl28qqto6r[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm''La 
muerte de Casagemas'' (1901) en el sitio de [[Museo Picasso (París)|Museo 
Picasso]].]<span>2hhu1hny1kvgqfr.</span>\n");
+add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [[2,0,[2]]]", 
"3owjotpl28qqto6r[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm''La 
muerte de Casagemas'' (1901) en el sitio de [[Museo Picasso (París)|Museo 
Picasso]].]2hhu1hny1kvgqfr.\n");
 add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [[[[4],4],2,0]]", 
"[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm 
''epe48j1yfhcl3di''5qs8nipyh55ewmi]362rfc9jrfab57b9\n");
-add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [[2,[3],[3]]]", 
"76vwlxnsrpyqr529[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm''La 
muerte de Casagemas'' (1901) en el sitio de [[Museo Picasso (París)|Museo 
Picasso]].][[Museo Picasso (París)|<nowiki/>]]<span></span>\n");
+add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [[2,[3],[3]]]", 
"76vwlxnsrpyqr529[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm''La 
muerte de Casagemas'' (1901) en el sitio de [[Museo Picasso (París)|Museo 
Picasso]].][[Museo Picasso (París)|<nowiki/>]]\n");
 add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [[0,0,4]]", 
"[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm''La muerte de 
Casagemas'' (1901) en el sitio de [[Museo Picasso (París)|Museo 
Picasso]].]<nowiki/>gjnm2n9a52tn8kt9");
 add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [[[4,0],0,2]]", 
"[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm m80ag44dkvxtj4i 
(1901) en el sitio de ]<nowiki/>iu8ii4xb76i529\n");
 add("selser", "External link containing double-single-quotes with no space 
separating the url from text in italics [[[3,3],[4],0]]", 
"[http://www.musee-picasso.fr/pages/page_id18528_u1l2.htm][[Museo Picasso 
(París)|d9nzrdjvb4sd1jor]]\n");
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index 79982c2..974d29c 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -25061,6 +25061,19 @@
 [http://boo.org http://boohoo.org]
 !! end
 
+# Misnested is an indication that selser can reuse the source but these have
+# been shown to sneak through on occasion. See T101768.
+# The original wikitext here is: [http://test.com [[one]] two three]
+!! test
+Strip span tags added to mark as misnested
+!! options
+parsoid=html2wt
+!! html/parsoid
<p data-parsoid='{}'><a rel="mw:ExtLink" href="http://test.com" 
data-parsoid='{"targetOff":17,"contentOffsets":[17,34]}'></a><a 
rel="mw:WikiLink" href="./One" title="One" 
data-parsoid='{"stx":"simple","a":{"href":"./One"},"sa":{"href":"one"},"misnested":true}'>one</a><span
 data-parsoid='{"misnested":true}'> two three</span></p>
+!! wikitext
+[http://test.com][[one]] two three
+!! end
+
 # --------------------------------------------
 # Tests spec'ing wikitext serialization norms |
 # --------------------------------------------

-- 
To view, visit https://gerrit.wikimedia.org/r/230131
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ifd79f5824e6f32b1d085c46890483b0f1b598009
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <abrea...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to