jenkins-bot has submitted this change and it was merged.
Change subject: computeDSR: Fix source of pathological O(n^2) behavior
......................................................................
computeDSR: Fix source of pathological O(n^2) behavior
* Left to right propagation of DSR information skips over text
nodes to find an element node whose DSR information might change.
* However, during the right to left walk of a node's children,
DSR code removes marker meta tags (and it looks like the code
assumes that the meta tags would be removed). In pathological
scenarios (as in enwiki:Colonization?oldid=718468597) where
there is a really large block text that mimics table wikitext
but is not really a table ends up with marker meta tags in
between text nodes (a separate patch will fix up TokenStreamPatcher
to strip these meta tags early on). When these meta tags are
stripped, we end up with a series of adjacent text nodes.
In the pathological scenario of this revision, ~6000+ children
get replaced with ~3000+ text nodes. This causes the LTR pass
to touch all text nodes to the right till the end that leads
to O(n^2) behavior.
This patch fixes that by collapsing text nodes.
* The revision now parses in ~5sec compared to 200+ sec earlier.
Change-Id: Ide1badb0a9df48679d5af5bcee1d0ffbad6d2732
---
M lib/wt2html/pp/processors/computeDSR.js
M tests/parserTests-blacklist.js
2 files changed, 38 insertions(+), 5 deletions(-)
Approvals:
Arlolra: Looks good to me, approved
jenkins-bot: Verified
diff --git a/lib/wt2html/pp/processors/computeDSR.js
b/lib/wt2html/pp/processors/computeDSR.js
index b227ef4..76129df 100644
--- a/lib/wt2html/pp/processors/computeDSR.js
+++ b/lib/wt2html/pp/processors/computeDSR.js
@@ -215,10 +215,12 @@
var cs = ce;
var rtTestMode = env.conf.parsoid.rtTestMode;
- for (var i = numChildren - 1; i >= 0; i--) {
+ var child = node.lastChild;
+ var i = numChildren - 1;
+ while (child !== null) {
+ var prevChild = child.previousSibling;
var isMarkerTag = false;
var origCE = ce;
- var child = children[i];
var cType = child.nodeType;
var endTagInfo = null;
var fosteredNode = false;
@@ -248,6 +250,7 @@
if (cType === node.TEXT_NODE) {
if (ce !== null) {
+ // This code is replicated below. Keep both in
sync.
cs = ce - child.data.length -
DU.indentPreDSRCorrection(child);
}
} else if (cType === node.COMMENT_NODE) {
@@ -567,10 +570,40 @@
}
}
- // No use for this marker tag after this
+ // No use for this marker tag after this.
+ // Looks like DSR computation assumes that
+ // these meta tags will be removed.
if (isMarkerTag) {
+ // Collapse text nodes to prevent n^2 effect in the LTR
propagation pass
+ // Example: enwiki:Colonization?oldid=718468597
+ var nextChild = child.nextSibling;
+ if (DU.isText(prevChild) && DU.isText(nextChild)) {
+ var prevText = prevChild.nodeValue;
+ var nextText = nextChild.nodeValue;
+
+ // Process prevText in place
+ if (ce !== null) {
+ // indentPreDSRCorrection is not
required here since
+ // we'll never come down this branch
(mw:TSRMarker won't exist
+ // in indent-pres, and mw:EndTag
markers won't have a text node
+ // for its previous sibling), but, for
sake of maintenance sanity,
+ // replicating code from above.
+ cs = ce - prevText.length -
DU.indentPreDSRCorrection(prevChild);
+ ce = cs;
+ }
+
+ // Update DOM
+ var newNode =
node.ownerDocument.createTextNode(prevText + nextText);
+ node.replaceChild(newNode, prevChild);
+ node.removeChild(nextChild);
+ prevChild = newNode.previousSibling;
+ i--;
+ }
node.removeChild(child);
}
+
+ i--;
+ child = prevChild;
}
if (cs === undefined || cs === null) {
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 71c95b1..f4563f9 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -126,7 +126,7 @@
add("wt2html", "Templates: Wiki Tables: 4. Templated tags, no content",
"<table about=\"#mwt1\" typeof=\"mw:Transclusion\"
data-parsoid='{\"dsr\":[0,25,null,null],\"pi\":[[],[]]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"tbl-start\",\"href\":\"./Template:Tbl-start\"},\"params\":{},\"i\":0}},\"\\n\",{\"template\":{\"target\":{\"wt\":\"tbl-end\",\"href\":\"./Template:Tbl-end\"},\"params\":{},\"i\":1}}]}'>\n</table>");
add("wt2html", "Templates: Lists: Multi-line list-items via templates", "<ul
data-parsoid='{\"dsr\":[0,71,0,0]}'><li
data-parsoid='{\"dsr\":[0,35,1,0]}'><span about=\"#mwt1\"
typeof=\"mw:Transclusion\"
data-parsoid='{\"pi\":[[{\"k\":\"1\"}]],\"dsr\":[1,35,null,null]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"a
{{nonexistent|\\nunused}}\"}},\"i\":0}}]}'>a </span><span
typeof=\"mw:Placeholder\" about=\"#mwt1\">Warning: Page/template fetching
disabled, and no cache for Template:Nonexistent</span></li>\n<li
data-parsoid='{\"dsr\":[36,71,1,0]}'><span about=\"#mwt3\"
typeof=\"mw:Transclusion\"
data-parsoid='{\"pi\":[[{\"k\":\"1\"}]],\"dsr\":[37,71,null,null]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"b
{{nonexistent|\\nunused}}\"}},\"i\":0}}]}'>b </span><span
typeof=\"mw:Placeholder\" about=\"#mwt3\">Warning: Page/template fetching
disabled, and no cache for Template:Nonexistent</span></li></ul>");
add("wt2html", "Templates: Ugly nesting: 1. Quotes opened/closed across
templates (echo)", "<p data-parsoid='{\"dsr\":[0,40,0,0]}'><i about=\"#mwt1\"
typeof=\"mw:Transclusion\"
data-parsoid='{\"dsr\":[0,40,null,null],\"pi\":[[{\"k\":\"1\"}],[{\"k\":\"1\"}],[{\"k\":\"1\"}]]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"''a\"}},\"i\":0}},{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"b''c''d\"}},\"i\":1}},{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"''e\"}},\"i\":2}}]}'>ab</i><span
about=\"#mwt1\">c</span><i about=\"#mwt1\">d</i><span
about=\"#mwt1\">e</span></p>");
-add("wt2html", "Templates: Ugly templates: 1. Navbox template parses badly
leading to table misnesting\n(Parsoid-centric)", "<table about=\"#mwt1\"
typeof=\"mw:Transclusion\"
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,32,2,null],\"firstWikitextNode\":\"TABLE_html\",\"pi\":[[{\"k\":\"1\"}]]}'
data-mw='{\"parts\":[\"{|\\n|\",{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"foo</table>\"}},\"i\":0}},\"\\n|bar\\n|}\"]}'>\n<tbody><tr><td>foo</td></tr></tbody></table><span
about=\"#mwt1\">\n|bar</span><span about=\"#mwt1\">\n</span>");
+add("wt2html", "Templates: Ugly templates: 1. Navbox template parses badly
leading to table misnesting\n(Parsoid-centric)", "<table about=\"#mwt1\"
typeof=\"mw:Transclusion\"
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,32,2,null],\"firstWikitextNode\":\"TABLE_html\",\"pi\":[[{\"k\":\"1\"}]]}'
data-mw='{\"parts\":[\"{|\\n|\",{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"foo</table>\"}},\"i\":0}},\"\\n|bar\\n|}\"]}'>\n<tbody><tr><td>foo</td></tr></tbody></table><span
about=\"#mwt1\">\n|bar\n</span>");
add("wt2html", "Templates: Ugly templates: 4. newline-only template parameter
inconsistency", "<span about=\"#mwt1\" typeof=\"mw:Transclusion\"
data-parsoid='{\"pi\":[[{\"k\":\"1\"}]],\"dsr\":[0,10,null,null]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"\\n\"}},\"i\":0}}]}'>\n</span>");
add("wt2html", "subst: does not work during normal parse", "<p about=\"#mwt1\"
typeof=\"mw:Transclusion\" data-parsoid='{\"dsr\":[0,13,0,0],\"pi\":[[]]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"SubstTest\",\"href\":\"./Template:SubstTest\"},\"params\":{},\"i\":0}}]}'>Foobar</p>");
add("wt2html", "message transform: magic variables", "<p about=\"#mwt1\"
typeof=\"mw:Transclusion\" data-parsoid='{\"dsr\":[0,12,0,0],\"pi\":[[]]}'
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"SITENAME\",\"function\":\"sitename\"},\"params\":{},\"i\":0}}]}'>MediaWiki</p>");
@@ -478,7 +478,7 @@
add("html2html", "Templates: Links: 5. Generation of link text", "<p
data-parsoid='{\"dsr\":[0,16,0,0]}'><a rel=\"mw:WikiLink\" href=\"./Wiki/Foo\"
title=\"Wiki/Foo\"
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Wiki/Foo\"},\"sa\":{\"href\":\"wiki/Foo\"},\"dsr\":[0,16,11,2]}'>bar</a></p>\n");
add("html2html", "Templates: Links: 5. Nested templates (only outermost
template should be marked)", "<p data-parsoid='{\"dsr\":[0,16,0,0]}'><a
rel=\"mw:WikiLink\" href=\"./Wiki/Foo\" title=\"Wiki/Foo\"
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Wiki/Foo\"},\"sa\":{\"href\":\"wiki/Foo\"},\"dsr\":[0,16,11,2]}'>bar</a></p>\n");
add("html2html", "Templates: Lists: Multi-line list-items via templates", "<ul
data-parsoid='{\"dsr\":[0,173,0,0]}'><li data-parsoid='{\"dsr\":[0,86,1,0]}'> a
[/index.php?title=Template:Nonexistent&action=edit&redlink=1
Template:Nonexistent]</li>\n<li data-parsoid='{\"dsr\":[87,173,1,0]}'> b
[/index.php?title=Template:Nonexistent&action=edit&redlink=1
Template:Nonexistent]</li></ul>\n");
-add("html2html", "Templates: Ugly templates: 1. Navbox template parses badly
leading to table misnesting\n(Parsoid-centric)", "<table about=\"#mwt1\"
typeof=\"mw:Transclusion\"
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,32,2,null],\"firstWikitextNode\":\"TABLE_html\",\"pi\":[[{\"k\":\"1\"}]]}'
data-mw='{\"parts\":[\"{|\\n|\",{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"foo</table>\"}},\"i\":0}},\"\\n|bar\\n|}\"]}'>\n<tbody><tr><td>foo</td></tr></tbody></table><span
about=\"#mwt1\">\n|bar</span><span about=\"#mwt1\">\n</span>");
+add("html2html", "Templates: Ugly templates: 1. Navbox template parses badly
leading to table misnesting\n(Parsoid-centric)", "<table about=\"#mwt1\"
typeof=\"mw:Transclusion\"
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,32,2,null],\"firstWikitextNode\":\"TABLE_html\",\"pi\":[[{\"k\":\"1\"}]]}'
data-mw='{\"parts\":[\"{|\\n|\",{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"foo</table>\"}},\"i\":0}},\"\\n|bar\\n|}\"]}'>\n<tbody><tr><td>foo</td></tr></tbody></table><span
about=\"#mwt1\">\n|bar\n</span>");
add("html2html", "Templates: Ugly templates: 4. newline-only template
parameter inconsistency", "\n");
add("html2html", "message transform: magic variables", "<p
data-parsoid='{\"dsr\":[0,9,0,0]}'>MediaWiki</p>");
add("html2html", "message transform: should not transform wiki markup", "<p
data-parsoid='{\"dsr\":[0,25,0,0]}'><span typeof=\"mw:Nowiki\"
data-parsoid='{\"dsr\":[0,25,8,9]}'>''test''</span></p>");
--
To view, visit https://gerrit.wikimedia.org/r/302644
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ide1badb0a9df48679d5af5bcee1d0ffbad6d2732
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits