jenkins-bot has submitted this change and it was merged.

Change subject: computeDSR: Fix source of pathological O(n^2) behavior
......................................................................


computeDSR: Fix source of pathological O(n^2) behavior

* Left to right propagation of DSR information skips over text
  nodes to find an element node whose DSR information might change.

* However, during the right to left walk of a node's children,
  DSR code removes marker meta tags (and it looks like the code
  assumes that the meta tags would be removed). In pathological
  scenarios (as in enwiki:Colonization?oldid=718468597) where
  there is a really large block text that mimics table wikitext
  but is not really a table ends up with marker meta tags in
  between text nodes (a separate patch will fix up TokenStreamPatcher
  to strip these meta tags early on). When these meta tags are
  stripped, we end up with a series of adjacent text nodes.

  In the pathological scenario of this revision, ~6000+ children
  get replaced with ~3000+ text nodes. This causes the LTR pass
  to touch all text nodes to the right till the end that leads
  to O(n^2) behavior.

  This patch fixes that by collapsing text nodes.

* The revision now parses in ~5sec compared to 200+ sec earlier.

Change-Id: Ide1badb0a9df48679d5af5bcee1d0ffbad6d2732
---
M lib/wt2html/pp/processors/computeDSR.js
M tests/parserTests-blacklist.js
2 files changed, 38 insertions(+), 5 deletions(-)

Approvals:
  Arlolra: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/wt2html/pp/processors/computeDSR.js 
b/lib/wt2html/pp/processors/computeDSR.js
index b227ef4..76129df 100644
--- a/lib/wt2html/pp/processors/computeDSR.js
+++ b/lib/wt2html/pp/processors/computeDSR.js
@@ -215,10 +215,12 @@
        var cs = ce;
        var rtTestMode = env.conf.parsoid.rtTestMode;
 
-       for (var i = numChildren - 1; i >= 0; i--) {
+       var child = node.lastChild;
+       var i = numChildren - 1;
+       while (child !== null) {
+               var prevChild = child.previousSibling;
                var isMarkerTag = false;
                var origCE = ce;
-               var child = children[i];
                var cType = child.nodeType;
                var endTagInfo = null;
                var fosteredNode = false;
@@ -248,6 +250,7 @@
 
                if (cType === node.TEXT_NODE) {
                        if (ce !== null) {
+                               // This code is replicated below. Keep both in 
sync.
                                cs = ce - child.data.length - 
DU.indentPreDSRCorrection(child);
                        }
                } else if (cType === node.COMMENT_NODE) {
@@ -567,10 +570,40 @@
                        }
                }
 
-               // No use for this marker tag after this
+               // No use for this marker tag after this.
+               // Looks like DSR computation assumes that
+               // these meta tags will be removed.
                if (isMarkerTag) {
+                       // Collapse text nodes to prevent n^2 effect in the LTR 
propagation pass
+                       // Example: enwiki:Colonization?oldid=718468597
+                       var nextChild = child.nextSibling;
+                       if (DU.isText(prevChild) && DU.isText(nextChild)) {
+                               var prevText = prevChild.nodeValue;
+                               var nextText = nextChild.nodeValue;
+
+                               // Process prevText in place
+                               if (ce !== null) {
+                                       // indentPreDSRCorrection is not 
required here since
+                                       // we'll never come down this branch 
(mw:TSRMarker won't exist
+                                       // in indent-pres, and mw:EndTag 
markers won't have a text node
+                                       // for its previous sibling), but, for 
sake of maintenance sanity,
+                                       // replicating code from above.
+                                       cs = ce - prevText.length - 
DU.indentPreDSRCorrection(prevChild);
+                                       ce = cs;
+                               }
+
+                               // Update DOM
+                               var newNode = 
node.ownerDocument.createTextNode(prevText + nextText);
+                               node.replaceChild(newNode, prevChild);
+                               node.removeChild(nextChild);
+                               prevChild = newNode.previousSibling;
+                               i--;
+                       }
                        node.removeChild(child);
                }
+
+               i--;
+               child = prevChild;
        }
 
        if (cs === undefined || cs === null) {
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 71c95b1..f4563f9 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -126,7 +126,7 @@
 add("wt2html", "Templates: Wiki Tables: 4. Templated tags, no content", 
"<table about=\"#mwt1\" typeof=\"mw:Transclusion\" 
data-parsoid='{\"dsr\":[0,25,null,null],\"pi\":[[],[]]}' 
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"tbl-start\",\"href\":\"./Template:Tbl-start\"},\"params\":{},\"i\":0}},\"\\n\",{\"template\":{\"target\":{\"wt\":\"tbl-end\",\"href\":\"./Template:Tbl-end\"},\"params\":{},\"i\":1}}]}'>\n</table>");
 add("wt2html", "Templates: Lists: Multi-line list-items via templates", "<ul 
data-parsoid='{\"dsr\":[0,71,0,0]}'><li 
data-parsoid='{\"dsr\":[0,35,1,0]}'><span about=\"#mwt1\" 
typeof=\"mw:Transclusion\" 
data-parsoid='{\"pi\":[[{\"k\":\"1\"}]],\"dsr\":[1,35,null,null]}' 
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"a
 {{nonexistent|\\nunused}}\"}},\"i\":0}}]}'>a </span><span 
typeof=\"mw:Placeholder\" about=\"#mwt1\">Warning: Page/template fetching 
disabled, and no cache for Template:Nonexistent</span></li>\n<li 
data-parsoid='{\"dsr\":[36,71,1,0]}'><span about=\"#mwt3\" 
typeof=\"mw:Transclusion\" 
data-parsoid='{\"pi\":[[{\"k\":\"1\"}]],\"dsr\":[37,71,null,null]}' 
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"b
 {{nonexistent|\\nunused}}\"}},\"i\":0}}]}'>b </span><span 
typeof=\"mw:Placeholder\" about=\"#mwt3\">Warning: Page/template fetching 
disabled, and no cache for Template:Nonexistent</span></li></ul>");
 add("wt2html", "Templates: Ugly nesting: 1. Quotes opened/closed across 
templates (echo)", "<p data-parsoid='{\"dsr\":[0,40,0,0]}'><i about=\"#mwt1\" 
typeof=\"mw:Transclusion\" 
data-parsoid='{\"dsr\":[0,40,null,null],\"pi\":[[{\"k\":\"1\"}],[{\"k\":\"1\"}],[{\"k\":\"1\"}]]}'
 
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"&#39;&#39;a\"}},\"i\":0}},{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"b&#39;&#39;c&#39;&#39;d\"}},\"i\":1}},{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"&#39;&#39;e\"}},\"i\":2}}]}'>ab</i><span
 about=\"#mwt1\">c</span><i about=\"#mwt1\">d</i><span 
about=\"#mwt1\">e</span></p>");
-add("wt2html", "Templates: Ugly templates: 1. Navbox template parses badly 
leading to table misnesting\n(Parsoid-centric)", "<table about=\"#mwt1\" 
typeof=\"mw:Transclusion\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,32,2,null],\"firstWikitextNode\":\"TABLE_html\",\"pi\":[[{\"k\":\"1\"}]]}'
 
data-mw='{\"parts\":[\"{|\\n|\",{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"foo&lt;/table>\"}},\"i\":0}},\"\\n|bar\\n|}\"]}'>\n<tbody><tr><td>foo</td></tr></tbody></table><span
 about=\"#mwt1\">\n|bar</span><span about=\"#mwt1\">\n</span>");
+add("wt2html", "Templates: Ugly templates: 1. Navbox template parses badly 
leading to table misnesting\n(Parsoid-centric)", "<table about=\"#mwt1\" 
typeof=\"mw:Transclusion\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,32,2,null],\"firstWikitextNode\":\"TABLE_html\",\"pi\":[[{\"k\":\"1\"}]]}'
 
data-mw='{\"parts\":[\"{|\\n|\",{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"foo&lt;/table>\"}},\"i\":0}},\"\\n|bar\\n|}\"]}'>\n<tbody><tr><td>foo</td></tr></tbody></table><span
 about=\"#mwt1\">\n|bar\n</span>");
 add("wt2html", "Templates: Ugly templates: 4. newline-only template parameter 
inconsistency", "<span about=\"#mwt1\" typeof=\"mw:Transclusion\" 
data-parsoid='{\"pi\":[[{\"k\":\"1\"}]],\"dsr\":[0,10,null,null]}' 
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"\\n\"}},\"i\":0}}]}'>\n</span>");
 add("wt2html", "subst: does not work during normal parse", "<p about=\"#mwt1\" 
typeof=\"mw:Transclusion\" data-parsoid='{\"dsr\":[0,13,0,0],\"pi\":[[]]}' 
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"SubstTest\",\"href\":\"./Template:SubstTest\"},\"params\":{},\"i\":0}}]}'>Foobar</p>");
 add("wt2html", "message transform: magic variables", "<p about=\"#mwt1\" 
typeof=\"mw:Transclusion\" data-parsoid='{\"dsr\":[0,12,0,0],\"pi\":[[]]}' 
data-mw='{\"parts\":[{\"template\":{\"target\":{\"wt\":\"SITENAME\",\"function\":\"sitename\"},\"params\":{},\"i\":0}}]}'>MediaWiki</p>");
@@ -478,7 +478,7 @@
 add("html2html", "Templates: Links: 5. Generation of link text", "<p 
data-parsoid='{\"dsr\":[0,16,0,0]}'><a rel=\"mw:WikiLink\" href=\"./Wiki/Foo\" 
title=\"Wiki/Foo\" 
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Wiki/Foo\"},\"sa\":{\"href\":\"wiki/Foo\"},\"dsr\":[0,16,11,2]}'>bar</a></p>\n");
 add("html2html", "Templates: Links: 5. Nested templates (only outermost 
template should be marked)", "<p data-parsoid='{\"dsr\":[0,16,0,0]}'><a 
rel=\"mw:WikiLink\" href=\"./Wiki/Foo\" title=\"Wiki/Foo\" 
data-parsoid='{\"stx\":\"piped\",\"a\":{\"href\":\"./Wiki/Foo\"},\"sa\":{\"href\":\"wiki/Foo\"},\"dsr\":[0,16,11,2]}'>bar</a></p>\n");
 add("html2html", "Templates: Lists: Multi-line list-items via templates", "<ul 
data-parsoid='{\"dsr\":[0,173,0,0]}'><li data-parsoid='{\"dsr\":[0,86,1,0]}'> a 
[/index.php?title=Template:Nonexistent&amp;action=edit&amp;redlink=1 
Template:Nonexistent]</li>\n<li data-parsoid='{\"dsr\":[87,173,1,0]}'> b 
[/index.php?title=Template:Nonexistent&amp;action=edit&amp;redlink=1 
Template:Nonexistent]</li></ul>\n");
-add("html2html", "Templates: Ugly templates: 1. Navbox template parses badly 
leading to table misnesting\n(Parsoid-centric)", "<table about=\"#mwt1\" 
typeof=\"mw:Transclusion\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,32,2,null],\"firstWikitextNode\":\"TABLE_html\",\"pi\":[[{\"k\":\"1\"}]]}'
 
data-mw='{\"parts\":[\"{|\\n|\",{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"foo&lt;/table>\"}},\"i\":0}},\"\\n|bar\\n|}\"]}'>\n<tbody><tr><td>foo</td></tr></tbody></table><span
 about=\"#mwt1\">\n|bar</span><span about=\"#mwt1\">\n</span>");
+add("html2html", "Templates: Ugly templates: 1. Navbox template parses badly 
leading to table misnesting\n(Parsoid-centric)", "<table about=\"#mwt1\" 
typeof=\"mw:Transclusion\" 
data-parsoid='{\"stx\":\"html\",\"dsr\":[0,32,2,null],\"firstWikitextNode\":\"TABLE_html\",\"pi\":[[{\"k\":\"1\"}]]}'
 
data-mw='{\"parts\":[\"{|\\n|\",{\"template\":{\"target\":{\"wt\":\"echo\",\"href\":\"./Template:Echo\"},\"params\":{\"1\":{\"wt\":\"foo&lt;/table>\"}},\"i\":0}},\"\\n|bar\\n|}\"]}'>\n<tbody><tr><td>foo</td></tr></tbody></table><span
 about=\"#mwt1\">\n|bar\n</span>");
 add("html2html", "Templates: Ugly templates: 4. newline-only template 
parameter inconsistency", "\n");
 add("html2html", "message transform: magic variables", "<p 
data-parsoid='{\"dsr\":[0,9,0,0]}'>MediaWiki</p>");
 add("html2html", "message transform: should not transform wiki markup", "<p 
data-parsoid='{\"dsr\":[0,25,0,0]}'><span typeof=\"mw:Nowiki\" 
data-parsoid='{\"dsr\":[0,25,8,9]}'>''test''</span></p>");

-- 
To view, visit https://gerrit.wikimedia.org/r/302644
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ide1badb0a9df48679d5af5bcee1d0ffbad6d2732
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to