C. Scott Ananian has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/386021 )

Change subject: Add DOMUtils.hasNChildren() for quickly testing # of child nodes
......................................................................

Add DOMUtils.hasNChildren() for quickly testing # of child nodes

When testing whether a node has a small number of children (for example,
0, 1, or 2) we can avoid paying the O(# children) cost of potentially
creating a backing array for Node#childNodes by simply walking a
short distance down the linked list of children.

Change-Id: I16eca1e5c088a94acfb64177a9ce8545eb487401
---
M lib/html2wt/DOMHandlers.js
M lib/utils/DOMUtils.js
M lib/wt2html/pp/handlers/cleanup.js
M tests/mocha/parse.js
4 files changed, 22 insertions(+), 8 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/21/386021/1

diff --git a/lib/html2wt/DOMHandlers.js b/lib/html2wt/DOMHandlers.js
index 9ac2547..f1731b7 100644
--- a/lib/html2wt/DOMHandlers.js
+++ b/lib/html2wt/DOMHandlers.js
@@ -1166,7 +1166,7 @@
                                        return 
nativeExt.serialHandler.handle(node, state, wrapperUnmodified);
                                } else if 
(/(?:^|\s)mw:(?:Image|Video|Audio)(\/(Frame|Frameless|Thumb))?/.test(type)) {
                                        return 
state.serializer.figureHandler(node);
-                               } else if (/(?:^|\s)mw\:Entity/.test(type) && 
node.childNodes.length === 1) {
+                               } else if (/(?:^|\s)mw\:Entity/.test(type) && 
DU.hasNChildren(node, 1)) {
                                        // handle a new mw:Entity (not handled 
by selser) by
                                        // serializing its children
                                        if (dp.src !== undefined && contentSrc 
=== dp.srcContent) {
@@ -1183,7 +1183,7 @@
                                        if (dp.src !== undefined) {
                                                return emitPlaceholderSrc(node, 
state);
                                        } else if (/(^|\s)mw:Placeholder(\s|$)/ 
&&
-                                               node.childNodes.length === 1 &&
+                                               DU.hasNChildren(node, 1) &&
                                                DU.isText(node.firstChild) &&
                                                // See the DisplaySpace hack in 
the urltext rule
                                                // in the tokenizer.
@@ -1286,7 +1286,7 @@
                        } else {
                                // Trigger separator
                                if (state.sep.constraints && 
state.sep.constraints.min === 2 &&
-                                               
node.parentNode.childNodes.length === 1) {
+                                               
DU.hasNChildren(node.parentNode, 1)) {
                                        // p/br pair
                                        // Hackhack ;)
 
diff --git a/lib/utils/DOMUtils.js b/lib/utils/DOMUtils.js
index dc221ce..0cfa41c 100644
--- a/lib/utils/DOMUtils.js
+++ b/lib/utils/DOMUtils.js
@@ -90,6 +90,20 @@
        },
 
        /**
+        * Test the number of children this node has without using
+        * `Node#childNodes.length`.  This walks the sibling list and so
+        * takes O(`nchildren`) time -- so `nchildren` is expected to be small
+        * (say: 0, 1, or 2).
+        */
+       hasNChildren: function(node, nchildren) {
+               for (var child = node.firstChild; child; child = 
child.nextSibling) {
+                       if (nchildren <= 0) { return false; }
+                       nchildren -= 1;
+               }
+               return true;
+       },
+
+       /**
         * Is 'node' a block node that is also visible in wikitext?
         * An example of an invisible block node is a `<p>`-tag that
         * Parsoid generated, or a `<ul>`, `<ol>` tag.
diff --git a/lib/wt2html/pp/handlers/cleanup.js 
b/lib/wt2html/pp/handlers/cleanup.js
index d761aba..a546282 100644
--- a/lib/wt2html/pp/handlers/cleanup.js
+++ b/lib/wt2html/pp/handlers/cleanup.js
@@ -87,7 +87,7 @@
                // template/extension content OR nodes with templated-attributes
                !DU.hasParsoidAboutId(node) && (
                        (!node.hasChildNodes()) || (
-                               node.childNodes.length === 1 && 
!DU.isElt(node.firstChild) &&
+                               DU.hasNChildren(node, 1) && 
!DU.isElt(node.firstChild) &&
                                /^\s*$/.test(node.textContent)
                        )
        )) {
diff --git a/tests/mocha/parse.js b/tests/mocha/parse.js
index de145c2..7f7f8bc 100644
--- a/tests/mocha/parse.js
+++ b/tests/mocha/parse.js
@@ -33,12 +33,12 @@
                                doc.outerHTML.startsWith('<!DOCTYPE 
html><html').should.equal(true);
                                
doc.outerHTML.endsWith('</body></html>').should.equal(true);
                                // verify that body has only one <html> tag, 
one <body> tag, etc.
-                               doc.childNodes.length.should.equal(2);// 
<!DOCTYPE> and <html>
+                               DU.hasNChildren(doc, 2).should.equal(true); // 
<!DOCTYPE> and <html>
                                doc.firstChild.nodeName.should.equal('html');
                                doc.lastChild.nodeName.should.equal('HTML');
                                // <html> children should be <head> and <body>
                                var html = doc.documentElement;
-                               html.childNodes.length.should.equal(2);
+                               DU.hasNChildren(html, 2).should.equal(true);
                                html.firstChild.nodeName.should.equal('HEAD');
                                html.lastChild.nodeName.should.equal('BODY');
                                // <body> should have one child, <p>
@@ -58,12 +58,12 @@
                                doc.outerHTML.startsWith('<!DOCTYPE 
html><html').should.equal(true);
                                
doc.outerHTML.endsWith('</body></html>').should.equal(true);
                                // verify that body has only one <html> tag, 
one <body> tag, etc.
-                               doc.childNodes.length.should.equal(2);// 
<!DOCTYPE> and <html>
+                               DU.hasNChildren(doc, 2).should.equal(true); // 
<!DOCTYPE> and <html>
                                doc.firstChild.nodeName.should.equal('html');
                                doc.lastChild.nodeName.should.equal('HTML');
                                // <html> children should be <head> and <body>
                                var html = doc.documentElement;
-                               html.childNodes.length.should.equal(2);
+                               DU.hasNChildren(html, 2).should.equal(true);
                                html.firstChild.nodeName.should.equal('HEAD');
                                html.lastChild.nodeName.should.equal('BODY');
                                // <body> should have one child, <table>

-- 
To view, visit https://gerrit.wikimedia.org/r/386021
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I16eca1e5c088a94acfb64177a9ce8545eb487401
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: C. Scott Ananian <canan...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to