C. Scott Ananian has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/386021 )
Change subject: Add DOMUtils.hasNChildren() for quickly testing # of child nodes ...................................................................... Add DOMUtils.hasNChildren() for quickly testing # of child nodes When testing whether a node has a small number of children (for example, 0, 1, or 2) we can avoid paying the O(# children) cost of potentially creating a backing array for Node#childNodes by simply walking a short distance down the linked list of children. Change-Id: I16eca1e5c088a94acfb64177a9ce8545eb487401 --- M lib/html2wt/DOMHandlers.js M lib/utils/DOMUtils.js M lib/wt2html/pp/handlers/cleanup.js M tests/mocha/parse.js 4 files changed, 22 insertions(+), 8 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/21/386021/1 diff --git a/lib/html2wt/DOMHandlers.js b/lib/html2wt/DOMHandlers.js index 9ac2547..f1731b7 100644 --- a/lib/html2wt/DOMHandlers.js +++ b/lib/html2wt/DOMHandlers.js @@ -1166,7 +1166,7 @@ return nativeExt.serialHandler.handle(node, state, wrapperUnmodified); } else if (/(?:^|\s)mw:(?:Image|Video|Audio)(\/(Frame|Frameless|Thumb))?/.test(type)) { return state.serializer.figureHandler(node); - } else if (/(?:^|\s)mw\:Entity/.test(type) && node.childNodes.length === 1) { + } else if (/(?:^|\s)mw\:Entity/.test(type) && DU.hasNChildren(node, 1)) { // handle a new mw:Entity (not handled by selser) by // serializing its children if (dp.src !== undefined && contentSrc === dp.srcContent) { @@ -1183,7 +1183,7 @@ if (dp.src !== undefined) { return emitPlaceholderSrc(node, state); } else if (/(^|\s)mw:Placeholder(\s|$)/ && - node.childNodes.length === 1 && + DU.hasNChildren(node, 1) && DU.isText(node.firstChild) && // See the DisplaySpace hack in the urltext rule // in the tokenizer. @@ -1286,7 +1286,7 @@ } else { // Trigger separator if (state.sep.constraints && state.sep.constraints.min === 2 && - node.parentNode.childNodes.length === 1) { + DU.hasNChildren(node.parentNode, 1)) { // p/br pair // Hackhack ;) diff --git a/lib/utils/DOMUtils.js b/lib/utils/DOMUtils.js index dc221ce..0cfa41c 100644 --- a/lib/utils/DOMUtils.js +++ b/lib/utils/DOMUtils.js @@ -90,6 +90,20 @@ }, /** + * Test the number of children this node has without using + * `Node#childNodes.length`. This walks the sibling list and so + * takes O(`nchildren`) time -- so `nchildren` is expected to be small + * (say: 0, 1, or 2). + */ + hasNChildren: function(node, nchildren) { + for (var child = node.firstChild; child; child = child.nextSibling) { + if (nchildren <= 0) { return false; } + nchildren -= 1; + } + return true; + }, + + /** * Is 'node' a block node that is also visible in wikitext? * An example of an invisible block node is a `<p>`-tag that * Parsoid generated, or a `<ul>`, `<ol>` tag. diff --git a/lib/wt2html/pp/handlers/cleanup.js b/lib/wt2html/pp/handlers/cleanup.js index d761aba..a546282 100644 --- a/lib/wt2html/pp/handlers/cleanup.js +++ b/lib/wt2html/pp/handlers/cleanup.js @@ -87,7 +87,7 @@ // template/extension content OR nodes with templated-attributes !DU.hasParsoidAboutId(node) && ( (!node.hasChildNodes()) || ( - node.childNodes.length === 1 && !DU.isElt(node.firstChild) && + DU.hasNChildren(node, 1) && !DU.isElt(node.firstChild) && /^\s*$/.test(node.textContent) ) )) { diff --git a/tests/mocha/parse.js b/tests/mocha/parse.js index de145c2..7f7f8bc 100644 --- a/tests/mocha/parse.js +++ b/tests/mocha/parse.js @@ -33,12 +33,12 @@ doc.outerHTML.startsWith('<!DOCTYPE html><html').should.equal(true); doc.outerHTML.endsWith('</body></html>').should.equal(true); // verify that body has only one <html> tag, one <body> tag, etc. - doc.childNodes.length.should.equal(2);// <!DOCTYPE> and <html> + DU.hasNChildren(doc, 2).should.equal(true); // <!DOCTYPE> and <html> doc.firstChild.nodeName.should.equal('html'); doc.lastChild.nodeName.should.equal('HTML'); // <html> children should be <head> and <body> var html = doc.documentElement; - html.childNodes.length.should.equal(2); + DU.hasNChildren(html, 2).should.equal(true); html.firstChild.nodeName.should.equal('HEAD'); html.lastChild.nodeName.should.equal('BODY'); // <body> should have one child, <p> @@ -58,12 +58,12 @@ doc.outerHTML.startsWith('<!DOCTYPE html><html').should.equal(true); doc.outerHTML.endsWith('</body></html>').should.equal(true); // verify that body has only one <html> tag, one <body> tag, etc. - doc.childNodes.length.should.equal(2);// <!DOCTYPE> and <html> + DU.hasNChildren(doc, 2).should.equal(true); // <!DOCTYPE> and <html> doc.firstChild.nodeName.should.equal('html'); doc.lastChild.nodeName.should.equal('HTML'); // <html> children should be <head> and <body> var html = doc.documentElement; - html.childNodes.length.should.equal(2); + DU.hasNChildren(html, 2).should.equal(true); html.firstChild.nodeName.should.equal('HEAD'); html.lastChild.nodeName.should.equal('BODY'); // <body> should have one child, <table> -- To view, visit https://gerrit.wikimedia.org/r/386021 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I16eca1e5c088a94acfb64177a9ce8545eb487401 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: C. Scott Ananian <canan...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits