MarkTraceur has uploaded a new change for review.
https://gerrit.wikimedia.org/r/55158
Change subject: Postpone data-parsoid saves until DOMPostProc is done
......................................................................
Postpone data-parsoid saves until DOMPostProc is done
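Should make things faster: data-parsoid is now loaded into node.data once
before the DOM passes run, modified in place by the passes, and written back
to the attribute in a single final traversal (only for nodes whose
tmp_hasChanged flag is not false), instead of each pass re-parsing and
re-serializing the JSON attribute.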
Change-Id: I596e07270b10bd4ff9cc1b291d8b953320122ca6
---
M js/lib/mediawiki.DOMPostProcessor.js
M js/lib/mediawiki.DOMUtils.js
2 files changed, 106 insertions(+), 64 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid
refs/changes/58/55158/1
diff --git a/js/lib/mediawiki.DOMPostProcessor.js
b/js/lib/mediawiki.DOMPostProcessor.js
index 0296ddb..b130f39 100644
--- a/js/lib/mediawiki.DOMPostProcessor.js
+++ b/js/lib/mediawiki.DOMPostProcessor.js
@@ -502,18 +502,8 @@
c.parentNode.insertBefore(problemTpl.end,
c.nextSibling);
// Update TSR
- var dpSrc =
problemTpl.end.getAttribute("data-parsoid") || "";
-
- if (dpSrc === "") {
- // TODO: Figure out why there is no
data-parsoid here!
- console.error( "XXX Error in
handleUnbalancedTableTags: no data-parsoid found! " +
- env.page.name );
- dpSrc = '{}';
- }
-
- var tplDP = JSON.parse(dpSrc);
- tplDP.tsr = DU.dataParsoid(c).tsr;
- DU.setDataParsoid(problemTpl.end, tplDP);
+ problemTpl.end.data.tmp_hasChanged = true;
+ problemTpl.end.data.parsoid.tsr =
c.data.parsoid.tsr;
// Skip all nodes till we find the opening id
of this template
// FIXME: Ugh! Duplicate tree traversal
@@ -634,9 +624,8 @@
var matches =
fc.data.match(/^(\r\n|\r|\n)/);
if (matches) {
// Record it in data-parsoid
- var preDP = DU.dataParsoid(n);
- preDP.strippedNL = matches[1];
- DU.setDataParsoid(n, preDP);
+ n.data.tmp_hasChanged = true;
+ n.data.parsoid.strippedNL =
matches[1];
}
}
}
@@ -788,7 +777,7 @@
var range = null;
while (parentNode && parentNode.nodeType !== Node.DOCUMENT_NODE) {
var i = startAncestors.indexOf( parentNode );
- var tsr0 = DU.dataParsoid(startElem).tsr[0];
+ var tsr0 = startElem.data.parsoid.tsr[0];
if (i === 0) {
// widen the scope to include the full subtree
range = {
@@ -860,20 +849,27 @@
if (updateDP) {
var done = false;
- var tcDP = DU.dataParsoid(tcStart);
- var seDP = DU.dataParsoid(startElem);
+ var tcDP = ( tcStart.data || {} ).parsoid;
+ var seDP = ( startElem.data || {} ).parsoid;
if (tcDP && seDP && tcDP.dsr && seDP.dsr && tcDP.dsr[1] >
seDP.dsr[1]) {
// Since TSRs on template content tokens are cleared by
the
// template handler, all computed dsr values for
template content
// is always inferred from top-level content values and
is safe.
// So, do not overwrite a bigger end-dsr value.
- tcDP.dsr[0] = seDP.dsr[0];
- DU.setDataParsoid(tcStart, tcDP);
+ tcStart.data.tmp_hasChanged = true;
+ tcStart.data.parsoid.dsr[0] =
startElem.data.parsoid.dsr[0];
done = true;
}
if (!done) {
- tcStart.setAttribute("data-parsoid",
startElem.getAttribute("data-parsoid"));
+ if ( tcStart.data === undefined ) {
+ tcStart.data = {};
+ }
+ if ( startElem.data === undefined ) {
+ startElem.data = {};
+ }
+ tcStart.data.tmp_hasChanged = true;
+ tcStart.data.parsoid = Util.clone(
startElem.data.parsoid );
}
}
@@ -1204,14 +1200,15 @@
* 2b. If dp2.dsr[0] is unknown, we rely on fostered flag on
* tcStart, if any.
*
---------------------------------------------------------------- */
- var dp1 = DU.dataParsoid(tcStart),
- dp2 = DU.dataParsoid(tcEnd),
+ var dp1 = tcStart.data.parsoid,
+ dp2 = tcEnd.data.parsoid,
done = false;
if (dp1.dsr) {
if (dp2.dsr) {
// Case 1. above
if (dp2.dsr[1] > dp1.dsr[1]) {
dp1.dsr[1] = dp2.dsr[1];
+ tcStart.data.tmp_hasChanged = true;
}
// Case 2. above
@@ -1221,13 +1218,14 @@
(tcStart.data &&
tcStart.data.tmp_fostered)))
{
dp1.dsr[0] = endDsr;
+ tcStart.data.tmp_hasChanged = true;
}
}
// Check if now have a useable range on dp1
if (dp1.dsr[0] !== null && dp1.dsr[1] !== null) {
dp1.src = env.page.src.substring( dp1.dsr[0],
dp1.dsr[1] );
- DU.setDataParsoid(tcStart, dp1);
+ tcStart.data.tmp_hasChanged = true;
done = true;
}
}
@@ -1259,8 +1257,10 @@
}
function swallowTableIfNestedDSR(elt, tbl) {
- var eltDP = DU.dataParsoid(elt), eltDSR = eltDP.dsr,
- tblDP = DU.dataParsoid(tbl), tblTSR = tblDP.tsr;
+ var eltDP = elt.data.parsoid,
+ eltDSR = eltDP.dsr,
+ tblDP = tbl.data.parsoid,
+ tblTSR = tblDP.tsr;
// IMPORTANT: Do not use dsr to compare because the table may not
// have a valid dsr[1] (if the table's end-tag is generated by
@@ -1271,7 +1271,6 @@
if (eltDSR && tblTSR && eltDSR[0] >= tblTSR[1]) {
eltDP.dsr[0] = tblTSR[0];
eltDP.dsr[1] = null;
- DU.setDataParsoid(elt, eltDP);
return true;
} else {
return false;
@@ -1321,7 +1320,7 @@
// on end-meta-tags.
//
// Ex: "<ref>{{echo|bar}}<!--bad-></ref>"
- if (metaMatch && (DU.dataParsoid(elem).tsr ||
type.match(/\/End\b/))) {
+ if (metaMatch && (elem.data.parsoid.tsr ||
type.match(/\/End\b/))) {
var metaType = metaMatch[1];
about = elem.getAttribute('about'),
@@ -1436,7 +1435,7 @@
var metaType = c.getAttribute("typeof");
if (metaType === "mw:StartTag") {
deleteNode(c);
- } else if (metaType === "mw:EndTag" &&
!DU.dataParsoid(c).tsr) {
+ } else if (metaType === "mw:EndTag" &&
!c.data.parsoid.tsr) {
// If there is no tsr, this meta is
useless for DSR
// calculations. Remove the meta to
avoid breaking
// other brittle DOM passes working on
the DOM.
@@ -1491,7 +1490,9 @@
placeHolder = node.ownerDocument.createElement('meta'),
placeHolder.setAttribute('typeof', 'mw:Placeholder');
- DU.setDataParsoid(placeHolder, {src: src});
+ placeHolder.data = {};
+ placeHolder.data.parsoid = {src: src};
+ placeHolder.data.tmp_hasChanged = true;
// Insert the placeHolder
node.parentNode.insertBefore(placeHolder, node);
@@ -1526,7 +1527,7 @@
// Process subtree first
findBuilderCorrectedTagsInternal(c, env);
- var dp = DU.dataParsoid(c),
+ var dp = c.data.parsoid,
cNodeName = c.nodeName.toLowerCase();
// Dont bother detecting auto-inserted
start/end if:
@@ -1551,7 +1552,7 @@
// 'c' is a html node
that has tsr, but no end-tag marker tag
// => its closing tag
was auto-generated by treebuilder.
dp.autoInsertedEnd =
true;
- DU.setDataParsoid(c,
dp);
+ c.data.tmp_hasChanged =
true;
}
}
@@ -1562,7 +1563,7 @@
if (fc.nodeType !==
Node.ELEMENT_NODE) {
break;
}
- var fcDP =
DU.dataParsoid(fc);
+ var fcDP =
fc.data.parsoid;
if
(fcDP.autoInsertedStart) {
fc =
fc.firstChild;
} else {
@@ -1576,8 +1577,8 @@
fc.getAttribute('data-stag') !== expectedName)
{
//console.log('autoInsertedStart:', c.innerHTML);
+ c.data.tmp_hasChanged =
true;
dp.autoInsertedStart =
true;
- DU.setDataParsoid(c,
dp);
}
}
} else if ( cNodeName === 'meta') {
@@ -1592,7 +1593,6 @@
// mw:Placeholder for
round-tripping
//console.log('autoinsertedEnd', c.innerHTML, c.parentNode.innerHTML);
addPlaceholderMeta(c,
dp, expectedName, {end: true});
-
}
} else if ( type === 'mw:StartTag' ) {
var dataStag =
c.getAttribute('data-stag'),
@@ -1814,7 +1814,7 @@
} else if (cType === Node.ELEMENT_NODE) {
if (traceDSR) console.warn("-- Processing <" +
node.nodeName + ":" + i + ">=" + child.nodeName + " with [" + cs + "," + ce +
"]");
var cTypeOf = child.getAttribute("typeof"),
- dp = DU.dataParsoid(child),
+ dp = child.data.parsoid,
tsr = dp.tsr,
oldCE = tsr ? tsr[1] : null,
propagateRight = false,
@@ -1833,11 +1833,11 @@
// Update table-end syntax
using info from the meta tag
var prev =
child.previousSibling;
if (prev &&
DU.hasNodeName(prev, "table")) {
- var prevDP =
DU.dataParsoid(prev);
+ var prevDP =
prev.data.parsoid;
if
(!DU.hasLiteralHTMLMarker(prevDP)) {
if
(dp.endTagSrc) {
prevDP.endTagSrc = dp.endTagSrc;
-
DU.setDataParsoid(prev, prevDP);
+
prev.data.tmp_hasChanged = true;
}
}
}
@@ -1879,6 +1879,7 @@
stWidth = dp.tagWidths[0];
etWidth = dp.tagWidths[1];
dp.tagWidths = undefined;
+ child.data.tmp_hasChanged = true;
}
} else if ((cTypeOf === "mw:Placeholder" || cTypeOf ===
"mw:Entity") && ce !== null && dp.src) {
cs = ce - dp.src.length;
@@ -1979,6 +1980,7 @@
// Set up 'dbsrc' so we can debug this
dp.dbsrc = env.page.src.substring(cs,
ce);
}
+ child.data.tmp_hasChanged = true;
}
// Propagate any required changes to the right
@@ -1996,7 +1998,7 @@
} else if (nType === Node.COMMENT_NODE)
{
newCE = newCE +
sibling.data.length + 7;
} else if (nType === Node.ELEMENT_NODE)
{
- var siblingDP =
DU.dataParsoid(sibling);
+ var siblingDP =
sibling.data.parsoid;
if (siblingDP.dsr &&
siblingDP.tsr && siblingDP.dsr[0] <= newCE && e !== null) {
// sibling's dsr wont
change => ltr propagation stops here.
break;
@@ -2015,7 +2017,7 @@
}
}
siblingDP.dsr[0] = newCE;
- DU.setDataParsoid(sibling,
siblingDP);
+ sibling.data.tmp_hasChanged =
true;
newCE = siblingDP.dsr[1];
} else {
break;
@@ -2027,10 +2029,6 @@
if (!sibling) {
e = newCE;
}
- }
-
- if (Object.keys(dp).length > 0) {
- DU.setDataParsoid(child, dp);
}
}
@@ -2078,9 +2076,9 @@
var body = root.body;
computeNodeDSR(env, body, startOffset, endOffset, traceDSR);
- var dp = DU.dataParsoid(body);
+ var dp = body.data.parsoid;
+ body.data.tmp_hasChanged = true;
dp.dsr = [startOffset, endOffset, 0, 0];
- DU.setDataParsoid(body, dp);
if (traceDSR) console.warn("------- done tracing DSR computation
-------");
@@ -2250,19 +2248,18 @@
var ix, prefix = getLinkPrefix( env, node ),
trail = getLinkTrail( env, node ),
- dp = Util.getJSONAttribute( node, 'data-parsoid', {} ),
- updated = false;
+ dp = node.data.parsoid;
if ( prefix && prefix.content ) {
for ( ix = 0; ix < prefix.content.length; ix++ ) {
node.insertBefore( prefix.content[ix], node.firstChild
);
}
if ( prefix.src.length > 0 ) {
- updated = true;
- dp.prefix = prefix.src;
- if (dp.dsr) {
- dp.dsr[0] -= prefix.src.length;
- dp.dsr[2] += prefix.src.length;
+ node.data.parsoid.prefix = prefix.src;
+ node.data.tmp_hasChanged = true;
+ if (node.data.parsoid.dsr) {
+ node.data.parsoid.dsr[0] -= prefix.src.length;
+ node.data.parsoid.dsr[2] += prefix.src.length;
}
}
}
@@ -2272,17 +2269,37 @@
node.appendChild( trail.content[ix] );
}
if ( trail.src.length > 0 ) {
- updated = true;
- dp.tail = trail.src;
- if (dp.dsr) {
- dp.dsr[1] += trail.src.length;
- dp.dsr[3] += trail.src.length;
+ node.data.parsoid.tail = trail.src;
+ node.data.tmp_hasChanged = true;
+ if (node.data.parsoid.dsr) {
+ node.data.parsoid.dsr[1] += trail.src.length;
+ node.data.parsoid.dsr[3] += trail.src.length;
}
}
}
+}
- if ( updated ) {
- node.setAttribute( 'data-parsoid', JSON.stringify( dp ) );
+/**
+ * @method
+ *
+ * Migrate data-parsoid attributes into a property on each DOM node. We'll
+ * migrate them back in the final DOM traversal.
+ *
+ * @param {Node} node
+ */
+function migrateDataParsoid( node ) {
+ DU.loadDataParsoid( node );
+ node.data.tmp_hasChanged = false;
+}
+
+/**
+ * @method
+ *
+ * Save the data-parsoid attributes on each node.
+ */
+function saveDataParsoid( node ) {
+ if ( node.data && node.data.tmp_hasChanged !== false ) {
+ DU.saveDataAttribs( node );
}
}
@@ -2290,8 +2307,13 @@
this.env = env;
this.options = options;
+ // DOM traverser that runs before the DOM handlers.
+ var preDOMHandlerVisitor = new DOMTraverser();
+ preDOMHandlerVisitor.addHandler( null, migrateDataParsoid );
+
// Common post processing
this.processors = [
+ preDOMHandlerVisitor.traverse.bind( preDOMHandlerVisitor ),
handleUnbalancedTableTags,
migrateStartMetas,
normalizeDocument,
@@ -2314,11 +2336,16 @@
// 1. Link prefixes and suffixes
// 2. Strip marker metas -- removes left over marker metas (ex: metas
// nested in expanded tpl/extension output).
- var lastDOMVisitor = new DOMTraverser();
- lastDOMVisitor.addHandler( 'a', handleLinkNeighbours.bind( null, env )
);
- lastDOMVisitor.addHandler('meta', stripMarkerMetas);
+ var lastDOMHandler = new DOMTraverser();
+ lastDOMHandler.addHandler( 'a', handleLinkNeighbours.bind( null, env )
);
+ lastDOMHandler.addHandler('meta', stripMarkerMetas);
- this.processors.push(lastDOMVisitor.traverse.bind(lastDOMVisitor));
+ this.processors.push(lastDOMHandler.traverse.bind(lastDOMHandler));
+
+ var cleanUpDOMPass = new DOMTraverser();
+ cleanUpDOMPass.addHandler( null, saveDataParsoid );
+
+ this.processors.push( cleanUpDOMPass.traverse.bind( cleanUpDOMPass ) );
}
// Inherit from EventEmitter
diff --git a/js/lib/mediawiki.DOMUtils.js b/js/lib/mediawiki.DOMUtils.js
index 11118ba..72ae157 100644
--- a/js/lib/mediawiki.DOMUtils.js
+++ b/js/lib/mediawiki.DOMUtils.js
@@ -19,6 +19,10 @@
// Decode a JSON object into the data member of DOM nodes
loadDataAttrib: function(node, name, defaultVal) {
+ if ( node.nodeType !== node.ELEMENT_NODE ) {
+ return;
+ }
+
if ( ! node.data ) {
node.data = {};
}
@@ -30,7 +34,14 @@
// Save all node.data.* structures to data attributes
saveDataAttribs: function(node) {
+ if ( node.nodeType !== node.ELEMENT_NODE ) {
+ return;
+ }
+
for(var key in node.data) {
+ if ( key.match( /^tmp_/ ) !== null ) {
+ continue;
+ }
var val = node.data[key];
if ( val && val.constructor === String ) {
node.setAttribute('data-' + key, val);
@@ -58,6 +69,10 @@
},
getJSONAttribute: function(n, name, defaultVal) {
+ if ( n.nodeType !== n.ELEMENT_NODE ) {
+ return defaultVal !== undefined ? defaultVal : {};
+ }
+
var attVal = n.getAttribute(name);
if (!attVal) {
return defaultVal !== undefined ? defaultVal : {};
--
To view, visit https://gerrit.wikimedia.org/r/55158
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I596e07270b10bd4ff9cc1b291d8b953320122ca6
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: MarkTraceur <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits