Subramanya Sastry has uploaded a new change for review. https://gerrit.wikimedia.org/r/202960
Change subject: Normalize DOM before running DOM-Diff. ...................................................................... Normalize DOM before running DOM-Diff. * It makes sense to normalize the edited DOM before running a DOMDiff on it. This should effectively only normalize new content since we shouldn't have enabled normalizations that might affect existing content (at least true for wikis that have wikitext norms about acceptable wikitext). Change-Id: I6f02e2b5bee900b8cee8001c499721e02d89a5a0 --- M lib/mediawiki.SelectiveSerializer.js M lib/mediawiki.WikitextSerializer.js M lib/wts.normalizeDOM.js M tests/parserTests-blacklist.js 4 files changed, 18 insertions(+), 17 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/60/202960/1 diff --git a/lib/mediawiki.SelectiveSerializer.js b/lib/mediawiki.SelectiveSerializer.js index 9ed0477..8a81cd9 100644 --- a/lib/mediawiki.SelectiveSerializer.js +++ b/lib/mediawiki.SelectiveSerializer.js @@ -13,6 +13,7 @@ ParserPipelineFactory = require('./mediawiki.parser.js').ParserPipelineFactory, DOMDiff = require('./mediawiki.DOMDiff.js').DOMDiff, ParsoidCacheRequest = require('./mediawiki.ApiRequest.js').ParsoidCacheRequest, + normalizeDOM = require('./wts.normalizeDOM.js').normalizeDOM, async = require('async'); /** @@ -68,6 +69,9 @@ this.timer.timing( 'html2wt.full.serialize', '', ( Date.now() - startTimers.get( 'html2wt.full.serialize' )) ); } } else { + // Normalize DOM before diffing + normalizeDOM(body, this.env); + // Use provided diff-marked DOM (used during testing) // or generate one (used in production) if ( this.timer ) { diff --git a/lib/mediawiki.WikitextSerializer.js b/lib/mediawiki.WikitextSerializer.js index 5458fb5..6c025cf 100644 --- a/lib/mediawiki.WikitextSerializer.js +++ b/lib/mediawiki.WikitextSerializer.js @@ -22,7 +22,7 @@ * wikitext and HTML serialization. * ---------------------------------------------------------------------- */ -"use strict"; +'use strict'; require('./core-upgrade.js'); var wtConsts = require('./mediawiki.wikitext.constants.js'), @@ -1374,8 +1374,10 @@ } var state = new SerializerState(this, this.options); try { - // Normalize the DOM - normalizeDOM(body, state.env); + if (!selserMode) { + // Normalize the DOM + normalizeDOM(body, state.env); + } // Don't serialize the DOM if debugging is disabled this.env.log(this.logType, function() { diff --git a/lib/wts.normalizeDOM.js b/lib/wts.normalizeDOM.js index 1a2e2ff..d9fb752 100644 --- a/lib/wts.normalizeDOM.js +++ b/lib/wts.normalizeDOM.js @@ -26,17 +26,15 @@ * For example: a='<b><i>x</i></b>' b='<i>y</i>' => '<i><b>x</b>y</i>' */ function swappable(a, b) { - return DU.numNonDeletedChildNodes(a) === 1 && - similar(a, DU.firstNonDeletedChildNode(a)) && - mergable(DU.firstNonDeletedChildNode(a), b); + return a.childNodes.length === 1 && + similar(a, a.firstChild) && + mergable(a.firstChild, b); } /** Transfer all of b's children to a and delete b */ function merge(env, a, b) { DU.migrateChildren(b, a); b.parentNode.removeChild(b); - - DU.setDiffMark(a, env, 'children-changed'); return a; } @@ -45,9 +43,6 @@ DU.migrateChildren(b, a); a.parentNode.insertBefore(b, a); b.appendChild(a); - - DU.setDiffMark(a, env, 'children-changed'); - DU.setDiffMark(b, env, 'children-changed'); return b; } @@ -115,12 +110,12 @@ // so we don't need to recurse further. _normalizeDOM(env, a, false); } else if (swappable(a, b)) { - a = merge(env, swap(env, a, DU.firstNonDeletedChildNode(a)), b); + a = merge(env, swap(env, a, a.firstChild), b); // Again, a has new children, but the grandkids have already // been minimized. _normalizeDOM(env, a, false); } else if (swappable(b, a)) { - a = merge(env, a, swap(env, b, DU.firstNonDeletedChildNode(b))); + a = merge(env, a, swap(env, b, b.firstChild)); // Again, a has new children, but the grandkids have already // been minimized. _normalizeDOM(env, a, false); @@ -166,11 +161,11 @@ _normalizeDOM = function(env, node, recurse) { // Process the first child outside the loop. - var a = DU.firstNonDeletedChildNode(node); + var a = node.firstChild; a = processNode(env, a, recurse); while (a) { // We need a pair of adjacent siblings for tag minimization. - var b = DU.nextNonDeletedSibling(a); + var b = a.nextSibling; if (!b) { break; } @@ -180,7 +175,7 @@ // If we skipped over a bunch of nodes in the middle, // we no longer have a pair of adjacent siblings. - if (b && DU.previousNonDeletedSibling(b) === a) { + if (b && b.previousSibling === a) { // Process the pair. a = normalizeSiblingPair(env, a, b); } else { diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js index 470e065..c1c20a9 100644 --- a/tests/parserTests-blacklist.js +++ b/tests/parserTests-blacklist.js @@ -1884,7 +1884,7 @@ add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid [[3,0,3,3,4,[3,[4,0]],0,[0,1,3],0,[[3,[0,2]]]]]", "\nsjft3sgyl0i9t3xr''[http://example.com spz6gumbba84zpvi''<nowiki/>'']''\n''Something [http://example.com mixed''''', even bold]''\n'''''[http://example.com bothi78rcmgoegtoi529'''''<nowiki/>''''']'''''"); add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid [[[3],0,[2],0,4,1,0,4,3,3]]", "''<nowiki/>''\n[http://example.com rgkc3a28h38cwhfr'''text]'''fuoqrsdna64dkj4i''Something [http://example.com in italic'']''\nmsb55932ah7rdx6r\n"); add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid [[3,0,3,1,0,2,4,3,3,[[0,[0,1]]]]]", "\n'''<nowiki/>'''\nz032w7z1kdo0f6r''Something [http://example.com in italic'']o88q9qaoc6ljif6r'''''Now [http://example.com both'''''<nowiki/>''''']'''''"); -add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid [[2,2,[2],1,0,[3,0],3,[3,[0,[[4]]],0],0,3]]", "o9nab4h230tnvcxr''[http://example.com text'']u5sfhyznivd3g14i\n[http://example.com isdro1map7qa1yvi'''text]'''<nowiki/>'''\n''[http://example.com in italic''][http://example.com mixed'''''o08ug21d18h3erk9''''']'''\n"); +add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid [[2,2,[2],1,0,[3,0],3,[3,[0,[[4]]],0],0,3]]", "o9nab4h230tnvcxr''[http://example.com text'']u5sfhyznivd3g14i\n[http://example.com isdro1map7qa1yvi'''text]'''<nowiki/>'''\n''[http://example.com in italic''][http://example.com mixed'''''o08ug21d18h3erk9''''']'<nowiki/>''\n"); add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid [[[2],4,[[4]],0,4,[0,4],2,2,3,[4]]]", "''kndlu276kxz5b3xr[http://example.com text'']''66fz4jk56gtd42t9[http://example.com '''hbtqecdqt5oecdi''']'''sr9o6vidsk5u3di''Something 81gi1py5yann4s4i''i1h0s35lu287iudi\nr1c3unvdylanhfr''Something [http://example.com mixed''''', even bold]''''''cwiw4wazt6kfn7b9'''"); add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid [[2,0,3,0,3,2,0,[3,2,0],2,[3]]]", "rxaa2sxf4o8yqfr''[http://example.com text'']\n'''mtebxw1xl4x11yvi''Something [http://example.com in italic'']\n''vt2hxx23uta3v7vi[http://example.com mixed''''', even bold]'''fr3tlqbb4piizfr\n'''<nowiki/>'''"); add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid [[4,2,0,0,4,[2,[4,1]],0,1,2,1]]", "uypp3nxnupst6gvi5lixlomwktb4vx6r\n[http://example.com '''text]'''1euo2nqbj6mvx6r''s5ucqw1bludz33diSomething [http://example.com iqi27jd7nxoxyldi''<nowiki/>'']''\n''Something [http://example.com mixed''''', even bold]'''3rrma1jx3c70hpvi\n'''''Now [http://example.com both''''']'''"); -- To view, visit https://gerrit.wikimedia.org/r/202960 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I6f02e2b5bee900b8cee8001c499721e02d89a5a0 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits