Subramanya Sastry has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/202960

Change subject: Normalize DOM before running DOM-Diff.
......................................................................

Normalize DOM before running DOM-Diff.

* It makes sense to normalize the edited DOM before
  running DOMDiff on it. In practice, this should only
  normalize new content, since we should not have
  enabled any normalizations that might affect existing
  content (this holds at least for wikis that have
  established norms about acceptable wikitext).

Change-Id: I6f02e2b5bee900b8cee8001c499721e02d89a5a0
---
M lib/mediawiki.SelectiveSerializer.js
M lib/mediawiki.WikitextSerializer.js
M lib/wts.normalizeDOM.js
M tests/parserTests-blacklist.js
4 files changed, 18 insertions(+), 17 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/60/202960/1

diff --git a/lib/mediawiki.SelectiveSerializer.js 
b/lib/mediawiki.SelectiveSerializer.js
index 9ed0477..8a81cd9 100644
--- a/lib/mediawiki.SelectiveSerializer.js
+++ b/lib/mediawiki.SelectiveSerializer.js
@@ -13,6 +13,7 @@
        ParserPipelineFactory = 
require('./mediawiki.parser.js').ParserPipelineFactory,
        DOMDiff = require('./mediawiki.DOMDiff.js').DOMDiff,
        ParsoidCacheRequest = 
require('./mediawiki.ApiRequest.js').ParsoidCacheRequest,
+       normalizeDOM = require('./wts.normalizeDOM.js').normalizeDOM,
        async = require('async');
 
 /**
@@ -68,6 +69,9 @@
                        this.timer.timing( 'html2wt.full.serialize', '', ( 
Date.now() - startTimers.get( 'html2wt.full.serialize' )) );
                }
        } else {
+               // Normalize DOM before diffing
+               normalizeDOM(body, this.env);
+
                // Use provided diff-marked DOM (used during testing)
                // or generate one (used in production)
                if ( this.timer ) {
diff --git a/lib/mediawiki.WikitextSerializer.js 
b/lib/mediawiki.WikitextSerializer.js
index 5458fb5..6c025cf 100644
--- a/lib/mediawiki.WikitextSerializer.js
+++ b/lib/mediawiki.WikitextSerializer.js
@@ -22,7 +22,7 @@
  *   wikitext and HTML serialization.
  * ---------------------------------------------------------------------- */
 
-"use strict";
+'use strict';
 
 require('./core-upgrade.js');
 var wtConsts = require('./mediawiki.wikitext.constants.js'),
@@ -1374,8 +1374,10 @@
        }
        var state = new SerializerState(this, this.options);
        try {
-               // Normalize the DOM
-               normalizeDOM(body, state.env);
+               if (!selserMode) {
+                       // Normalize the DOM
+                       normalizeDOM(body, state.env);
+               }
 
                // Don't serialize the DOM if debugging is disabled
                this.env.log(this.logType, function() {
diff --git a/lib/wts.normalizeDOM.js b/lib/wts.normalizeDOM.js
index 1a2e2ff..d9fb752 100644
--- a/lib/wts.normalizeDOM.js
+++ b/lib/wts.normalizeDOM.js
@@ -26,17 +26,15 @@
  * For example: a='<b><i>x</i></b>' b='<i>y</i>' => '<i><b>x</b>y</i>'
  */
 function swappable(a, b) {
-       return DU.numNonDeletedChildNodes(a) === 1 &&
-               similar(a, DU.firstNonDeletedChildNode(a)) &&
-               mergable(DU.firstNonDeletedChildNode(a), b);
+       return a.childNodes.length === 1 &&
+               similar(a, a.firstChild) &&
+               mergable(a.firstChild, b);
 }
 
 /** Transfer all of b's children to a and delete b */
 function merge(env, a, b) {
        DU.migrateChildren(b, a);
        b.parentNode.removeChild(b);
-
-       DU.setDiffMark(a, env, 'children-changed');
        return a;
 }
 
@@ -45,9 +43,6 @@
        DU.migrateChildren(b, a);
        a.parentNode.insertBefore(b, a);
        b.appendChild(a);
-
-       DU.setDiffMark(a, env, 'children-changed');
-       DU.setDiffMark(b, env, 'children-changed');
 
        return b;
 }
@@ -115,12 +110,12 @@
                        // so we don't need to recurse further.
                        _normalizeDOM(env, a, false);
                } else if (swappable(a, b)) {
-                       a = merge(env, swap(env, a, 
DU.firstNonDeletedChildNode(a)), b);
+                       a = merge(env, swap(env, a, a.firstChild), b);
                        // Again, a has new children, but the grandkids have 
already
                        // been minimized.
                        _normalizeDOM(env, a, false);
                } else if (swappable(b, a)) {
-                       a = merge(env, a, swap(env, b, 
DU.firstNonDeletedChildNode(b)));
+                       a = merge(env, a, swap(env, b, b.firstChild));
                        // Again, a has new children, but the grandkids have 
already
                        // been minimized.
                        _normalizeDOM(env, a, false);
@@ -166,11 +161,11 @@
 
 _normalizeDOM = function(env, node, recurse) {
        // Process the first child outside the loop.
-       var a = DU.firstNonDeletedChildNode(node);
+       var a = node.firstChild;
        a = processNode(env, a, recurse);
        while (a) {
                // We need a pair of adjacent siblings for tag minimization.
-               var b = DU.nextNonDeletedSibling(a);
+               var b = a.nextSibling;
                if (!b) {
                        break;
                }
@@ -180,7 +175,7 @@
 
                // If we skipped over a bunch of nodes in the middle,
                // we no longer have a pair of adjacent siblings.
-               if (b && DU.previousNonDeletedSibling(b) === a) {
+               if (b && b.previousSibling === a) {
                        // Process the pair.
                        a = normalizeSiblingPair(env, a, b);
                } else {
diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 470e065..c1c20a9 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -1884,7 +1884,7 @@
 add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid 
[[3,0,3,3,4,[3,[4,0]],0,[0,1,3],0,[[3,[0,2]]]]]", 
"\nsjft3sgyl0i9t3xr''[http://example.com 
spz6gumbba84zpvi''<nowiki/>'']''\n''Something [http://example.com mixed''''', 
even bold]''\n'''''[http://example.com 
bothi78rcmgoegtoi529'''''<nowiki/>''''']'''''");
 add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid 
[[[3],0,[2],0,4,1,0,4,3,3]]", "''<nowiki/>''\n[http://example.com 
rgkc3a28h38cwhfr'''text]'''fuoqrsdna64dkj4i''Something [http://example.com in 
italic'']''\nmsb55932ah7rdx6r\n");
 add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid 
[[3,0,3,1,0,2,4,3,3,[[0,[0,1]]]]]", 
"\n'''<nowiki/>'''\nz032w7z1kdo0f6r''Something [http://example.com in 
italic'']o88q9qaoc6ljif6r'''''Now [http://example.com 
both'''''<nowiki/>''''']'''''");
-add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid 
[[2,2,[2],1,0,[3,0],3,[3,[0,[[4]]],0],0,3]]", 
"o9nab4h230tnvcxr''[http://example.com 
text'']u5sfhyznivd3g14i\n[http://example.com 
isdro1map7qa1yvi'''text]'''<nowiki/>'''\n''[http://example.com in 
italic''][http://example.com mixed'''''o08ug21d18h3erk9''''']'''\n");
+add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid 
[[2,2,[2],1,0,[3,0],3,[3,[0,[[4]]],0],0,3]]", 
"o9nab4h230tnvcxr''[http://example.com 
text'']u5sfhyznivd3g14i\n[http://example.com 
isdro1map7qa1yvi'''text]'''<nowiki/>'''\n''[http://example.com in 
italic''][http://example.com mixed'''''o08ug21d18h3erk9''''']'<nowiki/>''\n");
 add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid 
[[[2],4,[[4]],0,4,[0,4],2,2,3,[4]]]", "''kndlu276kxz5b3xr[http://example.com 
text'']''66fz4jk56gtd42t9[http://example.com 
'''hbtqecdqt5oecdi''']'''sr9o6vidsk5u3di''Something 
81gi1py5yann4s4i''i1h0s35lu287iudi\nr1c3unvdylanhfr''Something 
[http://example.com mixed''''', even bold]''''''cwiw4wazt6kfn7b9'''");
 add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid 
[[2,0,3,0,3,2,0,[3,2,0],2,[3]]]", "rxaa2sxf4o8yqfr''[http://example.com 
text'']\n'''mtebxw1xl4x11yvi''Something [http://example.com in 
italic'']\n''vt2hxx23uta3v7vi[http://example.com mixed''''', even 
bold]'''fr3tlqbb4piizfr\n'''<nowiki/>'''");
 add("selser", "Bug 2702: Mismatched <i>, <b> and <a> tags are invalid 
[[4,2,0,0,4,[2,[4,1]],0,1,2,1]]", 
"uypp3nxnupst6gvi5lixlomwktb4vx6r\n[http://example.com 
'''text]'''1euo2nqbj6mvx6r''s5ucqw1bludz33diSomething [http://example.com 
iqi27jd7nxoxyldi''<nowiki/>'']''\n''Something [http://example.com mixed''''', 
even bold]'''3rrma1jx3c70hpvi\n'''''Now [http://example.com both''''']'''");

-- 
To view, visit https://gerrit.wikimedia.org/r/202960
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I6f02e2b5bee900b8cee8001c499721e02d89a5a0
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to