jenkins-bot has submitted this change and it was merged.

Change subject: Fix parserTests to serialize-to-html + reparse-to-DOM exactly 
once.
......................................................................


Fix parserTests to serialize-to-html + reparse-to-DOM exactly once.

* The DOM was being serialized to HTML and parsed back multiple
  times on the wt2wt and selser paths, which would change DOM
  semantics for HTML-pre (by stripping one newline each time).

* This patch fixes that so the serialize + reparse step always happens exactly once.

* Fixes the HTML-pre test wt2wt failure.

* Speeds up parser test runs by over 20%.

Change-Id: Ic7e9a4a8fc92603cc96e25ea3ea9915f5abb37d8
---
M tests/parserTests-blacklist.js
M tests/parserTests.js
2 files changed, 20 insertions(+), 13 deletions(-)

Approvals:
  GWicke: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/tests/parserTests-blacklist.js b/tests/parserTests-blacklist.js
index 39237d2..4d1d733 100644
--- a/tests/parserTests-blacklist.js
+++ b/tests/parserTests-blacklist.js
@@ -488,7 +488,6 @@
 add("wt2wt", "4. Indent-Pre and extension tags", " a <gallery>\n 
File:foobar.jpg\n </gallery>\n");
 add("wt2wt", "Leading pipes outside of tables 2", "a\n| foo\n\nb\n");
 add("wt2wt", "Leading pipes outside of tables 3", "a\n| class=\"foo bar\" | 
baz\n\nb\n");
-add("wt2wt", "HTML-pre: 1. embedded newlines", 
"<pre>foo</pre>\n\n<pre>\nfoo\n</pre>\n\n<pre>\nfoo\n</pre>\n\n<pre>\n\nfoo\n</pre>");
 add("wt2wt", "Definition lists: self-closed tag", ";one<br />two : two-line 
fun");
 add("wt2wt", "BUG 289: \">\"-token in bracketed URL", 
"[http://www.example.com/ <hello> stuff]\n");
 add("wt2wt", "BUG 289: literal \">\"-token in bracketed URL", 
"[http://www.example.com/ <b>html</b> stuff]\n");
diff --git a/tests/parserTests.js b/tests/parserTests.js
index c83970a..9d32d9f 100755
--- a/tests/parserTests.js
+++ b/tests/parserTests.js
@@ -428,7 +428,7 @@
                self = this,
                startsAtWikitext = mode === 'wt2wt' || mode === 'wt2html' || 
mode === 'selser';
        try {
-               this.env.page.dom = item.cachedHTMLStr ? 
DU.parseHTML(item.cachedHTMLStr).body : null;
+               this.env.page.dom = item.cachedDOM ? item.cachedDOM.body : null;
                if ( mode === 'selser' ) {
                        // console.warn("--> selsering: " + content.outerHTML);
                        this.env.setPageSrcInfo( item.wikitext );
@@ -937,17 +937,29 @@
 
        // First conversion stage
        if ( startsAtWikitext ) {
-               if ( item.cachedHTMLStr === null ) {
+           // Always serialize DOM to string and reparse before passing to 
wt2wt
+               if ( item.cachedDOM === null ) {
                        testTasks.push( this.convertWt2Html.bind( this, mode, 
item.wikitext ) );
-                       // Caching stage 1 - save the result of the first two 
stages so we can maybe skip them later
+                       // Caching stage 1 - save the result of the first two 
stages
+                       // so we can maybe skip them later
                        testTasks.push( function ( result, cb ) {
                                // Cache parsed HTML
-                               item.cachedHTMLStr = DU.serializeNode(result);
-                               cb( null, result );
+                               item.cachedDOM = 
DU.parseHTML(DU.serializeNode(result));
+
+                               // - In wt2html mode, pass through original DOM
+                               //   so that it is serialized just once.
+                               // - In wt2wt and selser modes, pass through 
serialized and
+                               //   reparsed DOM so that 
fostering/normalization effects
+                               //   are reproduced.
+                               if (mode === "wt2html") {
+                                       cb(null, result);
+                               } else {
+                                       cb(null, 
item.cachedDOM.body.cloneNode(true));
+                               }
                        } );
                } else {
                        testTasks.push( function ( cb ) {
-                               cb( null, DU.parseHTML(item.cachedHTMLStr) );
+                               cb(null, item.cachedDOM.body.cloneNode(true));
                        } );
                }
        } else if ( startsAtHtml ) {
@@ -978,11 +990,7 @@
                } );
        }
 
-       // Always serialize DOM to string and reparse before passing to wt2wt
        if (mode === 'wt2wt') {
-               testTasks.push( function ( doc, cb ) {
-                       cb( null, DU.parseHTML(DU.serializeNode(doc)).body);
-               } );
                // handle a 'changes' option if present.
                if (item.options.parsoid && item.options.parsoid.changes) {
                        testTasks.push( function( doc, cb ) {
@@ -1729,7 +1737,7 @@
                                                        }
 
                                                        // Push the caches 
forward!
-                                                       item.cachedHTMLStr = 
newitem.cachedHTMLStr;
+                                                       item.cachedDOM = 
newitem.cachedDOM;
                                                        
item.cachedNormalizedHTML = newitem.cachedNormalizedHTML;
 
                                                        setImmediate( cb );
@@ -1778,7 +1786,7 @@
 
                // Reset the cached results for the new case.
                // All test modes happen in a single run of processCase.
-               item.cachedHTMLStr = null;
+               item.cachedDOM = null;
                item.cachedNormalizedHTML = null;
 
                //console.log( 'processCase ' + i + JSON.stringify( item )  );

-- 
To view, visit https://gerrit.wikimedia.org/r/134787
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ic7e9a4a8fc92603cc96e25ea3ea9915f5abb37d8
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org>
Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org>
Gerrit-Reviewer: Cscott <canan...@wikimedia.org>
Gerrit-Reviewer: GWicke <gwi...@wikimedia.org>
Gerrit-Reviewer: Marcoil <marc...@wikimedia.org>
Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to