Marcoil has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/104723


Change subject: Add ParsoidConfig option fetchWT to fetch original wikitext before html2wt
......................................................................

Add ParsoidConfig option fetchWT to fetch original wikitext before html2wt

This is to reduce the number of unnecessary semantic differences shown
in round-trip testing.

Change-Id: Iba29fbc4ae98ffe4c88e77d11e01176b0fbb0e56
---
M api/ParserService.js
M lib/mediawiki.ParsoidConfig.js
M tests/test.localsettings.js
3 files changed, 48 insertions(+), 21 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/23/104723/1

diff --git a/api/ParserService.js b/api/ParserService.js
index 682de3b..3fe30ff 100644
--- a/api/ParserService.js
+++ b/api/ParserService.js
@@ -596,28 +596,45 @@
        var env = res.locals.env;
        env.page.id = req.body.oldid || null;
 
-       var doc;
-       try {
-               doc = DU.parseHTML( html.replace( /\r/g, '' ) );
-       } catch ( e ) {
-               console.log( 'There was an error in the HTML5 parser! Sending 
it back to the editor.' );
-               env.errCB( e );
-               return;
-       }
+       var html2wtCb = function () {
+               var doc;
+               try {
+                       doc = DU.parseHTML( html.replace( /\r/g, '' ) );
+               } catch ( e ) {
+                       console.log( 'There was an error in the HTML5 parser! 
Sending it back to the editor.' );
+                       env.errCB( e );
+                       return;
+               }
 
-       try {
-               var out = [];
-               new Serializer( { env: env, oldid: env.page.id } ).serializeDOM(
-                       doc.body,
-                       function ( chunk ) {
-                               out.push( chunk );
-                       }, function () {
-                               res.setHeader( 'Content-Type', 
'text/x-mediawiki; charset=UTF-8' );
-                               res.setHeader( 'X-Parsoid-Performance', 
env.getPerformanceHeader() );
-                               res.end( out.join( '' ) );
-                       } );
-       } catch ( e ) {
-               env.errCB( e );
+               try {
+                       var out = [];
+                       new Serializer( { env: env, oldid: env.page.id } 
).serializeDOM(
+                               doc.body,
+                               function ( chunk ) {
+                                       out.push( chunk );
+                               }, function () {
+                                       res.setHeader( 'Content-Type', 
'text/x-mediawiki; charset=UTF-8' );
+                                       res.setHeader( 'X-Parsoid-Performance', 
env.getPerformanceHeader() );
+                                       res.end( out.join( '' ) );
+                               } );
+               } catch ( e ) {
+                       env.errCB( e );
+               }
+       };
+
+       if ( env.conf.parsoid.fetchWT ) {
+               var target = env.resolveTitle( env.normalizeTitle( 
env.page.name ), '' );
+               var tpr = new TemplateRequest( env, target, env.page.id );
+               tpr.once( 'src', function ( err, src_and_metadata ) {
+                       if ( err ) {
+                               env.errCB( err );
+                       } else {
+                               env.setPageSrcInfo( src_and_metadata );
+                               html2wtCb();
+                       }
+               } );
+       } else {
+               html2wtCb();
        }
 }
 
diff --git a/lib/mediawiki.ParsoidConfig.js b/lib/mediawiki.ParsoidConfig.js
index cd89874..80c583e 100644
--- a/lib/mediawiki.ParsoidConfig.js
+++ b/lib/mediawiki.ParsoidConfig.js
@@ -171,6 +171,13 @@
  */
 ParsoidConfig.prototype.storeDataParsoid = false;
 
+/**
+ * @property {boolean} fetchWT
+ * When transforming from html to wt, fetch the original wikitext before.
+ * Intended for use in round-trip testing.
+ */
+ ParsoidConfig.prototype.fetchWT = false;
+
 if (typeof module === "object") {
        module.exports.ParsoidConfig = ParsoidConfig;
 }
diff --git a/tests/test.localsettings.js b/tests/test.localsettings.js
index 9a1c71f..b5e173a 100644
--- a/tests/test.localsettings.js
+++ b/tests/test.localsettings.js
@@ -21,4 +21,7 @@
 
        // Set editMode to false for round-trip testing
        parsoidConfig.editMode = false;
+
+       // Fetch the wikitext for a page before doing html2wt
+       parsoidConfig.fetchWT = true;
 };

-- 
To view, visit https://gerrit.wikimedia.org/r/104723
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iba29fbc4ae98ffe4c88e77d11e01176b0fbb0e56
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Marcoil <marc...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to