Arlolra has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/190410

Change subject: Fix selser in the v2 API
......................................................................

Fix selser in the v2 API

 * It was setting env.page.dom as the document, whereas throughout the
   codebase that's usually the body. This produced an unnecessary diff.

 * Up to this point, some of the tests for the v2 API were merely documenting
   the expectations, not enforcing anything. Here we add a test to
   ensure selser is used.

 * Turns on useSelser in the test parsoidServer.

 * Renames some variables in the selectiveSerializer to reflect that
   we're working with a body, not a doc.

Bug: T89411
Change-Id: I91d688d7b3942b89637b15ed2ae1a54dab22bede
---
M api/routes.js
M lib/mediawiki.DOMDiff.js
M lib/mediawiki.SelectiveSerializer.js
M tests/mocha/api.js
M tests/test.localsettings.js
5 files changed, 91 insertions(+), 34 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/10/190410/1

diff --git a/api/routes.js b/api/routes.js
index 9d08249..870c2cf 100644
--- a/api/routes.js
+++ b/api/routes.js
@@ -258,7 +258,8 @@
        var out = [];
        var p = new Promise(function( resolve, reject ) {
                if ( v2 && v2.original && v2.original.wikitext ) {
-                       env.setPageSrcInfo( v2.original.wikitext.body );
+                       env.setPageSrcInfo( ( typeof v2.original.wikitext !== 
"string" ) ?
+                               v2.original.wikitext.body : 
v2.original.wikitext );
                        return resolve();
                } else if ( !env.conf.parsoid.fetchWT ) {
                        return resolve();
@@ -279,11 +280,16 @@
                        Serializer = parsoidConfig.useSelser ? 
SelectiveSerializer : WikitextSerializer,
                        serializer = new Serializer({ env: env, oldid: 
env.page.id });
                if ( v2 && v2["data-parsoid"] ) {
-                       DU.applyDataParsoid( doc, v2["data-parsoid"].body );
+                       DU.applyDataParsoid( doc, v2["data-parsoid"].body ?
+                               v2["data-parsoid"].body : v2["data-parsoid"] );
                }
                if ( v2 && v2.original && v2.original.html ) {
-                       env.page.dom = DU.parseHTML( v2.original.html.body );
-                       DU.applyDataParsoid( env.page.dom, 
v2.original["data-parsoid"].body );
+                       env.page.dom = DU.parseHTML( ( typeof v2.original.html 
!== "string" ) ?
+                               v2.original.html.body : v2.original.html ).body;
+                       if ( v2.original["data-parsoid"] ) {
+                               DU.applyDataParsoid( 
env.page.dom.ownerDocument, v2.original["data-parsoid"].body ?
+                                       v2.original["data-parsoid"].body : 
v2.original["data-parsoid"] );
+                       }
                }
                return Promise.promisify( serializer.serializeDOM, false, 
serializer )(
                        doc.body, function( chunk ) { out.push( chunk ); }, 
false
@@ -751,7 +757,8 @@
                        }
                        // We've been given source for this page
                        if ( v2.original && v2.original.wikitext ) {
-                               wikitext = v2.original.wikitext.body;
+                               wikitext = ( typeof v2.original.wikitext !== 
"string" ) ?
+                                       v2.original.wikitext.body : 
v2.original.wikitext;
                        }
                }
                wt2html( req, res, wikitext );
diff --git a/lib/mediawiki.DOMDiff.js b/lib/mediawiki.DOMDiff.js
index 76939f7..efb42ce 100644
--- a/lib/mediawiki.DOMDiff.js
+++ b/lib/mediawiki.DOMDiff.js
@@ -42,8 +42,8 @@
 
        // The root nodes are equal, call recursive differ
        this.debug('ORIG:\n', this.env.page.dom.outerHTML, '\nNEW :\n', 
workNode.outerHTML );
-       var foundChange = this.doDOMDiff(this.env.page.dom, workNode);
-       return { isEmpty: ! foundChange, dom: workNode };
+       var foundChange = this.doDOMDiff( this.env.page.dom, workNode );
+       return { isEmpty: !foundChange, dom: workNode };
 };
 
 // These attributes are ignored for equality purposes if they are added to a 
node.
diff --git a/lib/mediawiki.SelectiveSerializer.js 
b/lib/mediawiki.SelectiveSerializer.js
index 7128d63..c12b693 100644
--- a/lib/mediawiki.SelectiveSerializer.js
+++ b/lib/mediawiki.SelectiveSerializer.js
@@ -49,38 +49,38 @@
  * Run the DOM serialization on a node.
  *
  * @param {Error} err
- * @param {Node} doc
+ * @param {Node} body
  * @param {Function} cb Callback that is called for each chunk.
  * @param {string} cb.res The wikitext of the chunk we've just serialized.
  * @param {Function} finalcb The callback for when we've finished serializing 
the DOM.
  */
-SSP.doSerializeDOM = function( err, doc, cb, finalcb ) {
+SSP.doSerializeDOM = function( err, body, cb, finalcb ) {
        var self = this;
 
        if ( err || (!this.env.page.dom && !this.env.page.domdiff) || 
!this.env.page.src) {
                // If there's no old source, fall back to non-selective 
serialization.
-               this.wts.serializeDOM( doc, cb, false, finalcb );
+               this.wts.serializeDOM( body, cb, false, finalcb );
        } else {
                // Use provided diff-marked DOM (used during testing)
                // or generate one (used in production)
-               var diff = this.env.page.domdiff || new DOMDiff(this.env).diff( 
doc );
+               var diff = this.env.page.domdiff || new DOMDiff( this.env 
).diff( body );
 
-               if ( ! diff.isEmpty ) {
+               if ( !diff.isEmpty ) {
 
-                       doc = diff.dom;
+                       body = diff.dom;
 
                        // Add the serializer info
-                       // new DiffToSelserConverter(this.env, doc).convert();
+                       // new DiffToSelserConverter(this.env, body).convert();
 
                        if ( this.trace || ( this.env.conf.parsoid.dumpFlags &&
                                this.env.conf.parsoid.dumpFlags.indexOf( 
'dom:post-dom-diff' ) !== -1) )
                        {
                                console.log( '----- DOM after running DOMDiff 
-----' );
-                               console.log( doc.outerHTML );
+                               console.log( body.outerHTML );
                        }
 
                        // Call the WikitextSerializer to do our bidding
-                       this.wts.serializeDOM( doc, cb, true, finalcb );
+                       this.wts.serializeDOM( body, cb, true, finalcb );
                } else {
                        // Nothing was modified, just re-use the original source
                        cb( this.env.page.src );
@@ -103,7 +103,7 @@
  * @param {string} src The wikitext source of the document (optionally
  *                     including page metadata)
  */
-SSP.parseOriginalSource = function ( doc, cb, finalcb, err, src ) {
+SSP.parseOriginalSource = function( body, cb, finalcb, err, src ) {
        var self = this,
                parserPipelineFactory = new ParserPipelineFactory( this.env ),
                parserPipeline = parserPipelineFactory.getPipeline( 
'text/x-mediawiki/full' );
@@ -113,14 +113,11 @@
 
        // Parse the wikitext src to the original DOM, and pass that on to
        // doSerializeDOM
-       parserPipeline.once( 'document', function ( origDoc ) {
-               var body = DU.parseHTML( DU.serializeNode(origDoc) ).body;
-               self.env.page.dom = body;
-               //console.log('calling doSerializeDOM');
-               //console.log(body.outerHTML);
-               self.doSerializeDOM(null, doc, cb, finalcb);
+       parserPipeline.once( 'document', function( origDoc ) {
+               self.env.page.dom = DU.parseHTML( DU.serializeNode( origDoc ) 
).body;
+               self.doSerializeDOM( null, body, cb, finalcb );
        } );
-       parserPipeline.processToplevelDoc(this.env.page.src);
+       parserPipeline.processToplevelDoc( this.env.page.src );
 };
 
 
@@ -135,14 +132,14 @@
  * @param {string} cb.res The chunk of wikitext just serialized.
  * @param {Function} finalcb The callback fired on completion of the 
serialization.
  */
-SSP.serializeDOM = function ( doc, cb, dummy, finalcb ) {
+SSP.serializeDOM = function( body, cb, dummy, finalcb ) {
        // dummy preserves the wt serializer interface
        var self = this;
        if ( this.env.page.dom || this.env.page.domdiff ) {
-               this.doSerializeDOM(null, doc, cb, finalcb);
+               this.doSerializeDOM( null, body, cb, finalcb );
        } else if ( this.env.page.src ) {
                // Have the src, only parse the src to the dom
-               this.parseOriginalSource( doc, cb, finalcb, null, 
this.env.page.src );
+               this.parseOriginalSource( body, cb, finalcb, null, 
this.env.page.src );
        } else if (this.env.page.id && this.env.page.id !== '0') {
                // Start by getting the old text of this page
                if (this.env.conf.parsoid.parsoidCacheURI) {
@@ -166,22 +163,23 @@
                                                        // And the original 
dom. results[1] is an array
                                                        // with the html and 
the content type. Ignore the
                                                        // content type here.
-                                                       self.env.page.dom = 
DU.parseHTML(results[1][0]).body;
+                                                       self.env.page.dom = 
DU.parseHTML( results[1][0] ).body;
                                                }
 
                                                // Selective serialization if 
there was no error, full
                                                // serialization if there was 
one.
-                                               self.doSerializeDOM(null, doc, 
cb, finalcb);
+                                               self.doSerializeDOM( null, 
body, cb, finalcb );
                                        }
                        );
 
                } else {
-                       Util.getPageSrc( this.env, this.env.page.name,
-                                       this.env.page.id || null,
-                                       this.parseOriginalSource.bind(this, 
doc, cb, finalcb) );
+                       Util.getPageSrc(
+                               this.env, this.env.page.name, this.env.page.id 
|| null,
+                               this.parseOriginalSource.bind( this, body, cb, 
finalcb )
+                       );
                }
        } else {
-               this.doSerializeDOM(null, doc, cb, finalcb);
+               this.doSerializeDOM( null, body, cb, finalcb );
        }
 
 };
diff --git a/tests/mocha/api.js b/tests/mocha/api.js
index a559f7d..0f7aa8a 100644
--- a/tests/mocha/api.js
+++ b/tests/mocha/api.js
@@ -430,6 +430,58 @@
                                .end(done);
                        });
 
+                       it('should use selser', function(done) {
+                               request(api)
+                               .post('v2/' + mockHost + '/wt/')
+                               .send({
+                                       html: "<html><body id=\"mwAA\"><div 
id=\"mwBB\">Selser test</div></body></html>",
+                                       "data-parsoid": {
+                                               "ids": {
+                                                       mwAA: {},
+                                                       mwBB: {}
+                                               }
+                                       },
+                                       original: {
+                                               title: "Doesnotexist",
+                                               wikitext: "<div id=bar>Selser 
test",
+                                               html: "<html><body 
id=\"mwAA\"><div id=\"mwBB\">Selser test</div></body></html>",
+                                               "data-parsoid": {
+                                                       "ids": {
+                                                               mwAA: {},
+                                                               mwBB: {}
+                                                       }
+                                               }
+                                       }
+                               })
+                               .expect(200)
+                               .expect(function(res) {
+                                       
res.body.should.have.property("wikitext");
+                                       
res.body.wikitext.body.should.equal("<div id=bar>Selser test");
+                               })
+                               .end(done);
+                       });
+
+                       it('should fallback to non-selective serialization', 
function(done) {
+                               // Without the original wikitext and an 
unavailable
+                               // TemplateFetch for the source (Doesnotexist 
will 404),
+                               // it should fallback to non-selective 
serialization.
+                               request(api)
+                               .post('v2/' + mockHost + '/wt/')
+                               .send({
+                                       html: "<html><body><div 
id=\"bar\">Selser test</div></body></html>",
+                                       original: {
+                                               title: "Doesnotexist",
+                                               html: "<html><body><div 
id=\"bar\">Selser test</div></body></html>",
+                                       }
+                               })
+                               .expect(200)
+                               .expect(function(res) {
+                                       
res.body.should.have.property("wikitext");
+                                       
res.body.wikitext.body.should.equal("<div id=\"bar\">Selser test</div>");
+                               })
+                               .end(done);
+                       });
+
                }); // end html2wt
 
        });
diff --git a/tests/test.localsettings.js b/tests/test.localsettings.js
index 6b9e3af..8b97f61 100644
--- a/tests/test.localsettings.js
+++ b/tests/test.localsettings.js
@@ -17,7 +17,7 @@
        //parsoidConfig.usePHPPreProcessor = false;
 
        // Use selective serialization (default false)
-       //parsoidConfig.useSelser = true;
+       parsoidConfig.useSelser = true;
 
        // allow cross-domain requests to the API (default disallowed)
        //parsoidConfig.allowCORS = '*';

-- 
To view, visit https://gerrit.wikimedia.org/r/190410
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I91d688d7b3942b89637b15ed2ae1a54dab22bede
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to