Arlolra has uploaded a new change for review.
https://gerrit.wikimedia.org/r/190410
Change subject: Fix selser in the v2 API
......................................................................
Fix selser in the v2 API
* It was setting env.page.dom as the document, whereas throughout the
codebase that's usually the body. This produced an unnecessary diff.
* To this point, some of the tests for the v2 API merely document
the expectations, and do not enforce anything. Here we add a test to
ensure selser is used.
* Turns on useSelser in the test parsoidServer.
* Renames some variables in the selectiveSerializer to reflect that
we're working with a body, not a doc.
Bug: T89411
Change-Id: I91d688d7b3942b89637b15ed2ae1a54dab22bede
---
M api/routes.js
M lib/mediawiki.DOMDiff.js
M lib/mediawiki.SelectiveSerializer.js
M tests/mocha/api.js
M tests/test.localsettings.js
5 files changed, 91 insertions(+), 34 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid
refs/changes/10/190410/1
diff --git a/api/routes.js b/api/routes.js
index 9d08249..870c2cf 100644
--- a/api/routes.js
+++ b/api/routes.js
@@ -258,7 +258,8 @@
var out = [];
var p = new Promise(function( resolve, reject ) {
if ( v2 && v2.original && v2.original.wikitext ) {
- env.setPageSrcInfo( v2.original.wikitext.body );
+ env.setPageSrcInfo( ( typeof v2.original.wikitext !==
"string" ) ?
+ v2.original.wikitext.body :
v2.original.wikitext );
return resolve();
} else if ( !env.conf.parsoid.fetchWT ) {
return resolve();
@@ -279,11 +280,16 @@
Serializer = parsoidConfig.useSelser ?
SelectiveSerializer : WikitextSerializer,
serializer = new Serializer({ env: env, oldid:
env.page.id });
if ( v2 && v2["data-parsoid"] ) {
- DU.applyDataParsoid( doc, v2["data-parsoid"].body );
+ DU.applyDataParsoid( doc, v2["data-parsoid"].body ?
+ v2["data-parsoid"].body : v2["data-parsoid"] );
}
if ( v2 && v2.original && v2.original.html ) {
- env.page.dom = DU.parseHTML( v2.original.html.body );
- DU.applyDataParsoid( env.page.dom,
v2.original["data-parsoid"].body );
+ env.page.dom = DU.parseHTML( ( typeof v2.original.html
!== "string" ) ?
+ v2.original.html.body : v2.original.html ).body;
+ if ( v2.original["data-parsoid"] ) {
+ DU.applyDataParsoid(
env.page.dom.ownerDocument, v2.original["data-parsoid"].body ?
+ v2.original["data-parsoid"].body :
v2.original["data-parsoid"] );
+ }
}
return Promise.promisify( serializer.serializeDOM, false,
serializer )(
doc.body, function( chunk ) { out.push( chunk ); },
false
@@ -751,7 +757,8 @@
}
// We've been given source for this page
if ( v2.original && v2.original.wikitext ) {
- wikitext = v2.original.wikitext.body;
+ wikitext = ( typeof v2.original.wikitext !==
"string" ) ?
+ v2.original.wikitext.body :
v2.original.wikitext;
}
}
wt2html( req, res, wikitext );
diff --git a/lib/mediawiki.DOMDiff.js b/lib/mediawiki.DOMDiff.js
index 76939f7..efb42ce 100644
--- a/lib/mediawiki.DOMDiff.js
+++ b/lib/mediawiki.DOMDiff.js
@@ -42,8 +42,8 @@
// The root nodes are equal, call recursive differ
this.debug('ORIG:\n', this.env.page.dom.outerHTML, '\nNEW :\n',
workNode.outerHTML );
- var foundChange = this.doDOMDiff(this.env.page.dom, workNode);
- return { isEmpty: ! foundChange, dom: workNode };
+ var foundChange = this.doDOMDiff( this.env.page.dom, workNode );
+ return { isEmpty: !foundChange, dom: workNode };
};
// These attributes are ignored for equality purposes if they are added to a
node.
diff --git a/lib/mediawiki.SelectiveSerializer.js
b/lib/mediawiki.SelectiveSerializer.js
index 7128d63..c12b693 100644
--- a/lib/mediawiki.SelectiveSerializer.js
+++ b/lib/mediawiki.SelectiveSerializer.js
@@ -49,38 +49,38 @@
* Run the DOM serialization on a node.
*
* @param {Error} err
- * @param {Node} doc
+ * @param {Node} body
* @param {Function} cb Callback that is called for each chunk.
* @param {string} cb.res The wikitext of the chunk we've just serialized.
* @param {Function} finalcb The callback for when we've finished serializing
the DOM.
*/
-SSP.doSerializeDOM = function( err, doc, cb, finalcb ) {
+SSP.doSerializeDOM = function( err, body, cb, finalcb ) {
var self = this;
if ( err || (!this.env.page.dom && !this.env.page.domdiff) ||
!this.env.page.src) {
// If there's no old source, fall back to non-selective
serialization.
- this.wts.serializeDOM( doc, cb, false, finalcb );
+ this.wts.serializeDOM( body, cb, false, finalcb );
} else {
// Use provided diff-marked DOM (used during testing)
// or generate one (used in production)
- var diff = this.env.page.domdiff || new DOMDiff(this.env).diff(
doc );
+ var diff = this.env.page.domdiff || new DOMDiff( this.env
).diff( body );
- if ( ! diff.isEmpty ) {
+ if ( !diff.isEmpty ) {
- doc = diff.dom;
+ body = diff.dom;
// Add the serializer info
- // new DiffToSelserConverter(this.env, doc).convert();
+ // new DiffToSelserConverter(this.env, body).convert();
if ( this.trace || ( this.env.conf.parsoid.dumpFlags &&
this.env.conf.parsoid.dumpFlags.indexOf(
'dom:post-dom-diff' ) !== -1) )
{
console.log( '----- DOM after running DOMDiff
-----' );
- console.log( doc.outerHTML );
+ console.log( body.outerHTML );
}
// Call the WikitextSerializer to do our bidding
- this.wts.serializeDOM( doc, cb, true, finalcb );
+ this.wts.serializeDOM( body, cb, true, finalcb );
} else {
// Nothing was modified, just re-use the original source
cb( this.env.page.src );
@@ -103,7 +103,7 @@
* @param {string} src The wikitext source of the document (optionally
* including page metadata)
*/
-SSP.parseOriginalSource = function ( doc, cb, finalcb, err, src ) {
+SSP.parseOriginalSource = function( body, cb, finalcb, err, src ) {
var self = this,
parserPipelineFactory = new ParserPipelineFactory( this.env ),
parserPipeline = parserPipelineFactory.getPipeline(
'text/x-mediawiki/full' );
@@ -113,14 +113,11 @@
// Parse the wikitext src to the original DOM, and pass that on to
// doSerializeDOM
- parserPipeline.once( 'document', function ( origDoc ) {
- var body = DU.parseHTML( DU.serializeNode(origDoc) ).body;
- self.env.page.dom = body;
- //console.log('calling doSerializeDOM');
- //console.log(body.outerHTML);
- self.doSerializeDOM(null, doc, cb, finalcb);
+ parserPipeline.once( 'document', function( origDoc ) {
+ self.env.page.dom = DU.parseHTML( DU.serializeNode( origDoc )
).body;
+ self.doSerializeDOM( null, body, cb, finalcb );
} );
- parserPipeline.processToplevelDoc(this.env.page.src);
+ parserPipeline.processToplevelDoc( this.env.page.src );
};
@@ -135,14 +132,14 @@
* @param {string} cb.res The chunk of wikitext just serialized.
* @param {Function} finalcb The callback fired on completion of the
serialization.
*/
-SSP.serializeDOM = function ( doc, cb, dummy, finalcb ) {
+SSP.serializeDOM = function( body, cb, dummy, finalcb ) {
// dummy preserves the wt serializer interface
var self = this;
if ( this.env.page.dom || this.env.page.domdiff ) {
- this.doSerializeDOM(null, doc, cb, finalcb);
+ this.doSerializeDOM( null, body, cb, finalcb );
} else if ( this.env.page.src ) {
// Have the src, only parse the src to the dom
- this.parseOriginalSource( doc, cb, finalcb, null,
this.env.page.src );
+ this.parseOriginalSource( body, cb, finalcb, null,
this.env.page.src );
} else if (this.env.page.id && this.env.page.id !== '0') {
// Start by getting the old text of this page
if (this.env.conf.parsoid.parsoidCacheURI) {
@@ -166,22 +163,23 @@
// And the original
dom. results[1] is an array
// with the html and
the content type. Ignore the
// content type here.
- self.env.page.dom =
DU.parseHTML(results[1][0]).body;
+ self.env.page.dom =
DU.parseHTML( results[1][0] ).body;
}
// Selective serialization if
there was no error, full
// serialization if there was
one.
- self.doSerializeDOM(null, doc,
cb, finalcb);
+ self.doSerializeDOM( null,
body, cb, finalcb );
}
);
} else {
- Util.getPageSrc( this.env, this.env.page.name,
- this.env.page.id || null,
- this.parseOriginalSource.bind(this,
doc, cb, finalcb) );
+ Util.getPageSrc(
+ this.env, this.env.page.name, this.env.page.id
|| null,
+ this.parseOriginalSource.bind( this, body, cb,
finalcb )
+ );
}
} else {
- this.doSerializeDOM(null, doc, cb, finalcb);
+ this.doSerializeDOM( null, body, cb, finalcb );
}
};
diff --git a/tests/mocha/api.js b/tests/mocha/api.js
index a559f7d..0f7aa8a 100644
--- a/tests/mocha/api.js
+++ b/tests/mocha/api.js
@@ -430,6 +430,58 @@
.end(done);
});
+ it('should use selser', function(done) {
+ request(api)
+ .post('v2/' + mockHost + '/wt/')
+ .send({
+ html: "<html><body id=\"mwAA\"><div
id=\"mwBB\">Selser test</div></body></html>",
+ "data-parsoid": {
+ "ids": {
+ mwAA: {},
+ mwBB: {}
+ }
+ },
+ original: {
+ title: "Doesnotexist",
+ wikitext: "<div id=bar>Selser
test",
+ html: "<html><body
id=\"mwAA\"><div id=\"mwBB\">Selser test</div></body></html>",
+ "data-parsoid": {
+ "ids": {
+ mwAA: {},
+ mwBB: {}
+ }
+ }
+ }
+ })
+ .expect(200)
+ .expect(function(res) {
+
res.body.should.have.property("wikitext");
+
res.body.wikitext.body.should.equal("<div id=bar>Selser test");
+ })
+ .end(done);
+ });
+
+ it('should fallback to non-selective serialization',
function(done) {
+ // Without the original wikitext and an
unavailable
+ // TemplateFetch for the source (Doesnotexist
will 404),
+ // it should fall back to non-selective
serialization.
+ request(api)
+ .post('v2/' + mockHost + '/wt/')
+ .send({
+ html: "<html><body><div
id=\"bar\">Selser test</div></body></html>",
+ original: {
+ title: "Doesnotexist",
+ html: "<html><body><div
id=\"bar\">Selser test</div></body></html>",
+ }
+ })
+ .expect(200)
+ .expect(function(res) {
+
res.body.should.have.property("wikitext");
+
res.body.wikitext.body.should.equal("<div id=\"bar\">Selser test</div>");
+ })
+ .end(done);
+ });
+
}); // end html2wt
});
diff --git a/tests/test.localsettings.js b/tests/test.localsettings.js
index 6b9e3af..8b97f61 100644
--- a/tests/test.localsettings.js
+++ b/tests/test.localsettings.js
@@ -17,7 +17,7 @@
//parsoidConfig.usePHPPreProcessor = false;
// Use selective serialization (default false)
- //parsoidConfig.useSelser = true;
+ parsoidConfig.useSelser = true;
// allow cross-domain requests to the API (default disallowed)
//parsoidConfig.allowCORS = '*';
--
To view, visit https://gerrit.wikimedia.org/r/190410
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I91d688d7b3942b89637b15ed2ae1a54dab22bede
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits