Arlolra has uploaded a new change for review. https://gerrit.wikimedia.org/r/204303
Change subject: Convert serializeDOM interface to promise returning func ...................................................................... Convert serializeDOM interface to promise returning func * Removes the promisify everywhere and prepares for an internal refactor of selective serializer. Change-Id: I0eaa5cf0d2524634a993e1f2d6f5579433fb28a6 --- M api/routes.js M lib/mediawiki.SelectiveSerializer.js M lib/mediawiki.WikitextSerializer.js M tests/mocha/parse.js M tests/parse.js 5 files changed, 109 insertions(+), 110 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/03/204303/1 diff --git a/api/routes.js b/api/routes.js index 3f3db08..a66bd82 100644 --- a/api/routes.js +++ b/api/routes.js @@ -114,12 +114,9 @@ // Re-parse the HTML to uncover foster-parenting issues doc = domino.createDocument( doc.outerHTML ); - var Serializer = selser ? SelectiveSerializer : WikitextSerializer, - serializer = new Serializer({ env: env }); - - return Promise.promisify( serializer.serializeDOM, false, serializer )( - doc.body, false - ).then(function( out ) { + var Serializer = selser ? SelectiveSerializer : WikitextSerializer; + var serializer = new Serializer({ env: env }); + return serializer.serializeDOM(doc.body, false).then(function(out) { // Strip selser trigger comment out = out.replace(/<!--rtSelserEditTestComment-->\n*$/, ''); @@ -278,8 +275,6 @@ startTimers.set( 'html2wt.init.domparse', Date.now() ); } - var doc = DU.parseHTML( html ); - // send domparse time, input size and init time to statsd/Graphite // init time is the time elapsed before serialization // init.domParse, a component of init time, is the time elapsed from html string to DOM tree @@ -291,21 +286,22 @@ Date.now() - startTimers.get( 'html2wt.init' )); } - var Serializer = parsoidConfig.useSelser ? SelectiveSerializer : WikitextSerializer, - serializer = new Serializer({ env: env, oldid: env.page.id }); + var doc = DU.parseHTML(html); if ( v2 && v2.original && v2.original["data-parsoid"] ) { DU.applyDataParsoid( doc, v2.original["data-parsoid"].body ); } + if ( v2 && v2.original && v2.original.html ) { env.page.dom = DU.parseHTML( v2.original.html.body ).body; if ( v2.original["data-parsoid"] ) { DU.applyDataParsoid( env.page.dom.ownerDocument, v2.original["data-parsoid"].body ); } } - return Promise.promisify( serializer.serializeDOM, false, serializer )( - doc.body, false - ); + + var Serializer = parsoidConfig.useSelser ? SelectiveSerializer : WikitextSerializer; + var serializer = new Serializer({ env: env, oldid: env.page.id }); + return serializer.serializeDOM(doc.body, false); }).timeout( REQ_TIMEOUT ).then(function( output ) { var contentType = 'text/plain;profile=mediawiki.org/specs/wikitext/1.0.0;charset=utf-8'; if ( v2 ) { diff --git a/lib/mediawiki.SelectiveSerializer.js b/lib/mediawiki.SelectiveSerializer.js index c41a81c..e52f672 100644 --- a/lib/mediawiki.SelectiveSerializer.js +++ b/lib/mediawiki.SelectiveSerializer.js @@ -7,14 +7,15 @@ 'use strict'; -var WikitextSerializer = require( './mediawiki.WikitextSerializer.js' ).WikitextSerializer, - Util = require( './mediawiki.Util.js' ).Util, - DU = require('./mediawiki.DOMUtils.js').DOMUtils, - ParserPipelineFactory = require('./mediawiki.parser.js').ParserPipelineFactory, - DOMDiff = require('./mediawiki.DOMDiff.js').DOMDiff, - ParsoidCacheRequest = require('./mediawiki.ApiRequest.js').ParsoidCacheRequest, - normalizeDOM = require('./wts.normalizeDOM.js').normalizeDOM, - async = require('async'); +var WikitextSerializer = require( './mediawiki.WikitextSerializer.js' ).WikitextSerializer; +var Util = require( './mediawiki.Util.js' ).Util; +var DU = require('./mediawiki.DOMUtils.js').DOMUtils; +var JSUtils = require('./jsutils.js').JSUtils; +var ParserPipelineFactory = require('./mediawiki.parser.js').ParserPipelineFactory; +var DOMDiff = require('./mediawiki.DOMDiff.js').DOMDiff; +var ParsoidCacheRequest = require('./mediawiki.ApiRequest.js').ParsoidCacheRequest; +var normalizeDOM = require('./wts.normalizeDOM.js').normalizeDOM; +var async = require('async'); /** * @class @@ -51,10 +52,9 @@ * * @param {Error} err * @param {Node} body - * @param {Function} finalcb The callback for when we've finished serializing the DOM. + * @param {Function} cb The callback for when we've finished serializing the DOM. */ -SSP.doSerializeDOM = function( err, body, finalcb ) { - var self = this; +SSP.doSerializeDOM = function( err, body, cb ) { var startTimers = new Map(); if ( err || (!this.env.page.dom && !this.env.page.domdiff) || !this.env.page.src ) { @@ -63,7 +63,7 @@ } // If there's no old source, fall back to non-selective serialization. - this.wts.serializeDOM( body, false, finalcb ); + this.wts.serializeDOM( body, false, cb ); if ( this.timer ) { this.timer.timing( 'html2wt.full.serialize', '', ( Date.now() - startTimers.get( 'html2wt.full.serialize' )) ); @@ -96,7 +96,7 @@ } // Call the WikitextSerializer to do our bidding - this.wts.serializeDOM( body, true, finalcb ); + this.wts.serializeDOM( body, true, cb ); if ( this.timer ) { this.timer.timing( 'html2wt.selser.serialize', '', ( Date.now() - startTimers.get( 'html2wt.selser.serialize' )) ); @@ -104,7 +104,7 @@ } else { // Nothing was modified, just re-use the original source - finalcb( null, this.env.page.src ); + cb( null, this.env.page.src ); } } }; @@ -116,12 +116,12 @@ * Parse the wikitext source of the page for DOM-diffing purposes. * * @param {Node} body The node for which we're getting the source. - * @param {Function} finalcb The callback for after we've serialized the entire document. + * @param {Function} cb The callback for after we've serialized the entire document. * @param {Error} err * @param {string} src The wikitext source of the document (optionally * including page metadata) */ -SSP.parseOriginalSource = function( body, finalcb, err, src ) { +SSP.parseOriginalSource = function( body, cb, err, src ) { var self = this, parserPipelineFactory = new ParserPipelineFactory( this.env ), parserPipeline = parserPipelineFactory.getPipeline( 'text/x-mediawiki/full' ); @@ -133,7 +133,7 @@ // doSerializeDOM parserPipeline.once( 'document', function( origDoc ) { self.env.page.dom = DU.parseHTML( DU.serializeNode( origDoc ) ).body; - self.doSerializeDOM( null, body, finalcb ); + self.doSerializeDOM( null, body, cb ); } ); parserPipeline.processToplevelDoc( this.env.page.src ); }; @@ -147,15 +147,17 @@ * * @param {Node} body The document to serialize. * @param {null} dummy Preserves the wt serializer interface. - * @param {Function} finalcb The callback fired on completion of the serialization. + * @param {Function} cb The callback fired on completion of the serialization. */ -SSP.serializeDOM = function( body, dummy, finalcb ) { +SSP.serializeDOM = function(body, dummy, cb) { + cb = JSUtils.mkPromised(cb); + var self = this; if ( this.env.page.dom || this.env.page.domdiff ) { - this.doSerializeDOM( null, body, finalcb ); + this.doSerializeDOM( null, body, cb ); } else if ( this.env.page.src ) { // Have the src, only parse the src to the dom - this.parseOriginalSource( body, finalcb, null, this.env.page.src ); + this.parseOriginalSource( body, cb, null, this.env.page.src ); } else if ( this.env.page.id && this.env.page.id !== '0' ) { // Start by getting the old text of this page if ( this.env.conf.parsoid.parsoidCacheURI ) { @@ -181,18 +183,19 @@ // Selective serialization if there was no error, full // serialization if there was one. - self.doSerializeDOM( err, body, finalcb ); + self.doSerializeDOM( err, body, cb ); }); } else { Util.getPageSrc( this.env, this.env.page.name, this.env.page.id || null, - this.parseOriginalSource.bind( this, body, finalcb ) + this.parseOriginalSource.bind( this, body, cb ) ); } } else { - this.doSerializeDOM( null, body, finalcb ); + this.doSerializeDOM( null, body, cb ); } + return cb.promise; }; if ( typeof module === 'object' ) { diff --git a/lib/mediawiki.WikitextSerializer.js b/lib/mediawiki.WikitextSerializer.js index d542cab..d878b6a 100644 --- a/lib/mediawiki.WikitextSerializer.js +++ b/lib/mediawiki.WikitextSerializer.js @@ -78,7 +78,13 @@ WSP.serializeHTML = function(opts, html) { opts.logType = this.logType; - return (new WikitextSerializer(opts)).serializeDOM(DU.parseHTML(html).body); + // FIXME: can we remove this block? + try { + var body = DU.parseHTML(html).body; + return (new WikitextSerializer(opts))._serializeDOM(body); + } catch (err) { + opts.env.log("fatal", err); + } }; WSP.getAttributeKey = function(node, key) { @@ -93,7 +99,6 @@ } } } - return key; }; @@ -113,7 +118,6 @@ } } } - return value; }; @@ -1361,79 +1365,78 @@ } /** - * Serialize an HTML DOM document. + * Serialize an HTML DOM document synchronously. + * WARNING: You probably want to use the wrapper below. */ -WSP.serializeDOM = function( body, selserMode, finalCB ) { +WSP._serializeDOM = function(body, selserMode) { this.logType = selserMode ? "trace/selser" : "trace/wts"; this.trace = this.env.log.bind(this.env, this.logType); if (!this.env.page.editedDoc) { this.env.page.editedDoc = body.ownerDocument; } + var state = new SerializerState(this, this.options); - try { - if (!selserMode) { - // Normalize the DOM - normalizeDOM(body, state.env); - } - // Don't serialize the DOM if debugging is disabled - this.env.log(this.logType, function() { - return "--- DOM --- \n" + body.outerHTML + "\n-----------"; - }); - - var out = ''; - - // Init state - state.selserMode = selserMode || false; - state.sep.lastSourceNode = body; - state.currLine.firstNode = body.firstChild; - - // Wrapper CB for every chunk that emits any required separators - // before emitting the chunk itself. - var chunkCBWrapper = function(chunk, node, atEOF) { - var accum = function(o) { out += o; }; - state.emitSepAndOutput(chunk, node, accum, "OUT:"); - state.atStartOfOutput = false; - if (atEOF === 'EOF') { - state.flushLine(accum); - } - }; - - // Kick it off - state.serializeChildren( body, chunkCBWrapper ); - - // Handle EOF - chunkCBWrapper( '', body, 'EOF' ); - - if (state.hasIndentPreNowikis) { - // FIXME: Perhaps this can be done on a per-line basis - // rather than do one post-pass on the entire document. - // - // Strip excess/useless nowikis - out = stripUnnecessaryIndentPreNowikis( this.env, out ); - } - - if (state.hasQuoteNowikis) { - // FIXME: Perhaps this can be done on a per-line basis - // rather than do one post-pass on the entire document. - // - // Strip excess/useless nowikis - out = stripUnnecessaryQuoteNowikis(out); - } - - if ( finalCB && typeof finalCB === 'function' ) { - finalCB( null, out ); - } - - return out; - } catch (e) { - if ( finalCB && typeof finalCB === 'function' ) { - finalCB( e ); - } else { - state.env.log("fatal", e); - } + if (!selserMode) { + // Normalize the DOM + normalizeDOM(body, state.env); } + + // Don't serialize the DOM if debugging is disabled + this.trace(function() { + return "--- DOM --- \n" + body.outerHTML + "\n-----------"; + }); + + var out = ''; + + // Init state + state.selserMode = selserMode || false; + state.sep.lastSourceNode = body; + state.currLine.firstNode = body.firstChild; + + // Wrapper CB for every chunk that emits any required separators + // before emitting the chunk itself. + var chunkCBWrapper = function(chunk, node, atEOF) { + var accum = function(o) { out += o; }; + state.emitSepAndOutput(chunk, node, accum, "OUT:"); + state.atStartOfOutput = false; + if (atEOF === 'EOF') { + state.flushLine(accum); + } + }; + + // Kick it off + state.serializeChildren(body, chunkCBWrapper); + + // Handle EOF + chunkCBWrapper('', body, 'EOF'); + + if (state.hasIndentPreNowikis) { + // FIXME: Perhaps this can be done on a per-line basis + // rather than do one post-pass on the entire document. + // + // Strip excess/useless nowikis + out = stripUnnecessaryIndentPreNowikis(this.env, out); + } + + if (state.hasQuoteNowikis) { + // FIXME: Perhaps this can be done on a per-line basis + // rather than do one post-pass on the entire document. + // + // Strip excess/useless nowikis + out = stripUnnecessaryQuoteNowikis(out); + } + + return out; +}; + +// Promise returning wrapper of _serializeDOM +WSP.serializeDOM = function(body, selserMode, cb) { + var self = this; + return Promise.resolve().then(function() { + return self._serializeDOM(body, selserMode); + }).nodify(cb); }; if (typeof module === "object") { diff --git a/tests/mocha/parse.js b/tests/mocha/parse.js index e373ff9..1a3d54b 100644 --- a/tests/mocha/parse.js +++ b/tests/mocha/parse.js @@ -55,9 +55,7 @@ DU.applyDataParsoid( doc, dp ); } var serializer = new WikitextSerializer({ env: env }); - return Promise.promisify( - serializer.serializeDOM, false, serializer - )( doc.body, false ); + return serializer.serializeDOM(doc.body, false); }); }; diff --git a/tests/parse.js b/tests/parse.js index 50646d2..2d49eef 100755 --- a/tests/parse.js +++ b/tests/parse.js @@ -131,9 +131,10 @@ var startsAtWikitext; var startsAtHTML = function( argv, env, input, dp ) { - var serializer, doc = DU.parseHTML( input ); + var doc = DU.parseHTML(input); dp = dp || dpFromHead( doc ); + var serializer; if ( argv.selser ) { dp = dp || dpFromHead( env.page.dom.ownerDocument ); if ( dp ) { @@ -148,9 +149,7 @@ DU.applyDataParsoid( doc, dp ); } - return Promise.promisify( serializer.serializeDOM, false, serializer )( - doc.body, false - ).then(function(out) { + return serializer.serializeDOM(doc.body, false).then(function(out) { if ( argv.html2wt || argv.wt2wt ) { return { trailingNL: true, out: out }; } else { -- To view, visit https://gerrit.wikimedia.org/r/204303 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I0eaa5cf0d2524634a993e1f2d6f5579433fb28a6 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Arlolra <abrea...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits