jenkins-bot has submitted this change and it was merged. Change subject: Batch MW parser and imageinfo API requests ......................................................................
Batch MW parser and imageinfo API requests * Implement a system for mixed batches of parser, preprocessor and imageinfo requests. This uses an MW extension specific to Parsoid which provides the relevant API. * Implement caching inside Batcher, replacing env.pageCache, except for its original use case. parserTests.js uses env.pageCache to inject template wikitext, which will still work -- it was never really correct to allow parserTests.js to inject other API responses into the cache. * Remove Processor parameter from fetchExpandedTpl() since it was always the same. Bug: T45888 Change-Id: I2bd6f574bca8c64302810a9569f9390c4cf64626 --- M lib/ParsoidLogger.js M lib/ext.core.ExtensionHandler.js M lib/ext.core.LinkHandler.js M lib/ext.core.TemplateHandler.js M lib/mediawiki.ApiRequest.js A lib/mediawiki.Batcher.js M lib/mediawiki.ParsoidConfig.js M lib/mediawiki.TokenTransformManager.js M lib/mediawiki.Util.js M lib/mediawiki.parser.environment.js 10 files changed, 580 insertions(+), 142 deletions(-) Approvals: Arlolra: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/ParsoidLogger.js b/lib/ParsoidLogger.js index 5207f0a..b6df3e1 100644 --- a/lib/ParsoidLogger.js +++ b/lib/ParsoidLogger.js @@ -146,6 +146,7 @@ "trace/selser": "[SELSER]", "trace/domdiff": "[DOM-DIFF]", "trace/wt-escape": "[wt-esc]", + "trace/batcher": "[batcher]", }; ParsoidLogger.prototype._defaultTracerBackend = function(logData, cb) { diff --git a/lib/ext.core.ExtensionHandler.js b/lib/ext.core.ExtensionHandler.js index 054edcf..ad86e54 100644 --- a/lib/ext.core.ExtensionHandler.js +++ b/lib/ext.core.ExtensionHandler.js @@ -4,7 +4,6 @@ var coreutil = require('util'); var Util = require('./mediawiki.Util.js').Util; var DU = require('./mediawiki.DOMUtils.js').DOMUtils; -var PHPParseRequest = require('./mediawiki.ApiRequest.js').PHPParseRequest; var defines = require('./mediawiki.parser.defines.js'); // define some constructor shortcuts @@ -83,16 +82,12 @@ var env = this.env; // We are 
about to start an async request for an extension env.dp('Note: trying to expand ', text); - - // Start a new request if none is outstanding - if (env.requestQueue[text] === undefined) { - env.tp('Note: Starting new request for ' + text); - env.requestQueue[text] = new PHPParseRequest(env, title, text); + var cacheEntry = env.batcher.parse(title, text, cb); + if (cacheEntry !== undefined) { + cb(cacheEntry); + } else { + parentCB ({ async: true }); } - // append request, process in document order - env.requestQueue[text].once('src', cb); - - parentCB ({ async: true }); }; function normalizeExtOptions(options) { diff --git a/lib/ext.core.LinkHandler.js b/lib/ext.core.LinkHandler.js index cc5a1d6..4fc51f5 100644 --- a/lib/ext.core.LinkHandler.js +++ b/lib/ext.core.LinkHandler.js @@ -947,8 +947,17 @@ var containerClose = new EndTagTk(containerName); if (!err && data) { - var ns = data.imgns; - image = data.pages[ns + ':' + title.key]; + if (data.batchResponse !== undefined) { + info = data.batchResponse; + } else { + var ns = data.imgns; + image = data.pages[ns + ':' + title.key]; + if (image && image.imageinfo && image.imageinfo[0]) { + info = image.imageinfo[0]; + } else { + info = false; + } + } } // FIXME gwicke: Make sure our filename is never of the form @@ -961,18 +970,14 @@ // full 'filename' does not match any of them, so image is then // undefined here. So for now (as a workaround) check if we // actually have an image to work with instead of crashing. - if (!image || !image.imageinfo) { + if (!info) { // Use sane defaults. - image = { - imageinfo: [ - { - url: './Special:FilePath/' + Util.sanitizeTitleURI(title.key), - // Preserve width and height from the wikitext options - // even if the image is non-existent. 
- width: opts.size.v.width || 220, - height: opts.size.v.height || opts.size.v.width || 220, - }, - ], + info = { + url: './Special:FilePath/' + Util.sanitizeTitleURI(title.key), + // Preserve width and height from the wikitext options + // even if the image is non-existent. + width: opts.size.v.width || 220, + height: opts.size.v.height || opts.size.v.width || 220, }; // Add mw:Error to the RDFa type. @@ -1003,7 +1008,6 @@ errs.push({"key": "missing-image", "message": "This image does not exist." }); } } - info = image.imageinfo[0]; var imageSrc = dataAttribs.src; if (!dataAttribs.uneditable) { @@ -1425,17 +1429,12 @@ } } - var queueKey = title.key + JSON.stringify(opts.size.v); - if (queueKey in env.pageCache) { - this.handleImageInfo(cb, token, title, opts, optSources, null, env.pageCache[ queueKey ]); + var cacheEntry = env.batcher.imageinfo(title.key, opts.size.v, + this.handleImageInfo.bind(this, cb, token, title, opts, optSources)); + if (cacheEntry !== undefined) { + this.handleImageInfo(cb, token, title, opts, optSources, null, cacheEntry); } else { cb({ async: true }); - - if (!(queueKey in env.requestQueue)) { - env.requestQueue[queueKey] = new ImageInfoRequest(env, title.key, opts.size.v); - } - - env.requestQueue[queueKey].once('src', this.handleImageInfo.bind(this, cb, token, title, opts, optSources)); } }; diff --git a/lib/ext.core.TemplateHandler.js b/lib/ext.core.TemplateHandler.js index 2321252..f7febc3 100644 --- a/lib/ext.core.TemplateHandler.js +++ b/lib/ext.core.TemplateHandler.js @@ -17,7 +17,6 @@ var defines = require('./mediawiki.parser.defines.js'); var TemplateRequest = require('./mediawiki.ApiRequest.js').TemplateRequest; var api = require('./mediawiki.ApiRequest.js'); -var PreprocessorRequest = api.PreprocessorRequest; var Util = require('./mediawiki.Util.js').Util; var DU = require('./mediawiki.DOMUtils.js').DOMUtils; var async = require('async'); @@ -149,8 +148,7 @@ accumReceiveToksFromChild); // Fetch and process the template 
expansion this.fetchExpandedTpl(env.page.name || '', - text, PreprocessorRequest, - accumReceiveToksFromSibling, srcHandler); + text, accumReceiveToksFromSibling, srcHandler); } } else { // We don't perform recursive template expansion- something @@ -1137,30 +1135,22 @@ /** * Fetch the preprocessed wikitext for a template-like construct. - * (The 'Processor' argument is a constructor, hence the capitalization.) */ -TemplateHandler.prototype.fetchExpandedTpl = function(title, text, Processor, parentCB, cb) { +TemplateHandler.prototype.fetchExpandedTpl = function(title, text, parentCB, cb) { var env = this.manager.env; - if (text in env.pageCache) { - // XXX: store type too (and cache tokens/x-mediawiki) - cb(null, env.pageCache[text] /* , type */); - } else if (!env.conf.parsoid.fetchTemplates) { - parentCB({ tokens: [ 'Warning: Page/template fetching disabled, and no cache for ' + text] }); + if (!env.conf.parsoid.fetchTemplates) { + parentCB({ tokens: [ 'Warning: Page/template fetching disabled cannot expand ' + text] }); } else { - // We are about to start an async request for a template env.dp('Note: trying to expand ', text); - - // Start a new request if none is outstanding - if (env.requestQueue[text] === undefined) { - env.tp('Note: Starting new request for ' + text); - env.requestQueue[text] = new Processor(env, title, text); + var cacheEntry = env.batcher.preprocess(title, text, cb); + if (cacheEntry !== undefined) { + // XXX: store type too (and cache tokens/x-mediawiki) + cb(null, cacheEntry /* , type */); + } else { + // Advise we're going async + parentCB({tokens: [], async: true}); } - // append request, process in document order - env.requestQueue[text].once('src', cb); - - // Advise we're going async - parentCB({tokens: [], async: true}); } }; diff --git a/lib/mediawiki.ApiRequest.js b/lib/mediawiki.ApiRequest.js index 01eeb37..62f865f 100644 --- a/lib/mediawiki.ApiRequest.js +++ b/lib/mediawiki.ApiRequest.js @@ -70,6 +70,84 @@ } }; +var 
manglePreprocessorResponse = function(env, response) { + var src = ''; + if (response.wikitext !== undefined) { + src = response.wikitext; + } else if (response["*"] !== undefined) { + // For backwards compatibility. Older wikis still put the data here. + src = response["*"]; + } + + // Add the categories which were added by parser functions directly + // into the page and not as in-text links. + if (Array.isArray(response.categories)) { + for (var i in response.categories) { + var category = response.categories[i]; + src += '\n[[Category:' + category['*']; + if (category.sortkey) { + src += "|" + category.sortkey; + } + src += ']]'; + } + } + // Ditto for page properties (like DISPLAYTITLE and DEFAULTSORT) + if (Array.isArray(response.properties)) { + response.properties.forEach(function(prop) { + if (prop.name === 'displaytitle' || prop.name === 'defaultsort') { + src += '\n{{' + prop.name.toUpperCase() + ':' + prop['*'] + '}}'; + } + }); + } + // The same for ResourceLoader modules + setPageProperty(env, response.modules, "extensionModules"); + setPageProperty(env, response.modulescripts, "extensionModuleScripts"); + setPageProperty(env, response.modulestyles, "extensionModuleStyles"); + + return src; +}; + +var dummyDoc = domino.createDocument(); +var mangleParserResponse = function(env, response) { + var parsedHtml = ''; + if (response.text['*'] !== undefined) { + parsedHtml = response.text['*']; + } + + // Strip two trailing newlines that action=parse adds after any + // extension output + parsedHtml = parsedHtml.replace(/\n\n$/, ''); + + // Also strip a paragraph wrapper, if any + parsedHtml = parsedHtml.replace(/(^<p>)|(<\/p>$)/g, ''); + + // Add the modules to the page data + setPageProperty(env, response.modules, "extensionModules"); + setPageProperty(env, response.modulescripts, "extensionModuleScripts"); + setPageProperty(env, response.modulestyles, "extensionModuleStyles"); + + // Add the categories which were added by extensions directly into the + // 
page and not as in-text links + if (response.categories) { + for (var i in response.categories) { + var category = response.categories[i]; + + var link = dummyDoc.createElement("link"); + link.setAttribute("rel", "mw:PageProp/Category"); + + var href = env.page.relativeLinkPrefix + "Category:" + encodeURIComponent(category['*']); + if (category.sortkey) { + href += "#" + encodeURIComponent(category.sortkey); + } + link.setAttribute("href", href); + + parsedHtml += "\n" + link.outerHTML; + } + } + + return parsedHtml; +}; + /** * @class * @extends Error @@ -175,6 +253,7 @@ options.headers['X-Forwarded-Proto'] = 'https'; } } + this.env.dp("Starting HTTP request", this.toString()); return request(options, callback); }; @@ -444,11 +523,12 @@ * @param {MWParserEnvironment} env * @param {string} title The title of the page to use as the context * @param {string} text + * @param {string} hash The queue key */ -function PreprocessorRequest(env, title, text) { +function PreprocessorRequest(env, title, text, hash) { ApiRequest.call(this, env, title); - this.queueKey = text; + this.queueKey = hash; this.text = text; this.reqType = "Template Expansion"; @@ -501,48 +581,10 @@ if (error) { this.env.log("error", error); this._processListeners(error, ''); - return; + } else { + this._processListeners(error, + manglePreprocessorResponse(this.env, data.expandtemplates)); } - - var src = ''; - if (data.expandtemplates.wikitext !== undefined) { - src = data.expandtemplates.wikitext; - } else if (data.expandtemplates["*"] !== undefined) { - // For backwards compatibility. Older wikis still put the data here. - src = data.expandtemplates["*"]; - } - - this.env.tp('Expanded ', this.text, src); - - // Add the categories which were added by parser functions directly - // into the page and not as in-text links. 
- if (Array.isArray(data.expandtemplates.categories)) { - for (var i in data.expandtemplates.categories) { - var category = data.expandtemplates.categories[i]; - src += '\n[[Category:' + category['*']; - if (category.sortkey) { - src += "|" + category.sortkey; - } - src += ']]'; - } - } - // Ditto for page properties (like DISPLAYTITLE and DEFAULTSORT) - if (Array.isArray(data.expandtemplates.properties)) { - data.expandtemplates.properties.forEach(function(prop) { - if (prop.name === 'displaytitle' || prop.name === 'defaultsort') { - src += '\n{{' + prop.name.toUpperCase() + ':' + prop['*'] + '}}'; - } - }); - } - // The same for ResourceLoader modules - setPageProperty(this.env, data.expandtemplates.modules, "extensionModules"); - setPageProperty(this.env, data.expandtemplates.modulescripts, "extensionModuleScripts"); - setPageProperty(this.env, data.expandtemplates.modulestyles, "extensionModuleStyles"); - - // Add the source to the cache - this.env.pageCache[this.text] = src; - - this._processListeners(error, src); }; /** @@ -558,12 +600,13 @@ * @param {string} title The title of the page to use as context * @param {string} text * @param {boolean} onlypst (optional) Pass onlypst to PHP parser + * @param {string} hash The queue key */ -function PHPParseRequest(env, name, text, onlypst) { +function PHPParseRequest(env, name, text, onlypst, hash) { ApiRequest.call(this, env, name); this.text = text; - this.queueKey = text; + this.queueKey = hash || text; this.reqType = "Extension Parse"; var apiargs = { @@ -608,7 +651,6 @@ // Function which returns a promise for the result of a parse request. 
PHPParseRequest.promise = promiseFor(PHPParseRequest); -var dummyDoc = domino.createDocument(); PHPParseRequest.prototype._handleJSON = function(error, data) { logAPIWarnings(this, data); @@ -620,49 +662,95 @@ if (error) { this.env.log("error", error); this._processListeners(error, ''); + } else { + this._processListeners(error, mangleParserResponse(this.env, data.parse)); + } +}; + +/** + * @class + * @extends ApiRequest + * + * Do a mixed-action batch request using the ParsoidBatchAPI extension. + * + * @constructor + * @param {MWParserEnvironment} env + * @param {Array} batchParams An array of objects + * @param {string} key The queue key + */ +function BatchRequest(env, batchParams, key) { + ApiRequest.call(this, env); + this.queueKey = key; + this.batchParams = batchParams; + this.reqType = 'Batch request'; + + var apiargs = { + format: 'json', + formatversion: '2', + action: 'parsoid-batch', + batch: JSON.stringify(batchParams), + }; + + this.requestOptions = { + method: 'POST', + followRedirect: true, + uri: env.conf.wiki.apiURI, + timeout: env.conf.parsoid.timeouts.mwApi.batch, + }; + var req = this.request(this.requestOptions, this._requestCB.bind(this)); + + // Use multipart form encoding to get more efficient transfer if the gain + // will be larger than the typical overhead. In later versions of the request + // library, this can easily be done with the formData option, but coveralls + // depends on request 2.40.0. 
+ if (encodeURIComponent(apiargs.batch).length - apiargs.batch.length > 600) { + var form = req.form(); + for (var optName in apiargs) { + form.append(optName, apiargs[optName]); + } + } else { + req.form(apiargs); + } +} + +util.inherits(BatchRequest, ApiRequest); + +BatchRequest.prototype._handleJSON = function(error, data) { + if (!error && !(data && data['parsoid-batch'] && Array.isArray(data['parsoid-batch']))) { + error = new Error('Invalid result when expanding template batch'); + } + + if (error) { + this.env.log("error", error); + this.emit('batch', error, null); return; } - var parsedHtml = ''; - if (data.parse.text['*'] !== undefined) { - parsedHtml = data.parse.text['*']; - } - - // Strip two trailing newlines that action=parse adds after any - // extension output - parsedHtml = parsedHtml.replace(/\n\n$/, ''); - - // Also strip a paragraph wrapper, if any - parsedHtml = parsedHtml.replace(/(^<p>)|(<\/p>$)/g, ''); - - // Add the modules to the page data - setPageProperty(this.env, data.parse.modules, "extensionModules"); - setPageProperty(this.env, data.parse.modulescripts, "extensionModuleScripts"); - setPageProperty(this.env, data.parse.modulestyles, "extensionModuleStyles"); - - // Add the categories which were added by extensions directly into the - // page and not as in-text links - if (data.parse.categories) { - for (var i in data.parse.categories) { - var category = data.parse.categories[i]; - - var link = dummyDoc.createElement("link"); - link.setAttribute("rel", "mw:PageProp/Category"); - - var href = this.env.page.relativeLinkPrefix + "Category:" + encodeURIComponent(category['*']); - if (category.sortkey) { - href += "#" + encodeURIComponent(category.sortkey); - } - link.setAttribute("href", href); - - parsedHtml += "\n" + link.outerHTML; + var batchResponse = data['parsoid-batch']; + var callbackData = []; + var index, itemParams, itemResponse, j, mangled; + for (index = 0; index < batchResponse.length; index++) { + itemParams = 
this.batchParams[index]; + itemResponse = batchResponse[index]; + switch (itemParams.action) { + case 'parse': + mangled = mangleParserResponse(this.env, itemResponse); + break; + case 'preprocess': + mangled = manglePreprocessorResponse(this.env, itemResponse); + break; + case 'imageinfo': + mangled = {batchResponse: itemResponse}; + break; + default: + error = new Error("BatchRequest._handleJSON: Invalid action"); + this.emit('batch', error, null); + return; } + callbackData.push(mangled); + } - - // Add the source to the cache - this.env.pageCache[this.text] = parsedHtml; - - this._processListeners(error, parsedHtml); + this.emit('batch', error, callbackData); }; /** @@ -838,10 +926,10 @@ * @param {number} [dims.width] * @param {number} [dims.height] */ -function ImageInfoRequest(env, filename, dims) { +function ImageInfoRequest(env, filename, dims, key) { ApiRequest.call(this, env, null); this.env = env; - this.queueKey = filename + JSON.stringify(dims); + this.queueKey = key; this.reqType = "Image Info Request"; var conf = env.conf.wiki; @@ -905,7 +993,7 @@ } if (data && data.query) { - // The API indexes its response by page ID. That's stupid. + // The API indexes its response by page ID. That's inconvenient. 
newpages = {}; pagenames = {}; pages = data.query.pages; @@ -927,7 +1015,6 @@ data.query.pages = newpages; data.query.imgns = this.ns; - this.env.pageCache[ this.queueKey ] = data.query; this._processListeners(null, data.query); } else if (data && data.error) { if (data.error.code === 'readapidenied') { @@ -946,6 +1033,7 @@ module.exports.TemplateRequest = TemplateRequest; module.exports.PreprocessorRequest = PreprocessorRequest; module.exports.PHPParseRequest = PHPParseRequest; + module.exports.BatchRequest = BatchRequest; module.exports.ParsoidCacheRequest = ParsoidCacheRequest; module.exports.ImageInfoRequest = ImageInfoRequest; module.exports.DoesNotExistError = DoesNotExistError; diff --git a/lib/mediawiki.Batcher.js b/lib/mediawiki.Batcher.js new file mode 100644 index 0000000..b99310d --- /dev/null +++ b/lib/mediawiki.Batcher.js @@ -0,0 +1,315 @@ +'use strict'; +require('./core-upgrade.js'); + +var Util = require('./mediawiki.Util.js').Util; +var api = require('./mediawiki.ApiRequest.js'); + +/** + * @class + * + * This class combines requests into batches for dispatch to the + * ParsoidBatchAPI extension, and calls the item callbacks when the batch + * result is returned. It handles scheduling and concurrency of batch requests. + * It also has a legacy mode which sends requests to the MW core API. + * + * @constructor + * @param {MWParserEnvironment} env + */ +function Batcher(env) { + this.env = env; + this.itemCallbacks = {}; + this.currentBatch = []; + this.pendingBatches = []; + this.resultCache = {}; + this.numOutstanding = 0; + this.idleTimer = false; + + this.maxBatchSize = env.conf.parsoid.batchSize; + this.targetConcurrency = env.conf.parsoid.batchConcurrency; +} + +/** + * Internal function for adding a generic work item. 
+ * + * @param {Object} params + * @param {Function} cb Item callback + */ +Batcher.prototype.pushGeneric = function(params, cb) { + var hash = params.hash; + if (hash in this.itemCallbacks) { + this.trace("Appending callback for hash", hash); + this.itemCallbacks[hash].push(cb); + } else { + this.trace("Creating batch item:", params); + this.itemCallbacks[hash] = [cb]; + this.currentBatch.push(params); + if (this.currentBatch.length >= this.maxBatchSize) { + this.sealBatch(); + } + } +}; + +/** + * Declare a batch complete and move it to the queue ready for dispatch. Moving + * batches to a queue instead of dispatching them immediately allows for an + * upper limit on concurrency. + */ +Batcher.prototype.sealBatch = function() { + if (this.currentBatch.length > 0) { + this.pendingBatches.push(this.currentBatch); + this.currentBatch = []; + } +}; + +/** + * Dispatch batches from the pending queue, if it is currently possible. + */ +Batcher.prototype.dispatch = function() { + while (this.numOutstanding < this.targetConcurrency && this.pendingBatches.length) { + var batch = this.pendingBatches.shift(); + + this.trace("Dispatching batch with", batch.length, "items"); + this.request(batch).once('batch', + this.onBatchResponse.bind(this, batch)); + + this.numOutstanding++; + if (this.idleTimer) { + clearTimeout(this.idleTimer); + this.idleTimer = false; + } + } +}; + +/** + * Schedule an idle event for the next tick. The idle event will dispatch + * batches if necessary to keep the job going. The idle event will be cancelled + * if a dispatch is done before returning to the event loop. + * + * This must be called after the completion of parsing work, and after any + * batch response is received, to avoid hanging the request by having an + * undispatched batch. + */ +Batcher.prototype.scheduleIdle = function() { + if (!this.idleTimer) { + this.idleTimer = setTimeout(this.onIdle.bind(this), 0); + } +}; + +/** + * Handler for the idle event.
Dispatch batches if there is not enough work + * outstanding. + */ +Batcher.prototype.onIdle = function() { + this.idleTimer = false; + + this.trace("Idle with outstanding =", this.numOutstanding, + ", pending =", this.pendingBatches.length, "x", this.maxBatchSize, + ", current =", this.currentBatch.length); + + if (this.numOutstanding < this.targetConcurrency) { + this.sealBatch(); + this.dispatch(); + } +}; + +/** + * Handle a batch response and call item callbacks, after the request is + * decoded by BatchRequest. + * + * @param {Object} batchParams The parameters as in pushGeneric(). + * @param {Error/null} error + * @param {Array} batchResult + */ +Batcher.prototype.onBatchResponse = function(batchParams, error, batchResult) { + var i, j, result, params, callbacks; + this.numOutstanding--; + if (error) { + this.trace("Received error in batch response:", error); + } else { + this.trace("Received batch response with", batchResult.length, "items"); + } + for (i = 0; i < batchParams.length; i++) { + params = batchParams[i]; + callbacks = this.itemCallbacks[params.hash]; + if (error) { + for (j = 0; j < callbacks.length; j++) { + callbacks[j](error, null); + } + } else { + result = batchResult[i]; + this.resultCache[params.hash] = result; + delete this.itemCallbacks[params.hash]; + for (j = 0; j < callbacks.length; j++) { + callbacks[j](null, result); + } + } + } + this.scheduleIdle(); +}; + +/** + * Schedule a preprocess (expandtemplates) operation.
+ * @param {string} title The title of the page to use as the context + * @param {string} text + * @param {Function} cb The completion callback + */ +Batcher.prototype.preprocess = function(title, text, cb) { + var env = this.env; + var hash = Util.makeHash(["preprocess", text, title]); + if (hash in this.resultCache) { + this.trace("Result cache hit for hash", hash); + return this.resultCache[hash]; + } + if (!env.conf.parsoid.useBatchAPI) { + this.trace("Non-batched preprocess request"); + this.legacyRequest(api.PreprocessorRequest, + [env, title, text, hash], hash, cb); + return; + } + + // Add the item to the batch + this.pushGeneric( + { + action: "preprocess", + title: title, + text: text, + hash: hash, + }, cb + ); +}; + +/** + * Schedule an MW parse operation. + * @param {string} title The title of the page to use as the context + * @param {string} text + * @param {Function} cb The completion callback + */ +Batcher.prototype.parse = function(title, text, cb) { + var env = this.env; + var hash = Util.makeHash(["parse", text, title]); + if (hash in this.resultCache) { + return this.resultCache[hash]; + } + if (!env.conf.parsoid.useBatchAPI) { + this.trace("Non-batched parse request"); + this.legacyRequest(api.PHPParseRequest, + [env, title, text, false, hash], hash, cb); + return; + } + + this.pushGeneric( + { + action: "parse", + title: title, + text: text, + hash: hash, + }, cb + ); +}; + +/** + * Schedule fetching of image info. 
+ * @param {string} filename + * @param {Object} dims + * @param {Function} cb The completion callback + */ +Batcher.prototype.imageinfo = function(filename, dims, cb) { + var env = this.env; + var hash = Util.makeHash(["imageinfo", filename, dims.width || "", dims.height || ""]); + if (hash in this.resultCache) { + return this.resultCache[hash]; + } + if (!env.conf.parsoid.useBatchAPI) { + this.trace("Non-batched imageinfo request"); + this.legacyRequest(api.ImageInfoRequest, + [env, filename, dims, hash], hash, cb); + return; + } + + var params = { + action: "imageinfo", + filename: filename, + hash: hash, + }; + if (dims.width !== null || dims.height !== null) { + params.txopts = {}; + if (dims.width !== null) { + params.txopts.width = dims.width; + } + if (dims.height !== null) { + params.txopts.height = dims.height; + } + } + + this.pushGeneric(params, cb); +}; + +/** + * Helper for sending legacy requests when the extension is not available + * @param {Function} Constructor The ApiRequest subclass constructor + * @param {Array} args The constructor arguments + * @param {string} hash The request identifier hash + * @param {Function} cb The completion callback + */ +Batcher.prototype.legacyRequest = function(Constructor, args, hash, cb) { + var env = this.env; + if (env.requestQueue[hash] === undefined) { + var req = Object.create(Constructor.prototype); + Constructor.apply(req, args); + env.requestQueue[hash] = req; + } + env.requestQueue[hash].once('src', this.onLegacyResponse.bind(this, hash, cb)); +}; + +/** + * Helper for handling a legacy response + */ +Batcher.prototype.onLegacyResponse = function(hash, cb, error, src) { + if (!error) { + this.resultCache[hash] = src; + } + cb(error, src); +}; + +/** + * Actually send a single batch request with the specified parameters. 
+ */ +Batcher.prototype.request = function(batchParams) { + var i; + var params; + var apiBatch = []; + var key = []; + var apiItemParams; + for (i = 0; i < batchParams.length; i++) { + params = batchParams[i]; + if (params.action === 'imageinfo') { + apiItemParams = { + action: params.action, + filename: params.filename, + }; + if ("txopts" in params) { + apiItemParams.txopts = params.txopts; + } + } else { + apiItemParams = { + action: params.action, + title: params.title, + text: params.text, + }; + } + apiBatch.push(apiItemParams); + key.push(params.hash); + } + return new api.BatchRequest(this.env, apiBatch, key.join(':')); +}; + +/** + * Convenience helper for tracing + */ +Batcher.prototype.trace = function() { + this.env.log.apply(null, ["trace/batcher"].concat(Array.prototype.slice.call(arguments))); +}; + +module.exports = { + Batcher: Batcher, +}; diff --git a/lib/mediawiki.ParsoidConfig.js b/lib/mediawiki.ParsoidConfig.js index e8a150c..6972f86 100644 --- a/lib/mediawiki.ParsoidConfig.js +++ b/lib/mediawiki.ParsoidConfig.js @@ -37,6 +37,8 @@ preprocessor: 30 * 1000, // action=parse extParse: 30 * 1000, + // action=parsoid-batch + batch: 60 * 1000, // action=query&prop=revisions srcFetch: 40 * 1000, // action=query&prop=imageinfo @@ -269,6 +271,26 @@ ParsoidConfig.prototype.loadWMF = true; /** + * Set to true to use the Parsoid-specific batch API from the ParsoidBatchAPI + * extension (action=parsoid-batch). + */ +ParsoidConfig.prototype.useBatchAPI = false; + +/** + * The batch size for parse/preprocess requests + */ +ParsoidConfig.prototype.batchSize = 50; + +/** + * The maximum number of concurrent requests that the API request batcher will + * allow to be active at any given time. Before this limit is reached, requests + * will be dispatched more aggressively, giving smaller batches on average. + * After the limit is reached, batches will be stored in a queue with + * batchSize items in each batch.
+ */ +ParsoidConfig.prototype.batchConcurrency = 4; + +/** * @property {null} Settings for Performance timer. */ ParsoidConfig.prototype.performanceTimer = null; diff --git a/lib/mediawiki.TokenTransformManager.js b/lib/mediawiki.TokenTransformManager.js index 273f3f7..e5f26ba 100644 --- a/lib/mediawiki.TokenTransformManager.js +++ b/lib/mediawiki.TokenTransformManager.js @@ -409,6 +409,7 @@ if (this.tailAccumulator) { this.env.dp('AsyncTokenTransformManager.onEndEvent: calling siblingDone', this.frame.title); + this.env.batcher.scheduleIdle(); this.tailAccumulator.siblingDone(); } else { // nothing was asynchronous, so we'll have to emit end here. diff --git a/lib/mediawiki.Util.js b/lib/mediawiki.Util.js index 6b662e9..80d90a7 100644 --- a/lib/mediawiki.Util.js +++ b/lib/mediawiki.Util.js @@ -6,6 +6,7 @@ require('./core-upgrade.js'); var async = require('async'); +var crypto = require('crypto'); var request = require('request'); var entities = require('entities'); var TXStatsD = require('node-txstatsd'); @@ -132,6 +133,7 @@ " * selser : trace actions of the selective serializer", " * domdiff : trace actions of the DOM diffing code", " * wt-escape : debug wikitext-escaping", + " * batcher : trace API batch aggregation and dispatch", "", "--debug enables tracing of all the above phases except Token Transform Managers", "", @@ -1252,6 +1254,29 @@ typeof (dsr[0]) === 'number' && dsr[0] >= 0 && typeof (dsr[1]) === 'number' && dsr[1] >= 0; }, + + /** + * Quickly hash an array or string. + * + * @param {Array/string} arr + */ + makeHash: function(arr) { + var md5 = crypto.createHash('MD5'); + var i; + if (Array.isArray(arr)) { + for (i = 0; i < arr.length; i++) { + if (arr[i] instanceof String) { + md5.update(arr[i]); + } else { + md5.update(arr[i].toString()); + } + md5.update("\0"); + } + } else { + md5.update(arr); + } + return md5.digest('hex'); + }, }; // FIXME: There is also a DOMUtils.getJSONAttribute. 
Consolidate diff --git a/lib/mediawiki.parser.environment.js b/lib/mediawiki.parser.environment.js index cb98592..c0c8e9a 100644 --- a/lib/mediawiki.parser.environment.js +++ b/lib/mediawiki.parser.environment.js @@ -4,6 +4,7 @@ var WikiConfig = require('./mediawiki.WikiConfig.js').WikiConfig; var ParsoidConfig = require('./mediawiki.ParsoidConfig.js').ParsoidConfig; var ConfigRequest = require('./mediawiki.ApiRequest.js').ConfigRequest; +var Batcher = require('./mediawiki.Batcher.js').Batcher; var Util = require('./mediawiki.Util.js').Util; var JSUtils = require('./jsutils.js').JSUtils; var Title = require('./mediawiki.Title.js').Title; @@ -92,6 +93,7 @@ // Outstanding page requests (for templates etc) this.requestQueue = {}; + this.batcher = new Batcher(this); }; MWParserEnvironment.prototype.configureLogging = function() { -- To view, visit https://gerrit.wikimedia.org/r/227208 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I2bd6f574bca8c64302810a9569f9390c4cf64626 Gerrit-PatchSet: 7 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Tim Starling <tstarl...@wikimedia.org> Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org> Gerrit-Reviewer: Cscott <canan...@wikimedia.org> Gerrit-Reviewer: GWicke <gwi...@wikimedia.org> Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org> Gerrit-Reviewer: Tim Starling <tstarl...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits