jenkins-bot has submitted this change and it was merged.

Change subject: Batch MW parser and imageinfo API requests
......................................................................


Batch MW parser and imageinfo API requests

* Implement a system for mixed batches of parser, preprocessor and
  imageinfo requests. This uses an MW extension specific to Parsoid
  which provides the relevant API.
* Implement caching inside Batcher, replacing env.pageCache, except for
  its original use case. parserTests.js uses env.pageCache to inject
  template wikitext, which will still work -- it was never really
  correct to allow parserTests.js to inject other API responses into the
  cache.
* Remove Processor parameter from fetchExpandedTpl() since it was always
  the same.

Bug: T45888
Change-Id: I2bd6f574bca8c64302810a9569f9390c4cf64626
---
M lib/ParsoidLogger.js
M lib/ext.core.ExtensionHandler.js
M lib/ext.core.LinkHandler.js
M lib/ext.core.TemplateHandler.js
M lib/mediawiki.ApiRequest.js
A lib/mediawiki.Batcher.js
M lib/mediawiki.ParsoidConfig.js
M lib/mediawiki.TokenTransformManager.js
M lib/mediawiki.Util.js
M lib/mediawiki.parser.environment.js
10 files changed, 580 insertions(+), 142 deletions(-)

Approvals:
  Arlolra: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/ParsoidLogger.js b/lib/ParsoidLogger.js
index 5207f0a..b6df3e1 100644
--- a/lib/ParsoidLogger.js
+++ b/lib/ParsoidLogger.js
@@ -146,6 +146,7 @@
        "trace/selser":     "[SELSER]",
        "trace/domdiff":    "[DOM-DIFF]",
        "trace/wt-escape":  "[wt-esc]",
+       "trace/batcher":    "[batcher]",
 };
 
 ParsoidLogger.prototype._defaultTracerBackend = function(logData, cb) {
diff --git a/lib/ext.core.ExtensionHandler.js b/lib/ext.core.ExtensionHandler.js
index 054edcf..ad86e54 100644
--- a/lib/ext.core.ExtensionHandler.js
+++ b/lib/ext.core.ExtensionHandler.js
@@ -4,7 +4,6 @@
 var coreutil = require('util');
 var Util = require('./mediawiki.Util.js').Util;
 var DU = require('./mediawiki.DOMUtils.js').DOMUtils;
-var PHPParseRequest = require('./mediawiki.ApiRequest.js').PHPParseRequest;
 var defines = require('./mediawiki.parser.defines.js');
 
 // define some constructor shortcuts
@@ -83,16 +82,12 @@
        var env = this.env;
        // We are about to start an async request for an extension
        env.dp('Note: trying to expand ', text);
-
-       // Start a new request if none is outstanding
-       if (env.requestQueue[text] === undefined) {
-               env.tp('Note: Starting new request for ' + text);
-               env.requestQueue[text] = new PHPParseRequest(env, title, text);
+       var cacheEntry = env.batcher.parse(title, text, cb);
+       if (cacheEntry !== undefined) {
+               cb(cacheEntry);
+       } else {
+               parentCB ({ async: true });
        }
-       // append request, process in document order
-       env.requestQueue[text].once('src', cb);
-
-       parentCB ({ async: true });
 };
 
 function normalizeExtOptions(options) {
diff --git a/lib/ext.core.LinkHandler.js b/lib/ext.core.LinkHandler.js
index cc5a1d6..4fc51f5 100644
--- a/lib/ext.core.LinkHandler.js
+++ b/lib/ext.core.LinkHandler.js
@@ -947,8 +947,17 @@
        var containerClose = new EndTagTk(containerName);
 
        if (!err && data) {
-               var ns = data.imgns;
-               image = data.pages[ns + ':' + title.key];
+               if (data.batchResponse !== undefined) {
+                       info = data.batchResponse;
+               } else {
+                       var ns = data.imgns;
+                       image = data.pages[ns + ':' + title.key];
+                       if (image && image.imageinfo && image.imageinfo[0]) {
+                               info = image.imageinfo[0];
+                       } else {
+                               info = false;
+                       }
+               }
        }
 
        // FIXME gwicke: Make sure our filename is never of the form
@@ -961,18 +970,14 @@
        // full 'filename' does not match any of them, so image is then
        // undefined here. So for now (as a workaround) check if we
        // actually have an image to work with instead of crashing.
-       if (!image || !image.imageinfo) {
+       if (!info) {
                // Use sane defaults.
-               image = {
-                       imageinfo: [
-                               {
-                                       url: './Special:FilePath/' + 
Util.sanitizeTitleURI(title.key),
-                                       // Preserve width and height from the 
wikitext options
-                                       // even if the image is non-existent.
-                                       width: opts.size.v.width || 220,
-                                       height: opts.size.v.height || 
opts.size.v.width || 220,
-                               },
-                       ],
+               info = {
+                       url: './Special:FilePath/' + 
Util.sanitizeTitleURI(title.key),
+                       // Preserve width and height from the wikitext options
+                       // even if the image is non-existent.
+                       width: opts.size.v.width || 220,
+                       height: opts.size.v.height || opts.size.v.width || 220,
                };
 
                // Add mw:Error to the RDFa type.
@@ -1003,7 +1008,6 @@
                        errs.push({"key": "missing-image", "message": "This 
image does not exist." });
                }
        }
-       info = image.imageinfo[0];
 
        var imageSrc = dataAttribs.src;
        if (!dataAttribs.uneditable) {
@@ -1425,17 +1429,12 @@
                }
        }
 
-       var queueKey = title.key + JSON.stringify(opts.size.v);
-       if (queueKey in env.pageCache) {
-               this.handleImageInfo(cb, token, title, opts, optSources, null, 
env.pageCache[ queueKey ]);
+       var cacheEntry = env.batcher.imageinfo(title.key, opts.size.v,
+               this.handleImageInfo.bind(this, cb, token, title, opts, 
optSources));
+       if (cacheEntry !== undefined) {
+               this.handleImageInfo(cb, token, title, opts, optSources, null, 
cacheEntry);
        } else {
                cb({ async: true });
-
-               if (!(queueKey in env.requestQueue)) {
-                       env.requestQueue[queueKey] = new ImageInfoRequest(env, 
title.key, opts.size.v);
-               }
-
-               env.requestQueue[queueKey].once('src', 
this.handleImageInfo.bind(this, cb, token, title, opts, optSources));
        }
 };
 
diff --git a/lib/ext.core.TemplateHandler.js b/lib/ext.core.TemplateHandler.js
index 2321252..f7febc3 100644
--- a/lib/ext.core.TemplateHandler.js
+++ b/lib/ext.core.TemplateHandler.js
@@ -17,7 +17,6 @@
 var defines = require('./mediawiki.parser.defines.js');
 var TemplateRequest = require('./mediawiki.ApiRequest.js').TemplateRequest;
 var api = require('./mediawiki.ApiRequest.js');
-var PreprocessorRequest = api.PreprocessorRequest;
 var Util = require('./mediawiki.Util.js').Util;
 var DU = require('./mediawiki.DOMUtils.js').DOMUtils;
 var async = require('async');
@@ -149,8 +148,7 @@
                                        accumReceiveToksFromChild);
                                // Fetch and process the template expansion
                                this.fetchExpandedTpl(env.page.name || '',
-                                               text, PreprocessorRequest,
-                                               accumReceiveToksFromSibling, 
srcHandler);
+                                               text, 
accumReceiveToksFromSibling, srcHandler);
                        }
                } else {
                        // We don't perform recursive template expansion- 
something
@@ -1137,30 +1135,22 @@
 
 /**
  * Fetch the preprocessed wikitext for a template-like construct.
- * (The 'Processor' argument is a constructor, hence the capitalization.)
  */
-TemplateHandler.prototype.fetchExpandedTpl = function(title, text, Processor, 
parentCB, cb) {
+TemplateHandler.prototype.fetchExpandedTpl = function(title, text, parentCB, 
cb) {
        var env = this.manager.env;
-       if (text in env.pageCache) {
-               // XXX: store type too (and cache tokens/x-mediawiki)
-               cb(null, env.pageCache[text] /* , type */);
-       } else if (!env.conf.parsoid.fetchTemplates) {
-               parentCB({ tokens: [ 'Warning: Page/template fetching disabled, 
and no cache for ' + text] });
+       if (!env.conf.parsoid.fetchTemplates) {
+               parentCB({ tokens: [ 'Warning: Page/template fetching disabled 
cannot expand ' + text] });
        } else {
-
                // We are about to start an async request for a template
                env.dp('Note: trying to expand ', text);
-
-               // Start a new request if none is outstanding
-               if (env.requestQueue[text] === undefined) {
-                       env.tp('Note: Starting new request for ' + text);
-                       env.requestQueue[text] = new Processor(env, title, 
text);
+               var cacheEntry = env.batcher.preprocess(title, text, cb);
+               if (cacheEntry !== undefined) {
+                       // XXX: store type too (and cache tokens/x-mediawiki)
+                       cb(null, cacheEntry /* , type */);
+               } else {
+                       // Advise we're going async
+                       parentCB({tokens: [], async: true});
                }
-               // append request, process in document order
-               env.requestQueue[text].once('src', cb);
-
-               // Advise we're going async
-               parentCB({tokens: [], async: true});
        }
 };
 
diff --git a/lib/mediawiki.ApiRequest.js b/lib/mediawiki.ApiRequest.js
index 01eeb37..62f865f 100644
--- a/lib/mediawiki.ApiRequest.js
+++ b/lib/mediawiki.ApiRequest.js
@@ -70,6 +70,84 @@
        }
 };
 
+var manglePreprocessorResponse = function(env, response) {
+       var src = '';
+       if (response.wikitext !== undefined) {
+               src = response.wikitext;
+       } else if (response["*"] !== undefined) {
+               // For backwards compatibility. Older wikis still put the data 
here.
+               src = response["*"];
+       }
+
+       // Add the categories which were added by parser functions directly
+       // into the page and not as in-text links.
+       if (Array.isArray(response.categories)) {
+               for (var i in response.categories) {
+                       var category = response.categories[i];
+                       src += '\n[[Category:' + category['*'];
+                       if (category.sortkey) {
+                               src += "|" + category.sortkey;
+                       }
+                       src += ']]';
+               }
+       }
+       // Ditto for page properties (like DISPLAYTITLE and DEFAULTSORT)
+       if (Array.isArray(response.properties)) {
+               response.properties.forEach(function(prop) {
+                       if (prop.name === 'displaytitle' || prop.name === 
'defaultsort') {
+                               src += '\n{{' + prop.name.toUpperCase() + ':' + 
prop['*'] + '}}';
+                       }
+               });
+       }
+       // The same for ResourceLoader modules
+       setPageProperty(env, response.modules, "extensionModules");
+       setPageProperty(env, response.modulescripts, "extensionModuleScripts");
+       setPageProperty(env, response.modulestyles, "extensionModuleStyles");
+
+       return src;
+};
+
+var dummyDoc = domino.createDocument();
+var mangleParserResponse = function(env, response) {
+       var parsedHtml = '';
+       if (response.text['*'] !== undefined) {
+               parsedHtml = response.text['*'];
+       }
+
+       // Strip two trailing newlines that action=parse adds after any
+       // extension output
+       parsedHtml = parsedHtml.replace(/\n\n$/, '');
+
+       // Also strip a paragraph wrapper, if any
+       parsedHtml = parsedHtml.replace(/(^<p>)|(<\/p>$)/g, '');
+
+       // Add the modules to the page data
+       setPageProperty(env, response.modules, "extensionModules");
+       setPageProperty(env, response.modulescripts, "extensionModuleScripts");
+       setPageProperty(env, response.modulestyles, "extensionModuleStyles");
+
+       // Add the categories which were added by extensions directly into the
+       // page and not as in-text links
+       if (response.categories) {
+               for (var i in response.categories) {
+                       var category = response.categories[i];
+
+                       var link = dummyDoc.createElement("link");
+                       link.setAttribute("rel", "mw:PageProp/Category");
+
+                       var href = env.page.relativeLinkPrefix + "Category:" + 
encodeURIComponent(category['*']);
+                       if (category.sortkey) {
+                               href += "#" + 
encodeURIComponent(category.sortkey);
+                       }
+                       link.setAttribute("href", href);
+
+                       parsedHtml += "\n" + link.outerHTML;
+               }
+       }
+
+       return parsedHtml;
+};
+
 /**
  * @class
  * @extends Error
@@ -175,6 +253,7 @@
                        options.headers['X-Forwarded-Proto'] = 'https';
                }
        }
+       this.env.dp("Starting HTTP request", this.toString());
 
        return request(options, callback);
 };
@@ -444,11 +523,12 @@
  * @param {MWParserEnvironment} env
  * @param {string} title The title of the page to use as the context
  * @param {string} text
+ * @param {string} hash The queue key
  */
-function PreprocessorRequest(env, title, text) {
+function PreprocessorRequest(env, title, text, hash) {
        ApiRequest.call(this, env, title);
 
-       this.queueKey = text;
+       this.queueKey = hash;
        this.text = text;
        this.reqType = "Template Expansion";
 
@@ -501,48 +581,10 @@
        if (error) {
                this.env.log("error", error);
                this._processListeners(error, '');
-               return;
+       } else {
+               this._processListeners(error,
+                       manglePreprocessorResponse(this.env, 
data.expandtemplates));
        }
-
-       var src = '';
-       if (data.expandtemplates.wikitext !== undefined) {
-               src = data.expandtemplates.wikitext;
-       } else if (data.expandtemplates["*"] !== undefined) {
-               // For backwards compatibility. Older wikis still put the data 
here.
-               src = data.expandtemplates["*"];
-       }
-
-       this.env.tp('Expanded ', this.text, src);
-
-       // Add the categories which were added by parser functions directly
-       // into the page and not as in-text links.
-       if (Array.isArray(data.expandtemplates.categories)) {
-               for (var i in data.expandtemplates.categories) {
-                       var category = data.expandtemplates.categories[i];
-                       src += '\n[[Category:' + category['*'];
-                       if (category.sortkey) {
-                               src += "|" + category.sortkey;
-                       }
-                       src += ']]';
-               }
-       }
-       // Ditto for page properties (like DISPLAYTITLE and DEFAULTSORT)
-       if (Array.isArray(data.expandtemplates.properties)) {
-               data.expandtemplates.properties.forEach(function(prop) {
-                       if (prop.name === 'displaytitle' || prop.name === 
'defaultsort') {
-                               src += '\n{{' + prop.name.toUpperCase() + ':' + 
prop['*'] + '}}';
-                       }
-               });
-       }
-       // The same for ResourceLoader modules
-       setPageProperty(this.env, data.expandtemplates.modules, 
"extensionModules");
-       setPageProperty(this.env, data.expandtemplates.modulescripts, 
"extensionModuleScripts");
-       setPageProperty(this.env, data.expandtemplates.modulestyles, 
"extensionModuleStyles");
-
-       // Add the source to the cache
-       this.env.pageCache[this.text] = src;
-
-       this._processListeners(error, src);
 };
 
 /**
@@ -558,12 +600,13 @@
  * @param {string} title The title of the page to use as context
  * @param {string} text
  * @param {boolean} onlypst (optional) Pass onlypst to PHP parser
+ * @param {string} hash The queue key
  */
-function PHPParseRequest(env, name, text, onlypst) {
+function PHPParseRequest(env, name, text, onlypst, hash) {
        ApiRequest.call(this, env, name);
 
        this.text = text;
-       this.queueKey = text;
+       this.queueKey = hash || text;
        this.reqType = "Extension Parse";
 
        var apiargs = {
@@ -608,7 +651,6 @@
 // Function which returns a promise for the result of a parse request.
 PHPParseRequest.promise = promiseFor(PHPParseRequest);
 
-var dummyDoc = domino.createDocument();
 PHPParseRequest.prototype._handleJSON = function(error, data) {
        logAPIWarnings(this, data);
 
@@ -620,49 +662,95 @@
        if (error) {
                this.env.log("error", error);
                this._processListeners(error, '');
+       } else {
+               this._processListeners(error, mangleParserResponse(this.env, 
data.parse));
+       }
+};
+
+/**
+ * @class
+ * @extends ApiRequest
+ *
+ * Do a mixed-action batch request using the ParsoidBatchAPI extension.
+ *
+ * @constructor
+ * @param {MWParserEnvironment} env
+ * @param {Array} batchParams An array of objects
+ * @param {string} key The queue key
+ */
+function BatchRequest(env, batchParams, key) {
+       ApiRequest.call(this, env);
+       this.queueKey = key;
+       this.batchParams = batchParams;
+       this.reqType = 'Batch request';
+
+       var apiargs = {
+               format: 'json',
+               formatversion: '2',
+               action: 'parsoid-batch',
+               batch: JSON.stringify(batchParams),
+       };
+
+       this.requestOptions = {
+               method: 'POST',
+               followRedirect: true,
+               uri: env.conf.wiki.apiURI,
+               timeout: env.conf.parsoid.timeouts.mwApi.batch,
+       };
+       var req = this.request(this.requestOptions, this._requestCB.bind(this));
+
+       // Use multipart form encoding to get more efficient transfer if the 
gain
+       // will be larger than the typical overhead. In later versions of the 
request
+       // library, this can easily be done with the formData option, but 
coveralls
+       // depends on request 2.40.0.
+       if (encodeURIComponent(apiargs.batch).length - apiargs.batch.length > 
600) {
+               var form = req.form();
+               for (var optName in apiargs) {
+                       form.append(optName, apiargs[optName]);
+               }
+       } else {
+               req.form(apiargs);
+       }
+}
+
+util.inherits(BatchRequest, ApiRequest);
+
+BatchRequest.prototype._handleJSON = function(error, data) {
+       if (!error && !(data && data['parsoid-batch'] && 
Array.isArray(data['parsoid-batch']))) {
+               error = new Error('Invalid result when expanding template 
batch');
+       }
+
+       if (error) {
+               this.env.log("error", error);
+               this.emit('batch', error, null);
                return;
        }
 
-       var parsedHtml = '';
-       if (data.parse.text['*'] !== undefined) {
-               parsedHtml = data.parse.text['*'];
-       }
-
-       // Strip two trailing newlines that action=parse adds after any
-       // extension output
-       parsedHtml = parsedHtml.replace(/\n\n$/, '');
-
-       // Also strip a paragraph wrapper, if any
-       parsedHtml = parsedHtml.replace(/(^<p>)|(<\/p>$)/g, '');
-
-       // Add the modules to the page data
-       setPageProperty(this.env, data.parse.modules, "extensionModules");
-       setPageProperty(this.env, data.parse.modulescripts, 
"extensionModuleScripts");
-       setPageProperty(this.env, data.parse.modulestyles, 
"extensionModuleStyles");
-
-       // Add the categories which were added by extensions directly into the
-       // page and not as in-text links
-       if (data.parse.categories) {
-               for (var i in data.parse.categories) {
-                       var category = data.parse.categories[i];
-
-                       var link = dummyDoc.createElement("link");
-                       link.setAttribute("rel", "mw:PageProp/Category");
-
-                       var href = this.env.page.relativeLinkPrefix + 
"Category:" + encodeURIComponent(category['*']);
-                       if (category.sortkey) {
-                               href += "#" + 
encodeURIComponent(category.sortkey);
-                       }
-                       link.setAttribute("href", href);
-
-                       parsedHtml += "\n" + link.outerHTML;
+       var batchResponse = data['parsoid-batch'];
+       var callbackData = [];
+       var index, itemParams, itemResponse, j, mangled;
+       for (index = 0; index < batchResponse.length; index++) {
+               itemParams = this.batchParams[index];
+               itemResponse = batchResponse[index];
+               switch (itemParams.action) {
+                       case 'parse':
+                               mangled = mangleParserResponse(this.env, 
itemResponse);
+                               break;
+                       case 'preprocess':
+                               mangled = manglePreprocessorResponse(this.env, 
itemResponse);
+                               break;
+                       case 'imageinfo':
+                               mangled = {batchResponse: itemResponse};
+                               break;
+                       default:
+                               error = new Error("BatchRequest._handleJSON: 
Invalid action");
+                               this.emit('batch', error, null);
+                               return;
                }
+               callbackData.push(mangled);
+
        }
-
-       // Add the source to the cache
-       this.env.pageCache[this.text] = parsedHtml;
-
-       this._processListeners(error, parsedHtml);
+       this.emit('batch', error, callbackData);
 };
 
 /**
@@ -838,10 +926,10 @@
  * @param {number} [dims.width]
  * @param {number} [dims.height]
  */
-function ImageInfoRequest(env, filename, dims) {
+function ImageInfoRequest(env, filename, dims, key) {
        ApiRequest.call(this, env, null);
        this.env = env;
-       this.queueKey = filename + JSON.stringify(dims);
+       this.queueKey = key;
        this.reqType = "Image Info Request";
 
        var conf = env.conf.wiki;
@@ -905,7 +993,7 @@
        }
 
        if (data && data.query) {
-               // The API indexes its response by page ID. That's stupid.
+               // The API indexes its response by page ID. That's inconvenient.
                newpages = {};
                pagenames = {};
                pages = data.query.pages;
@@ -927,7 +1015,6 @@
 
                data.query.pages = newpages;
                data.query.imgns = this.ns;
-               this.env.pageCache[ this.queueKey ] = data.query;
                this._processListeners(null, data.query);
        } else if (data && data.error) {
                if (data.error.code === 'readapidenied') {
@@ -946,6 +1033,7 @@
        module.exports.TemplateRequest = TemplateRequest;
        module.exports.PreprocessorRequest = PreprocessorRequest;
        module.exports.PHPParseRequest = PHPParseRequest;
+       module.exports.BatchRequest = BatchRequest;
        module.exports.ParsoidCacheRequest = ParsoidCacheRequest;
        module.exports.ImageInfoRequest = ImageInfoRequest;
        module.exports.DoesNotExistError = DoesNotExistError;
diff --git a/lib/mediawiki.Batcher.js b/lib/mediawiki.Batcher.js
new file mode 100644
index 0000000..b99310d
--- /dev/null
+++ b/lib/mediawiki.Batcher.js
@@ -0,0 +1,315 @@
+'use strict';
+require('./core-upgrade.js');
+
+var Util = require('./mediawiki.Util.js').Util;
+var api = require('./mediawiki.ApiRequest.js');
+
+/**
+ * @class
+ *
+ * This class combines requests into batches for dispatch to the
+ * ParsoidBatchAPI extension, and calls the item callbacks when the batch
+ * result is returned. It handles scheduling and concurrency of batch requests.
+ * It also has a legacy mode which sends requests to the MW core API.
+ *
+ * @constructor
+ * @param {MWParserEnvironment} env
+ */
+function Batcher(env) {
+       this.env = env;
+       this.itemCallbacks = {};
+       this.currentBatch = [];
+       this.pendingBatches = [];
+       this.resultCache = {};
+       this.numOutstanding = 0;
+       this.idleTimer = false;
+
+       this.maxBatchSize = env.conf.parsoid.batchSize;
+       this.targetConcurrency = env.conf.parsoid.batchConcurrency;
+}
+
+/**
+ * Internal function for adding a generic work item.
+ *
+ * @param {Object} params The work item; params.hash identifies it
+ * @param {Function} cb The item callback
+ */
+Batcher.prototype.pushGeneric = function(params, cb) {
+       var hash = params.hash;
+       if (hash in this.itemCallbacks) {
+               this.trace("Appending callback for hash", hash);
+               this.itemCallbacks[hash].push(cb);
+       } else {
+               this.trace("Creating batch item:", params);
+               this.itemCallbacks[hash] = [cb];
+               this.currentBatch.push(params);
+               if (this.currentBatch.length >= this.maxBatchSize) {
+                       this.sealBatch();
+               }
+       }
+};
+
+/**
+ * Declare a batch complete and move it to the queue ready for dispatch. Moving
+ * batches to a queue instead of dispatching them immediately allows for an
+ * upper limit on concurrency.
+ */
+Batcher.prototype.sealBatch = function() {
+       if (this.currentBatch.length > 0) {
+               this.pendingBatches.push(this.currentBatch);
+               this.currentBatch = [];
+       }
+};
+
+/**
+ * Dispatch batches from the pending queue, if it is currently possible.
+ */
+Batcher.prototype.dispatch = function() {
+       while (this.numOutstanding < this.targetConcurrency && 
this.pendingBatches.length) {
+               var batch = this.pendingBatches.shift();
+
+               this.trace("Dispatching batch with", batch.length, "items");
+               this.request(batch).once('batch',
+                       this.onBatchResponse.bind(this, batch));
+
+               this.numOutstanding++;
+               if (this.idleTimer) {
+                       clearTimeout(this.idleTimer);
+                       this.idleTimer = false;
+               }
+       }
+};
+
+/**
+ * Schedule an idle event for the next tick. The idle event will dispatch
+ * batches if necessary to keep the job going. The idle event will be cancelled
+ * if a dispatch is done before returning to the event loop.
+ *
+ * This must be called after the completion of parsing work, and after any
+ * batch response is received, to avoid hanging the request by having an
+ * undispatched batch.
+ */
+Batcher.prototype.scheduleIdle = function() {
+       if (!this.idleTimer) {
+               this.idleTimer = setTimeout(this.onIdle.bind(this), 0);
+       }
+};
+
+/**
+ * Handler for the idle event. Dispatch batches if there is not enough work
+ * outstanding.
+ */
+Batcher.prototype.onIdle = function() {
+       this.idleTimer = false;
+
+       this.trace("Idle with outstanding =", this.numOutstanding,
+               ", pending =", this.pendingBatches.length, "x", 
this.maxBatchSize,
+               ", current =", this.currentBatch.length);
+
+       if (this.numOutstanding < this.targetConcurrency) {
+               this.sealBatch();
+               this.dispatch();
+       }
+};
+
+/**
+ * Handle a batch response and call item callbacks, after the request is
+ * decoded by BatchRequest.
+ *
+ * @param {Array} batchParams The batch items, each as passed to pushGeneric().
+ * @param {Error/null} error
+ * @param {Array} batchResult
+ */
+Batcher.prototype.onBatchResponse = function(batchParams, error, batchResult) {
+       var i, j, result, params, callbacks;
+       this.numOutstanding--;
+       if (error) {
+               this.trace("Received error in batch response:", error);
+       } else {
+               this.trace("Received batch response with", batchResult.length, 
"items");
+       }
+       for (i = 0; i < batchParams.length; i++) {
+               params = batchParams[i];
+               callbacks = this.itemCallbacks[params.hash];
+               if (error) {
+                       for (j = 0; j < callbacks.length; j++) {
+                               callbacks[j](error, null);
+                       }
+               } else {
+                       result = batchResult[i];
+                       this.resultCache[params.hash] = result;
+                       delete this.itemCallbacks[params.hash];
+                       for (j = 0; j < callbacks.length; j++) {
+                               callbacks[j](null, result);
+                       }
+               }
+       }
+       this.scheduleIdle();
+};
+
+/**
+ * Schedule a preprocess (expandtemplates) operation.
+ * @param {string} title The title of the page to use as the context
+ * @param {string} text
+ * @param {Function} cb The completion callback
+ */
+Batcher.prototype.preprocess = function(title, text, cb) {
+       var env = this.env;
+       var hash = Util.makeHash(["preprocess", text, title]);
+       if (hash in this.resultCache) {
+               this.trace("Result cache hit for hash", hash);
+               return this.resultCache[hash];
+       }
+       if (!env.conf.parsoid.useBatchAPI) {
+               this.trace("Non-batched preprocess request");
+               this.legacyRequest(api.PreprocessorRequest,
+                       [env, title, text, hash], hash, cb);
+               return;
+       }
+
+       // Add the item to the batch
+       this.pushGeneric(
+               {
+                       action: "preprocess",
+                       title: title,
+                       text: text,
+                       hash: hash,
+               }, cb
+       );
+};
+
+/**
+ * Schedule an MW parse operation.
+ * @param {string} title The title of the page to use as the context
+ * @param {string} text
+ * @param {Function} cb The completion callback
+ */
+Batcher.prototype.parse = function(title, text, cb) {
+       var env = this.env;
+       var hash = Util.makeHash(["parse", text, title]);
+       if (hash in this.resultCache) {
+               return this.resultCache[hash];
+       }
+       if (!env.conf.parsoid.useBatchAPI) {
+               this.trace("Non-batched parse request");
+               this.legacyRequest(api.PHPParseRequest,
+                       [env, title, text, false, hash], hash, cb);
+               return;
+       }
+
+       this.pushGeneric(
+               {
+                       action: "parse",
+                       title: title,
+                       text: text,
+                       hash: hash,
+               }, cb
+       );
+};
+
+/**
+ * Schedule fetching of image info.
+ * @param {string} filename
+ * @param {Object} dims
+ * @param {Function} cb The completion callback
+ */
+Batcher.prototype.imageinfo = function(filename, dims, cb) {
+       var env = this.env;
+       var hash = Util.makeHash(["imageinfo", filename, dims.width || "", 
dims.height || ""]);
+       if (hash in this.resultCache) {
+               return this.resultCache[hash];
+       }
+       if (!env.conf.parsoid.useBatchAPI) {
+               this.trace("Non-batched imageinfo request");
+               this.legacyRequest(api.ImageInfoRequest,
+                       [env, filename, dims, hash], hash, cb);
+               return;
+       }
+
+       var params = {
+               action: "imageinfo",
+               filename: filename,
+               hash: hash,
+       };
+       if (dims.width !== null || dims.height !== null) {
+               params.txopts = {};
+               if (dims.width !== null) {
+                       params.txopts.width = dims.width;
+               }
+               if (dims.height !== null) {
+                       params.txopts.height = dims.height;
+               }
+       }
+
+       this.pushGeneric(params, cb);
+};
+
+/**
+ * Helper for sending legacy requests when the extension is not available
+ * @param {Function} Constructor The ApiRequest subclass constructor
+ * @param {Array} args The constructor arguments
+ * @param {string} hash The request identifier hash
+ * @param {Function} cb The completion callback
+ */
+Batcher.prototype.legacyRequest = function(Constructor, args, hash, cb) {
+       var env = this.env;
+       if (env.requestQueue[hash] === undefined) {
+               var req = Object.create(Constructor.prototype);
+               Constructor.apply(req, args);
+               env.requestQueue[hash] = req;
+       }
+       env.requestQueue[hash].once('src', this.onLegacyResponse.bind(this, 
hash, cb));
+};
+
+/**
+ * Helper for handling a legacy response
+ */
+Batcher.prototype.onLegacyResponse = function(hash, cb, error, src) {
+       if (!error) {
+               this.resultCache[hash] = src;
+       }
+       cb(error, src);
+};
+
+/**
+ * Actually send a single batch request with the specified parameters.
+ */
+Batcher.prototype.request = function(batchParams) {
+       var i;
+       var params;
+       var apiBatch = [];
+       var key = [];
+       var apiItemParams;
+       for (i = 0; i < batchParams.length; i++) {
+               params = batchParams[i];
+               if (params.action === 'imageinfo') {
+                       apiItemParams = {
+                               action: params.action,
+                               filename: params.filename,
+                       };
+                       if ("txopts" in params) {
+                               apiItemParams.txopts = params.txopts;
+                       }
+               } else {
+                       apiItemParams = {
+                               action: params.action,
+                               title: params.title,
+                               text: params.text,
+                       };
+               }
+               apiBatch.push(apiItemParams);
+               key.push(params.hash);
+       }
+       return new api.BatchRequest(this.env, apiBatch, key.join(':'));
+};
+
+/**
+ * Convenience helper for tracing
+ */
+Batcher.prototype.trace = function() {
+       this.env.log.apply(null, 
["trace/batcher"].concat(Array.prototype.slice.call(arguments)));
+};
+
+module.exports = {
+       Batcher: Batcher,
+};
diff --git a/lib/mediawiki.ParsoidConfig.js b/lib/mediawiki.ParsoidConfig.js
index e8a150c..6972f86 100644
--- a/lib/mediawiki.ParsoidConfig.js
+++ b/lib/mediawiki.ParsoidConfig.js
@@ -37,6 +37,8 @@
                        preprocessor: 30 * 1000,
                        // action=parse
                        extParse: 30 * 1000,
+                       // action=parsoid-batch
+                       batch: 60 * 1000,
                        // action=query&prop=revisions
                        srcFetch: 40 * 1000,
                        // action=query&prop=imageinfo
@@ -269,6 +271,26 @@
 ParsoidConfig.prototype.loadWMF = true;
 
 /**
+ * @property {boolean} Set to true to use the Parsoid-specific batch API from
+ * the ParsoidBatchAPI extension (action=parsoid-batch). Defaults to false.
+ */
+ParsoidConfig.prototype.useBatchAPI = false;
+
+/**
+ * @property {number} The batch size for parse/preprocess requests.
+ * Defaults to 50.
+ */
+ParsoidConfig.prototype.batchSize = 50;
+
+/**
+ * @property {number} The maximum number of concurrent requests that the API
+ * request batcher will allow to be active at any given time. Before this
+ * limit is reached, requests will be dispatched more aggressively, giving
+ * smaller batches on average. After the limit is reached, batches will be
+ * stored in a queue with batchSize items in each batch. Defaults to 4.
+ */
+ParsoidConfig.prototype.batchConcurrency = 4;
+
+/**
  * @property {null} Settings for Performance timer.
  */
 ParsoidConfig.prototype.performanceTimer = null;
diff --git a/lib/mediawiki.TokenTransformManager.js 
b/lib/mediawiki.TokenTransformManager.js
index 273f3f7..e5f26ba 100644
--- a/lib/mediawiki.TokenTransformManager.js
+++ b/lib/mediawiki.TokenTransformManager.js
@@ -409,6 +409,7 @@
        if (this.tailAccumulator) {
                this.env.dp('AsyncTokenTransformManager.onEndEvent: calling 
siblingDone',
                                this.frame.title);
+               this.env.batcher.scheduleIdle();
                this.tailAccumulator.siblingDone();
        } else {
                // nothing was asynchronous, so we'll have to emit end here.
diff --git a/lib/mediawiki.Util.js b/lib/mediawiki.Util.js
index 6b662e9..80d90a7 100644
--- a/lib/mediawiki.Util.js
+++ b/lib/mediawiki.Util.js
@@ -6,6 +6,7 @@
 require('./core-upgrade.js');
 
 var async = require('async');
+var crypto = require('crypto');
 var request = require('request');
 var entities = require('entities');
 var TXStatsD = require('node-txstatsd');
@@ -132,6 +133,7 @@
                        "  * selser    : trace actions of the selective 
serializer",
                        "  * domdiff   : trace actions of the DOM diffing code",
                        "  * wt-escape : debug wikitext-escaping",
+                       "  * batcher   : trace API batch aggregation and 
dispatch",
                        "",
                        "--debug enables tracing of all the above phases except 
Token Transform Managers",
                        "",
@@ -1252,6 +1254,29 @@
                        typeof (dsr[0]) === 'number' && dsr[0] >= 0 &&
                        typeof (dsr[1]) === 'number' && dsr[1] >= 0;
        },
+
+       /**
+        * Quickly hash an array or string.
+        *
+        * Returns the hex-encoded MD5 digest of the input. For an array, each
+        * element is fed to the hash in order and followed by a "\0"
+        * separator, so element boundaries contribute to the digest. Any
+        * non-array value is passed to the hash in a single update.
+        *
+        * NOTE(review): primitive strings are not `instanceof String`, so in
+        * practice array elements go through the toString() branch; elements
+        * without a toString method (null/undefined) would throw there.
+        *
+        * @param {Array/string} arr
+        * @return {string} Hex digest of the hashed input.
+        */
+       makeHash: function(arr) {
+               var md5 = crypto.createHash('MD5');
+               var i;
+               if (Array.isArray(arr)) {
+                       for (i = 0; i < arr.length; i++) {
+                               if (arr[i] instanceof String) {
+                                       md5.update(arr[i]);
+                               } else {
+                                       md5.update(arr[i].toString());
+                               }
+                               md5.update("\0");
+                       }
+               } else {
+                       md5.update(arr);
+               }
+               return md5.digest('hex');
+       },
 };
 
 // FIXME: There is also a DOMUtils.getJSONAttribute. Consolidate
diff --git a/lib/mediawiki.parser.environment.js 
b/lib/mediawiki.parser.environment.js
index cb98592..c0c8e9a 100644
--- a/lib/mediawiki.parser.environment.js
+++ b/lib/mediawiki.parser.environment.js
@@ -4,6 +4,7 @@
 var WikiConfig = require('./mediawiki.WikiConfig.js').WikiConfig;
 var ParsoidConfig = require('./mediawiki.ParsoidConfig.js').ParsoidConfig;
 var ConfigRequest = require('./mediawiki.ApiRequest.js').ConfigRequest;
+var Batcher = require('./mediawiki.Batcher.js').Batcher;
 var Util = require('./mediawiki.Util.js').Util;
 var JSUtils = require('./jsutils.js').JSUtils;
 var Title = require('./mediawiki.Title.js').Title;
@@ -92,6 +93,7 @@
        // Outstanding page requests (for templates etc)
        this.requestQueue = {};
 
+       this.batcher = new Batcher(this);
 };
 
 MWParserEnvironment.prototype.configureLogging = function() {

-- 
To view, visit https://gerrit.wikimedia.org/r/227208
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I2bd6f574bca8c64302810a9569f9390c4cf64626
Gerrit-PatchSet: 7
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Tim Starling <tstarl...@wikimedia.org>
Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org>
Gerrit-Reviewer: Cscott <canan...@wikimedia.org>
Gerrit-Reviewer: GWicke <gwi...@wikimedia.org>
Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org>
Gerrit-Reviewer: Tim Starling <tstarl...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to