Mobrovac has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/390144 )
Change subject: Terminate browser process after certain time ...................................................................... Terminate browser process after certain time By default after the arbitrarily chosen 90 seconds puppeteer will terminate the browser subprocess if it hasn't finished returning a PDF yet. Bug: T178501 Change-Id: I904fc15826b7de3a34e5c977b40bfd8518bb1679 --- M config.dev.yaml M lib/queue.js M lib/renderer.js M routes/html2pdf-v1.js M test/lib/queue.js 5 files changed, 162 insertions(+), 57 deletions(-) Approvals: Mobrovac: Verified; Looks good to me, approved diff --git a/config.dev.yaml b/config.dev.yaml index eae5f5b..7b91760 100644 --- a/config.dev.yaml +++ b/config.dev.yaml @@ -91,12 +91,17 @@ # some room for page numbers bottom: '0.75in' left: '0.5in' - puppeteer_flags: - - '--no-sandbox' - - '--disable-setuid-sandbox' + # https://github.com/GoogleChrome/puppeteer/blob/v0.11.0/docs/api.md#puppeteerlaunchoptions + puppeteer_options: + timeout: 30000 + args: + - '--no-sandbox' + - '--disable-setuid-sandbox' # the maximum number of puppeteer instances that can be launched at a time render_concurrency: 1 # don't wait to render a PDF after this many seconds render_queue_timeout: 60 # maximum allowed number of pending jobs max_render_queue_size: 3 + # the number of seconds before puppeteer terminates the browser instance + render_execution_timout: 90 \ No newline at end of file diff --git a/lib/queue.js b/lib/queue.js index f66c584..ff437ac 100644 --- a/lib/queue.js +++ b/lib/queue.js @@ -1,7 +1,7 @@ 'use strict'; const asyncQueue = require('async/queue'); -const renderer = require('./renderer'); +const asyncTimeout = require('async/timeout'); const uuid = require('cassandra-uuid'); // Errors used as the first argument of the callback passed to the queue @@ -11,7 +11,9 @@ // something went wrong in the render phase renderFailed: 1, // the queue is already full, not waiting for it to have room - queueFull: 2 + queueFull: 2, + // when the render takes longer than allowed + renderTimeout: 3 }; /** @@ -24,20 +26,22 @@ * @param {Object} queueOptions * @param {number} queueOptions.concurrency number of concurrent * render instances - * @param {number} queueOptions.timeout number of seconds after + * @param {number} queueOptions.queueTimeout number of seconds after * which the yet-to-start renders are aborted + * @param {number} queueOptions.executionTimeout number of seconds after + * which puppeteer is asked to abort the render * @param {number} queueOptions.maxTaskCount number of tasks the queue * should hold. New tasks will be rejected once the sum of the * number of running tasks and the tasks in the queue is equal to * this number. - * @param {Object} puppeteerFlags flags used to in starting puppeteer + * @param {Object} puppeteerOptions options used to in starting puppeteer * @param {Object} pdfOptions pdf options passed to Chromium * @param {Object} logger app logger */ - constructor(queueOptions, puppeteerFlags, pdfOptions, logger) { + constructor(queueOptions, puppeteerOptions, pdfOptions, logger) { this._queueObject = asyncQueue(this._worker.bind(this), queueOptions.concurrency); - this._puppeteerFlags = puppeteerFlags; + this._puppeteerOptions = puppeteerOptions; this._pdfOptions = pdfOptions; this._options = queueOptions; this._logger = logger; @@ -73,7 +77,7 @@ _setCancelTaskTimeout(data, callback) { const logger = this._logger; const queue = this._queueObject; - const timeout = this._options.timeout; + const timeout = this._options.queueTimeout; data._id = `${uuid.TimeUuid.now().toString()}|${data.uri}`; data._timeoutID = setTimeout(() => { @@ -140,8 +144,35 @@ _worker(data, callback) { this._clearCancelTaskTimeout(data); this._logger.log('info/queue', `Started rendering ${data._id}`); - renderer - .articleToPdf(data.uri, data.format, this._puppeteerFlags, + + const timeout = this._options.executionTimeout; + const timedRender = asyncTimeout(this._render.bind(this), + timeout * 1000); + const renderer = data._renderer; + timedRender(data, (error, pdf) => { + // error returned by async timeout + if (error && error.code === 'ETIMEDOUT') { + this._logger.log( + 'error/render', + `Aborting. Render hasn't finished within ${timeout} ` + + `seconds. Data ID: ${data._id}.` + ); + renderer.abortRender(); + callback(callbackErrors.renderTimeout, null); + } else { + callback(error, pdf); + } + }); + } + + /** + * Render a PDF + * @param {Object} data used for rendering + * @param {Function} callback called on render success/failure + */ + _render(data, callback) { + data._renderer + .articleToPdf(data.uri, data.format, this._puppeteerOptions, this._pdfOptions) .then((pdf) => { this._logger.log( diff --git a/lib/renderer.js b/lib/renderer.js index dae34f1..68ef504 100644 --- a/lib/renderer.js +++ b/lib/renderer.js @@ -2,40 +2,63 @@ const puppeteer = require('puppeteer'); -/** - * Renders content from `url` in PDF - * @param {string} url URL to get content from - * @param {string} format Page size, e.g. Letter or A4, passed to understands - * @param {Object} puppeteerFlags - * @param {Object} pdfOptions - * @return {<Promise<Buffer>>} Promise which resolves with PDF buffer - */ -exports.articleToPdf = function articleToPdf(url, format, puppeteerFlags, pdfOptions) { - let browser; - let page; +module.exports = class Renderer { + constructor() { + this._browser = null; + } - return puppeteer.launch({ args: puppeteerFlags }) - .then((browser_) => { - browser = browser_; - return browser.newPage(); - }) - .then((page_) => { - page = page_; - return page.goto(url, { waitUntil: 'networkidle' }); - }) - .then(() => { - return page.pdf(Object.assign( - {}, pdfOptions, { format } - )); - }) - .catch((error) => { - if (browser) { - browser.close(); - } - throw error; - }) - .then((pdf) => { - browser.close(); - return pdf; - }); + /** + * Closes any open browser instance + */ + _closeBrowser() { + if (this._browser) { + this._browser.close(); + this._browser = null; + } + } + + /** + * Renders content from `url` in PDF + * @param {string} url URL to get content from + * TODO: merge format with pdfOptions + * @param {string} format Page size, e.g. Letter or A4, passed to understands + * @param {Object} puppeteerOptions + * @param {Object} pdfOptions + * @return {<Promise<Buffer>>} Promise which resolves with PDF buffer + */ + articleToPdf(url, format, puppeteerOptions, pdfOptions) { + let page; + const that = this; + + return puppeteer.launch(puppeteerOptions) + .then((browser) => { + that._browser = browser; + return browser.newPage(); + }) + .then((page_) => { + page = page_; + return page.goto(url, { waitUntil: 'networkidle' }); + }) + .then(() => { + return page.pdf(Object.assign( + {}, pdfOptions, { format } + )); + }) + .catch((error) => { + that._closeBrowser(); + throw error; + }) + .then((pdf) => { + that._closeBrowser(); + return pdf; + }); + } + + /** + * Aborts the request to create a PDF. + * Should be called after calling articleToPdf + */ + abortRender() { + this._closeBrowser(); + } }; diff --git a/routes/html2pdf-v1.js b/routes/html2pdf-v1.js index cfeae4a..f1993e0 100644 --- a/routes/html2pdf-v1.js +++ b/routes/html2pdf-v1.js @@ -2,6 +2,7 @@ const { callbackErrors, Queue } = require('../lib/queue'); const sUtil = require('../lib/util'); +const Renderer = require('../lib/renderer'); /** * The main router object @@ -26,18 +27,23 @@ } }); + const renderer = new Renderer(); app.queue.push({ + renderer, uri: restbaseRequest.uri, format: req.params.format }, ((error, pdf) => { if (error) { let status; + const e = callbackErrors; - if ([callbackErrors.queueBusy, callbackErrors.queueFull] - .includes(error)) { - status = 503; - } else if (error === callbackErrors.renderFailed) { - status = 500; + switch (error) { + case e.queueBusy: + case e.queueFull: + status = 503; + break; + default: + status = 500; } const errorObject = new sUtil.HTTPError({ status }); @@ -63,10 +69,11 @@ app.queue = new Queue( { concurrency: conf.render_concurrency, - timeout: conf.render_queue_timeout, + queueTimeout: conf.render_queue_timeout, + executionTimeout: conf.render_execution_timout, maxTaskCount: conf.max_render_queue_size }, - conf.puppeteer_flags, + conf.puppeteer_options, conf.pdf_options, app.logger ); diff --git a/test/lib/queue.js b/test/lib/queue.js index f593b2b..3d63bad 100644 --- a/test/lib/queue.js +++ b/test/lib/queue.js @@ -3,6 +3,7 @@ const assert = require('../utils/assert.js'); const { callbackErrors, Queue } = require('../../lib/queue'); const logger = { log: () => {} }; +const renderer = { abortRender: () => {} }; const puppeteerFlags = [ '--no-sandbox', '--disable-setuid-sandbox' @@ -42,13 +43,15 @@ } const q = new QueueTest({ concurrency: 1, - timeout: 90, + queueTimeout: 90, + executionTimeout: 90, maxTaskCount: 3 }, puppeteerFlags, pdfOptions, logger); // first worker must finish after 1 sec q.push({ id: 1, + renderer, timeout: 1000 }, () => { assert.ok(status === 'done 1'); @@ -58,6 +61,7 @@ // second worker must finish 0.5 sec after the first one q.push({ id: 2, + renderer, timeout: 500 }, () => { assert.ok(status === 'done 2'); @@ -67,6 +71,7 @@ // the last worker must finish last, regardless of the timeout q.push({ id: 3, + renderer, timeout: 10 }, () => { assert.ok(testsCompleted === 2); @@ -89,13 +94,15 @@ } const q = new QueueTest({ concurrency: 1, - timeout: 5, + queueTimeout: 5, + executionTimeout: 90, maxTaskCount: 1 }, puppeteerFlags, pdfOptions, logger); // first worker completes in 3 seconds q.push({ id: 1, + renderer, timeout: 3000 }, (error, data) => { assert.ok(error === null); @@ -106,6 +113,7 @@ // even though it's allowed to wait 5 seconds. q.push({ id: 2, + renderer, timeout: 0 }, (error, data) => { assert.ok(testsCompleted === 0); @@ -130,13 +138,15 @@ } const q = new QueueTest({ concurrency: 1, - timeout: 1, + queueTimeout: 1, + executionTimeout: 90, maxTaskCount: 3 }, puppeteerFlags, pdfOptions, logger); // first worker completes in 3 seconds q.push({ id: 1, + renderer, timeout: 3000 }, (error, data) => { assert.ok(error === null); @@ -146,6 +156,7 @@ // the following two requests should be rejected q.push({ id: 2, + renderer, timeout: 10 }, (error, data) => { assert.ok(tasksCompleted === 0); @@ -154,6 +165,7 @@ }); q.push({ id: 3, + renderer, timeout: 20 }, (error, data) => { assert.ok(tasksCompleted === 0); @@ -162,4 +174,31 @@ done(); }); }); + + it('should reject tasks when render takes long', function(done) { + class QueueTest extends Queue { + _render(data, callback) { + // simulate render + setTimeout(() => { + callback(null, {}); + }, data.timeout); + } + } + const q = new QueueTest({ + concurrency: 10, + queueTimeout: 30, + executionTimeout: 1, + maxTaskCount: 10 + }, puppeteerFlags, pdfOptions, logger); + + q.push({ + id: 1, + renderer, + timeout: 3000 + }, (error, data) => { + assert.ok(error === callbackErrors.renderTimeout, + 'Render took more than a second.'); + done(); + }); + }); }); -- To view, visit https://gerrit.wikimedia.org/r/390144 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I904fc15826b7de3a34e5c977b40bfd8518bb1679 Gerrit-PatchSet: 8 Gerrit-Project: mediawiki/services/chromium-render Gerrit-Branch: master Gerrit-Owner: Bmansurov <bmansu...@wikimedia.org> Gerrit-Reviewer: Bmansurov <bmansu...@wikimedia.org> Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org> Gerrit-Reviewer: Pmiazga <pmia...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits