Mobrovac has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/390144 )

Change subject: Terminate browser process after certain time
......................................................................


Terminate browser process after certain time

By default after the arbitrarily chosen 90 seconds puppeteer will
terminate the browser subprocess if it hasn't finished returning a PDF
yet.

Bug: T178501
Change-Id: I904fc15826b7de3a34e5c977b40bfd8518bb1679
---
M config.dev.yaml
M lib/queue.js
M lib/renderer.js
M routes/html2pdf-v1.js
M test/lib/queue.js
5 files changed, 162 insertions(+), 57 deletions(-)

Approvals:
  Mobrovac: Verified; Looks good to me, approved



diff --git a/config.dev.yaml b/config.dev.yaml
index eae5f5b..7b91760 100644
--- a/config.dev.yaml
+++ b/config.dev.yaml
@@ -91,12 +91,17 @@
           # some room for page numbers
           bottom: '0.75in'
           left: '0.5in'
-      puppeteer_flags:
-        - '--no-sandbox'
-        - '--disable-setuid-sandbox'
+      # 
https://github.com/GoogleChrome/puppeteer/blob/v0.11.0/docs/api.md#puppeteerlaunchoptions
+      puppeteer_options:
+        timeout: 30000
+        args:
+          - '--no-sandbox'
+          - '--disable-setuid-sandbox'
       # the maximum number of puppeteer instances that can be launched at a 
time
       render_concurrency: 1
       # don't wait to render a PDF after this many seconds
       render_queue_timeout: 60
       # maximum allowed number of pending jobs
       max_render_queue_size: 3
+      # the number of seconds before puppeteer terminates the browser instance
+      render_execution_timout: 90
\ No newline at end of file
diff --git a/lib/queue.js b/lib/queue.js
index f66c584..ff437ac 100644
--- a/lib/queue.js
+++ b/lib/queue.js
@@ -1,7 +1,7 @@
 'use strict';
 
 const asyncQueue = require('async/queue');
-const renderer = require('./renderer');
+const asyncTimeout = require('async/timeout');
 const uuid = require('cassandra-uuid');
 
 // Errors used as the first argument of the callback passed to the queue
@@ -11,7 +11,9 @@
     // something went wrong in the render phase
     renderFailed: 1,
     // the queue is already full, not waiting for it to have room
-    queueFull: 2
+    queueFull: 2,
+    // when the render takes longer than allowed
+    renderTimeout: 3
 };
 
 /**
@@ -24,20 +26,22 @@
      * @param {Object} queueOptions
      * @param {number} queueOptions.concurrency number of concurrent
      * render instances
-     * @param {number} queueOptions.timeout number of seconds after
+     * @param {number} queueOptions.queueTimeout number of seconds after
      * which the yet-to-start renders are aborted
+     * @param {number} queueOptions.executionTimeout number of seconds after
+     * which puppeteer is asked to abort the render
      * @param {number} queueOptions.maxTaskCount number of tasks the queue
      * should hold. New tasks will be rejected once the sum of the
      * number of running tasks and the tasks in the queue is equal to
      * this number.
-     * @param {Object} puppeteerFlags flags used to in starting puppeteer
+     * @param {Object} puppeteerOptions options used to in starting puppeteer
      * @param {Object} pdfOptions pdf options passed to Chromium
      * @param {Object} logger app logger
      */
-    constructor(queueOptions, puppeteerFlags, pdfOptions, logger) {
+    constructor(queueOptions, puppeteerOptions, pdfOptions, logger) {
         this._queueObject = asyncQueue(this._worker.bind(this),
                                        queueOptions.concurrency);
-        this._puppeteerFlags = puppeteerFlags;
+        this._puppeteerOptions = puppeteerOptions;
         this._pdfOptions = pdfOptions;
         this._options = queueOptions;
         this._logger = logger;
@@ -73,7 +77,7 @@
     _setCancelTaskTimeout(data, callback) {
         const logger = this._logger;
         const queue = this._queueObject;
-        const timeout = this._options.timeout;
+        const timeout = this._options.queueTimeout;
 
         data._id = `${uuid.TimeUuid.now().toString()}|${data.uri}`;
         data._timeoutID = setTimeout(() => {
@@ -140,8 +144,35 @@
     _worker(data, callback) {
         this._clearCancelTaskTimeout(data);
         this._logger.log('info/queue', `Started rendering ${data._id}`);
-        renderer
-            .articleToPdf(data.uri, data.format, this._puppeteerFlags,
+
+        const timeout = this._options.executionTimeout;
+        const timedRender = asyncTimeout(this._render.bind(this),
+                                         timeout * 1000);
+        const renderer = data._renderer;
+        timedRender(data, (error, pdf) => {
+            // error returned by async timeout
+            if (error && error.code === 'ETIMEDOUT') {
+                this._logger.log(
+                    'error/render',
+                    `Aborting. Render hasn't finished within ${timeout} ` +
+                        `seconds. Data ID: ${data._id}.`
+                );
+                renderer.abortRender();
+                callback(callbackErrors.renderTimeout, null);
+            } else {
+                callback(error, pdf);
+            }
+        });
+    }
+
+    /**
+     * Render a PDF
+     * @param {Object} data used for rendering
+     * @param {Function} callback called on render success/failure
+     */
+    _render(data, callback) {
+        data._renderer
+            .articleToPdf(data.uri, data.format, this._puppeteerOptions,
                           this._pdfOptions)
             .then((pdf) => {
                 this._logger.log(
diff --git a/lib/renderer.js b/lib/renderer.js
index dae34f1..68ef504 100644
--- a/lib/renderer.js
+++ b/lib/renderer.js
@@ -2,40 +2,63 @@
 
 const puppeteer = require('puppeteer');
 
-/**
- * Renders content from `url` in PDF
- * @param {string} url URL to get content from
- * @param {string} format Page size, e.g. Letter or A4, passed to understands
- * @param {Object} puppeteerFlags
- * @param {Object} pdfOptions
- * @return {<Promise<Buffer>>} Promise which resolves with PDF buffer
- */
-exports.articleToPdf = function articleToPdf(url, format, puppeteerFlags, 
pdfOptions) {
-    let browser;
-    let page;
+module.exports = class Renderer {
+    constructor() {
+        this._browser = null;
+    }
 
-    return puppeteer.launch({ args: puppeteerFlags })
-        .then((browser_) => {
-            browser = browser_;
-            return browser.newPage();
-        })
-        .then((page_) => {
-            page = page_;
-            return page.goto(url, { waitUntil: 'networkidle' });
-        })
-        .then(() => {
-            return page.pdf(Object.assign(
-                {}, pdfOptions, { format }
-            ));
-        })
-        .catch((error) => {
-            if (browser) {
-                browser.close();
-            }
-            throw error;
-        })
-        .then((pdf) => {
-            browser.close();
-            return pdf;
-        });
+    /**
+     * Closes any open browser instance
+     */
+    _closeBrowser() {
+        if (this._browser) {
+            this._browser.close();
+            this._browser = null;
+        }
+    }
+
+    /**
+     * Renders content from `url` in PDF
+     * @param {string} url URL to get content from
+     * TODO: merge format with pdfOptions
+     * @param {string} format Page size, e.g. Letter or A4, passed to 
understands
+     * @param {Object} puppeteerOptions
+     * @param {Object} pdfOptions
+     * @return {<Promise<Buffer>>} Promise which resolves with PDF buffer
+     */
+    articleToPdf(url, format, puppeteerOptions, pdfOptions) {
+        let page;
+        const that = this;
+
+        return puppeteer.launch(puppeteerOptions)
+            .then((browser) => {
+                that._browser = browser;
+                return browser.newPage();
+            })
+            .then((page_) => {
+                page = page_;
+                return page.goto(url, { waitUntil: 'networkidle' });
+            })
+            .then(() => {
+                return page.pdf(Object.assign(
+                    {}, pdfOptions, { format }
+                ));
+            })
+            .catch((error) => {
+                that._closeBrowser();
+                throw error;
+            })
+            .then((pdf) => {
+                that._closeBrowser();
+                return pdf;
+            });
+    }
+
+    /**
+     * Aborts the request to create a PDF.
+     * Should be called after calling articleToPdf
+     */
+    abortRender() {
+        this._closeBrowser();
+    }
 };
diff --git a/routes/html2pdf-v1.js b/routes/html2pdf-v1.js
index cfeae4a..f1993e0 100644
--- a/routes/html2pdf-v1.js
+++ b/routes/html2pdf-v1.js
@@ -2,6 +2,7 @@
 
 const { callbackErrors, Queue } = require('../lib/queue');
 const sUtil = require('../lib/util');
+const Renderer = require('../lib/renderer');
 
 /**
  * The main router object
@@ -26,18 +27,23 @@
         }
     });
 
+    const renderer = new Renderer();
     app.queue.push({
+        renderer,
         uri: restbaseRequest.uri,
         format: req.params.format
     }, ((error, pdf) => {
         if (error) {
             let status;
+            const e = callbackErrors;
 
-            if ([callbackErrors.queueBusy, callbackErrors.queueFull]
-                .includes(error)) {
-                status = 503;
-            } else if (error === callbackErrors.renderFailed) {
-                status = 500;
+            switch (error) {
+                case e.queueBusy:
+                case e.queueFull:
+                    status = 503;
+                    break;
+                default:
+                    status = 500;
             }
 
             const errorObject = new sUtil.HTTPError({ status });
@@ -63,10 +69,11 @@
     app.queue = new Queue(
         {
             concurrency: conf.render_concurrency,
-            timeout: conf.render_queue_timeout,
+            queueTimeout: conf.render_queue_timeout,
+            executionTimeout: conf.render_execution_timout,
             maxTaskCount: conf.max_render_queue_size
         },
-        conf.puppeteer_flags,
+        conf.puppeteer_options,
         conf.pdf_options,
         app.logger
     );
diff --git a/test/lib/queue.js b/test/lib/queue.js
index f593b2b..3d63bad 100644
--- a/test/lib/queue.js
+++ b/test/lib/queue.js
@@ -3,6 +3,7 @@
 const assert = require('../utils/assert.js');
 const { callbackErrors, Queue } = require('../../lib/queue');
 const logger = { log: () => {} };
+const renderer = { abortRender: () => {} };
 const puppeteerFlags = [
     '--no-sandbox',
     '--disable-setuid-sandbox'
@@ -42,13 +43,15 @@
         }
         const q = new QueueTest({
             concurrency: 1,
-            timeout: 90,
+            queueTimeout: 90,
+            executionTimeout: 90,
             maxTaskCount: 3
         }, puppeteerFlags, pdfOptions, logger);
 
         // first worker must finish after 1 sec
         q.push({
             id: 1,
+            renderer,
             timeout: 1000
         }, () => {
             assert.ok(status === 'done 1');
@@ -58,6 +61,7 @@
         // second worker must finish 0.5 sec after the first one
         q.push({
             id: 2,
+            renderer,
             timeout: 500
         }, () => {
             assert.ok(status === 'done 2');
@@ -67,6 +71,7 @@
         // the last worker must finish last, regardless of the timeout
         q.push({
             id: 3,
+            renderer,
             timeout: 10
         }, () => {
             assert.ok(testsCompleted === 2);
@@ -89,13 +94,15 @@
         }
         const q = new QueueTest({
             concurrency: 1,
-            timeout: 5,
+            queueTimeout: 5,
+            executionTimeout: 90,
             maxTaskCount: 1
         }, puppeteerFlags, pdfOptions, logger);
 
         // first worker completes in 3 seconds
         q.push({
             id: 1,
+            renderer,
             timeout: 3000
         }, (error, data) => {
             assert.ok(error === null);
@@ -106,6 +113,7 @@
         // even though it's allowed to wait 5 seconds.
         q.push({
             id: 2,
+            renderer,
             timeout: 0
         }, (error, data) => {
             assert.ok(testsCompleted === 0);
@@ -130,13 +138,15 @@
         }
         const q = new QueueTest({
             concurrency: 1,
-            timeout: 1,
+            queueTimeout: 1,
+            executionTimeout: 90,
             maxTaskCount: 3
         }, puppeteerFlags, pdfOptions, logger);
 
         // first worker completes in 3 seconds
         q.push({
             id: 1,
+            renderer,
             timeout: 3000
         }, (error, data) => {
             assert.ok(error === null);
@@ -146,6 +156,7 @@
         // the following two requests should be rejected
         q.push({
             id: 2,
+            renderer,
             timeout: 10
         }, (error, data) => {
             assert.ok(tasksCompleted === 0);
@@ -154,6 +165,7 @@
         });
         q.push({
             id: 3,
+            renderer,
             timeout: 20
         }, (error, data) => {
             assert.ok(tasksCompleted === 0);
@@ -162,4 +174,31 @@
             done();
         });
     });
+
+    it('should reject tasks when render takes long', function(done) {
+        class QueueTest extends Queue {
+            _render(data, callback) {
+                // simulate render
+                setTimeout(() => {
+                    callback(null, {});
+                }, data.timeout);
+            }
+        }
+        const q = new QueueTest({
+            concurrency: 10,
+            queueTimeout: 30,
+            executionTimeout: 1,
+            maxTaskCount: 10
+        }, puppeteerFlags, pdfOptions, logger);
+
+        q.push({
+            id: 1,
+            renderer,
+            timeout: 3000
+        }, (error, data) => {
+            assert.ok(error === callbackErrors.renderTimeout,
+                      'Render took more than a second.');
+            done();
+        });
+    });
 });

-- 
To view, visit https://gerrit.wikimedia.org/r/390144
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I904fc15826b7de3a34e5c977b40bfd8518bb1679
Gerrit-PatchSet: 8
Gerrit-Project: mediawiki/services/chromium-render
Gerrit-Branch: master
Gerrit-Owner: Bmansurov <bmansu...@wikimedia.org>
Gerrit-Reviewer: Bmansurov <bmansu...@wikimedia.org>
Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org>
Gerrit-Reviewer: Pmiazga <pmia...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to