Yurik has uploaded a new change for review. https://gerrit.wikimedia.org/r/201998
Change subject: Merged with upstream service-template-node ...................................................................... Merged with upstream service-template-node * Merged files * removed missing func metrics workaround * host->domain, changed domain handling Change-Id: Id12a2cda4829db30298598c12de6e989e9418bac --- A .dockerignore A Dockerfile M README.md M app.js M config.dev.yaml M config.labs.yaml M config.prod.yaml A dist/init-scripts/systemd.erb A dist/init-scripts/sysvinit.erb A dist/init-scripts/upstart.erb A doc/README.md A doc/api-design.md A doc/coding.md A doc/commands.md A doc/config.md A doc/template.md A lib/util.js M package.json M routes/empty.js.template A routes/ex.js M routes/info.js M routes/root.js M routes/v1.js A scripts/docker.js A scripts/gen-init-scripts.rb M server.js M test/features/app/app.js A test/features/ex/errors.js M test/features/v1/page.js M test/features/v1/siteinfo.js M test/utils/assert.js M test/utils/logStream.js 32 files changed, 1,850 insertions(+), 168 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/graphoid refs/changes/98/201998/1 diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..563df11 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +.git +coverage +node_modules diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..f8f2496 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,17 @@ +FROM buildpack-deps:jessie + +# install node && npm +RUN apt-get update && apt-get install -y nodejs npm && rm -rf /var/lib/apt/lists/* +# link /usr/bin/node to /usr/bin/nodejs +RUN ln -s /usr/bin/nodejs /usr/bin/node + +# copy the repo files over +RUN mkdir -p /opt/service +ADD . /opt/service +# install the dependencies +WORKDIR /opt/service +RUN npm install + +# start the server +CMD ["/usr/bin/npm", "start"] + diff --git a/README.md b/README.md index 2ff10fc..c062ff1 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,9 @@ Template for creating MediaWiki Services in Node.js -# Getting Started +## Getting Started -## Installation +### Installation First, clone the repository @@ -19,18 +19,14 @@ npm install ``` -Finally, activate the development configuration file - -``` -ln -s config.dev.yaml config.yaml -``` - You are now ready to get to work! * Inspect/modify/configure `app.js` * Add routes by placing files in `routes/` (look at the files there for examples) -## Running the examples +You can also read [the documentation](doc/). + +### Running the examples The template is a fully-working example, so you may try it right away. To start the server hosting the REST API, simply run (inside the repo's directory) @@ -39,18 +35,23 @@ npm start ``` -This starts an HTTP server listening on `localhost:6927`. There are several routes -you may query (with a browser, or `curl` and friends): +This starts an HTTP server listening on `localhost:6927`. There are several +routes you may query (with a browser, or `curl` and friends): * `http://localhost:6927/_info/` * `http://localhost:6927/_info/name` * `http://localhost:6927/_info/version` * `http://localhost:6927/_info/home` -* `http://localhost:6927/v1/siteinfo/{uri}{/prop}` -* `http://localhost:6927/v1/page/{title}` -* `http://localhost:6927/v1/page/{title}/lead` +* `http://localhost:6927/{domain}/v1/siteinfo{/prop}` +* `http://localhost:6927/{domain}/v1/page/{title}` +* `http://localhost:6927/{domain}/v1/page/{title}/lead` +* `http://localhost:6927/ex/err/array` +* `http://localhost:6927/ex/err/file` +* `http://localhost:6927/ex/err/manual/error` +* `http://localhost:6927/ex/err/manual/deny` +* `http://localhost:6927/ex/err/auth` -## Tests +### Tests The template also includes a test suite a small set of executable tests. To fire them up, simply run: @@ -68,5 +69,15 @@ npm run-script coverage ``` +### Troubleshooting + +In a lot of cases when there is an issue with node it helps to recreate the +`node_modules` directory: + +``` +rm -r node_modules +npm install +``` + Enjoy! diff --git a/app.js b/app.js index 3d77654..88943b1 100644 --- a/app.js +++ b/app.js @@ -1,89 +1,102 @@ 'use strict'; + var http = require('http'); var BBPromise = require('bluebird'); var express = require('express'); var compression = require('compression'); var bodyParser = require('body-parser'); -var multer = require('multer'); var fs = BBPromise.promisifyAll(require('fs')); +var sUtil = require('./lib/util'); +var packageInfo = require('./package.json'); /** - * Promise create an express app and initialize it - * @param options - * @returns {bluebird} + * Creates an express app and initialises it + * @param {Object} options the options to initialise the app with + * @return {bluebird} the promise resolving to the app object */ function initApp(options) { - // The main application object - return BBPromise.resolve(express()).then(function(app){ - // get the options and make them available in the app - app.logger = options.logger; // the logging device - app.metrics = options.metrics; // the metrics - app.conf = options.config; // this app's config options - app.info = require('./package.json'); // this app's package info + // the main application object + var app = express(); - // ensure some sane defaults - if (!app.conf.hasOwnProperty('port')) { app.conf.port = 8888; } - if (!app.conf.hasOwnProperty('interface')) { app.conf.interface = '0.0.0.0'; } - if (!app.conf.hasOwnProperty('compressionLevel')) { app.conf.compressionLevel = 3; } + // get the options and make them available in the app + app.logger = options.logger; // the logging device + app.metrics = options.metrics; // the metrics + app.conf = options.config; // this app's config options + app.info = packageInfo; // this app's package info - // disable the X-Powered-By header - app.set('x-powered-by', false); - // disable the ETag header - users should provide them! - app.set('etag', false); - // enable compression - app.use(compression({level: app.conf.compressionLevel})); - // use the JSON body parser - app.use(bodyParser.json()); - // use the application/x-www-form-urlencoded parser - app.use(bodyParser.urlencoded({extended: true})); - // use the multipart/form-data - app.use(multer()); - // serve static files from static/ - app.use('/static', express.static(__dirname + '/static')); + // ensure some sane defaults + if(!app.conf.port) { app.conf.port = 8888; } + if(!app.conf.interface) { app.conf.interface = '0.0.0.0'; } + if(!app.conf.compression_level) { app.conf.compression_level = 3; } - return app; - }); + // disable the X-Powered-By header + app.set('x-powered-by', false); + // disable the ETag header - users should provide them! + app.set('etag', false); + // enable compression + app.use(compression({level: app.conf.compression_level})); + // use the JSON body parser + app.use(bodyParser.json()); + // use the application/x-www-form-urlencoded parser + app.use(bodyParser.urlencoded({extended: true})); + // serve static files from static/ + app.use('/static', express.static(__dirname + '/static')); + + return BBPromise.resolve(app); + } + /** - * Async load all routes for the app - * @param app - * @returns {bluebird} + * Loads all routes declared in routes/ into the app + * @param {Application} app the application object to load routes into + * @returns {bluebird} a promise resolving to the app object */ function loadRoutes (app) { + // get the list of files in routes/ - return fs - .readdirAsync(__dirname + '/routes') - .map(function (fname) { - // ... and then load each route - // but only if it's a js file - if (!/\.js$/.test(fname)) { - return; - } - // import the route file - var route = require(__dirname + '/routes/' + fname); - route = route(app); - // check that the route exports the object we need - if (route.constructor !== Object || !route.path || !route.router) { - throw new Error('routes/' + fname + ' does not export the correct object!'); - } - // all good, use that route - app.use(route.path, route.router); - }).then(function () { - // route loading is now complete, return the app object - return app; - }); + return fs.readdirAsync(__dirname + '/routes') + .map(function (fname) { + // ... and then load each route + // but only if it's a js file + if(!/\.js$/.test(fname)) { + return; + } + // import the route file + var route = require(__dirname + '/routes/' + fname); + route = route(app); + // check that the route exports the object we need + if(route.constructor !== Object || !route.path || !route.router || !(route.api_version || route.skip_domain)) { + throw new TypeError('routes/' + fname + ' does not export the correct object!'); + } + // wrap the route handlers with Promise.try() blocks + sUtil.wrapRouteHandlers(route.router, app); + // determine the path prefix + var prefix = ''; + if(!route.skip_domain) { + prefix = '/:domain/v' + route.api_version; + } + // all good, use that route + app.use(prefix + route.path, route.router); + }).then(function () { + // catch errors + sUtil.setErrorHandler(app); + // route loading is now complete, return the app object + return BBPromise.resolve(app); + }); + } /** - * Async create a web server - * @param app - * @returns {bluebird} + * Creates and start the service's web server + * @param {Application} app the app object to use in the service + * @returns {bluebird} a promise creating the web server */ function createServer(app) { + // return a promise which creates an HTTP server, // attaches the app to it, and starts accepting // incoming client requests @@ -97,6 +110,7 @@ app.logger.log('info', 'Worker ' + process.pid + ' listening on ' + app.conf.interface + ':' + app.conf.port); }); + } /** @@ -106,7 +120,10 @@ * object to it. */ module.exports = function(options) { + return initApp(options) - .then(loadRoutes) - .then(createServer); + .then(loadRoutes) + .then(createServer); + }; + diff --git a/config.dev.yaml b/config.dev.yaml index c1db27b..8ca44d3 100644 --- a/config.dev.yaml +++ b/config.dev.yaml @@ -1,13 +1,11 @@ -# Info about this config. Used for packaging & other purposes. -info: - name: service-template-node - version: 0.0.1 - description: A blueprint for MediaWiki REST API services - # Number of worker processes to spawn. # Set to 0 to run everything in a single process without clustering. # Use 'ncpu' to run as many workers as there are CPU units num_workers: 0 + +# Log error messages and gracefully restart a worker if v8 reports that it +# uses more heap (note: not RSS) than this many mb. +worker_heap_limit_mb: 250 # Logger info logging: @@ -20,12 +18,12 @@ # Statsd metrics reporter metrics: - type: txstatsd - host: localhost - port: 8125 + type: log + #host: localhost + #port: 8125 services: - - name: service-template-node + - name: graphoid # a relative path or the name of an npm package, if different from name module: ./app.js # optionally, a version constraint of the npm package @@ -34,7 +32,6 @@ conf: port: 6927 # interface: localhost # uncomment to only listen on localhost - # compressionLevel: 3 # output gzip compression level, 6 by default # more per-service config settings # Graphoid-specific settings diff --git a/config.labs.yaml b/config.labs.yaml index 8b28184..9b951fb 100644 --- a/config.labs.yaml +++ b/config.labs.yaml @@ -1,9 +1,3 @@ -# Info about this config. Used for packaging & other purposes. -info: - name: service-template-node - version: 0.0.1 - description: A blueprint for MediaWiki REST API services - # Number of worker processes to spawn. # Set to 0 to run everything in a single process without clustering. # Use 'ncpu' to run as many workers as there are CPU units @@ -25,7 +19,7 @@ port: 8125 services: - - name: service-template-node + - name: graphoid # a relative path or the name of an npm package, if different from name module: ./app.js # optionally, a version constraint of the npm package @@ -34,7 +28,6 @@ conf: port: 19000 # interface: localhost # uncomment to only listen on localhost - # compressionLevel: 3 # output gzip compression level, 6 by default # more per-service config settings # Graphoid-specific settings diff --git a/config.prod.yaml b/config.prod.yaml index c8c3b44..a759c13 100644 --- a/config.prod.yaml +++ b/config.prod.yaml @@ -1,13 +1,11 @@ -# Info about this config. Used for packaging & other purposes. -info: - name: service-template-node - version: 0.0.1 - description: A blueprint for MediaWiki REST API services - # Number of worker processes to spawn. # Set to 0 to run everything in a single process without clustering. # Use 'ncpu' to run as many workers as there are CPU units num_workers: ncpu + +# Log error messages and gracefully restart a worker if v8 reports that it +# uses more heap (note: not RSS) than this many mb. +worker_heap_limit_mb: 500 # Logger info logging: @@ -25,7 +23,7 @@ port: 8125 services: - - name: service-template-node + - name: graphoid # a relative path or the name of an npm package, if different from name module: ./app.js # optionally, a version constraint of the npm package @@ -34,7 +32,6 @@ conf: port: 6927 # interface: localhost # uncomment to only listen on localhost - # compressionLevel: 3 # output gzip compression level, 6 by default # more per-service config settings # Graphoid-specific settings diff --git a/dist/init-scripts/systemd.erb b/dist/init-scripts/systemd.erb new file mode 100644 index 0000000..78b5be3 --- /dev/null +++ b/dist/init-scripts/systemd.erb @@ -0,0 +1,25 @@ +[Unit] +Description=<%= @description ? @service_name + ' - ' + @description : @service_name %> +Documentation=<%= @homepage %> +After=network.target local-fs.target + +[Service] +Type=simple +LimitNOFILE=<%= @no_file %> +PIDFile=%t/<%= @service_name %>.pid +User=<%= @service_name %> +Group=<%= @service_name %> +WorkingDirectory=/srv/deployment/<%= @service_name %>/deploy +Environment="NODE_PATH='/srv/deployment/<%= @service_name %>/deploy/node_modules'" "<%= @service_name.gsub(/[^a-z0-9_]/, '_').upcase %>_PORT=<%= @port %>" +ExecStart=/usr/bin/nodejs src/server.js -c /etc/<%= @service_name %>/config.yaml +Restart=always +RestartSec=5 +StandardOutput=syslog +StandardError=syslog +SyslogIdentifier=<%= @service_name %> +TimeoutStartSec=5 +TimeoutStopSec=60 + +[Install] +WantedBy=multi-user.target + diff --git a/dist/init-scripts/sysvinit.erb b/dist/init-scripts/sysvinit.erb new file mode 100644 index 0000000..f889596 --- /dev/null +++ b/dist/init-scripts/sysvinit.erb @@ -0,0 +1,172 @@ +#!/bin/sh +### BEGIN INIT INFO +# Provides: <%= @service_name %> +# Required-Start: $local_fs $network $remote_fs $syslog +# Required-Stop: $local_fs $network $remote_fs $syslog +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: <%= @description || @service_name %> +# Description: <%= @description ? @service_name + ' - ' + @description : @service_name %> +### END INIT INFO + + +# PATH should only include /usr/* if it runs after the mountnfs.sh script +PATH=/sbin:/usr/sbin:/bin:/usr/bin +DESC="<%= @service_name %> service" +NAME=<%= @service_name %> +SCRIPT_PATH=/srv/deployment/$NAME/deploy/src/server.js +DAEMON="/usr/bin/nodejs $SCRIPT_PATH" +DAEMON_ARGS="-c /etc/$NAME/config.yaml" +PIDFILE=/var/run/$NAME.pid +SCRIPTNAME=/etc/init.d/$NAME + +# Exit if the package is not installed +[ -e "$SCRIPT_PATH" ] || exit 0 + +# Read configuration variable file if it is present +# NOTE: only the DAEMON_ARGS var should be set in the file if present +[ -r /etc/default/$NAME ] && . /etc/default/$NAME + +# export some variables into the process' environment +export PORT +export INTERFACE +export NODE_PATH=/srv/deployment/$NAME/deploy/node_modules +export <%= @service_name.gsub(/[^a-z0-9_]/, '_').upcase %>_PORT=<%= @port %> + + +# Load the VERBOSE setting and other rcS variables +. /lib/init/vars.sh + +# Define LSB log_* functions. +# Depend on lsb-base (>= 3.2-14) to ensure that this file is present +# and status_of_proc is working. +. /lib/lsb/init-functions + +# +# Function that starts the daemon/service +# +do_start() +{ + # up the number of fds [sockets] from 1024 + ulimit -n <%= @no_file %> + + # Return + # 0 if daemon has been started + # 1 if daemon was already running + # 2 if daemon could not be started + + start-stop-daemon --start --quiet --pidfile $PIDFILE -bm \ + -c $NAME:$NAME --test \ + --exec /bin/sh -- \ + -c "$DAEMON $DAEMON_ARGS 2>&1 | logger -i -t $NAME" \ + || return 1 + start-stop-daemon --start --quiet --pidfile $PIDFILE -bm \ + -c <%= @service_name %>:<%= @service_name %> \ + --exec /bin/sh -- \ + -c "$DAEMON $DAEMON_ARGS 2>&1 | logger -i -t $NAME" \ + || return 2 + echo "Started <%= @service_name %> service on port <%= @port %>" + + + # Add code here, if necessary, that waits for the process to be ready + # to handle requests from services started subsequently which depend + # on this one. As a last resort, sleep for some time. + sleep 5 +} + +# +# Function that stops the daemon/service +# +do_stop() +{ + # Return + # 0 if daemon has been stopped + # 1 if daemon was already stopped + # 2 if daemon could not be stopped + # other if a failure occurred + start-stop-daemon --stop --quiet --retry=TERM/60/KILL/5 --pidfile $PIDFILE --name $NAME + RETVAL="$?" + [ "$RETVAL" = 2 ] && return 2 + # Wait for children to finish too if this is a daemon that forks + # and if the daemon is only ever run from this initscript. + # If the above conditions are not satisfied then add some other code + # that waits for the process to drop all resources that could be + # needed by services started subsequently. A last resort is to + # sleep for some time. + start-stop-daemon --stop --quiet --oknodo --retry=0/5/KILL/5 --exec $DAEMON + [ "$?" = 2 ] && return 2 + # Many daemons don't delete their pidfiles when they exit. + rm -f $PIDFILE + return "$RETVAL" +} + +# +# Function that sends a SIGHUP to the daemon/service +# +do_reload() { + # + # If the daemon can reload its configuration without + # restarting (for example, when it is sent a SIGHUP), + # then implement that here. + # + start-stop-daemon --stop --signal 1 --quiet --pidfile $PIDFILE --name $NAME + return 0 +} + +case "$1" in + start) + [ "$VERBOSE" != no ] && log_daemon_msg "Starting $DESC" "$NAME" + do_start + case "$?" in + 0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;; + 2) [ "$VERBOSE" != no ] && log_end_msg 1 ;; + esac + ;; + stop) + [ "$VERBOSE" != no ] && log_daemon_msg "Stopping $DESC" "$NAME" + do_stop + case "$?" in + 0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;; + 2) [ "$VERBOSE" != no ] && log_end_msg 1 ;; + esac + ;; + status) + status_of_proc "$DAEMON" "$NAME" && exit 0 || exit $? + ;; + #reload|force-reload) + # + # If do_reload() is not implemented then leave this commented out + # and leave 'force-reload' as an alias for 'restart'. + # + #log_daemon_msg "Reloading $DESC" "$NAME" + #do_reload + #log_end_msg $? + #;; + restart|force-reload) + # + # If the "reload" option is implemented then remove the + # 'force-reload' alias + # + log_daemon_msg "Restarting $DESC" "$NAME" + do_stop + case "$?" in + 0|1) + do_start + case "$?" in + 0) log_end_msg 0 ;; + 1) log_end_msg 1 ;; # Old process is still running + *) log_end_msg 1 ;; # Failed to start + esac + ;; + *) + # Failed to stop + log_end_msg 1 + ;; + esac + ;; + *) + echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2 + exit 3 + ;; +esac + diff --git a/dist/init-scripts/upstart.erb b/dist/init-scripts/upstart.erb new file mode 100644 index 0000000..4cffcb5 --- /dev/null +++ b/dist/init-scripts/upstart.erb @@ -0,0 +1,24 @@ +# Upstart job for <%= @service_name %> + +description "<%= @description ? @service_name + ' - ' + @description : @service_name %>" + +start on (local-filesystems and net-device-up IFACE!=lo) +stop on runlevel [!2345] + +# up ulimit -n a bit +limit nofile <%= @no_file %> <%= @no_file %> + +setuid "<%= @service_name %>" +setgid "<%= @service_name %>" + +env NODE_PATH="/srv/deployment/<%= @service_name %>/deploy/node_modules" +env <%= @service_name.gsub(/[^a-zA-Z0-9_]/, '_').upcase %>_PORT="<%= @port %>" + +respawn + +# wait 60 seconds for a graceful restart before killing the master +kill timeout 60 + +chdir /srv/deployment/<%= @service_name %>/deploy +exec /usr/bin/nodejs src/server.js -c /etc/<%= @service_name %>/config.yaml >> /var/log/<%= @service_name %>/main.log 2>&1 + diff --git a/doc/README.md b/doc/README.md new file mode 100644 index 0000000..26adc44 --- /dev/null +++ b/doc/README.md @@ -0,0 +1,13 @@ +This directory contains the documentation for the service template, aimed at +getting you started up and running quickly. + +The documentation should be read in the following order: + +1. [Short API design practices](api-design.md) +2. [Service template overview](template.md) +3. [Configuration](config.md) +4. [Useful Commands](commands.md) +5. [Coding Guide](coding.md) + +Have fun while creating RESTful API services! + diff --git a/doc/api-design.md b/doc/api-design.md new file mode 100644 index 0000000..20f2e5b --- /dev/null +++ b/doc/api-design.md @@ -0,0 +1,83 @@ +# API Design + +Before you start coding your service, you need to think hard about your API's +design, especially for a public service exposing its API. Below are a couple of +practices you should follow. + +- [Statelessness](#statelessness) +- [Versioning](#versioning) +- [Hierarchical URI Layout](#hierarchical-uri-layout) +- [HTTP Verbs](#http-verbs) +- [Documentation](#documentation) +- [See Also](#see-also) + +## Statelessness + +RESTful API services should be +[stateless](https://en.wikipedia.org/wiki/Service_statelessness_principle), since +they are conceptually modelled around *resources* (as opposed to *systems*). +Accordingly, your service should take actions depending on assumptions about the +caller or the current state of the service's process (e.g. that the user is +logged in, that they are allowed to modify a resource, etc.) + +## Versioning + +You should always version all of your API. Always. Period. Applications depend on +its stability and invariance. It is tolerable to add endpoints to an API +version, but removing or modifying existing ones is not an option. Thus, +versioning provides an easy way for you to improve upon the API while avoiding +third-party application breakage. The template enforces this practice by +requiring all of your [route files](../routes/) specify the API version. + +## Hierarchical URI Layout + +Use a hierarchical URI layout that is intuitive and makes sense. Grouping +endpoints under a common URI prefix allows both you and the future API consumer +to reason about the API. As an example, consider +[RESTBase](https://www.mediawiki.org/wiki/RESTBase)'s API layout: + +``` +/{domain} + -- /v1 + |- /page + | |- /title + | |- /html + | |- /data-parsoid + | -- /revision + -- /transform + |- /html/to/wikitext + |- /wikitext/to/html + -- /html/to/html +``` + +The API is grouped in two *sections* - `page` and `transform`. The former +exposes endpoints dealing with Wiki pages, while the latter comprises endpoints +transforming one format to another. + +## HTTP Verbs + +There are many [HTTP +verbs](http://www.w3.org/Protocols/rfc2616/rfc2616-sec9.html) you can use and +expose in your API. Use them appropriately. Especially, **do not allow GET +requests to modify** any type of content. + +## Documentation + +Document your API meticulously, and keep it up to date with the code. Remember +that API's are meant to be consumed by external applications, whose developers +most often do not know the internal workings of your stack. A good starting +point is to look into [Swagger](https://github.com/swagger-api/swagger-spec), a +specification for API declaration and documentation from which nice, demo-able +documentation such as [this](http://rest.wikimedia.org/en.wikipedia.org/v1/?doc) +can be automatically generated. + +## See Also + +The above is just a short list of things you should think about when designing +your API. Here are some resources you might find useful at this step: + +- https://github.com/Wikia/guidelines/tree/master/APIDesign +- https://restful-api-design.readthedocs.org/en/latest/ +- http://www.vinaysahni.com/best-practices-for-a-pragmatic-restful-api +- http://www.thoughtworks.com/insights/blog/rest-api-design-resource-modeling + diff --git a/doc/coding.md b/doc/coding.md new file mode 100644 index 0000000..27aa85e --- /dev/null +++ b/doc/coding.md @@ -0,0 +1,350 @@ +# Coding Guide + +Let's get started! + +- [Route Set-up](#route-set-up) +- [Routes](#routes) +- [Promises](#promises) + - [I/O](#io) + - [External Requests](#external-requests) +- [Error Handling](#error-handling) +- [Logging and Metrics](#logging-and-metrics) + - [Logging](#logging) + - [Metrics Collection](#metrics-collection) +- [Test Cases](#test-cases) + +## Route Set-up + +All of the routes are read from the [routes directory](../routes) and are +automatically mounted on start-up. The first step is to create a new route file +by copying the [route template](../routes/empty.js.template): + +```bash +$ cd routes +$ cp empty.js.template people.js +``` + +Now, open `people.js` in your favourite editor. The first thing you need to +decide is the mount path for the routes contained in the file and the API +version the route belongs to. Let's say that this file will contain routes +pertaining to famous people, so a path like `/people/` makes sense here. +Obviously, the API version is going to be `1`. Change lines 32 - 36 to reflect +this: + +```javascript +return { + path: '/people', + api_version: 1, + router: router +}; +``` + +This causes all of the routes you create in `people.js` to be mounted on +`/{domain}/v1/people/`, where `{domain}` represents the sought domain (such as +`en.wikipedia.org`, `www.mediawiki.org`, etc.). + +## Routes + +Creating routes is accomplished by calling `router.METHOD(path, handlerFunction)` +where `METHOD` is the HTTP verb you want to create the route for (`get`, `put`, +`post`, etc.), and `handlerFunction` is the callback function called when the +relative path `path` is matched. We are now ready to set up our first route. +Replace line 23 with the following code: + +```javascript +router.get('/:name', function(req, res) { + + res.status(200).json({ + name: decodeURIComponent(req.params.name) + }); + +}); +``` + +The route's path is `:name`, which signifies a variable path. In this case, it +is the name of the person the request is about. Thus, both +`/people/Albert_Einstein` and `/people/David_Lynch` will match the route. The +callback's body is rather simple: we set the response's status to `200` and send +back a JSON containing the person's name. To learn more about routes and their +various options, read Express.js' [routing +guide](http://expressjs.com/guide/routing.html). + +## Promises + +The service template includes the [bluebird +module](https://github.com/petkaantonov/bluebird) for handling asynchronous +patterns via promises. Prime examples of when they should be used are performing +external requests or I/O actions. Promises allow the service process not to +block on them and continue serving other requests until the action is completed. + +### I/O + +Coming back to our example route, let's say that we want to serve a simple HTML +document on the endpoint `/people/:name/about`. To do so, first we need to +require and *promisify* the `fs` module. Put this line in the header of your +routes file (right below line 6): + +```javascript +var fs = BBPromise.promisifyAll(require('fs')); +``` + +This creates additional functions, which are *promisified* versions of the +original ones exported by the `fs` module. Henceforth, we can read a file either +using the built-in `fs.readFile()` or its promise-aware counterpart +`fs.readFileAsync()`. + +Armed with this knowledge, we can now easily create a route handler: + +```javascript +router.get('/:name/about', function(req, res) { + + // read the file + return fs.readFileAsync(__dirname + '/../static/index.html') + // and then send back its contents + .then(function(src) { + res.status(200).type('html').send(src); + }); + +}); +``` +As you can see, promises allow us to specify chained actions in a natural way +(using the `.then()` continuation pattern). Note that, when using promises in +services derived from this template it is important that you `return` the +promise to the caller. Doing so allows the template's framework to automatically +handle any possible errors during the promise's execution. + +### External Requests + +One other area where promises come in handy is making external requests. Suppose +we want to serve the latest news about a person from +[Wikinews](http://www.wikinews.org). The template includes the +[preq](https://github.com/gwicke/preq) -- a module promisifying the popular +[request](https://github.com/request/request) module -- which we can use +right away: + +```javascript +router.get('/:name/news/:lang?', function(req, res) { + + // set the language if not set + var lang = req.params.lang || 'en'; + + // get the news + return preq.get({ + uri: 'https://' + lang + '.wikinews.org/wiki/' + + encodeURIComponent(req.params.name) + }).then(function(wnRes) { + res.status(200).type('html').send(wnRes.body); + }); + +}); +``` + +## Error Handling + +As mentioned earlier, the template is capable of automatically handling errors +for you. However, you might want to take matters into your own hands in some +occasions. The template provides a convenient `HTTPError` object class which you +can use. + +Let's revise the handler for the `/people/:name/about` route. It does not seem +to be very useful, as it returns the same content for any given name. We would +like it to return content relevant to the person whose name was specified in the +request URI by looking up the file `/static/name.html`. If the file does not +exist, a `404` should be returned to the caller. + +```javascript +router.get('/:name/about', function(req, res) { + + return fs.readFileAsync(__dirname + '/../static/' + + encodeURIComponent(req.params.name) + '.html') + .then(function(src) { + res.status(200).type('html').send(src) + }).catch(function(err) { + throw new HTTPError({ + status: 404, + type: 'not_found', + title: 'Not Found', + detail: 'No information could be found on ' + req.params.name + }); + }); + +}); +``` + +Note that you can also attach additional debug information to the `HTTPError` +object to help you track down bugs. This information is going to be logged, but +will not reach the client, thus ensuring no sensitive information is leaked +unintentionally. To do so, simply add any property you deem important when +creating / throwing the error. + +## Logging and Metrics + +Logging and metrics collection is supported out of the box via +[service-runner](https://github.com/wikimedia/service-runner). They are exposed +in route handler files via the `req.logger` and `app.metrics` objects. + +### Logging + +To log something, simply use `req.logger.log(level, what)`. The logger itself is +a [bunyan](https://github.com/trentm/node-bunyan) wrapper, and thus supports the +following levels: + +- `trace` +- `debug` +- `info` +- `warn` +- `error` +- `fatal` + +Additionally, it is good practice to attach a component name to the log level as +it eases log indexing and filtering later in production. For example, if a log +entry has the `debug` level and pertains to one of our example routes, the log +level could be set to `debug/people`. The `what` portion of the log entry can be +either a string message, or any *stringifiable* object. As an example, let's +log the person's name given to the `/people/:name/about` route and the file name +that is going to be looked up: + +```javascript +router.get('/:name/about', function(req, res) { + + var info = { + name: req.params.name, + path: __dirname + '/../static/' + + encodeURIComponent(req.params.name) + '.html' + }; + + req.logger.log('debug/people/about', info); + + return fs.readFileAsync(info.path) + .then(function(src) { + res.status(200).type('html').send(src) + }).catch(function(err) { + throw new HTTPError({ + status: 404, + type: 'not_found', + title: 'Not Found', + detail: 'No information could be found on ' + info.name + }); + }); + +}); +``` + +As you can see, the request object (`req`) has an additional property - +`req.logger`, which allows you to log messages and objects in the context of the +current request. To do so, it attaches a unique *request ID* to each logged +information. If you would like to log context-free information, you can use the +`app.logger` object instead, even though that is not recommended. + +### Metrics Collection + +Collecting metrics is a great way to have insights into the overall health and +performance of your service. When using the template, this is as easy as calling +one of the following methods: + +- `app.metrics.timing` +- `app.metrics.increment` +- `app.metrics.decrement` +- `app.metrics.histogram` +- `app.metrics.gauge` +- `app.metrics.unique` + +How can one collect them? Let's show it on `/people/:name/news`. This route uses +an external request to complete its action, which means that you have little +control over your service's response time, as it is dominated by the request to +Wikinews. Two interesting metrics that we can collect here (and that directly +affect the service's response time) are the external request's response time and +the size of its response. We can measure the former with `app.metrics.timing()` +and the latter with `app.metrics.histogram()`. Additionally, it interesting to +see the distribution of languages, which can be achieved with +`app.metrics.unique()`. + +```javascript +router.get('/:name/news/:lang?', function(req, res) { + + // set the language if not set + var lang = req.params.lang || 'en'; + + // count the language occurrence + app.metrics.unique('people.news.lang', lang); + // start measuring the time + var startTime = Date.now(); + + // get the news + return preq.get({ + uri: 'https://' + lang + '.wikinews.org/wiki/' + + encodeURIComponent(req.params.name) + }).then(function(wnRes) { + // external request done, report the request time + app.metrics.timing('people.news.time', Date.now() - startTime); + // also report the payload's size + app.metrics.histogram('people.news.size', wnRes.body.length); + res.status(200).type('html').send(wnRes.body); + }); + +}); +``` +For more information on the available methods, see the [service-runner +documentation](https://github.com/wikimedia/service-runner#metric-reporting). + +## Test Cases + +The service needs to thoroughly tested since other services and clients are +going to depend on it. The template uses +[mocha](https://github.com/mochajs/mocha) for test execution and provides some +useful utility functions in [test/utils](../test/utils). + +To create a test suite for our example routes, create the `people` directory in +`/test/features/` and two files inside of it: `about.js` and `news.js`. These +will test the example routes. Let's start with `about.js`: + +```javascript +'use strict'; + + +// mocha defines to avoid JSHint breakage +/* global describe, it, before, beforeEach, after, afterEach */ + + +var preq = require('preq'); +var assert = require('../../utils/assert.js'); +var server = require('../../utils/server.js'); + + +describe('people - about', function() { + + this.timeout(20000); + + before(function () { return server.start(); }); + + // common URI prefix + var uri = server.config.uri + 'en.wikipedia.org/v1/people/'; + + it('get HTML for index', function() { + return preq.get({ + uri: uri + 'index/about' + }).then(function(res) { + // check the status + assert.status(res, 200); + // check the returned Content-Type header + assert.contentType(res, 'text/html'); + // inspect the body + assert.notDeepEqual(res.body, undefined, 'No body returned!'); + }); + }); + + it('fail for a non-existent person', function() { + return preq.get({ + uri: uri + 'Walt_Disney/about' + }).then(function(res) { + // if we are here, no error was thrown, not good + throw new Error('Expected an error to be thrown, got status: ', res.status); + }, function(err) { + // inspect the status + assert.deepEqual(err.status, 404); + }); + }); + +}); +``` + diff --git a/doc/commands.md b/doc/commands.md new file mode 100644 index 0000000..e1bc803 --- /dev/null +++ b/doc/commands.md @@ -0,0 +1,97 @@ +# Useful Commands + +- [npm 101](#npm-101) +- [Service-related Tasks](#service-related-tasks) +- [Docker](#docker) + +## npm 101 + +[npm](https://www.npmjs.com/) is the package manager for Node.js modules. It is +used for managing and publishing modules. + +The template (and your future service) needs its dependencies to be present. +Install them with: + +``` +npm install +``` + +Sometimes the configuration can get a bit messed up you may experience strange +*npm*-related errors when running your service. The remedy is: + +``` +rm -rf node_modules +npm install +``` + +If you need to add a dependency, this will install it and add it your +`package.json`: + +``` +npm install --save <name_of_module> +``` + +## Service-related Tasks + +The template comes with some handy `npm` tasks. To start your service based on +the configuration in `config.yaml`, use simply: + +``` +npm start +``` + +Starting unit tests is as easy as: + +``` +npm test +``` + +A code coverage utility is also available: + +``` +npm run-script coverage +``` + +Once the script finishes, open up `coverage/lcov-report/index.html` which will +show you detailed reports about which lines of code have been covered by the +unit tests. + +## Docker + +Included in the template is also a Dockerfile, allowing you to run and test your +service in a production-like environment inside of a Docker container. You need +to have [docker](https://www.docker.com/) installed if you are on a Linux host, +or [boot2docker](http://boot2docker.io/) in case of OSX/Windows hosts. + +To start your service in the container, execute: + +``` +npm run-script docker-start +``` + +The first time you run it, it takes a while as the script automatically builds +the full image and then starts the service. + +If you want to test your service instead, use: + +``` +npm run-script docker-test +``` + +Similarly, to run code coverage, run: + +``` +npm run-script docker-cover +``` + +*Note:* On Linux hosts, running `docker` requires superuser rights, so you may +need to prefix the commands with `sudo`. If you are on a Ubuntu box, you may +circumvent that by adding yourself to the `docker` group: + +``` +sudo gpasswd -a <your_username> docker +``` + +After you log out completely and log back in, you should be able to run the +above scripts without resorting to `sudo`. + diff --git a/doc/config.md b/doc/config.md new file mode 100644 index 0000000..e2009f8 --- /dev/null +++ b/doc/config.md @@ -0,0 +1,49 @@ +# Configuration + +The first thing you should configure is the service's general information (name, +description, etc.). Open up [`package.json`](../package.json) and change (at +least) the following fields: + +- `name` +- `version` +- `description` +- `repository/url` +- `keywords` +- `author` +- `contributors` +- `licence` +- `bugs` +- `homepage` + +Now change the service's name in [`config.dev.yaml`](../config.dev.yaml#L26) and +[`config.prod.yaml`](../config.prod.yaml#L26). While you are there, you might +want to look at and play with other configuration parameters, such as: + +- `num_workers` - the number of workers to start; some special values are: + - `0` will not do any forking, but run the service in the master process + - `ncpu` will spawn as many worker processes as there are CPU cores on the + host +- `worker_heap_limit_mb` - the maximum amount of memory (in MB) a worker's heap + can have +- `logging` and `metrics` - the configuration for logging and metrics facilities +- `services` - the block instructing the master process which services to start; + there can be more than one service, if, e.g., your service depends on another + Node.js service being present; each service has further the following + information: + - `name` - the service's name + - `module` - the module starting the service; if not given, the service's name + is used instead + - `conf` - the configuration object passed directly to the service; settings + to consider (remember to update them in both + [`config.dev.yaml`](../config.dev.yaml) as well as + [`config.prod.yaml`](../config.prod.yaml)): + - `port` - the port to start the service on (default: `8888`) + - `interface` - where to bind the service's server (default: `0.0.0.0`) + - you may add here any other configuration options needed by your service, + as long as it is [valid YAML](http://www.yaml.org/spec/1.2/spec.html); + these will be accessible via the `app.conf` object + +For more information on configuration possibilities, take a look at the +[service-runner +documentation](https://github.com/wikimedia/service-runner#config-loading). + diff --git a/doc/template.md b/doc/template.md new file mode 100644 index 0000000..b8c9af9 --- /dev/null +++ b/doc/template.md @@ -0,0 +1,67 @@ +# Service Template Overview + +This service template allows you to quickly dive into coding your own RESTful API +service. + +- [Stack](#stack) +- [Repository Outline](#repository-outline) + +## Stack + +The template makes use the following components: + +- [service-runner](https://github.com/wikimedia/service-runner) +- [express.js](http://expressjs.com/) +- [Bluebird Promises](https://github.com/petkaantonov/bluebird) +- [mocha](http://mochajs.org/) +- [istanbul](https://github.com/gotwarlost/istanbul) +- [docker](https://www.docker.com/) + +Everything begins and ends with *service-runner*. It is the supervisor in charge +of starting the service and controlling its execution. It spawns worker +processes, each of which accepts and handles connections. If some workers use +too much heap memory, they are restarted. Additionally, it provides your service +with configurable logging and metrics facilities. + +When it comes to request handling, *express.js* and *Bluebird* take the centre +stage. *express.js* is in charge of receiving the requests, routing and +dispatching them to the correct handlers and send responses back to the clients. +*Bluebird* comes into play when there are actions which warrant asynchronous +processing (such as reading files, dispatching requests to external resources, +etc.). You can find example route handlers constructing the response both +synchronously and asynchronously in this template's [routes](../routes/) +directory. + +Finally, testing is an important aspect of service programming, not only for +their creators, but also testers (think CI) and consumers. The template uses +*mocha* for carrying out the testing, and *istanbul* for reporting code coverage +with tests. There are quite a few tests [available](../test/) for you to check +out. + +The WMF is in the process of switching its production servers to Debian Jessie. +As people developing services might use different platforms, the template +provides also a Dockerfile, with which one can execute their service inside a +container running Debian Jessie. + +## Repository Outline + +Below is a simplified repository outline listing the important files/directories +for service development. + +- [`package.json`](../package.json) - the file containing the service's name and + dependencies +- [`config.dev.yaml`](../config.dev.yaml) and + [`config.prod.yaml`](../config.prod.yaml) - contain development and production + configuration settings for the service +- [`server.js`](../server.js) - the service's starter script +- [`app.js`](../app.js) - contains the application declaration and loading logic +- [`routes`](../routes/) - contains the definitions of the loaded routes; this + is where most of your coding is to take place +- [`lib/util.js`](../lib/util.js) - contains some utility functions and classes +- [`static`](../static/) - this is where served static files go (HTML, CSS, + client-side JS, etc.) +- [`test`](../test/) - contains the test files for the example routes in the + template; you should add your own here +- [`scripts/docker.js`](../scripts/docker.js) - a utility script building the + service's docker image and starting the container + diff --git a/lib/util.js b/lib/util.js new file mode 100644 index 0000000..58ed1e3 --- /dev/null +++ b/lib/util.js @@ -0,0 +1,240 @@ +'use strict'; + + +var BBPromise = require('bluebird'); +var util = require('util'); +var express = require('express'); +var uuid = require('node-uuid'); +var bunyan = require('bunyan'); + + +/** + * Error instance wrapping HTTP error responses + */ +function HTTPError(response) { + + Error.call(this); + Error.captureStackTrace(this, HTTPError); + + if(response.constructor !== Object) { + // just assume this is just the error message + var msg = response; + response = { + status: 500, + type: 'internal_error', + title: 'InternalError', + detail: msg + }; + } + + this.name = this.constructor.name; + this.message = response.status + ''; + if(response.type) { + this.message += ': ' + response.type; + } + + for (var key in response) { + this[key] = response[key]; + } + +} + +util.inherits(HTTPError, Error); + + +/** + * Generates an object suitable for logging out of a request object + * + * @param {Request} the request + * @return {Object} an object containing the key components of the request + */ +function reqForLog(req) { + + return { + url: req.originalUrl, + headers: req.headers, + method: req.method, + params: req.params, + query: req.query, + body: req.body, + remoteAddress: req.connection.remoteAddress, + remotePort: req.connection.remotePort + }; + +} + + +/** + * Serialises an error object in a form suitable for logging + * + * @param {Error} the error to serialise + * @return {Object} the serialised version of the error + */ +function errForLog(err) { + + var ret = bunyan.stdSerializers.err(err); + ret.status = err.status; + ret.type = err.type; + ret.detail = err.detail; + + return ret; + +} + +/** + * Generates a unique request ID + * + * @return {String} the generated request ID + */ +var reqIdBuff = new Buffer(16); +function generateRequestId() { + + uuid.v4(null, reqIdBuff); + return reqIdBuff.toString('hex'); + +} + + + +/** + * Wraps all of the given router's handler functions with + * promised try blocks so as to allow catching all errors, + * regardless of whether a handler returns/uses promises + * or not. + * + * @param {Router} the router object + * @param {Application} the application object + */ +function wrapRouteHandlers(router, app) { + + router.stack.forEach(function(routerLayer) { + routerLayer.route.stack.forEach(function(layer) { + var origHandler = layer.handle; + layer.handle = function(req, res, next) { + BBPromise.try(function() { + req.headers = req.headers || {}; + req.headers['x-request-id'] = req.headers['x-request-id'] || generateRequestId(); + req.logger = app.logger.child({request_id: req.headers['x-request-id']}); + req.logger.log('trace/req', {req: reqForLog(req), msg: 'incoming request'}); + return origHandler(req, res, next); + }) + .catch(next); + }; + }); + }); + +} + + +/** + * Generates an error handler for the given applications + * and installs it. Usage: + * + * @param {Application} app the application object to add the handler to + */ +function setErrorHandler(app) { + + app.use(function(err, req, res, next) { + var errObj; + // ensure this is an HTTPError object + if(err.constructor === HTTPError) { + errObj = err; + } else if(err instanceof Error) { + // is this an HTTPError defined elsewhere? (preq) + if(err.constructor.name === 'HTTPError') { + var o = { status: err.status }; + if(err.body && err.body.constructor === Object) { + Object.keys(err.body).forEach(function(key) { + o[key] = err.body[key]; + }); + } else { + o.detail = err.body; + } + o.message = err.message; + errObj = new HTTPError(o); + } else { + // this is a standard error, convert it + errObj = new HTTPError({ + status: 500, + type: 'internal_error', + title: err.name, + detail: err.message, + stack: err.stack + }); + } + } else if(err.constructor === Object) { + // this is a regular object, suppose it's a response + errObj = new HTTPError(err); + } else { + // just assume this is just the error message + errObj = new HTTPError({ + status: 500, + type: 'internal_error', + title: 'InternalError', + detail: err + }); + } + // ensure some important error fields are present + if(!errObj.status) { errObj.status = 500; } + if(!errObj.type) { errObj.type = 'internal_error'; } + // add the offending URI and method as well + if(!errObj.method) { errObj.method = req.method; } + if(!errObj.uri) { errObj.uri = req.url; } + // some set 'message' or 'description' instead of 'detail' + errObj.detail = errObj.detail || errObj.message || errObj.description || ''; + // adjust the log level based on the status code + var level = 'error'; + if(Number.parseInt(errObj.status) < 400) { + level = 'trace'; + } else if(Number.parseInt(errObj.status) < 500) { + level = 'info'; + } + // log the error + req.logger.log(level + + (errObj.component ? '/' + errObj.component : '/' + errObj.status), + errForLog(errObj)); + // let through only non-sensitive info + var respBody = { + status: errObj.status, + type: errObj.type, + title: errObj.title, + detail: errObj.detail, + method: errObj.method, + uri: errObj.uri + }; + res.status(errObj.status).json(respBody); + }); + +} + + +/** + * Creates a new router with some default options. + * + * @param {Object} opts additional options to pass to express.Router() + * @return {Router} a new router object + */ +function createRouter(opts) { + + var options = { + mergeParams: true + }; + + if(opts && opts.constructor === Object) { + Object.keys(opts).forEach(function(key) { + options[key] = opts[key]; + }); + } + + return express.Router(options); + +} + + +module.exports = { + HTTPError: HTTPError, + wrapRouteHandlers: wrapRouteHandlers, + setErrorHandler: setErrorHandler, + router: createRouter +}; + diff --git a/package.json b/package.json index 1217bb7..ba57194 100644 --- a/package.json +++ b/package.json @@ -2,9 +2,13 @@ "name": "Graphoid", "version": "0.1.0", "description": "renders vega graphs from mediawiki pages", + "main": "./app.js", "scripts": { "start": "service-runner", "test": "mocha", + "docker-start": "./scripts/docker.js", + "docker-test": "./scripts/docker.js --test", + "docker-cover": "./scripts/docker.js --cover", "coverage": "istanbul cover _mocha -- -R spec" }, "repository": { @@ -27,21 +31,21 @@ "homepage": "https://www.mediawiki.org/wiki/Extension:Graph", "dependencies": { "vega": "git+http://g...@github.com/nyurik/vega", - "bluebird": "^2.9.9", - "body-parser": "^1.12.0", - "compression": "^1.4.1", + "bluebird": "^2.9.14", + "body-parser": "^1.12.1", + "bunyan": "^1.3.4", + "compression": "^1.4.3", "domino": "^1.0.18", - "express": "^4.11.2", - "js-yaml": "^3.2.6", - "multer": "^0.1.7", - "preq": "^0.3.9", - "service-runner": "^0.0.2" + "express": "^4.12.2", + "js-yaml": "^3.2.7", + "node-uuid": "^1.4.3", + "preq": "^0.3.12", + "service-runner": "^0.1.5" }, "devDependencies": { "assert": "^1.3.0", - "bunyan": "^1.3.4", - "istanbul": "^0.3.6", - "mocha": "^2.1.0", + "istanbul": "^0.3.8", + "mocha": "^2.2.1", "mocha-jshint": "0.0.9", "mocha-lcov-reporter": "0.0.1" } diff --git a/routes/empty.js.template b/routes/empty.js.template index 314a1a9..97cff86 100644 --- a/routes/empty.js.template +++ b/routes/empty.js.template @@ -2,14 +2,17 @@ var BBPromise = require('bluebird'); -var express = require('express'); var preq = require('preq'); +var sUtil = require('../lib/util'); + +// shortcut +var HTTPError = sUtil.HTTPError; /** * The main router object */ -var router = express.Router(); +var router = sUtil.router(); /** * The main application object reported when this module is require()d @@ -17,14 +20,18 @@ var app; +/** ROUTE DECLARATIONS GO HERE **/ module.exports = function(appObj) { app = appObj; + // the returned object mounts the routes on + // /{domain}/vX/mount/path return { - path: '/full/path/to/mount/routes/to', + path: '/mount/path', + api_version: X, // must be a number! router: router }; diff --git a/routes/ex.js b/routes/ex.js new file mode 100644 index 0000000..e8b0a3e --- /dev/null +++ b/routes/ex.js @@ -0,0 +1,132 @@ +'use strict'; + + +var BBPromise = require('bluebird'); +var preq = require('preq'); +var sUtil = require('../lib/util'); +var fs = BBPromise.promisifyAll(require('fs')); + +// shortcut +var HTTPError = sUtil.HTTPError; + + +/** + * The main router object + */ +var router = sUtil.router(); + +/** + * The main application object reported when this module is require()d + */ +var app; + + +/******************** + * ERROR EXAMPLES * + ********************/ + + +/** + * GET /err/array + * An example route creating an invalid array to show generic, + * direct error handling + */ +router.get('/err/array', function(req, res) { + + // let's create an array with -1 elems! + var arr = new Array(-1); + // this is never reached + res.send(arr.join()); + +}); + + +/** + * GET /err/file + * Showcases promise error handling. The function is trying to + * read a non-existent file, which will produce an error, + * automatically handled by the template. + */ +router.get('/err/file', function(req, res) { + + // NOTE the return statement here, the promise + // must be returned! + // read the file + return fs.readFileAsync('../mushrooms.txt') + // and then send it back to the caller + .then(function(text) { + // note that this point is never reached + res.send(text); + }); + +}); + + +/** + * GET /err/manual/error + * Throws a generic error manually + */ +router.get('/err/manual/error', function(req, res) { + + // simulate a constraint check + var max = 50; + if(max > 10) { + throw new Error('A maximum value of 10 is expected, ' + max + ' given!'); + } + +}); + + +/** + * GET /err/manual/deny + * Denies access to this resource endpoint + */ +router.get('/err/manual/deny', function(req, res) { + + // don't allow access + throw new HTTPError({ + status: 403, + type: 'access_denied', + title: 'Access denied', + detail: 'No access is allowed to this endpoint' + }); + +}); + + +/** + * GET /err/manual/auth + */ +router.get('/err/manual/auth', function(req, res) { + + // pretend to read a token file + // again, note the return statement + return fs.readFileAsync(__dirname + '/../static/index.html') + // and pretend to compare it with what the user sent + .then(function(token) { + if(!req.params || req.params.token !== token) { + // nope, not authorised to be here, sorry + throw new HTTPError({ + status: 401, + type: 'unauthorized', + title: 'Unauthorized', + detail: 'You are not authorized to fetch this endpoint!' + }); + } + }); + +}); + + +module.exports = function(appObj) { + + app = appObj; + + return { + path: '/ex', + skip_domain: true, + router: router + }; + +}; + diff --git a/routes/info.js b/routes/info.js index 4e0e13b..5dd2590 100644 --- a/routes/info.js +++ b/routes/info.js @@ -1,13 +1,13 @@ 'use strict'; -var express = require('express'); +var sUtil = require('../lib/util'); /** * The main router object */ -var router = express.Router(); +var router = sUtil.router(); /** * The main application object reported when this module is require()d @@ -82,6 +82,7 @@ return { path: '/_info', + skip_domain: true, router: router }; diff --git a/routes/root.js b/routes/root.js index 511afe8..7c1158e 100644 --- a/routes/root.js +++ b/routes/root.js @@ -1,23 +1,36 @@ 'use strict'; -var router = require('express').Router(); + +var sUtil = require('../lib/util'); + + +/** + * The main router object + */ +var router = sUtil.router(); + /** * GET /robots.txt - * no indexing + * Instructs robots no indexing should occur on this domain. */ router.get('/robots.txt', function(req, res) { + res.set({ 'User-agent': '*', 'Disallow': '/' }).end(); + }); + module.exports = function(appObj) { return { path: '/', + skip_domain: true, router: router }; }; + diff --git a/routes/v1.js b/routes/v1.js index eca8aad..0ec036d 100644 --- a/routes/v1.js +++ b/routes/v1.js @@ -1,10 +1,16 @@ 'use strict'; -var express = require('express'), - preq = require('preq'), - Promise = require('bluebird'), - urllib = require('url'), - vega = null; // Visualization grammar - https://github.com/trifacta/vega +var Promise = require('bluebird'); +var preq = require('preq'); +var domino = require('domino'); +var sUtil = require('../lib/util'); +var urllib = require('url'); +var vega = null; // Visualization grammar - https://github.com/trifacta/vega + +/** + * The main router object + */ +var router = sUtil.router(); /** * Main log function @@ -17,12 +23,7 @@ var metrics; /** - * The main router object - */ -var router = express.Router(); - -/** - * A set of 'oldHost' => 'newHost' mappings + * A set of 'oldDomain' => 'newDomain' mappings */ var domainMap = false; @@ -37,7 +38,7 @@ var timeout = 10000; /** - * Regex to validate host parameter + * Regex to validate domain parameter */ var serverRe = null; @@ -50,10 +51,6 @@ // Uncomment to console.log metrics calls //metrics = wrapMetrics(app.metrics); - - // Workaround for missing funcs - metrics.increment = app.metrics.statsd.increment.bind(app.metrics.statsd); - log('info/init', 'starting v1' ); @@ -124,8 +121,8 @@ } } if (url && domainMap) { - url = url.replace(/^(https?:\/\/)([^#?\/]+)/, function (match, prot, host) { - var repl = domainMap[host]; + url = url.replace(/^(https?:\/\/)([^#?\/]+)/, function (match, prot, domain) { + var repl = domainMap[domain]; return repl ? prot + repl : match; }); } @@ -150,7 +147,7 @@ var start = Date.now(); var p = state.request.params, - host = p.host, + domain = p.domain, title = p.title, revid = p.revid, id = p.id; @@ -189,23 +186,23 @@ } state.graphId = id; - var parts = serverRe.exec(host); + var parts = serverRe.exec(domain); if (!parts) { - throw new Err('info/param-host', 'req.host'); + throw new Err('info/param-domain', 'req.domain'); } - // Remove optional part #2 from host (makes m. links appear as desktop to optimize cache) + // Remove optional part #2 from domain (makes m. links appear as desktop to optimize cache) // 1 2 3 // en.m.wikipedia.org - var host2 = parts[3]; + var domain2 = parts[3]; if (parts[1]) { - host2 = parts[1] + host2; + domain2 = parts[1] + domain2; } - host2 = (domainMap && domainMap[host2]) || host2; + domain2 = (domainMap && domainMap[domain2]) || domain2; - state.host = host2; - state.apiUrl = defaultProtocol + '//' + host2 + '/w/api.php'; - if (host !== host2) { - state.log.backend = host2; + state.domain = domain2; + state.apiUrl = defaultProtocol + '//' + domain2 + '/w/api.php'; + if (domain !== domain2) { + state.log.backend = domain2; } metrics.endTiming('req.time', start); @@ -214,7 +211,7 @@ } /** - * Retrieve graph specifications from the host + * Retrieve graph specifications from the domain * @param state is the object with the current state of the request processing */ function downloadGraphDef(state) { @@ -241,7 +238,7 @@ }; return preq(requestOpts) .then(function (resp) { - metrics.endTiming('host.time', startApiReq); + metrics.endTiming('domain.time', startApiReq); return resp; }); @@ -254,18 +251,18 @@ if (apiRes.status !== 200) { state.log.apiRetStatus = apiRes.status; - throw new Err('error/host-status', 'host.status'); + throw new Err('error/domain-status', 'domain.status'); } var res = apiRes.body; if (res.hasOwnProperty('error')) { state.log.apiRetError = res.error; - throw new Err('error/host-error', 'host.error'); + throw new Err('error/domain-error', 'domain.error'); } if (res.hasOwnProperty('warnings')) { state.log.apiWarning = res.warnings; - log('warn/host-warning', state.log); + log('warn/domain-warning', state.log); // Warnings are usually safe to continue } @@ -293,10 +290,10 @@ if (res.hasOwnProperty('continue')) { return merge(state.apiRequest, res.continue); } - throw new Err('info/host-no-graph', 'host.no-graph'); + throw new Err('info/domain-no-graph', 'domain.no-graph'); }).then(function () { - metrics.endTiming('host.total', startDefDownload); + metrics.endTiming('domain.total', startDefDownload); return state; }); } @@ -311,8 +308,8 @@ var start = Date.now(); // BUG: see comment above at vega.data.load.sanitizeUrl = ... - // In case of non-absolute URLs, use requesting host as "local" - vega.config.baseURL = defaultProtocol + '//' + state.host; + // In case of non-absolute URLs, use requesting domain as "local" + vega.config.baseURL = defaultProtocol + '//' + state.domain; vega.headless.render({spec: state.graphData, renderer: 'canvas'}, function (err, result) { if (err) { @@ -337,7 +334,7 @@ /** * Main entry point for graphoid */ -router.get('/:host/:title/:revid/:id.png', function(req, res) { +router.get('/:title/:revid/:id.png', function(req, res) { var start = Date.now(); var state = {request: req, response: res}; @@ -387,7 +384,8 @@ init(app); return { - path: '/v1', + path: '/', + api_version: 1, router: router }; }; diff --git a/scripts/docker.js b/scripts/docker.js new file mode 100755 index 0000000..0b4b433 --- /dev/null +++ b/scripts/docker.js @@ -0,0 +1,170 @@ +#!/usr/bin/env node + + +'use strict'; + +var fs = require('fs'); +var spawn = require('child_process').spawn; +var P = require('bluebird'); + + +// load info from the package definition +var pkg = require('../package.json'); +// load info from the service-runner config file +var config = require('js-yaml').safeLoad(fs.readFileSync(__dirname + '/../config.yaml')); + +// use the package's name as the image name +var img_name = pkg.name; +// the container's name +var name = pkg.name + '-' + Date.now() + '-' + Math.floor(Math.random() * 1000); + +// holds the curently running process +var child; + + +/** + * Wraps a child process spawn in a promise which resolves + * when the child process exists. + * + * @param {Array} args the command and its arguments to run (uses /usr/bin/env) + * @return {Promise} the promise which is fulfilled once the child exists + */ +function promised_spawn(args) { + + return new P(function(resolve, reject) { + child = spawn('/usr/bin/env', args, {stdio: 'inherit'}); + child.on('exit', resolve); + }); + +} + + +/** + * Spawns a docker process which (re)builds the image + * + * @return {Promise} the promise starting the build + */ +function build_img() { + + return promised_spawn(['docker', 'build', '-t', img_name, '.']); + +} + + +/** + * Starts the container either using the default script + * (npm start) or the test script (npm test) if do_tests is set + * + * @param {Object} options additional options + * @prop {Boolean} tests whether to start the tests instead of the service + * @prop {Boolean} coverage whether to start the tests and coverage instead of the service + * @return {Promise} the promise starting the container + */ +function start_container(options) { + + var cmd = ['docker', 'run', '--name', name]; + + // list all of the ports defined in the config file + config.services.forEach(function(srv) { + srv.conf = srv.conf || {}; + srv.conf.port = srv.conf.port || 8888; + cmd.push('-p', srv.conf.port + ':' + srv.conf.port); + }); + + // append the image name to create a container from + cmd.push(img_name); + + // use a different command to run inside if + // we have to run the tests or coverage + if(options.tests) { + cmd.push('/usr/bin/npm', 'test'); + } else if(options.coverage) { + cmd.push('/usr/bin/npm', 'run-script', 'coverage'); + } + + // ok, start the container + return promised_spawn(cmd); + +} + + +/** + * Deletes the container + * + * @return {Promise} the promise removing the container + */ +function remove_container() { + + return promised_spawn(['docker', 'rm', name]); + +} + + +/** + * Main process signal handler + */ +function sig_handle() { + if(child) { + child.kill('SIGINT'); + } +} + + +function main(options) { + + // trap exit signals + process.on('SIGINT', sig_handle); + process.on('SIGTERM', sig_handle); + + // change the dir + process.chdir(__dirname + '/..'); + + // start the process + return build_img() + .then(function() { + return start_container(options); + }) + .then(remove_container); + +} + + +if(module.parent === null) { + + var opts = { + tests: false, + coverage: false + }; + + // check for command-line args + var args = process.argv.slice(2); + var arg; + while((arg = args.shift()) !== undefined) { + switch(arg) { + case '-t': + case '--test': + opts.tests = true; + break; + case '-c': + case '--cover': + opts.coverage = true; + break; + default: + console.log('This is a utility script for starting service containers using docker.'); + console.log('Usage: ' + process.argv.slice(0, 2).join(' ') + ' [OPTIONS]'); + console.log('Options are:'); + console.log(' -t, --test instead of starting the service, run the tests'); + console.log(' -c, --cover run the tests and report the coverage info'); + process.exit(/^-(h|-help)/.test(arg) ? 0 : 1); + } + } + + // start the process + main(opts); + +} else { + + module.exports = main; + +} + diff --git a/scripts/gen-init-scripts.rb b/scripts/gen-init-scripts.rb new file mode 100755 index 0000000..e523b97 --- /dev/null +++ b/scripts/gen-init-scripts.rb @@ -0,0 +1,71 @@ +#!/usr/bin/env ruby + + +require 'erb' +require 'json' +require 'yaml' + + +rootdir = File.expand_path(File.join(File.dirname(__FILE__), '..')) +indir = File.join(rootdir, 'dist', 'init-scripts') +outdir = indir + + +class ScriptData + + include ERB::Util + + @@suffix = {'systemd' => '.service', 'upstart' => '.conf'} + + def initialize input_dir + @template = {} + self.init input_dir + end + + def set_info root_dir + self.read_info(root_dir).each do |key, value| + self.instance_variable_set "@#{key}".to_sym, value + end + @service_name = @name + @no_file ||= 10000 + end + + def generate output_dir + @template.each do |name, erb| + File.open(File.join(output_dir, "#{@name}#{@@suffix[name]}"), 'w') do |io| + io.write erb.result(binding()) + end + end + end + + def init input_dir + Dir.glob(File.join(input_dir, '*.erb')).each do |fname| + @template[File.basename(fname, '.erb')] = ERB.new(File.read(fname)) + end + end + + def read_info root_dir + data = YAML.load(File.read(File.join(root_dir, 'config.yaml')))['services'][0]['conf'] + return data.merge(JSON.load(File.read(File.join(root_dir, 'package.json')))) + end + +end + + +if ARGV.size > 0 and ['-h', '--help'].include? ARGV[0] + puts 'This is a simple script to generate various service init scripts' + puts 'Usage: gen-init-scripts.rb [output_dir]' + exit 1 +elsif ARGV.size > 0 + outdir = ARGV[0] +end + +unless File.directory? outdir + STDERR.puts 'The output directory must exist! Aborting...' + exit 2 +end + +data = ScriptData.new indir +data.set_info rootdir +data.generate outdir + diff --git a/server.js b/server.js old mode 100644 new mode 100755 index 91530e5..da9fad8 --- a/server.js +++ b/server.js @@ -1,3 +1,5 @@ +#!/usr/bin/env node + 'use strict'; // Service entry point. Try node server --help for commandline options. diff --git a/test/features/app/app.js b/test/features/app/app.js index be90c06..c7fb08c 100644 --- a/test/features/app/app.js +++ b/test/features/app/app.js @@ -16,6 +16,15 @@ before(function () { return server.start(); }); + it('should get robots.txt', function() { + return preq.get({ + uri: server.config.uri + 'robots.txt' + }).then(function(res) { + assert.deepEqual(res.status, 200); + assert.deepEqual(res.headers['disallow'], '/'); + }); + }); + it('should get static content gzipped', function() { return preq.get({ uri: server.config.uri + 'static/index.html', diff --git a/test/features/ex/errors.js b/test/features/ex/errors.js new file mode 100644 index 0000000..141c425 --- /dev/null +++ b/test/features/ex/errors.js @@ -0,0 +1,93 @@ +'use strict'; + + +// mocha defines to avoid JSHint breakage +/* global describe, it, before, beforeEach, after, afterEach */ + + +var preq = require('preq'); +var assert = require('../../utils/assert.js'); +var server = require('../../utils/server.js'); + + +describe('errors', function() { + + this.timeout(20000); + + before(function () { return server.start(); }); + + // common URI prefix for the errors + var uri = server.config.uri + 'ex/err/'; + + it('array creation error', function() { + return preq.get({ + uri: uri + 'array' + }).then(function(res) { + // if we are here, no error was thrown, not good + throw new Error('Expected an error to be thrown, got status: ' + res.status); + }, function(err) { + // inspect the status + assert.deepEqual(err.status, 500); + // check the error title + assert.deepEqual(err.body.title, 'RangeError'); + }); + }); + + it('file read error', function() { + return preq.get({ + uri: uri + 'file' + }).then(function(res) { + // if we are here, no error was thrown, not good + throw new Error('Expected an error to be thrown, got status: ' + res.status); + }, function(err) { + // inspect the status + assert.deepEqual(err.status, 500); + // check the error title + assert.deepEqual(err.body.title, 'OperationalError'); + }); + }); + + it('constraint check error', function() { + return preq.get({ + uri: uri + 'manual/error' + }).then(function(res) { + // if we are here, no error was thrown, not good + throw new Error('Expected an error to be thrown, got status: ' + res.status); + }, function(err) { + // inspect the status + assert.deepEqual(err.status, 500); + // check the error title + assert.deepEqual(err.body.title, 'Error'); + }); + }); + + it('access denied error', function() { + return preq.get({ + uri: uri + 'manual/deny' + }).then(function(res) { + // if we are here, no error was thrown, not good + throw new Error('Expected an error to be thrown, got status: ' + res.status); + }, function(err) { + // inspect the status + assert.deepEqual(err.status, 403); + // check the error title + assert.deepEqual(err.body.type, 'access_denied'); + }); + }); + + it('authorisation error', function() { + return preq.get({ + uri: uri + 'manual/auth' + }).then(function(res) { + // if we are here, no error was thrown, not good + throw new Error('Expected an error to be thrown, got status: ' + res.status); + }, function(err) { + // inspect the status + assert.deepEqual(err.status, 401); + // check the error title + assert.deepEqual(err.body.type, 'unauthorized'); + }); + }); + +}); + diff --git a/test/features/v1/page.js b/test/features/v1/page.js index 4b1a326..b46b15e 100644 --- a/test/features/v1/page.js +++ b/test/features/v1/page.js @@ -17,7 +17,7 @@ before(function () { return server.start(); }); // common URI prefix for the page - var uri = server.config.uri + 'v1/page/Mulholland%20Drive%20%28film%29/'; + var uri = server.config.uri + 'en.wikipedia.org/v1/page/Mulholland%20Drive%20%28film%29/'; it('should get the whole page body', function() { return preq.get({ @@ -57,5 +57,17 @@ }); }); + it('should throw a 404 for a non-existent page', function() { + return preq.get({ + uri: server.config.uri + 'en.wikipedia.org/v1/page/Foobar_and_friends' + }).then(function(res) { + // if we are here, no error was thrown, not good + throw new Error('Expected an error to be thrown, got status: ', res.status); + }, function(err) { + // inspect the status + assert.deepEqual(err.status, 404); + }); + }); + }); diff --git a/test/features/v1/siteinfo.js b/test/features/v1/siteinfo.js index b0434e8..e62a106 100644 --- a/test/features/v1/siteinfo.js +++ b/test/features/v1/siteinfo.js @@ -17,7 +17,7 @@ before(function () { return server.start(); }); // common URI prefix for v1 - var uri = server.config.uri + 'v1/siteinfo/en.wikipedia.org/'; + var uri = server.config.uri + 'en.wikipedia.org/v1/siteinfo/'; it('should get all general enwiki site info', function() { return preq.get({ @@ -50,9 +50,24 @@ it('should fail to get a non-existent setting of enwiki', function() { return preq.get({ uri: uri + 'dummy_wiki_setting' - }).catch(function(res) { - // check the status - assert.status(res, 404); + }).then(function(res) { + // if we are here, no error was thrown, not good + throw new Error('Expected an error to be thrown, got status: ' + res.status); + }, function(err) { + // inspect the status + assert.deepEqual(err.status, 404); + }); + }); + + it('should fail to get info from a non-existent wiki', function() { + return preq.get({ + uri: server.config.uri + 'non.existent.wiki/v1/siteinfo/' + }).then(function(res) { + // if we are here, no error was thrown, not good + throw new Error('Expected an error to be thrown, got status: ' + res.status); + }, function(err) { + // inspect the status + assert.deepEqual(err.status, 500); }); }); diff --git a/test/utils/assert.js b/test/utils/assert.js index ec09f2d..ed030ea 100644 --- a/test/utils/assert.js +++ b/test/utils/assert.js @@ -5,7 +5,7 @@ /** - * Asserts whether the return satus was as expected + * Asserts whether the return status was as expected */ function status(res, expected) { diff --git a/test/utils/logStream.js b/test/utils/logStream.js index bafe95c..f8e292d 100644 --- a/test/utils/logStream.js +++ b/test/utils/logStream.js @@ -2,7 +2,7 @@ var bunyan = require('bunyan'); -function logStream() { +function logStream(logStdout) { var log = []; var parrot = bunyan.createLogger({ @@ -14,7 +14,7 @@ try { var entry = JSON.parse(chunk); var levelMatch = /^(\w+)/.exec(entry.levelPath); - if (levelMatch) { + if (logStdout && levelMatch) { var level = levelMatch[1]; if (parrot[level]) { parrot[level](entry); -- To view, visit https://gerrit.wikimedia.org/r/201998 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Id12a2cda4829db30298598c12de6e989e9418bac Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/graphoid Gerrit-Branch: master Gerrit-Owner: Yurik <yu...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits