jenkins-bot has submitted this change and it was merged.

Change subject: Disentangle versioned APIs from the wt2html path
......................................................................


Disentangle versioned APIs from the wt2html path

 * This is mostly just refactoring. The one behavioral difference is in
   the v2 API: we now only try to reuse expansions from passed-in
   revisions, whereas before, the else-if clause fell through to making
   a cache request.

 * Adds a test for a situation that wasn't previously covered but that
   roundtrip-test.js exposed.

 * Mocks expanding the echo template so there's less noise in the mocha
   output.

Change-Id: I0dcbfad738e916999d31849bc48cd39d0b6d6936
---
M api/routes.js
M api/utils.js
M tests/mocha/api.js
M tests/mockAPI.js
4 files changed, 439 insertions(+), 353 deletions(-)

Approvals:
  Subramanya Sastry: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/api/routes.js b/api/routes.js
index e1546c7..929fe8b 100644
--- a/api/routes.js
+++ b/api/routes.js
@@ -16,291 +16,17 @@
 
 var ParsoidCacheRequest = ApiRequest.ParsoidCacheRequest;
 var TemplateRequest = ApiRequest.TemplateRequest;
-var PHPParseRequest = ApiRequest.PHPParseRequest;
-var PegTokenizer = require('../lib/mediawiki.tokenizer.peg.js').PegTokenizer;
 
-var WIKITEXT_CONTENT_TYPE = 
'text/plain;profile=mediawiki.org/specs/wikitext/1.0.0;charset=utf-8';
 
 module.exports = function(parsoidConfig) {
        var routes = {};
 
        var REQ_TIMEOUT = parsoidConfig.timeouts.request;
 
-       var parseWithReuse = function(env, req, res) {
-               env.log('info', 'started parsing');
-
-               var meta = env.page.meta;
-               var v2 = res.local('v2');
-               var p = Promise.resolve();
-
-               // See if we can reuse transclusion or extension expansions.
-               if ( v2 && ( v2.previous || v2.original ) ) {
-                       p = p.then(function() {
-                               var revision = v2.previous || v2.original;
-                               var doc = DU.parseHTML( revision.html.body );
-                               DU.applyDataParsoid( doc, 
revision["data-parsoid"].body );
-                               var ret = {
-                                       expansions: DU.extractExpansions( doc )
-                               };
-                               if ( v2.update ) {
-                                       ["templates", "files"].some(function(m) 
{
-                                               if ( v2.update[m] ) {
-                                                       ret.mode = m;
-                                                       return true;
-                                               }
-                                       });
-                               }
-                               return ret;
-                       });
-               // And don't parse twice for recursive parsoid requests.
-               } else if ( env.conf.parsoid.parsoidCacheURI && 
!req.headers['x-parsoid-request'] ) {
-                       p = p.then(function() {
-                               // Try to retrieve a cached copy of the content.
-                               var parsoidHeader = 
JSON.parse(req.headers['x-parsoid'] || '{}');
-                               // If a cacheID is passed in X-Parsoid (from 
our PHP extension),
-                               // use that explicitly. Otherwise default to 
the parentID.
-                               var cacheID = parsoidHeader.cacheID || 
meta.revision.parentid;
-                               return ParsoidCacheRequest
-                                       .promise(env, meta.title, cacheID)
-                                       .then(function(src) {
-                                               // Extract transclusion and 
extension content from the DOM
-                                               var ret = {
-                                                       expansions: 
DU.extractExpansions(DU.parseHTML(src))
-                                               };
-                                               if (parsoidHeader.cacheID) {
-                                                       ret.mode = 
parsoidHeader.mode;
-                                               }
-                                               return ret;
-                                       }, function(err) {
-                                               // No luck with the cache 
request.
-                                               return null;
-                                       });
-                       });
-               }
-
-               return p.then(function(ret) {
-                       if (ret) {
-                               // Figure out what we can reuse
-                               switch (ret.mode) {
-                               case "templates":
-                                       // Transclusions need to be updated, so 
don't reuse them.
-                                       ret.expansions.transclusions = {};
-                                       break;
-                               case "files":
-                                       // Files need to be updated, so don't 
reuse them.
-                                       ret.expansions.files = {};
-                                       break;
-                               }
-                       }
-                       return env.pipelineFactory.parse(env, env.page.src, ret 
&& ret.expansions);
-               });
-       };
-
-       var wt2html = function(req, res, wt) {
-               var env = res.local('env');
-
-               // Performance Timing options
-               var timer = env.conf.parsoid.performanceTimer;
-               var startTimers;
-
-               if (timer) {
-                       startTimers = new Map();
-                       // init refers to time elapsed before parsing begins
-                       startTimers.set('wt2html.init', Date.now());
-                       startTimers.set('wt2html.total', Date.now());
-               }
-
-               var prefix = res.local('iwp');
-               var oldid = res.local('oldid');
-               var v2 = res.local('v2');
-               var target = 
env.resolveTitle(env.normalizeTitle(env.page.name), '');
-
-               function sendRes(doc) {
-                       var contentType = 
'text/html;profile=mediawiki.org/specs/html/1.1.0;charset=utf-8';
-                       var output;
-                       if (v2 && v2.format === 'pagebundle') {
-                               var out = DU.extractDpAndSerialize(doc, 
res.local('body'));
-                               output = out.str;
-                               apiUtils.jsonResponse(res, env, {
-                                       // revid: 12345 (maybe?),
-                                       html: {
-                                               headers: { 'content-type': 
contentType },
-                                               body: output
-                                       },
-                                       "data-parsoid": {
-                                               headers: { 'content-type': 
out.type },
-                                               body: out.dp
-                                       }
-                               });
-                       } else {
-                               output = DU.serializeNode(res.local('body') ? 
doc.body : doc).str;
-                               apiUtils.setHeader(res, env, 'content-type', 
contentType);
-                               apiUtils.endResponse(res, env, output);
-                       }
-
-                       if ( timer ) {
-                               if ( startTimers.has( 'wt2html.wt.parse' ) ) {
-                                       timer.timing( 'wt2html.wt.parse', '',
-                                               Date.now() - startTimers.get( 
'wt2html.wt.parse' ));
-                                       timer.timing( 'wt2html.wt.size.output', 
'', output.length );
-                               } else if ( startTimers.has( 
'wt2html.pageWithOldid.parse' ) ) {
-                                       timer.timing( 
'wt2html.pageWithOldid.parse', '',
-                                               Date.now() - startTimers.get( 
'wt2html.pageWithOldid.parse' ));
-                                       timer.timing( 
'wt2html.pageWithOldid.size.output', '', output.length );
-                               }
-                               timer.timing( 'wt2html.total', '',
-                                       Date.now() - startTimers.get( 
'wt2html.total' ));
-                       }
-
-                       apiUtils.logTime(env, res, 'parsing');
-               }
-
-               function parseWt() {
-                       env.log('info', 'started parsing');
-                       env.setPageSrcInfo(wt);
-
-                       // Don't cache requests when wt is set in case somebody 
uses
-                       // GET for wikitext parsing
-                       apiUtils.setHeader(res, env, 'Cache-Control', 
'private,no-cache,s-maxage=0');
-
-                       if (timer) {
-                               timer.timing('wt2html.wt.init', '',
-                                       Date.now() - startTimers.get( 
'wt2html.init'));
-                               startTimers.set('wt2html.wt.parse', Date.now());
-                               timer.timing('wt2html.wt.size.input', '', 
wt.length);
-                       }
-
-                       if (!res.local('pageName')) {
-                               // clear default page name
-                               env.page.name = '';
-                       }
-
-                       return env.pipelineFactory.parse(env, wt);
-               }
-
-               function parsePageWithOldid() {
-                       if (timer) {
-                               timer.timing('wt2html.pageWithOldid.init', '',
-                                       Date.now() - 
startTimers.get('wt2html.init'));
-                               startTimers.set('wt2html.pageWithOldid.parse', 
Date.now());
-                               
timer.timing('wt2html.pageWithOldid.size.input', '', env.page.src.length);
-                       }
-
-                       return parseWithReuse(env, req, res).then(function(doc) 
{
-                               if (req.headers.cookie || v2) {
-                                       // Don't cache requests with a session.
-                                       // Also don't cache requests to the v2 
entry point, as those
-                                       // are stored by RESTBase & will just 
dilute the Varnish cache
-                                       // in the meantime.
-                                       apiUtils.setHeader(res, env, 
'Cache-Control', 'private,no-cache,s-maxage=0');
-                               } else {
-                                       apiUtils.setHeader(res, env, 
'Cache-Control', 's-maxage=2592000');
-                               }
-                               // Indicate the MediaWiki revision in a header 
as well for
-                               // ease of extraction in clients.
-                               apiUtils.setHeader(res, env, 
'content-revision-id', oldid);
-                               return doc;
-                       });
-               }
-
-               function redirectToOldid() {
-                       // Don't cache requests with no oldid
-                       apiUtils.setHeader(res, env, 'Cache-Control', 
'private,no-cache,s-maxage=0');
-                       oldid = env.page.meta.revision.revid;
-                       env.log("info", "redirecting to revision", oldid);
-
-                       if ( timer ) {
-                               timer.count('wt2html.redirectToOldid', '');
-                       }
-
-                       var path = "/";
-                       if ( v2 ) {
-                               path += [
-                                       "v2",
-                                       
url.parse(env.conf.parsoid.mwApiMap.get(prefix).uri).host,
-                                       v2.format,
-                                       encodeURIComponent( target ),
-                                       oldid
-                               ].join("/");
-                       } else {
-                               path += [
-                                       prefix,
-                                       encodeURIComponent( target )
-                               ].join("/");
-                               req.query.oldid = oldid;
-                       }
-
-                       if ( Object.keys( req.query ).length > 0 ) {
-                               path += "?" + qs.stringify( req.query );
-                       }
-
-                       // Redirect to oldid
-                       apiUtils.relativeRedirect({ "path": path, "res": res, 
"env": env });
-               }
-
-               // To support the 'subst' API parameter, we need to prefix each
-               // top-level template with 'subst'. To make sure we do this for 
the
-               // correct templates, tokenize the starting wikitext and use 
that to
-               // detect top-level templates. Then, substitute each starting 
'{{' with
-               // '{{subst' using the template token's tsr.
-               function substTopLevelTemplates(p) {
-                       var tokenizer = new PegTokenizer(env);
-                       var tokens = tokenizer.tokenize(wt, null, null, true);
-                       var tsrIncr = 0;
-                       for (var i = 0; i < tokens.length; i++) {
-                               if (tokens[i].name === 'template') {
-                                       var tsr = tokens[i].dataAttribs.tsr;
-                                       wt = wt.substring(0, tsr[0] + tsrIncr) +
-                                               '{{subst:' +
-                                               wt.substring(tsr[0] + tsrIncr + 
2);
-                                       tsrIncr += 6;
-                               }
-                       }
-                       // Now pass it to the MediaWiki API with onlypst set so 
that it
-                       // subst's the templates.
-                       return p.then(function() {
-                               return PHPParseRequest.promise(env, target, wt, 
true);
-                       }).then(function(text) {
-                               // Use the returned wikitext as the page source.
-                               wt = text;
-                               // Set data-parsoid to be discarded, so that 
the subst'ed
-                               // content is considered new when it comes back.
-                               env.discardDataParsoid = true;
-                       });
-               }
-
-               var p;
-               if (oldid || typeof wt !== 'string') {
-                       // Always fetch the page info if we have an oldid.
-                       // Otherwise, if no wt was passed, we need to figure out
-                       // the latest revid to which we'll redirect.
-                       p = TemplateRequest.setPageSrcInfo(env, target, oldid);
-               } else {
-                       p = Promise.resolve();
-               }
-
-               if (typeof wt === 'string') {
-                       if (res.local('subst')) {
-                               p = substTopLevelTemplates(p);
-                       }
-                       p = p.then(parseWt)
-                               .timeout(REQ_TIMEOUT)
-                               .then(sendRes);
-               } else if (oldid) {
-                       p = p.then(parsePageWithOldid)
-                               .timeout(REQ_TIMEOUT)
-                               .then(sendRes);
-               } else {
-                       p = p.then(redirectToOldid);
-               }
-
-               return apiUtils.cpuTimeout(p, res)
-                       .catch(apiUtils.timeoutResp.bind(null, env));
-       };
 
        // Middlewares
 
-       routes.interParams = function( req, res, next ) {
+       routes.interParams = function(req, res, next) {
                res.local('iwp', req.params[0] || parsoidConfig.defaultWiki || 
'');
                res.local('pageName', req.params[1] || '');
                res.local('oldid', req.body.oldid || req.query.oldid || null);
@@ -311,20 +37,73 @@
                next();
        };
 
-       routes.parserEnvMw = function( req, res, next ) {
-               function errBack( env, logData, callback ) {
-                       if ( !env.responseSent ) {
-                               return new Promise(function( resolve, reject ) {
+       var wt2htmlFormats = new Set(['pagebundle', 'html']);
+       var supportedFormats = new Set(['pagebundle', 'html', 'wt']);
+
+       routes.v2Middle = function(req, res, next) {
+               function errOut(err, code) {
+                       apiUtils.sendResponse(res, {}, err, code || 404);
+               }
+
+               var iwp = parsoidConfig.reverseMwApiMap.get(req.params.domain);
+               if (!iwp) {
+                       return errOut('Invalid domain.');
+               }
+
+               res.local('iwp', iwp);
+               res.local('pageName', req.params.title || '');
+               res.local('oldid', req.params.revision || null);
+
+               // "body" flag to return just the body (instead of the entire 
HTML doc)
+               res.local('body', !!(req.query.body || req.body.body));
+
+               var v2 = Object.assign({ format: req.params.format }, req.body);
+
+               if (!supportedFormats.has(v2.format) ||
+                               (req.method === 'GET' && 
!wt2htmlFormats.has(v2.format))) {
+                       return errOut('Invalid format.');
+               }
+
+               // "subst" flag to perform {{subst:}} template expansion
+               res.local('subst', !!(req.query.subst || req.body.subst));
+               // This is only supported for the html format
+               if (res.local('subst') && v2.format !== 'html') {
+                       return errOut('Substitution is only supported for the 
HTML format.', 501);
+               }
+
+               if (req.method === 'POST') {
+                       var original = v2.original || {};
+                       if (original.revid) {
+                               res.local('oldid', original.revid);
+                       }
+                       if (original.title) {
+                               res.local('pageName', original.title);
+                       }
+               }
+
+               if (v2 && v2.original && v2.original['data-parsoid'] &&
+                               
!Object.keys(v2.original['data-parsoid'].body).length) {
+                       return errOut('data-parsoid was provided without an ids 
property.', 400);
+               }
+
+               res.local('v2', v2);
+               next();
+       };
+
+       routes.parserEnvMw = function(req, res, next) {
+               function errBack(env, logData, callback) {
+                       if (!env.responseSent) {
+                               return new Promise(function(resolve, reject) {
                                        var socket = res.socket;
-                                       if ( res.finished || (socket && 
!socket.writable) ) {
+                                       if (res.finished || (socket && 
!socket.writable)) {
                                                return resolve();
                                        }
-                                       res.once( 'finish', resolve );
-                                       apiUtils.setHeader( res, env, 
'content-type', 'text/plain;charset=utf-8' );
-                                       apiUtils.sendResponse( res, env, 
logData.fullMsg(), logData.flatLogObject().code || 500 );
+                                       res.once('finish', resolve);
+                                       apiUtils.setHeader(res, env, 
'content-type', 'text/plain;charset=utf-8');
+                                       apiUtils.sendResponse(res, env, 
logData.fullMsg(), logData.flatLogObject().code || 500);
                                }).catch(function(e) {
-                                       console.error( e.stack || e );
-                                       res.end( e.stack || e );
+                                       console.error(e.stack || e);
+                                       res.end(e.stack || e);
                                }).nodify(callback);
                        }
                        return Promise.resolve().nodify(callback);
@@ -333,7 +112,7 @@
                        prefix: res.local('iwp'),
                        pageName: res.local('pageName'),
                        cookie: req.headers.cookie,
-                       reqId: req.headers['x-request-id']
+                       reqId: req.headers['x-request-id'],
                };
                MWParserEnv.getParserEnv(parsoidConfig, null, 
options).then(function(env) {
                        env.logger.registerBackend(/fatal(\/.*)?/, 
errBack.bind(this, env));
@@ -348,10 +127,10 @@
                        }
                        if (req.body.hasOwnProperty('scrubWikitext')) {
                                env.scrubWikitext = !(!req.body.scrubWikitext ||
-                                       req.body.scrubWikitext === "false");
+                                       req.body.scrubWikitext === 'false');
                        } else if (req.query.hasOwnProperty('scrubWikitext')) {
                                env.scrubWikitext = !(!req.query.scrubWikitext 
||
-                                       req.query.scrubWikitext === "false");
+                                       req.query.scrubWikitext === 'false');
                        }
                        res.local('env', env);
                        next();
@@ -361,6 +140,7 @@
                        errBack({}, new LogData('error', ['error:', err, 
'path:', req.path]));
                });
        };
+
 
        // Routes
 
@@ -571,9 +351,70 @@
 
        // v1 Routes
 
+       var v1Wt2html = function(req, res, wt) {
+               var env = res.local('env');
+               var p = apiUtils.startWt2html(req, res, wt).then(function(ret) {
+                       if (typeof ret.wikitext === 'string') {
+                               return apiUtils.parseWt(ret)
+                                       .timeout(REQ_TIMEOUT)
+                                       .then(apiUtils.endWt2html.bind(null, 
ret));
+                       } else if (ret.oldid) {
+                               var p2 = Promise.resolve(ret);
+                               // See if we can reuse transclusion or 
extension expansions.
+                               // And don't parse twice for recursive parsoid 
requests.
+                               if (env.conf.parsoid.parsoidCacheURI && 
!req.headers['x-parsoid-request']) {
+                                       p2 = p2.then(function(ret) {
+                                               var meta = env.page.meta;
+                                               // Try to retrieve a cached 
copy of the content.
+                                               var parsoidHeader = 
JSON.parse(req.headers['x-parsoid'] || '{}');
+                                               // If a cacheID is passed in 
X-Parsoid (from our PHP extension),
+                                               // use that explicitly. 
Otherwise default to the parentID.
+                                               var cacheID = 
parsoidHeader.cacheID || meta.revision.parentid;
+                                               return ParsoidCacheRequest
+                                                       .promise(env, 
meta.title, cacheID)
+                                                       .then(function(src) {
+                                                               // Extract 
transclusion and extension content from the DOM
+                                                               ret.reuse = {
+                                                                       
expansions: DU.extractExpansions(DU.parseHTML(src)),
+                                                               };
+                                                               if 
(parsoidHeader.cacheID) {
+                                                                       
ret.reuse.mode = parsoidHeader.mode;
+                                                               }
+                                                               return ret;
+                                                       }, function(err) {
+                                                               // No luck with 
the cache request.
+                                                               return ret;
+                                                       });
+                                       });
+                               }
+                               return 
p2.then(apiUtils.parsePageWithOldid).tap(function() {
+                                       if (req.headers.cookie) {
+                                               // Don't cache requests with a 
session.
+                                               apiUtils.setHeader(res, env, 
'Cache-Control', 'private,no-cache,s-maxage=0');
+                                       } else {
+                                               apiUtils.setHeader(res, env, 
'Cache-Control', 's-maxage=2592000');
+                                       }
+                               
}).timeout(REQ_TIMEOUT).then(apiUtils.endWt2html.bind(null, ret));
+                       } else {
+                               var revid = env.page.meta.revision.revid;
+                               var path = [
+                                       '',
+                                       ret.prefix,
+                                       encodeURIComponent(ret.target)
+                               ].join('/');
+                               req.query.oldid = revid;
+                               path += '?' + qs.stringify(req.query);
+                               apiUtils.redirectToRevision(env, res, path, 
revid);
+                       }
+               });
+
+               return apiUtils.cpuTimeout(p, res)
+                       .catch(apiUtils.timeoutResp.bind(null, env));
+       };
+
        routes.v1Get = function(req, res) {
                // Regular article parsing
-               wt2html(req, res);
+               return v1Wt2html(req, res);
        };
 
        routes.v1Post = function(req, res) {
@@ -581,13 +422,13 @@
                var body = req.body;
                if (req.body.wt) {
                        // Form-based article parsing
-                       wt2html(req, res, body.wt);
+                       return v1Wt2html(req, res, body.wt);
                } else {
                        // Regular and form-based article serialization
                        var p = apiUtils.startHtml2wt(req, res, body.html || 
body.content || '')
                                        .then(apiUtils.endHtml2wt)
                                        .then(function(output) {
-                               apiUtils.setHeader(res, env, 'content-type', 
WIKITEXT_CONTENT_TYPE);
+                               apiUtils.setHeader(res, env, 'content-type', 
apiUtils.WIKITEXT_CONTENT_TYPE);
                                apiUtils.endResponse(res, env, output);
                        });
                        return apiUtils.cpuTimeout(p, res)
@@ -596,70 +437,70 @@
        };
 
 
-       // v2 Middleware
-
-       var wt2htmlFormats = new Set([ "pagebundle", "html" ]);
-       var supportedFormats = new Set([ "pagebundle", "html", "wt" ]);
-
-       routes.v2Middle = function( req, res, next ) {
-               function errOut( err, code ) {
-                       // FIXME: provide more consistent error handling.
-                       apiUtils.sendResponse( res, {}, err, code || 404 );
-               }
-
-               var iwp = parsoidConfig.reverseMwApiMap.get(req.params.domain);
-               if ( !iwp ) {
-                       return errOut("Invalid domain.");
-               }
-
-               res.local('iwp', iwp);
-               res.local('pageName', req.params.title || '');
-               res.local('oldid', req.params.revision || null);
-
-               // "body" flag to return just the body (instead of the entire 
HTML doc)
-               res.local('body', !!(req.query.body || req.body.body));
-
-               var v2 = Object.assign({ format: req.params.format }, req.body);
-
-               if (!supportedFormats.has(v2.format) ||
-                               (req.method === "GET" && 
!wt2htmlFormats.has(v2.format))) {
-                       return errOut("Invalid format.");
-               }
-
-               // "subst" flag to perform {{subst:}} template expansion
-               res.local('subst', !!(req.query.subst || req.body.subst));
-               // This is only supported for the html format
-               if (res.local('subst') && v2.format !== "html") {
-                       return errOut("Substitution is only supported for the 
HTML format.", 501);
-               }
-
-               if ( req.method === "POST" ) {
-                       var original = v2.original || {};
-                       if ( original.revid ) {
-                               res.local('oldid', original.revid);
-                       }
-                       if ( original.title ) {
-                               res.local('pageName', original.title);
-                       }
-               }
-
-               if (v2 && v2.original && v2.original['data-parsoid'] &&
-                               
!Object.keys(v2.original['data-parsoid'].body).length) {
-                       return errOut('data-parsoid was provided without an ids 
property.', 400);
-               }
-
-               res.local('v2', v2);
-               next();
-       };
-
-
        // v2 Routes
 
        // Spec'd in https://phabricator.wikimedia.org/T75955 and the API tests.
 
+       var v2Wt2html = function(req, res, wt) {
+               var env = res.local('env');
+               var v2 = res.local('v2');
+               var p = apiUtils.startWt2html(req, res, wt).then(function(ret) {
+                       if (typeof ret.wikitext === 'string') {
+                               return apiUtils.parseWt(ret)
+                                       .timeout(REQ_TIMEOUT)
+                                       .then(apiUtils.v2endWt2html.bind(null, 
ret));
+                       } else if (ret.oldid) {
+                               var p2 = Promise.resolve(ret);
+                               // See if we can reuse transclusion or 
extension expansions.
+                               var revision = v2.previous || v2.original;
+                               if (revision) {
+                                       p2 = p2.then(function(ret) {
+                                               var doc = 
DU.parseHTML(revision.html.body);
+                                               DU.applyDataParsoid(doc, 
revision['data-parsoid'].body);
+                                               ret.reuse = {
+                                                       expansions: 
DU.extractExpansions(doc),
+                                               };
+                                               if (v2.update) {
+                                                       ['templates', 
'files'].some(function(m) {
+                                                               if 
(v2.update[m]) {
+                                                                       
ret.reuse.mode = m;
+                                                                       return 
true;
+                                                               }
+                                                       });
+                                               }
+                                               return ret;
+                                       });
+                               }
+                               return 
p2.then(apiUtils.parsePageWithOldid).tap(function() {
+                                       // Don't cache requests to the v2 entry 
point, as those
+                                       // are stored by RESTBase & will just 
dilute the Varnish
+                                       // cache in the meantime.
+                                       apiUtils.setHeader(res, env, 
'Cache-Control', 'private,no-cache,s-maxage=0');
+                               })
+                               .timeout(REQ_TIMEOUT)
+                               .then(apiUtils.v2endWt2html.bind(null, ret));
+                       } else {
+                               var revid = env.page.meta.revision.revid;
+                               var path = [
+                                       '/v2',
+                                       
url.parse(env.conf.parsoid.mwApiMap.get(ret.prefix).uri).host,
+                                       v2.format,
+                                       encodeURIComponent(ret.target),
+                                       revid
+                               ].join('/');
+                               if (Object.keys(req.query).length > 0) {
+                                       path += '?' + qs.stringify(req.query);
+                               }
+                               apiUtils.redirectToRevision(env, res, path, 
revid);
+                       }
+               });
+               return apiUtils.cpuTimeout(p, res)
+                       .catch(apiUtils.timeoutResp.bind(null, env));
+       };
+
        // GET requests
        routes.v2Get = function(req, res) {
-               wt2html(req, res);
+               return v2Wt2html(req, res);
        };
 
        // POST requests
@@ -684,7 +525,7 @@
                                        wikitext = v2.original.wikitext.body;
                                }
                        }
-                       wt2html(req, res, wikitext);
+                       return v2Wt2html(req, res, wikitext);
                } else {
                        // html is required for serialization
                        if (v2.html === undefined) {
@@ -715,7 +556,7 @@
                        }).then(apiUtils.endHtml2wt).then(function(output) {
                                apiUtils.jsonResponse(res, env, {
                                        wikitext: {
-                                               headers: { 'content-type': 
WIKITEXT_CONTENT_TYPE },
+                                               headers: { 'content-type': 
apiUtils.WIKITEXT_CONTENT_TYPE },
                                                body: output,
                                        }
                                });
diff --git a/api/utils.js b/api/utils.js
index df889ba..ab19249 100644
--- a/api/utils.js
+++ b/api/utils.js
@@ -7,9 +7,17 @@
 
 var Diff = require('../lib/mediawiki.Diff.js').Diff;
 var DU = require('../lib/mediawiki.DOMUtils.js').DOMUtils;
+var PegTokenizer = require('../lib/mediawiki.tokenizer.peg.js').PegTokenizer;
+var ApiRequest = require('../lib/mediawiki.ApiRequest.js');
+
+var TemplateRequest = ApiRequest.TemplateRequest;
+var PHPParseRequest = ApiRequest.PHPParseRequest;
 
 
-var apiUtils = module.exports = {};
+var apiUtils = module.exports = {
+       WIKITEXT_CONTENT_TYPE: 
'text/plain;profile=mediawiki.org/specs/wikitext/1.0.0;charset=utf-8',
+       HTML_CONTENT_TYPE:     
'text/html;profile=mediawiki.org/specs/html/1.1.0;charset=utf-8',
+};
 
 /**
  * Send a redirect response with optional code and a relative URL
@@ -282,3 +290,210 @@
                return output;
        });
 };
+
+// To support the 'subst' API parameter, we need to prefix each
+// top-level template with 'subst'. To make sure we do this for the
+// correct templates, tokenize the starting wikitext and use that to
+// detect top-level templates. Then, substitute each starting '{{' with
+// '{{subst' using the template token's tsr.
+var substTopLevelTemplates = function(env, target, wt) {
+       var tokenizer = new PegTokenizer(env);
+       var tokens = tokenizer.tokenize(wt, null, null, true);
+       var tsrIncr = 0;
+       for (var i = 0; i < tokens.length; i++) {
+               if (tokens[i].name === 'template') {
+                       var tsr = tokens[i].dataAttribs.tsr;
+                       wt = wt.substring(0, tsr[0] + tsrIncr) +
+                               '{{subst:' +
+                               wt.substring(tsr[0] + tsrIncr + 2);
+                       tsrIncr += 6;
+               }
+       }
+       // Now pass it to the MediaWiki API with onlypst set so that it
+       // subst's the templates.
+       return PHPParseRequest.promise(env, target, wt, 
true).then(function(wikitext) {
+               // Set data-parsoid to be discarded, so that the subst'ed
+               // content is considered new when it comes back.
+               env.discardDataParsoid = true;
+               // Use the returned wikitext as the page source.
+               return wikitext;
+       });
+};
+
+apiUtils.startWt2html = function(req, res, wt) {
+       var env = res.local('env');
+
+       // Performance Timing options
+       var timer = env.conf.parsoid.performanceTimer;
+       var startTimers;
+
+       if (timer) {
+               startTimers = new Map();
+               // init refers to time elapsed before parsing begins
+               startTimers.set('wt2html.init', Date.now());
+               startTimers.set('wt2html.total', Date.now());
+       }
+
+       var prefix = res.local('iwp');
+       var oldid = res.local('oldid');
+       var target = env.resolveTitle(env.normalizeTitle(env.page.name), '');
+
+       var p = Promise.resolve(wt);
+
+       if (oldid || typeof wt !== 'string') {
+               // Always fetch the page info if we have an oldid.
+               // Otherwise, if no wt was passed, we need to figure out
+               // the latest revid to which we'll redirect.
+               p = p.tap(function() {
+                       return TemplateRequest.setPageSrcInfo(env, target, 
oldid);
+               });
+       }
+
+       if (typeof wt === 'string' && res.local('subst')) {
+               p = p.then(function(wt) {
+                       return substTopLevelTemplates(env, target, wt);
+               });
+       }
+
+       return p.then(function(wikitext) {
+               return {
+                       req: req,
+                       res: res,
+                       env: env,
+                       startTimers: startTimers,
+                       oldid: oldid,
+                       target: target,
+                       prefix: prefix,
+                       // Calling this wikitext so that it's easily distinguishable.
+                       // It may have been modified by substTopLevelTemplates.
+                       wikitext: wikitext,
+               };
+       });
+};
+
+apiUtils.redirectToRevision = function(env, res, path, revid) {
+       var timer = env.conf.parsoid.performanceTimer;
+       env.log('info', 'redirecting to revision', revid);
+
+       if (timer) {
+               timer.count('wt2html.redirectToOldid', '');
+       }
+
+       // Don't cache requests with no oldid
+       apiUtils.setHeader(res, env, 'Cache-Control', 
'private,no-cache,s-maxage=0');
+       apiUtils.relativeRedirect({ 'path': path, 'res': res, 'env': env });
+};
+
+apiUtils.parsePageWithOldid = function(ret) {
+       var env = ret.env;
+       var timer = env.conf.parsoid.performanceTimer;
+       var startTimers = ret.startTimers;
+       env.log('info', 'started parsing');
+
+       // Indicate the MediaWiki revision in a header as well for
+       // ease of extraction in clients.
+       apiUtils.setHeader(ret.res, env, 'content-revision-id', ret.oldid);
+
+       if (timer) {
+               timer.timing('wt2html.pageWithOldid.init', '',
+                       Date.now() - startTimers.get('wt2html.init'));
+               startTimers.set('wt2html.pageWithOldid.parse', Date.now());
+               timer.timing('wt2html.pageWithOldid.size.input', '', 
env.page.src.length);
+       }
+
+       var expansions = ret.reuse && ret.reuse.expansions;
+       if (expansions) {
+               // Figure out what we can reuse
+               switch (ret.reuse.mode) {
+               case "templates":
+                       // Transclusions need to be updated, so don't reuse them.
+                       expansions.transclusions = {};
+                       break;
+               case "files":
+                       // Files need to be updated, so don't reuse them.
+                       expansions.files = {};
+                       break;
+               }
+       }
+
+       return env.pipelineFactory.parse(env, env.page.src, expansions);
+};
+
+apiUtils.parseWt = function(ret) {
+       var env = ret.env;
+       var res = ret.res;
+       var timer = env.conf.parsoid.performanceTimer;
+       var startTimers = ret.startTimers;
+
+       env.log('info', 'started parsing');
+       env.setPageSrcInfo(ret.wikitext);
+
+       // Don't cache requests when wt is set in case somebody uses
+       // GET for wikitext parsing
+       apiUtils.setHeader(res, env, 'Cache-Control', 
'private,no-cache,s-maxage=0');
+
+       if (timer) {
+               timer.timing('wt2html.wt.init', '',
+                       Date.now() - startTimers.get( 'wt2html.init'));
+               startTimers.set('wt2html.wt.parse', Date.now());
+               timer.timing('wt2html.wt.size.input', '', ret.wikitext.length);
+       }
+
+       if (!res.local('pageName')) {
+               // clear default page name
+               env.page.name = '';
+       }
+
+       return env.pipelineFactory.parse(env, ret.wikitext);
+};
+
+apiUtils.endWt2html = function(ret, doc, output) {
+       var env = ret.env;
+       var res = ret.res;
+       var timer = env.conf.parsoid.performanceTimer;
+       var startTimers = ret.startTimers;
+
+       if (doc) {
+               output = DU.serializeNode(res.local('body') ? doc.body : 
doc).str;
+               apiUtils.setHeader(res, env, 'content-type', 
apiUtils.HTML_CONTENT_TYPE);
+               apiUtils.endResponse(res, env, output);
+       }
+
+       if (timer) {
+               if (startTimers.has('wt2html.wt.parse')) {
+                       timer.timing('wt2html.wt.parse', '',
+                               Date.now() - 
startTimers.get('wt2html.wt.parse'));
+                       timer.timing('wt2html.wt.size.output', '', 
output.length);
+               } else if (startTimers.has( 'wt2html.pageWithOldid.parse')) {
+                       timer.timing('wt2html.pageWithOldid.parse', '',
+                               Date.now() - 
startTimers.get('wt2html.pageWithOldid.parse'));
+                       timer.timing('wt2html.pageWithOldid.size.output', '', 
output.length);
+               }
+               timer.timing('wt2html.total', '',
+                       Date.now() - startTimers.get('wt2html.total'));
+       }
+
+       apiUtils.logTime(env, res, 'parsing');
+};
+
+apiUtils.v2endWt2html = function(ret, doc) {
+       var env = ret.env;
+       var res = ret.res;
+       var v2 = res.local('v2');
+       if (v2.format === 'pagebundle') {
+               var out = DU.extractDpAndSerialize(doc, res.local('body'));
+               apiUtils.jsonResponse(res, env, {
+                       html: {
+                               headers: { 'content-type': 
apiUtils.HTML_CONTENT_TYPE },
+                               body: out.str,
+                       },
+                       'data-parsoid': {
+                               headers: { 'content-type': out.type },
+                               body: out.dp,
+                       }
+               });
+               apiUtils.endWt2html(ret, null, out.str);
+       } else {
+               apiUtils.endWt2html(ret, doc);
+       }
+};
diff --git a/tests/mocha/api.js b/tests/mocha/api.js
index 41a1f3d..64b16ef 100644
--- a/tests/mocha/api.js
+++ b/tests/mocha/api.js
@@ -209,7 +209,7 @@
                                .end(done);
                        });
 
-                       it('should accept wikitext as a string', function(done) 
{
+                       it('should accept wikitext as a string for html', 
function(done) {
                                request(api)
                                .post('v2/' + mockHost + '/html/')
                                .send({
@@ -223,6 +223,22 @@
                                .end(done);
                        });
 
+                       it('should accept wikitext as a string for pagebundle', 
function(done) {
+                               request(api)
+                               .post('v2/' + mockHost + '/pagebundle/')
+                               .send({
+                                       wikitext: "== h2 =="
+                               })
+                               .expect(200)
+                               .expect(function(res) {
+                                       res.body.should.have.property('html');
+                                       
res.body.should.have.property('data-parsoid');
+                                       var doc = 
domino.createDocument(res.body.html.body);
+                                       
doc.body.firstChild.nodeName.should.equal('H2');
+                               })
+                               .end(done);
+                       });
+
                        it('should accept wikitext with headers', 
function(done) {
                                request(api)
                                .post('v2/' + mockHost + '/html/')
diff --git a/tests/mockAPI.js b/tests/mockAPI.js
index 5d00f90..d074469 100644
--- a/tests/mockAPI.js
+++ b/tests/mockAPI.js
@@ -242,6 +242,20 @@
                response.query.pages['1'] = imageinfo;
                cb( null, response );
        },
+
+       expandtemplates: function(body, cb) {
+               var match = body.text.match(/{{echo\|(.*?)}}/);
+               if (match) {
+                       cb(null, {
+                               expandtemplates: {
+                                       wikitext: match[1],
+                               },
+                       });
+               } else {
+                       cb(new Error('Sorry!'));
+               }
+       },
+
 };
 
 var actionDefinitions = {

-- 
To view, visit https://gerrit.wikimedia.org/r/219508
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I0dcbfad738e916999d31849bc48cd39d0b6d6936
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to