jenkins-bot has submitted this change and it was merged.
Change subject: Return a JSON response with separate html and data-parsoid
......................................................................
Return a JSON response with separate html and data-parsoid
* Implements the v2 pagebundle style routes,
/v2/{domain}/{title}/{format}/{revision}
Bug: 52936
Change-Id: I97a88cf3e0790ad0d2fa663ca80a1c0c92c129c3
---
M api/ParsoidService.js
M lib/mediawiki.ParsoidConfig.js
2 files changed, 125 insertions(+), 41 deletions(-)
Approvals:
Subramanya Sastry: Looks good to me, approved
jenkins-bot: Verified
diff --git a/api/ParsoidService.js b/api/ParsoidService.js
index ea2d65c..903b281 100644
--- a/api/ParsoidService.js
+++ b/api/ParsoidService.js
@@ -3,6 +3,8 @@
*/
"use strict";
+require('../lib/core-upgrade.js');
+
/**
* @class ParserServiceModule
* @singleton
@@ -13,11 +15,11 @@
var express = require('express'),
domino = require('domino'),
hbs = require('handlebars'),
- // memwatch = require('memwatch'),
childProc = require('child_process'),
cluster = require('cluster'),
fs = require('fs'),
path = require('path'),
+ url = require('url'),
util = require('util'),
pkg = require('../package.json'),
Diff = require('../lib/mediawiki.Diff.js').Diff,
@@ -70,7 +72,7 @@
* @param {Response} res The response object from our routing function.
* @property {Function} Serializer
*/
- function endResponse (res, env) {
+ function endResponse(res, env) {
if (env.responseSent) {
return;
} else {
@@ -88,7 +90,7 @@
* @param {Response} res The response object from our routing function.
* @property {Function} Serializer
*/
- function sendResponse (res, env) {
+ function sendResponse(res, env) {
if (env.responseSent) {
return;
} else {
@@ -107,6 +109,15 @@
} else {
env.responseSent = true;
res.render.apply(res,
Array.prototype.slice.call(arguments, 2));
+ }
+ }
+
+ function jsonResponse(res, env) {
+ if (env.responseSent) {
+ return;
+ } else {
+ env.responseSent = true;
+ res.json.apply(res,
Array.prototype.slice.call(arguments, 2));
}
}
@@ -328,6 +339,7 @@
function interParams( req, res, next ) {
res.local('iwp', req.params[0] || parsoidConfig.defaultWiki ||
'');
res.local('pageName', req.params[1] || '');
+ res.local('oldid', req.query.oldid || null);
next();
}
@@ -534,10 +546,11 @@
}
}
- function wt2html( req, res, wt ) {
- var env = res.local('env');
- var prefix = res.local('iwp');
- var target = env.resolveTitle( env.normalizeTitle(
env.page.name ), '' );
+ function wt2html( req, res, wt, v2 ) {
+ var env = res.local('env'),
+ prefix = res.local('iwp'),
+ oldid = res.local('oldid'),
+ target = env.resolveTitle( env.normalizeTitle(
env.page.name ), '' );
// Set the timeout to 600 seconds.
req.connection.setTimeout( 600 * 1000 );
@@ -545,12 +558,10 @@
if ( env.conf.parsoid.allowCORS ) {
// allow cross-domain requests (CORS) so that parsoid
service
// can be used by third-party sites
- setHeader(res, env, 'Access-Control-Allow-Origin',
- env.conf.parsoid.allowCORS );
+ setHeader( res, env, 'Access-Control-Allow-Origin',
env.conf.parsoid.allowCORS );
}
- var tmpCb,
- oldid = req.query.oldid || null;
+ var tmpCb;
if ( wt ) {
wt = wt.replace( /\r/g, '' );
env.log('info', 'starting parsing');
@@ -620,13 +631,26 @@
// Set the source
env.setPageSrcInfo( src_and_metadata );
- var url = [ "", prefix,
-
encodeURIComponent( target ) +
- "?oldid=" +
env.page.meta.revision.revid
- ].join( "/" );
+ oldid = env.page.meta.revision.revid;
+
+ var path = "/";
+ if ( v2 ) {
+ path += [
+ "v2",
+ url.parse(
env.conf.parsoid.interwikiMap.get( prefix ) ).host,
+ encodeURIComponent(
target ),
+ v2.format,
+ oldid
+ ].join("/");
+ } else {
+ path += [
+ prefix,
+ encodeURIComponent(
target ) + "?oldid=" + oldid
+ ].join("/");
+ }
// Redirect to oldid
- relativeRedirect({"path" : url, "res" :
res, "env" : env});
+ relativeRedirect({ "path": path, "res":
res, "env": env });
env.log("info", "redirected to
revision", env.page.meta.revision.revid);
};
}
@@ -636,11 +660,19 @@
tpr.once( 'src', tmpCb );
function sendRes( doc ) {
- var out = DU.serializeNode( doc );
try {
setHeader(res, env, 'X-Parsoid-Performance',
env.getPerformanceHeader());
- setHeader(res, env, 'Content-Type', 'text/html;
charset=UTF-8' );
- endResponse(res, env, out );
+ if ( v2 && v2.format === "pagebundle" ) {
+ var dp =
doc.ownerDocument.getElementById('mw-data-parsoid');
+ dp.parentNode.removeChild(dp);
+ jsonResponse(res, env, {
+ html: DU.serializeNode( doc ),
+ "data-parsoid":
JSON.parse(dp.text)
+ });
+ } else {
+ setHeader(res, env, 'Content-Type',
'text/html; charset=UTF-8');
+ endResponse(res, env,
DU.serializeNode( doc ));
+ }
env.log("info", "completed parsing in",
env.performance.duration, "ms");
} catch (e) {
env.log("fatal/request", e);
@@ -648,13 +680,48 @@
}
}
+ // Attempt to define a new version of the API
+ // /v2/{domain}/{title}/{format}/{revision}
+
+ var supportedFormats = new Set([ "pagebundle", "html" ]);
+
+ function v2Middle( req, res, next ) {
+ function errOut(err) {
+ // FIXME: provide more consistent error handling.
+ sendResponse( res, {}, err, 404 );
+ }
+
+ var iwp = parsoidConfig.reverseIWMap.get( req.params.domain );
+ if ( !iwp ) {
+ return errOut("Invalid domain.");
+ }
+ res.local('iwp', iwp);
+
+ res.local('format', req.params.format || "html");
+ if ( !supportedFormats.has( res.local('format') ) ) {
+ return errOut("Invalid format.");
+ }
+
+ res.local('pageName', req.params.title);
+ res.local('oldid', req.params.revision || null);
+ next();
+ }
+
+ app.get('/v2/:domain/:title/:format?/:revision?', v2Middle,
parserEnvMw, function(req, res) {
+ var v2 = { format: res.local("format") };
+ if ( v2.format === "pagebundle" ) {
+ res.local('env').conf.parsoid.storeDataParsoid = true;
+ }
+ wt2html( req, res, null, v2 );
+ });
+
// Regular article parsing
- app.get( new RegExp( '/(' + getInterwikiRE() + ')/(.*)' ), interParams,
parserEnvMw, function(req, res) {
+ app.get(new RegExp('/(' + getInterwikiRE() + ')/(.*)'), interParams,
parserEnvMw, function(req, res) {
wt2html( req, res );
});
// Regular article serialization using POST
- app.post( new RegExp( '/(' + getInterwikiRE() + ')/(.*)' ),
interParams, parserEnvMw, function ( req, res ) {
+ app.post(new RegExp('/(' + getInterwikiRE() + ')/(.*)'), interParams,
parserEnvMw, function(req, res) {
// parse html or wt
if ( req.body.wt ) {
wt2html( req, res, req.body.wt );
@@ -711,7 +778,7 @@
gitVersion( function () {
app.listen( port, host );
console.log( ' - ' + instanceName + ' ready on ' +
- (host||'') + ':' + port );
+ (host||'') + ':' + port );
});
}
diff --git a/lib/mediawiki.ParsoidConfig.js b/lib/mediawiki.ParsoidConfig.js
index 7951bca..628277c 100644
--- a/lib/mediawiki.ParsoidConfig.js
+++ b/lib/mediawiki.ParsoidConfig.js
@@ -5,33 +5,42 @@
"use strict";
require('./core-upgrade.js');
-var Cite = require('./ext.Cite.js').Cite;
-var Util = require('./mediawiki.Util.js').Util;
+var url = require('url'),
+ Cite = require('./ext.Cite.js').Cite,
+ Util = require('./mediawiki.Util.js').Util;
var wikipedias =
"en|de|fr|nl|it|pl|es|ru|ja|pt|zh|sv|vi|uk|ca|no|fi|cs|hu|ko|fa|id|tr|ro|ar|sk|eo|da|sr|lt|ms|eu|he|sl|bg|kk|vo|war|hr|hi|et|az|gl|simple|nn|la|th|el|new|roa-rup|oc|sh|ka|mk|tl|ht|pms|te|ta|be-x-old|ceb|br|be|lv|sq|jv|mg|cy|lb|mr|is|bs|yo|an|hy|fy|bpy|lmo|pnb|ml|sw|bn|io|af|gu|zh-yue|ne|nds|ku|ast|ur|scn|su|qu|diq|ba|tt|my|ga|cv|ia|nap|bat-smg|map-bms|wa|kn|als|am|bug|tg|gd|zh-min-nan|yi|vec|hif|sco|roa-tara|os|arz|nah|uz|sah|mn|sa|mzn|pam|hsb|mi|li|ky|si|co|gan|glk|ckb|bo|fo|bar|bcl|ilo|mrj|fiu-vro|nds-nl|tk|vls|se|gv|ps|rue|dv|nrm|pag|koi|pa|rm|km|kv|udm|csb|mhr|fur|mt|wuu|lij|ug|lad|pi|zea|sc|bh|zh-classical|nov|ksh|or|ang|kw|so|nv|xmf|stq|hak|ay|frp|frr|ext|szl|pcd|ie|gag|haw|xal|ln|rw|pdc|pfl|krc|crh|eml|ace|gn|to|ce|kl|arc|myv|dsb|vep|pap|bjn|as|tpi|lbe|wo|mdf|jbo|kab|av|sn|cbk-zam|ty|srn|kbd|lo|ab|lez|mwl|ltg|ig|na|kg|tet|za|kaa|nso|zu|rmy|cu|tn|chr|got|sm|bi|mo|bm|iu|chy|ik|pih|ss|sd|pnt|cdo|ee|ha|ti|bxr|om|ks|ts|ki|ve|sg|rn|dz|cr|lg|ak|tum|fj|st|tw|ch|ny|ff|xh|ng|ii|cho|mh|aa|kj|ho|mus|kr|hz|tyv|min";
var interwikiMap = new Map();
+var reverseIWMap = new Map();
+
+function insertInMaps( prefix, domain, path, protocol ) {
+ interwikiMap.set( prefix, (protocol || 'http://') + domain + path );
+ reverseIWMap.set( domain, prefix );
+}
+
wikipedias.split('|').forEach(function(lang) {
- var dbLangPrefix = lang.replace(/-/g, '_');
[ 'wikipedia', 'wikivoyage', 'wikibooks', 'wikisource', 'wikinews',
'wikiquote', 'wikiversity', 'wiktionary'
].forEach(function(suffix) {
- interwikiMap.set(
- dbLangPrefix + suffix.replace('pedia', ''),
- 'http://' + lang + '.' + suffix + '.org/w/api.php'
- );
+ insertInMaps(
+ lang.replace(/-/g, '_') + suffix.replace('pedia', ''),
+ lang + '.' + suffix + '.org',
+ '/w/api.php'
+ );
});
});
// Add mediawiki.org, commons and localhost too
-interwikiMap.set('mediawikiwiki', 'http://www.mediawiki.org/w/api.php');
-interwikiMap.set('commonswiki', 'http://commons.wikimedia.org/w/api.php');
-interwikiMap.set('localhost', 'http://localhost/wiki/api.php');
+insertInMaps( 'mediawikiwiki', 'www.mediawiki.org', '/w/api.php' );
+insertInMaps( 'commonswiki', 'commons.wikimedia.org', '/w/api.php' );
+insertInMaps( 'localhost', 'localhost', '/wiki/api.php' );
// Build the interwiki regexp
var it = interwikiMap.keys(),
key = it.next(),
interwikiRegexp = key.value;
+
while ( !key.done ) {
interwikiRegexp += "|" + key.value;
key = it.next();
@@ -40,16 +49,19 @@
// Subclass a Map to avoid overwriting defaults
// Maybe just let that happen ... it only seems to occur
// from localSettings
-function IWMap() { Map.call(this); }
-Object.setPrototypeOf(IWMap, Map);
-IWMap.prototype = Object.create(Map.prototype, {
- constructor: { value: IWMap },
+function DefaultMap(defaultMap) {
+ Map.call(this);
+ this.defaultMap = defaultMap;
+}
+Object.setPrototypeOf(DefaultMap, Map);
+DefaultMap.prototype = Object.create(Map.prototype, {
+ constructor: { value: DefaultMap },
get: {
writeable: false,
value: function(key) {
return this.has(key)
? Map.prototype.get.call(this, key)
- : interwikiMap.get(key);
+ : this.defaultMap.get(key);
}
},
keys: {
@@ -58,7 +70,7 @@
// This returns an array because the es6-shim doesn't
// expose Iterators.
var keys = Array.from(Map.prototype.keys.call(this));
- interwikiMap.forEach(function(val, key) {
+ this.defaultMap.forEach(function(val, key) {
if ( keys.indexOf(key) < 0 ) {
keys.push(key);
}
@@ -87,7 +99,8 @@
// The default api proxy, overridden by apiProxyURIs entries
this.defaultAPIProxyURI = undefined;
- this.interwikiMap = new IWMap();
+ this.interwikiMap = new DefaultMap(interwikiMap);
+ this.reverseIWMap = new DefaultMap(reverseIWMap);
this.interwikiRegexp = interwikiRegexp;
if ( localSettings && localSettings.setup ) {
@@ -122,7 +135,9 @@
* back to ParsoidConfig.defaultAPIProxyURI if undefined (default value).
*/
ParsoidConfig.prototype.setInterwiki = function ( prefix, apiURI, apiProxyURI
) {
- this.interwikiMap.set(prefix, apiURI);
+ this.interwikiMap.set( prefix, apiURI );
+ this.reverseIWMap.set( url.parse( apiURI ).host, prefix );
+
if ( apiProxyURI !== undefined ) {
this.apiProxyURIs.set(prefix, apiProxyURI);
}
@@ -139,7 +154,9 @@
* @param {string} prefix
*/
ParsoidConfig.prototype.removeInterwiki = function ( prefix ) {
- this.interwikiMap.delete(prefix, null);
+ var u = url.parse( this.interwikiMap.get(prefix) );
+ this.reverseIWMap.delete( u.host, null );
+ this.interwikiMap.delete( prefix, null );
this.interwikiRegexp = this.interwikiRegexp.replace(
new RegExp( '(^|\\|)' + prefix + '(\\||$)' ), function() {
return arguments[0] === ("|" + prefix + "|") ? "|" : '';
--
To view, visit https://gerrit.wikimedia.org/r/159111
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I97a88cf3e0790ad0d2fa663ca80a1c0c92c129c3
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: Marcoil <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits