jenkins-bot has submitted this change and it was merged.

Change subject: Return a JSON response with separate html and data-parsoid
......................................................................


Return a JSON response with separate html and data-parsoid

 * Implements the v2 pagebundle-style routes:
     /v2/{domain}/{title}/{format}/{revision}

Bug: 52936
Change-Id: I97a88cf3e0790ad0d2fa663ca80a1c0c92c129c3
---
M api/ParsoidService.js
M lib/mediawiki.ParsoidConfig.js
2 files changed, 125 insertions(+), 41 deletions(-)

Approvals:
  Subramanya Sastry: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/api/ParsoidService.js b/api/ParsoidService.js
index ea2d65c..903b281 100644
--- a/api/ParsoidService.js
+++ b/api/ParsoidService.js
@@ -3,6 +3,8 @@
  */
 "use strict";
 
+require('../lib/core-upgrade.js');
+
 /**
  * @class ParserServiceModule
  * @singleton
@@ -13,11 +15,11 @@
 var express = require('express'),
        domino = require('domino'),
        hbs = require('handlebars'),
-       // memwatch = require('memwatch'),
        childProc = require('child_process'),
        cluster = require('cluster'),
        fs = require('fs'),
        path = require('path'),
+       url = require('url'),
        util = require('util'),
        pkg = require('../package.json'),
        Diff = require('../lib/mediawiki.Diff.js').Diff,
@@ -70,7 +72,7 @@
         * @param {Response} res The response object from our routing function.
         * @property {Function} Serializer
         */
-       function endResponse (res, env) {
+       function endResponse(res, env) {
                if (env.responseSent) {
                        return;
                } else {
@@ -88,7 +90,7 @@
         * @param {Response} res The response object from our routing function.
         * @property {Function} Serializer
         */
-       function sendResponse (res, env) {
+       function sendResponse(res, env) {
                if (env.responseSent) {
                        return;
                } else {
@@ -107,6 +109,15 @@
                } else {
                        env.responseSent = true;
                        res.render.apply(res, 
Array.prototype.slice.call(arguments, 2));
+               }
+       }
+
+       function jsonResponse(res, env) {
+               if (env.responseSent) {
+                       return;
+               } else {
+                       env.responseSent = true;
+                       res.json.apply(res, 
Array.prototype.slice.call(arguments, 2));
                }
        }
 
@@ -328,6 +339,7 @@
        function interParams( req, res, next ) {
                res.local('iwp', req.params[0] || parsoidConfig.defaultWiki || 
'');
                res.local('pageName', req.params[1] || '');
+               res.local('oldid', req.query.oldid || null);
                next();
        }
 
@@ -534,10 +546,11 @@
                }
        }
 
-       function wt2html( req, res, wt ) {
-               var env = res.local('env');
-               var prefix = res.local('iwp');
-               var target = env.resolveTitle( env.normalizeTitle( 
env.page.name ), '' );
+       function wt2html( req, res, wt, v2 ) {
+               var env = res.local('env'),
+                       prefix = res.local('iwp'),
+                       oldid = res.local('oldid'),
+                       target = env.resolveTitle( env.normalizeTitle( 
env.page.name ), '' );
 
                // Set the timeout to 600 seconds..
                req.connection.setTimeout( 600 * 1000 );
@@ -545,12 +558,10 @@
                if ( env.conf.parsoid.allowCORS ) {
                        // allow cross-domain requests (CORS) so that parsoid 
service
                        // can be used by third-party sites
-                       setHeader(res, env, 'Access-Control-Allow-Origin',
-                                                  env.conf.parsoid.allowCORS );
+                       setHeader( res, env, 'Access-Control-Allow-Origin', 
env.conf.parsoid.allowCORS );
                }
 
-               var tmpCb,
-                       oldid = req.query.oldid || null;
+               var tmpCb;
                if ( wt ) {
                        wt = wt.replace( /\r/g, '' );
                        env.log('info', 'starting parsing');
@@ -620,13 +631,26 @@
 
                                        // Set the source
                                        env.setPageSrcInfo( src_and_metadata );
-                                       var url = [ "", prefix,
-                                                               
encodeURIComponent( target ) +
-                                                               "?oldid=" + 
env.page.meta.revision.revid
-                                                       ].join( "/" );
+                                       oldid = env.page.meta.revision.revid;
+
+                                       var path = "/";
+                                       if ( v2 ) {
+                                               path += [
+                                                       "v2",
+                                                       url.parse( 
env.conf.parsoid.interwikiMap.get( prefix ) ).host,
+                                                       encodeURIComponent( 
target ),
+                                                       v2.format,
+                                                       oldid
+                                               ].join("/");
+                                       } else {
+                                               path += [
+                                                       prefix,
+                                                       encodeURIComponent( 
target ) + "?oldid=" + oldid
+                                               ].join("/");
+                                       }
 
                                        // Redirect to oldid
-                                       relativeRedirect({"path" : url, "res" : 
res, "env" : env});
+                                       relativeRedirect({ "path": path, "res": 
res, "env": env });
                                        env.log("info", "redirected to 
revision", env.page.meta.revision.revid);
                                };
                        }
@@ -636,11 +660,19 @@
                tpr.once( 'src', tmpCb );
 
                function sendRes( doc ) {
-                       var out = DU.serializeNode( doc );
                        try {
                                setHeader(res, env, 'X-Parsoid-Performance', 
env.getPerformanceHeader());
-                               setHeader(res, env, 'Content-Type', 'text/html; 
charset=UTF-8' );
-                               endResponse(res, env,  out );
+                               if ( v2 && v2.format === "pagebundle" ) {
+                                       var dp = 
doc.ownerDocument.getElementById('mw-data-parsoid');
+                                       dp.parentNode.removeChild(dp);
+                                       jsonResponse(res, env, {
+                                               html: DU.serializeNode( doc ),
+                                               "data-parsoid": 
JSON.parse(dp.text)
+                                       });
+                               } else {
+                                       setHeader(res, env, 'Content-Type', 
'text/html; charset=UTF-8');
+                                       endResponse(res, env,  
DU.serializeNode( doc ));
+                               }
                                env.log("info", "completed parsing in", 
env.performance.duration, "ms");
                        } catch (e) {
                                env.log("fatal/request", e);
@@ -648,13 +680,48 @@
                }
        }
 
+       // Attempt to define a new version of the API
+       // /v2/{domain}/{title}/{format}/{revision}
+
+       var supportedFormats = new Set([ "pagebundle", "html" ]);
+
+       function v2Middle( req, res, next ) {
+               function errOut(err) {
+                       // FIXME: provide more consistent error handling.
+                       sendResponse( res, {}, err, 404 );
+               }
+
+               var iwp = parsoidConfig.reverseIWMap.get( req.params.domain );
+               if ( !iwp ) {
+                       return errOut("Invalid domain.");
+               }
+               res.local('iwp', iwp);
+
+               res.local('format', req.params.format || "html");
+               if ( !supportedFormats.has( res.local('format') ) ) {
+                       return errOut("Invalid format.");
+               }
+
+               res.local('pageName', req.params.title);
+               res.local('oldid', req.params.revision || null);
+               next();
+       }
+
+       app.get('/v2/:domain/:title/:format?/:revision?', v2Middle, 
parserEnvMw, function(req, res) {
+               var v2 = { format: res.local("format") };
+               if ( v2.format === "pagebundle" ) {
+                       res.local('env').conf.parsoid.storeDataParsoid = true;
+               }
+               wt2html( req, res, null, v2 );
+       });
+
        // Regular article parsing
-       app.get( new RegExp( '/(' + getInterwikiRE() + ')/(.*)' ), interParams, 
parserEnvMw, function(req, res) {
+       app.get(new RegExp('/(' + getInterwikiRE() + ')/(.*)'), interParams, 
parserEnvMw, function(req, res) {
                wt2html( req, res );
        });
 
        // Regular article serialization using POST
-       app.post( new RegExp( '/(' + getInterwikiRE() + ')/(.*)' ), 
interParams, parserEnvMw, function ( req, res ) {
+       app.post(new RegExp('/(' + getInterwikiRE() + ')/(.*)'), interParams, 
parserEnvMw, function(req, res) {
                // parse html or wt
                if ( req.body.wt ) {
                        wt2html( req, res, req.body.wt );
@@ -711,7 +778,7 @@
        gitVersion( function () {
                app.listen( port, host );
                console.log( ' - ' + instanceName + ' ready on ' +
-                            (host||'') + ':' + port );
+                       (host||'') + ':' + port );
        });
 
 }
diff --git a/lib/mediawiki.ParsoidConfig.js b/lib/mediawiki.ParsoidConfig.js
index 7951bca..628277c 100644
--- a/lib/mediawiki.ParsoidConfig.js
+++ b/lib/mediawiki.ParsoidConfig.js
@@ -5,33 +5,42 @@
 "use strict";
 
 require('./core-upgrade.js');
-var Cite = require('./ext.Cite.js').Cite;
-var Util = require('./mediawiki.Util.js').Util;
+var url = require('url'),
+       Cite = require('./ext.Cite.js').Cite,
+       Util = require('./mediawiki.Util.js').Util;
 
 var wikipedias = 
"en|de|fr|nl|it|pl|es|ru|ja|pt|zh|sv|vi|uk|ca|no|fi|cs|hu|ko|fa|id|tr|ro|ar|sk|eo|da|sr|lt|ms|eu|he|sl|bg|kk|vo|war|hr|hi|et|az|gl|simple|nn|la|th|el|new|roa-rup|oc|sh|ka|mk|tl|ht|pms|te|ta|be-x-old|ceb|br|be|lv|sq|jv|mg|cy|lb|mr|is|bs|yo|an|hy|fy|bpy|lmo|pnb|ml|sw|bn|io|af|gu|zh-yue|ne|nds|ku|ast|ur|scn|su|qu|diq|ba|tt|my|ga|cv|ia|nap|bat-smg|map-bms|wa|kn|als|am|bug|tg|gd|zh-min-nan|yi|vec|hif|sco|roa-tara|os|arz|nah|uz|sah|mn|sa|mzn|pam|hsb|mi|li|ky|si|co|gan|glk|ckb|bo|fo|bar|bcl|ilo|mrj|fiu-vro|nds-nl|tk|vls|se|gv|ps|rue|dv|nrm|pag|koi|pa|rm|km|kv|udm|csb|mhr|fur|mt|wuu|lij|ug|lad|pi|zea|sc|bh|zh-classical|nov|ksh|or|ang|kw|so|nv|xmf|stq|hak|ay|frp|frr|ext|szl|pcd|ie|gag|haw|xal|ln|rw|pdc|pfl|krc|crh|eml|ace|gn|to|ce|kl|arc|myv|dsb|vep|pap|bjn|as|tpi|lbe|wo|mdf|jbo|kab|av|sn|cbk-zam|ty|srn|kbd|lo|ab|lez|mwl|ltg|ig|na|kg|tet|za|kaa|nso|zu|rmy|cu|tn|chr|got|sm|bi|mo|bm|iu|chy|ik|pih|ss|sd|pnt|cdo|ee|ha|ti|bxr|om|ks|ts|ki|ve|sg|rn|dz|cr|lg|ak|tum|fj|st|tw|ch|ny|ff|xh|ng|ii|cho|mh|aa|kj|ho|mus|kr|hz|tyv|min";
 
 var interwikiMap = new Map();
+var reverseIWMap = new Map();
+
+function insertInMaps( prefix, domain, path, protocol ) {
+       interwikiMap.set( prefix, (protocol || 'http://') + domain + path );
+       reverseIWMap.set( domain, prefix );
+}
+
 wikipedias.split('|').forEach(function(lang) {
-       var dbLangPrefix = lang.replace(/-/g, '_');
        [ 'wikipedia', 'wikivoyage', 'wikibooks', 'wikisource', 'wikinews',
          'wikiquote', 'wikiversity', 'wiktionary'
        ].forEach(function(suffix) {
-               interwikiMap.set(
-                       dbLangPrefix + suffix.replace('pedia', ''),
-                       'http://' + lang + '.' + suffix + '.org/w/api.php'
-               );
+               insertInMaps(
+                       lang.replace(/-/g, '_') + suffix.replace('pedia', ''),
+                       lang + '.' + suffix + '.org',
+                       '/w/api.php'
+                );
        });
 });
 
 // Add mediawiki.org, commons and localhost too
-interwikiMap.set('mediawikiwiki', 'http://www.mediawiki.org/w/api.php');
-interwikiMap.set('commonswiki', 'http://commons.wikimedia.org/w/api.php');
-interwikiMap.set('localhost', 'http://localhost/wiki/api.php');
+insertInMaps( 'mediawikiwiki', 'www.mediawiki.org', '/w/api.php' );
+insertInMaps( 'commonswiki', 'commons.wikimedia.org', '/w/api.php' );
+insertInMaps( 'localhost', 'localhost', '/wiki/api.php' );
 
 // Build the interwiki regexp
 var it = interwikiMap.keys(),
        key = it.next(),
        interwikiRegexp = key.value;
+
 while ( !key.done ) {
        interwikiRegexp += "|" + key.value;
        key = it.next();
@@ -40,16 +49,19 @@
 // Subclass a Map to avoid overwriting defaults
 // Maybe just let that happen ... it only seems to occur
 // from localSettings
-function IWMap() { Map.call(this); }
-Object.setPrototypeOf(IWMap, Map);
-IWMap.prototype = Object.create(Map.prototype, {
-       constructor: { value: IWMap },
+function DefaultMap(defaultMap) {
+       Map.call(this);
+       this.defaultMap = defaultMap;
+}
+Object.setPrototypeOf(DefaultMap, Map);
+DefaultMap.prototype = Object.create(Map.prototype, {
+       constructor: { value: DefaultMap },
        get: {
                writeable: false,
                value: function(key) {
                        return this.has(key)
                                ? Map.prototype.get.call(this, key)
-                               : interwikiMap.get(key);
+                               : this.defaultMap.get(key);
                }
        },
        keys: {
@@ -58,7 +70,7 @@
                        // This return an array because the es6-shim doesn't
                        // expose Iterators.
                        var keys = Array.from(Map.prototype.keys.call(this));
-                       interwikiMap.forEach(function(val, key) {
+                       this.defaultMap.forEach(function(val, key) {
                                if ( keys.indexOf(key) < 0 ) {
                                        keys.push(key);
                                }
@@ -87,7 +99,8 @@
        // The default api proxy, overridden by apiProxyURIs entries
        this.defaultAPIProxyURI = undefined;
 
-       this.interwikiMap = new IWMap();
+       this.interwikiMap = new DefaultMap(interwikiMap);
+       this.reverseIWMap = new DefaultMap(reverseIWMap);
        this.interwikiRegexp = interwikiRegexp;
 
        if ( localSettings && localSettings.setup ) {
@@ -122,7 +135,9 @@
  * back to ParsoidConfig.defaultAPIProxyURI if undefined (default value).
  */
 ParsoidConfig.prototype.setInterwiki = function ( prefix, apiURI, apiProxyURI 
) {
-       this.interwikiMap.set(prefix, apiURI);
+       this.interwikiMap.set( prefix, apiURI );
+       this.reverseIWMap.set( url.parse( apiURI ).host, prefix );
+
        if ( apiProxyURI !== undefined ) {
                this.apiProxyURIs.set(prefix, apiProxyURI);
        }
@@ -139,7 +154,9 @@
  * @param {string} prefix
  */
 ParsoidConfig.prototype.removeInterwiki = function ( prefix ) {
-       this.interwikiMap.delete(prefix, null);
+       var u = url.parse( this.interwikiMap.get(prefix) );
+       this.reverseIWMap.delete( u.host, null );
+       this.interwikiMap.delete( prefix, null );
        this.interwikiRegexp = this.interwikiRegexp.replace(
                new RegExp( '(^|\\|)' + prefix + '(\\||$)' ), function() {
                        return arguments[0] === ("|" + prefix + "|") ? "|" : '';

-- 
To view, visit https://gerrit.wikimedia.org/r/159111
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I97a88cf3e0790ad0d2fa663ca80a1c0c92c129c3
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: Marcoil <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to