Cscott has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/295707

Change subject: Allow extensions to handle specific contentmodels.
......................................................................

Allow extensions to handle specific contentmodels.

Some extensions (for example, Extension:ProofreadPage) do more than
register specific extension tags: they also hook the parser to declare
responsibility for a specific contentmodel (like "proofread-page" or "json").
These are ContentHandler extensions
(https://www.mediawiki.org/wiki/Category:ContentHandler_extensions),
as opposed to tag extensions
(https://www.mediawiki.org/wiki/Category:Tag_extensions).
See https://www.mediawiki.org/wiki/Manual:ContentHandler for more
details.

We abstract the top-level parser entry points so that parsing can be
dispatched to extensions which handle alternative content models.  As a
demonstration, we add a core extension that handles the "json" content
model, rendering it in the DOM as an HTML table (as the json content
model in mediawiki core does).

Change-Id: I7ca31c99de8e04b1359bc521df121db0eb69e384
---
M bin/parse.js
M bin/roundtrip-test.js
M lib/api/routes.js
M lib/config/ParsoidConfig.js
M lib/config/WikiConfig.js
M lib/config/extapi.js
A lib/ext/JSON/index.js
M lib/utils/DOMUtils.js
M lib/wt2html/DOMPostProcessor.js
M package.json
M tests/mocha/api.js
M tests/mocha/parse.js
M tests/mocha/test.helpers.js
M tests/mockAPI.js
14 files changed, 550 insertions(+), 53 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/07/295707/1

diff --git a/bin/parse.js b/bin/parse.js
index 852cecd..4a53b24 100755
--- a/bin/parse.js
+++ b/bin/parse.js
@@ -72,6 +72,11 @@
                'boolean': false,
                'default': ParserEnv.prototype.defaultPageName,
        },
+       'contentmodel': {
+               description: 'The content model of the input.  Defaults to 
"wikitext" but extensions may support others (for example, "json").',
+               'boolean': false,
+               'default': null,
+       },
        'oldid': {
                description: 'Oldid of the given page.',
                'boolean': false,
@@ -165,19 +170,26 @@
        if (pb) {
                DU.applyPageBundle(doc, pb);
        }
+       if (argv.contentmodel) {
+               env.page.meta.revision.contentmodel = argv.contentmodel;
+       }
        return DU.serializeDOM(env, doc.body, argv.selser).then(function(out) {
                if (argv.html2wt || argv.wt2wt) {
                        return { trailingNL: true, out: out, env: env };
                } else {
-                       return startsAtWikitext(argv, env, out);
+                       env.setPageSrcInfo(out);
+                       return startsAtWikitext(argv, env);
                }
        });
 };
 
-startsAtWikitext = function(argv, env, input) {
-       env.setPageSrcInfo(input);
+startsAtWikitext = function(argv, env) {
+       // input string is in env.page.src.
+       console.assert(env.page.meta);
+
+       var handler = env.conf.wiki.getContentHandler(env, argv.contentmodel);
        // Kick off the pipeline by feeding the input into the parser pipeline
-       return env.pipelineFactory.parse(env, env.page.src).then(function(doc) {
+       return handler.toHTML(env).then(function(doc) {
                if (argv.lint) {
                        env.log("end/parse");
                }
@@ -249,13 +261,15 @@
                }
 
                if (typeof input === 'string') {
-                       return input;
+                       env.setPageSrcInfo(input.replace(/\r/g, ''));
+                       return;
                }
 
                if (argv.inputfile) {
                        // read input from the file, then process
                        var fileContents = fs.readFileSync(argv.inputfile, 
'utf8');
-                       return fileContents;
+                       env.setPageSrcInfo(fileContents.replace(/\r/g, ''));
+                       return;
                }
 
                // Send a message to stderr if there is no input for a while, 
since the
@@ -280,17 +294,17 @@
                        clearTimeout(stdinTimer);
                        // parse page if no input
                        if (inputChunks.length > 0) {
-                               return inputChunks.join('');
+                               
env.setPageSrcInfo(inputChunks.join('').replace(/\r/g, ''));
+                               return;
                        } else if (argv.html2wt || argv.html2html) {
                                env.log("fatal", "Pages start at wikitext.");
                        }
                        var target = env.normalizeAndResolvePageTitle();
                        return TemplateRequest
-                               .setPageSrcInfo(env, target, argv.oldid)
-                               .then(function() { return env.page.src; });
+                               .setPageSrcInfo(env, target, argv.oldid);
                });
-       }).then(function(str) {
-               str = str.replace(/\r/g, '');
+       }).then(function() {
+               // string to convert is in env.page.src.
                if (argv.html2wt || argv.html2html) {
                        var pb;
                        if (argv.pbin.length > 0) {
@@ -298,9 +312,9 @@
                        } else if (argv.pbinfile) {
                                pb = JSON.parse(fs.readFileSync(argv.pbinfile, 
'utf8'));
                        }
-                       return startsAtHTML(argv, env, str, pb);
+                       return startsAtHTML(argv, env, env.page.src, pb);
                } else {
-                       return startsAtWikitext(argv, env, str);
+                       return startsAtWikitext(argv, env);
                }
        });
 };
diff --git a/bin/roundtrip-test.js b/bin/roundtrip-test.js
index 58fba92..aa4da94 100755
--- a/bin/roundtrip-test.js
+++ b/bin/roundtrip-test.js
@@ -473,7 +473,6 @@
 function parsoidPost(profile, options) {
        var httpOptions = {
                method: 'POST',
-               json: true,
                body: options.data,
        };
 
@@ -484,11 +483,18 @@
                        uri += '/' + options.oldid;
                }
                httpOptions.body.scrub_wikitext = true;
+               // We want to encode the request but *not* decode the response.
+               httpOptions.body = JSON.stringify(httpOptions.body);
+               httpOptions.headers = {
+                       'Content-Type': 'application/json',
+               };
        } else {  // wt2html
                uri += 'wikitext/to/pagebundle/' + options.title;
                httpOptions.headers = {
                        Accept: apiUtils.pagebundleContentType(null, 
options.contentVersion),
                };
+               // setting json here encodes the request *and* decodes the 
response.
+               httpOptions.json = true;
        }
        httpOptions.uri = uri;
 
@@ -601,11 +607,12 @@
                // later use in selser.
                data.oldid = res.request.path.replace(/^(.*)\//, '');
                data.oldWt = body;
+               data.contentmodel = res.headers['x-contentmodel'] || 'wikitext';
                // First, fetch the HTML for the requested page's wikitext
                var opts = Object.assign({
                        wt2html: true,
                        recordSizes: true,
-                       data: { wikitext: data.oldWt },
+                       data: { wikitext: data.oldWt, contentmodel: 
data.contentmodel },
                }, parsoidOptions);
                return parsoidPost(profile, opts);
        }).then(function(body) {
@@ -618,6 +625,7 @@
                        recordSizes: true,
                        data: {
                                html: data.oldHTML,
+                               contentmodel: data.contentmodel,
                                original: {
                                        'data-parsoid': data.oldDp,
                                        'data-mw': data.oldMw,
@@ -644,6 +652,7 @@
                        oldid: data.oldid,
                        data: {
                                html: newDocument.outerHTML,
+                               contentmodel: data.contentmodel,
                                original: {
                                        'data-parsoid': data.oldDp,
                                        'data-mw': data.oldMw,
diff --git a/lib/api/routes.js b/lib/api/routes.js
index cdce213..accba20 100644
--- a/lib/api/routes.js
+++ b/lib/api/routes.js
@@ -288,7 +288,7 @@
 
                var p = TemplateRequest.setPageSrcInfo(env, target, 
oldid).then(function() {
                        env.log('info', 'started parsing');
-                       return env.pipelineFactory.parse(env, env.page.src);
+                       return env.conf.wiki.getContentHandler(env).toHTML(env);
                })
                .then(apiUtils.roundTripDiff.bind(null, env, req, res, false))
                // .timeout(REQ_TIMEOUT)
@@ -318,7 +318,7 @@
 
                var p = TemplateRequest.setPageSrcInfo(env, target, 
oldid).then(function() {
                        env.log('info', 'started parsing');
-                       return env.pipelineFactory.parse(env, env.page.src);
+                       return env.conf.wiki.getContentHandler(env).toHTML(env);
                }).then(function(doc) {
                        // strip newlines from the html
                        var html = doc.innerHTML.replace(/[\r\n]/g, '');
@@ -350,7 +350,7 @@
 
                var p = TemplateRequest.setPageSrcInfo(env, target, 
oldid).then(function() {
                        env.log('info', 'started parsing');
-                       return env.pipelineFactory.parse(env, env.page.src);
+                       return env.conf.wiki.getContentHandler(env).toHTML(env);
                }).then(function(doc) {
                        doc = DU.parseHTML(DU.toXML(doc));
                        var comment = 
doc.createComment('rtSelserEditTestComment');
@@ -386,7 +386,7 @@
                env.setPageSrcInfo(req.body.content);
 
                env.log('info', 'started parsing');
-               return env.pipelineFactory.parse(env, env.page.src).then(
+               return env.conf.wiki.getContentHandler(env).toHTML(env).then(
                        apiUtils.roundTripDiff.bind(null, env, req, res, false)
                ).then(
                        apiUtils.rtResponse.bind(null, env, req, res)
@@ -399,7 +399,7 @@
 
        // Spec'd in https://phabricator.wikimedia.org/T75955 and the API tests.
 
-       var wt2html = Promise.method(function(req, res, wt) {
+       var wt2html = Promise.method(function(req, res, wt, contentmodel) {
                var env = res.locals.env;
                var oldid = res.locals.oldid;
                var target = env.normalizeAndResolvePageTitle();
@@ -451,6 +451,9 @@
                        var p2;
                        if (typeof wikitext === 'string') {
                                env.setPageSrcInfo(wikitext);
+                               if (contentmodel) {
+                                       env.page.meta.revision.contentmodel = 
contentmodel;
+                               }
 
                                // Don't cache requests when wt is set in case 
somebody uses
                                // GET for wikitext parsing
@@ -468,7 +471,7 @@
                                        env.page.name = '';
                                }
 
-                               p2 = env.pipelineFactory.parse(env, wikitext);
+                               p2 = 
env.conf.wiki.getContentHandler(env).toHTML(env);
                        } else if (oldid) {
                                // Indicate the MediaWiki revision in a header 
as well for
                                // ease of extraction in clients.
@@ -481,7 +484,7 @@
                                        
stats.timing('wt2html.pageWithOldid.size.input', '', env.page.src.length);
                                }
 
-                               p2 = env.pipelineFactory.parse(env, 
env.page.src)
+                               p2 = 
env.conf.wiki.getContentHandler(env).toHTML(env)
                                .tap(function() {
                                        if (req.headers.cookie) {
                                                // Don't cache requests with a 
session.
@@ -546,7 +549,7 @@
                });
        });
 
-       var html2wt = Promise.method(function(req, res, html) {
+       var html2wt = Promise.method(function(req, res, html, contentmodel) {
                var env = res.locals.env;
                var opts = res.locals.opts;
 
@@ -557,6 +560,14 @@
                if (opts.original && opts.original.wikitext) {
                        env.setPageSrcInfo(opts.original.wikitext.body);
                }
+               if (!env.page.meta) {
+                       env.page.meta = { revision: {} };
+               }
+               env.page.meta.revision.contentmodel =
+                       contentmodel ||
+                       opts.contentmodel ||
+                       (opts.original && opts.original.contentmodel) ||
+                       env.page.meta.revision.contentmodel;
 
                // var REQ_TIMEOUT = env.conf.parsoid.timeouts.request;
 
@@ -635,7 +646,7 @@
                });
        });
 
-       var html2html = Promise.method(function(req, res) {
+       var html2html = Promise.method(function(req, res, contentmodel) {
                var env = res.locals.env;
                var opts = res.locals.opts;
 
@@ -646,6 +657,13 @@
                        // Similar to the html2wt case, stored html is expected
                        // to also pass in data-* attributes.
                        apiUtils.validatePageBundle(revision);
+                       if (!env.page.meta) {
+                               env.page.meta = { revision: {} };
+                       }
+                       contentmodel = env.page.meta.revision.contentmodel =
+                               contentmodel ||
+                               revision.contentmodel ||
+                               env.page.meta.revision.contentmodel;
                        DU.applyPageBundle(doc, {
                                parsoid: revision['data-parsoid'].body,
                                mw: revision['data-mw'] && 
revision['data-mw'].body,
@@ -669,7 +687,7 @@
                        env.setCaches(expansions);
                }
 
-               return wt2html(req, res);
+               return wt2html(req, res, null, contentmodel);
        });
 
        // GET requests
@@ -686,6 +704,9 @@
                                        return apiUtils.redirectToOldid(req, 
res);
                                }
                                apiUtils.setHeader(res, env, 'content-type', 
apiUtils.wikitextContentType(env));
+                               if (env.page.meta && env.page.meta.revision && 
env.page.meta.revision.contentmodel) {
+                                       apiUtils.setHeader(res, env, 
'x-contentmodel', env.page.meta.revision.contentmodel);
+                               }
                                apiUtils.sendResponse(res, env, env.page.src);
                        });
                } else {
@@ -707,18 +728,20 @@
                        }
                        // Accept wikitext as a string or object{body,headers}
                        var wikitext = opts.wikitext;
+                       var contentmodel = opts.contentmodel;
                        if (typeof wikitext !== 'string' && opts.wikitext) {
                                wikitext = opts.wikitext.body;
                        }
                        // We've been given source for this page
                        if (typeof wikitext !== 'string' && opts.original && 
opts.original.wikitext) {
                                wikitext = opts.original.wikitext.body;
+                               contentmodel = contentmodel || 
opts.original.contentmodel;
                        }
                        // Abort if no wikitext or title.
                        if (typeof wikitext !== 'string' && 
res.locals.titleMissing) {
                                return apiUtils.fatalRequest(env, 'No title or 
wikitext was provided.', 400);
                        }
-                       p = wt2html(req, res, wikitext);
+                       p = wt2html(req, res, wikitext, contentmodel);
                } else {  // from html/pagebundle
                        if (opts.format === 'wikitext') {
                                // html is required for serialization
@@ -734,9 +757,9 @@
                                // name for this (inputVersion, etc.) since 
contentVersion is
                                // for the output.
 
-                               p = html2wt(req, res, html);
+                               p = html2wt(req, res, html, opts.contentmodel);
                        } else {
-                               p = html2html(req, res);
+                               p = html2html(req, res, opts.contentmodel);
                        }
                }
                return apiUtils.cpuTimeout(p, res)
diff --git a/lib/config/ParsoidConfig.js b/lib/config/ParsoidConfig.js
index 82db32a..46fb58d 100644
--- a/lib/config/ParsoidConfig.js
+++ b/lib/config/ParsoidConfig.js
@@ -501,6 +501,9 @@
        // Give them some default extensions.
        if (!Array.isArray(apiConf.extensions)) {
                // Native support for certain extensions (Cite, etc)
+               // Note that in order to remain compatible with mediawiki core,
+               // core extensions (for example, for the JSON content model)
+               // must take precedence over other extensions.
                apiConf.extensions = Util.clone(this.defaultNativeExtensions);
                /* Include global user extensions */
                ParsoidConfig._collectExtensions(
@@ -591,7 +594,11 @@
        try {
                if (!fs.statSync(base).isDirectory()) { return; /* not dir */}
        } catch (e) { return; /* no file there */ }
-       fs.readdirSync(base).forEach(function(d) {
+       var files = fs.readdirSync(base);
+       // Sort! To ensure that we have a repeatable order in which we load
+       // and process extensions.
+       files.sort();
+       files.forEach(function(d) {
                var p = isNative ? path.join(base, d) : path.join(base, d, 
'parsoid');
                try {
                        if (!fs.statSync(p).isDirectory()) { return; /* not dir 
*/ }
diff --git a/lib/config/WikiConfig.js b/lib/config/WikiConfig.js
index b345864..da9cf8e 100644
--- a/lib/config/WikiConfig.js
+++ b/lib/config/WikiConfig.js
@@ -9,6 +9,10 @@
 var JSUtils = require('../utils/jsutils.js').JSUtils;
 var Util = require('../utils/Util.js').Util;
 
+// Circular references; these are loaded lazily.
+var WikitextSerializer;
+var SelectiveSerializer;
+
 // Make sure our base config is never modified
 JSUtils.deepFreeze(baseConfig);
 
@@ -482,6 +486,26 @@
 
        // Register native extension handlers second to overwrite the above.
        this.nativeExtPostProcessors = [];
+       this.extContentModel = Object.create(null);
+       this.extContentModel.wikitext = {
+               toHTML: function(env_) {
+                       // Default: wikitext parser.
+                       return env_.pipelineFactory.parse(env_, env_.page.src);
+               },
+               fromHTML: function(env_, body, useSelser) {
+                       // Circular refs
+                       if (!WikitextSerializer) {
+                               WikitextSerializer = 
require('../html2wt/WikitextSerializer.js')
+                                       .WikitextSerializer;
+                               SelectiveSerializer = 
require('../html2wt/SelectiveSerializer.js')
+                                       .SelectiveSerializer;
+                       }
+                       var Serializer = useSelser ?
+                                       SelectiveSerializer : 
WikitextSerializer;
+                       var serializer = new Serializer({ env: env_ });
+                       return serializer.serializeDOM(body);
+               },
+       };
        mwApiConf.extensions.forEach(function(Ext) {
                var ext = new Ext();
                var tags = ext.config.hasOwnProperty('tags') ? ext.config.tags 
: [];
@@ -491,6 +515,12 @@
                if (ext.config.hasOwnProperty('domPostProcessor')) {
                        
this.nativeExtPostProcessors.push(ext.config.domPostProcessor);
                }
+               Object.keys(ext.config.contentmodels || 
{}).forEach(function(cm) {
+                       // For compatibility with mediawiki core, the first
+                       // registered extension wins.
+                       if (this.extContentModel[cm]) { return; }
+                       this.extContentModel[cm] = ext.config.contentmodels[cm];
+               }, this);
        }, this);
 
        // Function hooks on this wiki, indexed by their normalized form
@@ -541,6 +571,29 @@
 /**
  * @method
  *
+ * Get an appropriate content handler, given a contentmodel.
+ *
+ * @param {MWEnvironment} env The environment containing the page source,
+ *   including its contentmodel.
+ * @param {string|undefined} forceContentModel An optional content model
+ *   which will override whatever the source specifies.
+ * @return an appropriate content handler with `toHTML` and `fromHTML`
+ *   methods.
+ */
+WikiConfig.prototype.getContentHandler = function(env, forceContentModel) {
+       var contentmodel = forceContentModel ||
+                       env.page.meta.revision.contentmodel ||
+                       'wikitext';
+       if (!this.extContentModel[contentmodel]) {
+               env.log('error', 'Unknown contentmodel', contentmodel);
+               contentmodel = 'wikitext';
+       }
+       return this.extContentModel[contentmodel];
+};
+
+/**
+ * @method
+ *
  * Get the canonical name of a magic word alias.
  *
  * @param {string} alias
diff --git a/lib/config/extapi.js b/lib/config/extapi.js
index 2f5a4f7..02f3dbc 100644
--- a/lib/config/extapi.js
+++ b/lib/config/extapi.js
@@ -30,6 +30,7 @@
                        // functions are changed.
                        Util: require('../utils/Util.js').Util,
                        DOMUtils: require('../utils/DOMUtils.js').DOMUtils,
+                       addMetaData: 
require('../wt2html/DOMPostProcessor.js').DOMPostProcessor.addMetaData,
                        defines: require('../wt2html/parser.defines.js'),
                };
        },
diff --git a/lib/ext/JSON/index.js b/lib/ext/JSON/index.js
new file mode 100644
index 0000000..dddc78f
--- /dev/null
+++ b/lib/ext/JSON/index.js
@@ -0,0 +1,246 @@
+/* ----------------------------------------------------------------------
+ * This is a demonstration of content model handling in extensions for
+ * Parsoid.  It implements the "json" content model, to allow editing
+ * JSON data structures using Visual Editor.  It represents the JSON
+ * structure as a nested table.
+ * ---------------------------------------------------------------------- */
+'use strict';
+
+var ParsoidExtApi = 
module.parent.require('./extapi.js').versionCheck('^0.5.1');
+var DU = ParsoidExtApi.DOMUtils;
+var Promise = ParsoidExtApi.Promise;
+var addMetaData = ParsoidExtApi.addMetaData;
+
+/**
+ * Native Parsoid implementation of the "json" contentmodel.
+ */
+var JSONExt = function() {
+       this.config = {
+               contentmodels: {
+                       json: this,
+               },
+       };
+};
+
+var PARSE_ERROR_HTML =
+       '<!DOCTYPE html><html>' +
+       '<body>' +
+       '<table data-mw=\'{"errors":[{"key":"bad-json"}]}\' typeof="mw:Error">' 
+
+       '</body>';
+
+// JSON to HTML
+// Implementation matches that from includes/content/JsonContent.php in
+// mediawiki core, except that we add some additional classes to distinguish
+// value types.
+JSONExt.prototype.toHTML = Promise.method(function(env) {
+       var document = DU.parseHTML('<!DOCTYPE html><html><body>');
+       var rootValueTable;
+       var objectTable;
+       var objectRow;
+       var arrayTable;
+       var valueCell;
+       var primitiveValue;
+       var src;
+
+       rootValueTable = function(parent, val) {
+               if (Array.isArray(val)) {
+                       // Wrap arrays in another array so they're visually 
boxed in a
+                       // container.  Otherwise they are visually 
indistinguishable from
+                       // a single value.
+                       return arrayTable(parent, [ val ]);
+               }
+               if (val && typeof val === "object") {
+                       return objectTable(parent, val);
+               }
+               parent.innerHTML =
+                       '<table class="mw-json 
mw-json-single-value"><tbody><tr><td>';
+               return primitiveValue(parent.querySelector('td'), val);
+       };
+       objectTable = function(parent, val) {
+               parent.innerHTML = '<table class="mw-json 
mw-json-object"><tbody>';
+               var tbody = parent.firstElementChild.firstElementChild;
+               var keys = Object.keys(val);
+               if (keys.length) {
+                       keys.forEach(function(k) {
+                               objectRow(tbody, k, val[k]);
+                       });
+               } else {
+                       tbody.innerHTML =
+                               '<tr><td class="mw-json-empty">';
+               }
+       };
+       objectRow = function(parent, key, val) {
+               var tr = document.createElement('tr');
+               if (key !== undefined) {
+                       var th = document.createElement('th');
+                       th.textContent = key;
+                       tr.appendChild(th);
+               }
+               valueCell(tr, val);
+               parent.appendChild(tr);
+       };
+       arrayTable = function(parent, val) {
+               parent.innerHTML = '<table class="mw-json 
mw-json-array"><tbody>';
+               var tbody = parent.firstElementChild.firstElementChild;
+               if (val.length) {
+                       for (var i = 0; i < val.length; i++) {
+                               objectRow(tbody, undefined, val[i]);
+                       }
+               } else {
+                       tbody.innerHTML =
+                               '<tr><td class="mw-json-empty">';
+               }
+       };
+       valueCell = function(parent, val) {
+               var td = document.createElement('td');
+               if (Array.isArray(val)) {
+                       arrayTable(td, val);
+               } else if (val && typeof val === 'object') {
+                       objectTable(td, val);
+               } else {
+                       td.classList.add('value');
+                       primitiveValue(td, val);
+               }
+               parent.appendChild(td);
+       };
+       primitiveValue = function(parent, val) {
+               if (val === null) {
+                       parent.classList.add('mw-json-null');
+               } else if (val === true || val === false) {
+                       parent.classList.add('mw-json-boolean');
+               } else if (typeof val === 'number') {
+                       parent.classList.add('mw-json-number');
+               } else if (typeof val === 'string') {
+                       parent.classList.add('mw-json-string');
+               }
+               parent.textContent = '' + val;
+       };
+
+       try {
+               src = JSON.parse(env.page.src);
+               rootValueTable(document.body, src);
+       } catch (e) {
+               document = DU.parseHTML(PARSE_ERROR_HTML);
+       }
+       // We're responsible for running the standard DOMPostProcessor on our
+       // resulting document.
+       if (env.pageBundle) {
+               DU.setDataParsoid(document, {
+                       pagebundle: {
+                               parsoid: { counter: -1, ids: {} },
+                               mw: { ids: {} },
+                       },
+               });
+               DU.visitDOM(document.body, DU.storeDataAttribs, {
+                       storeInPageBundle: env.pageBundle,
+                       env: env,
+               });
+       }
+       addMetaData(env, document);
+       return document;
+});
+
+// HTML to JSON
+JSONExt.prototype.fromHTML = Promise.method(function(env, body, useSelser) {
+       var rootValueTable;
+       var objectTable;
+       var objectRow;
+       var arrayTable;
+       var valueCell;
+       var primitiveValue;
+
+       console.assert(DU.isBody(body), 'Expected a body node.');
+
+       rootValueTable = function(el) {
+               if (el.classList.contains('mw-json-single-value')) {
+                       return primitiveValue(el.querySelector('tr > td'));
+               } else if (el.classList.contains('mw-json-array')) {
+                       return arrayTable(el)[0];
+               } else {
+                       return objectTable(el);
+               }
+       };
+       objectTable = function(el) {
+               console.assert(el.classList.contains('mw-json-object'));
+               var tbody = el;
+               if (
+                       tbody.firstElementChild &&
+                       tbody.firstElementChild.tagName === 'TBODY'
+               ) {
+                       tbody = tbody.firstElementChild;
+               }
+               var rows = tbody.children;
+               var obj = {};
+               var empty = rows.length === 0 || (
+                       rows[0].firstElementChild &&
+                       
rows[0].firstElementChild.classList.contains('mw-json-empty')
+               );
+               if (!empty) {
+                       for (var i = 0; i < rows.length; i++) {
+                               objectRow(rows[i], obj, undefined);
+                       }
+               }
+               return obj;
+       };
+       objectRow = function(tr, obj, key) {
+               var td = tr.firstElementChild;
+               if (key === undefined) {
+                       key = td.textContent;
+                       td = td.nextElementSibling;
+               }
+               obj[key] = valueCell(td);
+       };
+       arrayTable = function(el) {
+               console.assert(el.classList.contains('mw-json-array'));
+               var tbody = el;
+               if (
+                       tbody.firstElementChild &&
+                       tbody.firstElementChild.tagName === 'TBODY'
+               ) {
+                       tbody = tbody.firstElementChild;
+               }
+               var rows = tbody.children;
+               var arr = [];
+               var empty = rows.length === 0 || (
+                       rows[0].firstElementChild &&
+                       
rows[0].firstElementChild.classList.contains('mw-json-empty')
+               );
+               if (!empty) {
+                       for (var i = 0; i < rows.length; i++) {
+                               objectRow(rows[i], arr, i);
+                       }
+               }
+               return arr;
+       };
+       valueCell = function(el) {
+               console.assert(el.tagName === 'TD');
+               var table = el.firstElementChild;
+               if (table && table.classList.contains('mw-json-array')) {
+                       return arrayTable(table);
+               } else if (table && table.classList.contains('mw-json-object')) 
{
+                       return objectTable(table);
+               } else {
+                       return primitiveValue(el);
+               }
+       };
+       primitiveValue = function(el) {
+               if (el.classList.contains('mw-json-null')) {
+                       return null;
+               } else if (el.classList.contains('mw-json-boolean')) {
+                       return /true/.test(el.textContent);
+               } else if (el.classList.contains('mw-json-number')) {
+                       return +el.textContent;
+               } else if (el.classList.contains('mw-json-string')) {
+                       return '' + el.textContent;
+               } else {
+                       return undefined; // shouldn't happen.
+               }
+       };
+       var table = body.firstElementChild;
+       console.assert(table && table.tagName === 'TABLE');
+       return JSON.stringify(rootValueTable(table));
+});
+
+if (typeof module === "object") {
+       module.exports = JSONExt;
+}
diff --git a/lib/utils/DOMUtils.js b/lib/utils/DOMUtils.js
index 3e4c7eb..6888071 100644
--- a/lib/utils/DOMUtils.js
+++ b/lib/utils/DOMUtils.js
@@ -2632,8 +2632,6 @@
        return entities.encodeXML(string);
 };
 
-var WikitextSerializer;
-var SelectiveSerializer;
 /**
  * @method
  *
@@ -2645,14 +2643,6 @@
  * @param {Function} cb Optional callback.
  */
 DOMUtils.serializeDOM = function(env, body, useSelser, cb) {
-       // Circular refs
-       if (!WikitextSerializer) {
-               WikitextSerializer = require('../html2wt/WikitextSerializer.js')
-                       .WikitextSerializer;
-               SelectiveSerializer = require('../html2wt/SelectiveSerializer.js')
-                       .SelectiveSerializer;
-       }
-
        console.assert(DU.isBody(body), 'Expected a body node.');
 
        var hasOldId = (env.page.id && env.page.id !== '0');
@@ -2677,8 +2667,8 @@
                                // We'll just fallback to non-selser.
                                return;
                        }
-                       return env.pipelineFactory.parse(
-                               env, env.page.src
+                       return env.conf.wiki.getContentHandler(env).toHTML(
+                               env
                        ).then(function(doc) {
                                env.page.dom = DU.parseHTML(DU.toXML(doc)).body;
                        }, function(err) {
@@ -2698,8 +2688,6 @@
        }
 
        return p.then(function() {
-               var Serializer = useSelser ? SelectiveSerializer : WikitextSerializer;
-               var serializer = new Serializer({ env: env });
                // TODO(arlolra): There's probably an opportunity to refactor callers
                // of `DU.serializeDOM` to use `DU.ppToDOM` but this is a safe bet
                // for now, since it's the main entrypoint to serialization.
@@ -2708,7 +2696,9 @@
                        DU.visitDOM(env.page.dom, DU.loadDataAttribs, true);
                }
                env.page.editedDoc = body.ownerDocument;
-               return serializer.serializeDOM(body);
+               if (!env.page.meta) { env.page.meta = { revision: {} }; }
+               return env.conf.wiki.getContentHandler(env)
+                       .fromHTML(env, body, useSelser);
        }).nodify(cb);
 };
 
diff --git a/lib/wt2html/DOMPostProcessor.js b/lib/wt2html/DOMPostProcessor.js
index 4d12f81..cf383e7 100644
--- a/lib/wt2html/DOMPostProcessor.js
+++ b/lib/wt2html/DOMPostProcessor.js
@@ -180,7 +180,7 @@
 
 DOMPostProcessor.prototype.resetState = function(opts) {
        this.atTopLevel = opts && opts.toplevel;
-       this.displayTitle = null;
+       this.env.page.meta.displayTitle = null;
 };
 
 /**
@@ -199,7 +199,7 @@
                // Set title to display when present (last one wins).
                if (DU.hasNodeName(node, "meta") &&
                                node.getAttribute("property") === "mw:PageProp/displaytitle") {
-                       this.displayTitle = node.getAttribute("content");
+                       env.page.meta.displayTitle = node.getAttribute("content");
                }
        } else if (DU.isComment(node) && /^\{[^]+\}$/.test(node.data)) {
                // Convert serialized meta tags back from comments.
@@ -232,9 +232,7 @@
        return true;
 };
 
-DOMPostProcessor.prototype.addMetaData = function(document) {
-       var env = this.env;
-
+DOMPostProcessor.addMetaData = function(env, document) {
        // add <head> element if it was missing
        if (!document.head) {
                document.documentElement.
@@ -313,7 +311,7 @@
        appendToHead(document, 'link',
                { rel: 'dc:isVersionOf', href: wikiPageUrl });
 
-       document.title = this.displayTitle || env.page.meta.title || '';
+       document.title = env.page.meta.displayTitle || env.page.meta.title || '';
 
        // Add base href pointing to the wiki root
        appendToHead(document, 'base', { href: env.conf.wiki.baseURI });
@@ -400,7 +398,7 @@
        // For sub-pipeline documents, we are done.
        // For the top-level document, we generate <head> and add it.
        if (this.atTopLevel) {
-               this.addMetaData(document);
+               DOMPostProcessor.addMetaData(env, document);
        }
 
        this.emit('document', document);
diff --git a/package.json b/package.json
index 6152301..829fe72 100644
--- a/package.json
+++ b/package.json
@@ -56,7 +56,7 @@
     "dump-tokenizer": "node lib/wt2html/tokenizer.js",
     "mocha": "mocha --opts tests/mocha/mocha.opts tests/mocha",
     "parserTests": "node bin/parserTests.js --wt2html --wt2wt --html2wt --html2html --selser --no-color --quiet --blacklist",
-    "roundtrip": "node bin/roundtrip-test.js 'Barack Obama' && node bin/roundtrip-test.js 'Parkour'",
+    "roundtrip": "node bin/roundtrip-test.js 'Barack Obama' && node bin/roundtrip-test.js 'Parkour' && node bin/roundtrip-test.js --domain www.mediawiki.org 'User:Legoktm/test_this_is_json'",
     "test": "npm run nsp && npm run lint && npm run parserTests && npm run mocha",
     "cover-mocha": "istanbul cover _mocha --dir ./coverage/mocha --  --opts tests/mocha/mocha.opts tests/mocha",
     "cover-parserTests": "istanbul cover bin/parserTests.js --dir ./coverage/parserTests -- --wt2html --wt2wt --html2wt --html2html --selser --no-color --quiet --blacklist",
diff --git a/tests/mocha/api.js b/tests/mocha/api.js
index 742edd3..537da96 100644
--- a/tests/mocha/api.js
+++ b/tests/mocha/api.js
@@ -422,10 +422,28 @@
                        .end(done);
                });
 
+               it('should get from a title and revision (html, json content)', function(done) {
+                       request(api)
+                       .get(mockDomain + '/v3/page/html/JSON_Page/101')
+                       .expect(validHtmlResponse(function(doc) {
+                               doc.body.firstChild.nodeName.should.equal('TABLE');
+                       }))
+                       .end(done);
+               });
+
                it('should get from a title and revision (pagebundle)', function(done) {
                        request(api)
                        .get(mockDomain + '/v3/page/pagebundle/Main_Page/1')
                        .expect(validPageBundleResponse())
+                       .end(done);
+               });
+
+               it('should get from a title and revision (pagebundle, json content)', function(done) {
+                       request(api)
+                       .get(mockDomain + '/v3/page/pagebundle/JSON_Page/101')
+                       .expect(validPageBundleResponse(function(doc) {
+                               doc.body.firstChild.nodeName.should.equal('TABLE');
+                       }))
                        .end(done);
                });
 
@@ -448,6 +466,19 @@
                        .end(done);
                });
 
+               it('should accept json contentmodel as a string for html', function(done) {
+                       request(api)
+                       .post(mockDomain + '/v3/transform/wikitext/to/html/')
+                       .send({
+                               wikitext: '{"1":2}',
+                               contentmodel: 'json',
+                       })
+                       .expect(validHtmlResponse(function(doc) {
+                               doc.body.firstChild.nodeName.should.equal('TABLE');
+                       }))
+                       .end(done);
+               });
+
                it('should accept wikitext as a string for pagebundle', function(done) {
                        request(api)
                        .post(mockDomain + '/v3/transform/wikitext/to/pagebundle/')
@@ -456,6 +487,20 @@
                        })
                        .expect(validPageBundleResponse(function(doc) {
                                doc.body.firstChild.nodeName.should.equal('H2');
+                       }))
+                       .end(done);
+               });
+
+               it('should accept json contentmodel as a string for pagebundle', function(done) {
+                       request(api)
+                       .post(mockDomain + '/v3/transform/wikitext/to/pagebundle/')
+                       .send({
+                               wikitext: '{"1":2}',
+                               contentmodel: 'json',
+                       })
+                       .expect(validPageBundleResponse(function(doc) {
+                               doc.body.firstChild.nodeName.should.equal('TABLE');
+                               should.not.exist(doc.querySelector('*[typeof="mw:Error"]'));
                        }))
                        .end(done);
                });
@@ -756,6 +801,17 @@
                                html: '<!DOCTYPE html>\n<html prefix="dc: 
http://purl.org/dc/terms/ mw: http://mediawiki.org/rdf/"; 
about="http://localhost/index.php/Special:Redirect/revision/1";><head 
prefix="mwr: http://localhost/index.php/Special:Redirect/";><meta 
property="mw:articleNamespace" content="0"/><link rel="dc:replaces" 
resource="mwr:revision/0"/><meta property="dc:modified" 
content="2014-09-12T22:46:59.000Z"/><meta about="mwr:user/0" 
property="dc:title" content="MediaWiki default"/><link rel="dc:contributor" 
resource="mwr:user/0"/><meta property="mw:revisionSHA1" 
content="8e0aa2f2a7829587801db67d0424d9b447e09867"/><meta 
property="dc:description" content=""/><meta property="mw:parsoidVersion" 
content="0"/><link rel="dc:isVersionOf" 
href="http://localhost/index.php/Main_Page"/><title>Main_Page</title><base 
href="http://localhost/index.php/"/><link rel="stylesheet" 
href="//localhost/load.php?modules=mediawiki.legacy.commonPrint,shared|mediawiki.skinning.elements|mediawiki.skinning.content|mediawiki.skinning.interface|skins.vector.styles|site|mediawiki.skinning.content.parsoid&amp;only=styles&amp;debug=true&amp;skin=vector"/></head><body
 data-parsoid=\'{"dsr":[0,592,0,0]}\' lang="en" class="mw-content-ltr 
sitedir-ltr ltr mw-body mw-body-content mediawiki" dir="ltr"><p 
data-parsoid=\'{"dsr":[0,59,0,0]}\'><strong 
data-parsoid=\'{"stx":"html","dsr":[0,59,8,9]}\'>MediaWiki has been 
successfully installed.</strong></p>\n\n<p 
data-parsoid=\'{"dsr":[61,171,0,0]}\'>Consult the <a rel="mw:ExtLink" 
href="//meta.wikimedia.org/wiki/Help:Contents" 
data-parsoid=\'{"targetOff":114,"contentOffsets":[114,126],"dsr":[73,127,41,1]}\'>User\'s
 Guide</a> for information on using the wiki software.</p>\n\n<h2 
data-parsoid=\'{"dsr":[173,194,2,2]}\'> Getting started </h2>\n<ul 
data-parsoid=\'{"dsr":[195,592,0,0]}\'><li 
data-parsoid=\'{"dsr":[195,300,1,0]}\'> <a rel="mw:ExtLink" 
href="//www.mediawiki.org/wiki/Special:MyLanguage/Manual:Configuration_settings"
 
data-parsoid=\'{"targetOff":272,"contentOffsets":[272,299],"dsr":[197,300,75,1]}\'>Configuration
 settings list</a></li>\n<li data-parsoid=\'{"dsr":[301,373,1,0]}\'> <a 
rel="mw:ExtLink" href="//www.mediawiki.org/wiki/Special:MyLanguage/Manual:FAQ" 
data-parsoid=\'{"targetOff":359,"contentOffsets":[359,372],"dsr":[303,373,56,1]}\'>MediaWiki
 FAQ</a></li>\n<li data-parsoid=\'{"dsr":[374,472,1,0]}\'> <a rel="mw:ExtLink" 
href="https://lists.wikimedia.org/mailman/listinfo/mediawiki-announce"; 
data-parsoid=\'{"targetOff":441,"contentOffsets":[441,471],"dsr":[376,472,65,1]}\'>MediaWiki
 release mailing list</a></li>\n<li data-parsoid=\'{"dsr":[473,592,1,0]}\'> <a 
rel="mw:ExtLink" 
href="//www.mediawiki.org/wiki/Special:MyLanguage/Localisation#Translation_resources"
 
data-parsoid=\'{"targetOff":555,"contentOffsets":[555,591],"dsr":[475,592,80,1]}\'>Localise
 MediaWiki for your language</a></li></ul></body></html>',
                        })
                        .expect(validWikitextResponse())
+                       .end(done);
+               });
+
+               it('should accept html for json contentmodel as a string', function(done) {
+                       request(api)
+                       .post(mockDomain + '/v3/transform/html/to/wikitext/')
+                       .send({
+                               html: '<!DOCTYPE html>\n<html prefix="dc: 
http://purl.org/dc/terms/ mw: http://mediawiki.org/rdf/";><head prefix="mwr: 
http://en.wikipedia.org/wiki/Special:Redirect/";><meta charset="utf-8"/><meta 
property="mw:articleNamespace" content="0"/><meta property="mw:html:version" 
content="1.2.1"/><meta property="mw:data-parsoid:version" 
content="0.0.2"/><link rel="dc:isVersionOf" 
href="//en.wikipedia.org/wiki/Main_Page"/><title></title><base 
href="//en.wikipedia.org/wiki/"/><link rel="stylesheet" 
href="//en.wikipedia.org/w/load.php?modules=mediawiki.legacy.commonPrint,shared|mediawiki.skinning.elements|mediawiki.skinning.content|mediawiki.skinning.interface|skins.vector.styles|site|mediawiki.skinning.content.parsoid|ext.cite.style&amp;only=styles&amp;skin=vector"/></head><body
 lang="en" class="mw-content-ltr sitedir-ltr ltr mw-body mw-body-content 
mediawiki" dir="ltr"><table class="mw-json 
mw-json-object"><tbody><tr><th>a</th><td class="value 
mw-json-number">4</td></tr><tr><th>b</th><td class="value 
mw-json-number">3</td></tr></tbody></table></body></html>',
+                               contentmodel: 'json',
+                       })
+                       .expect(validWikitextResponse('{"a":4,"b":3}'))
                        .end(done);
                });
 
@@ -1232,4 +1288,42 @@
 
        });  // end html2html
 
+       describe('html2html (JSON contentmodel)', function() {
+
+               var previousRevHTML = {
+                       revid: 101,
+                       html: {
+                               headers: {
+                                       'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/1.2.1"',
+                               },
+                               body: '<body lang="en" class="mw-content-ltr sitedir-ltr ltr mw-body mw-body-content mediawiki" dir="ltr"><table class="mw-json mw-json-array"><tbody><tr><td><table class="mw-json mw-json-array"><tbody><tr><td class="value mw-json-number">1</td></tr></tbody></table></td></tr></tbody></table></body>',
+                       },
+                       contentmodel: 'json',
+                       "data-parsoid": {
+                               headers: {
+                                       'content-type': 'application/json;profile="https://www.mediawiki.org/wiki/Specs/data-parsoid/0.0.2"',
+                               },
+                               body: {
+                                       'counter': 1,
+                                       'ids': {
+                                       },
+                               },
+                       },
+               };
+
+               it('should accept the previous revision to reuse expansions (html)', function(done) {
+                       request(api)
+                       .post(mockDomain + '/v3/transform/html/to/html/JSON_Page/101')
+                       .send({
+                               previous: previousRevHTML,
+                       })
+                       .expect(validHtmlResponse(function(doc) {
+                               doc.body.firstChild.tagName.should.equal('TABLE');
+                               should.not.exist(doc.querySelector('*[typeof="mw:Error"]'));
+                       }))
+                       .end(done);
+               });
+
+       });  // end html2html (JSON)
+
 });
diff --git a/tests/mocha/parse.js b/tests/mocha/parse.js
index fd82f8e..a65f2f5 100644
--- a/tests/mocha/parse.js
+++ b/tests/mocha/parse.js
@@ -47,6 +47,36 @@
                        });
                });
 
+               it('should support json contentmodel', function() {
+                       var opts = { contentmodel: 'json' };
+                       var testval = {a: "a", b: [2, true, ""], c: null};
+                       return parse(JSON.stringify(testval), opts).then(function(doc) {
+                               doc.should.have.property('nodeName', '#document');
+                               doc.outerHTML.startsWith('<!DOCTYPE html><html').should.equal(true);
+                               doc.outerHTML.endsWith('</body></html>').should.equal(true);
+                               // verify that body has only one <html> tag, one <body> tag, etc.
+                               doc.childNodes.length.should.equal(2);// <!DOCTYPE> and <html>
+                               doc.firstChild.nodeName.should.equal('html');
+                               doc.lastChild.nodeName.should.equal('HTML');
+                               // <html> children should be <head> and <body>
+                               var html = doc.documentElement;
+                               html.childNodes.length.should.equal(2);
+                               html.firstChild.nodeName.should.equal('HEAD');
+                               html.lastChild.nodeName.should.equal('BODY');
+                               // <body> should have one child, <table>
+                               var body = doc.body;
+                               body.childElementCount.should.equal(1);
+                               body.firstElementChild.nodeName.should.equal('TABLE');
+                               var table = doc.body.firstElementChild;
+                               table.classList.contains('mw-json').should.equal(true);
+                               // Now convert back to JSON
+                               return serialize(doc, null, opts);
+                       }).then(function(result) {
+                               var v = JSON.parse(result); // shouldn't throw an error!
+                               v.should.eql(testval);
+                       });
+               });
+
                ['no subpages', 'subpages'].forEach(function(desc, subpages) {
                        describe('should handle page titles with embedded ? (' + desc + ')', function() {
                                var linktests = [
diff --git a/tests/mocha/test.helpers.js b/tests/mocha/test.helpers.js
index 511c836..a98af34 100644
--- a/tests/mocha/test.helpers.js
+++ b/tests/mocha/test.helpers.js
@@ -12,7 +12,10 @@
                        env = options.tweakEnv(env) || env;
                }
                env.setPageSrcInfo(src);
-               return env.pipelineFactory.parse(env, env.page.src)
+               if (options.contentmodel) {
+                       env.page.meta.revision.contentmodel = options.contentmodel;
+               }
+               return env.conf.wiki.getContentHandler(env).toHTML(env)
                .then(function(doc) {
                        // linter tests need the env object
                        return { env: env, doc: doc };
@@ -30,6 +33,12 @@
                if (options.tweakEnv) {
                        env = options.tweakEnv(env) || env;
                }
+               if (!env.page.meta) {
+                       env.page.meta = { revision: {} };
+               }
+               if (options.contentmodel) {
+                       env.page.meta.revision.contentmodel = options.contentmodel;
+               }
                pb = pb || DU.extractPageBundle(doc);
                if (pb) {
                        DU.applyPageBundle(doc, pb);
diff --git a/tests/mockAPI.js b/tests/mockAPI.js
index ecba71a..e6d49e5 100644
--- a/tests/mockAPI.js
+++ b/tests/mockAPI.js
@@ -149,6 +149,27 @@
        },
 };
 
+var jsonPage = {
+       query: {
+               pages: {
+                       '101': {
+                               pageid: 101,
+                               ns: 0,
+                               title: 'JSON_Page',
+                               revisions: [
+                                       {
+                                               revid: 101,
+                                               parentid: 0,
+                                               contentmodel: 'json',
+                                               contentformat: 'text/json',
+                                               '*': '[1]',
+                                       },
+                               ],
+                       },
+               },
+       },
+};
+
 var fnames = {
        'Image:Foobar.jpg': 'Foobar.jpg',
        'File:Foobar.jpg': 'Foobar.jpg',
@@ -249,6 +270,8 @@
                                return cb(null , largePage);
                        } else if (body.revids === '100' || body.titles === 'Reuse_Page') {
                                return cb(null , reusePage);
+                       } else if (body.revids === '101' || body.titles === 'JSON_Page') {
+                               return cb(null , jsonPage);
                        }
                }
 

-- 
To view, visit https://gerrit.wikimedia.org/r/295707
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7ca31c99de8e04b1359bc521df121db0eb69e384
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Cscott <canan...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to