Cscott has uploaded a new change for review. https://gerrit.wikimedia.org/r/295707
Change subject: Allow extensions to handle specific contentmodels. ...................................................................... Allow extensions to handle specific contentmodels. Some extensions (for example, Extension:ProofreadPage) do more than register specific extension tags: they also hook the parser to declare responsibility for a specific contentmodel (like "proofread-page" or "json"). These are https://www.mediawiki.org/wiki/Category:ContentHandler_extensions (as opposed to https://www.mediawiki.org/wiki/Category:Tag_extensions). See https://www.mediawiki.org/wiki/Manual:ContentHandler for more details. We abstract the top-level parser entry points to allow dispatching to extensions to parse alternative content models and add a core extension as a demonstration that handles the "json" content model, rendering it in DOM as an HTML table (as the json content model in mediawiki core does). Change-Id: I7ca31c99de8e04b1359bc521df121db0eb69e384 --- M bin/parse.js M bin/roundtrip-test.js M lib/api/routes.js M lib/config/ParsoidConfig.js M lib/config/WikiConfig.js M lib/config/extapi.js A lib/ext/JSON/index.js M lib/utils/DOMUtils.js M lib/wt2html/DOMPostProcessor.js M package.json M tests/mocha/api.js M tests/mocha/parse.js M tests/mocha/test.helpers.js M tests/mockAPI.js 14 files changed, 550 insertions(+), 53 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/07/295707/1 diff --git a/bin/parse.js b/bin/parse.js index 852cecd..4a53b24 100755 --- a/bin/parse.js +++ b/bin/parse.js @@ -72,6 +72,11 @@ 'boolean': false, 'default': ParserEnv.prototype.defaultPageName, }, + 'contentmodel': { + description: 'The content model of the input. Defaults to "wikitext" but extensions may support others (for example, "json").', + 'boolean': false, + 'default': null, + }, 'oldid': { description: 'Oldid of the given page.', 'boolean': false, @@ -165,19 +170,26 @@ if (pb) { DU.applyPageBundle(doc, pb); } + if (argv.contentmodel) { + env.page.meta.revision.contentmodel = argv.contentmodel; + } return DU.serializeDOM(env, doc.body, argv.selser).then(function(out) { if (argv.html2wt || argv.wt2wt) { return { trailingNL: true, out: out, env: env }; } else { - return startsAtWikitext(argv, env, out); + env.setPageSrcInfo(out); + return startsAtWikitext(argv, env); } }); }; -startsAtWikitext = function(argv, env, input) { - env.setPageSrcInfo(input); +startsAtWikitext = function(argv, env) { + // input string is in env.page.src. + console.assert(env.page.meta); + + var handler = env.conf.wiki.getContentHandler(env, argv.contentmodel); // Kick off the pipeline by feeding the input into the parser pipeline - return env.pipelineFactory.parse(env, env.page.src).then(function(doc) { + return handler.toHTML(env).then(function(doc) { if (argv.lint) { env.log("end/parse"); } @@ -249,13 +261,15 @@ } if (typeof input === 'string') { - return input; + env.setPageSrcInfo(input.replace(/\r/g, '')); + return; } if (argv.inputfile) { // read input from the file, then process var fileContents = fs.readFileSync(argv.inputfile, 'utf8'); - return fileContents; + env.setPageSrcInfo(fileContents.replace(/\r/g, '')); + return; } // Send a message to stderr if there is no input for a while, since the @@ -280,17 +294,17 @@ clearTimeout(stdinTimer); // parse page if no input if (inputChunks.length > 0) { - return inputChunks.join(''); + env.setPageSrcInfo(inputChunks.join('').replace(/\r/g, '')); + return; } else if (argv.html2wt || argv.html2html) { env.log("fatal", "Pages start at wikitext."); } var target = env.normalizeAndResolvePageTitle(); return TemplateRequest - .setPageSrcInfo(env, target, argv.oldid) - .then(function() { return env.page.src; }); + .setPageSrcInfo(env, target, argv.oldid); }); - }).then(function(str) { - str = str.replace(/\r/g, ''); + }).then(function() { + // string to convert is in env.page.src. if (argv.html2wt || argv.html2html) { var pb; if (argv.pbin.length > 0) { @@ -298,9 +312,9 @@ } else if (argv.pbinfile) { pb = JSON.parse(fs.readFileSync(argv.pbinfile, 'utf8')); } - return startsAtHTML(argv, env, str, pb); + return startsAtHTML(argv, env, env.page.src, pb); } else { - return startsAtWikitext(argv, env, str); + return startsAtWikitext(argv, env); } }); }; diff --git a/bin/roundtrip-test.js b/bin/roundtrip-test.js index 58fba92..aa4da94 100755 --- a/bin/roundtrip-test.js +++ b/bin/roundtrip-test.js @@ -473,7 +473,6 @@ function parsoidPost(profile, options) { var httpOptions = { method: 'POST', - json: true, body: options.data, }; @@ -484,11 +483,18 @@ uri += '/' + options.oldid; } httpOptions.body.scrub_wikitext = true; + // We want to encode the request but *not* decode the response. + httpOptions.body = JSON.stringify(httpOptions.body); + httpOptions.headers = { + 'Content-Type': 'application/json', + }; } else { // wt2html uri += 'wikitext/to/pagebundle/' + options.title; httpOptions.headers = { Accept: apiUtils.pagebundleContentType(null, options.contentVersion), }; + // setting json here encodes the request *and* decodes the response. + httpOptions.json = true; } httpOptions.uri = uri; @@ -601,11 +607,12 @@ // later use in selser. data.oldid = res.request.path.replace(/^(.*)\//, ''); data.oldWt = body; + data.contentmodel = res.headers['x-contentmodel'] || 'wikitext'; // First, fetch the HTML for the requested page's wikitext var opts = Object.assign({ wt2html: true, recordSizes: true, - data: { wikitext: data.oldWt }, + data: { wikitext: data.oldWt, contentmodel: data.contentmodel }, }, parsoidOptions); return parsoidPost(profile, opts); }).then(function(body) { @@ -618,6 +625,7 @@ recordSizes: true, data: { html: data.oldHTML, + contentmodel: data.contentmodel, original: { 'data-parsoid': data.oldDp, 'data-mw': data.oldMw, @@ -644,6 +652,7 @@ oldid: data.oldid, data: { html: newDocument.outerHTML, + contentmodel: data.contentmodel, original: { 'data-parsoid': data.oldDp, 'data-mw': data.oldMw, diff --git a/lib/api/routes.js b/lib/api/routes.js index cdce213..accba20 100644 --- a/lib/api/routes.js +++ b/lib/api/routes.js @@ -288,7 +288,7 @@ var p = TemplateRequest.setPageSrcInfo(env, target, oldid).then(function() { env.log('info', 'started parsing'); - return env.pipelineFactory.parse(env, env.page.src); + return env.conf.wiki.getContentHandler(env).toHTML(env); }) .then(apiUtils.roundTripDiff.bind(null, env, req, res, false)) // .timeout(REQ_TIMEOUT) @@ -318,7 +318,7 @@ var p = TemplateRequest.setPageSrcInfo(env, target, oldid).then(function() { env.log('info', 'started parsing'); - return env.pipelineFactory.parse(env, env.page.src); + return env.conf.wiki.getContentHandler(env).toHTML(env); }).then(function(doc) { // strip newlines from the html var html = doc.innerHTML.replace(/[\r\n]/g, ''); @@ -350,7 +350,7 @@ var p = TemplateRequest.setPageSrcInfo(env, target, oldid).then(function() { env.log('info', 'started parsing'); - return env.pipelineFactory.parse(env, env.page.src); + return env.conf.wiki.getContentHandler(env).toHTML(env); }).then(function(doc) { doc = DU.parseHTML(DU.toXML(doc)); var comment = doc.createComment('rtSelserEditTestComment'); @@ -386,7 +386,7 @@ env.setPageSrcInfo(req.body.content); env.log('info', 'started parsing'); - return env.pipelineFactory.parse(env, env.page.src).then( + return env.conf.wiki.getContentHandler(env).toHTML(env).then( apiUtils.roundTripDiff.bind(null, env, req, res, false) ).then( apiUtils.rtResponse.bind(null, env, req, res) @@ -399,7 +399,7 @@ // Spec'd in https://phabricator.wikimedia.org/T75955 and the API tests. - var wt2html = Promise.method(function(req, res, wt) { + var wt2html = Promise.method(function(req, res, wt, contentmodel) { var env = res.locals.env; var oldid = res.locals.oldid; var target = env.normalizeAndResolvePageTitle(); @@ -451,6 +451,9 @@ var p2; if (typeof wikitext === 'string') { env.setPageSrcInfo(wikitext); + if (contentmodel) { + env.page.meta.revision.contentmodel = contentmodel; + } // Don't cache requests when wt is set in case somebody uses // GET for wikitext parsing @@ -468,7 +471,7 @@ env.page.name = ''; } - p2 = env.pipelineFactory.parse(env, wikitext); + p2 = env.conf.wiki.getContentHandler(env).toHTML(env); } else if (oldid) { // Indicate the MediaWiki revision in a header as well for // ease of extraction in clients. @@ -481,7 +484,7 @@ stats.timing('wt2html.pageWithOldid.size.input', '', env.page.src.length); } - p2 = env.pipelineFactory.parse(env, env.page.src) + p2 = env.conf.wiki.getContentHandler(env).toHTML(env) .tap(function() { if (req.headers.cookie) { // Don't cache requests with a session. @@ -546,7 +549,7 @@ }); }); - var html2wt = Promise.method(function(req, res, html) { + var html2wt = Promise.method(function(req, res, html, contentmodel) { var env = res.locals.env; var opts = res.locals.opts; @@ -557,6 +560,14 @@ if (opts.original && opts.original.wikitext) { env.setPageSrcInfo(opts.original.wikitext.body); } + if (!env.page.meta) { + env.page.meta = { revision: {} }; + } + env.page.meta.revision.contentmodel = + contentmodel || + opts.contentmodel || + (opts.original && opts.original.contentmodel) || + env.page.meta.revision.contentmodel; // var REQ_TIMEOUT = env.conf.parsoid.timeouts.request; @@ -635,7 +646,7 @@ }); }); - var html2html = Promise.method(function(req, res) { + var html2html = Promise.method(function(req, res, contentmodel) { var env = res.locals.env; var opts = res.locals.opts; @@ -646,6 +657,13 @@ // Similar to the html2wt case, stored html is expected // to also pass in data-* attributes. apiUtils.validatePageBundle(revision); + if (!env.page.meta) { + env.page.meta = { revision: {} }; + } + contentmodel = env.page.meta.revision.contentmodel = + contentmodel || + revision.contentmodel || + env.page.meta.revision.contentmodel; DU.applyPageBundle(doc, { parsoid: revision['data-parsoid'].body, mw: revision['data-mw'] && revision['data-mw'].body, @@ -669,7 +687,7 @@ env.setCaches(expansions); } - return wt2html(req, res); + return wt2html(req, res, null, contentmodel); }); // GET requests @@ -686,6 +704,9 @@ return apiUtils.redirectToOldid(req, res); } apiUtils.setHeader(res, env, 'content-type', apiUtils.wikitextContentType(env)); + if (env.page.meta && env.page.meta.revision && env.page.meta.revision.contentmodel) { + apiUtils.setHeader(res, env, 'x-contentmodel', env.page.meta.revision.contentmodel); + } apiUtils.sendResponse(res, env, env.page.src); }); } else { @@ -707,18 +728,20 @@ } // Accept wikitext as a string or object{body,headers} var wikitext = opts.wikitext; + var contentmodel = opts.contentmodel; if (typeof wikitext !== 'string' && opts.wikitext) { wikitext = opts.wikitext.body; } // We've been given source for this page if (typeof wikitext !== 'string' && opts.original && opts.original.wikitext) { wikitext = opts.original.wikitext.body; + contentmodel = contentmodel || opts.original.contentmodel; } // Abort if no wikitext or title. if (typeof wikitext !== 'string' && res.locals.titleMissing) { return apiUtils.fatalRequest(env, 'No title or wikitext was provided.', 400); } - p = wt2html(req, res, wikitext); + p = wt2html(req, res, wikitext, contentmodel); } else { // from html/pagebundle if (opts.format === 'wikitext') { // html is required for serialization @@ -734,9 +757,9 @@ // name for this (inputVersion, etc.) since contentVersion is // for the output. - p = html2wt(req, res, html); + p = html2wt(req, res, html, opts.contentmodel); } else { - p = html2html(req, res); + p = html2html(req, res, opts.contentmodel); } } return apiUtils.cpuTimeout(p, res) diff --git a/lib/config/ParsoidConfig.js b/lib/config/ParsoidConfig.js index 82db32a..46fb58d 100644 --- a/lib/config/ParsoidConfig.js +++ b/lib/config/ParsoidConfig.js @@ -501,6 +501,9 @@ // Give them some default extensions. if (!Array.isArray(apiConf.extensions)) { // Native support for certain extensions (Cite, etc) + // Note that in order to remain compatible with mediawiki core, + // core extensions (for example, for the JSON content model) + // must take precedence over other extensions. apiConf.extensions = Util.clone(this.defaultNativeExtensions); /* Include global user extensions */ ParsoidConfig._collectExtensions( @@ -591,7 +594,11 @@ try { if (!fs.statSync(base).isDirectory()) { return; /* not dir */} } catch (e) { return; /* no file there */ } - fs.readdirSync(base).forEach(function(d) { + var files = fs.readdirSync(base); + // Sort! To ensure that we have a repeatable order in which we load + // and process extensions. + files.sort(); + files.forEach(function(d) { var p = isNative ? path.join(base, d) : path.join(base, d, 'parsoid'); try { if (!fs.statSync(p).isDirectory()) { return; /* not dir */ } diff --git a/lib/config/WikiConfig.js b/lib/config/WikiConfig.js index b345864..da9cf8e 100644 --- a/lib/config/WikiConfig.js +++ b/lib/config/WikiConfig.js @@ -9,6 +9,10 @@ var JSUtils = require('../utils/jsutils.js').JSUtils; var Util = require('../utils/Util.js').Util; +// Circular references; these are loaded lazily. +var WikitextSerializer; +var SelectiveSerializer; + // Make sure our base config is never modified JSUtils.deepFreeze(baseConfig); @@ -482,6 +486,26 @@ // Register native extension handlers second to overwrite the above. this.nativeExtPostProcessors = []; + this.extContentModel = Object.create(null); + this.extContentModel.wikitext = { + toHTML: function(env_) { + // Default: wikitext parser. + return env_.pipelineFactory.parse(env_, env_.page.src); + }, + fromHTML: function(env_, body, useSelser) { + // Circular refs + if (!WikitextSerializer) { + WikitextSerializer = require('../html2wt/WikitextSerializer.js') + .WikitextSerializer; + SelectiveSerializer = require('../html2wt/SelectiveSerializer.js') + .SelectiveSerializer; + } + var Serializer = useSelser ? + SelectiveSerializer : WikitextSerializer; + var serializer = new Serializer({ env: env_ }); + return serializer.serializeDOM(body); + }, + }; mwApiConf.extensions.forEach(function(Ext) { var ext = new Ext(); var tags = ext.config.hasOwnProperty('tags') ? ext.config.tags : []; @@ -491,6 +515,12 @@ if (ext.config.hasOwnProperty('domPostProcessor')) { this.nativeExtPostProcessors.push(ext.config.domPostProcessor); } + Object.keys(ext.config.contentmodels || {}).forEach(function(cm) { + // For compatibility with mediawiki core, the first + // registered extension wins. + if (this.extContentModel[cm]) { return; } + this.extContentModel[cm] = ext.config.contentmodels[cm]; + }, this); }, this); // Function hooks on this wiki, indexed by their normalized form @@ -541,6 +571,29 @@ /** * @method * + * Get an appropriate content handler, given a contentmodel. + * + * @param {MWEnvironment} env The environment containing the page source, + * including its contentmodel. + * @param {string|undefined} forceContentModel An optional content model + * which will override whatever the source specifies. + * @return an appropriate content handler with `toHTML` and `fromHTML` + * methods. + */ +WikiConfig.prototype.getContentHandler = function(env, forceContentModel) { + var contentmodel = forceContentModel || + env.page.meta.revision.contentmodel || + 'wikitext'; + if (!this.extContentModel[contentmodel]) { + env.log('error', 'Unknown contentmodel', contentmodel); + contentmodel = 'wikitext'; + } + return this.extContentModel[contentmodel]; +}; + +/** + * @method + * * Get the canonical name of a magic word alias. * * @param {string} alias diff --git a/lib/config/extapi.js b/lib/config/extapi.js index 2f5a4f7..02f3dbc 100644 --- a/lib/config/extapi.js +++ b/lib/config/extapi.js @@ -30,6 +30,7 @@ // functions are changed. Util: require('../utils/Util.js').Util, DOMUtils: require('../utils/DOMUtils.js').DOMUtils, + addMetaData: require('../wt2html/DOMPostProcessor.js').DOMPostProcessor.addMetaData, defines: require('../wt2html/parser.defines.js'), }; }, diff --git a/lib/ext/JSON/index.js b/lib/ext/JSON/index.js new file mode 100644 index 0000000..dddc78f --- /dev/null +++ b/lib/ext/JSON/index.js @@ -0,0 +1,246 @@ +/* ---------------------------------------------------------------------- + * This is a demonstration of content model handling in extensions for + * Parsoid. It implements the "json" content model, to allow editing + * JSON data structures using Visual Editor. It represents the JSON + * structure as a nested table. + * ---------------------------------------------------------------------- */ +'use strict'; + +var ParsoidExtApi = module.parent.require('./extapi.js').versionCheck('^0.5.1'); +var DU = ParsoidExtApi.DOMUtils; +var Promise = ParsoidExtApi.Promise; +var addMetaData = ParsoidExtApi.addMetaData; + +/** + * Native Parsoid implementation of the "json" contentmodel. + */ +var JSONExt = function() { + this.config = { + contentmodels: { + json: this, + }, + }; +}; + +var PARSE_ERROR_HTML = + '<!DOCTYPE html><html>' + + '<body>' + + '<table data-mw=\'{"errors":[{"key":"bad-json"}]}\' typeof="mw:Error">' + + '</body>'; + +// JSON to HTML +// Implementation matches that from includes/content/JsonContent.php in +// mediawiki core, except that we add some additional classes to distinguish +// value types. +JSONExt.prototype.toHTML = Promise.method(function(env) { + var document = DU.parseHTML('<!DOCTYPE html><html><body>'); + var rootValueTable; + var objectTable; + var objectRow; + var arrayTable; + var valueCell; + var primitiveValue; + var src; + + rootValueTable = function(parent, val) { + if (Array.isArray(val)) { + // Wrap arrays in another array so they're visually boxed in a + // container. Otherwise they are visually indistinguishable from + // a single value. + return arrayTable(parent, [ val ]); + } + if (val && typeof val === "object") { + return objectTable(parent, val); + } + parent.innerHTML = + '<table class="mw-json mw-json-single-value"><tbody><tr><td>'; + return primitiveValue(parent.querySelector('td'), val); + }; + objectTable = function(parent, val) { + parent.innerHTML = '<table class="mw-json mw-json-object"><tbody>'; + var tbody = parent.firstElementChild.firstElementChild; + var keys = Object.keys(val); + if (keys.length) { + keys.forEach(function(k) { + objectRow(tbody, k, val[k]); + }); + } else { + tbody.innerHTML = + '<tr><td class="mw-json-empty">'; + } + }; + objectRow = function(parent, key, val) { + var tr = document.createElement('tr'); + if (key !== undefined) { + var th = document.createElement('th'); + th.textContent = key; + tr.appendChild(th); + } + valueCell(tr, val); + parent.appendChild(tr); + }; + arrayTable = function(parent, val) { + parent.innerHTML = '<table class="mw-json mw-json-array"><tbody>'; + var tbody = parent.firstElementChild.firstElementChild; + if (val.length) { + for (var i = 0; i < val.length; i++) { + objectRow(tbody, undefined, val[i]); + } + } else { + tbody.innerHTML = + '<tr><td class="mw-json-empty">'; + } + }; + valueCell = function(parent, val) { + var td = document.createElement('td'); + if (Array.isArray(val)) { + arrayTable(td, val); + } else if (val && typeof val === 'object') { + objectTable(td, val); + } else { + td.classList.add('value'); + primitiveValue(td, val); + } + parent.appendChild(td); + }; + primitiveValue = function(parent, val) { + if (val === null) { + parent.classList.add('mw-json-null'); + } else if (val === true || val === false) { + parent.classList.add('mw-json-boolean'); + } else if (typeof val === 'number') { + parent.classList.add('mw-json-number'); + } else if (typeof val === 'string') { + parent.classList.add('mw-json-string'); + } + parent.textContent = '' + val; + }; + + try { + src = JSON.parse(env.page.src); + rootValueTable(document.body, src); + } catch (e) { + document = DU.parseHTML(PARSE_ERROR_HTML); + } + // We're responsible for running the standard DOMPostProcessor on our + // resulting document. + if (env.pageBundle) { + DU.setDataParsoid(document, { + pagebundle: { + parsoid: { counter: -1, ids: {} }, + mw: { ids: {} }, + }, + }); + DU.visitDOM(document.body, DU.storeDataAttribs, { + storeInPageBundle: env.pageBundle, + env: env, + }); + } + addMetaData(env, document); + return document; +}); + +// HTML to JSON +JSONExt.prototype.fromHTML = Promise.method(function(env, body, useSelser) { + var rootValueTable; + var objectTable; + var objectRow; + var arrayTable; + var valueCell; + var primitiveValue; + + console.assert(DU.isBody(body), 'Expected a body node.'); + + rootValueTable = function(el) { + if (el.classList.contains('mw-json-single-value')) { + return primitiveValue(el.querySelector('tr > td')); + } else if (el.classList.contains('mw-json-array')) { + return arrayTable(el)[0]; + } else { + return objectTable(el); + } + }; + objectTable = function(el) { + console.assert(el.classList.contains('mw-json-object')); + var tbody = el; + if ( + tbody.firstElementChild && + tbody.firstElementChild.tagName === 'TBODY' + ) { + tbody = tbody.firstElementChild; + } + var rows = tbody.children; + var obj = {}; + var empty = rows.length === 0 || ( + rows[0].firstElementChild && + rows[0].firstElementChild.classList.contains('mw-json-empty') + ); + if (!empty) { + for (var i = 0; i < rows.length; i++) { + objectRow(rows[i], obj, undefined); + } + } + return obj; + }; + objectRow = function(tr, obj, key) { + var td = tr.firstElementChild; + if (key === undefined) { + key = td.textContent; + td = td.nextElementSibling; + } + obj[key] = valueCell(td); + }; + arrayTable = function(el) { + console.assert(el.classList.contains('mw-json-array')); + var tbody = el; + if ( + tbody.firstElementChild && + tbody.firstElementChild.tagName === 'TBODY' + ) { + tbody = tbody.firstElementChild; + } + var rows = tbody.children; + var arr = []; + var empty = rows.length === 0 || ( + rows[0].firstElementChild && + rows[0].firstElementChild.classList.contains('mw-json-empty') + ); + if (!empty) { + for (var i = 0; i < rows.length; i++) { + objectRow(rows[i], arr, i); + } + } + return arr; + }; + valueCell = function(el) { + console.assert(el.tagName === 'TD'); + var table = el.firstElementChild; + if (table && table.classList.contains('mw-json-array')) { + return arrayTable(table); + } else if (table && table.classList.contains('mw-json-object')) { + return objectTable(table); + } else { + return primitiveValue(el); + } + }; + primitiveValue = function(el) { + if (el.classList.contains('mw-json-null')) { + return null; + } else if (el.classList.contains('mw-json-boolean')) { + return /true/.test(el.textContent); + } else if (el.classList.contains('mw-json-number')) { + return +el.textContent; + } else if (el.classList.contains('mw-json-string')) { + return '' + el.textContent; + } else { + return undefined; // shouldn't happen. + } + }; + var table = body.firstElementChild; + console.assert(table && table.tagName === 'TABLE'); + return JSON.stringify(rootValueTable(table)); +}); + +if (typeof module === "object") { + module.exports = JSONExt; +} diff --git a/lib/utils/DOMUtils.js b/lib/utils/DOMUtils.js index 3e4c7eb..6888071 100644 --- a/lib/utils/DOMUtils.js +++ b/lib/utils/DOMUtils.js @@ -2632,8 +2632,6 @@ return entities.encodeXML(string); }; -var WikitextSerializer; -var SelectiveSerializer; /** * @method * @@ -2645,14 +2643,6 @@ * @param {Function} cb Optional callback. */ DOMUtils.serializeDOM = function(env, body, useSelser, cb) { - // Circular refs - if (!WikitextSerializer) { - WikitextSerializer = require('../html2wt/WikitextSerializer.js') - .WikitextSerializer; - SelectiveSerializer = require('../html2wt/SelectiveSerializer.js') - .SelectiveSerializer; - } - console.assert(DU.isBody(body), 'Expected a body node.'); var hasOldId = (env.page.id && env.page.id !== '0'); @@ -2677,8 +2667,8 @@ // We'll just fallback to non-selser. return; } - return env.pipelineFactory.parse( - env, env.page.src + return env.conf.wiki.getContentHandler(env).toHTML( + env ).then(function(doc) { env.page.dom = DU.parseHTML(DU.toXML(doc)).body; }, function(err) { @@ -2698,8 +2688,6 @@ } return p.then(function() { - var Serializer = useSelser ? SelectiveSerializer : WikitextSerializer; - var serializer = new Serializer({ env: env }); // TODO(arlolra): There's probably an opportunity to refactor callers // of `DU.serializeDOM` to use `DU.ppToDOM` but this is a safe bet // for now, since it's the main entrypoint to serialization. @@ -2708,7 +2696,9 @@ DU.visitDOM(env.page.dom, DU.loadDataAttribs, true); } env.page.editedDoc = body.ownerDocument; - return serializer.serializeDOM(body); + if (!env.page.meta) { env.page.meta = { revision: {} }; } + return env.conf.wiki.getContentHandler(env) + .fromHTML(env, body, useSelser); }).nodify(cb); }; diff --git a/lib/wt2html/DOMPostProcessor.js b/lib/wt2html/DOMPostProcessor.js index 4d12f81..cf383e7 100644 --- a/lib/wt2html/DOMPostProcessor.js +++ b/lib/wt2html/DOMPostProcessor.js @@ -180,7 +180,7 @@ DOMPostProcessor.prototype.resetState = function(opts) { this.atTopLevel = opts && opts.toplevel; - this.displayTitle = null; + this.env.page.meta.displayTitle = null; }; /** @@ -199,7 +199,7 @@ // Set title to display when present (last one wins). if (DU.hasNodeName(node, "meta") && node.getAttribute("property") === "mw:PageProp/displaytitle") { - this.displayTitle = node.getAttribute("content"); + env.page.meta.displayTitle = node.getAttribute("content"); } } else if (DU.isComment(node) && /^\{[^]+\}$/.test(node.data)) { // Convert serialized meta tags back from comments. @@ -232,9 +232,7 @@ return true; }; -DOMPostProcessor.prototype.addMetaData = function(document) { - var env = this.env; - +DOMPostProcessor.addMetaData = function(env, document) { // add <head> element if it was missing if (!document.head) { document.documentElement. @@ -313,7 +311,7 @@ appendToHead(document, 'link', { rel: 'dc:isVersionOf', href: wikiPageUrl }); - document.title = this.displayTitle || env.page.meta.title || ''; + document.title = env.page.meta.displayTitle || env.page.meta.title || ''; // Add base href pointing to the wiki root appendToHead(document, 'base', { href: env.conf.wiki.baseURI }); @@ -400,7 +398,7 @@ // For sub-pipeline documents, we are done. // For the top-level document, we generate <head> and add it. if (this.atTopLevel) { - this.addMetaData(document); + DOMPostProcessor.addMetaData(env, document); } this.emit('document', document); diff --git a/package.json b/package.json index 6152301..829fe72 100644 --- a/package.json +++ b/package.json @@ -56,7 +56,7 @@ "dump-tokenizer": "node lib/wt2html/tokenizer.js", "mocha": "mocha --opts tests/mocha/mocha.opts tests/mocha", "parserTests": "node bin/parserTests.js --wt2html --wt2wt --html2wt --html2html --selser --no-color --quiet --blacklist", - "roundtrip": "node bin/roundtrip-test.js 'Barack Obama' && node bin/roundtrip-test.js 'Parkour'", + "roundtrip": "node bin/roundtrip-test.js 'Barack Obama' && node bin/roundtrip-test.js 'Parkour' && node bin/roundtrip-test.js --domain www.mediawiki.org 'User:Legoktm/test_this_is_json'", "test": "npm run nsp && npm run lint && npm run parserTests && npm run mocha", "cover-mocha": "istanbul cover _mocha --dir ./coverage/mocha -- --opts tests/mocha/mocha.opts tests/mocha", "cover-parserTests": "istanbul cover bin/parserTests.js --dir ./coverage/parserTests -- --wt2html --wt2wt --html2wt --html2html --selser --no-color --quiet --blacklist", diff --git a/tests/mocha/api.js b/tests/mocha/api.js index 742edd3..537da96 100644 --- a/tests/mocha/api.js +++ b/tests/mocha/api.js @@ -422,10 +422,28 @@ .end(done); }); + it('should get from a title and revision (html, json content)', function(done) { + request(api) + .get(mockDomain + '/v3/page/html/JSON_Page/101') + .expect(validHtmlResponse(function(doc) { + doc.body.firstChild.nodeName.should.equal('TABLE'); + })) + .end(done); + }); + it('should get from a title and revision (pagebundle)', function(done) { request(api) .get(mockDomain + '/v3/page/pagebundle/Main_Page/1') .expect(validPageBundleResponse()) + .end(done); + }); + + it('should get from a title and revision (pagebundle, json content)', function(done) { + request(api) + .get(mockDomain + '/v3/page/pagebundle/JSON_Page/101') + .expect(validPageBundleResponse(function(doc) { + doc.body.firstChild.nodeName.should.equal('TABLE'); + })) .end(done); }); @@ -448,6 +466,19 @@ .end(done); }); + it('should accept json contentmodel as a string for html', function(done) { + request(api) + .post(mockDomain + '/v3/transform/wikitext/to/html/') + .send({ + wikitext: '{"1":2}', + contentmodel: 'json', + }) + .expect(validHtmlResponse(function(doc) { + doc.body.firstChild.nodeName.should.equal('TABLE'); + })) + .end(done); + }); + it('should accept wikitext as a string for pagebundle', function(done) { request(api) .post(mockDomain + '/v3/transform/wikitext/to/pagebundle/') @@ -456,6 +487,20 @@ }) .expect(validPageBundleResponse(function(doc) { doc.body.firstChild.nodeName.should.equal('H2'); + })) + .end(done); + }); + + it('should accept json contentmodel as a string for pagebundle', function(done) { + request(api) + .post(mockDomain + '/v3/transform/wikitext/to/pagebundle/') + .send({ + wikitext: '{"1":2}', + contentmodel: 'json', + }) + .expect(validPageBundleResponse(function(doc) { + doc.body.firstChild.nodeName.should.equal('TABLE'); + should.not.exist(doc.querySelector('*[typeof="mw:Error"]')); })) .end(done); }); @@ -756,6 +801,17 @@ html: '<!DOCTYPE html>\n<html prefix="dc: http://purl.org/dc/terms/ mw: http://mediawiki.org/rdf/" about="http://localhost/index.php/Special:Redirect/revision/1"><head prefix="mwr: http://localhost/index.php/Special:Redirect/"><meta property="mw:articleNamespace" content="0"/><link rel="dc:replaces" resource="mwr:revision/0"/><meta property="dc:modified" content="2014-09-12T22:46:59.000Z"/><meta about="mwr:user/0" property="dc:title" content="MediaWiki default"/><link rel="dc:contributor" resource="mwr:user/0"/><meta property="mw:revisionSHA1" content="8e0aa2f2a7829587801db67d0424d9b447e09867"/><meta property="dc:description" content=""/><meta property="mw:parsoidVersion" content="0"/><link rel="dc:isVersionOf" href="http://localhost/index.php/Main_Page"/><title>Main_Page</title><base href="http://localhost/index.php/"/><link rel="stylesheet" href="//localhost/load.php?modules=mediawiki.legacy.commonPrint,shared|mediawiki.skinning.elements|mediawiki.skinning.content|mediawiki.skinning.interface|skins.vector.styles|site|mediawiki.skinning.content.parsoid&only=styles&debug=true&skin=vector"/></head><body data-parsoid=\'{"dsr":[0,592,0,0]}\' lang="en" class="mw-content-ltr sitedir-ltr ltr mw-body mw-body-content mediawiki" dir="ltr"><p data-parsoid=\'{"dsr":[0,59,0,0]}\'><strong data-parsoid=\'{"stx":"html","dsr":[0,59,8,9]}\'>MediaWiki has been successfully installed.</strong></p>\n\n<p data-parsoid=\'{"dsr":[61,171,0,0]}\'>Consult the <a rel="mw:ExtLink" href="//meta.wikimedia.org/wiki/Help:Contents" data-parsoid=\'{"targetOff":114,"contentOffsets":[114,126],"dsr":[73,127,41,1]}\'>User\'s Guide</a> for information on using the wiki software.</p>\n\n<h2 data-parsoid=\'{"dsr":[173,194,2,2]}\'> Getting started </h2>\n<ul data-parsoid=\'{"dsr":[195,592,0,0]}\'><li data-parsoid=\'{"dsr":[195,300,1,0]}\'> <a rel="mw:ExtLink" href="//www.mediawiki.org/wiki/Special:MyLanguage/Manual:Configuration_settings" data-parsoid=\'{"targetOff":272,"contentOffsets":[272,299],"dsr":[197,300,75,1]}\'>Configuration settings list</a></li>\n<li data-parsoid=\'{"dsr":[301,373,1,0]}\'> <a rel="mw:ExtLink" href="//www.mediawiki.org/wiki/Special:MyLanguage/Manual:FAQ" data-parsoid=\'{"targetOff":359,"contentOffsets":[359,372],"dsr":[303,373,56,1]}\'>MediaWiki FAQ</a></li>\n<li data-parsoid=\'{"dsr":[374,472,1,0]}\'> <a rel="mw:ExtLink" href="https://lists.wikimedia.org/mailman/listinfo/mediawiki-announce" data-parsoid=\'{"targetOff":441,"contentOffsets":[441,471],"dsr":[376,472,65,1]}\'>MediaWiki release mailing list</a></li>\n<li data-parsoid=\'{"dsr":[473,592,1,0]}\'> <a rel="mw:ExtLink" href="//www.mediawiki.org/wiki/Special:MyLanguage/Localisation#Translation_resources" data-parsoid=\'{"targetOff":555,"contentOffsets":[555,591],"dsr":[475,592,80,1]}\'>Localise MediaWiki for your language</a></li></ul></body></html>', }) .expect(validWikitextResponse()) + .end(done); + }); + + it('should accept html for json contentmodel as a string', function(done) { + request(api) + .post(mockDomain + '/v3/transform/html/to/wikitext/') + .send({ + html: '<!DOCTYPE html>\n<html prefix="dc: http://purl.org/dc/terms/ mw: http://mediawiki.org/rdf/"><head prefix="mwr: http://en.wikipedia.org/wiki/Special:Redirect/"><meta charset="utf-8"/><meta property="mw:articleNamespace" content="0"/><meta property="mw:html:version" content="1.2.1"/><meta property="mw:data-parsoid:version" content="0.0.2"/><link rel="dc:isVersionOf" href="//en.wikipedia.org/wiki/Main_Page"/><title></title><base href="//en.wikipedia.org/wiki/"/><link rel="stylesheet" href="//en.wikipedia.org/w/load.php?modules=mediawiki.legacy.commonPrint,shared|mediawiki.skinning.elements|mediawiki.skinning.content|mediawiki.skinning.interface|skins.vector.styles|site|mediawiki.skinning.content.parsoid|ext.cite.style&only=styles&skin=vector"/></head><body lang="en" class="mw-content-ltr sitedir-ltr ltr mw-body mw-body-content mediawiki" dir="ltr"><table class="mw-json mw-json-object"><tbody><tr><th>a</th><td class="value mw-json-number">4</td></tr><tr><th>b</th><td class="value mw-json-number">3</td></tr></tbody></table></body></html>', + contentmodel: 'json', + }) + .expect(validWikitextResponse('{"a":4,"b":3}')) .end(done); }); @@ -1232,4 +1288,42 @@ }); // end html2html + describe('html2html (JSON contentmodel)', function() { + + var previousRevHTML = { + revid: 101, + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/1.2.1"', + }, + body: '<body lang="en" class="mw-content-ltr sitedir-ltr ltr mw-body mw-body-content mediawiki" dir="ltr"><table class="mw-json mw-json-array"><tbody><tr><td><table class="mw-json mw-json-array"><tbody><tr><td class="value mw-json-number">1</td></tr></tbody></table></td></tr></tbody></table></body>', + }, + contentmodel: 'json', + "data-parsoid": { + headers: { + 'content-type': 'application/json;profile="https://www.mediawiki.org/wiki/Specs/data-parsoid/0.0.2"', + }, + body: { + 'counter': 1, + 'ids': { + }, + }, + }, + }; + + it('should accept the previous revision to reuse expansions (html)', function(done) { + request(api) + .post(mockDomain + '/v3/transform/html/to/html/JSON_Page/101') + .send({ + previous: previousRevHTML, + }) + .expect(validHtmlResponse(function(doc) { + doc.body.firstChild.tagName.should.equal('TABLE'); + should.not.exist(doc.querySelector('*[typeof="mw:Error"]')); + })) + .end(done); + }); + + }); // end html2html (JSON) + }); diff --git a/tests/mocha/parse.js b/tests/mocha/parse.js index fd82f8e..a65f2f5 100644 --- a/tests/mocha/parse.js +++ b/tests/mocha/parse.js @@ -47,6 +47,36 @@ }); }); + it('should support json contentmodel', function() { + var opts = { contentmodel: 'json' }; + var testval = {a: "a", b: [2, true, ""], c: null}; + return parse(JSON.stringify(testval), opts).then(function(doc) { + doc.should.have.property('nodeName', '#document'); + doc.outerHTML.startsWith('<!DOCTYPE html><html').should.equal(true); + doc.outerHTML.endsWith('</body></html>').should.equal(true); + // verify that body has only one <html> tag, one <body> tag, etc. + doc.childNodes.length.should.equal(2);// <!DOCTYPE> and <html> + doc.firstChild.nodeName.should.equal('html'); + doc.lastChild.nodeName.should.equal('HTML'); + // <html> children should be <head> and <body> + var html = doc.documentElement; + html.childNodes.length.should.equal(2); + html.firstChild.nodeName.should.equal('HEAD'); + html.lastChild.nodeName.should.equal('BODY'); + // <body> should have one child, <table> + var body = doc.body; + body.childElementCount.should.equal(1); + body.firstElementChild.nodeName.should.equal('TABLE'); + var table = doc.body.firstElementChild; + table.classList.contains('mw-json').should.equal(true); + // Now convert back to JSON + return serialize(doc, null, opts); + }).then(function(result) { + var v = JSON.parse(result); // shouldn't throw an error! + v.should.eql(testval); + }); + }); + ['no subpages', 'subpages'].forEach(function(desc, subpages) { describe('should handle page titles with embedded ? (' + desc + ')', function() { var linktests = [ diff --git a/tests/mocha/test.helpers.js b/tests/mocha/test.helpers.js index 511c836..a98af34 100644 --- a/tests/mocha/test.helpers.js +++ b/tests/mocha/test.helpers.js @@ -12,7 +12,10 @@ env = options.tweakEnv(env) || env; } env.setPageSrcInfo(src); - return env.pipelineFactory.parse(env, env.page.src) + if (options.contentmodel) { + env.page.meta.revision.contentmodel = options.contentmodel; + } + return env.conf.wiki.getContentHandler(env).toHTML(env) .then(function(doc) { // linter tests need the env object return { env: env, doc: doc }; @@ -30,6 +33,12 @@ if (options.tweakEnv) { env = options.tweakEnv(env) || env; } + if (!env.page.meta) { + env.page.meta = { revision: {} }; + } + if (options.contentmodel) { + env.page.meta.revision.contentmodel = options.contentmodel; + } pb = pb || DU.extractPageBundle(doc); if (pb) { DU.applyPageBundle(doc, pb); diff --git a/tests/mockAPI.js b/tests/mockAPI.js index ecba71a..e6d49e5 100644 --- a/tests/mockAPI.js +++ b/tests/mockAPI.js @@ -149,6 +149,27 @@ }, }; +var jsonPage = { + query: { + pages: { + '101': { + pageid: 101, + ns: 0, + title: 'JSON_Page', + revisions: [ + { + revid: 101, + parentid: 0, + contentmodel: 'json', + contentformat: 'text/json', + '*': '[1]', + }, + ], + }, + }, + }, +}; + var fnames = { 'Image:Foobar.jpg': 'Foobar.jpg', 'File:Foobar.jpg': 'Foobar.jpg', @@ -249,6 +270,8 @@ return cb(null , largePage); } else if (body.revids === '100' || body.titles === 'Reuse_Page') { return cb(null , reusePage); + } else if (body.revids === '101' || body.titles === 'JSON_Page') { + return cb(null , jsonPage); } } -- To view, visit https://gerrit.wikimedia.org/r/295707 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I7ca31c99de8e04b1359bc521df121db0eb69e384 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Cscott <canan...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits