jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/348111 )
Change subject: Remove advanced PDoc usage from the jsapi ...................................................................... Remove advanced PDoc usage from the jsapi * This now lives here: https://github.com/wikimedia/parsoid-jsapi Change-Id: I03c089c68e8a9639b9ca775dd74fed75b25db5c4 --- M .jsduck/categories.json M .jsduck/guides.json M README.md D guides/jsapi/README.md M jsduck.json M lib/index.js D lib/jsapi.js M tests/mocha/jsapi.js 8 files changed, 9 insertions(+), 2,066 deletions(-) Approvals: jenkins-bot: Verified Arlolra: Looks good to me, approved diff --git a/.jsduck/categories.json b/.jsduck/categories.json index 2db1dfe..0925fd2 100644 --- a/.jsduck/categories.json +++ b/.jsduck/categories.json @@ -11,20 +11,7 @@ { "name": "JavaScript Interface (experimental)", "classes": [ - "Parsoid", - "PComment", - "PDoc", - "PNodeList", - "PNode", - "PExtLink", - "PHeading", - "PHtmlEntity", - "PMedia", - "PTag", - "PTemplate", - "PTemplate.Parameter", - "PText", - "PWikiLink" + "Parsoid" ] } ] diff --git a/.jsduck/guides.json b/.jsduck/guides.json index 964fcd2..098717a 100644 --- a/.jsduck/guides.json +++ b/.jsduck/guides.json @@ -19,12 +19,6 @@ "url": "../guides/apiuse", "title": "Using Parsoid's HTTP API", "description": "How to use Parsoid's HTTP API to parse and roundtrip wikitext." - }, - { - "name": "jsapi", - "url": "../guides/jsapi", - "title": "Using Parsoid's JavaScript API", - "description": "How to use Parsoid's JavaScript API to parse and manipulate article content." } ] } diff --git a/README.md b/README.md index e448024..84e36a5 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,14 @@ Developer API documentation can be found at https://doc.wikimedia.org/Parsoid/master/ -and some helpful getting-started guides are at + +And some helpful getting-started guides are at https://doc.wikimedia.org/Parsoid/master/ +An example of a library that builds on Parsoid output to offer an API that +mimic's mwparserfromhell in JavaScript can be found at, +https://github.com/wikimedia/parsoid-jsapi + License ------- diff --git a/guides/jsapi/README.md b/guides/jsapi/README.md deleted file mode 100644 index 4df3b5a..0000000 --- a/guides/jsapi/README.md +++ /dev/null @@ -1,148 +0,0 @@ -Usage of the JavaScript API -=========================== - -This file describes usage of Parsoid as a standalone wikitext parsing -package, in the spirit of [`mwparserfromhell`]. This is not the typical -use case for Parsoid; it is more often used as a network service. -See [the HTTP API guide](#!/guide/apiuse) or [Parsoid service] on the wiki -for more details. - -These examples will use the [`prfun`] library and [ES6 generators] in -order to fluently express asynchronous operations. The library also -exports vanilla [`Promise`]s if you wish to maintain compatibility -with old versions of `node` at the cost of a little bit of readability. - -Since many methods in the API return [`Promise`]s, we've also provided -a [`Promise`]-aware REPL, that will wait for a promise to be resolved -before printing its value. This can be started from the -shell using: - - node -e 'require("parsoid").repl()' - -Use `"./"` instead of `"parsoid"` if you are running this from a -checked-out repository. Code examples below which contain lines -starting with `>` show sessions using this REPL. (You may also -wish to look in `tests/mocha/jsapi.js` for examples using a more -traditional promise-chaining style.) - -Use of Parsoid as a wikitext parser is straightforward (where `text` is -wikitext input): - - #/usr/bin/node --harmony-generators - var Promise = require('prfun'); - var Parsoid = require('parsoid'); - - var main = Promise.async(function*() { - var text = "I love wikitext!"; - var pdoc = yield Parsoid.parse(text, { pdoc: true }); - console.log(pdoc.document.outerHTML); - }); - - // start me up! - main().done(); - -As you can see, there is a little bit of boilerplate needed to get the -asynchronous machinery started. The body of the `main()` method can -be replaced with your code. - -The `pdoc` variable above holds a [`PDoc`] object, which has -helpful methods to filter and manipulate the document. If you want -to access the raw [Parsoid DOM], however, it is easily accessible -via the [`document`](#!/api/PDoc-property-document) property, as shown above, -and all normal DOM manipulation functions can be used on it (Parsoid uses -[`domino`] to implement these methods). Be sure to call -[`update()`](#!/api/PNode-method-update) after any direct DOM manipulation. -[`PDoc`] is a subclass of [`PNodeList`], which provides a number of -useful access and mutation methods -- and if you use these you won't need -to manually call `update()`. These provided methods can be quite useful. -For example: - - > var text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?\n"; - > var pdoc = yield Parsoid.parse(text, { pdoc: true }); - > console.log(yield pdoc.toWikitext()); - I has a template! {{foo|bar|baz|eggs=spam}} See it? - > var templates = pdoc.filterTemplates(); - > console.log(yield Promise.map(templates, Parsoid.toWikitext)); - [ '{{foo|bar|baz|eggs=spam}}' ] - > var template = templates[0]; - > console.log(template.name); - foo - > template.name = 'notfoo'; - > console.log(yield template.toWikitext()); - {{notfoo|bar|baz|eggs=spam}} - > console.log(template.params.map(function(p) { return p.name; })); - [ '1', '2', 'eggs' ] - > console.log(yield template.get(1).value.toWikitext()); - bar - > console.log(yield template.get("eggs").value.toWikitext()); - spam - -Getting nested templates is trivial: - - > var text = "{{foo|bar={{baz|{{spam}}}}}}"; - > var pdoc = yield Parsoid.parse(text, { pdoc: true }); - > console.log(yield Promise.map(pdoc.filterTemplates(), Parsoid.toWikitext)); - [ '{{foo|bar={{baz|{{spam}}}}}}', - '{{baz|{{spam}}}}', - '{{spam}}' ] - -You can also pass `{ recursive: false }` to -[`filterTemplates()`](#!/api/PNodeList-method-filterTemplates) and explore -templates manually. This is possible because the -[`get`](#!/api/PTemplate-method-get) method on a -[`PTemplate`] object returns an object containing further [`PNodeList`]s: - - > var text = "{{foo|this {{includes a|template}}}}"; - > var pdoc = yield Parsoid.parse(text, { pdoc: true }); - > var templates = pdoc.filterTemplates({ recursive: false }); - > console.log(yield Promise.map(templates, Parsoid.toWikitext)); - [ '{{foo|this {{includes a|template}}}}' ] - > var foo = templates[0]; - > console.log(yield foo.get(1).value.toWikitext()); - this {{includes a|template}} - > var more = foo.get(1).value.filterTemplates(); - > console.log(yield Promise.map(more, Parsoid.toWikitext)); - [ '{{includes a|template}}' ] - > console.log(yield more[0].get(1).value.toWikitext()); - template - -Templates can be easily modified to add, remove, or alter params. -Templates also have a [`nameMatches()`](#!/api/PTemplate-method-nameMatches) -method for comparing template names, which takes care of capitalization and -white space: - - > var text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}"; - > var pdoc = yield Parsoid.parse(text, { pdoc: true }); - > pdoc.filterTemplates().forEach(function(template) { - ... if (template.nameMatches('Cleanup') && !template.has('date')) { - ... template.add('date', 'July 2012'); - ... } - ... if (template.nameMatches('uncategorized')) { - ... template.name = 'bar-stub'; - ... } - ... }); - > console.log(yield pdoc.toWikitext()); - {{cleanup|date = July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}} - -At any time you can convert the `pdoc` into HTML conforming to the -[MediaWiki DOM spec] (by referencing the -[`document`](#!/api/PDoc-property-document) property) or into wikitext (by -invoking [`toWikitext()`](#!/api/PNodeList-method-toWikitext), which -returns a [`Promise`] for the wikitext string). This allows you -to save the page using either standard API methods or the RESTBase API -(once [T101501](https://phabricator.wikimedia.org/T101501) is resolved). - -For more tips, check out [PNodeList's full method list](#!/api/PNodeList) -and the list of [PNode](#!/api/PNode) subclasses. - -[`mwparserfromhell`]: http://mwparserfromhell.readthedocs.org/en/latest/index.html -[Parsoid service]: https://www.mediawiki.org/wiki/Parsoid -[`prfun`]: https://github.com/cscott/prfun -[ES6 generators]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/function* -[`Promise`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise -[Parsoid DOM]: http://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec -[MediaWiki DOM spec]: http://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec -[`domino`]: https://www.npmjs.com/package/domino -[`PDoc`]: #!/api/PDoc -[`PNodeList`]: #!/api/PNodeList -[`PTemplate`]: #!/api/PTemplate diff --git a/jsduck.json b/jsduck.json index 3665d60..80e0837 100644 --- a/jsduck.json +++ b/jsduck.json @@ -12,7 +12,6 @@ "--": [ ".jsduck/external.js", "lib/index.js", - "lib/jsapi.js", "lib/logger/LogData.js", "lib/logger/Logger.js", "lib/logger/ParsoidLogger.js", diff --git a/lib/index.js b/lib/index.js index 80c2dd9..30f3a72 100644 --- a/lib/index.js +++ b/lib/index.js @@ -6,7 +6,6 @@ var parseJs = require('../bin/parse.js'); var ParsoidConfig = require('./config/ParsoidConfig.js').ParsoidConfig; var ParsoidService = require('./api/ParsoidService.js'); -var JsApi = require('./jsapi.js'); var prepareLog = function(logData) { var log = Object.assign({ logType: logData.logType }, logData.locationData); @@ -33,9 +32,6 @@ * a native JavaScript API. This file provides that, more-or-less. * It should be considered unstable. Patches welcome. * - * See `USAGE.md` and `./jsapi.js` for a useful wrapper API which works - * well with this interface. - * * @class * @singleton */ @@ -55,20 +51,11 @@ * console.log(res.out.outerHTML); * }).done(); * - * Advanced usage using the {@link PDoc} API: - * - * Parsoid.parse('{{echo|hi}}', { pdoc: true }).then(function(pdoc) { - * var templates = pdoc.filterTemplates(); - * console.log(templates[0].name); - * }).done(); - * * @param {String} input * The input wikitext or HTML (depending on conversion direction). * @param {Object} options * @param {Boolean} [options.document=false] * Return a DOM {@link Document} (instead of a string) - * @param {Boolean} [options.pdoc=false] - * Return a {@link PDoc} object (instead of a string) * @param {Boolean} [options.wt2html=true] * Convert wikitext to HTML. * @param {Boolean} [options.html2wt=false] @@ -85,10 +72,6 @@ Parsoid.parse = function(input, options, optCb) { options = options || {}; var argv = Object.assign({}, parseJs.defaultOptions, options); - - if (argv.pdoc) { - argv.document = true; - } if (argv.selser) { argv.html2wt = true; @@ -116,48 +99,10 @@ parsoidConfig.defaultWiki = prefix ? prefix : parsoidConfig.reverseMwApiMap.get(domain); } - if (argv.pdoc) { - parsoidConfig.addHTMLTemplateParameters = true; - // Since the jsapi acts directly on our serialized XML, it's heavily - // tied to the content version. Let's be explicit about which one - // is acceptable, so that we fail loudly if/when it's no longer - // supported. - argv.contentversion = '1.3.0'; - } - return parseJs.parse(input || '', argv, parsoidConfig, prefix, domain).then(function(res) { - return argv.pdoc ? new JsApi.PDoc(res.env, res.out) : res; - }).nodify(optCb); + return parseJs.parse(input || '', argv, parsoidConfig, prefix, domain) + .nodify(optCb); }; - -// Add a helper method to PNodeList, based on Parsoid.parse. - -/** @class PNodeList */ -/** - * Create a {@link PNodeList} belonging to the given {@link PDoc} - * from a string containing wikitext. - * @param {PDoc} pdoc - * The {@link PDoc} which will own the result. - * @param {String} wikitext - * The wikitext to convert. - * @param {Object} options - * Options which are passed to {@link Parsoid#parse}. - * @return {Promise} - * Fulfilled by a {@link PNodeList} representing the given wikitext. - * @static - */ -JsApi.PNodeList.fromWikitext = function(pdoc, wikitext, options) { - options = Object.assign({}, options, { pdoc: true }); - return Parsoid.parse(wikitext, options).then(function(pdoc2) { - var node = pdoc.document.adoptNode(pdoc2.document.body); - return new JsApi.PNodeList(pdoc, null, node); - }); -}; - -// Expose other helpful objects. -Object.keys(JsApi).forEach(function(k) { - Parsoid[k] = JsApi[k]; -}); /** * Start an API service worker as part of a service-runner service. diff --git a/lib/jsapi.js b/lib/jsapi.js deleted file mode 100644 index 4c8d166..0000000 --- a/lib/jsapi.js +++ /dev/null @@ -1,1487 +0,0 @@ -/* - * Handy JavaScript API for Parsoid DOM, inspired by the - * python `mwparserfromhell` package. - */ -'use strict'; -require('../core-upgrade.js'); - -// TO DO: -// extension -// PExtLink#url PWikiLink#title should handle mw:ExpandedAttrs -// make separate package? - -var util = require('util'); - -var DOMImpl = require('domino').impl; -var Node = DOMImpl.Node; -var NodeFilter = DOMImpl.NodeFilter; -var DU = require('./utils/DOMUtils.js').DOMUtils; -var Promise = require('./utils/promise.js'); - -// Note that the JSAPI exposes data-mw directly as a DOM attribute to -// allow clients to easily edit it. - -// WTS helper -var wts = function(env, nodes) { - var body; - if (nodes.length === 0) { - return ''; - } else if (nodes.length === 1 && DU.isBody(nodes[0])) { - body = nodes[0]; - } else { - body = nodes[0].ownerDocument.createElement('body'); - for (var i = 0; i < nodes.length; i++) { - body.appendChild(nodes[i].cloneNode(true)); - } - } - return env.getContentHandler().fromHTML(env, body, false); -}; - -// toString helper -var toStringHelper = function(nodes, sizeLimit) { - var out; - if (sizeLimit === undefined) { sizeLimit = 80; /* characters */ } - if (nodes.length === 0) { - return ''; - } else if (nodes.length === 1) { - var body = nodes[0].ownerDocument.createElement('body'); - body.appendChild(nodes[0].cloneNode(true)); - out = DU.normalizeOut(body, 'parsoidOnly'); - if (out.length <= sizeLimit || !DU.isElt(nodes[0])) { return out; } - body.firstChild.innerHTML = '...'; - out = DU.normalizeOut(body, 'parsoidOnly'); - if (out.length <= sizeLimit) { return out; } - var name = nodes[0].nodeName.toLowerCase(); - var children = nodes[0].childNodes; - if (children.length === 0) { - return '<' + name + ' .../>'; - } else { - return '<' + name + ' ...>...</' + name + '>'; - } - } else { - for (var i = 0; i < nodes.length; i++) { - out += toStringHelper( - [nodes[i]], - (sizeLimit - out.length) / (nodes.length - i) - ); - } - return out; - } -}; - -// Forward declarations of Wrapper classes. -var PNode, PNodeList, PComment, PExtLink, PHeading, PHtmlEntity, PMedia, PTag, PTemplate, PText, PWikiLink; - -// HTML escape helper -var toHtmlStr = function(node, v) { - if (typeof v === 'string') { - var div = node.ownerDocument.createElement('div'); - div.textContent = v; - return div.innerHTML; - } else if (v instanceof PNodeList) { - return v.container.innerHTML; - } else { - return v.outerHTML; - } -}; - - -/** - * The PNodeList class wraps a collection of DOM {@link Node}s. - * It provides methods that can be used to extract data from or - * modify the nodes. The `filter()` series of functions is very - * useful for extracting and iterating over, for example, all - * of the templates in the project (via {@link #filterTemplates}). - * @class PNodeList - * @alternateClassName Parsoid.PNodeList - */ -/** - * @method constructor - * @private - * @param {PDoc} pdoc The parent document for this {@link PNodeList}. - * @param {PNode|null} parent A {@link PNode} which will receive updates - * when this {@link PNodeList} is mutated. - * @param {Node} container A DOM {@link Node} which is the parent of all - * of the DOM {@link Node}s in this {@link PNodeList}. The container - * element itself is *not* considered part of the list. - * @param {Object} [opts] - * @param {Function} [opts.update] - * A function which will be invoked when {@link #update} is called. - */ -PNodeList = function PNodeList(pdoc, parent, container, opts) { - this.pdoc = pdoc; - this.parent = parent; - this.container = container; - this._update = (opts && opts.update); - this._cachedPNodes = null; -}; -Object.defineProperties(PNodeList.prototype, { - /** - * Returns an {@link Array} of the DOM {@link Node}s represented - * by this {@link PNodeList}. - * @property {Node[]} - */ - nodes: { - get: function() { return Array.from(this.container.childNodes); }, - }, - - /** - * Call {@link #update} after manually mutating any of the DOM - * {@link Node}s represented by this {@link PNodeList} in order to - * ensure that any containing templates are refreshed with their - * updated contents. - * - * The mutation methods in the {@link PDoc}/{@link PNodeList} API - * automatically call {@link #update} for you when required. - * @method - */ - update: { - value: function() { - this._cachedPNodes = null; - if (this._update) { this._update(); } - if (this.parent) { this.parent.update(); } - }, - }, - _querySelectorAll: { - value: function(selector) { - var tweakedSelector = ',' + selector + ','; - if (!(/,(COMMENT|TEXT),/.test(tweakedSelector))) { - // Use fast native querySelectorAll - return Array.from(this.container.querySelectorAll(selector)); - } - // Implement comment/text node selector the hard way - /* jshint bitwise: false */ - var whatToShow = NodeFilter.SHOW_ELEMENT; // always show templates - if (/,COMMENT,/.test(tweakedSelector)) { - whatToShow = whatToShow | NodeFilter.SHOW_COMMENT; - } - if (/,TEXT,/.test(tweakedSelector)) { - whatToShow = whatToShow | NodeFilter.SHOW_TEXT; - } - var nodeFilter = function(node) { - if (node.nodeType !== Node.ELEMENT_NODE) { - return NodeFilter.FILTER_ACCEPT; - } - if (node.matches(PTemplate._selector)) { - return NodeFilter.FILTER_ACCEPT; - } - return NodeFilter.FILTER_SKIP; - }; - var result = []; - var includeTemplates = - /,\[typeof~="mw:Transclusion"\],/.test(tweakedSelector); - var treeWalker = this.pdoc.document.createTreeWalker( - this.container, whatToShow, nodeFilter, false - ); - while (treeWalker.nextNode()) { - var node = treeWalker.currentNode; - // We don't need the extra test for ELEMENT_NODEs yet, since - // non-template element nodes will be skipped by the nodeFilter - // above. But if we ever extend filter() to be fully generic, - // we might need the commented-out portion of this test. - if (node.nodeType === Node.ELEMENT_NODE /* && - node.matches(PTemplate._selector) */ - ) { - treeWalker.lastChild(); // always skip over all children - if (!includeTemplates) { - continue; // skip template itself - } - } - result.push(node); - } - return result; - }, - }, - _templatesForNode: { - value: function(node) { - // each Transclusion node could represent multiple templates. - var parent = this; - var result = []; - var parts = DU.getJSONAttribute(node, 'data-mw', {}).parts || []; - parts.forEach(function(part, i) { - if (part.template) { - result.push(new PTemplate(parent.pdoc, parent, node, i)); - } - }); - return result; - }, - }, - - /** - * @method - * @private - * @param {Array} result - * A result array to append new items to as they are found - * @param {string} selector - * CSS-style selector for the nodes of interest - * @param {Function} func - * Function to apply to every non-template match - * @param {Object} [opts] - * @param {boolean} [opts.recursive] - * Set to `false` to avoid recursing into templates. - */ - _filter: { - value: function(result, selector, func, opts) { - var self = this; - var recursive = (opts && opts.recursive) !== false; - var tSelector = PTemplate._selector; - if (selector) { - tSelector += ',' + selector; - } - this._querySelectorAll(tSelector).forEach(function(node) { - var isTemplate = node.nodeType === Node.ELEMENT_NODE && - node.matches(PTemplate._selector); - if (isTemplate) { - self._templatesForNode(node).forEach(function(t) { - if (!selector) { - result.push(t); - } - if (recursive) { - t.params.forEach(function(k) { - var td = t.get(k); - ['key', 'value'].forEach(function(prop) { - if (td[prop]) { - td[prop]._filter(result, selector, func, opts); - } - }); - }); - } - }); - } else { - func(result, self, node, opts); - } - }); - return result; - }, - }, - - /** - * Return an array of {@link PComment} representing comments - * found in this {@link PNodeList}. - * @inheritdoc #_filter - * @return {PComment[]} - */ - filterComments: { - value: function(opts) { - return this._filter([], PComment._selector, function(r, parent, node) { - r.push(new PComment(parent.pdoc, parent, node)); - }, opts); - }, - }, - - /** - * Return an array of {@link PExtLink} representing external links - * found in this {@link PNodeList}. - * @inheritdoc #_filter - * @return {PExtLink[]} - */ - filterExtLinks: { - value: function(opts) { - return this._filter([], PExtLink._selector, function(r, parent, node) { - r.push(new PExtLink(parent.pdoc, parent, node)); - }, opts); - }, - }, - - /** - * Return an array of {@link PHeading} representing headings - * found in this {@link PNodeList}. - * @inheritdoc #_filter - * @return {PHeading[]} - */ - filterHeadings: { - value: function(opts) { - return this._filter([], PHeading._selector, function(r, parent, node) { - r.push(new PHeading(parent.pdoc, parent, node)); - }, opts); - }, - }, - - /** - * Return an array of {@link PHtmlEntity} representing HTML entities - * found in this {@link PNodeList}. - * @inheritdoc #_filter - * @return {PHtmlEntity[]} - */ - filterHtmlEntities: { - value: function(opts) { - return this._filter([], PHtmlEntity._selector, function(r, parent, node) { - r.push(new PHtmlEntity(parent.pdoc, parent, node)); - }, opts); - }, - }, - - /** - * Return an array of {@link PMedia} representing images or other - * media content found in this {@link PNodeList}. - * @inheritdoc #_filter - * @return {PMedia[]} - */ - filterMedia: { - value: function(opts) { - return this._filter([], PMedia._selector, function(r, parent, node) { - r.push(new PMedia(parent.pdoc, parent, node)); - }, opts); - }, - }, - - /** - * Return an array of {@link PTemplate} representing templates - * found in this {@link PNodeList}. - * @inheritdoc #_filter - * @return {PTemplate[]} - */ - filterTemplates: { - value: function(opts) { - return this._filter([], null, null, opts); - }, - }, - - /** - * Return an array of {@link PText} representing plain text - * found in this {@link PNodeList}. - * @inheritdoc #_filter - * @return {PText[]} - */ - filterText: { - value: function(opts) { - return this._filter([], PText._selector, function(r, parent, node) { - r.push(new PText(parent.pdoc, parent, node)); - }, opts); - }, - }, - - /** - * Return an array of {@link PWikiLink} representing wiki links - * found in this {@link PNodeList}. - * @inheritdoc #_filter - * @return {PWikiLink[]} - */ - filterWikiLinks: { - value: function(opts) { - return this._filter([], PWikiLink._selector, function(r, parent, node) { - r.push(new PWikiLink(parent.pdoc, parent, node)); - }, opts); - }, - }, - - /** - * Internal list of PNodes in this list. - * @property {PNode[]} - * @private - */ - pnodes: { - get: function() { - if (this._cachedPNodes !== null) { - return this._cachedPNodes; - } - var templates = new Set(); - var result = []; - OUTER: for (var i = 0; i < this.container.childNodes.length; i++) { - var node = this.container.childNodes.item(i); - if (node.nodeType === Node.TEXT_NODE) { - result.push(new PText(this.pdoc, this, node)); - continue; - } - if (node.nodeType === Node.COMMENT_NODE) { - result.push(new PComment(this.pdoc, this, node)); - continue; - } - if (node.nodeType === Node.ELEMENT_NODE) { - // Note: multiple PTemplates per Node, and possibly - // multiple Nodes per PTemplate. - if (node.matches(PTemplate._selector)) { - templates.add(node.getAttribute('about')); - this._templatesForNode(node).forEach(function(t) { - result.push(t); - }); - continue; - } else if (templates.has(node.getAttribute('about'))) { - continue; - } - // PTag is the catch-all; it should always be last. - var which = [ - PExtLink, PHeading, PHtmlEntity, PMedia, PWikiLink, - PTag, - ]; - for (var j = 0; j < which.length; j++) { - var Ty = which[j]; - if (node.matches(Ty._selector)) { - result.push(new Ty(this.pdoc, this, node)); - continue OUTER; - } - } - } - // Unknown type. - result.push(new PNode(this.pdoc, this, node)); - } - return (this._cachedPNodes = result); - }, - }, - - /** - * The number of nodes within the node list. - * @property {Number} - */ - length: { get: function() { return this.pnodes.length; }, }, - - /** - * Return the `index`th node within the node list. - * @param {Number} index - * @return {PNode} - */ - get: { value: function(index) { return this.pnodes[index]; }, }, - - /** - * Return the index of `target` in the list of nodes, or `-1` if - * the target was not found. - * - * If `recursive` is true, we will look in all nodes of ours and - * their descendants, and return the index of our direct descendant - * node which contains the target. Otherwise, the search is done - * only on direct descendants. - * - * If `fromIndex` is provided, it is the index to start the search - * at. - * @param {PNode|Node} target - * @param {Object} [options] - * @param {Boolean} [options.recursive=false] - * @param {Number} [options.fromIndex=0] - */ - indexOf: { - value: function(target, options) { - var recursive = Boolean(options && options.recursive); - var fromIndex = Number(options && options.fromIndex) || 0; - var child, children; - var i, j; - if (target instanceof PNode) { - target = target.node; - } - for (i = fromIndex; i < this.length; i++) { - child = this.get(i); - if (child.matches(target)) { - return i; - } - if (recursive) { - children = child._children(); - for (j = 0; j < children.length; j++) { - if (children[j].indexOf(target, options) !== -1) { - return i; - } - } - } - } - return -1; - }, - }, - - /** - * Return a string representing the contents of this object - * as HTML conforming to the - * [MediaWiki DOM specification](https://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec). - * @return {String} - */ - toHtml: { - value: function() { - return this.container.innerHTML; - }, - }, - - /** - * Return a promise for a string representing the contents of this - * object as wikitext. - * @return {Promise} - */ - toWikitext: { - value: Promise.method(function() { - return wts(this.pdoc.env, this.nodes); - }), - }, - - /** - * Return a string representing the contents of this object for - * debugging. Some contents may be elided. - * @return {String} - */ - toString: { - value: function() { - return toStringHelper(this.nodes); - }, - }, -}); -/** - * Create a {@link PNodeList} from a string containing HTML. - * @return {PNodeList} - * @static - */ -PNodeList.fromHTML = function(pdoc, html) { - var div = pdoc.document.createElement('div'); - div.innerHTML = html; - return new PNodeList(pdoc, null, div); -}; - -/** - * @class PNode - * A PNode represents a specific DOM {@link Node}. Its subclasses provide - * specific accessors and mutators for associated semantic information. - * - * Useful subclasses of {@link PNode} include: - * - * - {@link PComment}: comments, like `<!-- example -->` - * - {@link PExtLink}: external links, like `[http://example.com Example]` - * - {@link PHeading}: headings, like `== Section 1 ==` - * - {@link PHtmlEntity}: html entities, like ` ` - * - {@link PMedia}: images and media, like `[[File:Foo.jpg|caption]]` - * - {@link PTag}: other HTML tags, like `<span>` - * - {@link PTemplate}: templates, like `{{foo|bar}}` - * - {@link PText}: unformatted text, like `foo` - * - {@link PWikiLink}: wiki links, like `[[Foo|bar]]` - */ -/** - * @method constructor - * @private - * @param {PDoc} pdoc The parent document for this PNode. - * @param {PNodeList|null} parent A containing node list which will receive - * updates when this {@link PNode} is mutated. - * @param {Node} node The DOM node. - * @param {Object} [opts] - * @param {Function} [opts.update] - * A function which will be invoked when {@link #update} is called. - * @param {Function} [opts.wtsNodes] - * A function returning an array of {@link Node}s which can tweak the - * portion of the document serialized by {@link #toWikitext}. - */ -PNode = function PNode(pdoc, parent, node, opts) { - /** @property {PDoc} pdoc The parent document for this {@link PNode}. */ - this.pdoc = pdoc; - this.parent = parent; - /** @property {Node} node The underlying DOM {@link Node}. */ - this.node = node; - this._update = (opts && opts.update); - this._wtsNodes = (opts && opts.wtsNodes); -}; -Object.defineProperties(PNode.prototype, { - ownerDocument: { - get: function() { return this.node.ownerDocument; }, - }, - dataMw: { - get: function() { - return DU.getJSONAttribute(this.node, 'data-mw', {}); - }, - set: function(v) { - DU.setJSONAttribute(this.node, 'data-mw', v); - this.update(); - }, - }, - /** - * Internal helper: enumerate all PNodeLists contained within this node. - * @private - * @return {PNodeList[]} - */ - _children: { value: function() { return []; }, }, - /** - * Call {@link #update} after manually mutating the DOM {@link Node} - * associated with this {@link PNode} in order to ensure that any - * containing templates are refreshed with their updated contents. - * - * The mutation methods in the API automatically call {@link #update} - * for you when required. - * @method - */ - update: { - value: function() { - if (this._update) { this._update(); } - if (this.parent) { this.parent.update(); } - }, - }, - /** - * Returns true if the `target` matches this node. By default a - * node matches only if its #node is strictly equal to the target - * or the target's #node. Subclasses can override this to provide - * more flexible matching: for example see {@link PText#matches}. - * @param {Node|PNode} target - * @return {Boolean} true if the target matches this node, false otherwise. - */ - matches: { - value: function(target) { - return (target === this) || (target === this.node) || - (target instanceof PNode && target.node === this.node); - }, - }, - /** - * @inheritdoc PNodeList#toHtml - * @method - */ - toHtml: { - value: function() { - var nodes = this._wtsNodes ? this._wtsNodes() : [ this.node ]; - return nodes.map(function(n) { return n.outerHTML; }).join(''); - }, - }, - /** - * @inheritdoc PNodeList#toWikitext - * @method - */ - toWikitext: { - value: Promise.method(function() { - var nodes = this._wtsNodes ? this._wtsNodes() : [ this.node ]; - return wts(this.pdoc.env, nodes); - }), - }, - /** - * @inheritdoc PNodeList#toString - * @method - */ - toString: { - value: function() { - var nodes = this._wtsNodes ? this._wtsNodes() : [ this.node ]; - return toStringHelper(nodes); - }, - }, -}); - -// Helper: getter and setter for the inner contents of a node. -var innerAccessor = { - get: function() { - return new PNodeList(this.pdoc, this, this.node); - }, - set: function(v) { - this.node.innerHTML = toHtmlStr(this.node, v); - this.update(); - }, -}; - -/** - * PComment represents a hidden HTML comment, like `<!-- fobar -->`. - * @class PComment - * @extends PNode - */ -/** - * @method constructor - * @private - * @inheritdoc PNode#constructor - */ -PComment = function PComment(pdoc, parent, node, opts) { - PNode.call(this, pdoc, parent, node, opts); -}; -util.inherits(PComment, PNode); -Object.defineProperties(PComment.prototype, { - /** - * The hidden text contained between `<!--` and `-->`. - * @property {String} - */ - contents: { - get: function() { - return DU.decodeComment(this.node.data); - }, - set: function(v) { - this.node.data = DU.encodeComment(v); - this.update(); - }, - }, -}); -/** - * @ignore - * @static - * @private - */ -PComment._selector = 'COMMENT'; // non-standard selector - -/** - * PExtLink represents an external link, like `[http://example.com Example]`. - * @class PExtLink - * @extends PNode - */ -/** - * @method constructor - * @private - * @inheritdoc PNode#constructor - */ -PExtLink = function PExtLink(pdoc, parent, node, opts) { - PNode.call(this, pdoc, parent, node, opts); -}; -util.inherits(PExtLink, PNode); -Object.defineProperties(PExtLink.prototype, { - /** - * The URL of the link target. - * @property {String} - */ - url: { - // XXX url should be a PNodeList, but that requires handling - // typeof="mw:ExpandedAttrs" - get: function() { - return this.node.getAttribute('href'); - }, - set: function(v) { - this.node.setAttribute('href', v); - }, - }, - /** - * The link title, as a {@link PNodeList}. - * You can assign a String, Node, or PNodeList to mutate the title. - * @property {PNodeList} - */ - title: innerAccessor, - // XXX include this.url, once it is a PNodeList - _children: { value: function() { return [this.title]; }, }, -}); -/** - * @ignore - * @static - * @private - */ -PExtLink._selector = 'a[rel="mw:ExtLink"]'; - -/** - * PHeading represents a section heading in wikitext, like `== Foo ==`. - * @class PHeading - * @extends PNode - */ -/** - * @method constructor - * @private - * @inheritdoc PNode#constructor - */ -PHeading = function PHeading(pdoc, parent, node, opts) { - PNode.call(this, pdoc, parent, node, opts); -}; -util.inherits(PHeading, PNode); -Object.defineProperties(PHeading.prototype, { - /** - * The heading level, as an integer between 1 and 6 inclusive. - * @property {Number} - */ - level: { - get: function() { - return +this.node.nodeName.slice(1); - }, - set: function(v) { - v = +v; - if (v === this.level) { - return; - } else if (v >= 1 && v <= 6) { - var nh = this.ownerDocument.createElement('h' + v); - while (this.node.firstChild !== null) { - nh.appendChild(this.node.firstChild); - } - this.node.parentNode.replaceChild(nh, this.node); - this.node = nh; - this.update(); - } else { - throw new Error("Level must be between 1 and 6, inclusive."); - } - }, - }, - /** - * The title of the heading, as a {@link PNodeList}. - * You can assign a String, Node, or PNodeList to mutate the title. - * @property {PNodeList} - */ - title: innerAccessor, - - _children: { value: function() { return [this.title]; }, }, -}); -/** - * @ignore - * @static - * @private - */ -PHeading._selector = 'h1,h2,h3,h4,h5,h6'; - -/** - * PHtmlEntity represents an HTML entity, like ` `. - * @class PHtmlEntity - * @extends PNode - */ -/** - * @method constructor - * @private - * @inheritdoc PNode#constructor - */ -PHtmlEntity = function PHtmlEntity(pdoc, parent, node, opts) { - PNode.call(this, pdoc, parent, node, opts); -}; -util.inherits(PHtmlEntity, PNode); -Object.defineProperties(PHtmlEntity.prototype, { - /** - * The character represented by the HTML entity. - * @property {String} - */ - normalized: { - get: function() { return this.node.textContent; }, - set: function(v) { - this.node.textContent = v; - this.node.removeAttribute('data-parsoid'); - this.update(); - }, - }, - /** - * Extends {@link PNode#matches} to allow a target string to match - * if it matches this node's #normalized character. - * @method - * @inheritdoc PNode#matches - * @param {Node|PNode|String} target - */ - matches: { - value: function(target) { - return PNode.prototype.matches.call(this, target) || - this.normalized === target; - }, - }, -}); -/** - * @ignore - * @static - * @private - */ -PHtmlEntity._selector = '[typeof="mw:Entity"]'; - -/** - * PMedia represents an image or audio/video element in wikitext, - * like `[[File:Foobar.jpg|caption]]`. - * @class PMedia - * @extends PNode - */ -/** - * @method constructor - * @private - * @inheritdoc PNode#constructor - */ -PMedia = function PMedia(pdoc, parent, node, opts) { - PNode.call(this, pdoc, parent, node, opts); -}; -util.inherits(PMedia, PNode); -Object.defineProperties(PMedia.prototype, { - // Internal helper: is the outer element a <figure> or a <span>? - _isBlock: { get: function() { return this.node.tagName === 'FIGURE'; }, }, - // Internal helper: get at the 'caption' property in the dataMw - _caption: { - get: function() { - var c = this.dataMw.caption; - return c === undefined ? null : c; - }, - set: function(v) { - var dmw = this.dataMw; - if (v === undefined || v === null) { - delete dmw.caption; - } else { - dmw.caption = v; - } - this.dataMw = dmw; - }, - }, - - /** - * The caption of the image or media file, or `null` if not present. - * You can assign `null`, a String, Node, or PNodeList to mutate the - * contents. - * @property {PNodeList|null} - */ - caption: { - get: function() { - var c, captionDiv; - // Note that _cachedNodeList is null if caption is missing. - if (this._cachedNodeList === undefined) { - if (this._isBlock) { - c = this.node.firstChild.nextSibling; - this._cachedNodeList = - c ? new PNodeList(this.pdoc, this, c) : null; - } else { - c = this._caption; - if (c === null) { - this._cachedNodeList = null; - } else { - captionDiv = this.ownerDocument.createElement('div'); - captionDiv.innerHTML = c; - this._cachedNodeList = new PNodeList( - this.pdoc, this, captionDiv, { - update: function() { - this.parent._caption = this.container.innerHTML; - }, - }); - } - } - } - return this._cachedNodeList; - }, - set: function(v) { - this._cachedNodeList = undefined; - if (this._isBlock) { - var c = this.node.firstChild.nextSibling; - if (v === null || v === undefined) { - if (c) { - this.node.removeChild(c); - this.update(); - } - } else { - if (!c) { - c = this.ownerDocument.createElement('figcaption'); - this.node.appendChild(c); - } - c.innerHTML = toHtmlStr(c, v); - this.update(); - } - } else { - this._caption = (v === null || v === undefined) ? v : - toHtmlStr(this.node, v); - this.update(); - } - }, - }, - - _children: { - value: function() { - var c = this.caption; - return c ? [ c ] : []; - }, - }, -}); -/** - * @ignore - * @static - * @private - */ -PMedia._selector = 'figure,[typeof~="mw:Image"]'; - - -/** - * PTag represents any otherwise-unmatched tag. This includes - * HTML-style tags in wikicode, like `<span>`, as well as some - * "invisible" tags like `<p>`. - * @class PTag - * @extends PNode - */ -/** - * @method constructor - * @private - * @inheritdoc PNode#constructor - */ -PTag = function PTag(pdoc, parent, node, opts) { - PNode.call(this, pdoc, parent, node, opts); -}; -util.inherits(PTag, PNode); -Object.defineProperties(PTag.prototype, { - /** - * The name of the tag, in lowercase. - */ - tagName: { - get: function() { return this.node.tagName.toLowerCase(); }, - }, - - /** - * The contents of the tag, as a {@PNodeList} object. - * You can assign a String, Node, or PNodeList to mutate the contents. - * @property {PNodeList} - */ - contents: innerAccessor, - - _children: { value: function() { return [this.contents]; }, }, -}); -/** - * @ignore - * @static - * @private - */ -PTag._selector = '*'; // any otherwise-unmatched element - -/** - * PTemplate represents a wikitext template, like `{{foo}}`. - * @class PTemplate - * @extends PNode - */ -/** - * @method constructor - * @private - * @inheritdoc PNode#constructor - * @param {PDoc} pdoc The parent document for this PNode. - * @param {PNodeList|null} parent A containing node list which will receive - * updates when this {@link PNode} is mutated. - * @param {Node} node The DOM node. - * @param {Number} which A single {@link Node} can represent multiple - * templates; this parameter serves to distinguish them. - */ -PTemplate = function PTemplate(pdoc, parent, node, which) { - PNode.call(this, pdoc, parent, node, { - wtsNodes: function() { - // Templates are actually a collection of nodes. - return this.parent._querySelectorAll - ('[about="' + this.node.getAttribute('about') + '"]'); - }, - }); - this.which = which; - this._cachedParams = Object.create(null); -}; -util.inherits(PTemplate, PNode); -Object.defineProperties(PTemplate.prototype, { - _template: { - get: function() { - return this.dataMw.parts[this.which]; - }, - set: function(v) { - var dmw = this.dataMw; - dmw.parts[this.which] = v; - this.dataMw = dmw; - }, - }, - /** - * The name of the template, as a String. - * - * See: [T107194](https://phabricator.wikimedia.org/T107194) - * @property {String} - */ - name: { - get: function() { - // This should really be a PNodeList; see T107194 - return this._template.template.target.wt; - }, - set: function(v) { - var t = this._template; - t.template.target.wt = v; - t.template.target.href = './' + - this.pdoc.env.normalizedTitleKey('Template:' + v); - this._template = t; - }, - }, - /** - * Test whether the name of this template matches a given string, after - * normalizing titles. - * @param {String} name The template name to test against. - * @return {Boolean} - */ - nameMatches: { - value: function(name) { - var href = './' + this.pdoc.env.normalizedTitleKey('Template:' + name); - return this._template.template.target.href === href; - }, - }, - /** - * The parameters supplied to this template. - * @property {PTemplate.Parameter[]} - */ - params: { - get: function() { - return Object.keys(this._template.template.params).sort().map(function(k) { - return this.get(k); - }, this); - }, - }, - /** - * Return `true` if any parameter in the template is named `name`. - * With `ignoreEmpty`, `false` will be returned even if the template - * contains a parameter named `name`, if the parameter's value is empty - * (ie, only contains whitespace). Note that a template may have - * multiple parameters with the same name, but only the last one is - * read by Parsoid (and the MediaWiki parser). - * @param {String|PTemplate.Parameter} name - * @param {Object} [opts] - * @param {Boolean} [opts.ignoreEmpty=false] - */ - has: { - value: function(name, opts) { - if (name instanceof PTemplate.Parameter) { - name = name.name; - } - var t = this._template.template; - return Object.prototype.hasOwnProperty.call(t.params, name) && ( - (opts && opts.ignoreEmpty) ? - !/^\s*$/.test(t.params[name].html) : true - ); - }, - }, - /** - * Add a parameter to the template with a given `name` and `value`. - * If `name` is already a parameter in the template, we'll replace - * its value. - * @param {String|PTemplate.Parameter} name - * @param {String|Node|PNodeList} value - */ - add: { - value: function(k, v) { - if (k instanceof PTemplate.Parameter) { - k = k.name; - } - var t = this._template; - var html = toHtmlStr(this.node, v); - t.template.params[k] = { html: html }; - this._template = t; - }, - }, - /** - * Remove a parameter from the template with the given `name`. - * If `keepField` is `true`, we will keep the parameter's name but - * blank its value. Otherwise we will remove the parameter completely - * *unless* other parameters are dependent on it (e.g. removing - * `bar` from `{{foo|bar|baz}}` is unsafe because `{{foo|baz}}` is - * not what we expected, so `{{foo||baz}}` will be produced instead). - * @param {String|PTemplate.Parameter} name - * @param {Object} [opts] - * @param {Boolean} [opts.keepField=false] - */ - remove: { - value: function(k, opts) { - if (k instanceof PTemplate.Parameter) { - k = k.name; - } - var t = this._template; - var keepField = opts && opts.keepField; - // if this is a numbered template, force keepField if there - // are subsequent numbered templates. - var isNumeric = (String(+k) === String(k)); - if (isNumeric && this.has(1 + (+k))) { - keepField = true; - } - if (keepField) { - t.template.params[k] = { html: '' }; - } else { - delete t.template.params[k]; - } - this._template = t; - }, - }, - - /** - * Get the parameter whose name is `name`. - * @param {String|PTemplate.Parameter} name - * @return {PTemplate.Parameter} The parameter record. - */ - get: { - value: function(k) { - if (k instanceof PTemplate.Parameter) { - k = k.name; - } - if (!this._cachedParams[k]) { - this._cachedParams[k] = new PTemplate.Parameter(this, k); - } - return this._cachedParams[k]; - }, - }, - - _children: { - value: function() { - var result = []; - this.params.forEach(function(k) { - var p = this.get(k); - if (p.key) { result.push(p.key); } - result.push(p.value); - }, this); - return result; - }, - }, -}); -/** - * @ignore - * @static - * @private - */ -PTemplate._selector = '[typeof~="mw:Transclusion"]'; - -/** - * @class PTemplate.Parameter - * - * Represents a parameter of a template. - * - * For example, the template `{{foo|bar|spam=eggs}}` contains two - * {@link PTemplate.Parameter}s: one whose #name is `"1"` and whose - * whose #value is a {@link PNodeList} corresponding to `"bar"`, and one - * whose #name is `"spam"` and whose #value is a {@link PNodeList} - * corresponding to `"eggs"`. - * - * See: {@link PTemplate} - */ -/** - * @method constructor - * @private - * @param {PTemplate} parent The parent template for this parameter. - * @param {String} k The parameter name. - */ -PTemplate.Parameter = function Parameter(parent, k) { - var doc = parent.ownerDocument; - var param = parent._template.template.params[k]; - var valDiv = doc.createElement('div'); - valDiv.innerHTML = param.html; - this._name = k; - this._value = new PNodeList(parent.pdoc, parent, valDiv, { - update: function() { - var t = this.parent._template; - delete t.template.params[k].wt; - t.template.params[k].html = this.container.innerHTML; - this.parent._template = t; - }, - }); - var keyDiv = doc.createElement('div'); - this._key = new PNodeList(parent.pdoc, parent, keyDiv, { - update: function() { - var t = this.parent._template; - if (this._hasKey) { - if (!t.template.params[k].key) { - t.template.params[k].key = {}; - } - delete t.template.params[k].key.wt; - t.template.params[k].key.html = this.container.innerHTML; - } else { - delete t.template.params[k].key; - } - this.parent._template = t; - }, - }); - if (param.key && param.key.html) { - // T106852 means this doesn't always work. - keyDiv.innerHTML = param.key.html; - this._key._hasKey = true; - } -}; -Object.defineProperties(PTemplate.Parameter.prototype, { - /** - * @property {String} name - * The expanded parameter name. - * Unnamed parameters are given numeric indexes. - * @readonly - */ - name: { get: function() { return this._name; }, }, - /** - * @property {PNodeList|null} key - * Source nodes corresponding to the parameter name. - * For example, in `{{echo|{{echo|1}}=hello}}` the parameter name - * is `"1"`, but the `key` field would contain the `{{echo|1}}` - * template invocation, as a {@link PNodeList}. - */ - key: { - get: function() { return this._key._hasKey ? this._key : null; }, - set: function(v) { - if (v === null || v === undefined) { - this._key.container.innerHTML = ''; - this._key._hasKey = false; - } else { - this._key.container.innerHTML = - toHtmlStr(this._key.container, v); - } - this._key.update(); - }, - }, - /** - * @property {PNodeList} value - * The parameter value. - */ - value: { - get: function() { return this._value; }, - set: function(v) { - this._value.container.innerHTML = - toHtmlStr(this._value.container, v); - this._value.update(); - }, - }, - toWikitext: { - value: Promise.method(function() { - var k = this.key; - return Promise.join( - k ? k.toWikitext() : this.name, - this.value.toWikitext() - ).spread(function(keyWikitext, valueWikitext) { - return keyWikitext + '=' + valueWikitext; - }); - }), - }, - toString: { - value: function() { - var k = this.key; - return (k ? String(k) : this.name) + '=' + String(this.value); - }, - }, -}); - -/** - * PText represents ordinary unformatted text with no special properties. - * @class PText - * @extends PNode - */ -/** - * @method constructor - * @private - * @inheritdoc PNode#constructor - */ -PText = function PText(pdoc, parent, node, opts) { - PNode.call(this, pdoc, parent, node, opts); -}; -util.inherits(PText, PNode); -Object.defineProperties(PText.prototype, { - /** - * The actual text itself. - * @property {String} - */ - value: { - get: function() { - return this.node.data; - }, - set: function(v) { - this.node.data = v; - this.update(); - }, - }, - /** - * Extends {@link PNode#matches} to allow a target string to match - * if it matches this node's #value. - * @method - * @inheritdoc PNode#matches - * @param {Node|PNode|String} target - */ - matches: { - value: function(target) { - return PNode.prototype.matches.call(this, target) || - this.value === target; - }, - }, -}); -/** - * @ignore - * @static - * @private - */ -PText._selector = 'TEXT'; // non-standard selector - -/** - * PWikiLink represents an internal wikilink, like `[[Foo|Bar]]`. - * @class PWikiLink - * @extends PNode - */ -/** - * @method constructor - * @private - * @inheritdoc PNode#constructor - */ -PWikiLink = function PWikiLink(pdoc, parent, node, opts) { - PNode.call(this, pdoc, parent, node, opts); -}; -util.inherits(PWikiLink, PNode); -Object.defineProperties(PWikiLink.prototype, { - /** - * The title of the linked page. - * @property {String} - */ - title: { - // XXX url should be a PNodeList, but that requires handling - // typeof="mw:ExpandedAttrs" - get: function() { - return this.node.getAttribute('href').replace(/^.\//, ''); - }, - set: function(v) { - var href = './' + this.pdoc.env.normalizedTitleKey(v); - this.node.setAttribute('href', href); - this.update(); - }, - }, - /** - * The text to display, as a {@link PNodeList}. - * You can assign a String, Node, or PNodeList to mutate the text. - * @property {PNodeList} - */ - text: innerAccessor, - - _children: { value: function() { return [this.text]; }, }, -}); -/** - * @ignore - * @static - * @private - */ -PWikiLink._selector = 'a[rel="mw:WikiLink"]'; - -/** - * A PDoc object wraps an entire Parsoid document. Since it is an - * instance of {@link PNodeList}, you can filter it, mutate it, etc. - * But it also provides means to serialize the document as either - * HTML (via {@link #document} or {@link #toHtml}) or wikitext - * (via {@link #toWikitext}). - * @class - * @extends PNodeList - * @alternateClassName Parsoid.PDoc - */ -var PDoc = function PDoc(env, doc) { - PNodeList.call(this, this, null, doc.body); - this.env = env; -}; -util.inherits(PDoc, PNodeList); -Object.defineProperties(PDoc.prototype, { - /** - * An HTML {@link Document} representing article content conforming to the - * [MediaWiki DOM specification](https://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec). - * @property {Document} - */ - document: { - get: function() { return this.container.ownerDocument; }, - set: function(v) { this.container = v.body; }, - }, - /** - * Return a string representing the entire document as - * HTML conforming to the - * [MediaWiki DOM specification](https://www.mediawiki.org/wiki/Parsoid/MediaWiki_DOM_spec). - * @inheritdoc PNodeList#toHtml - * @method - */ - toHtml: { - value: function() { - // document.outerHTML is a Parsoid-ism; real browsers don't define it. - var html = this.document.outerHTML; - if (!html) { - html = this.document.body.outerHTML; - } - return html; - }, - }, -}); - -// Promise-using REPL, for easier debugging. -// We also handle `yield`, at least in common cases. -var repl = function() { - /* jshint evil:true */ - // The older version of jshint on jenkins is confused. - var Parsoid = require('../'); - console.log('Parsoid REPL', Parsoid.version); - var r = require('repl').start({ ignoreUndefined: true }); - // `var Parsoid = require('parsoid');` by default. - r.context.Parsoid = Parsoid; - // `var Promise = require('prfun');` by default. - r.context.Promise = Promise; - // Patch the `eval` method to wait for Promises to be resolved. - var oldEval = r.eval; - r.eval = function(cmd, context, filename, callback) { - // If `cmd` mentions `yield`, wrap it in a `function*` - if (/\byield\b/.test(cmd)) { - // Hack to support `var xyz = yield pdq...;`, convert it - // to `var xyz; ...{ xyz = yield pdq...; }...` - var m = /^(var\s+)(\w+)\s*=/.exec(cmd); - if (m) { cmd = cmd.slice(m[1].length); } - cmd = 'Promise.async(function*(){' + cmd + '})();'; - if (m) { cmd = m[1] + m[2] + ';' + cmd; } - } - oldEval.call(r, cmd, context, filename, function(e, v) { - if (e || !(typeof v === 'object' && typeof v.then === 'function')) { - return callback(e, v); - } - // OK, this is a promise! Wait for the result. - v.then(function(_v) { - callback(null, _v); - }, function(_e) { - callback(_e); - }); - }); - }; -}; - -module.exports = { - PDoc: PDoc, - PNodeList: PNodeList, - PNode: PNode, - PComment: PComment, - PExtLink: PExtLink, - PHeading: PHeading, - PHtmlEntity: PHtmlEntity, - PMedia: PMedia, - PTag: PTag, - PTemplate: PTemplate, - PText: PText, - PWikiLink: PWikiLink, - // Helper function for `Promise.map` - toWikitext: Promise.method(function(n) { return n.toWikitext(); }), - // Useful REPL that handles promises and `yield` well. - repl: repl, -}; diff --git a/tests/mocha/jsapi.js b/tests/mocha/jsapi.js index 96e3130..4333c36 100644 --- a/tests/mocha/jsapi.js +++ b/tests/mocha/jsapi.js @@ -3,7 +3,6 @@ "use strict"; var Parsoid = require('../../'); -var Promise = require('../../lib/utils/promise.js'); describe('Parsoid JS API', function() { it('converts empty wikitext to HTML', function() { @@ -19,357 +18,6 @@ res.should.have.property('out'); res.should.have.property('trailingNL'); res.out.should.have.property('outerHTML'); - }); - }); -}); - -describe('Examples from guides/jsapi', function() { - it('converts empty wikitext to HTML', function() { - return Parsoid.parse('', { pdoc: true}).then(function(pdoc) { - pdoc.should.have.property('document'); - pdoc.document.should.have.property('outerHTML'); - pdoc.document.body.children.length.should.equal(0); - }); - }); - it('converts simple wikitext to HTML', function() { - return Parsoid.parse('I love wikitext!', { pdoc: true}).then(function(pdoc) { - pdoc.should.have.property('document'); - pdoc.document.should.have.property('outerHTML'); - }); - }); - it('filters out templates', function() { - var text = "I has a template! {{foo|bar|baz|eggs=spam}} See it?\n"; - var pdoc, templates, template; - return Parsoid.parse(text, { pdoc: true }).then(function(_pdoc) { - pdoc = _pdoc; - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal(text); - templates = pdoc.filterTemplates(); - templates.length.should.equal(1); - return templates[0].toWikitext(); - }).then(function(wt) { - wt.should.equal('{{foo|bar|baz|eggs=spam}} See it?'); - template = templates[0]; - template.name.should.equal('foo'); - template.name = 'notfoo'; - return template.toWikitext(); - }).then(function(wt) { - wt.should.equal('{{notfoo|bar|baz|eggs=spam}} See it?'); - template.params.length.should.equal(3); - template.params[0].name.should.equal('1'); - template.params[1].name.should.equal('2'); - template.params[2].name.should.equal('eggs'); - return template.get(1).value.toWikitext(); - }).then(function(wt) { - wt.should.equal('bar'); - return template.get('eggs').value.toWikitext(); - }).then(function(wt) { - wt.should.equal('spam'); - }); - }); - it('filters templates, recursively', function() { - var text = "{{foo|{{bar}}={{baz|{{spam}}}}}}"; - return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) { - var templates = pdoc.filterTemplates(); - // XXX note that {{bar}} as template name doesn't get handled; - // that's bug T106852 - templates.length.should.equal(3); - }); - }); - it('filters templates, non-recursively', function() { - var text = "{{foo|this {{includes a|template}}}}"; - var foo; - return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) { - var templates = pdoc.filterTemplates({ recursive: false }); - templates.length.should.equal(1); - foo = templates[0]; - return foo.get(1).value.toWikitext(); - }).then(function(wt) { - wt.should.equal('this {{includes a|template}}'); - var more = foo.get(1).value.filterTemplates(); - more.length.should.equal(1); - return more[0].get(1).value.toWikitext(); - }).then(function(wt) { - wt.should.equal('template'); - }); - }); - it('is easy to mutate templates', function() { - var text = "{{cleanup}} '''Foo''' is a [[bar]]. {{uncategorized}}"; - return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) { - pdoc.filterTemplates().forEach(function(template) { - if (template.nameMatches('Cleanup') && !template.has('date')) { - template.add('date', 'July 2012'); - } - if (template.nameMatches('uncategorized')) { - template.name = 'bar-stub'; - } - }); - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal("{{cleanup|date=July 2012}} '''Foo''' is a [[bar]]. {{bar-stub}}"); - }); - }); -}); - -describe('Further examples of PDoc API', function() { - it('is easy to mutate templates (2)', function() { - // Works even on nested templates! - var text = "{{1x|{{cleanup}} '''Foo''' is a [[bar]].}} {{uncategorized}}"; - return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) { - pdoc.filterTemplates().forEach(function(template) { - if (template.nameMatches('Cleanup') && !template.has('date')) { - template.add('date', 'July 2012'); - // Works even when there are special characters - template.add('test1', '{{foo}}&bar|bat<p>'); - template.add('test2', Parsoid.PNodeList.fromHTML(pdoc, "I'm so <b>bold</b>!")); - } - }); - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal("{{1x|{{cleanup|date=July 2012|test1=<nowiki>{{foo}}</nowiki>&bar{{!}}bat<nowiki><p></nowiki>|test2=I'm so '''bold'''!}} '''Foo''' is a [[bar]].}} {{uncategorized}}"); - }); - }); - it('is safe to mutate template arguments', function() { - var text = "{{1x|foo|bar}}"; - return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) { - var t = pdoc.filterTemplates()[0]; - t.remove(1); - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal('{{1x||bar}}'); - }); - }); - it('is safe to mutate template arguments (2)', function() { - var text = "{{1x|foo|bar}}"; - return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) { - var t = pdoc.filterTemplates()[0]; - var param1 = t.get(1); - var param2 = t.get(2); - param2.value = param1.value; - param1.value = '|'; - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal('{{1x|{{!}}|foo}}'); - }); - }); - it('filters and mutates headings', function() { - var text = "= one =\n== two ==\n=== three ===\n==== four ====\nbody"; - return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) { - var headings = pdoc.filterHeadings(); - headings.length.should.equal(4); - headings[0].level.should.equal(1); - headings[1].level.should.equal(2); - headings[2].level.should.equal(3); - headings[3].level.should.equal(4); - headings[0].title.toHtml().should.equal(' one '); - headings[1].title.toHtml().should.equal(' two '); - headings[2].title.toHtml().should.equal(' three '); - headings[3].title.toHtml().should.equal(' four '); - headings[0].title = '=0='; - headings[1].title = headings[2].title; - headings[3].level = 3; - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal('=<nowiki>=0=</nowiki>=\n== three ==\n=== three ===\n\n=== four ===\nbody\n'); - }); - }); - it('filters and mutates headings inside templates', function() { - var text = "{{1x|1=\n= one =\n}}"; - var pdoc, headings; - return Parsoid.parse(text, { pdoc: true }).then(function(_pdoc) { - pdoc = _pdoc; - headings = pdoc.filterHeadings(); - headings.length.should.equal(1); - headings[0].level = 2; - return headings[0].toWikitext(); - }).then(function(wt) { - wt.should.equal('== one ==\n'); - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal('{{1x|1=\n== one ==\n}}'); - headings[0].title = 'two'; - return headings[0].toWikitext(); - }).then(function(wt) { - wt.should.equal('== two ==\n'); - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal('{{1x|1=\n== two ==\n}}'); - }); - }); - it('filters and mutates external links', function() { - var text = "[http://example.com {{1x|link content}}]"; - var pdoc, extlinks; - return Parsoid.parse(text, { pdoc: true }).then(function(_pdoc) { - pdoc = _pdoc; - extlinks = pdoc.filterExtLinks(); - extlinks.length.should.equal(1); - String(extlinks[0].url).should.equal('http://example.com'); - return extlinks[0].title.toWikitext(); - }).then(function(wt) { - wt.should.equal('{{1x|link content}}'); - extlinks[0].title = ']'; - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal('[http://example.com <nowiki>]</nowiki>]\n'); - }); - }); - it('filters and mutates wiki links', function() { - var text = "[[foo|1]] {{1x|[[bar|2]]}} [[{{1x|bat}}|3]]"; - var pdoc, extlinks; - return Parsoid.parse(text, { pdoc: true }).then(function(_pdoc) { - pdoc = _pdoc; - extlinks = pdoc.filterWikiLinks(); - extlinks.length.should.equal(3); - return Promise.all([ - extlinks[0].title, - extlinks[0].text.toWikitext(), - extlinks[1].title, - extlinks[1].text.toWikitext(), - extlinks[2].text.toWikitext(), - ]); - }).then(function(all) { - all[0].should.equal('Foo'); - all[1].should.equal('1'); - all[2].should.equal('Bar'); - all[3].should.equal('2'); - all[4].should.equal('3'); - extlinks[0].title = extlinks[0].text = 'foobar'; - extlinks[1].text = 'A'; - extlinks[2].text = 'B'; - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal('[[foobar]] {{1x|[[bar|A]]}} [[{{1x|bat}}|B]]\n'); - }); - }); - it('filters and mutates html entities', function() { - var text = '&{{1x|"}}'; - return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) { - var entities = pdoc.filterHtmlEntities(); - entities.length.should.equal(2); - entities[0].normalized.should.equal('&'); - entities[1].normalized.should.equal('"'); - entities[0].normalized = '<'; - entities[1].normalized = '>'; - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal('<{{1x|>}}\n'); - }); - }); - it('filters and mutates comments', function() { - var text = '<!-- foo --> {{1x|<!--bar-->}}'; - return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) { - var comments = pdoc.filterComments(); - comments.length.should.equal(2); - comments[0].contents.should.equal(' foo '); - comments[1].contents.should.equal('bar'); - comments[0].contents = '<!-- ha! -->'; - comments[1].contents = '--'; - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal('<!--<!-- ha! -->--> {{1x|<!------>}}'); - }); - }); - it('filters and mutates images', function() { - var text = '[[File:SomeFile1.jpg]] [[File:SomeFile2.jpg|thumb|caption]]'; - var pdoc, media; - return Parsoid.parse(text, { pdoc: true }).then(function(_pdoc) { - pdoc = _pdoc; - media = pdoc.filterMedia(); - media.length.should.equal(2); - media[0].should.have.property('caption', null); - return media[1].caption.toWikitext(); - }).then(function(wt) { - wt.should.equal('caption'); - media[0].caption = '|'; - media[1].caption = null; - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal('[[File:SomeFile1.jpg|<nowiki>|</nowiki>]] [[File:SomeFile2.jpg|thumb]]'); - media[0].caption = null; - media[1].caption = '|'; - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal('[[File:SomeFile1.jpg]] [[File:SomeFile2.jpg|thumb|<nowiki>|</nowiki>]]'); - }); - }); - it('filters and mutates text', function() { - var text = 'foo {{1x|bar}}'; - var pdoc, texts; - return Parsoid.parse(text, { pdoc: true }).then(function(_pdoc) { - pdoc = _pdoc; - texts = pdoc.filterText({ recursive: false }); - texts.length.should.equal(1); - texts = pdoc.filterText({ recursive: true }); - texts.length.should.equal(2); - texts[0].value.should.equal('foo '); - texts[1].value.should.equal('bar'); - texts[0].value = 'FOO '; - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal('FOO {{1x|bar}}\n'); - texts[1].value = 'BAR'; - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal('FOO {{1x|BAR}}\n'); - }); - }); - it.skip('filters and mutates text (2)', function() { - var text = '{{{1x|{{!}}}}\n| foo\n|}'; - return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) { - var texts = pdoc.filterText(); - texts.length.should.equal(1); - // XXX this doesn't work yet, see note at end of - // https://www.mediawiki.org/wiki/Specs/HTML/1.2.1#Transclusion_content - // for details. ("Editing support for the interspersed wikitext...") - texts[0].value.should.equal(' foo'); - }); - }); - it('allows mutation using wikitext', function() { - var text = '== heading =='; - var pdoc, headings; - return Parsoid.parse(text, { pdoc: true }).then(function(_pdoc) { - pdoc = _pdoc; - headings = pdoc.filterHeadings(); - headings.length.should.equal(1); - // Note that even if the wikitext is unbalanced, the result - // will be balanced. The bold face doesn't escape the heading! - return Parsoid.PNodeList.fromWikitext(pdoc, "'''bold"); - }).then(function(pnl) { - headings[0].title = pnl; - return pdoc.toWikitext(); - }).then(function(wt) { - wt.should.equal("== '''bold''' ==\n"); - }); - }); - it('allows iteration using length and get()', function() { - var text = '== 1 ==\n[http://example.com 2]<!-- 3 --> {{1x|4}} 5 [[Foo|6]]'; - return Parsoid.parse(text, { pdoc: true }).then(function(pdoc) { - pdoc.length.should.equal(3); - pdoc.get(0).should.be.instanceof(Parsoid.PHeading); - pdoc.get(1).should.be.instanceof(Parsoid.PText); - pdoc.get(2).should.be.instanceof(Parsoid.PTag); - pdoc.get(2).tagName.should.be.equal('p'); - var paragraph = pdoc.get(2).contents; - paragraph.length.should.equal(6); - paragraph.get(0).should.be.instanceof(Parsoid.PExtLink); - paragraph.get(1).should.be.instanceof(Parsoid.PComment); - paragraph.get(2).should.be.instanceof(Parsoid.PHtmlEntity); - paragraph.get(3).should.be.instanceof(Parsoid.PTemplate); - paragraph.get(4).should.be.instanceof(Parsoid.PText); - paragraph.get(5).should.be.instanceof(Parsoid.PWikiLink); - // Test indexOf with PNodes and Nodes - for (var i = 0; i < paragraph.length; i++) { - paragraph.indexOf(paragraph.get(i)).should.equal(i); - paragraph.indexOf(paragraph.get(i).node).should.equal(i); - pdoc.indexOf(paragraph.get(i), { recursive: true }).should.equal(2); - pdoc.indexOf(paragraph.get(i).node, { recursive: true }).should.equal(2); - } - // Test indexOf with strings - pdoc.indexOf(' 5 ').should.equal(-1); - pdoc.indexOf(' 5 ', { recursive: true }).should.equal(2); - paragraph.indexOf(' 5 ').should.equal(4); - paragraph.indexOf('\u00A0').should.equal(2); }); }); }); -- To view, visit https://gerrit.wikimedia.org/r/348111 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I03c089c68e8a9639b9ca775dd74fed75b25db5c4 Gerrit-PatchSet: 5 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Arlolra <abrea...@wikimedia.org> Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org> Gerrit-Reviewer: C. Scott Ananian <canan...@wikimedia.org> Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits