jenkins-bot has submitted this change and it was merged.
Change subject: Support "standalone mode" for single-article conversion.
..
Support "standalone mode" for single-article conversion.
Change-Id: I1f53b0c811bfcbdf51317dfd622877f26763a4d4
---
M README.md
M bin/mw-ocg-texter
M lib/index.js
A lib/standalone.js
M package.json
5 files changed, 83 insertions(+), 8 deletions(-)
Approvals:
Cscott: Looks good to me, approved
jenkins-bot: Verified
diff --git a/README.md b/README.md
index 3ac0613..521407a 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,16 @@
bin/mw-ocg-texter --help
```
+## Standalone mode
+To convert a single article without the bundle creation step, use:
+```
+bin/mw-ocg-texter -h en.wikipedia.org -t "United States"
+```
+The `-h` option specifies the hostname of the wiki (it defaults to
+`en.wikipedia.org`), and the `-t` option gives the title to convert.
+The content will be fetched from RESTBase and converted, with the
+result written to standard output (unless the `-o` option is given).
+
## Other ideas
This backend should implement the [Unicode Nearly Plain-Text Encoding of
Mathematics](http://unicode.org/notes/tn28/UTN28-PlainTextMath-v3.pdf)
diff --git a/bin/mw-ocg-texter b/bin/mw-ocg-texter
index d1a1de5..b06c0e3 100755
--- a/bin/mw-ocg-texter
+++ b/bin/mw-ocg-texter
@@ -20,11 +20,16 @@
.option('-D, --debug',
'Turn on debugging features (eg, full stack traces on
exceptions)')
.option('-T, --temporary-directory ',
- 'Use for temporaries, not $TMPDIR or /tmp', null);
+ 'Use for temporaries, not $TMPDIR or /tmp', null)
+ .option('-h, --domain ',
+ 'Wiki hostname to use to resolve the title
[en.wikipedia.org]', null)
+ .option('-t, --title ',
+ 'Don\'t use a bundle, download the given title
instead.',
+ null);
program.parse(process.argv);
-if (program.args.length === 0) {
+if (program.args.length === 0 && !program.title) {
console.error('A bundle filename or directory is required.');
return 1;
}
@@ -73,7 +78,15 @@
log: log
};
-texter.convert(options).catch(function(err) {
+var p;
+if (!program.title) {
+ p = texter.convert(options);
+} else {
+ options.domain = program.domain || 'en.wikipedia.org';
+ options.title = program.title;
+ p = require('../lib/standalone').convert(options);
+}
+p.catch(function(err) {
var msg = {
type: 'log',
level: 'error'
diff --git a/lib/index.js b/lib/index.js
index d01a1fa..9c20729 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -992,10 +992,10 @@
format.writeSummary(textEscape(metabook.summary).replace(/\s+/g, ' '));
}
- var pdb = new Db(
+ var pdb = options.pdb || new Db(
path.join(builddir, 'bundle', 'parsoid.db'), { readonly: true }
);
- var sidb = new Db(
+ var sidb = options.sidb || new Db(
path.join(builddir, 'bundle', 'siteinfo.db'), { readonly: true }
);
var write = {};
@@ -1006,7 +1006,9 @@
var document, base = '', articleLanguage;
var key = (item.wiki ? (item.wiki+'|') : '') + revid;
return pdb.get(key, 'nojson').then(function(data) {
- document = domino.createDocument(data);
+ // avoid redundant parsing in standalone mode (which
will
+ // return an object with a `document` property)
+ document = data.document || domino.createDocument(data);
var baseElem = document.querySelector('head >
base[href]');
if (baseElem) {
base = baseElem.getAttribute('href').
@@ -1109,5 +1111,6 @@
module.exports = {
name: json.name, // package name
version: json.version, // version # for this package
- convert: convert
+ convert: convert,
+ generateOutput: generateOutput // for use by standalone.js
};
diff --git a/lib/standalone.js b/lib/standalone.js
new file mode 100644
index 0000000..d6482ae
--- /dev/null
+++ b/lib/standalone.js
@@ -0,0 +1,46 @@
+"use strict";
+require('core-js/shim');
+var Promise = require('prfun');
+var main = require('./');
+var bundler = require('mw-ocg-bundler');
+
+var convert = module.exports.convert = function(options) {
+ // make metabook.
+ return bundler.metabook.fromArticles([{
+ prefix: options.prefix,
+ domain: options.domain,
+ title: options.title
+ }], options).then(function(metabook) {
+ var item = metabook.items[0];
+ var Parsoid = new bundler.parsoid(
+ metabook.wikis, options.apiVersion, options.log
+ );
+ var siteinfo = options.siteinfo ||