jenkins-bot has submitted this change and it was merged.

Change subject: Support "standalone mode" for single-article conversion.
......................................................................


Support "standalone mode" for single-article conversion.

Change-Id: I1f53b0c811bfcbdf51317dfd622877f26763a4d4
---
M README.md
M bin/mw-ocg-texter
M lib/index.js
A lib/standalone.js
M package.json
5 files changed, 83 insertions(+), 8 deletions(-)

Approvals:
  Cscott: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/README.md b/README.md
index 3ac0613..521407a 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,16 @@
 bin/mw-ocg-texter --help
 ```
 
+## Standalone mode
+To convert a single article without the bundle creation step, use:
+```
+bin/mw-ocg-texter -h en.wikipedia.org -t "United States"
+```
+The `-h` option specifies the hostname of the wiki, and the `-t`
+option gives the title to convert.  The content will be fetched
+from RESTBase and converted, with output to standard out (unless
+the `-o` option is given).
+
 ## Other ideas
 This backend should implement the [Unicode Nearly Plain-Text Encoding of
 Mathematics](http://unicode.org/notes/tn28/UTN28-PlainTextMath-v3.pdf)
diff --git a/bin/mw-ocg-texter b/bin/mw-ocg-texter
index d1a1de5..b06c0e3 100755
--- a/bin/mw-ocg-texter
+++ b/bin/mw-ocg-texter
@@ -20,11 +20,16 @@
        .option('-D, --debug',
                        'Turn on debugging features (eg, full stack traces on exceptions)')
        .option('-T, --temporary-directory <dir>',
-                       'Use <dir> for temporaries, not $TMPDIR or /tmp', null);
+                       'Use <dir> for temporaries, not $TMPDIR or /tmp', null)
+       .option('-h, --domain <domain name>',
+                       'Wiki hostname to use to resolve the title [en.wikipedia.org]', null)
+       .option('-t, --title <title>',
+                       'Don\'t use a bundle, download the given title instead.',
+                       null);
 
 program.parse(process.argv);
 
-if (program.args.length === 0) {
+if (program.args.length === 0 && !program.title) {
        console.error('A bundle filename or directory is required.');
        return 1;
 }
@@ -73,7 +78,15 @@
        log: log
 };
 
-texter.convert(options).catch(function(err) {
+var p;
+if (!program.title) {
+       p = texter.convert(options);
+} else {
+       options.domain = program.domain || 'en.wikipedia.org';
+       options.title = program.title;
+       p = require('../lib/standalone').convert(options);
+}
+p.catch(function(err) {
        var msg = {
                type: 'log',
                level: 'error'
diff --git a/lib/index.js b/lib/index.js
index d01a1fa..9c20729 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -992,10 +992,10 @@
                format.writeSummary(textEscape(metabook.summary).replace(/\s+/g, ' '));
        }
 
-       var pdb = new Db(
+       var pdb = options.pdb || new Db(
                path.join(builddir, 'bundle', 'parsoid.db'), { readonly: true }
        );
-       var sidb = new Db(
+       var sidb = options.sidb || new Db(
                path.join(builddir, 'bundle', 'siteinfo.db'), { readonly: true }
        );
        var write = {};
@@ -1006,7 +1006,9 @@
                var document, base = '', articleLanguage;
                var key = (item.wiki ? (item.wiki+'|') : '') + revid;
                return pdb.get(key, 'nojson').then(function(data) {
-                       document = domino.createDocument(data);
+                       // avoid redundant parsing in standalone mode (which will
+                       // return an object with a `document` property)
+                       document = data.document || domino.createDocument(data);
                        var baseElem = document.querySelector('head > base[href]');
                        if (baseElem) {
                                base = baseElem.getAttribute('href').
@@ -1109,5 +1111,6 @@
 module.exports = {
        name: json.name, // package name
        version: json.version, // version # for this package
-       convert: convert
+       convert: convert,
+       generateOutput: generateOutput // for use by standalone.js
 };
diff --git a/lib/standalone.js b/lib/standalone.js
new file mode 100644
index 0000000..d6482ae
--- /dev/null
+++ b/lib/standalone.js
@@ -0,0 +1,46 @@
+"use strict";
+require('core-js/shim');
+var Promise = require('prfun');
+var main = require('./');
+var bundler = require('mw-ocg-bundler');
+
+var convert = module.exports.convert = function(options) {
+       // make metabook.
+       return bundler.metabook.fromArticles([{
+               prefix: options.prefix,
+               domain: options.domain,
+               title: options.title
+       }], options).then(function(metabook) {
+               var item = metabook.items[0];
+               var Parsoid = new bundler.parsoid(
+                       metabook.wikis, options.apiVersion, options.log
+               );
+               var siteinfo = options.siteinfo || new bundler.siteinfo(
+                       metabook.wikis, options.log
+               );
+               return siteinfo.fetch(item.wiki).then(function(si) {
+                       return Parsoid.fetch(si, item.wiki, item.title, null, 2).then(function(pr) {
+                               var opts = Object.create(options);
+                               item.wiki = pr.wiki;
+                               item.title = pr.title;
+                               item.revision = pr.getRevisionId();
+                               // fake a db
+                               opts.pdb = {
+                                       get: function(key, nojson) {
+                                               return Promise.resolve({ document: pr.document });
+                                       }
+                               };
+                               opts.sidb = {
+                                       get: function(key, nojson) {
+                                               return Promise.resolve(si);
+                                       }
+                               };
+                               opts.status = {
+                                       createStage: function(){},
+                                       report: function(){},
+                               };
+                               return main.generateOutput(metabook, '/dont/use/this', opts);
+                       });
+               });
+       });
+};
diff --git a/package.json b/package.json
index 495c559..9115d57 100644
--- a/package.json
+++ b/package.json
@@ -16,7 +16,7 @@
   ],
   "license": "GPL-2.0",
   "dependencies": {
-    "commander": "~2.5.0",
+    "commander": "~2.8.1",
     "core-js": "~0.9.1",
     "domino": "~1.0.17",
     "linewrap": "~0.2.1",
@@ -25,6 +25,9 @@
     "sqlite3": "~3.0.8",
     "tmp": "~0.0.24"
   },
+  "optionalDependencies": {
+      "mw-ocg-bundler": "wikimedia/mediawiki-extensions-Collection-OfflineContentGenerator-bundler#master"
+  },
   "devDependencies": {
     "jshint": "~2.6.3",
     "mocha": "~2.0.1"

-- 
To view, visit https://gerrit.wikimedia.org/r/235124
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I1f53b0c811bfcbdf51317dfd622877f26763a4d4
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/Collection/OfflineContentGenerator/text_renderer
Gerrit-Branch: master
Gerrit-Owner: Cscott <canan...@wikimedia.org>
Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org>
Gerrit-Reviewer: Cscott <canan...@wikimedia.org>
Gerrit-Reviewer: Nikerabbit <niklas.laxst...@gmail.com>
Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org>
Gerrit-Reviewer: Tim Starling <tstarl...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to