Cscott has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/102470


Change subject: Refactor: `Formatter` class tracks paragraph/inline state.
......................................................................

Refactor: `Formatter` class tracks paragraph/inline state.

We create a new class to buffer paragraphs and track the current
paragraph/inline/environment state.  This lets us run the Unicode
bidirectional (bidi) algorithm on each paragraph when we emit it
(the bidi handling is not yet fully implemented).

Change-Id: Ic04cb626eee65ff302becfee93402e23c5440174
---
M lib/index.js
1 file changed, 347 insertions(+), 176 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Collection/OfflineContentGenerator/latex_renderer refs/changes/70/102470/1

diff --git a/lib/index.js b/lib/index.js
index a17571d..e9497c8 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -10,6 +10,7 @@
 var path = require('path');
 var stream = require('stream');
 var tmp = require('tmp');
+var ubidi = require('icu-bidi');
 var url = require('url');
 var when = require('when');
 tmp.setGracefulCleanup();
@@ -98,7 +99,9 @@
        // Set the default font
        "\\setmainfont[" + (SCRIPT_FONTS['default'].options||'') + "]{" + 
SCRIPT_FONTS['default'].name + "}",
 
-       "\\date{}\\author{}"
+       "\\date{}\\author{}",
+       // work around the fact that \\item uses [] instead of {}
+       "\\newcommand{\\myitem}[1]{\\item[#1]}"
 ].join("\n");
 
 var STD_FOOTER = [
@@ -140,6 +143,171 @@
                return match.replace(/'/, '\u2019');
        });
        return str;
+};
+
+var Formatter = function(stream, options) {
+       this.stream = stream;
+       this.options = options;
+       this.buffer = [];
+       this.decorations = [];
+       this.inline = 0;
+       this.pos = 0;
+       this.newEnv = this.newLine = this.newPara = true;
+       this.dir = 'ltr'; // XXX
+};
+Formatter.prototype.flush = function(opts) {
+       var deferred = when.defer();
+       this.envBreak();
+       this.stream.write('', 'utf8', function() {
+               deferred.resolve();
+       });
+       return deferred.promise;
+};
+Formatter.prototype._writeRaw = function(text) {
+       this.stream.write(text, 'utf8');
+};
+Formatter.prototype._writeRuns = function() {
+       var text = this.buffer.join('');
+       this._addDecoration({ type: 'end' }); // sentinel
+       // get visual runs in this text.
+       var p = new ubidi.Paragraph(text, {
+               paraLevel: (this.dir==='ltr') ? ubidi.DEFAULT_LTR : 
ubidi.DEFAULT_RTL
+       });
+
+       var stack = [];
+       var emitDecoration = function(d) {
+               switch (d.type) {
+               case 'start-inline':
+                       stack.push(d);
+                       this._writeRaw(d.value);
+                       this._writeRaw(d.delimiter || '{');
+                       break;
+               case 'end-inline':
+                       console.assert(stack.length, this.decorations);
+                       console.assert(stack[stack.length-1].value === d.value);
+                       stack.pop();
+                       this._writeRaw(d.delimiter || '}');
+                       break;
+               case 'start-block':
+               case 'end-block':
+               case 'raw':
+                       this._writeRaw(d.value); break;
+               default:
+                       console.assert(false);
+               }
+       }.bind(this);
+
+       var runs = p.countRuns(), pos, i, j, d;
+       for (i=j=pos=0; i < runs; i++) {
+               var run = p.getVisualRun(i);
+               // XXX open any decorations on stack
+               // XXX FIX ME XXX do something with run.dir
+               var runEnd = run.logicalStart + run.length;
+               for (;; j++) {
+                       d = this.decorations[j];
+                       if (!(d.pos < runEnd ||
+                                 d.pos === runEnd && /^end-/.test(d.type))) {
+                               break;
+                       }
+                       // write text up to this decoration
+                       this._writeRaw(texEscape(text.slice(pos, d.pos)));
+                       pos = d.pos;
+                       emitDecoration(d);
+               }
+               // emit any trailing text
+               this._writeRaw(texEscape(text.slice(pos, runEnd)));
+               pos = runEnd;
+               // XXX close any decorations on the stack
+       }
+       // emit decorations at end
+       for (; j<this.decorations.length-1; j++) {
+               d = this.decorations[j];
+               emitDecoration(d);
+       }
+       console.assert(stack.length===0, this.decorations, text);
+       // done; clear all the buffers
+       this.buffer.length = this.decorations.length = this.pos = 0;
+};
+Formatter.prototype.resetSOL = function() {
+       this.newEnv = this.newLine = this.newPara = true;
+};
+Formatter.prototype.envBreak = function() {
+       if (this.newEnv) { return; }
+       this._writeRuns();
+       this._writeRaw('\n');
+       this.newEnv = true;
+};
+Formatter.prototype.paragraphBreak = function() {
+       if (this.newPara) { return; }
+       if (this.inline) { return this.write(' '); }
+       this.envBreak();
+       this._writeRaw('\n');
+       this.newPara = this.newLine = true;
+};
+Formatter.prototype.lineBreak = function() {
+       if (this.newLine) { return; }
+       if (this.inline) { return this.write(' '); }
+       this.envBreak();
+       this._writeRaw('\\\\\n');
+       this.newLine = true;
+};
+Formatter.prototype._addDecoration = function(d) {
+       d.pos = this.pos;
+       this.decorations.push(d);
+       this.newEnv = this.newLine = this.newPara = false;
+       if (d.type==='start-inline') { this.inline++; }
+       else if (d.type==='end-inline') { this.inline--; }
+};
+Formatter.prototype.write = function(text) {
+       if (this.newEnv || this.newLine || this.newPara) {
+               text = text.replace(/^\s+/, ''); // kill leading space after nl
+               if (!text.length) { return; }
+               this.newEnv = this.newLine = this.newPara = false;
+       }
+       text = text.replace(/\s+/g, ' '); // remove newlines
+       this.buffer.push(text);
+       this.pos += text.length;
+};
+Formatter.prototype.writeDecorated = function(decoration, text) {
+       if (text === undefined) {
+               if (typeof(decoration)==='string') {
+                       decoration = {
+                               type: 'raw',
+                               value: decoration
+                       };
+               }
+               this._addDecoration(decoration);
+               return;
+       }
+       this._addDecoration({
+               type: 'start-inline',
+               value: decoration
+       });
+       if (typeof(text)==='function') { text = text(); }
+       if (typeof(text)==='string') { this.write(text); }
+       this._addDecoration({
+               type: 'end-inline',
+               value: decoration
+       });
+};
+// helpers for environments.
+Formatter.prototype.begin = function(env, opts) {
+       this.envBreak();
+       this._addDecoration({
+               type: 'start-block',
+               value: '\\begin{' + env + '}' + (opts ? ('[' + opts + ']') : '')
+       });
+       this.envBreak();
+       this.newLine = this.newPara = true;
+};
+Formatter.prototype.end = function(env) {
+       this.envBreak();
+       this._addDecoration({
+               type: 'end-block',
+               value: '\\end{' + env + '}'
+       });
+       this.envBreak();
+       this.newLine = this.newPara = true;
 };
 
 // Predicate to determine whether the given element will be a
@@ -195,8 +363,9 @@
 
 /* Document node visitor class.  Collects LaTeX output as it traverses the
  * document tree. */
-var Visitor = function(document, options) {
+var Visitor = function(document, format, options) {
        this.document = document;
+       this.format = format;
        this.options = options;
        this.output = [];
        this.templates = Object.create(null);
@@ -204,27 +373,15 @@
        this.currentLanguage = this.tocLanguage = options.lang || 'en';
        this.currentDirectionality = options.dir || 'ltr';
        this.usedLanguages = new Set();
-       this.insideParagraph = false;
+       this.listInfo = {};
 };
 
-// Helper function -- collect all text from the children of `node` as
-// HTML non-block/TeX non-paragraph content.  Invoke `f` with the result,
-// suitable for inclusion in a TeX non-paragraph context.
-Visitor.prototype.collect = function(node, f) {
-       var o = this.output, inside = this.insideParagraph;
-       this.output = [];
-       this.insideParagraph = false;
-       this.visitChildren(node);
-       // combine lines, compress paragraphs
-       var text = this.output.join('\n').
-               replace(/(^|\n)%[^\n]*(\n|$)/g, '$1'). // remove comments
-               replace(/%\n\s*/g, ''). // remove escaped newlines
-               replace(/%$/, '').
-               replace(/^\{\}/, ''). // remove escape for start of line 
whitespace
-               replace(/\n\n+/g, '\n'); // remove paragraphs
-       this.output = o;
-       this.insideParagraph = inside;
-       return f.call(this, text);
+// Helper function -- wrap the contents of the children of the node
+// with the given inline (non-paragraph) decoration
+Visitor.prototype.wrap = function(decoration, node) {
+       this.format.writeDecorated(decoration, function() {
+               this.visitChildren(node);
+       }.bind(this));
 };
 
 // Generic node visitor.  Dispatches to specialized visitors based on
@@ -235,15 +392,6 @@
        case node.ELEMENT_NODE:
                if (isHidden(node)) {
                        return;
-               }
-               if (isParagraph(node) && node !== this.insideParagraphGuard) {
-                       var guard = this.insideParagraphGuard;
-                       this.insideParagraph = false;
-                       this.insideParagraphGuard = node;
-                       var r = this.visit.apply(this, arguments); // recurse!
-                       this.insideParagraph = false;
-                       this.insideParagraphGuard = guard;
-                       return r;
                }
                // handle LANG attributes (which override everything else)
                var lang = node.getAttribute('lang') || this.currentLanguage;
@@ -283,23 +431,14 @@
 
        case node.TEXT_NODE:
        case node.CDATA_SECTION_NODE:
-               var text = texEscape(node.data);
-               // protect leading space; escape the trailing newline
-               text = text.replace(/^\s+/, '{} ') + '%';
-               if (text !== '%') {
-                       this.output.push(text);
-                       if (text !== '{} %') {
-                               this.insideParagraph = true;
-                       }
-               }
+               this.format.write(node.data.replace(/\s+/g, ' '));
                break;
 
        //case node.PROCESSING_INSTRUCTION_NODE:
        //case node.DOCUMENT_TYPE_NODE:
        //case node.COMMENT_NODE:
        default:
-               // convert into latex comment (for easier debugging)
-               this.output.push(texEscape(node.data).replace(/^/gm, '%'));
+               // swallow it up
                break;
        }
 };
@@ -328,72 +467,66 @@
        if (href && !this.inHeading && !node.querySelector('img')) {
                if (/^#/.test(href)) {
                        href = href.substring(1);
-                       return this.collect(node, function(contents) {
-                               this.output.push('\\hyperlink{' + href + '}' +
-                                                                '{' + contents 
+ '}%');
-                       });
+                       return this.wrap('\\hyperlink{' + href + '}', node);
                } else {
                        href = url.resolve(this.base, href);
                        href = href.replace(/[#%\\]/g, '\\$&'); // escape TeX 
specials
-                       return this.collect(node, function(contents) {
-                               this.output.push('\\href{' + href + '}{' + 
contents + '}%');
-                       });
+                       return this.wrap('\\href{' + href + '}', node);
                }
        }
        this.visitChildren(node);
 };
 
 Visitor.prototype.visitP = function(node) {
-       this.output.push("");
-       var o = this.output;
-       this.output = []; // make sure we don't emit a linebreak immediately
+       this.format.paragraphBreak();
        this.visitChildren(node);
-       this.output = o.concat(this.output);
-       this.output.push("");
+       this.format.paragraphBreak();
 };
 
-Visitor.prototype.visitSUB = function(node) {
-       return this.collect(node, function(contents) {
-               if (/^[0-9]+$/.test(contents)) {
-                       this.output.push('$_' + node.childNodes[0].data + '$%');
-               } else {
-                       this.output.push('\\textsubscript{' + contents + '}%');
-               }
-       });
+var tag2cmd = {
+       'B': '\\textbf',
+       'I': '\\emph',
+       'SUB': '\\textsubscript',
+       'SUP': '\\textsuperscript'
+};
+var visitINLINE =
+Visitor.prototype.visitB =
+Visitor.prototype.visitI = function(node, name) {
+       return this.wrap(tag2cmd[name || node.nodeName], node);
 };
 
-Visitor.prototype.visitSUP = function(node) {
-       return this.collect(node, function(contents) {
-               if (/^[0-9]+$/.test(contents)) {
-                       this.output.push('$^' + node.childNodes[0].data + '$%');
-               } else {
-                       this.output.push('\\textsuperscript{' + contents + 
'}%');
-               }
-       });
+Visitor.prototype.visitSUP =
+Visitor.prototype.visitSUB = function(node, name) {
+       name = name || node.nodeName;
+       if (node.childNodes.length === 1 &&
+               node.childNodes[0].nodeType === node.TEXT_NODE &&
+               /^[0-9]+$/.test(node.childNodes[0].data)) {
+               var c = (name === 'SUP') ? '^' : '_';
+               return this.format.writeDecorated(
+                       '$' + c + node.childNodes[0].data + '$'
+               );
+       }
+       return visitINLINE.call(this, node, name);
 };
 
-Visitor.prototype.visitB = function(node) {
-       return this.collect(node, function(contents) {
-               this.output.push('\\textbf{' + contents + '}%');
-       });
+var tag2env = {
+       'CENTER': 'center',
+       'BLOCKQUOTE': 'quotation',
+       'UL': 'itemize',
+       'OL': 'enumerate'
 };
-
-Visitor.prototype.visitI = function(node) {
-       return this.collect(node, function(contents) {
-               this.output.push('\\emph{' + contents + '}%');
-       });
-};
-
+var visitENV =
+Visitor.prototype.visitBLOCKQUOTE =
 Visitor.prototype.visitCENTER = function(node) {
-       this.output.push('\\begin{center}');
+       var envname = tag2env[node.nodeName];
+       this.format.begin(envname);
        this.visitChildren(node);
-       this.output.push('\\end{center}');
+       this.format.end(envname);
 };
 
 Visitor.prototype.visitBR = function(node) {
        /* jshint unused: vars */
-       if (!this.insideParagraph) { return; }
-       this.output.push('\\\\');
+       this.format.lineBreak();
 };
 
 // H1s are "at the same level as the page title".
@@ -405,37 +538,74 @@
                /* the article class doesn't allow chapters */
                return;
        }
+       if (this.inHeading) {
+               /* nested headings? no, sir! */
+               return;
+       }
        var level = LATEX_LEVELS[n];
-       var wasInHeading = this.inHeading;
-       this.inHeading = true;
+       this.format.paragraphBreak();
+
        var tocPoly = Polyglossia.lookup(this.tocLanguage);
        var curPoly = Polyglossia.lookup(this.currentLanguage);
        if (this.currentLanguage !== this.tocLanguage) {
-               
this.output.push('\\begin{'+tocPoly.env+'}['+tocPoly.options+']%');
+               this.format.begin(tocPoly.env, tocPoly.options);
        }
-       return this.collect(node, function(shortContents) {
-               this.inHeading = wasInHeading;
-               return this.collect(node, function(longContents) {
-                       if (this.currentLanguage !== this.tocLanguage) {
-                               // reset language and directionality
-                               if (this.currentDirectionality !== curPoly.dir) 
{
-                                       var cmdname = 
(this.currentDirectionality==='rtl') ?
-                                               'RL' : 'LR';
-                                       longContents = '\\' + cmdname + '{' + 
longContents + '}';
-                               }
-                               longContents = 
'\\text'+curPoly.lang+'['+curPoly.options+']'+
-                                       '{' + longContents + '}';
-                       }
-                       this.output.push(
-                               '\\' + level +
-                               '[' + shortContents + ']' +
-                               '{' + longContents + '}'
+       // reset the language/directionality
+       var setLangThenVisitChildren = function(node) {
+               if (this.currentLanguage !== this.tocLanguage) {
+                       // reset language and directionality
+                       this.format.writeDecorated(
+                               '\\text' + curPoly.lang +
+                                       ((curPoly.options && !this.inHeading) ?
+                                        ('[' + curPoly.options + ']') : ''),
+                               function() {
+                                       if (this.currentDirectionality !== 
curPoly.dir) {
+                                               var cmdname = 
(this.currentDirectionality==='rtl') ?
+                                                       'RL' : 'LR';
+                                               this.format.writeDecorated(
+                                                       '\\' + cmdname, 
function() {
+                                                               
this.format.resetSOL();
+                                                               
this.visitChildren(node);
+                                                       }.bind(this)
+                                               );
+                                       } else {
+                                               this.format.resetSOL();
+                                               this.visitChildren(node);
+                                       }
+                               }.bind(this)
                        );
-                       if (this.currentLanguage !== this.tocLanguage) {
-                               this.output.push('\\end{'+tocPoly.env+'}%');
-                       }
-               });
+               } else {
+                       this.format.resetSOL();
+                       this.visitChildren(node);
+               }
+       }.bind(this);
+       // evaluate the "index" heading
+       this.inHeading = true; // we can't use anything with [] args
+       this.format.writeDecorated({
+               type: 'start-inline',
+               value: '\\' + level,
+               delimiter: '['
        });
+       setLangThenVisitChildren(node);
+       this.format.writeDecorated({
+               type: 'end-inline',
+               value: '\\' + level,
+               delimiter: ']'
+       });
+       this.inHeading = false;
+       this.format.writeDecorated({
+               type: 'start-inline',
+               value: ''
+       });
+       setLangThenVisitChildren(node);
+       this.format.writeDecorated({
+               type: 'end-inline',
+               value: ''
+       });
+       if (this.currentLanguage !== this.tocLanguage) {
+               this.format.end(tocPoly.env);
+       }
+       this.format.paragraphBreak();
 };
 
 Visitor.prototype.visitH1 = function(node) { return this.visitHn(node, 1); };
@@ -446,21 +616,18 @@
 Visitor.prototype.visitH6 = function(node) { return this.visitHn(node, 6); };
 
 Visitor.prototype['visitREL=dc:references'] = function(node) {
-       return this.visitSUP(node);
+       return this.visitSUP(node, 'SUP');
 };
 
-Visitor.prototype.visitUL = function(node) {
-       if (!DomUtil.first_child(node)) { return; /* no items */ }
-       this.output.push('\\begin{itemize}');
-       this.visitChildren(node);
-       this.output.push('\\end{itemize}');
-};
-
+Visitor.prototype.visitUL =
 Visitor.prototype.visitOL = function(node) {
        if (!DomUtil.first_child(node)) { return; /* no items */ }
-       this.output.push('\\begin{enumerate}');
-       this.visitChildren(node);
-       this.output.push('\\end{enumerate}');
+       var wasListInfo = this.listInfo;
+       this.listInfo = {
+               type: node.nodeName
+       };
+       visitENV.call(this, node);
+       this.listInfo = wasListInfo;
 };
 
 Visitor.prototype.visitDL = function(node) {
@@ -491,51 +658,52 @@
        }
 
        // ok, generate description or quotation environment
+       var wasListInfo = this.listInfo;
+       this.listInfo = {
+               type: sawDT ? node.nodeName : 'BLOCKQUOTE'
+       };
        var envName = sawDT ? 'description' :
                this.options.parindent ? 'quotation' : 'quote';
-       var wasBlockQuote = this.inBlockQuote;
-       this.inBlockQuote = !sawDT;
-       this.output.push('\\begin{'+envName+'}');
+       this.format.begin(envName);
        // ensure that there's an item before any contents
        if (sawDT &&
                !(child.nodeType === node.ELEMENT_NODE && child.nodeName === 
'DT')) {
-               this.output.push('\\item');
+               this.format.writeDecorated('\\item{}');
+               this.format.resetSOL();
+               this.listInfo.sawDT = true;
        }
        this.visitChildren(node);
-       this.output.push('\\end{'+envName+'}');
-       this.inBlockQuote = wasBlockQuote;
+       this.format.end(envName);
+       this.listInfo = wasListInfo;
 };
 
 Visitor.prototype.visitDT = function(node) {
-       return this.collect(node, function(contents) {
-               this.output.push('\\item[' + contents + '] %');
-               this.insideParagraph = true;
-       });
+       this.listInfo.sawDT = false;
+       this.format.envBreak();
+       this.wrap('\\myitem', node);
+       this.format.resetSOL();
+       this.listInfo.sawDT = true;
 };
 
 Visitor.prototype.visitDD = function(node) {
-       if (this.inBlockQuote) {
+       if (this.listInfo.type === 'BLOCKQUOTE') {
                return this.visitP(node);
        }
        // verify that previous line was the DT, otherwise add blank DT
        var prev = DomUtil.node_before(node);
        if (!(prev === null || prev.nodeName === 'DT')) {
-               this.output.push('\\item');
-               this.insideParagraph = true;
+               this.format.envBreak();
+               this.format.writeDecorated('\\item{}');
+               this.format.resetSOL();
        }
        this.visitChildren(node);
 };
 
 Visitor.prototype.visitLI = function(node) {
-       this.output.push('\\item %');
-       this.insideParagraph = true;
+       this.format.envBreak();
+       this.format.writeDecorated('\\item{}');
+       this.format.resetSOL();
        this.visitChildren(node);
-};
-
-Visitor.prototype.visitBLOCKQUOTE = function(node) {
-       this.output.push('\\begin{quotation}');
-       this.visitChildren(node);
-       this.output.push('\\end{quotation}');
 };
 
 Visitor.prototype['visitREL=mw:referencedBy'] = function(node) {
@@ -545,19 +713,20 @@
 
 Visitor.prototype['visitTYPEOF=mw:Extension/references'] = function(node) {
        if (!node.childNodes.length) { return; /* no items */ }
-       this.insideParagraph = false;
-       this.output.push('\\begin{enumerate}\\small');
+       this.format.begin('enumerate');
+       this.format.writeDecorated('\\small\n');
        for (var i = 0, n = node.childNodes.length; i < n; i++) {
                var ref = node.childNodes[i];
                var name = texEscape('[' + (i+1) + ']');
                if (ref.id) {
                        name = '\\hypertarget{' + ref.id + '}{' + name + '}';
                }
-               this.output.push('\\item[' + name + ']');
+               this.format.envBreak();
+               this.format.writeDecorated('\\item[' + name + ']{}');
+               this.format.resetSOL();
                this.visitChildren(ref);
        }
-       this.output.push('\\end{enumerate}');
-       this.insideParagraph = false;
+       this.format.end('enumerate');
 };
 
 // tables
@@ -585,11 +754,11 @@
        if (this.inFloat) { return; } // xxx work around issues with inline 
images
        this.inFloat = true;
        // floats seem to revert to collectionLanguage
-       this.output.push('\\begin{figure}[tbh!]');
-       this.output.push('\\begin{center}');
+       this.format.begin('figure', 'tbh!')
+       this.format.begin('center');
        filename = filename.replace(/[%\\_]/g, '\\$&'); // escape TeX specials
-       
this.output.push('\\includegraphics[width=0.95\\columnwidth]{'+filename+'}');
-       this.output.push('\\end{center}');
+       
this.format.writeDecorated('\\includegraphics[width=0.95\\columnwidth]{'+filename+'}');
+       this.format.end('center');
        if (caption) {
                // we're not using \caption because we don't need figure 
numbering
                // also, \caption fights with \begin{center} ... \end{center}
@@ -598,40 +767,42 @@
                var curPoly = Polyglossia.lookup(this.currentLanguage);
                var direnv = this.currentDirectionality.toUpperCase();
                if (this.currentLanguage !== this.tocLanguage) {
-                       
this.output.push('\\begin{'+curPoly.env+'}['+curPoly.options+']%');
+                       this.format.begin(curPoly.env, curPoly.options);
                        if (this.currentDirectionality !== tocPoly.dir) {
-                               this.output.push('\\begin{'+direnv+'}%');
+                               this.format.begin(direnv);
                        }
                }
-               this.output.push('\\small\\itshape');
-               this.insideParagraph = false;
+               this.format.writeDecorated('\\small\\itshape\n');
+
                this.visitChildren(caption);
                if (this.currentLanguage !== this.tocLanguage) {
                        if (this.currentDirectionality !== tocPoly.dir) {
-                               this.output.push('\\end{'+direnv+'}%');
+                               this.format.end(direnv);
                        }
-                       this.output.push('\\end{'+curPoly.env+'}%');
+                       this.format.end(curPoly.env);
                }
                //this.output.push('}');
        }
-       this.output.push('\\end{figure}');
+       this.format.end('figure');
        this.inFloat = false;
 };
 
 Visitor.prototype['visitTYPEOF=mw:Extension/math'] = function(node, display) {
        // xxx: sanitize this string the same way the math extension does
 
+       this.format.envBreak();
        var math = JSON.parse(node.getAttribute('data-mw')).body.extsrc;
        var m = 
/^(\s*\\begin\s*\{\s*(?:eqnarray|equation|align|gather|falign|multiline|alignat))[*]?(\s*\}[\s\S]*\\end\s*\{[^\}*]+)[*]?(\}\s*)$/.exec(math);
        if (m) {
                // math expression contains its own environment
                // ensure we're using the * form so we don't get equation 
numbers
-               this.output.push(m[1]+'*'+m[2]+'*'+m[3]);
+               this.format.writeDecorated(m[1]+'*'+m[2]+'*'+m[3]);
+               this.format.envBreak();
                return;
        }
        var delimit = display ? '$$' : '$';
-       var eol = display ? '' : '%';
-       this.output.push(delimit + math + delimit + eol);
+       this.format.writeDecorated(delimit + math + delimit);
+       this.format.envBreak();
 };
 
 Visitor.prototype['visitLANG='] = function(node) {
@@ -647,14 +818,11 @@
                // can't use \text... commands inside the section label.
                r = this.visit(node);
        } else if (isParagraph(node)) {
-               this.output.push('\\begin{'+poly.env+'}['+poly.options+']%');
+               this.format.begin(poly.env, poly.options);
                r = this.visit(node);
-               this.output.push('\\end{'+poly.env+'}%');
+               this.format.end(poly.env);
        } else {
-               r = this.collect(node, function(contents) {
-                       
this.output.push('\\text'+poly.lang+'['+poly.options+']' +
-                                                        '{' + contents + '}%');
-               });
+               r = this.wrap('\\text'+poly.lang+'['+poly.options+']', node);
        }
        this.currentLanguage = savedLanguage;
        this.currentDirectionality = savedDirectionality;
@@ -672,15 +840,12 @@
                r = this.visit(node);
        } else if (isParagraph(node)) {
                var envname = dir.toUpperCase();
-               this.output.push('\\begin{' + envname + '}%');
+               this.format.begin(envname);
                r = this.visit(node);
-               this.output.push('\\end{' + envname + '}%');
+               this.format.end(envname);
        } else {
                var cmdname = (dir === 'rtl') ? 'RL' : 'LR';
-               r = this.collect(node, function(contents) {
-                       this.output.push('\\' + cmdname +
-                                                        '{' + contents + '}%');
-               });
+               r = this.wrap('\\' + cmdname, node);
        }
        this.currentDirectionality = savedDirectionality;
        return r;
@@ -718,9 +883,9 @@
        if (isMultipleImageTemplate(node)) {
                return this.visitMultipleImage(node);
        }
-       // xxx enforce line breaks before?
+       this.format.lineBreak();
        var r = this.visitChildren(node);
-       this.visitBR(node); // enforce line break after
+       this.format.lineBreak();
        return r;
 };
 
@@ -1018,7 +1183,11 @@
                }).then(function(siteinfo) {
                        articleLanguage = siteinfo.general.lang || 
collectionLanguage;
                }).then(function() {
-                       var visitor = new Visitor(document, {
+                       var format = new Formatter(
+                               fs.createWriteStream(outfile, { encoding: 
'utf8' }),
+                               options
+                       );
+                       var visitor = new Visitor(document, format, {
                                base: base,
                                imagemap: imagemap,
                                singleItem: singleItem,
@@ -1034,9 +1203,11 @@
                        visitor.visit(h1); // emit document title!
                        document.body.lang = document.body.lang || 
articleLanguage;
                        visitor.visit(document.body);
-                       var result = visitor.output.join('\n');
                        visitor.usedLanguages.forEach(function(l){ 
usedLanguages.add(l); });
-                       return P.call(fs.writeFile, fs, outfile, result, 
'utf8');
+                       format.paragraphBreak();
+                       return format.flush().then(function() {
+                               return P.call(format.stream.end, format.stream, 
'');
+                       });
                });
        };
        write.chapter = function(item) {

-- 
To view, visit https://gerrit.wikimedia.org/r/102470
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic04cb626eee65ff302becfee93402e23c5440174
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Collection/OfflineContentGenerator/latex_renderer
Gerrit-Branch: master
Gerrit-Owner: Cscott <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to