The branch, master, has been updated. - Log -----------------------------------------------------------------
commit be42f1398db05353bdab6fa328a4e86d11ce6b97 Author: Jean-Marc Lasgouttes <lasgout...@lyx.org> Date: Fri Jan 25 12:48:52 2013 +0100 Fix bug #5408: tex2lyx cannot handle verbatim code - Implement catcode setting in Parser - add a new Parser::verbatimStuff method that reads verbatim contents - use this method to parse "verbatim" environment. - use it to parse \verb too. - rename Parser::verbatimEnvironment to ertEnvironment. TODO: - use for other verbatim-like cases (Sweave chunk, lstlisting...) - factor out the function that outputs ERT (including line breaks) - maybe implement Parser::unparse (if needed) diff --git a/lib/layouts/stdlayouts.inc b/lib/layouts/stdlayouts.inc index 3ec97bd..a767d74 100644 --- a/lib/layouts/stdlayouts.inc +++ b/lib/layouts/stdlayouts.inc @@ -79,6 +79,7 @@ Style Verbatim ParbreakIsNewline 1 FreeSpacing 1 PassThru 1 + KeepEmpty 1 NewLine 0 ParSkip 0.4 TopSep 0.7 diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp index 6f7dbf3..e2af5f2 100644 --- a/src/tex2lyx/Parser.cpp +++ b/src/tex2lyx/Parser.cpp @@ -22,39 +22,6 @@ namespace lyx { namespace { -CatCode theCatcode[256]; - -void catInit() -{ - static bool init_done = false; - if (init_done) - return; - init_done = true; - - fill(theCatcode, theCatcode + 256, catOther); - fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter); - fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter); - - theCatcode[int('\\')] = catEscape; - theCatcode[int('{')] = catBegin; - theCatcode[int('}')] = catEnd; - theCatcode[int('$')] = catMath; - theCatcode[int('&')] = catAlign; - theCatcode[int('\n')] = catNewline; - theCatcode[int('#')] = catParameter; - theCatcode[int('^')] = catSuper; - theCatcode[int('_')] = catSub; - theCatcode[0x7f] = catIgnore; - theCatcode[int(' ')] = catSpace; - theCatcode[int('\t')] = catSpace; - theCatcode[int('\r')] = catNewline; - theCatcode[int('~')] = catActive; - theCatcode[int('%')] = catComment; - - // This is wrong! - theCatcode[int('@')] = catLetter; -} - /*! * Translate a line ending to '\n'. * \p c must have catcode catNewline, and it must be the last character read @@ -79,16 +46,8 @@ char_type getNewline(idocstream & is, char_type c) return c; } -CatCode catcode(char_type c) -{ - if (c < 256) - return theCatcode[(unsigned char)c]; - return catOther; } -} - - // // Token // @@ -158,7 +117,8 @@ void debugToken(std::ostream & os, Token const & t, unsigned int flags) Parser::Parser(idocstream & is) - : lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8") + : lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8"), + theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES) { } @@ -166,7 +126,8 @@ Parser::Parser(idocstream & is) Parser::Parser(string const & s) : lineno_(0), pos_(0), iss_(new idocstringstream(from_utf8(s))), is_(*iss_), - encoding_iconv_("UTF-8") + encoding_iconv_("UTF-8"), + theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES) { } @@ -192,6 +153,57 @@ void Parser::setEncoding(std::string const & e, int const & p) } +void Parser::catInit() +{ + if (curr_cat_ == theCatcodesType_) + return; + curr_cat_ = theCatcodesType_; + + fill(theCatcode_, theCatcode_ + 256, catOther); + fill(theCatcode_ + 'a', theCatcode_ + 'z' + 1, catLetter); + fill(theCatcode_ + 'A', theCatcode_ + 'Z' + 1, catLetter); + // This is wrong! + theCatcode_[int('@')] = catLetter; + + if (theCatcodesType_ == NORMAL_CATCODES) { + theCatcode_[int('\\')] = catEscape; + theCatcode_[int('{')] = catBegin; + theCatcode_[int('}')] = catEnd; + theCatcode_[int('$')] = catMath; + theCatcode_[int('&')] = catAlign; + theCatcode_[int('\n')] = catNewline; + theCatcode_[int('#')] = catParameter; + theCatcode_[int('^')] = catSuper; + theCatcode_[int('_')] = catSub; + theCatcode_[0x7f] = catIgnore; + theCatcode_[int(' ')] = catSpace; + theCatcode_[int('\t')] = catSpace; + theCatcode_[int('\r')] = catNewline; + theCatcode_[int('~')] = catActive; + theCatcode_[int('%')] = catComment; + } +} + +CatCode Parser::catcode(char_type c) const +{ + if (c < 256) + return theCatcode_[(unsigned char)c]; + return catOther; +} + + +void Parser::setCatcode(char c, CatCode cat) +{ + theCatcode_[(unsigned char)c] = cat; +} + + +void Parser::setCatcodes(cat_type t) +{ + theCatcodesType_ = t; +} + + void Parser::setEncoding(std::string const & e) { //cerr << "setting encoding to " << e << std::endl; @@ -472,7 +484,7 @@ string Parser::getFullParentheseArg() } -string const Parser::verbatimEnvironment(string const & name) +string const Parser::ertEnvironment(string const & name) { if (!good()) return string(); @@ -485,7 +497,7 @@ string const Parser::verbatimEnvironment(string const & name) } else if (t.asInput() == "\\begin") { string const env = getArg('{', '}'); os << "\\begin{" << env << '}' - << verbatimEnvironment(env) + << ertEnvironment(env) << "\\end{" << env << '}'; } else if (t.asInput() == "\\end") { string const end = getArg('{', '}'); @@ -545,6 +557,34 @@ string const Parser::plainCommand(char left, char right, string const & name) } +string const Parser::verbatimStuff(string const & end_string) +{ + if (!good()) + return string(); + + ostringstream oss; + size_t match_index = 0; + setCatcodes(VERBATIM_CATCODES); + for (Token t = get_token(); good(); t = get_token()) { + // FIXME t.asInput() might be longer than we need ? + if (t.asInput() == end_string.substr(match_index, + t.asInput().length())) { + match_index += t.asInput().length(); + if (match_index >= end_string.length()) + break; + } else if (match_index) { + oss << end_string.substr(0, match_index) << t.asInput(); + match_index = 0; + } else + oss << t.asInput(); + } + setCatcodes(NORMAL_CATCODES); + if (!good()) + cerr << "unexpected end of input" << endl; + return oss.str(); +} + + void Parser::tokenize_one() { catInit(); @@ -687,16 +727,4 @@ void Parser::reset() } -void Parser::setCatCode(char c, CatCode cat) -{ - theCatcode[(unsigned char)c] = cat; -} - - -CatCode Parser::getCatCode(char c) const -{ - return theCatcode[(unsigned char)c]; -} - - } // namespace lyx diff --git a/src/tex2lyx/Parser.h b/src/tex2lyx/Parser.h index 18a08eb..558b556 100644 --- a/src/tex2lyx/Parser.h +++ b/src/tex2lyx/Parser.h @@ -46,6 +46,12 @@ enum CatCode { catInvalid // 15 <delete> }; +enum cat_type { + NORMAL_CATCODES, + VERBATIM_CATCODES, + UNDECIDED_CATCODES +}; + enum { FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing @@ -135,6 +141,13 @@ public: /// ~Parser(); + /// + CatCode catcode(char_type c) const; + /// + void setCatcode(char c, CatCode cat); + /// set parser to normal or verbatim mode + void setCatcodes(cat_type t); + /// change the iconv encoding of the input stream /// according to the latex encoding and package void setEncoding(std::string const & encoding, int const & package); @@ -202,11 +215,11 @@ public: /*! * \returns the contents of the environment \p name. * <tt>\begin{name}</tt> must be parsed already, <tt>\end{name}</tt> - * is parsed but not returned. + * is parsed but not returned. This parses nested environments properly. */ - std::string const verbatimEnvironment(std::string const & name); + std::string const ertEnvironment(std::string const & name); /* - * The same as verbatimEnvironment(std::string const & name) but + * The same as ertEnvironment(std::string const & name) but * \begin and \end commands inside the name environment are not parsed. * This function is designed to parse verbatim environments. */ @@ -218,6 +231,14 @@ public: * This function is designed to parse verbatim commands. */ std::string const plainCommand(char left, char right, std::string const & name); + /* + * Basically the same as plainEnvironment() but the parsing is + * stopped at string \p end_string. Contrary to the other + * methods, this uses proper catcode setting. This function is + * designed to parse verbatim environments and command. The + * intention is to eventually replace all of its siblings. + */ + std::string const verbatimStuff(std::string const & end_string); /*! * Returns the character of the current token and increments * the token position. @@ -225,7 +246,7 @@ public: char getChar(); /// void error(std::string const & msg); - /// Parses one token from \p is + /// Parses one token from \p is void tokenize_one(); /// void push_back(Token const & t); @@ -256,12 +277,10 @@ public: std::string verbatimOption(); /// resets the parser to initial state void reset(); - /// - void setCatCode(char c, CatCode cat); - /// - CatCode getCatCode(char c) const; private: + /// Setup catcode table + void catInit(); /// int lineno_; /// @@ -276,6 +295,12 @@ private: idocstream & is_; /// iconv name of the current encoding std::string encoding_iconv_; + /// + CatCode theCatcode_[256]; + // + cat_type theCatcodesType_; + // + cat_type curr_cat_; }; diff --git a/src/tex2lyx/Preamble.cpp b/src/tex2lyx/Preamble.cpp index 484b009..8d82f6c 100644 --- a/src/tex2lyx/Preamble.cpp +++ b/src/tex2lyx/Preamble.cpp @@ -1299,12 +1299,12 @@ void Preamble::parse(Parser & p, string const & forceclass, else if (t.cs() == "makeatletter") { // LyX takes care of this - p.setCatCode('@', catLetter); + p.setCatcode('@', catLetter); } else if (t.cs() == "makeatother") { // LyX takes care of this - p.setCatCode('@', catOther); + p.setCatcode('@', catOther); } else if (t.cs() == "newcommand" || t.cs() == "newcommandx" diff --git a/src/tex2lyx/table.cpp b/src/tex2lyx/table.cpp index 751020b..eed7bb9 100644 --- a/src/tex2lyx/table.cpp +++ b/src/tex2lyx/table.cpp @@ -786,7 +786,7 @@ void parse_table(Parser & p, ostream & os, bool is_long_tabular, // treat the nested environment as a block, don't // parse &, \\ etc, because they don't belong to our // table if they appear. - os << p.verbatimEnvironment(name); + os << p.ertEnvironment(name); os << "\\end{" << name << '}'; active_environments.pop_back(); } @@ -1227,7 +1227,7 @@ void handle_tabular(Parser & p, ostream & os, string const & name, angle = p.getArg('{', '}'); } active_environments.push_back(env); - p.verbatimEnvironment(env); + p.ertEnvironment(env); active_environments.pop_back(); p.skip_spaces(); if (!p.good() && support::isStrInt(angle)) diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp index 5204753..41ef192 100644 --- a/src/tex2lyx/text.cpp +++ b/src/tex2lyx/text.cpp @@ -924,7 +924,7 @@ void parse_box(Parser & p, ostream & os, unsigned outer_flags, // If yes, we need to output ERT. p.pushPosition(); if (inner_flags & FLAG_END) - p.verbatimEnvironment(inner_type); + p.ertEnvironment(inner_type); else p.verbatim_item(); p.skip_spaces(true); @@ -1435,27 +1435,36 @@ void parse_environment(Parser & p, ostream & os, bool outer, } else if (name == "verbatim") { - os << "\n\\end_layout\n\n\\begin_layout Verbatim\n"; - string const s = p.plainEnvironment("verbatim"); + // FIXME: this should go in the generic code that + // handles environments defined in layout file that + // have "PassThru 1". However, the code there is + // already too complicated for my taste. + parent_context.new_paragraph(os); + Context context(true, parent_context.textclass, + &parent_context.textclass[from_ascii("Verbatim")]); + context.check_layout(os); + string s = p.verbatimStuff("\\end{verbatim}"); + // ignore one newline at beginning or end of string + if (prefixIs(s, "\n")) + s.erase(0,1); + if (suffixIs(s, "\n")) + s.erase(s.length(),1); + string::const_iterator it2 = s.begin(); for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) { - if (*it == '\\') - os << "\\backslash "; - else if (*it == '\n') { - it2 = it + 1; - // avoid adding an empty paragraph at the end - // FIXME: if there are 2 consecutive spaces at the end ignore it - // because LyX will re-add a \n - // This hack must be removed once bug 8049 is fixed! - if ((it + 1 != et) && (it + 2 != et || *it2 != '\n')) - os << "\n\\end_layout\n\\begin_layout Verbatim\n"; - } else + context.check_layout(os); + if (*it == '\\') { + os << "\n\\backslash\n"; + context.need_end_layout = true; + } else if (*it == '\n') { + context.new_paragraph(os); + } else { os << *it; + context.need_end_layout = true; + } } - os << "\n\\end_layout\n\n"; + context.new_paragraph(os); p.skip_spaces(); - // reset to Standard layout - os << "\n\\begin_layout Standard\n"; } else if (name == "CJK") { @@ -1758,7 +1767,7 @@ void parse_environment(Parser & p, ostream & os, bool outer, parse_arguments("\\begin{" + name + "}", arguments, p, os, outer, parent_context); if (contents == verbatim) - handle_ert(os, p.verbatimEnvironment(name), + handle_ert(os, p.ertEnvironment(name), parent_context); else parse_text_snippet(p, os, FLAG_END, outer, @@ -3819,15 +3828,11 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, else if (t.cs() == "verb") { context.check_layout(os); - char const delimiter = p.next_token().character(); - // \verb is special: The usual escaping rules do not - // apply, e.g. "\verb+\+" is valid and denotes a single - // backslash (bug #4468). Therefore we do not allow - // escaping in getArg(). - string const arg = p.getArg(delimiter, delimiter, false); - ostringstream oss; - oss << "\\verb" << delimiter << arg << delimiter; - handle_ert(os, oss.str(), context); + // set catcodes to verbatim early, just in case. + p.setCatcodes(VERBATIM_CATCODES); + string delim = p.get_token().asInput(); + string const arg = p.verbatimStuff(delim); + handle_ert(os, "\\verb" + delim + arg + delim, context); } // Problem: \= creates a tabstop inside the tabbing environment @@ -4574,7 +4579,7 @@ string guessLanguage(Parser & p, string const & lang) p.setEncoding(encoding, Encoding::CJK); else p.setEncoding("UTF-8"); - string const text = p.verbatimEnvironment("CJK"); + string const text = p.ertEnvironment("CJK"); p.setEncoding(encoding_old); p.skip_spaces(); if (!where) { ----------------------------------------------------------------------- Summary of changes: lib/layouts/stdlayouts.inc | 1 + src/tex2lyx/Parser.cpp | 142 ++++++++++++++++++++++++++------------------ src/tex2lyx/Parser.h | 41 ++++++++++--- src/tex2lyx/Preamble.cpp | 4 +- src/tex2lyx/table.cpp | 4 +- src/tex2lyx/text.cpp | 61 ++++++++++--------- 6 files changed, 156 insertions(+), 97 deletions(-) hooks/post-receive -- The LyX Source Repository