Am Dienstag, 2. Januar 2007 22:34 schrieb Dov Feldstern:
> The patch basically works --- in other words, if the document's encoding 
> is set to "auto", then the latex file is generated (almost) correctly. 
> There is one small bug though, where a linebreak is lost (I'm pretty 
> sure it's due to this patch, I haven't looked at the code at all, 
> though): in the attached files, h.lyx is the original, h-wrong.tex is 
> the currently generated file, there should be another linebreak, as in 
> h-right.tex.

As I wrote, the patch is experimental. This updated one should produce 
correct paragraph breaks.

> With encoding set to "default" there's still a problem. I think that the 
> same patch should be applied to that case, too, except that in this 
> case, the \inputencoding statements should not appear in the generated 
> latex. But the encoding itself *should* take place, as it did here.

Yes, I think so too, but we need to change lyx2lyx also. I'll address that 
later.


Georg
Index: src/insets/insetbase.h
===================================================================
--- src/insets/insetbase.h	(Revision 16479)
+++ src/insets/insetbase.h	(Arbeitskopie)
@@ -369,7 +369,12 @@ public:
 	virtual void write(Buffer const &, std::ostream &) const {}
 	/// read inset in .lyx format
 	virtual void read(Buffer const &, LyXLex &) {}
-	/// returns the number of rows (\n's) of generated tex code.
+	/** Export the inset to LaTeX.
+	 *  Don't use a temporary stringstream if the final output is
+	 *  supposed to go to a file.
+	 *  \sa Buffer::writeLaTeXSource for the reason.
+	 *  \return the number of rows (\n's) of generated LaTeX code.
+	 */
 	virtual int latex(Buffer const &, odocstream &,
 			  OutputParams const &) const { return 0; }
 	/// returns true to override begin and end inset in file
Index: src/output_latex.h
===================================================================
--- src/output_latex.h	(Revision 16479)
+++ src/output_latex.h	(Arbeitskopie)
@@ -20,11 +20,16 @@
 namespace lyx {
 
 class Buffer;
+class BufferParams;
+class Encoding;
 class OutputParams;
 class TexRow;
 
-/// Just a wrapper for the method below, first creating the ofstream.
-
+/** Export \p paragraphs of buffer \p buf to LaTeX.
+    Don't use a temporary stringstream for \p os if the final output is
+    supposed to go to a file.
+    \sa Buffer::writeLaTeXSource for the reason.
+ */
 void latexParagraphs(Buffer const & buf,
 		     ParagraphList const & paragraphs,
 		     odocstream & ofs,
@@ -32,6 +37,10 @@ void latexParagraphs(Buffer const & buf,
 		     OutputParams const &,
 		     std::string const & everypar = std::string());
 
+/// Switch the encoding of \p os from \p oldEnc to \p newEnc if needed.
+/// \return the number of characters written to \p os.
+int switchEncoding(odocstream & os, BufferParams const & bparams,
+                   Encoding const & oldEnc, Encoding const & newEnc);
 
 } // namespace lyx
 
Index: src/paragraph_pimpl.C
===================================================================
--- src/paragraph_pimpl.C	(Revision 16479)
+++ src/paragraph_pimpl.C	(Arbeitskopie)
@@ -483,7 +483,8 @@ void Paragraph::Pimpl::simpleTeXSpecialC
 				os << '\n';
 			} else {
 				if (open_font) {
-					column += running_font.latexWriteEndChanges(os, basefont, basefont);
+					column += running_font.latexWriteEndChanges(
+						os, basefont, basefont, bparams);
 					open_font = false;
 				}
 				basefont = owner_->getLayoutFont(bparams, outerfont);
@@ -536,10 +537,8 @@ void Paragraph::Pimpl::simpleTeXSpecialC
 #endif
 		// some insets cannot be inside a font change command
 		if (open_font && inset->noFontChange()) {
-			column +=running_font.
-				latexWriteEndChanges(os,
-						     basefont,
-						     basefont);
+			column += running_font.latexWriteEndChanges(
+					os, basefont, basefont, bparams);
 			open_font = false;
 			basefont = owner_->getLayoutFont(bparams, outerfont);
 			running_font = basefont;
Index: src/buffer.h
===================================================================
--- src/buffer.h	(Revision 16479)
+++ src/buffer.h	(Arbeitskopie)
@@ -146,13 +146,31 @@ public:
 	/// Write file. Returns \c false if unsuccesful.
 	bool writeFile(support::FileName const &) const;
 
-	/// Just a wrapper for the method below, first creating the ofstream.
+	/// Just a wrapper for writeLaTeXSource, first creating the ofstream.
 	bool makeLaTeXFile(support::FileName const & filename,
 			   std::string const & original_path,
 			   OutputParams const &,
 			   bool output_preamble = true,
 			   bool output_body = true);
-	///
+	/** Export the buffer to LaTeX.
+	    If \p os is a file stream, and params().inputenc == "auto", and
+	    the buffer contains text in different languages with more than
+	    one encoding, then this method will change the encoding
+	    associated with \p os. Therefore you must not call this method with
+	    a string stream if the output is supposed to go to a file. \code
+	    odocfstream ofs;
+	    ofs.open("test.tex");
+	    writeLaTeXSource(ofs, ...);
+	    ofs.close();
+	    \endcode is NOT equivalent to \code
+	    odocstringstream oss;
+	    writeLaTeXSource(oss, ...);
+	    odocfstream ofs;
+	    ofs.open("test.tex");
+	    ofs << oss.str();
+	    ofs.close();
+	    \endcode
+	 */
 	void writeLaTeXSource(odocstream & os,
 			   std::string const & original_path,
 			   OutputParams const &,
Index: src/support/docstream.C
===================================================================
--- src/support/docstream.C	(Revision 16479)
+++ src/support/docstream.C	(Arbeitskopie)
@@ -294,6 +294,32 @@ odocfstream::odocfstream(const char* s, 
 	open(s, mode);
 }
 
+/// Wrap \p encoding in a SetEnc, for use as "os << setEncoding(...)".
+SetEnc setEncoding(string const & encoding)
+{
+	return SetEnc(encoding);
+}
+
+/// Imbue \p os with a locale whose codecvt facet is built from \p e.encoding.
+odocstream & operator<<(odocstream & os, SetEnc e)
+{
+	// Only file streams are ever imbued with a locale having an
+	// iconv_codecvt_facet, so any stream without one is left untouched.
+	if (!std::has_facet<iconv_codecvt_facet>(os.rdbuf()->getloc()))
+		return os;
+	// Flush first, so that all pending output is still written with
+	// the old encoding.
+	os.flush();
+	iconv_codecvt_facet * const facet =
+		new iconv_codecvt_facet(e.encoding, std::ios_base::out);
+	// FIXME Does changing the codecvt facet of an open file stream
+	// always work? It does with gcc 4.1, but I have read somewhere
+	// that it does not with MSVC. What does the standard say?
+	std::locale const newloc(os.rdbuf()->getloc(), facet);
+	os.imbue(newloc);
+	return os;
+}
+
 }
 
 #if (!defined(HAVE_WCHAR_T) || SIZEOF_WCHAR_T != 4) && defined(__GNUC__)
Index: src/support/docstream.h
===================================================================
--- src/support/docstream.h	(Revision 16479)
+++ src/support/docstream.h	(Arbeitskopie)
@@ -77,6 +77,25 @@ odocstream & operator<<(odocstream & os,
     return os;
 }
 
+/// Helper struct for changing stream encoding; build it via setEncoding().
+struct SetEnc {
+	explicit SetEnc(std::string const & e) : encoding(e) {} // no silent std::string -> SetEnc
+	std::string encoding; ///< encoding name handed to the stream's codecvt facet
+};
+
+/// Helper function for changing stream encoding
+SetEnc setEncoding(std::string const & encoding);
+
+/** Change the encoding of \p os to \p e.encoding.
+    \p e.encoding must be a valid iconv name of an 8-bit encoding.
+    This does nothing if the stream is not a file stream, since only
+    file streams have an associated 8-bit encoding.
+    Usage: \code
+    os << setEncoding("ISO-8859-1");
+    \endcode
+ */
+odocstream & operator<<(odocstream & os, SetEnc e);
+
 }
 
 #endif
Index: src/lyxfont.C
===================================================================
--- src/lyxfont.C	(Revision 16479)
+++ src/lyxfont.C	(Arbeitskopie)
@@ -23,6 +23,7 @@
 #include "LColor.h"
 #include "lyxlex.h"
 #include "lyxrc.h"
+#include "output_latex.h"
 
 #include "support/lstrings.h"
 
@@ -737,7 +738,8 @@ void LyXFont::lyxWriteChanges(LyXFont co
 /// Writes the head of the LaTeX needed to impose this font
 // Returns number of chars written.
 int LyXFont::latexWriteStartChanges(odocstream & os, LyXFont const & base,
-				    LyXFont const & prev) const
+                                    LyXFont const & prev,
+                                    BufferParams const & bparams) const
 {
 	int count = 0;
 	bool env = false;
@@ -760,6 +762,8 @@ int LyXFont::latexWriteStartChanges(odoc
 			count += tmp.length();
 		}
 	}
+	count += switchEncoding(os, bparams, bparams.encoding(),
+	                        *(language()->encoding()));
 
 	if (number() == ON && prev.number() != ON &&
 	    language()->lang() == "hebrew") {
@@ -833,7 +837,8 @@ int LyXFont::latexWriteStartChanges(odoc
 // Returns number of chars written
 // This one corresponds to latexWriteStartChanges(). (Asger)
 int LyXFont::latexWriteEndChanges(odocstream & os, LyXFont const & base,
-				  LyXFont const & next) const
+                                  LyXFont const & next,
+                                  BufferParams const & bparams) const
 {
 	int count = 0;
 	bool env = false;
@@ -897,6 +902,8 @@ int LyXFont::latexWriteEndChanges(odocst
 		os << '}';
 		++count;
 	}
+	count += switchEncoding(os, bparams, *(language()->encoding()),
+	                        bparams.encoding());
 
 	return count;
 }
Index: src/lyxfont.h
===================================================================
--- src/lyxfont.h	(Revision 16479)
+++ src/lyxfont.h	(Arbeitskopie)
@@ -300,14 +300,17 @@ public:
 	    font state active now.
 	*/
 	int latexWriteStartChanges(odocstream &, LyXFont const & base,
-				   LyXFont const & prev) const;
+	                           LyXFont const & prev,
+	                           BufferParams const &) const;
 
 	/** Writes the tail of the LaTeX needed to change to this font.
 	    Returns number of chars written. Base is the font state we want
 	    to achieve.
 	*/
 	int latexWriteEndChanges(odocstream &, LyXFont const & base,
-				 LyXFont const & next) const;
+	                         LyXFont const & next,
+	                         BufferParams const &) const;
+
 
 	/// Build GUI description of font state
 	docstring const stateText(BufferParams * params) const;
Index: src/paragraph.C
===================================================================
--- src/paragraph.C	(Revision 16479)
+++ src/paragraph.C	(Arbeitskopie)
@@ -958,7 +958,8 @@ bool Paragraph::simpleTeXOnePar(Buffer c
 		if (i == body_pos) {
 			if (body_pos > 0) {
 				if (open_font) {
-					column += running_font.latexWriteEndChanges(os, basefont, basefont);
+					column += running_font.latexWriteEndChanges(
+						os, basefont, basefont, bparams);
 					open_font = false;
 				}
 				basefont = getLayoutFont(bparams, outerfont);
@@ -998,9 +999,10 @@ bool Paragraph::simpleTeXOnePar(Buffer c
 		    (font != running_font ||
 		     font.language() != running_font.language()))
 		{
-			column += running_font.latexWriteEndChanges(os,
-								    basefont,
-								    (i == body_pos-1) ? basefont : font);
+			column += running_font.latexWriteEndChanges(
+					os, basefont,
+					(i == body_pos-1) ? basefont : font,
+					bparams);
 			running_font = basefont;
 			open_font = false;
 		}
@@ -1019,8 +1021,8 @@ bool Paragraph::simpleTeXOnePar(Buffer c
 		     font.language() != running_font.language()) &&
 			i != body_pos - 1)
 		{
-			column += font.latexWriteStartChanges(os, basefont,
-							      last_font);
+			column += font.latexWriteStartChanges(
+					os, basefont, last_font, bparams);
 			running_font = font;
 			open_font = true;
 		}
@@ -1056,11 +1058,11 @@ bool Paragraph::simpleTeXOnePar(Buffer c
 		if (next_) {
 			running_font
 				.latexWriteEndChanges(os, basefont,
-						      next_->getFont(bparams,
-						      0, outerfont));
+					next_->getFont(bparams, 0, outerfont),
+					bparams);
 		} else {
 			running_font.latexWriteEndChanges(os, basefont,
-							  basefont);
+							  basefont, bparams);
 		}
 #else
 #ifdef WITH_WARNINGS
@@ -1068,7 +1070,8 @@ bool Paragraph::simpleTeXOnePar(Buffer c
 //#warning there as we start another \selectlanguage with the next paragraph if
 //#warning we are in need of this. This should be fixed sometime (Jug)
 #endif
-		running_font.latexWriteEndChanges(os, basefont,  basefont);
+		running_font.latexWriteEndChanges(os, basefont, basefont,
+		                                  bparams);
 #endif
 	}
 
Index: src/output_latex.C
===================================================================
--- src/output_latex.C	(Revision 16479)
+++ src/output_latex.C	(Arbeitskopie)
@@ -29,7 +29,6 @@
 #include "insets/insetoptarg.h"
 
 #include "support/lstrings.h"
-#include "support/unicode.h"
 
 
 namespace lyx {
@@ -237,7 +236,7 @@ ParagraphList::const_iterator
 TeXOnePar(Buffer const & buf,
 	  ParagraphList const & paragraphs,
 	  ParagraphList::const_iterator pit,
-	  odocstream & ucs4, TexRow & texrow,
+	  odocstream & os, TexRow & texrow,
 	  OutputParams const & runparams_in,
 	  string const & everypar)
 {
@@ -247,7 +246,7 @@ TeXOnePar(Buffer const & buf,
 	bool further_blank_line = false;
 	LyXLayout_ptr style;
 
-	// In an an inset with unlimited length (all in one row),
+	// In an inset with unlimited length (all in one row),
 	// force layout to default
 	if (!pit->forceDefaultParagraphs())
 		style = pit->layout();
@@ -263,6 +262,8 @@ TeXOnePar(Buffer const & buf,
 		(pit != paragraphs.begin())
 		? boost::prior(pit)->getParLanguage(bparams)
 		: doc_language;
+	Encoding const & encoding(*(language->encoding()));
+	Encoding const & doc_encoding(*(doc_language->encoding()));
 
 	if (language->babel() != previous_language->babel()
 	    // check if we already put language command in TeXEnvironment()
@@ -275,51 +276,35 @@ TeXOnePar(Buffer const & buf,
 		if (!lyxrc.language_command_end.empty() &&
 		    previous_language->babel() != doc_language->babel())
 		{
-			ucs4 << from_ascii(subst(lyxrc.language_command_end,
+			os << from_ascii(subst(lyxrc.language_command_end,
 				"$$lang",
 				previous_language->babel()))
-			     << endl;
+			   << endl;
 			texrow.newline();
 		}
 
 		if (lyxrc.language_command_end.empty() ||
 		    language->babel() != doc_language->babel())
 		{
-			ucs4 << from_ascii(subst(
+			os << from_ascii(subst(
 				lyxrc.language_command_begin,
 				"$$lang",
 				language->babel()))
-			     << endl;
+			   << endl;
 			texrow.newline();
 		}
 	}
 
-	// FIXME thailatex does not support the inputenc package, so we
-	// ignore switches from/to tis620-0 encoding here. This does of
-	// course only work as long as the non-thai text contains ASCII
-	// only, but it is the best we can do.
-	bool const use_thailatex = (language->encoding()->name() == "tis620-0" ||
-	                            previous_language->encoding()->name() == "tis620-0");
-	if (bparams.inputenc == "auto" &&
-	    language->encoding() != previous_language->encoding() &&
-	    !use_thailatex) {
-		ucs4 << "\\inputencoding{"
-		     << from_ascii(language->encoding()->latexName())
-		     << "}\n";
+	// FIXME we switch from the document encoding to the encoding of
+	// this paragraph, since I could not figure out the correct logic
+	// to take the encoding of the previous paragraph into account.
+	// This may result in some unneeded encoding changes.
+	if (switchEncoding(os, bparams, doc_encoding, encoding)) {
+		os << '\n';
 		texrow.newline();
 	}
-	// We need to output the paragraph to a temporary stream if we
-	// need to change the encoding. Don't do this if the result does
-	// not go to a file but to the builtin source viewer.
-	odocstringstream par_stream;
-	bool const change_encoding = !runparams_in.dryrun &&
-			bparams.inputenc == "auto" &&
-			language->encoding() != doc_language->encoding() &&
-			!use_thailatex;
-	// don't trigger the copy ctor because it's private on msvc 
-	odocstream & os = *(change_encoding ? &par_stream : &ucs4);
 
-	// In an an inset with unlimited length (all in one row),
+	// In an inset with unlimited length (all in one row),
 	// don't allow any special options in the paragraph
 	if (!pit->forceDefaultParagraphs()) {
 		if (pit->params().startOfAppendix()) {
@@ -376,8 +361,10 @@ TeXOnePar(Buffer const & buf,
 
 	// FIXME UNICODE
 	os << from_utf8(everypar);
-	bool need_par = pit->simpleTeXOnePar(buf, bparams,
-					     outerFont(std::distance(paragraphs.begin(), pit), paragraphs),
+	LyXFont const outerfont =
+		outerFont(std::distance(paragraphs.begin(), pit),
+			  paragraphs);
+	bool need_par = pit->simpleTeXOnePar(buf, bparams, outerfont,
 					     os, texrow, runparams);
 
 	// Make sure that \\par is done with the font of the last
@@ -389,9 +376,6 @@ TeXOnePar(Buffer const & buf,
 	// Is this really needed ? (Dekel)
 	// We do not need to use to change the font for the last paragraph
 	// or for a command.
-	LyXFont const outerfont =
-		outerFont(std::distance(paragraphs.begin(), pit),
-			  paragraphs);
 
 	LyXFont const font =
 		(pit->empty()
@@ -458,6 +442,15 @@ TeXOnePar(Buffer const & buf,
 		}
 	}
 
+	// FIXME we switch from the encoding of this paragraph to the
+	// document encoding, since I could not figure out the correct logic
+	// to take the encoding of the previous paragraph into account.
+	// This may result in some unneeded encoding changes.
+	if (switchEncoding(os, bparams, encoding, doc_encoding)) {
+		os << '\n';
+		texrow.newline();
+	}
+
 	if (boost::next(pit) == paragraphs.end()
 	    && language->babel() != doc_language->babel()) {
 		// Since \selectlanguage write the language to the aux file,
@@ -469,13 +462,13 @@ TeXOnePar(Buffer const & buf,
 				lyxrc.language_command_begin,
 				"$$lang",
 				doc_language->babel()))
-			   << endl;
+			   << '\n';
 		else
 			os << from_ascii(subst(
 				lyxrc.language_command_end,
 				"$$lang",
 				language->babel()))
-			   << endl;
+			   << '\n';
 		texrow.newline();
 	}
 
@@ -491,59 +484,6 @@ TeXOnePar(Buffer const & buf,
 	    lyxerr.debugging(Debug::LATEX))
 		lyxerr << "TeXOnePar...done " << &*boost::next(pit) << endl;
 
-	if (change_encoding) {
-		lyxerr[Debug::LATEX] << "Converting paragraph to encoding "
-			<< language->encoding()->iconvName() << endl;
-		docstring const par = par_stream.str();
-		// Convert the paragraph to the 8bit encoding that we need to
-		// output.
-		std::vector<char> const encoded = lyx::ucs4_to_eightbit(par.c_str(),
-			par.size(), language->encoding()->iconvName());
-		// Interpret this as if it was in the 8 bit encoding of the
-		// document language and convert it back to UCS4. That means
-		// that faked does not contain pure UCS4 anymore, but what
-		// will be written to the output file will be correct, because
-		// the real output stream will do a UCS4 -> document language
-		// encoding conversion.
-		// This is of course a hack, but not a bigger one than mixing
-		// two encodings in one file.
-		// FIXME: Catch iconv conversion errors and display an error
-		// dialog.
-
-		// Here follows an explanation how I (gb) came to the current
-		// solution:
-
-		// codecvt facets are only used by file streams -> OK, maybe
-		// we could use file streams and not generic streams in the
-		// latex() methods? No, that does not  work, we use them at
-		// several places to write to string streams.
-		// Next try: Maybe we could do something else than codecvt
-		// in our streams, and  add a setEncoding() method? That
-		// does not work unless we rebuild the functionality of file
-		// and string streams, since both odocfstream and
-		// odocstringstream inherit from std::basic_ostream<docstring>
-		// and we can  neither add a method to that class nor change
-		// the inheritance of the file and string streams.
-
-		// What might be possible is to encapsulate the real file and
-		// string streams in our own version, and use a homemade
-		// streambuf that would do the encoding conversion and then
-		// forward to the real stream. That would probably work, but
-		// would require far more code and a good understanding of
-		// stream buffers to get it right.
-
-		// Another idea by JMarc is to use a modifier like
-		// os << setencoding("iso-8859-1");
-		// That currently looks like the best idea.
-
-		std::vector<char_type> const faked = lyx::eightbit_to_ucs4(&(encoded[0]),
-			encoded.size(), doc_language->encoding()->iconvName());
-		std::vector<char_type>::const_iterator const end = faked.end();
-		std::vector<char_type>::const_iterator it = faked.begin();
-		for (; it != end; ++it)
-			ucs4.put(*it);
-	}
-
 	return ++pit;
 }
 
@@ -647,4 +587,24 @@ void latexParagraphs(Buffer const & buf,
 }
 
 
+int switchEncoding(odocstream & os, BufferParams const & bparams,
+                   Encoding const & oldEnc, Encoding const & newEnc)
+{
+	// FIXME thailatex does not support the inputenc package, so switches
+	// from/to tis620-0 are ignored. This only works as long as the
+	// non-thai text contains ASCII only, but it is the best we can do.
+	bool const needed = bparams.inputenc == "auto" &&
+		oldEnc.name() != newEnc.name() &&
+		oldEnc.name() != "tis620-0" && newEnc.name() != "tis620-0";
+	if (!needed)
+		return 0;
+	lyxerr[Debug::LATEX] << "Changing LaTeX encoding from "
+	                     << oldEnc.name() << " to "
+	                     << newEnc.name() << endl;
+	os << setEncoding(newEnc.iconvName());
+	docstring const latexname(from_ascii(newEnc.latexName()));
+	os << "\\inputencoding{" << latexname << '}';
+	// 15 chars for "\inputencoding{" plus 1 for the closing brace
+	return 16 + latexname.length();
+}
+
 } // namespace lyx

Reply via email to