Re: Bidi support in Lyx 1.5 svn

Georg Baum Wed, 27 Dec 2006 10:23:20 -0800

On Friday 22 December 2006 13:59, Georg Baum wrote:
> Anyway, here is a patch that gets rid of the encoding list in the
> preferences dialog. It also adds some other encodings I found in my tetex
> installation. Do we still miss some (I know, arabic is missing)?


I installed texlive and added all encodings that I found and that are 
supported by iconv. I also tested and fixed documents in the Thai language.
Unfortunately, Thai does not fit well into the standard babel/inputenc scheme,
so we have to hardcode tis620 encoding support a bit.
Finally I got rid of all "unknown" entries in lib/encodings. To my knowledge
all encodings in lib/encodings do work now (provided you have the
correct .def file installed).

This updated patch is going in tomorrow if nobody complains.


Georg

Index: src/LaTeXFeatures.C
===================================================================
--- src/LaTeXFeatures.C	(Revision 16404)
+++ src/LaTeXFeatures.C	(Arbeitskopie)
@@ -221,7 +221,11 @@
 	LanguageList::const_iterator it  = UsedLanguages_.begin();
 	LanguageList::const_iterator end = UsedLanguages_.end();
 	for (; it != end; ++it)
-		if ((*it)->encoding()->latexName() != doc_encoding)
+		// thailatex does not use the inputenc package, but sets up
+		// babel directly for tis620-0 encoding, therefore we must
+		// not add tis620-0 to the encoding set.
+		if ((*it)->encoding()->latexName() != doc_encoding &&
+		    (*it)->encoding()->name() != "tis620-0")
 			encodings.insert((*it)->encoding()->latexName());
 	return encodings;
 }
Index: src/encoding.h
===================================================================
--- src/encoding.h	(Revision 16404)
+++ src/encoding.h	(Arbeitskopie)
@@ -52,6 +52,16 @@
 public:
 	///
 	typedef std::map<std::string, Encoding> EncodingList;
+	/// iterator to iterate over all encodings.
+	/// We hide the fact that our encoding list is implemented as a map.
+	class const_iterator : public EncodingList::const_iterator {
+		typedef EncodingList::const_iterator base;
+	public:
+		const_iterator() : base() {}
+		const_iterator(base const & b) : base(b) {}
+		Encoding const & operator*() const { return base::operator*().second; }
+		Encoding const * operator->() const { return &(base::operator*().second); }
+	};
 	///
 	Encodings();
 	///
@@ -62,6 +72,11 @@
 	Encoding const * getFromLaTeXName(std::string const & name) const;
 
 	///
+	const_iterator begin() const { return encodinglist.begin(); }
+	///
+	const_iterator end() const { return encodinglist.end(); }
+
+	///
 	enum Letter_Form {
 		///
 		FORM_ISOLATED,
Index: src/paragraph_pimpl.C
===================================================================
--- src/paragraph_pimpl.C	(Revision 16404)
+++ src/paragraph_pimpl.C	(Arbeitskopie)
@@ -663,7 +663,9 @@
 		case 0x20ac:    // EURO SIGN
 			if (isEncoding(bparams, font, "latin9")
 			    || isEncoding(bparams, font, "cp1251")
-			    || isEncoding(bparams, font, "utf8")) {
+			    || isEncoding(bparams, font, "utf8")
+			    || isEncoding(bparams, font, "latin10")
+			    || isEncoding(bparams, font, "cp858")) {
 				os.put(c);
 			} else {
 				os << "\\texteuro{}";
Index: src/buffer.C
===================================================================
--- src/buffer.C	(Revision 16404)
+++ src/buffer.C	(Arbeitskopie)
@@ -141,7 +141,7 @@
 
 namespace {
 
-int const LYX_FORMAT = 255;
+int const LYX_FORMAT = 256;
 
 } // namespace anon
 
Index: src/bufferparams.C
===================================================================
--- src/bufferparams.C	(Revision 16404)
+++ src/bufferparams.C	(Arbeitskopie)
@@ -851,14 +851,28 @@
 		std::set<string> encodings =
 			features.getEncodingSet(doc_encoding);
 
-		os << "\\usepackage[";
-		std::set<string>::const_iterator it = encodings.begin();
-		std::set<string>::const_iterator const end = encodings.end();
-		for (; it != end; ++it)
-			os << from_ascii(*it) << ',';
-		os << from_ascii(doc_encoding) << "]{inputenc}\n";
-		texrow.newline();
-	} else if (inputenc != "default") {
+		// thailatex does not use the inputenc package, but sets up
+		// babel directly for tis620-0 encoding, therefore we must
+		// not request inputenc for tis620-0 encoding
+		if (!encodings.empty() || doc_encoding != "tis620-0") {
+			os << "\\usepackage[";
+			std::set<string>::const_iterator it = encodings.begin();
+			std::set<string>::const_iterator const end = encodings.end();
+			if (it != end) {
+				os << from_ascii(*it);
+				++it;
+			}
+			for (; it != end; ++it)
+				os << ',' << from_ascii(*it);
+			if (doc_encoding != "tis620-0") {
+				if (!encodings.empty())
+					os << ',';
+				os << from_ascii(doc_encoding);
+			}
+			os << "]{inputenc}\n";
+			texrow.newline();
+		}
+	} else if (inputenc != "default" && inputenc != "tis620-0") {
 		os << "\\usepackage[" << from_ascii(inputenc)
 		   << "]{inputenc}\n";
 		texrow.newline();
Index: src/frontends/qt4/QDocumentDialog.C
===================================================================
--- src/frontends/qt4/QDocumentDialog.C	(Revision 16404)
+++ src/frontends/qt4/QDocumentDialog.C	(Arbeitskopie)
@@ -25,6 +25,7 @@
 #include "qt_helpers.h"
 
 #include "bufferparams.h"
+#include "encoding.h"
 #include "gettext.h"
 #include "helper_funcs.h" // getSecond()
 #include "language.h"
@@ -56,18 +57,6 @@
 namespace frontend {
 
 
-namespace {
-
-// FIXME: This list is incomplete. It should not be hardcoded but come from
-// the available encodings in src/encodings.C
-char const * encodings[] = { "LaTeX default", "latin1", "latin2",
-	"latin3", "latin4", "latin5", "latin9",
-	"koi8-r", "koi8-u", "cp866", "cp1251",
-	"iso88595", "pt154", "utf8", 0
-};
-
-}
-
 QDocumentDialog::QDocumentDialog(QDocument * form)
 	: form_(form),
 	lang_(getSecond(getLanguageData(false)))
@@ -298,10 +287,13 @@
 			toqstr(lit->first));
 	}
 
-	int k = 0;
-	while (encodings[k]) {
-		langModule->encodingCO->addItem(qt_(encodings[k++]));
-	}
+	// Always put the default encoding in the first position.
+	// It is special because the displayed text is translated.
+	langModule->encodingCO->addItem(qt_("LaTeX default"));
+	Encodings::const_iterator it = encodings.begin();
+	Encodings::const_iterator const end = encodings.end();
+	for (; it != end; ++it)
+		langModule->encodingCO->addItem(toqstr(it->latexName()));
 
 	langModule->quoteStyleCO->addItem(qt_("``text''"));
 	langModule->quoteStyleCO->addItem(qt_("''text''"));
@@ -669,11 +661,11 @@
 		params.inputenc = "auto";
 	} else {
 		int i = langModule->encodingCO->currentIndex();
-		if (i == 0) {
+		if (i == 0)
 			params.inputenc = "default";
-		} else {
-			params.inputenc = encodings[i];
-		}
+		else
+			params.inputenc =
+				fromqstr(langModule->encodingCO->currentText());
 	}
 
 	InsetQuotes::quote_language lga = InsetQuotes::EnglishQ;
@@ -956,16 +948,13 @@
 		if (params.inputenc == "default") {
 			langModule->encodingCO->setCurrentIndex(0);
 		} else {
-			int i = 0;
-			while (encodings[i]) {
-				if (encodings[i] == params.inputenc) {
-					langModule->encodingCO->setCurrentIndex(i);
-					break;
-				}
-				++i;
-			}
-			// FIXME: possible data loss because of encodings is
-			// incomplete
+			int const i = langModule->encodingCO->findText(
+					toqstr(params.inputenc));
+			if (i >= 0)
+				langModule->encodingCO->setCurrentIndex(i);
+			else
+				// unknown encoding. Set to default.
+				langModule->defaultencodingCB->setChecked(true);
 		}
 	}
 
Index: src/output_latex.C
===================================================================
--- src/output_latex.C	(Revision 16404)
+++ src/output_latex.C	(Arbeitskopie)
@@ -294,8 +294,15 @@
 		}
 	}
 
+	// FIXME thailatex does not support the inputenc package, so we
+	// ignore switches from/to tis620-0 encoding here. This does of
+	// course only work as long as the non-thai text contains ASCII
+	// only, but it is the best we can do.
+	bool const use_thailatex = (language->encoding()->name() == "tis620-0" ||
+	                            previous_language->encoding()->name() == "tis620-0");
 	if (bparams.inputenc == "auto" &&
-	    language->encoding() != previous_language->encoding()) {
+	    language->encoding() != previous_language->encoding() &&
+	    !use_thailatex) {
 		ucs4 << "\\inputencoding{"
 		     << from_ascii(language->encoding()->latexName())
 		     << "}\n";
@@ -307,7 +314,8 @@
 	odocstringstream par_stream;
 	bool const change_encoding = !runparams_in.dryrun &&
 			bparams.inputenc == "auto" &&
-			language->encoding() != doc_language->encoding();
+			language->encoding() != doc_language->encoding() &&
+			!use_thailatex;
 	// don't trigger the copy ctor because it's private on msvc 
 	odocstream & os = *(change_encoding ? &par_stream : &ucs4);
 
Index: lib/lyx2lyx/LyX.py
===================================================================
--- lib/lyx2lyx/LyX.py	(Revision 16404)
+++ lib/lyx2lyx/LyX.py	(Arbeitskopie)
@@ -73,7 +73,7 @@
                    ("1_2",     [220], generate_minor_versions("1.2" , 4)),
                    ("1_3",     [221], generate_minor_versions("1.3" , 7)),
                    ("1_4", range(222,246), generate_minor_versions("1.4" , 3)),
-                   ("1_5", range(246,256), generate_minor_versions("1.5" , 0))]
+                   ("1_5", range(246,257), generate_minor_versions("1.5" , 0))]
 
 
 def formats_list():
Index: lib/lyx2lyx/lyx_1_5.py
===================================================================
--- lib/lyx2lyx/lyx_1_5.py	(Revision 16404)
+++ lib/lyx2lyx/lyx_1_5.py	(Arbeitskopie)
@@ -646,6 +646,21 @@
     i = i + 1
 
 
+def revert_encodings(document):
+    " Set new encodings to auto. "
+    encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
+                 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
+                 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
+    i = find_token(document.header, "\\inputencoding", 0)
+    if i == -1:
+        document.header.append("\\inputencoding auto")
+    else:
+        inputenc = get_value(document.header, "\\inputencoding", i)
+        if inputenc in encodings:
+            document.header[i] = "\\inputencoding auto"
+    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
+
+
 ##
 # Conversion hub
 #
@@ -660,9 +675,11 @@
            [252, [convert_commandparams, convert_bibitem]],
            [253, []],
            [254, [convert_esint]],
-           [255, []]]
+           [255, []],
+           [256, []]]
 
-revert =  [[254, [revert_clearpage, revert_cleardoublepage]],
+revert =  [[255, [revert_encodings]],
+           [254, [revert_clearpage, revert_cleardoublepage]],
            [253, [revert_esint]],
            [252, [revert_nomenclature, revert_printnomenclature]],
            [251, [revert_commandparams]],
Index: lib/encodings
===================================================================
--- lib/encodings	(Revision 16404)
+++ lib/encodings	(Arbeitskopie)
@@ -1,8 +1,5 @@
 # FIXME: Have a look at the encodings known by the inputenc package and add
 # missing ones. Caution: File format change!
-# FIXME: Find out whether this file is used for more than LaTeX file.
-# generation. If not it doews not make make sense to have encodings with
-# "unknown" LaTeX name.
 
 # Order of names: LyX name LaTeX name iconv name
 
@@ -24,35 +21,91 @@
 Encoding iso8859-5 iso88595 ISO-8859-5
 End
 
-Encoding iso8859-6 unknown ISO-8859-6
+# Not standard, see http://tug.ctan.org/tex-archive/language/arabic/arabi/arabi/texmf/latex/arabi/
+Encoding iso8859-6 8859-6 ISO-8859-6
 End
 
 Encoding iso8859-7 iso-8859-7 ISO-8859-7
 End
 
+Encoding iso8859-8 8859-8 ISO-8859-8
+End
+
 Encoding iso8859-9 latin5 ISO-8859-9
 End
 
+# Not standard, see http://www.vtex.lt/tex/littex/index.html
 Encoding iso8859-13 l7xenc ISO-8859-13
 End
 
 Encoding iso8859-15 latin9 ISO-8859-15
 End
 
-Encoding cp1255 cp1255 CP1255
+Encoding iso8859-16 latin10 ISO-8859-16
 End
 
+Encoding cp437 cp437 CP437
+End
+
+# cp437, but on position 225 is sz instead of beta
+Encoding cp437de cp437de CP437
+End
+
+Encoding cp850 cp850 CP850
+End
+
+Encoding cp852 cp852 CP852
+End
+
+Encoding cp855 cp855 CP855
+End
+
+Encoding cp858 cp858 CP858
+End
+
+Encoding cp862 cp862 CP862
+End
+
+Encoding cp865 cp865 CP865
+End
+
+Encoding cp866 cp866 CP866
+End
+
+Encoding cp1250 cp1250 CP1250
+End
+
 Encoding cp1251 cp1251 CP1251
 End
 
+Encoding cp1252 cp1252 CP1252
+End
+
+Encoding cp1255 cp1255 CP1255
+End
+
+# Not standard, see http://tug.ctan.org/tex-archive/language/arabic/arabi/arabi/texmf/latex/arabi/
+Encoding cp1256 cp1256 CP1256
+End
+
+Encoding cp1257 cp1257 CP1257
+End
+
 Encoding koi8 koi8-r KOI8-R
 End
 
 Encoding koi8-u koi8-u KOI8-U
 End
 
-Encoding tis620-0 unknown TIS-620-0
+# This one needs hardcoded support, since the inputenc package does not know
+# tis620-0, and thailatex sets up babel directly to use tis620-0, so the value
+# for inputenc is never output to .tex files (but needed for the hardcoded
+# tis620-0 support).
+Encoding tis620-0 tis620-0 TIS620-0
 End
 
 Encoding pt154 pt154 PT154
 End
+
+Encoding pt254 pt254 PT254
+End
Index: development/FORMAT
===================================================================
--- development/FORMAT	(Revision 16404)
+++ development/FORMAT	(Arbeitskopie)
@@ -1,6 +1,11 @@
 LyX file-format changes
 -----------------------
 
+2006-12-22  Georg Baum  <[EMAIL PROTECTED]>
+
+	* format incremented to 256: allow some new inputenc settings.
+	For the complete list, see lib/lyx2lyx/lyx_1_5.py.
+
 2006-11-25  Georg Baum  <[EMAIL PROTECTED]>
 
 	* format incremented to 255: new insets for \clearpage and

Re: Bidi support in Lyx 1.5 svn

Reply via email to