Abdelrazak Younes wrote:
> Abdelrazak Younes wrote:
>> If you try out the document attached in bug 3561
>> (http://bugzilla.lyx.org/show_bug.cgi?id=3561) and View->Source, the
>> Encoding::init() will take 40 seconds on my system.
>>
>> With the attached patch, this goes down to 25 seconds.
Your patch (+ the part that is already in) has several problems:
- char ucs4_to_eightbit(char_type ucs4, string const & encoding) does not
make sense at all: you cannot guarantee that the result is only one char
(besides, the function is unused).
- The name of ucs4_to_multibytes is misleading: this function does exactly
the same as ucs4_to_eightbit, only optimized for a single UCS4 char.
- Now there are two maps with ucs4 -> 8bit iconv processors; one is enough
and more efficient.
- ucs4_to_multibytes silently fails for exotic conversions that result in
more than 4 bytes. AFAIK LyX currently doesn't support such an encoding,
but some exist and they could be supported in the future.
- There is no reason not to use the optimized map lookup in
eightbit_to_ucs4, too.
Consider the attached (untested) version if you like.
Georg
Index: src/support/unicode.cpp
===================================================================
--- src/support/unicode.cpp (Revision 18336)
+++ src/support/unicode.cpp (Arbeitskopie)
@@ -288,59 +288,54 @@ vector<char_type>
eightbit_to_ucs4(char const * s, size_t ls, string const & encoding)
{
static map<string, IconvProcessor> processors;
- if (processors.find(encoding) == processors.end()) {
+ map<string, IconvProcessor>::iterator it = processors.find(encoding);
+ if (it == processors.end()) {
IconvProcessor processor(ucs4_codeset, encoding.c_str());
- processors.insert(make_pair(encoding, processor));
+ it = processors.insert(make_pair(encoding, processor)).first;
}
- return iconv_convert<char_type>(processors[encoding], s, ls);
+ return iconv_convert<char_type>(it->second, s, ls);
}
-vector<char>
-ucs4_to_eightbit(char_type const * ucs4str, size_t ls, string const & encoding)
+namespace {
+
+/// processors for UCS4 -> 8bit encoding conversions
+map<string, IconvProcessor> ucs4_processors;
+
+
+/// Get processor for UCS4 -> \p encoding conversion
+inline IconvProcessor & get_ucs4_processor(string const & encoding)
{
- static map<string, IconvProcessor> processors;
- if (processors.find(encoding) == processors.end()) {
+ map<string, IconvProcessor>::iterator it = ucs4_processors.find(encoding);
+ if (it == ucs4_processors.end()) {
IconvProcessor processor(encoding.c_str(), ucs4_codeset);
- processors.insert(make_pair(encoding, processor));
+ return ucs4_processors.insert(make_pair(encoding, processor)).first->second;
}
- return iconv_convert<char>(processors[encoding], ucs4str, ls);
+ return it->second;
}
+}
-char ucs4_to_eightbit(char_type ucs4, string const & encoding)
-{
- static map<string, IconvProcessor> processors;
- map<string, IconvProcessor>::iterator it = processors.find(encoding);
- if (it == processors.end()) {
- IconvProcessor processor(encoding.c_str(), ucs4_codeset);
- it = processors.insert(make_pair(encoding, processor)).first;
- }
- char out;
- int const bytes = it->second.convert((char *)(&ucs4), 4, &out, 1);
- if (bytes > 0)
- return out;
- return 0;
+vector<char>
+ucs4_to_eightbit(char_type const * ucs4str, size_t ls, string const & encoding)
+{
+ return iconv_convert<char>(get_ucs4_processor(encoding), ucs4str, ls);
}
-void ucs4_to_multibytes(char_type ucs4, vector<char> & out,
+void ucs4_to_eightbit(char_type ucs4, vector<char> & out,
string const & encoding)
{
- static map<string, IconvProcessor> processors;
- map<string, IconvProcessor>::iterator it = processors.find(encoding);
- if (it == processors.end()) {
- IconvProcessor processor(encoding.c_str(), ucs4_codeset);
- it = processors.insert(make_pair(encoding, processor)).first;
- }
out.resize(4);
- int bytes = it->second.convert((char *)(&ucs4), 4, &out[0], 4);
- if (bytes > 0)
+ int bytes = get_ucs4_processor(encoding).convert((char *)(&ucs4), 4, &out[0], 4);
+ if (bytes >= 0)
out.resize(bytes);
else
- out.clear();
+ // Use unoptimized version.
+ // Does only happen for exotic encodings
+ out = ucs4_to_eightbit(&ucs4, 1, encoding);
}
} // namespace lyx
Index: src/support/unicode.h
===================================================================
--- src/support/unicode.h (Revision 18336)
+++ src/support/unicode.h (Arbeitskopie)
@@ -89,12 +89,9 @@ eightbit_to_ucs4(char const * s, size_t
std::vector<char>
ucs4_to_eightbit(char_type const * ucs4str, size_t ls, std::string const & encoding);
-/// convert ucs4 character \p c to encoding \p encoding.
+/// convert ucs4 character \p ucs4 to encoding \p encoding.
/// \p encoding must be a valid iconv 8bit encoding
-char ucs4_to_eightbit(char_type c, std::string const & encoding);
-
-///
-void ucs4_to_multibytes(char_type ucs4, std::vector<char> & out,
+void ucs4_to_eightbit(char_type ucs4, std::vector<char> & out,
std::string const & encoding);
extern char const * ucs4_codeset;
Index: src/Encoding.cpp
===================================================================
--- src/Encoding.cpp (Revision 18336)
+++ src/Encoding.cpp (Arbeitskopie)
@@ -171,8 +171,9 @@ void Encoding::init() const
// they do not have a direct representation as a single byte,
// therefore we need to check all UCS4 code points.
// This is expensive!
+ std::vector<char> eightbit;
for (char_type c = 0; c < max_ucs4; ++c) {
- std::vector<char> const eightbit = ucs4_to_eightbit(&c, 1, iconvName_);
+ ucs4_to_eightbit(c, eightbit, iconvName_);
if (!eightbit.empty()) {
CharInfoMap::const_iterator const it = unicodesymbols.find(c);
if (it == unicodesymbols.end() || !it->second.force)