Lars Gullik Bjønnes wrote:
> Conversion between the different unicode encodings are pretty cheap.
Yes, but what I am more concerned about is having lots of ucs4_to_utf8 calls
(or the reverse conversion) in the code. That just makes it a bit less readable.
> | Since the po
> | files will eventually be in utf8 it seems natural to use utf8 for
> | _(), too.
>
> Yes. However to make us able to ignore the norm of the po files I am
> going to use bind_textdomain_codeset so that we always get utf-8.
Good.
Here comes the next bit: I discovered that the result of
std::vector<char> ucs4_to_utf8(boost::uint32_t c)
was never used as a vector. I changed its return type to std::string, which
simplifies the code. In particular, it removes the manual fiddling with the
terminating '\0', which we should not do IMHO.
Is this change OK with you?
Finally the last hunk of the patch makes the ToC in plain text output
readable. Without the other changes I would need to do the '\0' hack here
as well.
Georg
Index: src/output_plaintext.C
===================================================================
--- src/output_plaintext.C (Revision 14700)
+++ src/output_plaintext.C (Arbeitskopie)
@@ -234,8 +234,7 @@ void asciiParagraph(Buffer const & buf,
break;
default: {
- std::vector<char> const tmp = ucs4_to_utf8(c);
- word.append(tmp.begin(), tmp.end());
+ word += ucs4_to_utf8(c);
if (runparams.linelen > 0 &&
currlinelen + word.length() > runparams.linelen)
{
Index: src/lyxfunc.C
===================================================================
--- src/lyxfunc.C (Revision 14695)
+++ src/lyxfunc.C (Arbeitskopie)
@@ -323,8 +323,7 @@ void LyXFunc::processKeySym(LyXKeySymPtr
if (func.action == LFUN_SELF_INSERT) {
if (encoded_last_key != 0) {
- std::vector<char> tmp = ucs4_to_utf8(encoded_last_key);
- string const arg(tmp.begin(), tmp.end());
+ string const arg = ucs4_to_utf8(encoded_last_key);
dispatch(FuncRequest(LFUN_SELF_INSERT, arg,
FuncRequest::KEYBOARD));
lyxerr[Debug::KEY]
Index: src/support/unicode.C
===================================================================
--- src/support/unicode.C (Revision 14695)
+++ src/support/unicode.C (Arbeitskopie)
@@ -275,7 +275,7 @@ std::vector<char> ucs4_to_utf8(std::vect
}
-std::vector<char> ucs4_to_utf8(boost::uint32_t c)
+std::string const ucs4_to_utf8(boost::uint32_t c)
{
std::vector<char> in;
in.push_back(static_cast<char>((c & 0xff000000) >> 24));
@@ -283,5 +291,5 @@ std::vector<char> ucs4_to_utf8(boost::ui
in.push_back(static_cast<char>((c & 0x0000ff00) >> 8));
in.push_back(static_cast<char>(c & 0x000000ff));
std::vector<char> res = iconv_convert("UTF-8", "UCS-4", in);
- return res;
+ return string(res.begin(), res.end());
}
Index: src/support/unicode.h
===================================================================
--- src/support/unicode.h (Revision 14695)
+++ src/support/unicode.h (Arbeitskopie)
@@ -34,7 +34,7 @@ ucs4_to_ucs2(boost::uint32_t c);
std::vector<char>
ucs4_to_utf8(std::vector<boost::uint32_t> const & ucs4str);
-std::vector<char>
+std::string const
ucs4_to_utf8(boost::uint32_t c);
#endif
Index: src/paragraph.C
===================================================================
--- src/paragraph.C (Revision 14695)
+++ src/paragraph.C (Arbeitskopie)
@@ -214,11 +214,9 @@ void Paragraph::write(Buffer const & buf
}
// this check is to amend a bug. LyX sometimes
// inserts '\0' this could cause problems.
- if (c != '\0') {
- std::vector<char> tmp = ucs4_to_utf8(c);
- tmp.push_back('\0');
- os << &tmp[0];
- } else
+ if (c != '\0')
+ os << ucs4_to_utf8(c);
+ else
lyxerr << "ERROR (Paragraph::writeFile):"
" NULL char in structure." << endl;
++column;
@@ -1591,7 +1589,7 @@ string const Paragraph::asString(Buffer
for (pos_type i = beg; i < end; ++i) {
value_type const c = getUChar(buffer.params(), i);
if (isPrintable(c))
- os << c;
+ os << ucs4_to_utf8(c);
else if (c == META_INSET)
getInset(i)->textString(buffer, os, runparams);
}