Am Dienstag, 5. September 2006 14:35 schrieb Abdelrazak Younes:
> Peter Kümmel wrote:
> > In my ordinary text editor (ultraedit) it doesn't look like a text file.
>
> Same for me in wordpad... attached the file.
Can you please try this updated test program? The screen output should look
like in stream.log. I attached also the generated file. In the old version
I forgot a const for two virtual methods, therefore the versions of the
base class were called.
I believe that this version should work on any OS.
Georg
e4 f6 fc
c4 d6 dc
from: 0x8053470 inbytesleft: 88 outbytesleft: 132
20 61 62 63 64 a 20 e4 f6 fc a 20 c4 d6 dc a 20 61 62 63 64 a
20 0 0 0 61 0 0 0 62 0 0 0 63 0 0 0 64 0 0 0 a 0 0 0 20 0 0 0 e4 0 0 0 f6 0 0 0 fc 0 0 0 a 0 0 0 20 0 0 0 c4 0 0 0 d6 0 0 0 dc 0 0 0 a 0 0 0 20 0 0 0 61 0 0 0 62 0 0 0 63 0 0 0 64 0 0 0 a 0 0 0
inbytesleft: 0 outbytesleft: 104
20 61 62 63 64 a 20 ffffffc3 ffffffa4 ffffffc3 ffffffb6 ffffffc3 ffffffbc a 20 ffffffc3 ffffff84 ffffffc3 ffffff96 ffffffc3 ffffff9c a 20 61 62 63 64 a
abcd
äöü
ÄÖÜ
abcd
#include <iconv.h>

#include <algorithm>
#include <cerrno>
#include <cstdio>
#include <cstring>
#include <cwchar>
#include <exception>
#include <fstream>
#include <iostream>
#include <locale>
#include <typeinfo>
// Qualifier spliced into the const_cast that adapts the input-buffer argument
// passed to iconv(). It is defined empty here, so that cast yields `char **`.
// NOTE(review): presumably meant to be defined as `const` on platforms whose
// iconv() prototype takes `const char **` -- confirm against the build setup.
#define ICONV_CONST
// Local declaration of boost::uint32_t; no boost header is included by this
// test program.
// NOTE(review): assumes unsigned int is exactly 32 bits wide -- confirm for
// the targeted platforms.
namespace boost {
typedef unsigned int uint32_t;
}
// Placeholder definitions for the virtual members of
// std::codecvt<boost::uint32_t, char, mbstate_t> and std::ctype<boost::uint32_t>.
// They are only compiled with GCC and exist purely to satisfy the linker.
// NOTE(review): explicitly specializing members of std templates for a
// built-in type is a compiler-specific workaround -- confirm it is still
// needed/accepted by the compilers in use.
namespace std {
#ifdef __GNUC__
// We get undefined references to these virtual methods. This looks like
// a bug in gcc. The implementation here does not do anything useful, since
// it is overridden in utf8_codecvt_facet and ascii_ctype_facet.

// codecvt placeholders: every conversion entry point reports `error`.
template<> codecvt<boost::uint32_t, char, mbstate_t>::result
codecvt<boost::uint32_t, char, mbstate_t>::do_out(mbstate_t &, const boost::uint32_t *, const boost::uint32_t *, const boost::uint32_t *&,
char *, char *, char *&) const { return error; }
template<> codecvt<boost::uint32_t, char, mbstate_t>::result
codecvt<boost::uint32_t, char, mbstate_t>::do_unshift(mbstate_t &, char *, char *, char *&) const { return error; }
template<> codecvt<boost::uint32_t, char, mbstate_t>::result
codecvt<boost::uint32_t, char, mbstate_t>::do_in(mbstate_t &, const char *, const char *, const char *&,
boost::uint32_t*, boost::uint32_t*, boost::uint32_t*&) const { return error; }
template<> int codecvt<boost::uint32_t, char, mbstate_t>::do_encoding() const throw() { return 0; }
template<> bool codecvt<boost::uint32_t, char, mbstate_t>::do_always_noconv() const throw() { return true; }
template<> int codecvt<boost::uint32_t, char, mbstate_t>::do_length(mbstate_t &, const char *, const char *, size_t) const { return 1; }
template<> int codecvt<boost::uint32_t, char, mbstate_t>::do_max_length() const throw() { return 4; }

// ctype placeholders: every query returns false / 0 / a null pointer.
template<> ctype<boost::uint32_t>::~ctype() {}
template<> bool
ctype<boost::uint32_t>::do_is(ctype<boost::uint32_t>::mask, boost::uint32_t) const { return false; }
template<> boost::uint32_t const *
ctype<boost::uint32_t>::do_is(const boost::uint32_t *, const boost::uint32_t *, ctype<boost::uint32_t>::mask *) const { return 0; }
template<> const boost::uint32_t *
ctype<boost::uint32_t>::do_scan_is(ctype<boost::uint32_t>::mask, const boost::uint32_t *, const boost::uint32_t *) const { return 0; }
template<> const boost::uint32_t *
ctype<boost::uint32_t>::do_scan_not(ctype<boost::uint32_t>::mask, const boost::uint32_t *, const boost::uint32_t *) const { return 0; }
template<> boost::uint32_t ctype<boost::uint32_t>::do_toupper(boost::uint32_t) const { return 0; }
template<> const boost::uint32_t * ctype<boost::uint32_t>::do_toupper(boost::uint32_t *, boost::uint32_t const *) const { return 0; }
template<> boost::uint32_t ctype<boost::uint32_t>::do_tolower(boost::uint32_t) const { return 0; }
template<> const boost::uint32_t * ctype<boost::uint32_t>::do_tolower(boost::uint32_t *, boost::uint32_t const *) const { return 0; }
template<> boost::uint32_t ctype<boost::uint32_t>::do_widen(char) const { return 0; }
template<> const char *
ctype<boost::uint32_t>::do_widen(const char *, const char *, boost::uint32_t *) const { return 0; }
template<> char
ctype<boost::uint32_t>::do_narrow(const boost::uint32_t, char) const { return 0; }
template<> const boost::uint32_t *
ctype<boost::uint32_t>::do_narrow(const boost::uint32_t *, const boost::uint32_t *, char, char *) const { return 0; }
#endif
}
// Character type used for the internal (wide) side of the stream: one
// boost::uint32_t per character.
namespace lyx {
typedef boost::uint32_t char_type;
}
// codecvt_facet for conversion of lyx::char_type (internal representation) to UTF8 (external representation)
class utf8_codecvt_facet : public std::codecvt<lyx::char_type, char, std::mbstate_t> {
typedef std::codecvt<lyx::char_type, char, std::mbstate_t> base;
public:
explicit utf8_codecvt_facet(size_t refs = 0);
protected:
virtual ~utf8_codecvt_facet();
virtual result do_out(state_type &, intern_type const * from,
intern_type const * from_end, intern_type const *& from_next,
extern_type * to, extern_type * to_end,
extern_type *& to_next) const;
virtual result do_unshift(state_type &, extern_type * to, extern_type *, extern_type *& to_next) const;
virtual result do_in(state_type &,
extern_type const * from, extern_type const * from_end,
extern_type const *& from_next,
intern_type * to, intern_type * to_end,
intern_type *& to_next) const;
virtual int do_encoding() const throw();
virtual bool do_always_noconv() const throw();
virtual int do_length(state_type & state, extern_type const * from, extern_type const * end, size_t max) const;
virtual int do_max_length() const throw();
private:
inline base::result do_iconv(iconv_t cd, char const ** from, size_t * inbytesleft, char ** to, size_t * outbytesleft) const
{
fprintf(stderr, "from: %p inbytesleft: %d outbytesleft: %d\n", *from, *inbytesleft, *outbytesleft);
for (size_t i = 0; i < *inbytesleft / sizeof(intern_type); ++i) {
intern_type const * buf = reinterpret_cast<intern_type const *>(*from);
unsigned int c = buf[i];
fprintf(stderr, "%x ", c);
}
fprintf(stderr, "\n");
for (size_t i = 0; i < *inbytesleft; ++i) {
unsigned char const * buf = reinterpret_cast<unsigned char const *>(*from);
unsigned int c = buf[i];
fprintf(stderr, "%x ", c);
}
fprintf(stderr, "\n");
char const * to_start = *to;
size_t converted = iconv(cd, const_cast<char ICONV_CONST **>(from), inbytesleft, to, outbytesleft);
if (converted == (size_t)(-1)) {
fprintf(stderr, "Error %d returned from iconv: %s\n", errno, strerror(errno));
switch(errno) {
case EINVAL:
case E2BIG:
fprintf(stderr, "partial result. inbytesleft: %d outbytesleft: %d\n", *inbytesleft, *outbytesleft);
fflush(stderr);
return base::partial;
case EILSEQ:
default:
fprintf(stderr, "error result. inbytesleft: %d outbytesleft: %d\n", *inbytesleft, *outbytesleft);
fflush(stderr);
return base::error;
}
}
fprintf(stderr, " inbytesleft: %d outbytesleft: %d\n", *inbytesleft, *outbytesleft); fflush(stderr);
for (size_t i = 0; i < size_t(*to - to_start); ++i) {
unsigned int c = to_start[i];
fprintf(stderr, "%x ", c);
}
fprintf(stderr, "\n");
if (*to == to_start)
return base::noconv;
return base::ok;
}
mutable iconv_t in_cd_;
mutable iconv_t out_cd_;
};
// Constructs the facet with both iconv descriptors marked as unopened
// ((iconv_t)(-1)); refs goes to the base class.
utf8_codecvt_facet::utf8_codecvt_facet(size_t refs)
    : base(refs),
      in_cd_((iconv_t)(-1)),
      out_cd_((iconv_t)(-1))
{
    // nothing else to set up
}
// Closes whichever iconv descriptors were opened. A failing iconv_close()
// is only reported on stderr.
utf8_codecvt_facet::~utf8_codecvt_facet()
{
    iconv_t const unopened = (iconv_t)(-1);

    if (in_cd_ != unopened && iconv_close(in_cd_) == -1) {
        fprintf(stderr, "Error %d returned from iconv_close(in_cd_): %s\n", errno, strerror(errno));
        fflush(stderr);
    }

    if (out_cd_ != unopened && iconv_close(out_cd_) == -1) {
        fprintf(stderr, "Error %d returned from iconv_close(out_cd_): %s\n", errno, strerror(errno));
        fflush(stderr);
    }
}
// Converts the internal characters [from, from_end) to UTF-8 in [to, to_end).
//
// The iconv descriptor is opened on the first call. On return from_next and
// to_next point just past the input consumed / output produced by iconv().
// Returns the result of do_iconv(); throws std::exception if the descriptor
// cannot be opened.
utf8_codecvt_facet::result utf8_codecvt_facet::do_out(state_type &, intern_type const * from,
        intern_type const * from_end, intern_type const *& from_next,
        extern_type * to, extern_type * to_end,
        extern_type *& to_next) const
{
    if (out_cd_ == (iconv_t)(-1)) {
        // The internal buffer holds plain integers, i.e. code points in the
        // byte order of the host. Name the matching explicit-endian UCS-4
        // variant instead of hard-coding little endian, which produces
        // garbage on big-endian machines.
        intern_type const probe = 1;
        bool const little_endian = *reinterpret_cast<unsigned char const *>(&probe) == 1;
        out_cd_ = iconv_open("UTF-8", little_endian ? "UCS-4LE" : "UCS-4BE");
        if (out_cd_ == (iconv_t)(-1)) {
            fprintf(stderr, "Error %d returned from iconv_open(out_cd_): %s\n", errno, strerror(errno));
            fflush(stderr);
            throw std::exception();
        }
    }
    // iconv() counts in bytes, the codecvt interface in characters
    size_t inbytesleft = (from_end - from) * sizeof(intern_type);
    size_t outbytesleft = (to_end - to) * sizeof(extern_type);
    from_next = from;
    to_next = to;
    return do_iconv(out_cd_, reinterpret_cast<char const **>(&from_next), &inbytesleft, &to_next, &outbytesleft);
}
// Nothing is ever written here: to_next is set to `to` and noconv is returned.
utf8_codecvt_facet::result utf8_codecvt_facet::do_unshift(state_type &, extern_type * to, extern_type *, extern_type *& to_next) const
{
    // utf8 has no shift states, hence no unshift sequence
    to_next = to;
    result const nothing_written = base::noconv;
    return nothing_written;
}
// Converts the UTF-8 bytes [from, from_end) to internal characters in
// [to, to_end).
//
// The iconv descriptor is opened on the first call. On return from_next and
// to_next point just past the input consumed / output produced by iconv().
// Returns the result of do_iconv(); throws std::exception if the descriptor
// cannot be opened.
utf8_codecvt_facet::result utf8_codecvt_facet::do_in(state_type &,
        extern_type const * from, extern_type const * from_end,
        extern_type const *& from_next,
        intern_type * to, intern_type * to_end,
        intern_type *& to_next) const
{
    if (in_cd_ == (iconv_t)(-1)) {
        // The output is stored into intern_type integers, so iconv has to
        // emit code points in the byte order of the host. Plain "UCS-4"
        // means big endian in the common iconv implementations and would
        // yield byte-swapped characters on little-endian machines, so name
        // the explicit-endian variant that matches the host.
        intern_type const probe = 1;
        bool const little_endian = *reinterpret_cast<unsigned char const *>(&probe) == 1;
        in_cd_ = iconv_open(little_endian ? "UCS-4LE" : "UCS-4BE", "UTF-8");
        if (in_cd_ == (iconv_t)(-1)) {
            fprintf(stderr, "Error %d returned from iconv_open(in_cd_): %s\n", errno, strerror(errno));
            fflush(stderr);
            throw std::exception();
        }
    }
    // iconv() counts in bytes, the codecvt interface in characters
    size_t inbytesleft = (from_end - from) * sizeof(extern_type);
    size_t outbytesleft = (to_end - to) * sizeof(intern_type);
    from_next = from;
    to_next = to;
    return do_iconv(in_cd_, &from_next, &inbytesleft, reinterpret_cast<char **>(&to_next), &outbytesleft);
}
// Always reports 0, the std::codecvt value for a variable-width encoding.
int utf8_codecvt_facet::do_encoding() const throw()
{
    int const variable_width = 0;
    return variable_width;
}
// Always false: this facet never claims that conversion can be skipped.
bool utf8_codecvt_facet::do_always_noconv() const throw()
{
    bool const conversion_can_be_skipped = false;
    return conversion_can_be_skipped;
}
// Returns the smaller of the number of bytes in [from, end) and max.
// NOTE(review): no decoding takes place, so multibyte sequences are not
// taken into account -- confirm that this approximation is good enough for
// the callers.
int utf8_codecvt_facet::do_length(state_type & /*state*/, extern_type const * from, extern_type const * end, size_t max) const
{
    // Disabled variant that measures by actually converting the input. It
    // refers to `state`, whose parameter name is commented out above.
#if 0
intern_type * to = new intern_type[max];
intern_type * to_end = to + max;
intern_type * to_next = to;
extern_type const * from_next = from;
do_in(state, from, end, from_next, to, to_end, to_next);
delete[] to;
return to_next - to;
#endif
    size_t const available = end - from;
    return max < available ? max : available;
}
// Upper bound on the number of external bytes for one internal character.
int utf8_codecvt_facet::do_max_length() const throw()
{
    // UTF8 uses at most 6 bytes to represent one code point
    int const max_bytes_per_code_point = 6;
    return max_bytes_per_code_point;
}
// ctype facet for UCS4 streams. Only ASCII can be widened or narrowed,
// because nothing else is needed.
//
// Widening a char >= 128 throws std::bad_cast. Narrowing a character >= 128
// yields the supplied default when wctob() reports EOF for it and throws
// std::bad_cast otherwise.
class ascii_ctype_facet : public std::ctype<lyx::char_type>
{
public:
    typedef lyx::char_type char_type;
public:
    explicit ascii_ctype_facet(size_t refs = 0) : std::ctype<char_type>(refs) {}
protected:
    virtual ~ascii_ctype_facet() {}

    // Widens one ASCII char; throws std::bad_cast for anything else.
    virtual char_type do_widen(char c) const
    {
        if (static_cast<unsigned char>(c) >= 128)
            throw std::bad_cast();
        return c;
    }

    // Widens [lo, hi) into dest and returns hi. Throws std::bad_cast at the
    // first non-ASCII char; everything before it has been stored already.
    virtual const char* do_widen(const char* lo, const char* hi, char_type* dest) const
    {
        for (; lo < hi; ++lo, ++dest) {
            if (static_cast<unsigned char>(*lo) >= 128)
                throw std::bad_cast();
            *dest = *lo;
        }
        return hi;
    }

    // Narrows one character, see the class comment for the non-ASCII rules.
    virtual char do_narrow(char_type wc, char dfault) const
    {
        if (wc >= 128) {
            if (wctob(wc) != EOF)
                throw std::bad_cast();
            return dfault;
        }
        return wc;
    }

    // Narrows [lo, hi) into dest and returns hi, applying the single
    // character rules to every element in order.
    virtual const char_type * do_narrow(const char_type * lo, const char_type * hi, char dfault, char * dest) const
    {
        for (; lo < hi; ++lo, ++dest) {
            if (*lo < 128) {
                *dest = *lo;
                continue;
            }
            if (wctob(*lo) != EOF)
                throw std::bad_cast();
            *dest = dfault;
        }
        return hi;
    }
};
// Writes the single char c to a lyx::char_type stream: c is converted with a
// plain value cast and handed to put().
std::basic_ostream<lyx::char_type> & operator<<(std::basic_ostream<lyx::char_type> & os, char c)
{
    lyx::char_type const wide = lyx::char_type(c);
    os.put(wide);
    return os;
}
int main()
{
std::locale const utf8_1(std::locale("C"), new utf8_codecvt_facet);
std::locale const utf8(utf8_1, new ascii_ctype_facet);
// std::cerr << "utf8 has std::codecvt<lyx::char_type, char, std::mbstate_t> facet: " << std::has_facet<std::codecvt<lyx::char_type, char, std::mbstate_t> >(utf8) << std::endl;
// std::cerr << "utf8 has utf8_codecvt_facet facet: " << std::has_facet<utf8_codecvt_facet>(utf8) << std::endl;
// std::cerr << "utf8 has std::ctype<lyx::char_type> facet: " << std::has_facet<std::ctype<lyx::char_type> >(utf8) << std::endl;
std::basic_ofstream<lyx::char_type> os;
os.imbue(utf8);
os.open("stream.out");
os << " abc";
os.put(lyx::char_type('d'));
os << '\n';
lyx::char_type ae = 0xe4;
lyx::char_type oe = 0xf6;
lyx::char_type ue = 0xfc;
lyx::char_type Ae = 0xc4;
lyx::char_type Oe = 0xd6;
lyx::char_type Ue = 0xdc;
std::basic_string<lyx::char_type> s;
s += Ae;
s += Oe;
s += Ue;
os << ' '; os.put(ae); os.put(oe); os.put(ue); os << '\n';
std::cerr << std::hex << ' ' << ae << ' ' << oe << ' ' << ue << '\n';
os << ' ' << s << '\n';
std::cerr << std::hex << ' ' << Ae << ' ' << Oe << ' ' << Ue << '\n';
os << " abcd\n";
return 0;
}