I made a UTF-8 text file for testing. It contains UTF-8-encoded
KS X 1001 (KS C 5601) characters. When I convert it to EUC-KR with iconv,
no conversion error occurs.
In an X environment, I created a text property with
Xutf8TextListToTextProperty and then converted the generated text property
back to a text list by calling XmbTextPropertyToTextList. In a UTF-8 locale
there was no error, but in the ko_KR.EUC-KR locale some errors occurred.
Almost all characters were converted successfully, but the following were not:
Unconvertable: U+00B7 [·(2)]
Unconvertable: U+00A8 [¨(2)]
Unconvertable: U+00AD [­(2)]
Unconvertable: U+00B1 [±(2)]
Unconvertable: U+00D7 [×(2)]
Unconvertable: U+00F7 [÷(2)]
Unconvertable: U+00B0 [°(2)]
Unconvertable: U+00A7 [§(2)]
Unconvertable: U+00B4 [´(2)]
Unconvertable: U+00B8 [¸(2)]
Unconvertable: U+00A1 [¡(2)]
Unconvertable: U+00BF [¿(2)]
Unconvertable: U+00A4 [¤(2)]
Unconvertable: U+00B6 [¶(2)]
Unconvertable: U+20AC [€(3)]
Unconvertable: U+00AE [®(2)]
Unconvertable: U+00C6 [Æ(2)]
Unconvertable: U+00D0 [Ð(2)]
Unconvertable: U+00AA [ª(2)]
Unconvertable: U+00D8 [Ø(2)]
Unconvertable: U+00BA [º(2)]
Unconvertable: U+00DE [Þ(2)]
Unconvertable: U+00BD [½(2)]
Unconvertable: U+00BC [¼(2)]
Unconvertable: U+00BE [¾(2)]
Unconvertable: U+00E6 [æ(2)]
Unconvertable: U+00F0 [ð(2)]
Unconvertable: U+00F8 [ø(2)]
Unconvertable: U+00DF [ß(2)]
Unconvertable: U+00FE [þ(2)]
Unconvertable: U+00B9 [¹(2)]
Unconvertable: U+00B2 [²(2)]
Unconvertable: U+00B3 [³(2)]
I do not think this is the desired result.
The characters above are defined in KS X 1001, so iconv converts them
correctly, but the conversion fails in X. Is this the correct behavior?
I wrote a small test case and attached the files.
You can reproduce the problem by following these instructions:
$ gmake
$ LANG=ko_KR.UTF-8 ./conversion < utf8.txt # no error
$ LANG=ko_KR.euckr ./conversion < utf8.txt # error
$ iconv -f utf8 -t euckr utf8.txt > /dev/null # no error
PS.
I'm not a member of this mailing list, so please cc me.
# Build the "conversion" test program against Xlib and GLib 2.0.
CFLAGS = -I/usr/X11R6/include $(shell pkg-config --cflags glib-2.0)
LIBS = -L/usr/X11R6/lib -lX11 $(shell pkg-config --libs glib-2.0)

# The recipe line must begin with a tab. Libraries are listed after the
# source file so the linker resolves the symbols that conversion.c references.
all:
	gcc -Wall -ggdb $(CFLAGS) -o conversion conversion.c $(LIBS)
#include <stdio.h>
#include <string.h>
#include <locale.h>
#include <X11/Xlib.h>
#include <X11/Xutil.h>
#include <glib.h>
/* X display connection: opened and closed in main(), used by textpropertytest(). */
static Display *display;
void textpropertytest(char *utf8)
{
int n, ret;
XTextProperty text_prop;
char *list[2];
char **return_list = NULL;
list[0] = utf8;
list[1] = 0;
n = 1;
ret = Xutf8TextListToTextProperty(display, list, n,
XCompoundTextStyle,
text_prop);
if (ret != Success) {
fprintf(stderr, Error on utf8 - TextProperty (%d)\n, ret);
} else {
ret = XmbTextPropertyToTextList(display, text_prop, return_list, n);
if (ret != Success) {
printf(Unconvertable: U+%04X [%s(%d)]\n,
g_utf8_get_char(utf8), utf8, strlen(utf8));
}
XFreeStringList(return_list);
}
}
/*
 * Read text from standard input line by line and run textpropertytest()
 * on every UTF-8 character of each line that validates as UTF-8.
 *
 * Lines that fail validation are reported on stderr, starting at the first
 * invalid byte.  Returns 1 if the X display cannot be opened, 0 otherwise.
 */
int
main(void)
{
    char *locale_name;
    char buf[256];
    char utf8char[10];
    const gchar *end;
    gchar *ptr;
    gchar *strend;

    /* Adopt the locale from the environment; setlocale() returns NULL on failure. */
    locale_name = setlocale(LC_ALL, "");
    fprintf(stderr, "Locale: %s\n",
            locale_name != NULL ? locale_name : "(null)");

    display = XOpenDisplay(NULL);
    if (display == NULL) {
        fprintf(stderr, "Error on XOpenDisplay\n");
        return 1;
    }

    /*
     * Loop on the fgets() result rather than feof(): feof() only becomes
     * true after a read has failed, so testing it first would process the
     * stale contents of buf a second time at end of input.
     */
    while (fgets(buf, sizeof(buf), stdin) != NULL) {
        if (!g_utf8_validate(buf, -1, &end)) {
            fprintf(stderr, "Invalid utf8 string: %s\n", end);
            continue;
        }

        strend = strchr(buf, '\0');
        for (ptr = buf; ptr < strend; ptr = g_utf8_find_next_char(ptr, NULL)) {
            /* Copy exactly one character into a zero-filled scratch buffer. */
            memset(utf8char, 0, sizeof(utf8char));
            g_utf8_strncpy(utf8char, ptr, 1);
            if (utf8char[0] != '\0') {
                textpropertytest(utf8char);
            }
        }
    }

    XCloseDisplay(display);
    return 0;
}
A1A0:
A1B0:
A1C0:
A1D0:
A1E0:
A1F0:
A2A0:
A2B0:
A2C0:
A2D0:
A2E0:
A2F0:
A3A0:
A3B0:
A3C0:
A3D0:
A3E0:
A3F0:
A4A0:
A4B0:
A4C0:
A4D0:
A4E0:
A4F0:
A5A0:
A5B0:
A5C0:
A5D0:
A5E0:
A5F0:
A6A0:
A6B0:
A6C0:
A6D0:
A6E0:
A6F0:
A7A0:
A7B0:
A7C0:
A7D0:
A7E0:
A7F0:
A8A0:
A8B0:
A8C0:
A8D0:
A8E0:
A8F0:
A9A0:
A9B0:
A9C0:
A9D0:
A9E0:
A9F0:
AAA0:
AAB0:
AAC0:
AAD0:
AAE0:
AAF0:
ABA0:
ABB0:
ABC0:
ABD0:
ABE0:
ABF0:
ACA0:
ACB0:
ACC0:
ACD0:
ACE0:
ACF0:
ADA0:
ADB0:
ADC0:
ADD0:
ADE0:
ADF0:
AEA0:
AEB0:
AEC0:
AED0:
AEE0:
AEF0:
AFA0:
AFB0:
AFC0:
AFD0:
AFE0:
AFF0:
B0A0:
B0B0:
B0C0:
B0D0:
B0E0:
B0F0:
B1A0:
B1B0:
B1C0:
B1D0:
B1E0:
B1F0:
B2A0:
B2B0:
B2C0:
B2D0:
B2E0:
B2F0:
B3A0:
B3B0:
B3C0:
B3D0:
B3E0:
B3F0:
B4A0:
B4B0:
B4C0:
B4D0:
B4E0:
B4F0:
B5A0:
B5B0:
B5C0:
B5D0:
B5E0:
B5F0:
B6A0:
B6B0:
B6C0:
B6D0:
B6E0:
B6F0:
B7A0:
B7B0:
B7C0:
B7D0:
B7E0:
B7F0:
B8A0:
B8B0:
B8C0:
B8D0:
B8E0:
B8F0:
B9A0:
B9B0:
B9C0:
B9D0:
B9E0:
B9F0:
BAA0:
BAB0:
BAC0:
BAD0:
BAE0:
BAF0:
BBA0:
BBB0:
BBC0:
BBD0:
BBE0:
BBF0:
BCA0:
BCB0:
BCC0:
BCD0:
BCE0:
BCF0:
BDA0:
BDB0:
BDC0:
BDD0:
BDE0:
BDF0:
BEA0:
BEB0:
BEC0:
BED0:
BEE0:
BEF0:
BFA0:
BFB0:
BFC0:
BFD0:
BFE0:
BFF0:
C0A0:
C0B0:
C0C0:
C0D0:
C0E0:
C0F0:
C1A0:
C1B0:
C1C0:
C1D0:
C1E0:
C1F0:
C2A0:
C2B0:
C2C0:
C2D0:
C2E0:
C2F0:
C3A0:
C3B0:
C3C0:
C3D0:
C3E0:
C3F0:
C4A0:
C4B0:
C4C0:
C4D0:
C4E0:
C4F0:
C5A0: