-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 Configuration Information [Automatically generated, do not change]: Machine: x86_64 OS: linux-gnu Compiler: gcc Compilation CFLAGS: -DPROGRAM='bash' -DCONF_HOSTTYPE='x86_64' - -DCONF_OSTYPE='linux-gnu' -DCONF_MACHTYPE='x86_64-pc-linux-gnu' - -DCONF_VENDOR='pc' -DLOCALEDIR='/usr/share/locale' -DPACKAGE='bash' - -DSHELL -DHAVE_CONFIG_H -I. -I../bash -I../bash/include - -I../bash/lib -g -O2 -Wall uname output: Linux DETH00 3.0.0-15-generic #26-Ubuntu SMP Fri Jan 20 17:23:00 UTC 2012 x86_64 x86_64 x86_64 GNU/Linux Machine Type: x86_64-pc-linux-gnu
Bash Version: 4.2 Patch Level: 10 Release Status: release Description: The current u32toutf8 only encodes values up to 0xffff correctly. Because the size of wchar_t is implementation-dependent, it would be better, in my opinion, to use unsigned long, uint32_t, or some other type whose width is clearer. Repeat-By: -------' Fix: diff --git a/lib/sh/unicode.c b/lib/sh/unicode.c index d34fa08..3f7d378 100644 - --- a/lib/sh/unicode.c +++ b/lib/sh/unicode.c @@ -54,7 +54,7 @@ extern const char *locale_charset __P((void)); extern char *get_locale_var __P((char *)); #endif - -static int u32init = 0; +static int u32init = 0; static int utf8locale = 0; #if defined (HAVE_ICONV) static iconv_t localconv; @@ -115,26 +115,61 @@ u32tochar (wc, s) } int - -u32toutf8 (wc, s) - - wchar_t wc; +u32toutf8 (c, s) + unsigned long c; char *s; { int l; - - l = (wc < 0x0080) ? 1 : ((wc < 0x0800) ? 2 : 3); - - - - if (wc < 0x0080) - - s[0] = (unsigned char)wc; - - else if (wc < 0x0800) + if (c <= 0x7F) + { + s[0] = (char)c; + l = 1; + } + else if (c <= 0x7FF) + { + s[0] = (c >> 6) | 0xc0; /* 110x xxxx */ + s[1] = (c & 0x3f) | 0x80; /* 10xx xxxx */ + l = 2; + } + else if (c <= 0xFFFF) + { + s[0] = (c >> 12) | 0xe0; /* 1110 xxxx */ + s[1] = ((c >> 6) & 0x3f) | 0x80; /* 10xx xxxx */ + s[2] = (c & 0x3f) | 0x80; /* 10xx xxxx */ + l = 3; + } + else if (c <= 0x1FFFFF) { - - s[0] = (wc >> 6) | 0xc0; - - s[1] = (wc & 0x3f) | 0x80; + s[0] = (c >> 18) | 0xf0; /* 1111 0xxx */ + s[1] = ((c >> 12) & 0x3f) | 0x80; /* 10xx xxxx */ + s[2] = ((c >> 6) & 0x3f) | 0x80; /* 10xx xxxx */ + s[3] = ( c & 0x3f) | 0x80; /* 10xx xxxx */ + l = 4; + } + else if (c <= 0x3FFFFFF) + { + s[0] = (c >> 24) | 0xf8; /* 1111 10xx */ + s[1] = ((c >> 18) & 0x3f) | 0x80; /* 10xx xxxx */ + s[2] = ((c >> 12) & 0x3f) | 0x80; /* 10xx xxxx */ + s[3] = ((c >> 6) & 0x3f) | 0x80; /* 10xx xxxx */ + s[4] = ( c & 0x3f) | 0x80; /* 10xx xxxx */ + l = 5; + } + else if (c <= 0x7FFFFFFF) + { + s[0] = (c >> 30) | 0xfc; /* 1111 110x */ + s[1] = ((c >> 24) & 0x3f) | 0x80; /* 10xx xxxx */ + s[2] = ((c >> 18) & 0x3f) 
| 0x80; /* 10xx xxxx */ + s[3] = ((c >> 12) & 0x3f) | 0x80; /* 10xx xxxx */ + s[4] = ((c >> 6) & 0x3f) | 0x80; /* 10xx xxxx */ + s[5] = ( c & 0x3f) | 0x80; /* 10xx xxxx */ + l = 6; } else { - - s[0] = (wc >> 12) | 0xe0; - - s[1] = ((wc >> 6) & 0x3f) | 0x80; - - s[2] = (wc & 0x3f) | 0x80; + /* Error Invalid UTF-8 */ + l = 0; } s[l] = '\0'; return l; @@ -150,7 +185,7 @@ u32cconv (c, s) -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.11 (GNU/Linux) Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/ iQEcBAEBAgAGBQJPP3/tAAoJEKUDtR0WmS059CcH/iIyBOGhf0IgSmnIFyw0YLpA 3ZWSaXWoEZodrDr1fX67hj2424icXm9fTZw70G+rS1YjtCfm86O/Qou4VNROylAv TbjPUWkHRWVci7IqcDGb1tNWRrulxUvNFA/Uc1xBtKckAO6HHHRTYFa+sCkd5Fnx dm7e0iMTqMMmL/dUwB+di+hSkGD+ZXS1vY76wizdwG7CteUxAVunse+ffP7TRYbn K86Whc7p7llG12hruCPGArc9iS7YiBaC/XNIKXmN7fn93dhQTcdzzk/UTGmaZgDk cQk4R7/NBljP4LtQtKwX4JYAi5XJM5TeSLykL97UFxW/5OGM+SmSVJbKLlHU/mQ= =EJUb -----END PGP SIGNATURE-----