On 3/18/12 1:26 PM, [email protected] wrote:
> Bash Version: 4.2
> Patch Level: 24
> Release Status: release
>
> Description:
> Some UTF-8 multibyte characters are not printed correctly, even though UTF-8
> generally works: for example, the "ä" in "März" (displayed via ls) is shown correctly.
>
> Repeat-By:
> bash-4.2$ äää
> bash: $'\303\244\303\244\303\244': command not found
>
> bash-4.1$ äää
> bash: äää: command not found
Bash-4.2 changed the command-not-found message to try to ensure that every
character it displays is printable, so that unusual characters in filenames
are caught and cannot corrupt the terminal display. That code needed some
more work to accommodate multibyte characters. Try the attached patch and
see if it fixes your problem; it's from the current development sources.
Chet
--
``The lyf so short, the craft so long to lerne.'' - Chaucer
``Ars longa, vita brevis'' - Hippocrates
Chet Ramey, ITS, CWRU [email protected] http://cnswww.cns.cwru.edu/~chet/
*** ../bash-4.2-patched/lib/sh/strtrans.c 2010-11-06 19:29:14.000000000 -0400
--- lib/sh/strtrans.c 2012-03-19 20:03:04.000000000 -0400
***************
*** 31,34 ****
--- 31,37 ----
#include "shell.h"
+ #include "shmbchar.h"
+ #include "shmbutil.h"
+
#ifdef ESC
#undef ESC
***************
*** 209,212 ****
--- 212,220 ----
int l, rsize;
unsigned char c;
+ size_t slen, clen;
+ #if defined (HANDLE_MULTIBYTE)
+ int b;
+ wchar_t wc;
+ #endif
if (str == 0 || *str == 0)
***************
*** 220,227 ****
*r++ = '\'';
! for (s = str, l = 0; *s; s++)
{
- c = *s;
l = 1; /* 1 == add backslash; 0 == no backslash */
switch (c)
{
--- 228,239 ----
*r++ = '\'';
! s = str;
! slen = strlen (str);
!
! for (s = str; c = *s; s++)
{
l = 1; /* 1 == add backslash; 0 == no backslash */
+ clen = 1;
+
switch (c)
{
***************
*** 244,248 ****
--- 256,266 ----
break;
default:
+ #if defined (HANDLE_MULTIBYTE)
+ b = is_basic (c);
+ if ((b == 0 && ((clen = mbrtowc (&wc, s, MB_CUR_MAX, 0)) < 0 || iswprint (wc) == 0)) ||
+ (b == 1 && ISPRINT (c) == 0))
+ #else
if (ISPRINT (c) == 0)
+ #endif
{
*r++ = '\\';
***************
*** 257,261 ****
if (l)
*r++ = '\\';
! *r++ = c;
}
--- 275,284 ----
if (l)
*r++ = '\\';
!
! if (clen == 1)
! *r++ = c;
! else
! for (b = 0; b < (int)clen; c = b ? *++s : c)
! *r++ = c;
}
***************
*** 267,270 ****
--- 290,324 ----
}
+ #if defined (HANDLE_MULTIBYTE)
+ int
+ ansic_wshouldquote (string)
+ const char *string;
+ {
+ const wchar_t *wcs;
+ wchar_t wcc;
+
+ wchar_t *wcstr = NULL;
+ size_t wclen, slen;
+
+
+ slen = mbstowcs (wcstr, string, 0);
+
+ if (slen == -1)
+ slen = 0;
+ wcstr = (wchar_t *)xmalloc (sizeof (wchar_t) * (slen + 1));
+ mbstowcs (wcstr, string, slen + 1);
+
+ for (wcs = wcstr; wcc = *wcs; wcs++)
+ if (iswprint(wcc) == 0)
+ {
+ free (wcstr);
+ return 1;
+ }
+
+ free (wcstr);
+ return 0;
+ }
+ #endif
+
/* return 1 if we need to quote with $'...' because of non-printing chars. */
int
***************
*** 279,284 ****
for (s = string; c = *s; s++)
! if (ISPRINT (c) == 0)
! return 1;
return 0;
--- 333,344 ----
for (s = string; c = *s; s++)
! {
! #if defined (HANDLE_MULTIBYTE)
! if (is_basic (c) == 0)
! return (ansic_wshouldquote (s));
! #endif
! if (ISPRINT (c) == 0)
! return 1;
! }
return 0;