On 3/18/12 1:26 PM, dennis.birkh...@rwth-aachen.de wrote:

> Bash Version: 4.2
> Patch Level: 24
> Release Status: release
> 
> Description:
>       Some UTF-8 multibyte characters are not printed correctly, although UTF-8
> generally works: the "ä" in "März" (displayed via ls) is shown correctly.
> 
> Repeat-By:
>       bash-4.2$ äää
>       bash: $'\303\244\303\244\303\244': command not found
>       
>       bash-4.1$ äää
>       bash: äää: command not found

Bash-4.2 changed the command-not-found message to attempt to make sure that
all the characters it displayed were printable, to catch weird characters
in filenames and prevent the display from messing up the terminal.  That
code needed some more work to accommodate multibyte characters.  Try the
attached patch and see if it fixes your problem; it's from the current
development sources.

Chet
-- 
``The lyf so short, the craft so long to lerne.'' - Chaucer
                 ``Ars longa, vita brevis'' - Hippocrates
Chet Ramey, ITS, CWRU    c...@case.edu    http://cnswww.cns.cwru.edu/~chet/
*** ../bash-4.2-patched/lib/sh/strtrans.c	2010-11-06 19:29:14.000000000 -0400
--- lib/sh/strtrans.c	2012-03-19 20:03:04.000000000 -0400
***************
*** 31,34 ****
--- 31,37 ----
  #include "shell.h"
  
+ #include "shmbchar.h"
+ #include "shmbutil.h"
+ 
  #ifdef ESC
  #undef ESC
***************
*** 209,212 ****
--- 212,220 ----
    int l, rsize;
    unsigned char c;
+   size_t slen, clen;
+ #if defined (HANDLE_MULTIBYTE)
+   int b;
+   wchar_t wc;
+ #endif
  
    if (str == 0 || *str == 0)
***************
*** 220,227 ****
    *r++ = '\'';
  
!   for (s = str, l = 0; *s; s++)
      {
-       c = *s;
        l = 1;		/* 1 == add backslash; 0 == no backslash */
        switch (c)
  	{
--- 228,239 ----
    *r++ = '\'';
  
!   s = str;
!   slen = strlen (str);
! 
!   for (s = str; c = *s; s++)
      {
        l = 1;		/* 1 == add backslash; 0 == no backslash */
+       clen = 1;
+ 
        switch (c)
  	{
***************
*** 244,248 ****
--- 256,266 ----
  	  break;
  	default:
+ #if defined (HANDLE_MULTIBYTE)
+ 	  b = is_basic (c);
+ 	  if ((b == 0 && ((clen = mbrtowc (&wc, s, MB_CUR_MAX, 0)) < 0 || iswprint (wc) == 0)) ||
+ 	      (b == 1 && ISPRINT (c) == 0))
+ #else
  	  if (ISPRINT (c) == 0)
+ #endif
  	    {
  	      *r++ = '\\';
***************
*** 257,261 ****
        if (l)
  	*r++ = '\\';
!       *r++ = c;
      }
  
--- 275,284 ----
        if (l)
  	*r++ = '\\';
! 
!       if (clen == 1)
! 	*r++ = c;
!       else
! 	for (b = 0; b < (int)clen; c = b ? *++s : c)
! 	  *r++ = c;
      }
  
***************
*** 267,270 ****
--- 290,324 ----
  }
  
+ #if defined (HANDLE_MULTIBYTE)
+ int
+ ansic_wshouldquote (string)
+      const char *string;
+ {
+   const wchar_t *wcs;
+   wchar_t wcc;
+ 
+   wchar_t *wcstr = NULL;
+   size_t wclen, slen;
+ 
+ 
+   slen = mbstowcs (wcstr, string, 0);
+ 
+   if (slen == -1)
+     slen = 0;
+   wcstr = (wchar_t *)xmalloc (sizeof (wchar_t) * (slen + 1));
+   mbstowcs (wcstr, string, slen + 1);
+ 
+   for (wcs = wcstr; wcc = *wcs; wcs++)
+     if (iswprint(wcc) == 0)
+       {
+ 	free (wcstr);
+ 	return 1;
+       }
+ 
+   free (wcstr);
+   return 0;
+ }
+ #endif
+ 
  /* return 1 if we need to quote with $'...' because of non-printing chars. */
  int
***************
*** 279,284 ****
  
    for (s = string; c = *s; s++)
!     if (ISPRINT (c) == 0)
!       return 1;
  
    return 0;
--- 333,344 ----
  
    for (s = string; c = *s; s++)
!     {
! #if defined (HANDLE_MULTIBYTE)
!       if (is_basic (c) == 0)
! 	return (ansic_wshouldquote (s));
! #endif
!       if (ISPRINT (c) == 0)
! 	return 1;
!     }
  
    return 0;

Reply via email to