Hi, I wonder if there are any plans to add multibyte support to ls(1)? Or maybe there's a reason why it's not a great idea (one that I am not aware of)? Anyway, here's a patch I have. It's based on DragonFly BSD's ls.
diff -u ls/ls.c ls/ls.c
--- ls/ls.c	Wed Nov 24 17:39:05 2010
+++ ls/ls.c	Tue Jan 4 19:44:35 2011
@@ -42,6 +42,7 @@
 #include <errno.h>
 #include <fts.h>
 #include <grp.h>
+#include <locale.h>
 #include <pwd.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -101,6 +102,8 @@
 	int ch, fts_options, notused;
 	int kflag = 0;
 	char *p;
+
+	setlocale(LC_ALL, "");
 
 	/* Terminal defaults to -Cq, non-terminal defaults to -1. */
 	if (isatty(STDOUT_FILENO)) {
diff -u ls/util.c ls/util.c
--- ls/util.c	Wed Nov 24 17:39:05 2010
+++ ls/util.c	Tue Jan 4 21:04:22 2011
@@ -35,12 +35,14 @@
 
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/limits.h>
 
 #include <ctype.h>
 #include <fts.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <wchar.h>
 
 #include "ls.h"
 #include "extern.h"
@@ -48,11 +50,44 @@
 int
 putname(char *name)
 {
-	int len;
+	mbstate_t mbs;
+	wchar_t wc;
+	int i, len;
+	size_t clen;
 
-	for (len = 0; *name; len++, name++)
-		putchar((!isprint(*name) && f_nonprint) ? '?' : *name);
-	return len;
+	memset(&mbs, 0, sizeof(mbs));
+	len = 0;
+	while ((clen = mbrtowc(&wc, name, MB_LEN_MAX, &mbs)) != 0) {
+		if (clen == (size_t)-1) {
+			if (f_nonprint)
+				putchar('?');
+			else
+				putchar((unsigned char)*name);
+			name++;
+			len++;
+			memset(&mbs, 0, sizeof(mbs));
+			continue;
+		}
+		if (clen == (size_t)-2) {
+			if (f_nonprint) {
+				putchar('?');
+				len++;
+			} else
+				len += printf("%s", name);
+			break;
+		}
+		if (f_nonprint && !iswprint(wc)) {
+			putchar('?');
+			name += clen;
+			len++;
+			continue;
+		}
+		for (i = 0; i < (int)clen; i++)
+			putchar((unsigned char)name[i]);
+		name += clen;
+		len += wcwidth(wc);
+	}
+	return (len);
 }
 
 void