Hi, here is one program that i'd better not break.
This isn't used in the ramdisks, right? Is anybody aware of any reacharound? A few remarks: * The important use cases are non-ASCII characters in command line arguments, environment variables, and program names. Even though i don't intend to support non-ASCII user and group names in general, it costs nothing to handle them gracefully here, the code even becomes a few lines shorter. * The specific UTF-8 handling function mbswprint() - for "multi-byte string width-limited print" - is yet again somewhat different from what other utilities needed, both because no other utility needed this kind of truncation yet and because ps(1) wants to do a peculiar vis(3) encoding. Anyway, the function is almost as short as the others and uses the same interfaces in the same ways, plus vis(3). * It might be arguable whether the vis(3) encoding done by ps(1) is ideal. But that seems like a different topic. For now, i'm making sure that nothing changes in the C/POSIX locale and that even in a UTF-8 locale, nothing changes for invalid bytes. Even in a UTF-8 locale, the new code only lets through printable UTF-8 characters, doing correct columnation for them. * In a UTF-8 locale, i'm replacing valid, but non-printable UTF-8 characters with the Unicode replacement character U+FFFD. Letting them through would be a bad idea because they might get assigned dangerous purposes in the future. Alternatively, they could be treated just like invalid bytes, C-encoding the bytes individually. I don't feel strongly about that. * For a single function (mbswprint) used from a single file (print.c), i don't see the point in polluting header files. It's more straightforward and easier on the eye to just put the prototype into the one file using it. * The width limiting code in fmt.c, used only from print.c, becomes completely obsolete; if this gets committed, i'll cvs rm the file. 
The new file utf8.c includes the functionality in simpler form, along with the more complicated multibyte character handling. Yet, the new file is shorter and cannot fail in malloc(3). * Grand total, the diff is -142 +114 lines. Comments, concerns, OKs? Ingo Index: Makefile =================================================================== RCS file: /cvs/src/bin/ps/Makefile,v retrieving revision 1.9 diff -u -p -r1.9 Makefile --- Makefile 16 Jul 2014 19:57:34 -0000 1.9 +++ Makefile 15 Dec 2015 08:12:45 -0000 @@ -1,7 +1,7 @@ # $OpenBSD: Makefile,v 1.9 2014/07/16 19:57:34 okan Exp $ PROG= ps -SRCS= fmt.c keyword.c nlist.c print.c ps.c +SRCS= keyword.c nlist.c print.c ps.c utf8.c DPADD= ${LIBM} ${LIBKVM} LDADD= -lm -lkvm Index: extern.h =================================================================== RCS file: /cvs/src/bin/ps/extern.h,v retrieving revision 1.17 diff -u -p -r1.17 extern.h --- extern.h 29 Jun 2015 15:03:33 -0000 1.17 +++ extern.h 15 Dec 2015 08:12:45 -0000 @@ -48,8 +48,6 @@ void command(const struct kinfo_proc *, void cputime(const struct kinfo_proc *, VARENT *); int donlist(void); void emulname(const struct kinfo_proc *, VARENT *); -void fmt_puts(const char *, int *); -void fmt_putc(int, int *); double getpcpu(const struct kinfo_proc *); double getpmem(const struct kinfo_proc *); void gname(const struct kinfo_proc *, VARENT *); Index: print.c =================================================================== RCS file: /cvs/src/bin/ps/print.c,v retrieving revision 1.64 diff -u -p -r1.64 print.c --- print.c 25 Oct 2015 15:26:53 -0000 1.64 +++ print.c 15 Dec 2015 08:12:45 -0000 @@ -55,6 +55,8 @@ extern kvm_t *kd; extern int needenv, needcomm, neednlist, commandonly; +int mbswprint(const char *, int, int); /* utf8.c */ + static char *cmdpart(char *); #define min(a,b) ((a) < (b) ? (a) : (b)) @@ -97,6 +99,13 @@ command(const struct kinfo_proc *kp, VAR int left, wantspace = 0; char **argv, **p; + /* + * Determine the available number of display columns. 
+ * Always decrement and check after writing. + * No check is needed before mbswprint() + * and after writing the last data, though. + */ + v = ve->var; if (ve->next != NULL || termwidth != UNLIMITED) { if (ve->next == NULL) { @@ -106,74 +115,76 @@ command(const struct kinfo_proc *kp, VAR } else left = v->width; } else - left = -1; + left = INT_MAX; + if (needenv && kd != NULL) { argv = kvm_getenvv(kd, kp, termwidth); if ((p = argv) != NULL) { while (*p) { - fmt_puts(*p, &left); + if (wantspace) { + putchar(' '); + left--; + } + left -= mbswprint(*p, left, 0); + if (left == 0) + return; p++; - if (*p) - fmt_putc(' ', &left); - else - wantspace = 1; + wantspace = 1; } } } else argv = NULL; + if (needcomm) { if (!commandonly) { if (kd != NULL) { argv = kvm_getargv(kd, kp, termwidth); if ((p = argv) != NULL) { - if (wantspace) { - fmt_putc(' ', &left); - wantspace = 0; - } while (*p) { - fmt_puts(*p, &left); + if (wantspace) { + putchar(' '); + left--; + } + left -= mbswprint(*p, left, 0); + if (left == 0) + return; p++; - if (*p) - fmt_putc(' ', &left); - else - wantspace = 1; + wantspace = 1; } } } if (argv == NULL || argv[0] == '\0' || strcmp(cmdpart(argv[0]), kp->p_comm)) { if (wantspace) { - fmt_putc(' ', &left); - wantspace = 0; + putchar(' '); + if (--left == 0) + return; } - fmt_putc('(', &left); - fmt_puts(kp->p_comm, &left); - fmt_putc(')', &left); + putchar('('); + left--; + left -= mbswprint(kp->p_comm, left, 0); + if (left == 0) + return; + putchar(')'); + left--; } } else { if (wantspace) { - fmt_putc(' ', &left); - wantspace = 0; + putchar(' '); + left--; } - fmt_puts(kp->p_comm, &left); + left -= mbswprint(kp->p_comm, left, 0); } } - if (ve->next && left > 0) { - if (wantspace) { - fmt_putc(' ', &left); - wantspace = 0; - } - printf("%*s", left, ""); - } + if (ve->next != NULL) + while (left-- > 0) + putchar(' '); } void ucomm(const struct kinfo_proc *kp, VARENT *ve) { - VAR *v; - - v = ve->var; - (void)printf("%-*s", v->width, kp->p_comm); + 
mbswprint(kp->p_comm, ve->var->width, ve->next != NULL); } void @@ -182,16 +193,11 @@ curwd(const struct kinfo_proc *kp, VAREN int name[] = { CTL_KERN, KERN_PROC_CWD, kp->p_pid }; char path[PATH_MAX]; size_t pathlen = sizeof path; - int left; - - left = ve->var->width; if (!kvm_sysctl_only || sysctl(name, 3, path, &pathlen, NULL, 0) != 0) *path = '\0'; - fmt_puts(path, &left); - if (ve->next != NULL && left) - (void)printf("%-*s", left, ""); + mbswprint(path, ve->var->width, ve->next != NULL); } void @@ -202,9 +208,10 @@ logname(const struct kinfo_proc *kp, VAR v = ve->var; if (kp->p_login[0]) { int n = min(v->width, LOGIN_NAME_MAX); - (void)printf("%-*.*s", n, n, kp->p_login); - if (v->width > n) - (void)printf("%*s", v->width - n, ""); + mbswprint(kp->p_login, n, ve->next != NULL); + if (ve->next != NULL) + while (n++ < v->width) + putchar(' '); } else (void)printf("%-*s", v->width, "-"); } @@ -308,41 +315,29 @@ pnice(const struct kinfo_proc *kp, VAREN void euname(const struct kinfo_proc *kp, VARENT *ve) { - VAR *v; - - v = ve->var; - (void)printf("%-*s", - (int)v->width, user_from_uid(kp->p_uid, 0)); + mbswprint(user_from_uid(kp->p_uid, 0), ve->var->width, + ve->next != NULL); } void runame(const struct kinfo_proc *kp, VARENT *ve) { - VAR *v; - - v = ve->var; - (void)printf("%-*s", - (int)v->width, user_from_uid(kp->p_ruid, 0)); + mbswprint(user_from_uid(kp->p_ruid, 0), ve->var->width, + ve->next != NULL); } void gname(const struct kinfo_proc *kp, VARENT *ve) { - VAR *v; - - v = ve->var; - (void)printf("%-*s", - (int)v->width, group_from_gid(kp->p_gid, 0)); + mbswprint(group_from_gid(kp->p_gid, 0), ve->var->width, + ve->next != NULL); } void rgname(const struct kinfo_proc *kp, VARENT *ve) { - VAR *v; - - v = ve->var; - (void)printf("%-*s", - (int)v->width, group_from_gid(kp->p_rgid, 0)); + mbswprint(group_from_gid(kp->p_rgid, 0), ve->var->width, + ve->next != NULL); } void Index: ps.c =================================================================== RCS file: 
/cvs/src/bin/ps/ps.c,v retrieving revision 1.68 diff -u -p -r1.68 ps.c --- ps.c 11 Nov 2015 03:20:02 -0000 1.68 +++ ps.c 15 Dec 2015 08:12:45 -0000 @@ -44,6 +44,7 @@ #include <errno.h> #include <fcntl.h> #include <kvm.h> +#include <locale.h> #include <nlist.h> #include <paths.h> #include <pwd.h> @@ -98,6 +99,8 @@ main(int argc, char *argv[]) int all, ch, flag, i, fmt, lineno, nentries; int prtheader, showthreads, wflag, kflag, what, Uflag, xflg; char *nlistf, *memf, *swapf, *cols, errbuf[_POSIX2_LINE_MAX]; + + setlocale(LC_CTYPE, ""); if ((cols = getenv("COLUMNS")) != NULL && *cols != '\0') { const char *errstr; Index: utf8.c =================================================================== RCS file: utf8.c diff -N utf8.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ utf8.c 15 Dec 2015 08:12:45 -0000 @@ -0,0 +1,61 @@ +/* $OpenBSD: utf8.c,v 1.1 2015/12/01 18:36:13 schwarze Exp $ */ + +/* + * Copyright (c) 2015 Ingo Schwarze <schwa...@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <vis.h> +#include <wchar.h> + +int +mbswprint(const char *mbs, int maxwidth, int trail) +{ + char buf[5]; + wchar_t wc; + int len; /* length in bytes of UTF-8 encoded string */ + int width; /* display width of a single Unicode char */ + int total_width; /* display width of what is printed */ + + total_width = 0; + while (*mbs != '\0' && total_width < maxwidth) { + len = mbtowc(&wc, mbs, MB_CUR_MAX); + if (len == -1) { + (void)mbtowc(NULL, NULL, MB_CUR_MAX); + len = 1; + } + if (len == 1) + width = vis(buf, mbs[0], + VIS_TAB | VIS_NL | VIS_CSTYLE, mbs[1]) - buf; + else if ((width = wcwidth(wc)) == -1) { + /* U+FFFD replacement character */ + memcpy(buf, "\357\277\275\0", 4); + width = 1; + } else + strncpy(buf, mbs, len); + if (total_width + width > maxwidth) + break; + fputs(buf, stdout); + total_width += width; + mbs += len; + } + if (trail) + while (total_width++ < maxwidth) + putchar(' '); + return total_width; +}