Hi,

here is a UTF-8 patch for one program that i'd better not break: ps(1).

This isn't used in the ramdisks, right?
Is anybody aware of any reacharound?

A few remarks:
 * The important use cases are non-ASCII characters in command line
   arguments, environment variables, and program names.  Even though
   i don't intend to support non-ASCII user and group names in
   general, it costs nothing to handle them gracefully here, the
   code even becomes a few lines shorter.
 * The specific UTF-8 handling function mbswprint() - for "multi-byte
   string width-limited print" - is yet again somewhat different
   from what other utilities needed, both because no other utility
   needed this kind of truncation yet and because ps(1) wants to
   do a peculiar vis(3) encoding.  Anyway, the function is almost
   as short as the others and uses the same interfaces in the same
   ways, plus vis(3).
 * It might be arguable whether the vis(3) encoding done by ps(1)
   is ideal.  But that seems like a different topic.  For now, i'm
   making sure that nothing changes in the C/POSIX locale and that
   even in a UTF-8 locale, nothing changes for invalid bytes.  Even
   in a UTF-8 locale, the new code only lets through printable UTF-8
   characters, doing correct columnation for them.
 * In a UTF-8 locale, i'm replacing valid, but non-printable UTF-8
   characters with the Unicode replacement character U+FFFD.  Letting
   them through would be a bad idea because they might get assigned
   dangerous purposes in the future.  Alternatively, they could be
   treated just like invalid bytes, C-encoding the bytes individually.
   I don't feel strongly about that.
 * For a single function (mbswprint) used from a single file
   (print.c), i don't see the point in polluting header files.
   It's more straightforward and easier on the eye to just put the
   prototype into the one file using it.
 * The width limiting code in fmt.c, used only from print.c, becomes
   completely obsolete; if this gets committed, i'll cvs rm the
   file.  The new file utf8.c includes the functionality in simpler
   form, along with the more complicated multibyte character handling.
   Yet, the new file is shorter and cannot fail in malloc(3).
 * Grand total, the diff is -142 +114 lines.

Comments, concerns, OKs?
  Ingo


Index: Makefile
===================================================================
RCS file: /cvs/src/bin/ps/Makefile,v
retrieving revision 1.9
diff -u -p -r1.9 Makefile
--- Makefile    16 Jul 2014 19:57:34 -0000      1.9
+++ Makefile    15 Dec 2015 08:12:45 -0000
@@ -1,7 +1,7 @@
 #      $OpenBSD: Makefile,v 1.9 2014/07/16 19:57:34 okan Exp $
 
 PROG=  ps
-SRCS=  fmt.c keyword.c nlist.c print.c ps.c
+SRCS=  keyword.c nlist.c print.c ps.c utf8.c
 DPADD= ${LIBM} ${LIBKVM}
 LDADD= -lm -lkvm
 
Index: extern.h
===================================================================
RCS file: /cvs/src/bin/ps/extern.h,v
retrieving revision 1.17
diff -u -p -r1.17 extern.h
--- extern.h    29 Jun 2015 15:03:33 -0000      1.17
+++ extern.h    15 Dec 2015 08:12:45 -0000
@@ -48,8 +48,6 @@ void   command(const struct kinfo_proc *,
 void    cputime(const struct kinfo_proc *, VARENT *);
 int     donlist(void);
 void    emulname(const struct kinfo_proc *, VARENT *);
-void    fmt_puts(const char *, int *);
-void    fmt_putc(int, int *);
 double  getpcpu(const struct kinfo_proc *);
 double  getpmem(const struct kinfo_proc *);
 void    gname(const struct kinfo_proc *, VARENT *);
Index: print.c
===================================================================
RCS file: /cvs/src/bin/ps/print.c,v
retrieving revision 1.64
diff -u -p -r1.64 print.c
--- print.c     25 Oct 2015 15:26:53 -0000      1.64
+++ print.c     15 Dec 2015 08:12:45 -0000
@@ -55,6 +55,8 @@
 extern kvm_t *kd;
 extern int needenv, needcomm, neednlist, commandonly;
 
+int mbswprint(const char *, int, int);  /* utf8.c */
+
 static char *cmdpart(char *);
 
 #define        min(a,b)        ((a) < (b) ? (a) : (b))
@@ -97,6 +99,13 @@ command(const struct kinfo_proc *kp, VAR
        int left, wantspace = 0;
        char **argv, **p;
 
+       /*
+        * Determine the available number of display columns.
+        * Always decrement and check after writing.
+        * No check is needed before mbswprint()
+        * and after writing the last data, though.
+        */
+
        v = ve->var;
        if (ve->next != NULL || termwidth != UNLIMITED) {
                if (ve->next == NULL) {
@@ -106,74 +115,76 @@ command(const struct kinfo_proc *kp, VAR
                } else
                        left = v->width;
        } else
-               left = -1;
+               left = INT_MAX;
+
        if (needenv && kd != NULL) {
                argv = kvm_getenvv(kd, kp, termwidth);
                if ((p = argv) != NULL) {
                        while (*p) {
-                               fmt_puts(*p, &left);
+                               if (wantspace) {
+                                       putchar(' ');
+                                       left--;
+                               }
+                               left -= mbswprint(*p, left, 0);
+                               if (left == 0)
+                                       return;
                                p++;
-                               if (*p)
-                                       fmt_putc(' ', &left);
-                               else
-                                       wantspace = 1;
+                               wantspace = 1;
                        }
                }
        } else
                argv = NULL;
+
        if (needcomm) {
                if (!commandonly) {
                        if (kd != NULL) {
                                argv = kvm_getargv(kd, kp, termwidth);
                                if ((p = argv) != NULL) {
-                                       if (wantspace) {
-                                               fmt_putc(' ', &left);
-                                               wantspace = 0;
-                                       }
                                        while (*p) {
-                                               fmt_puts(*p, &left);
+                                               if (wantspace) {
+                                                       putchar(' ');
+                                                       left--;
+                                               }
+                                               left -= mbswprint(*p, left, 0);
+                                               if (left == 0)
+                                                       return;
                                                p++;
-                                               if (*p)
-                                                       fmt_putc(' ', &left);
-                                               else
-                                                       wantspace = 1;
+                                               wantspace = 1;
                                        }
                                }
                        }
                        if (argv == NULL || argv[0] == '\0' ||
                            strcmp(cmdpart(argv[0]), kp->p_comm)) {
                                if (wantspace) {
-                                       fmt_putc(' ', &left);
-                                       wantspace = 0;
+                                       putchar(' ');
+                                       if (--left == 0)
+                                               return;
                                }
-                               fmt_putc('(', &left);
-                               fmt_puts(kp->p_comm, &left);
-                               fmt_putc(')', &left);
+                               putchar('(');
+                               left--;
+                               left -= mbswprint(kp->p_comm, left, 0);
+                               if (left == 0)
+                                       return;
+                               putchar(')');
+                               left--;
                        }
                } else {
                        if (wantspace) {
-                               fmt_putc(' ', &left);
-                               wantspace = 0;
+                               putchar(' ');
+                               left--;
                        }
-                       fmt_puts(kp->p_comm, &left);
+                       left -= mbswprint(kp->p_comm, left, 0);
                }
        }
-       if (ve->next && left > 0) {
-               if (wantspace) {
-                       fmt_putc(' ', &left);
-                       wantspace = 0;
-               }
-               printf("%*s", left, "");
-       }
+       if (ve->next != NULL)
+               while (left-- > 0)
+                       putchar(' ');
 }
 
 void
 ucomm(const struct kinfo_proc *kp, VARENT *ve)
 {
-       VAR *v;
-
-       v = ve->var;
-       (void)printf("%-*s", v->width, kp->p_comm);
+       mbswprint(kp->p_comm, ve->var->width, ve->next != NULL);
 }
 
 void
@@ -182,16 +193,11 @@ curwd(const struct kinfo_proc *kp, VAREN
        int name[] = { CTL_KERN, KERN_PROC_CWD, kp->p_pid };
        char path[PATH_MAX];
        size_t pathlen = sizeof path;
-       int left;
-
-       left = ve->var->width;
 
        if (!kvm_sysctl_only || sysctl(name, 3, path, &pathlen, NULL, 0) != 0)
                *path = '\0';
 
-       fmt_puts(path, &left);
-       if (ve->next != NULL && left)
-               (void)printf("%-*s", left, "");
+       mbswprint(path, ve->var->width, ve->next != NULL);
 }
 
 void
@@ -202,9 +208,10 @@ logname(const struct kinfo_proc *kp, VAR
        v = ve->var;
        if (kp->p_login[0]) {
                int n = min(v->width, LOGIN_NAME_MAX);
-               (void)printf("%-*.*s", n, n, kp->p_login);
-               if (v->width > n)
-                       (void)printf("%*s", v->width - n, "");
+               mbswprint(kp->p_login, n, ve->next != NULL);
+               if (ve->next != NULL)
+                       while (n++ < v->width)
+                               putchar(' ');
        } else
                (void)printf("%-*s", v->width, "-");
 }
@@ -308,41 +315,29 @@ pnice(const struct kinfo_proc *kp, VAREN
 void
 euname(const struct kinfo_proc *kp, VARENT *ve)
 {
-       VAR *v;
-
-       v = ve->var;
-       (void)printf("%-*s",
-           (int)v->width, user_from_uid(kp->p_uid, 0));
+       mbswprint(user_from_uid(kp->p_uid, 0), ve->var->width,
+           ve->next != NULL);
 }
 
 void
 runame(const struct kinfo_proc *kp, VARENT *ve)
 {
-       VAR *v;
-
-       v = ve->var;
-       (void)printf("%-*s",
-           (int)v->width, user_from_uid(kp->p_ruid, 0));
+       mbswprint(user_from_uid(kp->p_ruid, 0), ve->var->width,
+           ve->next != NULL);
 }
 
 void
 gname(const struct kinfo_proc *kp, VARENT *ve)
 {
-       VAR *v;
-
-       v = ve->var;
-       (void)printf("%-*s",
-           (int)v->width, group_from_gid(kp->p_gid, 0));
+       mbswprint(group_from_gid(kp->p_gid, 0), ve->var->width,
+           ve->next != NULL);
 }
 
 void
 rgname(const struct kinfo_proc *kp, VARENT *ve)
 {
-       VAR *v;
-
-       v = ve->var;
-       (void)printf("%-*s",
-           (int)v->width, group_from_gid(kp->p_rgid, 0));
+       mbswprint(group_from_gid(kp->p_rgid, 0), ve->var->width,
+           ve->next != NULL);
 }
 
 void
Index: ps.c
===================================================================
RCS file: /cvs/src/bin/ps/ps.c,v
retrieving revision 1.68
diff -u -p -r1.68 ps.c
--- ps.c        11 Nov 2015 03:20:02 -0000      1.68
+++ ps.c        15 Dec 2015 08:12:45 -0000
@@ -44,6 +44,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <kvm.h>
+#include <locale.h>
 #include <nlist.h>
 #include <paths.h>
 #include <pwd.h>
@@ -98,6 +99,8 @@ main(int argc, char *argv[])
        int all, ch, flag, i, fmt, lineno, nentries;
        int prtheader, showthreads, wflag, kflag, what, Uflag, xflg;
        char *nlistf, *memf, *swapf, *cols, errbuf[_POSIX2_LINE_MAX];
+
+       setlocale(LC_CTYPE, "");
 
        if ((cols = getenv("COLUMNS")) != NULL && *cols != '\0') {
                const char *errstr;
Index: utf8.c
===================================================================
RCS file: utf8.c
diff -N utf8.c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ utf8.c      15 Dec 2015 08:12:45 -0000
@@ -0,0 +1,94 @@
+/*     $OpenBSD: utf8.c,v 1.1 2015/12/01 18:36:13 schwarze Exp $       */
+
+/*
+ * Copyright (c) 2015 Ingo Schwarze <schwa...@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <vis.h>
+#include <wchar.h>
+
+/*
+ * Multi-byte string width-limited print.
+ *
+ * Write the string mbs to standard output, using at most maxwidth
+ * display columns.  Each character is decoded with mbtowc(3) according
+ * to the current locale and handled as follows:
+ *  - Single bytes, including bytes that fail to decode, are encoded
+ *    with vis(3) using VIS_TAB | VIS_NL | VIS_CSTYLE.
+ *  - Multibyte characters for which wcwidth(3) returns -1 are replaced
+ *    with U+FFFD and counted as one column.
+ *  - All other multibyte characters are written unchanged.
+ * A character that does not fit into the remaining columns ends the
+ * output.  If trail is non-zero, blanks are appended until maxwidth
+ * columns are filled.
+ *
+ * Returns the number of display columns written, including padding.
+ */
+int
+mbswprint(const char *mbs, int maxwidth, int trail)
+{
+       char      buf[5];  /* vis(3) result: up to 4 bytes plus NUL */
+       const char *out;  /* bytes to write for the current character */
+       wchar_t   wc;
+       int       len;  /* length in bytes of the input character */
+       int       outlen;  /* number of bytes to write from out */
+       int       width;  /* display width of a single character */
+       int       total_width;  /* display width of what is printed */
+
+       total_width = 0;
+       while (*mbs != '\0' && total_width < maxwidth) {
+               len = mbtowc(&wc, mbs, MB_CUR_MAX);
+               if (len == -1) {
+                       /* Invalid byte: reset the state, encode it alone. */
+                       (void)mbtowc(NULL, NULL, MB_CUR_MAX);
+                       len = 1;
+               }
+               if (len == 1) {
+                       /* vis(3) returns a pointer to the terminating NUL. */
+                       width = vis(buf, mbs[0],
+                           VIS_TAB | VIS_NL | VIS_CSTYLE, mbs[1]) - buf;
+                       out = buf;
+                       outlen = width;
+               } else if ((width = wcwidth(wc)) == -1) {
+                       /* U+FFFD replacement character */
+                       out = "\357\277\275";
+                       outlen = 3;
+                       width = 1;
+               } else {
+                       /*
+                        * Write the bytes straight from the input:
+                        * they are not followed by a NUL there, so
+                        * they cannot be passed to fputs(3).
+                        */
+                       out = mbs;
+                       outlen = len;
+               }
+               if (total_width + width > maxwidth)
+                       break;
+               (void)fwrite(out, 1, (size_t)outlen, stdout);
+               total_width += width;
+               mbs += len;
+       }
+       if (trail) {
+               /* Do not count past maxwidth when padding. */
+               for (; total_width < maxwidth; total_width++)
+                       putchar(' ');
+       }
+       return total_width;
+}

Reply via email to