commit 1513c2b7662c1e7f475ce1c45b61f6a76d665c8b
Author: FRIGN <[email protected]>
Date:   Sun Feb 8 21:24:22 2015 +0100

    Refactor unexpand(1) code and manpage, adding tablist support
    
    as already seen for expand(1), only twice as complicated.

diff --git a/README b/README
index 8400dad..daedbc3 100644
--- a/README
+++ b/README
@@ -73,7 +73,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
 =* true            yes                             none
 =* tty             yes                             none
 =* uname           yes                             none
-#  unexpand        yes                             none
+#* unexpand        yes                             none
 =  uniq            no                              -f, -s
 =  unlink          yes                             none
 =  uudecode        no                              -o
diff --git a/unexpand.1 b/unexpand.1
index 506d9ed..c5a44b9 100644
--- a/unexpand.1
+++ b/unexpand.1
@@ -1,30 +1,45 @@
-.Dd January 30, 2015
+.Dd February 8, 2015
 .Dt UNEXPAND 1
 .Os sbase
 .Sh NAME
 .Nm unexpand
-.Nd convert blanks to tabs
+.Nd unexpand spaces to tabs
 .Sh SYNOPSIS
 .Nm
 .Op Fl a
-.Op Fl t Ar n
+.Op Fl t Ar tablist
 .Op Ar file ...
 .Sh DESCRIPTION
 .Nm
-processes the named
-.Ar files
-or the standard input, writing the
-standard output with consecutive blanks (spaces and tabs) converted
-into tabs. Backspace characters are preserved into the output and
-decrement the column count for tab calculations.
+converts spaces to tabs in each
+.Ar file
+as specified in
+.Ar tablist .
+If no file is given,
+.Nm
+reads from stdin.
+.Pp
+Backspace characters are preserved and decrement the column count
+for tab calculations.
 .Sh OPTIONS
 .Bl -tag -width Ds
 .It Fl a
-Convert blanks to tabs everywhere, not just at the start of lines.
-.It Fl t Ar n
-Set tab size to
-.Ar n
-spaces (default: 8).
+Convert spaces to tabs everywhere, not just at the start of lines.
+.It Fl t Ar tablist
+Specify tab size or tabstops.
+.Ar tablist
+is a list of one (in the former case) or multiple (in the latter case)
+strictly positive integers separated by ' ' or ','.
+.Pp
+The default
+.Ar tablist
+is "8".
 .El
 .Sh SEE ALSO
 .Xr expand 1
+.Sh STANDARDS
+The
+.Nm
+utility is compliant with the
+.St -p1003.1-2008
+specification.
diff --git a/unexpand.c b/unexpand.c
index 839eac6..8f1fe07 100644
--- a/unexpand.c
+++ b/unexpand.c
@@ -1,5 +1,5 @@
 /* See LICENSE file for copyright and license details. */
-#include <limits.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <wchar.h>
@@ -7,114 +7,162 @@
 #include "utf.h"
 #include "util.h"
 
-static void unexpand(const char *, FILE *);
+static int     aflag      = 0;
+static size_t *tablist    = NULL;
+static int     tablistlen = 8;
 
-static int aflag = 0;
-static int tabsize = 8;
-
-static void
-usage(void)
+static size_t
+parselist(const char *s)
 {
-       eprintf("usage: %s [-a] [-t n] [file ...]\n", argv0);
+        size_t i;
+        char  *p, *tmp;
+
+        tmp = estrdup(s);
+        for (i = 0; (p = strsep(&tmp, " ,")); i++) {
+                if (*p == '\0')
+                        eprintf("empty field in tablist\n");
+                tablist = erealloc(tablist, (i + 1) * sizeof(*tablist));
+                tablist[i] = estrtonum(p, 1, MIN(LLONG_MAX, SIZE_MAX));
+                if (i > 0 && tablist[i - 1] >= tablist[i])
+                        eprintf("tablist must be ascending\n");
+        }
+        tablist = erealloc(tablist, (i + 1) * sizeof(*tablist));
+        return i;
 }
 
-int
-main(int argc, char *argv[])
+static void
+unexpandspan(size_t last, size_t col)
 {
-       FILE *fp;
-       int ret = 0;
+       size_t off, i, j;
+       Rune r;
 
-       ARGBEGIN {
-       case 't':
-               tabsize = estrtonum(EARGF(usage()), 0, INT_MAX);
-               if (tabsize <= 0)
-                       eprintf("unexpand: invalid tabsize\n");
-               /* Fallthrough: -t implies -a */
-       case 'a':
-               aflag = 1;
-               break;
-       default:
-               usage();
-       } ARGEND;
+       if (tablistlen == 1) {
+               i = 0;
+               off = last % tablist[i];
 
-       if (argc == 0) {
-               unexpand("<stdin>", stdin);
+               if ((col - last) + off >= tablist[i] && last < col)
+                       last -= off;
+
+               r = '\t';
+               for (; last + tablist[i] <= col; last += tablist[i])
+                       writerune("<stdout>", stdout, &r);
+               r = ' ';
+               for (; last < col; last++)
+                       writerune("<stdout>", stdout, &r);
        } else {
-               for (; argc > 0; argc--, argv++) {
-                       if (!(fp = fopen(argv[0], "r"))) {
-                               weprintf("fopen %s:", argv[0]);
-                               ret = 1;
-                               continue;
-                       }
-                       unexpand(argv[0], fp);
-                       fclose(fp);
+               for (i = 0; i < tablistlen; i++)
+                       if (col < tablist[i])
+                               break;
+               for (j = 0; j < tablistlen; j++)
+                       if (last < tablist[j])
+                               break;
+               r = '\t';
+               for (; j < i; j++) {
+                       writerune("<stdout>", stdout, &r);
+                       last = tablist[j];
                }
+               r = ' ';
+               for (; last < col; last++)
+                       writerune("<stdout>", stdout, &r);
        }
-       return ret;
-}
-
-static void
-unexpandspan(unsigned int n, unsigned int col)
-{
-       unsigned int off = (col-n) % tabsize;
-       Rune r;
-
-       if (n + off >= tabsize && n > 1)
-               n += off;
 
-       r = '\t';
-       for (; n >= tabsize; n -= tabsize)
-               writerune("<stdout>", stdout, &r);
-       r = ' ';
-       while (n--)
-               writerune("<stdout>", stdout, &r);
 }
 
 static void
 unexpand(const char *file, FILE *fp)
 {
-       unsigned int n = 0, col = 0;
        Rune r;
+       size_t last = 0, col = 0, i;
        int bol = 1;
 
-       while (1) {
-               if (!readrune(file, fp, &r))
-                       break;
-
+       while (readrune(file, fp, &r)) {
                switch (r) {
                case ' ':
-                       if (bol || aflag)
-                               n++;
+                       if (!bol && !aflag)
+                               last++;
                        col++;
                        break;
                case '\t':
-                       if (bol || aflag)
-                               n += tabsize - col % tabsize;
-                       col += tabsize - col % tabsize;
+                       if (tablistlen == 1) {
+                               if (!bol && !aflag)
+                                       last += tablist[0] - col % tablist[0];
+                               col += tablist[0] - col % tablist[0];
+                       } else {
+                               for (i = 0; i < tablistlen; i++)
+                                       if (col < tablist[i])
+                                               break;
+                               if (!bol && !aflag)
+                                       last = tablist[i];
+                               col = tablist[i];
+                       }
                        break;
                case '\b':
                        if (bol || aflag)
-                               unexpandspan(n, col);
+                               unexpandspan(last, col);
                        col -= (col > 0);
-                       n = 0;
+                       last = col;
                        bol = 0;
                        break;
                case '\n':
                        if (bol || aflag)
-                               unexpandspan(n, col);
-                       n = col = 0;
+                               unexpandspan(last, col);
+                       last = col = 0;
                        bol = 1;
                        break;
                default:
                        if (bol || aflag)
-                               unexpandspan(n, col);
-                       n = 0;
-                       col++;
+                               unexpandspan(last, col);
+                       last = ++col;
                        bol = 0;
+                       break;
                }
                if ((r != ' ' && r != '\t') || (!aflag && !bol))
                        writerune("<stdout>", stdout, &r);
        }
-       if (n > 0 && (bol || aflag))
-               unexpandspan(n, col);
+       if (last < col && (bol || aflag))
+               unexpandspan(last, col);
+}
+
+static void
+usage(void)
+{
+       eprintf("usage: %s [-a] [-t tablist] [file ...]\n", argv0);
+}
+
+int
+main(int argc, char *argv[])
+{
+       FILE *fp;
+       int ret = 0;
+       char *tl = "8";
+
+       ARGBEGIN {
+       case 't':
+               tl = EARGF(usage());
+               if (!*tl)
+                       eprintf("tablist cannot be empty\n");
+               /* Fallthrough: -t implies -a */
+       case 'a':
+               aflag = 1;
+               break;
+       default:
+               usage();
+       } ARGEND;
+
+       tablistlen = parselist(tl);
+
+       if (argc == 0)
+               unexpand("<stdin>", stdin);
+       else {
+               for (; argc > 0; argc--, argv++) {
+                       if (!(fp = fopen(argv[0], "r"))) {
+                               weprintf("fopen %s:", argv[0]);
+                               ret = 1;
+                               continue;
+                       }
+                       unexpand(argv[0], fp);
+                       fclose(fp);
+               }
+       }
+       return ret;
 }

Reply via email to