commit 99d3d3834e676f786d0d9b579cb975ed2f0eb4ca
Author: FRIGN <[email protected]>
Date:   Sun Jan 25 14:31:02 2015 +0100

    Add tablist support and a mandoc-manpage to expand(1)
    
    and mark it as finished in the README.
    
    This is another example showing how broken the GNU coreutils are:
    
    $ echo -e "äää\tüüü\tööö" | gnu-expand -t "5,10,20"
    äää    üüü    ööö
    $ echo -e "äää\tüüü\tööö" | sbase-expand -t "5,10,20"
    äää  üüü  ööö
    
    This is because they are still not UTF-8-aware and count each "ä"
    as two characters (its two bytes), so "äää" is taken to occupy six
    columns and is padded with 4 spaces up to the tabstop at column 10.
    The correct way, however, is to pad the "äää" with 2 spaces up to
    the tabstop at column 5.
    One can only imagine how this silently breaks a lot of code around
    the world.
    WHAT WERE THEY THINKING?

diff --git a/README b/README
index eb2441c..2826981 100644
--- a/README
+++ b/README
@@ -28,7 +28,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
 =  du              no                              -H, -L, (-x)
 =* echo            yes                             none
 =* env             yes                             none
-#  expand          yes                             none
+#* expand          yes                             none
    expr            yes                             none
 =* false           yes                             none
    fold            yes                             none
diff --git a/expand.1 b/expand.1
index 6ae819b..ffdaea6 100644
--- a/expand.1
+++ b/expand.1
@@ -1,25 +1,50 @@
-.TH EXPAND 1 sbase\-VERSION
-.SH NAME
-expand \- expand tabs to spaces
-.SH SYNOPSIS
-.B expand
-.RB [ \-t
-.IR n ]
-.RI [ file ...]
-.SH DESCRIPTION
-expand processes the named files or the standard input, writing the
-standard output with tabs changed into spaces.  Backspace characters
-are preserved into the output and decrement the column count for tab
-calculations.
-.SH OPTIONS
-.TP
-.BI \-i
-Only change tabs to spaces at the start of lines.
-.TP
-.BI \-t " n"
-Expand tabs to
-.I n
-spaces.  We currently support only a single numerical argument.
-.SH SEE ALSO
-.IR unexpand (1),
-.IR fold (1)
+.Dd January 25, 2015
+.Dt EXPAND 1 sbase\-VERSION
+.Sh NAME
+.Nm expand
+.Nd expand tabs to spaces
+.Sh SYNOPSIS
+.Nm expand
+.Op Fl i
+.Op Fl t Ar tablist
+.Op Ar file ...
+.Sh DESCRIPTION
+.Nm
+converts tabs to spaces in each
+.Ar file
+as specified in
+.Ar tablist .
+If no file is given,
+.Nm
+reads from stdin.
+.Pp
+Backspace characters are preserved and decrement the column count
+for tab calculations.
+.Sh OPTIONS
+.Bl -tag -width Ds
+.It Fl i
+Only expand tabs at the beginning of lines, i.e. expand each
+line until a character different from '\et' and ' ' is reached.
+.It Fl t Ar tablist
+Specify tab size or tabstops.
+.Ar tablist
+is a list of one (in the former case) or multiple (in the latter case)
+strictly positive integers separated by ' ' or ','.
+.Pp
+The default
+.Ar tablist
+is "8".
+.El
+.Sh SEE ALSO
+.Xr unexpand 1 ,
+.Xr fold 1
+.Sh STANDARDS
+The
+.Nm
+utility is compliant with the
+.St -p1003.1-2008
+specification.
+.Pp
+The
+.Op Fl i
+flag is an extension to that specification.
diff --git a/expand.c b/expand.c
index 35b7a9a..78b5454 100644
--- a/expand.c
+++ b/expand.c
@@ -1,89 +1,86 @@
 /* See LICENSE file for copyright and license details. */
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include "utf.h"
 #include "util.h"
 
-static int expand(const char *, FILE *, int);
+static int     iflag      = 0;
+static size_t *tablist    = NULL;
+static size_t  tablistlen = 0;
 
-static int iflag = 0;
-
-static void
-usage(void)
+static size_t
+parselist(const char *s, size_t slen)
 {
-       eprintf("usage: %s [-i] [-t n] [file...]\n", argv0);
-}
-
-int
-main(int argc, char *argv[])
-{
-       FILE *fp;
-       int tabstop = 8;
-       int ret = 0;
+       size_t i, m, len;
+       char *sep;
 
-       ARGBEGIN {
-       case 'i':
-               iflag = 1;
-               break;
-       case 't':
-               tabstop = estrtol(EARGF(usage()), 0);
-               if (!tabstop)
-                       eprintf("tab size cannot be zero\n");
-               break;
-       default:
-               usage();
-       } ARGEND;
+       if (s[0] == ',' || s[0] == ' ')
+               eprintf("expand: tablist can't begin with a ',' or ' '.\n");
+       if (s[slen - 1] == ',' || s[slen - 1] == ' ')
+               eprintf("expand: tablist can't end with a ',' or ' '.\n");
 
-       if (argc == 0) {
-               expand("<stdin>", stdin, tabstop);
-       } else {
-               for (; argc > 0; argc--, argv++) {
-                       if (!(fp = fopen(argv[0], "r"))) {
-                               weprintf("fopen %s:", argv[0]);
-                               ret = 1;
-                               continue;
-                       }
-                       expand(argv[0], fp, tabstop);
-                       fclose(fp);
+       len = 1;
+       for (i = 0; i < slen; i++) {
+               if (s[i] == ',' || s[i] == ' ') {
+                       if (i > 0 && (s[i - 1] == ',' || s[i - 1] == ' '))
+                               eprintf("expand: empty field in tablist.\n");
+                       len++;
                }
        }
-       return ret;
+       tablist = emalloc((len + 1) * sizeof(size_t));
+
+       m = 0;
+       for (i = 0; i < slen; i += sep - (s + i) + 1) {
+               tablist[m++] = strtol(s + i, &sep, 0);
+               if (tablist[m - 1] == 0)
+                       eprintf("expand: tab size can't be zero.\n");
+               if (*sep && *sep != ',' && *sep != ' ')
+                       eprintf("expand: invalid number in tablist.\n");
+               if (m > 1 && tablist[m - 1] < tablist[m - 2])
+                       eprintf("expand: tablist must be ascending.\n");
+       }
+
+       /* tab length = 1 for the overflowing case later in the matcher */
+       tablist[len] = 1;
+       return len;
 }
 
 static int
-expand(const char *file, FILE *fp, int tabstop)
+expand(const char *file, FILE *fp)
 {
-       int col = 0;
+       size_t bol = 1, col = 0, i;
        Rune r;
-       int bol = 1;
-
-       for (;;) {
-               if (!readrune(file, fp, &r))
-                       break;
 
+       while (readrune(file, fp, &r)) {
                switch (r) {
                case '\t':
+                       if (tablistlen == 1)
+                               i = 0;
+                       else for (i = 0; i < tablistlen; i++)
+                               if (col < tablist[i])
+                                       break;
                        if (bol || !iflag) {
                                do {
                                        col++;
                                        putchar(' ');
-                               } while (col % tabstop);
+                               } while (col % tablist[i]);
                        } else {
                                putchar('\t');
-                               col += tabstop - col % tabstop;
+                               col = tablist[i];
                        }
                        break;
                case '\b':
+                       bol = 0;
                        if (col)
                                col--;
-                       bol = 0;
-                       writerune("<stdout>", stdout, &r);
+                       putchar('\b');
                        break;
                case '\n':
-                       col = 0;
                        bol = 1;
-                       writerune("<stdout>", stdout, &r);
+                       col = 0;
+                       putchar('\n');
                        break;
                default:
                        col++;
@@ -96,3 +93,47 @@ expand(const char *file, FILE *fp, int tabstop)
 
        return 0;
 }
+
+static void
+usage(void)
+{
+       eprintf("usage: %s [-i] [-t tablist] [file ...]\n", argv0);
+}
+
+int
+main(int argc, char *argv[])
+{
+       FILE *fp;
+       char *tl = "8";
+       int   ret = 0;
+
+       ARGBEGIN {
+       case 'i':
+               iflag = 1;
+               break;
+       case 't':
+               tl = EARGF(usage());
+               if (!*tl)
+                       eprintf("expand: tablist cannot be empty.\n");
+               break;
+       default:
+               usage();
+       } ARGEND;
+
+       tablistlen = parselist(tl, strlen(tl));
+
+       if (argc == 0)
+               expand("<stdin>", stdin);
+       else {
+               for (; argc > 0; argc--, argv++) {
+                       if (!(fp = fopen(argv[0], "r"))) {
+                               weprintf("fopen %s:", argv[0]);
+                               ret = 1;
+                               continue;
+                       }
+                       expand(argv[0], fp);
+                       fclose(fp);
+               }
+       }
+       return ret;
+}

Reply via email to