From: Devin Hussey <[email protected]>

I added the seq builtin.

usage: seq [-w] [-f format] [-s string] [-t string] [first [incr]] last

Some notes:
    - 100% from scratch, aside from the small num_width algorithm I took from
      StackOverflow and credited accordingly.
    - I optimize the math by using integer math if we don't have any decimals,
      intmax_t is large enough to support the number range, and if the user
      didn't specify a custom format. GNU Coreutils also does this.
      - If we use integers, we precalculate the width and snprintf a format
        specifer from that for the -w option.
    - I reuse some of the printf code to act as the validator for the format
      string. I want to separate this more for the best possible const
      correctness, though (I use a cast), and so the conditionals are a little
      more clear.
    - It functionally matches the one supplied with macOS and presumably BSD,
      but it's faster: It is reliably slightly faster on my system (a wimpy
      2009 MacBook) to call this
          ./dash -c 'seq 1 10000'
      than it is to call
          seq 1 10000
      directly, even multiple times, from a bash shell.

Other changes:
    - I fixed a few silly gotos in the file, replacing them with negated
      conditional blocks or early returns.
    - I isolated the printf parsing into its own function so we can reuse it.
    - I reordered the static functions to go above the main functions. This
      makes it easier to follow.
    - I changed conv_escape to return the number of characters read instead
      of the string itself so it can accept a constant string without any
      ugly casting.

Signed-off-by: Devin Hussey <[email protected]>
---
 src/bltin/printf.c  | 451 +++++++++++++++++++++++++++++++++++---------
 src/builtins.def.in |   1 +
 2 files changed, 361 insertions(+), 91 deletions(-)

diff --git a/src/bltin/printf.c b/src/bltin/printf.c
index 7785735..7d67514 100644
--- a/src/bltin/printf.c
+++ b/src/bltin/printf.c
@@ -37,17 +37,18 @@
 #include <limits.h>
 #include <stdarg.h>
 #include <stdlib.h>
+#include <stdio.h>
 #include <string.h>
 #include <unistd.h>
 
 static int      conv_escape_str(char *, char **);
-static char    *conv_escape(char *, int *);
+static size_t   conv_escape(const char *, int *);
 static int      getchr(void);
 static double   getdouble(void);
 static uintmax_t getuintmax(int);
 static char    *getstr(void);
 static char    *mklong(const char *, const char *);
-static void      check_conversion(const char *, const char *);
+static void             check_conversion(const char *, const char *);
 
 static int     rval;
 static char  **gargv;
@@ -105,109 +106,129 @@ static int print_escape_str(const char *f, int *param, 
int *array, char *s)
 
        q[-1] = (!!((f[1] - 's') | done) - 1) & f[2];
        total += !!q[-1];
-       if (f[1] == 's')
-               goto easy;
 
-       p = makestrspace(len, q);
-       memset(p, 'X', total);
-       p[total] = 0;
+       if (f[1] != 's') {
+               p = makestrspace(len, q);
+               memset(p, 'X', total);
+               p[total] = 0;
 
-       q = stackblock();
-       total = ASPF(&p, f, p);
+               q = stackblock();
+               total = ASPF(&p, f, p);
 
-       len = strchrnul(p, 'X') - p;
-       memcpy(p + len, q, strspn(p + len, "X"));
+               len = strchrnul(p, 'X') - p;
+               memcpy(p + len, q, strspn(p + len, "X"));
+       }
 
-easy:
        out1mem(p, total);
 
        popstackmark(&smark);
        return done;
 }
 
-
-int printfcmd(int argc, char *argv[])
+/*
+ * Either parse or validate a printf string.
+ *
+ * If seq is zero, it will print the formatted characters, functioning for
+ * the printf builtin. format will be modified.
+ *
+ * If seq is non-zero, it will validate a format string for seq, which
+ * only allows EeFfGg, without doing anything else. format will not be 
modified.
+ */
+static int parse_format(char *format, int seq)
 {
-       char *fmt;
-       char *format;
+#define SKIP1  "#-+ 0"
+#define SKIP2  "*0123456789"
        int ch;
+       /* whether we found a format argument in format for seq */
+       int found = 0;
+       char *fmt;
+       /*
+        * Basic algorithm is to scan the format string for conversion
+        * specifications -- once one is found, find out if the field
+        * width or precision is a '*'; if it is, gather up value. 
+        * Note, format strings are reused as necessary to use up the
+        * provided arguments, arguments of zero/null string are 
+        * provided to use up the format string.
+        */
 
-       rval = 0;
-
-       nextopt(nullstr);
-
-       argv = argptr;
-       format = *argv;
-
-       if (!format)
-               error("usage: printf format [arg ...]");
-
-       gargv = ++argv;
+       /* find next format specification */
+       for (fmt = format; (ch = *fmt++) ;) {
 
-#define SKIP1  "#-+ 0"
-#define SKIP2  "*0123456789"
-       do {
-               /*
-                * Basic algorithm is to scan the format string for conversion
-                * specifications -- once one is found, find out if the field
-                * width or precision is a '*'; if it is, gather up value. 
-                * Note, format strings are reused as necessary to use up the
-                * provided arguments, arguments of zero/null string are 
-                * provided to use up the format string.
-                */
+               const char *start;
+               char nextch;
+               int array[2];
+               int *param;
 
-               /* find next format specification */
-               for (fmt = format; (ch = *fmt++) ;) {
-                       char *start;
-                       char nextch;
-                       int array[2];
-                       int *param;
-
-                       if (ch == '\\') {
-                               int c_ch;
-                               fmt = conv_escape(fmt, &c_ch);
-                               ch = c_ch;
-                               goto pc;
-                       }
-                       if (ch != '%' || (*fmt == '%' && (++fmt || 1))) {
+               if (ch == '\\') {
+                       int c_ch;
+                       fmt += conv_escape(fmt, &c_ch);
+                       ch = c_ch;
+                       goto pc;
+               }
+               if (ch != '%' || (*fmt == '%' && (++fmt || 1))) {
 pc:
+                       /* We only output in printf mode */
+                       if (!seq)
                                putchar(ch);
-                               continue;
-                       }
+                       continue;
+               }
 
-                       /* Ok - we've found a format specification,
-                          Save its address for a later printf(). */
-                       start = fmt - 1;
-                       param = array;
+               /* Ok - we've found a format specification,
+                  Save its address for a later printf(). */
+               start = fmt - 1;
+               param = array;
+
+               /* skip to field width */
+               fmt += strspn(fmt, SKIP1);
+               if (*fmt == '*') {
+                       ++fmt;
+                       *param++ = getuintmax(1);
+               } else {
+                       /* skip to possible '.',
+                        * get following precision
+                        */
+                       fmt += strspn(fmt, SKIP2);
+               }
 
-                       /* skip to field width */
-                       fmt += strspn(fmt, SKIP1);
+               if (*fmt == '.') {
+                       ++fmt;
                        if (*fmt == '*') {
                                ++fmt;
                                *param++ = getuintmax(1);
-                       } else {
-                               /* skip to possible '.',
-                                * get following precision
-                                */
+                       } else
                                fmt += strspn(fmt, SKIP2);
-                       }
+               }
 
-                       if (*fmt == '.') {
-                               ++fmt;
-                               if (*fmt == '*') {
-                                       ++fmt;
-                                       *param++ = getuintmax(1);
-                               } else
-                                       fmt += strspn(fmt, SKIP2);
-                       }
+               ch = *fmt;
+               if (!ch)
+                       error("missing format character");
 
-                       ch = *fmt;
-                       if (!ch)
-                               error("missing format character");
+               /* When invoked as seq, we can only use EeFfGg, and we only use
+                * it once. We actually printf later, this is just a syntax 
check. */
+               if (seq) {
+                       switch (ch) {
+                       case 'e':
+                       case 'E':
+                       case 'f':
+                       case 'F':
+                       case 'g':
+                       case 'G':
+                               /* Only allow it once. */
+                               if (!found) {
+                                       found = 1;
+                                       break;
+                               }
+                               /* else FALLTHROUGH */
+                       default:
+                               error("'%s': invalid format string", format);
+                               break;
+                       }
+               } else {
                        /* null terminate format string to we can use it
                           as an argument to printf. */
                        nextch = fmt[1];
                        fmt[1] = 0;
+
                        switch (ch) {
 
                        case 'b':
@@ -215,7 +236,8 @@ pc:
                                /* escape if a \c was encountered */
                                if (print_escape_str(start, param, array,
                                                     getstr()))
-                                       goto out;
+                                       return rval;
+
                                *fmt = 'b';
                                break;
                        case 'c': {
@@ -261,13 +283,13 @@ pc:
                        }
                        *++fmt = nextch;
                }
-       } while (gargv != argv && *gargv);
+       }
+       if (seq)
+               return found;
 
-out:
-       return rval;
+       return 0;
 }
 
-
 /*
  * Print SysV echo(1) style escape string 
  *     Halts processing string if a \c escape is encountered.
@@ -303,7 +325,7 @@ conv_escape_str(char *str, char **sp)
                        str++;
 
                /* Finally test for sequences valid in the format string */
-               str = conv_escape(str - 1, &c);
+               str += conv_escape(str - 1, &c);
        } while (STPUTC(c, cp), (char)ch);
 
        *sp = cp;
@@ -314,11 +336,11 @@ conv_escape_str(char *str, char **sp)
 /*
  * Print "standard" escape characters 
  */
-static char *
-conv_escape(char *str, int *conv_ch)
+static size_t conv_escape(const char *str, int *conv_ch)
 {
        int value;
        int ch;
+       size_t i = 0;
 
        ch = *str;
 
@@ -333,8 +355,8 @@ conv_escape(char *str, int *conv_ch)
                value = 0;
                do {
                        value <<= 3;
-                       value += octtobin(*str++);
-               } while (isodigit(*str) && --ch);
+                       value += octtobin(str[i++]);
+               } while (isodigit(str[0]) && --ch);
                goto out;
 
        case '\\':      value = '\\';   break;  /* backslash */
@@ -347,10 +369,10 @@ conv_escape(char *str, int *conv_ch)
        case 'v':       value = '\v';   break;  /* vertical-tab */
        }
 
-       str++;
+       i++;
 out:
        *conv_ch = value;
-       return str;
+       return i;
 }
 
 static char *
@@ -405,17 +427,17 @@ getuintmax(int sign)
 
        cp = *gargv;
        if (cp == NULL)
-               goto out;
+               return val;
        gargv++;
 
        val = (unsigned char) cp[1];
        if (*cp == '\"' || *cp == '\'')
-               goto out;
+               return val;
 
        errno = 0;
        val = sign ? strtoimax(cp, &ep, 0) : strtoumax(cp, &ep, 0);
        check_conversion(cp, ep);
-out:
+
        return val;
 }
 
@@ -454,6 +476,253 @@ check_conversion(const char *s, const char *ep)
        }
 }
 
+
+/* seq stuff here */
+
+#define PO10_LIMIT (INTMAX_MAX/10)
+
+/*
+ * Gets the width in digits of an integer. We return uint8_t to truncate the 
value.
+ * Credit goes to Curd at https://stackoverflow.com/a/4143288 for the original 
algorithm
+ * which I modified slightly.
+ */
+static uint8_t num_width(intmax_t i)
+{
+       intmax_t n = 1, po10 = 10;
+
+       /* Flip the sign and increase n for the sign */
+       if (i < 0) {
+               i = -i;
+               ++n;
+       }
+
+       while (i >= po10) {
+               ++n;
+               if (po10 > PO10_LIMIT)
+                       break;
+               po10 *= 10;
+       }
+       return (uint8_t)n;
+}
+
+static inline void sequsage(void)
+{
+       error("usage: seq [-w] [-f format] [-s string] [-t string] [first 
[incr]] last");
+}
+
+/* Checks if a double is in the range of intmax_t. */
+static inline int in_int_range(const double x)
+{
+       return (x >= INTMAX_MIN && x <= INTMAX_MAX);
+}
+
+/* Whether we want to use int math or double math. */
+static int seq_use_int = 0;
+
+/* strtod which automatically errors on failure and sets seq_use_int if 
needed. */
+static inline double strtod_seq(char *str)
+{
+       char *end;
+       double ret = strtod(str, &end);
+
+       /* strtod will set end to the first non-matching character.
+        * If end is equal to str or end[0] is not a null terminator,
+        * we have a bad number. */
+       if (end == str || end[0] != '\0')
+               error("%s: invalid number", str);
+
+       /* Check if we have a '.' in the number, if so, force double math. */
+       if (seq_use_int && strchr(str, '.') != NULL) {
+               seq_use_int = 0;
+       }
+       return ret;
+}
+
+/*
+ * The seqcmd builtin.
+ */
+int seqcmd(int argc, char **argv)
+{
+       const char *fmt = NULL;
+       char *sep = NULL;
+       char *endstr = NULL;
+
+       double first = 1.0;
+       double incr = 1.0;
+       double last = 0.0;
+
+       int seq_wflag = 0;
+       seq_use_int = 1;
+
+       if (argc == 1)
+               sequsage();
+
+       --argc; ++argv;
+
+       int c;
+
+       opterr = 0; optind = 0; optreset = 1;
+       while ((c = getopt(argc, argv, "+f:s:t:w")) != -1) {
+               switch (c) {
+               /* Format string */
+               case 'f':
+                       /* no empty strings allowed */
+                       if (optarg[0] == '\0') {
+                               sequsage();
+                       } else {
+                               fmt = optarg;
+                               /* We force using double math if they specified 
format */
+                               seq_use_int = 0;
+                       }
+                       break;
+               /* Separator */
+               case 's':
+                       sep = optarg;
+                       break;
+               /* Terminator */
+               case 't':
+                       endstr = optarg;
+                       break;
+               /* Constant width */
+               case 'w':
+                       seq_wflag = 1;
+                       break;
+               case '?':
+                       /* Negative numbers trigger getopt. We force exit the 
loop and rewind when this happens. */
+                       if (isdigit(optopt)) {
+                               --optind;
+                               goto out;
+                       }
+                       /* else FALLTHROUGH */
+               default:
+                       sequsage();
+                       break;
+               }
+       }
+out:
+       argc -= optind; argv += optind;
+
+       if (argc < 1 || argc > 3)
+               sequsage();
+
+       /* first is always the first number when there is more than 1 argument 
*/
+       if (argc > 1) {
+               first = strtod_seq(*argv);
+               --argc; ++argv;
+       }
+
+       /* incr is in the middle. */
+       if (argc == 2) {
+               incr = strtod_seq(*argv);
+               /* Don't allow negative or zero values for incr. */
+               if (incr < 0.0) {
+                       error("increment/decrement must be greater than zero");
+               }
+               --argc; ++argv;
+       }
+
+       /* the last argument is always the last number */
+       last = strtod_seq(*argv);
+
+       /* check if we have whole numbers larger than what can be represented 
in intmax_t */
+       if (seq_use_int) {
+               seq_use_int = in_int_range(first) && in_int_range(last);
+       }
+
+       /* If first is less than last, we go backwards. */
+       int forwards = first <= last;
+       if (!forwards) {
+               incr *= -1.0;
+       }
+
+       const char default_sep = '\n';
+
+       if (fmt == NULL) {
+               /* If the user did not specify a format and only used whole 
numbers, we can cast away the
+                * double and use faster int math. glibc also does this. */
+               if (seq_use_int) {
+                       intmax_t int_first = (intmax_t)first;
+                       intmax_t int_last  = (intmax_t)last;
+                       const intmax_t int_incr  = (const intmax_t)incr;
+
+                       /* We snprintf the width format into the buffer. */
+                       /* % 2 5 5 sizeof(PRIdMAX) - we have an extra digit 
even though int64_t is max 20 digits. */
+                       char fmt_w_int[5 + sizeof(PRIdMAX)];
+
+                       if (seq_wflag) {
+                               /* Calculate the width we need for this string. 
We use uint8_t to be safe. */
+                               const uint8_t first_width = 
num_width(int_first);
+                               const uint8_t last_width = num_width(int_last);
+                               const uint8_t width = (first_width > 
last_width) ? first_width : last_width;
+
+                               /* snprintf the custom format string, resulting 
in %<width>d. */
+                               snprintf(fmt_w_int, sizeof(fmt_w_int), "%%%u" 
PRIdMAX, width);
+                       }
+                       /* Loop through the numbers with int math. */
+                       for (intmax_t i = int_first; (forwards) ? (i <= 
int_last) : (i >= int_last); i += int_incr) {
+                               /* printf either the string we just made above, 
or "%" PRIdMAX if not. */
+                               printf(seq_wflag ? fmt_w_int : "%" PRIdMAX, i);
+                               /* separator */
+                               if (sep == NULL)
+                                       putchar(default_sep);
+                               else
+                                       print_escape_str("%s", NULL, NULL, sep);
+                       }
+                       /* Everything else is double math */
+                       goto end;
+               } else {
+                       /* -w uses %e, otherwise, it uses %g. */
+                       fmt = (seq_wflag) ? "%e" : "%g";
+               }
+       } else {
+               /* We reuse the code in printf's code to validate our format 
string.
+                * It is safe, albeit ugly, to cast away the const. We don't 
change it and it isn't readonly
+                * anyways (it will be from argv). */
+               parse_format((char *)fmt, 1);
+       }
+
+       /* Loop through the numbers with double math. */
+       for (double i = first; forwards ? (i <= last) : (i >= last); i += incr) 
{
+               printf(fmt, i);
+               /* separator */
+               if (sep == NULL)
+                       putchar(default_sep);
+               else
+                       print_escape_str("%s", NULL, NULL, sep);
+       }
+
+end:
+       /* Print the ending string */
+       if (endstr != NULL)
+               print_escape_str("%s", NULL, NULL, endstr);
+       return 0;
+}
+
+int printfcmd(int argc, char *argv[])
+{
+       char *format;
+
+       rval = 0;
+
+       nextopt(nullstr);
+
+       argv = argptr;
+       format = *argv;
+
+       if (!format)
+               error("usage: printf format [arg ...]");
+
+       gargv = ++argv;
+
+       do {
+               int ret = parse_format(format, 0);
+               if (ret != 0)
+                       return ret;
+       } while (gargv != argv && *gargv);
+
+       return rval;
+}
+
 int
 echocmd(int argc, char **argv)
 {
diff --git a/src/builtins.def.in b/src/builtins.def.in
index 95e420c..e15811b 100644
--- a/src/builtins.def.in
+++ b/src/builtins.def.in
@@ -76,6 +76,7 @@ printfcmd     printf
 pwdcmd         -u pwd
 readcmd                -u read
 returncmd      -s return
+seqcmd         -u seq
 setcmd         -s set
 shiftcmd       -s shift
 timescmd       -s times
-- 
2.19.0

Reply via email to