We reduce overhead if we only parse the user's format string once.  To
achieve that, this patch does the following:

- Move "format" into main().  We don't need it as a global anymore.

- Move buffer allocation into fmtfmt().  As claudio@ mentioned in a
  different thread, we need at most (3 * strlen(format) + 1) bytes for
  buf, the parsed format string.  I have added a comment explaining the
  allocation.  I also left an assert(3) to confirm my math.  Unsure
  whether or not to leave the assert(3) in... we only run the assert
  once, so it isn't very costly.

- In fmtfmt(), preallocate a flat 512 bytes for outbuf.  We aren't using
  the 10x allocation for buf anymore, so keeping it for outbuf seems
  arbitrary. If we're going to use a magic number I figure it may as
  well be large enough for practical timestamps and a power of two.
  Feel free to suggest something else.

- Call fmtfmt() where we used to do buffer allocation in main().

- When parsing the user format string in fmtfmt(), keep a list of
  where each microsecond substring lands in buf.  We'll need it later.

- Move the printing part of fmtfmt() into a new function, fmtprint().
  fmtprint() is now called from the main loop instead of fmtfmt().

- In fmtprint(), before calling strftime(3), update any microsecond
  substrings in buf using the list we built earlier in fmtfmt().  Note
  that if there aren't any such substrings we don't call snprintf(3)
  at all.

--

Okay, on to the numbers.  My benchmark input is a million newlines:

$ yes '' | head -n 1000000 > newline-1M.txt

The benchmark is "real time taken to timestamp the input."

Patched ts(1) is about 45% faster using the empty format string.
N=100.

x ts-head.dat1
+ ts-patch.dat1
    N         Min         Max      Median         Avg        Stddev
x 100   1.7420306    1.820921   1.7468652   1.7504513   0.010192689
+ 100  0.96225744  0.98482864  0.96404194  0.96658115  0.0052265094
Difference at 99.5% confidence
        -0.78387 +/- 0.00353946
        -44.781% +/- 0.202203%
        (Student's t, pooled s = 0.00809961)

Patched ts(1) is about 25% faster using the default format string,
i.e. '%b %d %H:%M:%S'.  N=100.

x ts-head.dat2
+ ts-patch.dat2
    N        Min        Max     Median        Avg       Stddev
x 100  4.7128656  4.9162049  4.7212578  4.7313946  0.026306241
+ 100  3.5083849  3.7382005  3.5126801  3.5271755   0.03256854
Difference at 99.5% confidence
        -1.20422 +/- 0.0129365
        -25.4517% +/- 0.273418%
        (Student's t, pooled s = 0.0296034)

Patched ts(1) is about 10% faster using the format string '%FT%.TZ'.
This format is similar to the ISO 8601 timestamp format but with added
microsecond granularity.  N=100.

x ts-head.dat4
+ ts-patch.dat4
    N        Min        Max     Median        Avg       Stddev
x 100  6.5432762  7.0483151  6.5909038  6.6034806  0.065535466
+ 100  5.9177588  6.5244303  5.9288786  5.9535684  0.074405632
Difference at 99.5% confidence
        -0.649912 +/- 0.0306379
        -9.84196% +/- 0.463966%
        (Student's t, pooled s = 0.070111)

All differences are statistically significant at a 99.5 CI.

--

Thoughts?  Tweaks?  ok?

Index: ts.c
===================================================================
RCS file: /cvs/src/usr.bin/ts/ts.c,v
retrieving revision 1.8
diff -u -p -r1.8 ts.c
--- ts.c        7 Jul 2022 10:40:25 -0000       1.8
+++ ts.c        13 Jul 2022 05:41:55 -0000
@@ -17,8 +17,10 @@
  */
 
 #include <sys/types.h>
+#include <sys/queue.h>
 #include <sys/time.h>
 
+#include <assert.h>
 #include <err.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -27,18 +29,25 @@
 #include <unistd.h>
 #include <time.h>
 
-static char            *format = "%b %d %H:%M:%S";
+SIMPLEQ_HEAD(, usec) usec_queue = SIMPLEQ_HEAD_INITIALIZER(usec_queue);
+struct usec {
+       SIMPLEQ_ENTRY(usec) next;
+       char *pos;
+};
+
 static char            *buf;
 static char            *outbuf;
 static size_t           bufsize;
 static size_t           obsize;
 
-static void             fmtfmt(const struct timespec *);
+static void             fmtfmt(const char *);
+static void             fmtprint(const struct timespec *);
 static void __dead      usage(void);
 
 int
 main(int argc, char *argv[])
 {
+       char *format = "%b %d %H:%M:%S";
        int iflag, mflag, sflag;
        int ch, prev;
        struct timespec start, now, utc_offset, ts;
@@ -75,18 +84,7 @@ main(int argc, char *argv[])
        if ((iflag && sflag) || argc > 1)
                usage();
 
-       if (argc == 1)
-               format = *argv;
-
-       bufsize = strlen(format) + 1;
-       if (bufsize > SIZE_MAX / 10)
-               errx(1, "format string too big");
-       bufsize *= 10;
-       obsize = bufsize;
-       if ((buf = calloc(1, bufsize)) == NULL)
-               err(1, NULL);
-       if ((outbuf = calloc(1, obsize)) == NULL)
-               err(1, NULL);
+       fmtfmt(argc == 1 ? *argv : format);
 
        /* force UTC for interval calculations */
        if (iflag || sflag)
@@ -106,7 +104,7 @@ main(int argc, char *argv[])
                                timespecadd(&now, &utc_offset, &ts);
                        else
                                ts = now;
-                       fmtfmt(&ts);
+                       fmtprint(&ts);
                        if (iflag)
                                start = now;
                }
@@ -132,15 +130,30 @@ usage(void)
  * so you can format while you format
  */
 static void
-fmtfmt(const struct timespec *ts)
+fmtfmt(const char *format)
 {
-       struct tm *tm;
-       char *f, us[7];
-
-       if ((tm = localtime(&ts->tv_sec)) == NULL)
-               err(1, "localtime");
+       char *f;
+       struct usec *u;
+       size_t len;
+
+       /*
+        * The nonstandard specifiers (%.S, %.s, and %.T) are all
+        * 3-byte sequences that require 9 bytes of space in the
+        * parsed format string.  No other specifiers require extra
+        * space.  If we allocate triple the length of the input
+        * string we'll have enough room.
+        */
+       len = strlen(format);
+       if (len > SIZE_MAX / 3)
+               errx(1, "format is too long");
+       bufsize = len * 3;
+       if (bufsize + 1 < bufsize)
+               errx(1, "format is too long");
+       bufsize++;      /* +1 for the NUL */
+       buf = calloc(1, bufsize);
+       if (buf == NULL)
+               err(1, NULL);
 
-       snprintf(us, sizeof(us), "%06ld", ts->tv_nsec / 1000);
        strlcpy(buf, format, bufsize);
        f = buf;
 
@@ -159,14 +172,45 @@ fmtfmt(const struct timespec *ts)
                        f[0] = f[1];
                        f[1] = '.';
                        f += 2;
+                       u = malloc(sizeof u);
+                       if (u == NULL)
+                               err(1, NULL);
+                       u->pos = f;
+                       SIMPLEQ_INSERT_TAIL(&usec_queue, u, next);
                        l = strlen(f);
                        memmove(f + 6, f, l + 1);
-                       memcpy(f, us, 6);
                        f += 6;
                }
        } while (*f != '\0');
+       len = strlen(buf);
+       assert(len + 1 <= bufsize);
+       buf[len] = '\0';
+
+       obsize = 512;
+       outbuf = calloc(1, obsize);
+       if (outbuf == NULL)
+               err(1, NULL);
+}
+
+static void
+fmtprint(const struct timespec *ts)
+{
+       struct usec *u;
+       struct tm *tm;
+       char us[7];
+       int len;
+
+       if ((tm = localtime(&ts->tv_sec)) == NULL)
+               err(1, "localtime");
+
+       /* Update any microsecond substrings in the format buffer. */
+       if (!SIMPLEQ_EMPTY(&usec_queue)) {
+               len = snprintf(us, sizeof(us), "%06ld", ts->tv_nsec / 1000);
+               assert(len == 6);
+               SIMPLEQ_FOREACH(u, &usec_queue, next)
+                       memcpy(u->pos, us, 6);
+       }
 
-       *outbuf = '\0';
        if (*buf != '\0') {
                while (strftime(outbuf, obsize, buf, tm) == 0) {
                        if ((outbuf = reallocarray(outbuf, 2, obsize)) == NULL)

Reply via email to