We reduce overhead if we only parse the user's format string once. To
achieve that, this patch does the following:
- Move "format" into main(). We don't need it as a global anymore.
- Move buffer allocation into fmtfmt(). As claudio@ mentioned in a
different thread, we need at most (3 * strlen(format) + 1) bytes for
buf, the parsed format string. I have added a comment explaining the
allocation. I also left an assert(3) to confirm my math. Unsure
whether or not to leave the assert(3) in... we only run the assert
once, so it isn't very costly.
- In fmtfmt(), preallocate a flat 512 bytes for outbuf. We aren't using
the 10x allocation for buf anymore, so keeping it for outbuf seems
arbitrary. If we're going to use a magic number I figure it may as
well be large enough for practical timestamps and a power of two.
Feel free to suggest something else.
- Call fmtfmt() where we used to do buffer allocation in main().
- When parsing the user format string in fmtfmt(), keep a list of
where each microsecond substring lands in buf. We'll need it later.
- Move the printing part of fmtfmt() into a new function, fmtprint().
fmtprint() is now called from the main loop instead of fmtfmt().
- In fmtprint(), before calling strftime(3), update any microsecond
substrings in buf using the list we built earlier in fmtfmt(). Note
that if there aren't any such substrings we don't call snprintf(3)
at all.
--
Okay, on to the numbers. My benchmark input is a million newlines:
$ yes '' | head -n 1000000 > newline-1M.txt
The benchmark is "real time taken to timestamp the input."
Patched ts(1) is about 45% faster using the empty format string.
N=100.
x ts-head.dat1
+ ts-patch.dat1
N Min Max Median Avg Stddev
x 100 1.7420306 1.820921 1.7468652 1.7504513 0.010192689
+ 100 0.96225744 0.98482864 0.96404194 0.96658115 0.0052265094
Difference at 99.5% confidence
-0.78387 +/- 0.00353946
-44.781% +/- 0.202203%
(Student's t, pooled s = 0.00809961)
Patched ts(1) is about 25% faster using the default format string,
i.e. '%b %d %H:%M:%S'. N=100.
x ts-head.dat2
+ ts-patch.dat2
N Min Max Median Avg Stddev
x 100 4.7128656 4.9162049 4.7212578 4.7313946 0.026306241
+ 100 3.5083849 3.7382005 3.5126801 3.5271755 0.03256854
Difference at 99.5% confidence
-1.20422 +/- 0.0129365
-25.4517% +/- 0.273418%
(Student's t, pooled s = 0.0296034)
Patched ts(1) is about 10% faster using the format string '%FT%.TZ'.
This format is similar to the ISO 8601 timestamp format but with added
microsecond granularity. N=100.
x ts-head.dat4
+ ts-patch.dat4
N Min Max Median Avg Stddev
x 100 6.5432762 7.0483151 6.5909038 6.6034806 0.065535466
+ 100 5.9177588 6.5244303 5.9288786 5.9535684 0.074405632
Difference at 99.5% confidence
-0.649912 +/- 0.0306379
-9.84196% +/- 0.463966%
(Student's t, pooled s = 0.070111)
All differences are statistically significant at a 99.5 CI.
--
Thoughts? Tweaks? ok?
Index: ts.c
===================================================================
RCS file: /cvs/src/usr.bin/ts/ts.c,v
retrieving revision 1.8
diff -u -p -r1.8 ts.c
--- ts.c 7 Jul 2022 10:40:25 -0000 1.8
+++ ts.c 13 Jul 2022 05:41:55 -0000
@@ -17,8 +17,10 @@
*/
#include <sys/types.h>
+#include <sys/queue.h>
#include <sys/time.h>
+#include <assert.h>
#include <err.h>
#include <stdint.h>
#include <stdio.h>
@@ -27,18 +29,25 @@
#include <unistd.h>
#include <time.h>
-static char *format = "%b %d %H:%M:%S";
+SIMPLEQ_HEAD(, usec) usec_queue = SIMPLEQ_HEAD_INITIALIZER(usec_queue);
+struct usec {
+ SIMPLEQ_ENTRY(usec) next;
+ char *pos;
+};
+
static char *buf;
static char *outbuf;
static size_t bufsize;
static size_t obsize;
-static void fmtfmt(const struct timespec *);
+static void fmtfmt(const char *);
+static void fmtprint(const struct timespec *);
static void __dead usage(void);
int
main(int argc, char *argv[])
{
+ char *format = "%b %d %H:%M:%S";
int iflag, mflag, sflag;
int ch, prev;
struct timespec start, now, utc_offset, ts;
@@ -75,18 +84,7 @@ main(int argc, char *argv[])
if ((iflag && sflag) || argc > 1)
usage();
- if (argc == 1)
- format = *argv;
-
- bufsize = strlen(format) + 1;
- if (bufsize > SIZE_MAX / 10)
- errx(1, "format string too big");
- bufsize *= 10;
- obsize = bufsize;
- if ((buf = calloc(1, bufsize)) == NULL)
- err(1, NULL);
- if ((outbuf = calloc(1, obsize)) == NULL)
- err(1, NULL);
+ fmtfmt(argc == 1 ? *argv : format);
/* force UTC for interval calculations */
if (iflag || sflag)
@@ -106,7 +104,7 @@ main(int argc, char *argv[])
timespecadd(&now, &utc_offset, &ts);
else
ts = now;
- fmtfmt(&ts);
+ fmtprint(&ts);
if (iflag)
start = now;
}
@@ -132,15 +130,30 @@ usage(void)
* so you can format while you format
*/
static void
-fmtfmt(const struct timespec *ts)
+fmtfmt(const char *format)
{
- struct tm *tm;
- char *f, us[7];
-
- if ((tm = localtime(&ts->tv_sec)) == NULL)
- err(1, "localtime");
+ char *f;
+ struct usec *u;
+ size_t len;
+
+ /*
+ * The nonstandard specifiers (%.S, %.s, and %.T) are all
+ * 3-byte sequences that require 9 bytes of space in the
+ * parsed format string. No other specifiers require extra
+ * space. If we allocate triple the length of the input
+ * string we'll have enough room.
+ */
+ len = strlen(format);
+ if (len > SIZE_MAX / 3)
+ errx(1, "format is too long");
+ bufsize = len * 3;
+ if (bufsize + 1 < bufsize)
+ errx(1, "format is too long");
+ bufsize++; /* +1 for the NUL */
+ buf = calloc(1, bufsize);
+ if (buf == NULL)
+ err(1, NULL);
- snprintf(us, sizeof(us), "%06ld", ts->tv_nsec / 1000);
strlcpy(buf, format, bufsize);
f = buf;
@@ -159,14 +172,45 @@ fmtfmt(const struct timespec *ts)
f[0] = f[1];
f[1] = '.';
f += 2;
+ u = malloc(sizeof u);
+ if (u == NULL)
+ err(1, NULL);
+ u->pos = f;
+ SIMPLEQ_INSERT_TAIL(&usec_queue, u, next);
l = strlen(f);
memmove(f + 6, f, l + 1);
- memcpy(f, us, 6);
f += 6;
}
} while (*f != '\0');
+ len = strlen(buf);
+ assert(len + 1 <= bufsize);
+ buf[len] = '\0';
+
+ obsize = 512;
+ outbuf = calloc(1, obsize);
+ if (outbuf == NULL)
+ err(1, NULL);
+}
+
+static void
+fmtprint(const struct timespec *ts)
+{
+ struct usec *u;
+ struct tm *tm;
+ char us[7];
+ int len;
+
+ if ((tm = localtime(&ts->tv_sec)) == NULL)
+ err(1, "localtime");
+
+ /* Update any microsecond substrings in the format buffer. */
+ if (!SIMPLEQ_EMPTY(&usec_queue)) {
+ len = snprintf(us, sizeof(us), "%06ld", ts->tv_nsec / 1000);
+ assert(len == 6);
+ SIMPLEQ_FOREACH(u, &usec_queue, next)
+ memcpy(u->pos, us, 6);
+ }
- *outbuf = '\0';
if (*buf != '\0') {
while (strftime(outbuf, obsize, buf, tm) == 0) {
if ((outbuf = reallocarray(outbuf, 2, obsize)) == NULL)