Elukey has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/322257

Change subject: [WIP] Refactor the parsing functions out of the main C file
......................................................................

[WIP] Refactor the parsing functions out of the main C file

This change should be a no-op: the plan is to move the parsing-related
functions into separate files and expose only the functions needed
via header files.
The next step will be to remove, as much as possible, the use of
global state from the parsing functions, allowing them to be
idempotent (and unit-testable).

Bug: T147440
Change-Id: Icaf31dd569fd87a6c19a5062832ccdd647955e77
---
M Makefile
A parsers.c
A parsers.h
A string_utils.c
A string_utils.h
M varnishkafka.c
6 files changed, 597 insertions(+), 397 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/software/varnish/varnishkafka refs/changes/57/322257/1

diff --git a/Makefile b/Makefile
index 1370bc8..5c00253 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 
 PROG    = varnishkafka
-SRCS    = varnishkafka.c config.c base64.c
+SRCS    = varnishkafka.c config.c base64.c string_utils.c parsers.c
 
 DESTDIR?=/usr/local
 
diff --git a/parsers.c b/parsers.c
new file mode 100644
index 0000000..ab2fa8c
--- /dev/null
+++ b/parsers.c
@@ -0,0 +1,359 @@
+/*
+ * varnishkafka
+ *
+ * Copyright (c) 2016 Wikimedia Foundation
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#define _GNU_SOURCE          /* for strndupa() */
+#include <ctype.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <string.h>
+#include <time.h>
+#include <assert.h>
+#include <errno.h>
+#include <sys/queue.h>
+#include <syslog.h>
+#include <netdb.h>
+#include <limits.h>
+#include <stdbool.h>
+
+#include "varnishkafka.h"
+#include "string_utils.h"
+#include "base64.h"
+
+/**
+ * Reserve 'len' bytes from the scratch area of 'lp' and return a pointer to
+ * the start of the reserved region.
+ *
+ * If the request does not fit within conf.scratch_size a critical message
+ * is logged and the process exits with status 99.
+ */
+static char *scratch_alloc (struct logline *lp, size_t len) {
+    /* 'scratch_size - len' is only evaluated once len <= scratch_size,
+     * so the subtraction cannot wrap around. */
+    const bool fits = len <= conf.scratch_size &&
+                      (conf.scratch_size - len) >= lp->sof;
+
+    if (unlikely(!fits)) {
+        vk_log("SCRATCH", LOG_CRIT, "Ran out of scratch_size, limit is %zu", conf.scratch_size);
+        exit(99);
+    }
+
+    char *const start = lp->scratch + lp->sof;
+    lp->sof += len;
+    return start;
+}
+
+
+/**
+ * Copy 'len' bytes from 'src' into a fresh scratch allocation of 'lp'
+ * (scratch_alloc + memcpy) and return the copy.
+ */
+static char *scratch_cpy (struct logline *lp, const char *src, size_t len) {
+    /* memcpy() returns its destination, i.e. the new scratch region */
+    return memcpy(scratch_alloc(lp, len), src, len);
+}
+
+
+/**
+ * Like scratch_cpy(), but escapes the characters listed in 'map' below as
+ * two-character backslash sequences, and every other non-printable byte as
+ * a backslash followed by four octal digits.
+ *
+ * On entry *len is the number of bytes to read from 'src'; on return it is
+ * the length of the escaped copy.  The copy lives in the scratch area of
+ * 'lp' and is not NUL-terminated.
+ */
+static char* scratch_cpy_esc (struct logline *lp, const char *src, size_t* len) {
+    static const char *map[256] = {
+        ['\t'] = "\\t",
+        ['\n'] = "\\n",
+        ['\r'] = "\\r",
+        ['\v'] = "\\v",
+        ['\f'] = "\\f",
+        ['"']  = "\\\"",
+        [' ']  = "\\ ",
+    };
+
+    const size_t in_len = *len;
+
+    /* Nothing to escape: hand back an empty allocation.  This also avoids
+     * declaring a zero-length array below. */
+    if (unlikely(in_len == 0))
+        return scratch_alloc(lp, 0);
+
+    /* Worst case: every input byte becomes a 5 byte octal escape. */
+    char dst[in_len * 5];
+    char *d = dst;
+    const char *s = src;
+    const char *srcend = src + in_len;
+
+    while (s < srcend) {
+        /* Work on the byte value 0..255: plain 'char' may be signed, and a
+         * negative value would index before 'map' and is not a valid
+         * argument for isprint(). */
+        const unsigned char c = (unsigned char)*s;
+        size_t outlen = 1;
+        const char *out;
+        char tmp[6];
+
+        if (unlikely((out = map[c]) != NULL)) {
+            /* Escape from 'map' (all entries are two characters) */
+            outlen = 2;
+        } else if (unlikely(!isprint(c))) {
+            /* Escape non-printables as \<octal>: backslash + 4 digits.
+             * The backslash has to be written as "\\" in the format;
+             * "\%" is not a valid escape sequence (compilers typically
+             * warn and drop the backslash). */
+            snprintf(tmp, sizeof(tmp), "\\%04o", (unsigned int)c);
+            out = tmp;
+            outlen = 5;
+        } else {
+            /* No escaping */
+            out = s;
+        }
+
+        /* The write must stay inside 'dst' */
+        assert((size_t)(d - dst) + outlen <= sizeof(dst));
+
+        memcpy(d, out, outlen);
+        d += outlen;
+        s++;
+    }
+
+    const size_t out_len = (size_t)(d - dst);
+    *len = out_len;
+    return scratch_cpy(lp, dst, out_len);
+}
+
+
+/**
+ * printf-style formatting into a new scratch allocation of 'lp'.
+ *
+ * The output is formatted twice: once to measure it, once to write it.
+ * *len_out is set to the length of the result (excluding the terminating
+ * NUL, which is also stored in the scratch).  Returns the formatted string.
+ * Asserts that the formatted output is not empty.
+ */
+__attribute__((format(printf,3,4)))
+static char* scratch_printf (struct logline *lp, size_t* len_out, const char *fmt, ...) {
+    va_list ap;
+    int r;
+
+    /* First pass: only measure the formatted length */
+    va_start(ap, fmt);
+    r = vsnprintf(NULL, 0, fmt, ap);
+    va_end(ap);
+
+    assert(r > 0);
+    const size_t rst = (size_t)r;
+
+    *len_out = rst;
+    char *dst = scratch_alloc(lp, rst + 1);
+
+    /* Second pass: a va_list is indeterminate once it has been consumed,
+     * so it is started again rather than copied from an unset list. */
+    va_start(ap, fmt);
+    vsnprintf(dst, rst + 1, fmt, ap);
+    va_end(ap);
+
+    return dst;
+}
+
+/**
+ * Raw assignment into lp->match: stores 'ptr' and 'len' in the match slot
+ * selected by tag->fmt->idx.  The pointer is stored as-is, no copy is made
+ * here; callers are expected to go through match_assign() below.
+ */
+void match_assign0 (const struct tag *tag, struct logline *lp,
+                    const char *ptr, size_t len) {
+    lp->match[tag->fmt->idx].ptr = ptr;
+    lp->match[tag->fmt->idx].len = len;
+}
+
+/**
+ * Record 'ptr' (of 'len' bytes) as the match for 'tag' in logline 'lp'.
+ *
+ * When the tag's formatter has FMT_F_ESCAPE set, an escaped copy is stored.
+ * Otherwise 'force_copy' decides: pass 'true' for non-persistent data
+ * (e.g. a stack buffer) so that it is copied into the scratch, 'false' to
+ * keep pointing at the caller's data (e.g. the original VSL shm payload).
+ */
+void match_assign (const struct tag *tag, struct logline *lp,
+                   const char *ptr, size_t len, bool force_copy) {
+    const char *value = ptr;
+    size_t value_len = len;
+
+    if (unlikely(tag->fmt->flags & FMT_F_ESCAPE)) {
+        /* escaping always yields a scratch copy, whatever force_copy says */
+        value = scratch_cpy_esc(lp, ptr, &value_len);
+    } else if (force_copy) {
+        /* copy volatile data */
+        value = scratch_cpy(lp, ptr, len);
+    }
+
+    match_assign0(tag, lp, value, value_len);
+}
+
+/**
+ * Misc parsers for formatters
+ * (check format_parse() for more info)
+ */
+
+/**
+ * Assign the URL path of a tag's payload: everything before the first '?',
+ * or the whole payload when there is no query string.
+ * Returns the length of the assigned part.
+ */
+size_t parse_U (const struct tag *tag, struct logline *lp,
+                const char *ptr, size_t len) {
+    const char *qmark = strnchr(ptr, len, '?');
+    const size_t path_len = qmark ? (size_t)(qmark - ptr) : len;
+
+    /* points into the payload itself, so no copy is requested */
+    match_assign(tag, lp, ptr, path_len, false);
+    return path_len;
+}
+
+/**
+ * Assign the query string of a tag's payload: everything from the first
+ * '?' (inclusive) to the end.  Returns its length, or 0 (and assigns
+ * nothing) when the payload contains no '?'.
+ */
+size_t parse_q (const struct tag *tag, struct logline *lp,
+                const char *ptr, size_t len) {
+    const char *qmark = strnchr(ptr, len, '?');
+
+    if (qmark == NULL)
+        return 0;
+
+    const size_t qs_len = len - (size_t)(qmark - ptr);
+
+    /* points into the payload itself, so no copy is requested */
+    match_assign(tag, lp, qmark, qs_len, false);
+    return qs_len;
+}
+
+/**
+ * Format the timestamp found in a tag's payload with strftime() and assign
+ * the resulting string.
+ *
+ * The payload is read as a decimal number of seconds and converted to local
+ * time.  The format is tag->var when set and non-empty, otherwise the
+ * default "[%d/%b/%Y:%T %z]".  Returns the length of the formatted string.
+ */
+size_t parse_t (const struct tag *tag, struct logline *lp,
+                const char *ptr, size_t UNUSED len) {
+    const char *timefmt = "[%d/%b/%Y:%T %z]";
+
+    /*
+     * The special tag->var form "end:strftime" makes Varnishkafka use the
+     * SLT_Timestamp 'Resp' instead of 'Start' for timestamp formatters;
+     * that prefix is not part of the strftime format and is skipped here.
+     */
+    if (tag->var) {
+        const char *custom = tag->var;
+
+        if (tag->flags & TAG_F_TIMESTAMP_END)
+            custom += strlen(APACHE_LOG_END_PREFIX);
+
+        /* an empty remainder keeps the default format */
+        if (*custom != '\0')
+            timefmt = custom;
+    }
+
+    struct tm tm;
+    time_t t = strtoul(ptr, NULL, 10);
+    localtime_r(&t, &tm);
+
+    /* Format time string */
+    char dst[64];
+    const size_t tlen = strftime(dst, sizeof(dst), timefmt, &tm);
+
+    /* 'dst' is a stack buffer, so the match must be copied */
+    match_assign(tag, lp, dst, tlen, true);
+    return tlen;
+}
+
+/**
+ * Extract the user name from a "basic <base64>" authorization value.
+ *
+ * 'ptr'/'len' is the tag payload; the "basic " scheme is matched case
+ * insensitively.  The base64 part is decoded, everything from the first
+ * ':' on (the password) is cut off, and the remaining user name is copied
+ * into the scratch of 'lp' and assigned as the match for 'tag'.
+ *
+ * Returns the length of the user name, or 0 when the payload is not a
+ * usable Basic credential (nothing is assigned in that case).
+ */
+size_t parse_auth_user (const struct tag *tag, struct logline *lp,
+                        const char *ptr, size_t len) {
+    const size_t prefix_len = sizeof("basic ") - 1;
+
+    /* Check the length first: 'len - prefix_len' would wrap around for
+     * short payloads, and the prefix compare must not look past 'len'. */
+    if (unlikely(len <= prefix_len))
+        return 0;
+
+    const size_t rlen = len - prefix_len;
+
+    if (unlikely(strncasecmp(ptr, "basic ", prefix_len) || (rlen % 2)))
+        return 0;
+
+    /* Upper bound for the decoded size */
+    const size_t dlen = (rlen * 4) / 3;
+
+    /* Protect our stack */
+    if (unlikely(dlen > 1000))
+        return 0;
+
+    char tmp[dlen + 1];
+
+    /* NOTE(review): VB64_decode2() is defined elsewhere; this assumes it
+     * returns the number of decoded bytes, or a value <= 0 on failure.
+     * The result is kept in a signed variable so that a negative error
+     * code is not mistaken for a huge length. */
+    const long decoded = (long)VB64_decode2(tmp, dlen, ptr + prefix_len, rlen);
+    if (decoded <= 0 || (size_t)decoded > dlen)
+        return 0;
+
+    /* Terminate explicitly so that strlen() below is always bounded */
+    tmp[decoded] = '\0';
+
+    /* Strip password */
+    char *q = strnchr_noconst(tmp, (size_t)decoded, ':');
+    if (q)
+        *q = '\0';
+
+    /* 'tmp' is a stack buffer, so the match must be copied */
+    const size_t out_len = strlen(tmp);
+    match_assign(tag, lp, tmp, out_len, true);
+    return out_len;
+}
+
+
+/* The VCL_call tag carries several kinds of info; only the payloads that
+ * are exactly "HIT", "MISS" or "PASS" are assigned (returning their
+ * length), anything else is discarded with a return value of 0.
+ */
+size_t parse_vcl_handling (const struct tag *tag, struct logline *lp,
+                           const char *ptr, size_t len) {
+    static const char *const handled[] = { "HIT", "MISS", "PASS" };
+    const size_t count = sizeof(handled) / sizeof(handled[0]);
+
+    for (size_t i = 0; i < count; i++) {
+        const size_t hlen = strlen(handled[i]);
+
+        if (len == hlen && !strncmp(ptr, handled[i], hlen)) {
+            match_assign(tag, lp, ptr, len, false);
+            return len;
+        }
+    }
+
+    return 0;
+}
+
+
+/**
+ * Assign conf.sequence_number, formatted as an unsigned decimal number, as
+ * the match for 'tag'.  The payload ('ptr', 'len') is not used.
+ * Returns the length of the formatted number.
+ */
+size_t parse_seq (const struct tag *tag, struct logline *lp,
+                  const char *ptr UNUSED, size_t len UNUSED) {
+    size_t seq_len = 0;
+    /* the string already lives in the scratch, so no copy is requested */
+    const char *seq_str = scratch_printf(lp, &seq_len, "%"PRIu64, conf.sequence_number);
+
+    match_assign(tag, lp, seq_str, seq_len, false);
+    return seq_len;
+}
+
+
+/**
+ * Assign the time value found in a tag's payload, formatted according to
+ * the formatter id of 'tag':
+ *   'D' - the value multiplied by 1000000, printed without decimals
+ *   'T' - the value as-is, printed with "%f"
+ * Returns the length of the formatted value; 0 when the payload parses to
+ * zero or the formatter id is neither 'D' nor 'T' (nothing is assigned).
+ *
+ * NOTE(review): the previous comment stated that SLT_Timestamp logs timing
+ * info in ms, while the 'T' branch prints the value unscaled and the 'D'
+ * branch scales it by 1e6 - confirm the unit of the payload.
+ */
+size_t parse_DT (const struct tag *tag, struct logline *lp,
+                 const char *ptr, size_t len) {
+
+    /* ptr points to the original tag string, which is why a bounded
+     * temporary copy is made before the number is extracted
+     */
+    const double time_taken = atof(strndupa(ptr, len));
+
+    if (!time_taken)
+        return 0;
+
+    const bool is_D = tag->fmt->id == (int)'D';
+    const bool is_T = !is_D && tag->fmt->id == (int)'T';
+    size_t len_out = 0;
+
+    if (!is_D && !is_T)
+        return len_out;
+
+    char *out = is_D
+        ? scratch_printf(lp, &len_out, "%.0f", time_taken * 1000000.0f)
+        : scratch_printf(lp, &len_out, "%f", time_taken);
+
+    /* the string already lives in the scratch, so no copy is requested */
+    match_assign(tag, lp, out, len_out, false);
+    return len_out;
+}
\ No newline at end of file
diff --git a/parsers.h b/parsers.h
new file mode 100644
index 0000000..b5cb69a
--- /dev/null
+++ b/parsers.h
@@ -0,0 +1,84 @@
+/*
+ * varnishkafka
+ *
+ * Copyright (c) 2016 Wikimedia Foundation
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef VARNISHKAFKA_PARSERS_H
+#define VARNISHKAFKA_PARSERS_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/* Only pointers to these are used here; the full definitions come from
+ * the project headers.  Declaring the tags at file scope keeps the
+ * prototypes below compatible regardless of include order. */
+struct tag;
+struct logline;
+
+/**
+ * Raw assign to lp->match (no copy, no escaping); callers are expected to
+ * go through match_assign() below.
+ */
+void match_assign0 (const struct tag *tag, struct logline *lp,
+                    const char *ptr, size_t len);
+
+/**
+ * Assign 'ptr' of size 'len' as a match for 'tag' in logline 'lp'.
+ *
+ * if 'ptr' is non-persistent (e.g. stack allocation, as opposed to original
+ * VSL shm tag payload), you must set force_copy to 'true'
+ */
+void match_assign (const struct tag *tag, struct logline *lp,
+                   const char *ptr, size_t len, bool force_copy);
+
+/**
+ * Misc parsers for formatters
+ * (check format_parse() for more info)
+ *
+ * Each parser receives the tag payload as 'ptr'/'len', assigns its result
+ * as the match for 'tag' in 'lp' and returns the length of that result
+ * (0 when nothing was assigned).
+ */
+
+/**
+ * Parse a URL (without query string) retrieved from a tag's payload.
+ */
+size_t parse_U (const struct tag *tag, struct logline *lp,
+                const char *ptr, size_t len);
+
+/**
+ * Parse a query-string retrieved from a tag's payload.
+ */
+size_t parse_q (const struct tag *tag, struct logline *lp,
+                const char *ptr, size_t len);
+
+/**
+ * Parse a timestamp retrieved from a tag's payload.
+ */
+size_t parse_t (const struct tag *tag, struct logline *lp,
+                const char *ptr, size_t len);
+
+/**
+ * Parse the user name out of a "basic <base64>" authorization payload.
+ */
+size_t parse_auth_user (const struct tag *tag, struct logline *lp,
+                        const char *ptr, size_t len);
+
+/* The VCL_call is used for several info; this function matches the only
+ * ones that varnishkafka cares about and discards the other ones.
+ */
+size_t parse_vcl_handling (const struct tag *tag, struct logline *lp,
+                           const char *ptr, size_t len);
+
+/**
+ * Assign the current sequence number; 'ptr' and 'len' are ignored.
+ */
+size_t parse_seq (const struct tag *tag, struct logline *lp,
+                  const char *ptr, size_t len);
+
+/**
+ * Parse a time value for the 'D' and 'T' formatters.
+ */
+size_t parse_DT (const struct tag *tag, struct logline *lp,
+                 const char *ptr, size_t len);
+
+#endif /* VARNISHKAFKA_PARSERS_H */
diff --git a/string_utils.c b/string_utils.c
new file mode 100644
index 0000000..c7d9166
--- /dev/null
+++ b/string_utils.c
@@ -0,0 +1,101 @@
+/*
+ * varnishkafka
+ *
+ * Copyright (c) 2013 Wikimedia Foundation
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+
+/**
+ * Return a pointer to the first occurrence of 'c' within the first 'len'
+ * bytes of 's', or NULL if there is none.  Does not stop at NUL bytes.
+ * Same as strnchr(), for buffers the caller wants to modify.
+ */
+char *strnchr_noconst (char *s, size_t len, int c) {
+       for (size_t i = 0; i < len; i++) {
+               if (s[i] == c)
+                       return &s[i];
+       }
+
+       return NULL;
+}
+
+
+/**
+ * Return a pointer to the first occurrence of 'c' within the first 'len'
+ * bytes of 's', or NULL if there is none.  Does not stop at NUL bytes.
+ */
+const char *strnchr (const char *s, size_t len, int c) {
+       for (size_t i = 0; i < len; i++) {
+               if (s[i] == c)
+                       return &s[i];
+       }
+
+       return NULL;
+}
+
+
+/**
+ * Looks for any character of the NUL-terminated string 'match' within the
+ * first 'len' bytes of 's' and returns a pointer to the first one found,
+ * or NULL if none of them occurs.
+ */
+const char *strnchrs (const char *s, size_t len, const char *match) {
+       const char *end = s + len;
+       char map[256] = {0};
+
+       /* The lookup table is indexed with the byte value 0..255: plain
+        * 'char' may be signed, and a negative index would land outside
+        * of 'map'. */
+       while (*match)
+               map[(unsigned char)*(match++)] = 1;
+
+       while (s < end) {
+               if (map[(unsigned char)*s])
+                       return s;
+               s++;
+       }
+
+       return NULL;
+}
+
+
+/**
+ * Splits 'ptr' (with length 'len') by delimiter 'delim' and stores the
+ * start and length of the Nth ('col', counting from 1) non-empty column in
+ * '*dst' and '*dstlen'.  Empty columns (consecutive delimiters) are not
+ * counted.  The input is not modified, the result points into it.
+ *
+ * Returns 1 if the column was found, else 0 (outputs left untouched).
+ */
+int column_get (int col, char delim, const char *ptr, size_t len,
+                      const char **dst, size_t *dstlen) {
+       const char *const end = ptr + len;
+       const char *field = ptr;  /* start of the column being scanned */
+       int seen = 0;             /* non-empty columns completed so far */
+
+       for (const char *cur = ptr; ; cur++) {
+               const int at_end = (cur >= end);
+
+               if (!at_end && *cur != delim)
+                       continue;
+
+               /* a delimiter or the end of input closes the column */
+               if (cur != field && col == ++seen) {
+                       *dst = field;
+                       *dstlen = (cur - field);
+                       return 1;
+               }
+
+               if (at_end)
+                       return 0;
+
+               field = cur + 1;
+       }
+}
\ No newline at end of file
diff --git a/string_utils.h b/string_utils.h
new file mode 100644
index 0000000..d793921
--- /dev/null
+++ b/string_utils.h
@@ -0,0 +1,50 @@
+/*
+ * varnishkafka
+ *
+ * Copyright (c) 2013 Wikimedia Foundation
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef VARNISHKAFKA_STRING_UTILS_H
+#define VARNISHKAFKA_STRING_UTILS_H
+
+#include <stddef.h>  /* size_t */
+
+/**
+ * Returns a pointer to the first occurrence of 'c' within the first 'len'
+ * bytes of 's', or NULL if there is none.  For modifiable buffers.
+ */
+char *strnchr_noconst (char *s, size_t len, int c);
+
+/**
+ * Returns a pointer to the first occurrence of 'c' within the first 'len'
+ * bytes of 's', or NULL if there is none.
+ */
+const char *strnchr (const char *s, size_t len, int c);
+
+/**
+ * Looks for any matching character from 'match' in 's' and returns
+ * a pointer to the first match, or NULL if none of 'match' matched 's'.
+ */
+const char *strnchrs (const char *s, size_t len, const char *match);
+
+/**
+ * Splits 'ptr' (with length 'len') by delimiter 'delim' and assigns
+ * the Nth ('col') column to '*dst' and '*dstlen'.
+ * Does not modify the input data ('ptr'), only points to it.
+ *
+ * Returns 1 if the column was found, else 0.
+ *
+ * NOTE: Columns start at 1.
+ */
+int column_get (int col, char delim, const char *ptr, size_t len,
+                       const char **dst, size_t *dstlen);
+
+#endif /* VARNISHKAFKA_STRING_UTILS_H */
\ No newline at end of file
diff --git a/varnishkafka.c b/varnishkafka.c
index c395290..30b6734 100644
--- a/varnishkafka.c
+++ b/varnishkafka.c
@@ -49,6 +49,8 @@
 #include <stdbool.h>
 
 #include "varnishkafka.h"
+#include "string_utils.h"
+#include "parsers.h"
 #include "base64.h"
 
 /* Kafka handle */
@@ -259,402 +261,6 @@
 
        return 0;
 }
-
-
-/**
- * Allocate persistent memory space ('len' bytes) in lp scratch
- */
-static char *scratch_alloc (struct logline *lp, size_t len) {
-       char *ptr;
-
-       if (unlikely(len > conf.scratch_size || (conf.scratch_size - len) < 
lp->sof)) {
-               vk_log("SCRATCH", LOG_CRIT, "Ran out of scratch_size, limit is 
%zu", conf.scratch_size);
-               exit(99);
-       }
-
-       ptr = lp->scratch + lp->sof;
-       lp->sof += len;
-       return ptr;
-}
-
-
-/**
- * essentially, scratch_alloc + memcpy
- */
-static char *scratch_cpy (struct logline *lp, const char *src, size_t len) {
-       char *dst;
-
-       dst = scratch_alloc(lp, len);
-       memcpy(dst, src, len);
-       return dst;
-}
-
-
-/**
- * like scratch_cpy, but escapes all unprintable characters as well as the
- * ones defined in 'map' below.  sets *len to the escaped length of the result
- */
-static char* scratch_cpy_esc (struct logline *lp, const char *src, size_t* 
len) {
-       static const char *map[256] = {
-               ['\t'] = "\\t",
-               ['\n'] = "\\n",
-               ['\r'] = "\\r",
-               ['\v'] = "\\v",
-               ['\f'] = "\\f",
-               ['"']  = "\\\"",
-               [' ']  = "\\ ",
-       };
-
-
-       /* Allocate initial space for escaped string.
-        * The maximum expansion size per character is 5 (octal coding).
-        */
-       const size_t in_len = *len;
-       char dst[in_len * 5];
-       char *d = dst;
-       const char *s = src;
-       const char *srcend = src + in_len;
-
-       while (s < srcend) {
-               size_t outlen = 1;
-               const char *out;
-               char tmp[6];
-
-               if (unlikely((out = map[(int)*s]) != NULL)) {
-                       /* Escape from 'map' */
-                       outlen = 2;
-               } else if (unlikely(!isprint(*s))) {
-                       /* Escape non-printables as \<octal> */
-                       sprintf(tmp, "\%04o", (int)*s);
-                       out = tmp;
-                       outlen = 5;
-               } else {
-                       /* No escaping */
-                       out = s;
-               }
-
-               assert(outlen < (in_len * 5));
-
-               if (likely(outlen == 1)) {
-                       *(d++) = *out;
-               } else {
-                       memcpy(d, out, outlen);
-                       d += outlen;
-               }
-
-               s++;
-       }
-
-       assert(d > dst);
-       const size_t out_len = d - dst;
-       *len = out_len;
-       return scratch_cpy(lp, dst, out_len);
-}
-
-
-/**
- * sprintf into a new scratch allocation.  *len_out will be set to the length
- * of the result in the scratch.
- */
-__attribute__((format(printf,3,4)))
-static char* scratch_printf (struct logline *lp, size_t* len_out, const char 
*fmt, ...) {
-       va_list ap, ap2;
-       int r;
-
-       va_copy(ap2, ap);
-       va_start(ap2, fmt);
-       r = vsnprintf(NULL, 0, fmt, ap2);
-       va_end(ap2);
-
-       assert(r > 0);
-       size_t rst = (size_t)r;
-
-       *len_out = rst;
-       char *dst = scratch_alloc(lp, rst + 1);
-
-       va_start(ap, fmt);
-       vsnprintf(dst, r+1, fmt, ap);
-       va_end(ap);
-
-       return dst;
-}
-
-/**
- * raw assign to lp->match, only used by match_assign() below
- */
-static void match_assign0 (const struct tag *tag, struct logline *lp,
-                                 const char *ptr, size_t len) {
-       lp->match[tag->fmt->idx].ptr = ptr;
-       lp->match[tag->fmt->idx].len = len;
-}
-
-
-/**
- * Assign 'ptr' of size 'len' as a match for 'tag' in logline 'lp'.
- *
- * if 'ptr' is non-persistent (e.g. stack allocation, as opposed to original
- * VSL shm tag payload), you must set force_copy to 'true'
- */
-static void match_assign (const struct tag *tag, struct logline *lp,
-                         const char *ptr, size_t len, bool force_copy) {
-
-       if (unlikely(tag->fmt->flags & FMT_F_ESCAPE)) {
-               size_t len_io = len;
-               char* escaped = scratch_cpy_esc(lp, ptr, &len_io);
-               match_assign0(tag, lp, escaped, len_io);
-       } else {
-               if (force_copy) /* copy volatile data */
-                       match_assign0(tag, lp, scratch_cpy(lp, ptr, len), len);
-               else  /* point to persistent data */
-                       match_assign0(tag, lp, ptr, len);
-       }
-}
-
-
-static char *strnchr_noconst (char *s, size_t len, int c) {
-       char *end = s + len;
-       while (s < end) {
-               if (*s == c)
-                       return s;
-               s++;
-       }
-
-       return NULL;
-}
-
-static const char *strnchr (const char *s, size_t len, int c) {
-       const char *end = s + len;
-       while (s < end) {
-               if (*s == c)
-                       return s;
-               s++;
-       }
-
-       return NULL;
-}
-
-
-/**
- * Looks for any matching character from 'match' in 's' and returns
- * a pointer to the first match, or NULL if none of 'match' matched 's'.
- */
-static const char *strnchrs (const char *s, size_t len, const char *match) {
-       const char *end = s + len;
-       char map[256] = {};
-       while (*match)
-               map[(int)*(match++)] = 1;
-
-       while (s < end) {
-               if (map[(int)*s])
-                       return s;
-               s++;
-       }
-
-       return NULL;
-}
-
-
-/**
- * Splits 'ptr' (with length 'len') by delimiter 'delim' and assigns
- * the Nth ('col') column to '*dst' and '*dstlen'.
- * Does not modify the input data ('ptr'), only points to it.
- *
- * Returns 1 if the column was found, else 0.
- *
- * NOTE: Columns start at 1.
- */
-static int column_get (int col, char delim, const char *ptr, size_t len,
-                      const char **dst, size_t *dstlen) {
-       const char *s = ptr;
-       const char *b = s;
-       const char *end = s + len;
-       int i = 0;
-
-       while (s < end) {
-               if (*s != delim) {
-                       s++;
-                       continue;
-               }
-
-               if (s != b && col == ++i) {
-                       *dst = b;
-                       *dstlen = (s - b);
-                       return 1;
-               }
-
-               b = ++s;
-       }
-
-       if (s != b && col == ++i) {
-               *dst = b;
-               *dstlen = (s - b);
-               return 1;
-       }
-
-       return 0;
-}
-
-
-
-/**
- * Misc parsers for formatters
- * (check format_parse() for more info)
- */
-
-/**
- * Parse a URL (without query string) retrieved from a tag's payload.
- */
-static size_t parse_U (const struct tag *tag, struct logline *lp,
-                   const char *ptr, size_t len) {
-       const char *qs;
-       size_t slen = len;
-
-       // Remove the query string if present
-       if ((qs = strnchr(ptr, len, '?')))
-               slen = (qs - ptr);
-
-       match_assign(tag, lp, ptr, slen, false);
-       return slen;
-}
-
-/**
- * Parse a query-string retrieved from a tag's payload.
- */
-static size_t parse_q (const struct tag *tag, struct logline *lp,
-                   const char *ptr, size_t len) {
-       const char *qs;
-       size_t slen = len;
-
-       if (!(qs = strnchr(ptr, len, '?')))
-               return 0;
-
-       slen = len - (qs - ptr);
-
-       match_assign(tag, lp, qs, slen, false);
-       return slen;
-}
-
-/**
- * Parse a timestamp retrieved from a tag's payload.
- */
-static size_t parse_t (const struct tag *tag, struct logline *lp,
-                   const char *ptr, size_t len) {
-       struct tm tm;
-       const char *timefmt = "[%d/%b/%Y:%T %z]";
-       const int timelen   = 64;
-       size_t tlen;
-
-       /*
-        * The special format for tag->var "end:strftime" is used
-        * to force Varnishkafka to use the SLT_Timestamp 'Resp' instead
-        * of 'Start' for timestamp formatters. The prefix is removed
-        * from 'timefmt' accordingly.
-        */
-       if (tag->var){
-               const char *fmt_tmp = tag->var;
-               // Remove APACHE_LOG_END_PREFIX from the format string
-               if (tag->flags & TAG_F_TIMESTAMP_END) {
-                       fmt_tmp += strlen(APACHE_LOG_END_PREFIX);
-               }
-               /* If the rest of the format string without the
-                * 'end:' prefix is not empty, use it
-                * in place of the default.
-                */
-               if (*fmt_tmp)
-                       timefmt = fmt_tmp;
-       }
-
-       time_t t = strtoul(ptr, NULL, 10);
-       localtime_r(&t, &tm);
-
-       char dst[timelen];
-
-       /* Format time string */
-       tlen = strftime(dst, timelen, timefmt, &tm);
-
-       match_assign(tag, lp, dst, tlen, true);
-       return tlen;
-}
-
-static size_t parse_auth_user (const struct tag *tag, struct logline *lp,
-                           const char *ptr, size_t len) {
-       size_t rlen = len - 6/*"basic "*/;
-       size_t ulen;
-       char *q;
-
-       if (unlikely(rlen == 0 || strncasecmp(ptr, "basic ", 6) || (rlen % 2)))
-               return 0;
-
-       /* Calculate base64 decoded length */
-       if (unlikely(!(ulen = (rlen * 4) / 3)))
-               return 0;
-
-       /* Protect our stack */
-       if (unlikely(ulen > 1000))
-               return 0;
-
-       char tmp[ulen + 1];
-
-       if ((ulen = VB64_decode2(tmp, ulen, ptr+6, rlen)) <= 0)
-               return 0;
-
-       /* Strip password */
-       if ((q = strnchr_noconst(tmp, ulen, ':')))
-               *q = '\0';
-
-       const size_t out_len = strlen(tmp);
-       match_assign(tag, lp, tmp, out_len, true);
-       return out_len;
-}
-
-
-/* The VCL_call is used for several info; this function matches the only
- * ones that varnishkafka cares about and discards the other ones.
- */
-static size_t parse_vcl_handling (const struct tag *tag, struct logline *lp,
-                          const char *ptr, size_t len) {
-       if ((len == 3 && !strncmp(ptr, "HIT", 3)) ||
-           (len == 4 && (!strncmp(ptr, "MISS", 4) ||
-                         !strncmp(ptr, "PASS", 4)))) {
-               match_assign(tag, lp, ptr, len, false);
-               return len;
-       }
-       return 0;
-}
-
-static size_t parse_seq (const struct tag *tag, struct logline *lp,
-                     const char *ptr UNUSED, size_t len UNUSED) {
-       size_t len_out = 0;
-       char* out = scratch_printf(lp, &len_out, "%"PRIu64, 
conf.sequence_number);
-       match_assign(tag, lp, out, len_out, false);
-       return len_out;
-}
-
-static size_t parse_DT (const struct tag *tag, struct logline *lp,
-                    const char *ptr, size_t len) {
-
-       /* SLT_Timestamp logs timing info in ms */
-       double time_taken_ms;
-
-       /* ptr points to the original tag string, so we
-        * need to extract the double field needed
-        */
-       if (!(time_taken_ms = atof(strndupa(ptr, len))))
-               return 0;
-
-       size_t len_out = 0;
-
-       if (tag->fmt->id == (int)'D') {
-               char* out = scratch_printf(lp, &len_out, "%.0f", time_taken_ms 
* 1000000.0f);
-               match_assign(tag, lp, out, len_out, false);
-       } else if (tag->fmt->id == (int)'T') {
-               char* out = scratch_printf(lp, &len_out, "%f", time_taken_ms);
-               match_assign(tag, lp, out, len_out, false);
-       }
-
-       return len_out;
-}
-
 
 
 /**

-- 
To view, visit https://gerrit.wikimedia.org/r/322257
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Icaf31dd569fd87a6c19a5062832ccdd647955e77
Gerrit-PatchSet: 1
Gerrit-Project: operations/software/varnish/varnishkafka
Gerrit-Branch: master
Gerrit-Owner: Elukey <ltosc...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to