Willy, please find another halog series attached.
1. Some small changes to the new -qry/-query flag. 2. A new -hdr flag, resolving my own GitHub issue. Best regards Tim Düsterhus Developer WoltLab GmbH -- WoltLab GmbH Nedlitzer Str. 27B 14469 Potsdam Tel.: +49 331 96784338 [email protected] www.woltlab.com Managing director: Marcel Werk AG Potsdam HRB 26795 P
>From b5f3f0a1dafc85854574c77f3df0e3a31ee0136d Mon Sep 17 00:00:00 2001 From: Tim Duesterhus <[email protected]> Date: Thu, 28 Oct 2021 15:53:37 +0200 Subject: [PATCH 1/5] DOC: halog: Move the `-qry` parameter into the correct section in help text To: [email protected] Cc: [email protected] This is not an output filter, but instead a modifier. Specifically "only one may be used at a time" is not true. see 24b8d693b202b01b649f64ed878d8f9dd1b242e4 --- admin/halog/halog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/admin/halog/halog.c b/admin/halog/halog.c index 2d6c17f45..76e68cebf 100644 --- a/admin/halog/halog.c +++ b/admin/halog/halog.c @@ -183,6 +183,7 @@ void help() " -m <lines> limit output to the first <lines> lines\n" " -s <skip_n_fields> skip n fields from the beginning of a line (default %d)\n" " you can also use -n to start from earlier then field %d\n" + " -qry preserve the query string for per-URL (-u*) statistics\n" "\n" "Output filters - only one may be used at a time\n" " -c only report the number of lines that would have been printed\n" @@ -197,8 +198,7 @@ void help() " -u : by URL, -uc : request count, -ue : error count\n" " -ua : average response time, -ut : average total time\n" " -uao, -uto: average times computed on valid ('OK') requests\n" - " -uba, -ubt: average bytes returned, total bytes returned\n" - " -qry preserve the query string for per-URL statistics\n", + " -uba, -ubt: average bytes returned, total bytes returned\n", SOURCE_FIELD,SOURCE_FIELD ); exit(0); -- 2.33.1
>From 50d5f579bc35fcffbf06a2fa2dbc8a4fd6944135 Mon Sep 17 00:00:00 2001 From: Tim Duesterhus <[email protected]> Date: Thu, 28 Oct 2021 16:36:03 +0200 Subject: [PATCH 2/5] MINOR: halog: Rename -qry to -query To: [email protected] Cc: [email protected] With the query flag moved into the correct help section, there is enough space for two additional characters. --- admin/halog/halog.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/admin/halog/halog.c b/admin/halog/halog.c index 76e68cebf..89590cfed 100644 --- a/admin/halog/halog.c +++ b/admin/halog/halog.c @@ -148,7 +148,7 @@ void usage(FILE *output, const char *msg) "Usage: halog [-h|--help] for long help\n" " halog [-q] [-c] [-m <lines>]\n" " {-cc|-gt|-pct|-st|-tc|-srv|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt|-ic}\n" - " [-s <skip>] [-e|-E] [-H] [-rt|-RT <time>] [-ad <delay>] [-ac <count>] [-qry]\n" + " [-s <skip>] [-e|-E] [-H] [-rt|-RT <time>] [-ad <delay>] [-ac <count>] [-query]\n" " [-v] [-Q|-QS] [-tcn|-TCN <termcode>] [ -hs|-HS [min][:[max]] ] [ -time [min][:[max]] ] < log\n" "\n", msg ? msg : "" @@ -183,7 +183,7 @@ void help() " -m <lines> limit output to the first <lines> lines\n" " -s <skip_n_fields> skip n fields from the beginning of a line (default %d)\n" " you can also use -n to start from earlier then field %d\n" - " -qry preserve the query string for per-URL (-u*) statistics\n" + " -query preserve the query string for per-URL (-u*) statistics\n" "\n" "Output filters - only one may be used at a time\n" " -c only report the number of lines that would have been printed\n" @@ -833,7 +833,7 @@ int main(int argc, char **argv) filter |= FILT_COUNT_URL_BAVG; else if (strcmp(argv[0], "-ubt") == 0) filter |= FILT_COUNT_URL_BTOT; - else if (strcmp(argv[0], "-qry") == 0) + else if (strcmp(argv[0], "-query") == 0) filter2 |= FILT2_PRESERVE_QUERY; else if (strcmp(argv[0], "-ic") == 0) filter |= FILT_COUNT_IP_COUNT; -- 2.33.1
>From d0cdbf68f41bfd43f5cbb1bb0cc1b745fe261dfd Mon Sep 17 00:00:00 2001 From: Tim Duesterhus <[email protected]> Date: Thu, 28 Oct 2021 15:55:49 +0200 Subject: [PATCH 3/5] CLEANUP: halog: Use consistent indentation in help() To: [email protected] Cc: [email protected] Consistently use 1 Tab per line. --- admin/halog/halog.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/admin/halog/halog.c b/admin/halog/halog.c index 89590cfed..47f10a741 100644 --- a/admin/halog/halog.c +++ b/admin/halog/halog.c @@ -181,10 +181,10 @@ void help() " -v invert the input filtering condition\n" " -q don't report errors/warnings\n" " -m <lines> limit output to the first <lines> lines\n" - " -s <skip_n_fields> skip n fields from the beginning of a line (default %d)\n" - " you can also use -n to start from earlier then field %d\n" + " -s <skip_n_fields> skip n fields from the beginning of a line (default %d)\n" + " you can also use -n to start from earlier then field %d\n" " -query preserve the query string for per-URL (-u*) statistics\n" - "\n" + "\n" "Output filters - only one may be used at a time\n" " -c only report the number of lines that would have been printed\n" " -pct output connect and response times percentiles\n" @@ -199,7 +199,7 @@ void help() " -ua : average response time, -ut : average total time\n" " -uao, -uto: average times computed on valid ('OK') requests\n" " -uba, -ubt: average bytes returned, total bytes returned\n", - SOURCE_FIELD,SOURCE_FIELD + SOURCE_FIELD,SOURCE_FIELD ); exit(0); } -- 2.33.1
>From af9a71765c2e81b433b29fd98b529470d8db502d Mon Sep 17 00:00:00 2001 From: Tim Duesterhus <[email protected]> Date: Thu, 28 Oct 2021 17:06:23 +0200 Subject: [PATCH 4/5] BUG/MINOR: halog: Add missing newlines in die() messages To: [email protected] Cc: [email protected] This newline is required to correctly print the usage. --- admin/halog/halog.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/admin/halog/halog.c b/admin/halog/halog.c index 47f10a741..fe11aa81d 100644 --- a/admin/halog/halog.c +++ b/admin/halog/halog.c @@ -709,36 +709,36 @@ int main(int argc, char **argv) break; if (strcmp(argv[0], "-ad") == 0) { - if (argc < 2) die("missing option for -ad"); + if (argc < 2) die("missing option for -ad\n"); argc--; argv++; filter |= FILT_ACC_DELAY; filter_acc_delay = atol(*argv); } else if (strcmp(argv[0], "-ac") == 0) { - if (argc < 2) die("missing option for -ac"); + if (argc < 2) die("missing option for -ac\n"); argc--; argv++; filter |= FILT_ACC_COUNT; filter_acc_count = atol(*argv); } else if (strcmp(argv[0], "-rt") == 0) { - if (argc < 2) die("missing option for -rt"); + if (argc < 2) die("missing option for -rt\n"); argc--; argv++; filter |= FILT_TIME_RESP; filter_time_resp = atol(*argv); } else if (strcmp(argv[0], "-RT") == 0) { - if (argc < 2) die("missing option for -RT"); + if (argc < 2) die("missing option for -RT\n"); argc--; argv++; filter |= FILT_TIME_RESP | FILT_INVERT_TIME_RESP; filter_time_resp = atol(*argv); } else if (strcmp(argv[0], "-s") == 0) { - if (argc < 2) die("missing option for -s"); + if (argc < 2) die("missing option for -s\n"); argc--; argv++; skip_fields = atol(*argv); } else if (strcmp(argv[0], "-m") == 0) { - if (argc < 2) die("missing option for -m"); + if (argc < 2) die("missing option for -m\n"); argc--; argv++; lines_max = atol(*argv); } @@ -771,13 +771,13 @@ int main(int argc, char **argv) else if (strcmp(argv[0], "-tc") == 0) filter |= FILT_COUNT_TERM_CODES; else if 
(strcmp(argv[0], "-tcn") == 0) { - if (argc < 2) die("missing option for -tcn"); + if (argc < 2) die("missing option for -tcn\n"); argc--; argv++; filter |= FILT_TERM_CODE_NAME; filter_term_code_name = *argv; } else if (strcmp(argv[0], "-TCN") == 0) { - if (argc < 2) die("missing option for -TCN"); + if (argc < 2) die("missing option for -TCN\n"); argc--; argv++; filter |= FILT_TERM_CODE_NAME | FILT_INVERT_TERM_CODE_NAME; filter_term_code_name = *argv; @@ -785,7 +785,7 @@ int main(int argc, char **argv) else if (strcmp(argv[0], "-hs") == 0 || strcmp(argv[0], "-HS") == 0) { char *sep, *str; - if (argc < 2) die("missing option for -hs/-HS ([min]:[max])"); + if (argc < 2) die("missing option for -hs/-HS ([min]:[max])\n"); filter |= FILT_HTTP_STATUS; if (argv[0][1] == 'H') filter |= FILT_INVERT_HTTP_STATUS; @@ -803,7 +803,7 @@ int main(int argc, char **argv) else if (strcmp(argv[0], "-time") == 0) { char *sep, *str; - if (argc < 2) die("missing option for -time ([min]:[max])"); + if (argc < 2) die("missing option for -time ([min]:[max])\n"); filter2 |= FILT2_TIMESTAMP; argc--; argv++; -- 2.33.1
>From 2e09f62a989dfe3ef2a0965c8f42681db9af937e Mon Sep 17 00:00:00 2001 From: Tim Duesterhus <[email protected]> Date: Thu, 28 Oct 2021 17:24:02 +0200 Subject: [PATCH 5/5] MINOR: halog: Add support for extracting captures using -hdr To: [email protected] Cc: [email protected] This resolves GitHub issue #1146. --- admin/halog/halog.c | 109 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 105 insertions(+), 4 deletions(-) diff --git a/admin/halog/halog.c b/admin/halog/halog.c index fe11aa81d..9b202d731 100644 --- a/admin/halog/halog.c +++ b/admin/halog/halog.c @@ -119,6 +119,7 @@ struct url_stat { #define FILT2_TIMESTAMP 0x01 #define FILT2_PRESERVE_QUERY 0x02 +#define FILT2_EXTRACT_CAPTURE 0x04 unsigned int filter = 0; unsigned int filter2 = 0; @@ -139,6 +140,7 @@ void filter_count_term_codes(const char *accept_field, const char *time_field, s void filter_count_status(const char *accept_field, const char *time_field, struct timer **tptr); void filter_graphs(const char *accept_field, const char *time_field, struct timer **tptr); void filter_output_line(const char *accept_field, const char *time_field, struct timer **tptr); +void filter_extract_capture(const char *accept_field, const char *time_field, unsigned int, unsigned int); void filter_accept_holes(const char *accept_field, const char *time_field, struct timer **tptr); void usage(FILE *output, const char *msg) @@ -147,9 +149,11 @@ void usage(FILE *output, const char *msg) "%s" "Usage: halog [-h|--help] for long help\n" " halog [-q] [-c] [-m <lines>]\n" - " {-cc|-gt|-pct|-st|-tc|-srv|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt|-ic}\n" + " {-cc|-gt|-pct|-st|-tc|-srv|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt|-ic\n" + " |-hdr <block>:<field>\n" + " }\n" " [-s <skip>] [-e|-E] [-H] [-rt|-RT <time>] [-ad <delay>] [-ac <count>] [-query]\n" - " [-v] [-Q|-QS] [-tcn|-TCN <termcode>] [ -hs|-HS [min][:[max]] ] [ -time [min][:[max]] ] < log\n" + " [-v] [-Q|-QS] [-tcn|-TCN <termcode>] [ -hs|-HS [min][:[max]] ] [ -time 
[min][:[max]] <log\n" "\n", msg ? msg : "" ); @@ -198,7 +202,8 @@ void help() " -u : by URL, -uc : request count, -ue : error count\n" " -ua : average response time, -ut : average total time\n" " -uao, -uto: average times computed on valid ('OK') requests\n" - " -uba, -ubt: average bytes returned, total bytes returned\n", + " -uba, -ubt: average bytes returned, total bytes returned\n" + " -hdr output captured header at the given <block>:<field>\n", SOURCE_FIELD,SOURCE_FIELD ); exit(0); @@ -699,6 +704,7 @@ int main(int argc, char **argv) int filter_time_resp = 0; int filt_http_status_low = 0, filt_http_status_high = 0; unsigned int filt2_timestamp_low = 0, filt2_timestamp_high = 0; + unsigned int filt2_capture_block = 0, filt2_capture_field = 0; int skip_fields = 1; void (*line_filter)(const char *accept_field, const char *time_field, struct timer **tptr) = NULL; @@ -837,6 +843,26 @@ int main(int argc, char **argv) filter2 |= FILT2_PRESERVE_QUERY; else if (strcmp(argv[0], "-ic") == 0) filter |= FILT_COUNT_IP_COUNT; + else if (strcmp(argv[0], "-hdr") == 0) { + char *sep, *str; + + if (argc < 2) die("missing option for -hdr (<block>:<field>)\n"); + filter2 |= FILT2_EXTRACT_CAPTURE; + + argc--; argv++; + str = *argv; + sep = strchr(str, ':'); + if (!sep) + die("missing colon in -hdr (<block>:<field>)\n"); + else + *sep++ = 0; + + filt2_capture_block = *str ? atol(str) : 1; + filt2_capture_field = *sep ? 
atol(sep) : 1; + + if (filt2_capture_block < 1 || filt2_capture_field < 1) + die("block and field must be at least 1 for -hdr (<block>:<field>)\n"); + } else if (strcmp(argv[0], "-o") == 0) { if (output_file) die("Fatal: output file name already specified.\n"); @@ -850,7 +876,7 @@ int main(int argc, char **argv) argv++; } - if (!filter) + if (!filter && !filter2) die("No action specified.\n"); if (filter & FILT_ACC_COUNT && !filter_acc_count) @@ -1064,6 +1090,8 @@ int main(int argc, char **argv) if (line_filter) { if (filter & FILT_COUNT_IP_COUNT) filter_count_ip(source_field, accept_field, time_field, &t); + else if (filter2 & FILT2_EXTRACT_CAPTURE) + filter_extract_capture(accept_field, time_field, filt2_capture_block, filt2_capture_field); else line_filter(accept_field, time_field, &t); } @@ -1323,6 +1351,79 @@ void filter_output_line(const char *accept_field, const char *time_field, struct lines_out++; } +void filter_extract_capture(const char *accept_field, const char *time_field, unsigned int block, unsigned int field) +{ + const char *e, *f; + + if (time_field) + e = field_start(time_field, METH_FIELD - TIME_FIELD + 1); + else + e = field_start(accept_field, METH_FIELD - ACCEPT_FIELD + 1); + + while (block-- > 0) { + /* Scan until the start of a capture block ('{') or until the URL ('"'). */ + while ((*e != '"' && *e != '{') && *e) { + /* Note: some syslog servers escape quotes ! */ + if (*e == '\\' && e[1] == '"') + break; + + e = field_start(e, 2); + } + + if (unlikely(!*e)) { + truncated_line(linenum, line); + return; + } + + /* We reached the URL, no more captures will follow. */ + if (*e != '{') { + puts(""); + lines_out++; + return; + } + + /* e points to the opening brace of the capture block. */ + + e++; + } + + /* We are in the first field of the selected capture block. 
*/ + + while (--field > 0) { + while ((*e != '|' && *e != '}') && *e) + e++; + + if (unlikely(!*e)) { + truncated_line(linenum, line); + return; + } + + if (*e != '|') { + puts(""); + lines_out++; + return; + } + + /* e points to the pipe. */ + + e++; + } + + f = e; + + while ((*f != '|' && *f != '}') && *f) + f++; + + if (unlikely(!*f)) { + truncated_line(linenum, line); + return; + } + + fwrite(e, f - e, 1, stdout); + putchar('\n'); + lines_out++; +} + void filter_accept_holes(const char *accept_field, const char *time_field, struct timer **tptr) { struct timer *t2; -- 2.33.1

