Willy,
On 10/28/21 6:50 PM, Willy Tarreau wrote:
Just one thing (but do not worry, I'll rearrange it later), I initially
took great care of limiting the output to 80 columns for having suffered
a few times from it, and I think it broke when time ranges were added.
ack
From 2e09f62a989dfe3ef2a0965c8f42681db9af937e Mon Sep 17 00:00:00 2001
From: Tim Duesterhus <[email protected]>
Date: Thu, 28 Oct 2021 17:24:02 +0200
Subject: [PATCH 5/5] MINOR: halog: Add support for extracting captures using
-hdr
To: [email protected]
Cc: [email protected]
This resolves GitHub issue #1146.
On this one, please provide a description. It's fine to mention issues or
any external info, but commit messages will outlive any external site, and
they must contain a description of what is done. It seems to me that you're
extracting the fields from the capture, and also that you're doing a special
case of the quotes. I'm not sure how they appear on the output though. We
don't need many details, but just a few lines about this would help.
The check for the quote is to detect the start of the request method.
I've taken that logic from `filter_count_url`. This function skips the
captures by searching for the `"` in front of `GET` (or whatever method
there is).
I've indicated the quote in question in this example log line:
127.0.0.1:54972 [28/Oct/2021:19:14:25.534] test test/<NOSRV> 0/-1/-1/-1/0 403 192 - -
PR-- 2/2/0/0/3 0/0 {foo||bar} {||} "GET / HTTP/1.1"
                                   ^
I've attached an updated patch with an extensive explanation :-)
Best regards
Tim Düsterhus
Developer WoltLab GmbH
--
WoltLab GmbH
Nedlitzer Str. 27B
14469 Potsdam
Tel.: +49 331 96784338
[email protected]
www.woltlab.com
Managing director:
Marcel Werk
AG Potsdam HRB 26795 P
From db0356b61a32ccd7b4015cea66b4e2b7d8ff0097 Mon Sep 17 00:00:00 2001
From: Tim Duesterhus <[email protected]>
Date: Thu, 28 Oct 2021 17:24:02 +0200
Subject: [PATCH 5/5] MINOR: halog: Add support for extracting captures using
-hdr
To: [email protected]
Cc: [email protected]
This patch adds support for extracting captured header fields to halog. A field
can be extracted by passing the `-hdr <block>:<field>` output filter.
Both `<block>` and `<field>` are 1-indexed.
`<block>` refers to the index of the brace-delimited list of headers. If both
request and response headers are captured, then request headers are referenced
by `<block> = 1`, response headers are `2`. If only one direction is captured,
there will only be a single block `1`.
`<field>` refers to a single field within the selected block.
The output will contain one line, possibly empty, per log line processed.
Passing a non-existent `<block>` or `<field>` will result in an empty line.
Example:
capture request header a len 50
capture request header b len 50
capture request header c len 50
capture response header d len 50
capture response header e len 50
capture response header f len 50
`-hdr 1:1` will extract request header `a`
`-hdr 1:2` will extract request header `b`
`-hdr 1:3` will extract request header `c`
`-hdr 2:3` will extract response header `f`
This resolves GitHub issue #1146.
---
admin/halog/halog.c | 109 ++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 105 insertions(+), 4 deletions(-)
diff --git a/admin/halog/halog.c b/admin/halog/halog.c
index fe11aa81d..9b202d731 100644
--- a/admin/halog/halog.c
+++ b/admin/halog/halog.c
@@ -119,6 +119,7 @@ struct url_stat {
#define FILT2_TIMESTAMP 0x01
#define FILT2_PRESERVE_QUERY 0x02
+#define FILT2_EXTRACT_CAPTURE 0x04
unsigned int filter = 0;
unsigned int filter2 = 0;
@@ -139,6 +140,7 @@ void filter_count_term_codes(const char *accept_field, const char *time_field, s
void filter_count_status(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_graphs(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_output_line(const char *accept_field, const char *time_field, struct timer **tptr);
+void filter_extract_capture(const char *accept_field, const char *time_field, unsigned int, unsigned int);
void filter_accept_holes(const char *accept_field, const char *time_field, struct timer **tptr);
void usage(FILE *output, const char *msg)
@@ -147,9 +149,11 @@ void usage(FILE *output, const char *msg)
"%s"
"Usage: halog [-h|--help] for long help\n"
" halog [-q] [-c] [-m <lines>]\n"
- " {-cc|-gt|-pct|-st|-tc|-srv|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt|-ic}\n"
+ " {-cc|-gt|-pct|-st|-tc|-srv|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt|-ic\n"
+ " |-hdr <block>:<field>\n"
+ " }\n"
" [-s <skip>] [-e|-E] [-H] [-rt|-RT <time>] [-ad <delay>] [-ac <count>] [-query]\n"
- " [-v] [-Q|-QS] [-tcn|-TCN <termcode>] [ -hs|-HS [min][:[max]] ] [ -time [min][:[max]] ] < log\n"
+ " [-v] [-Q|-QS] [-tcn|-TCN <termcode>] [ -hs|-HS [min][:[max]] ] [ -time [min][:[max]] <log\n"
"\n",
msg ? msg : ""
);
@@ -198,7 +202,8 @@ void help()
" -u : by URL, -uc : request count, -ue : error count\n"
" -ua : average response time, -ut : average total time\n"
" -uao, -uto: average times computed on valid ('OK') requests\n"
- " -uba, -ubt: average bytes returned, total bytes returned\n",
+ " -uba, -ubt: average bytes returned, total bytes returned\n"
+ " -hdr output captured header at the given <block>:<field>\n",
SOURCE_FIELD,SOURCE_FIELD
);
exit(0);
@@ -699,6 +704,7 @@ int main(int argc, char **argv)
int filter_time_resp = 0;
int filt_http_status_low = 0, filt_http_status_high = 0;
unsigned int filt2_timestamp_low = 0, filt2_timestamp_high = 0;
+ unsigned int filt2_capture_block = 0, filt2_capture_field = 0;
int skip_fields = 1;
void (*line_filter)(const char *accept_field, const char *time_field, struct timer **tptr) = NULL;
@@ -837,6 +843,26 @@ int main(int argc, char **argv)
filter2 |= FILT2_PRESERVE_QUERY;
else if (strcmp(argv[0], "-ic") == 0)
filter |= FILT_COUNT_IP_COUNT;
+ else if (strcmp(argv[0], "-hdr") == 0) {
+ char *sep, *str;
+
+ if (argc < 2) die("missing option for -hdr (<block>:<field>)\n");
+ filter2 |= FILT2_EXTRACT_CAPTURE;
+
+ argc--; argv++;
+ str = *argv;
+ sep = strchr(str, ':');
+ if (!sep)
+ die("missing colon in -hdr (<block>:<field>)\n");
+ else
+ *sep++ = 0;
+
+ filt2_capture_block = *str ? atol(str) : 1;
+ filt2_capture_field = *sep ? atol(sep) : 1;
+
+ if (filt2_capture_block < 1 || filt2_capture_field < 1)
+ die("block and field must be at least 1 for -hdr (<block>:<field>)\n");
+ }
else if (strcmp(argv[0], "-o") == 0) {
if (output_file)
die("Fatal: output file name already specified.\n");
@@ -850,7 +876,7 @@ int main(int argc, char **argv)
argv++;
}
- if (!filter)
+ if (!filter && !filter2)
die("No action specified.\n");
if (filter & FILT_ACC_COUNT && !filter_acc_count)
@@ -1064,6 +1090,8 @@ int main(int argc, char **argv)
if (line_filter) {
if (filter & FILT_COUNT_IP_COUNT)
filter_count_ip(source_field, accept_field, time_field, &t);
+ else if (filter2 & FILT2_EXTRACT_CAPTURE)
+ filter_extract_capture(accept_field, time_field, filt2_capture_block, filt2_capture_field);
else
line_filter(accept_field, time_field, &t);
}
@@ -1323,6 +1351,79 @@ void filter_output_line(const char *accept_field, const char *time_field, struct
lines_out++;
}
+void filter_extract_capture(const char *accept_field, const char *time_field, unsigned int block, unsigned int field)
+{
+ const char *e, *f;
+
+ if (time_field)
+ e = field_start(time_field, METH_FIELD - TIME_FIELD + 1);
+ else
+ e = field_start(accept_field, METH_FIELD - ACCEPT_FIELD + 1);
+
+ while (block-- > 0) {
+ /* Scan until the start of a capture block ('{') or until the URL ('"'). */
+ while ((*e != '"' && *e != '{') && *e) {
+ /* Note: some syslog servers escape quotes ! */
+ if (*e == '\\' && e[1] == '"')
+ break;
+
+ e = field_start(e, 2);
+ }
+
+ if (unlikely(!*e)) {
+ truncated_line(linenum, line);
+ return;
+ }
+
+ /* We reached the URL, no more captures will follow: output an empty line. */
+ if (*e != '{') {
+ puts("");
+ lines_out++;
+ return;
+ }
+
+ /* e points to the opening brace of the capture block. */
+
+ e++;
+ }
+
+ /* We are in the first field of the selected capture block. */
+
+ while (--field > 0) {
+ while ((*e != '|' && *e != '}') && *e)
+ e++;
+
+ if (unlikely(!*e)) {
+ truncated_line(linenum, line);
+ return;
+ }
+
+ if (*e != '|') { /* stopped on '}': the block has fewer fields than requested */
+ puts("");
+ lines_out++;
+ return;
+ }
+
+ /* e points to the pipe. */
+
+ e++;
+ }
+
+ f = e;
+
+ while ((*f != '|' && *f != '}') && *f)
+ f++;
+
+ if (unlikely(!*f)) {
+ truncated_line(linenum, line);
+ return;
+ }
+
+ fwrite(e, f - e, 1, stdout); /* emit the bytes between e and f, i.e. the selected field */
+ putchar('\n');
+ lines_out++;
+}
+
+
void filter_accept_holes(const char *accept_field, const char *time_field, struct timer **tptr)
{
struct timer *t2;
--
2.33.1