Willy,

please find another halog series attached.

1. Some small changes to the new -qry/-query flag.
2. A new -hdr flag, resolving my own GitHub issue.

Best regards
Tim Düsterhus
Developer WoltLab GmbH

--

WoltLab GmbH
Nedlitzer Str. 27B
14469 Potsdam

Tel.: +49 331 96784338

[email protected]
www.woltlab.com

Managing director:
Marcel Werk

AG Potsdam HRB 26795 P
>From b5f3f0a1dafc85854574c77f3df0e3a31ee0136d Mon Sep 17 00:00:00 2001
From: Tim Duesterhus <[email protected]>
Date: Thu, 28 Oct 2021 15:53:37 +0200
Subject: [PATCH 1/5] DOC: halog: Move the `-qry` parameter into the correct
 section in help text
To: [email protected]
Cc: [email protected]

This is not an output filter, but instead a modifier. Specifically "only one
may be used at a time" is not true.

see 24b8d693b202b01b649f64ed878d8f9dd1b242e4
---
 admin/halog/halog.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/admin/halog/halog.c b/admin/halog/halog.c
index 2d6c17f45..76e68cebf 100644
--- a/admin/halog/halog.c
+++ b/admin/halog/halog.c
@@ -183,6 +183,7 @@ void help()
 	       " -m <lines>              limit output to the first <lines> lines\n"
                " -s <skip_n_fields>      skip n fields from the beginning of a line (default %d)\n"
                "                         you can also use -n to start from earlier then field %d\n"
+	       " -qry                    preserve the query string for per-URL (-u*) statistics\n"
                "\n"
 	       "Output filters - only one may be used at a time\n"
 	       " -c    only report the number of lines that would have been printed\n"
@@ -197,8 +198,7 @@ void help()
 	       "       -u : by URL, -uc : request count, -ue : error count\n"
 	       "       -ua : average response time, -ut : average total time\n"
 	       "       -uao, -uto: average times computed on valid ('OK') requests\n"
-	       "       -uba, -ubt: average bytes returned, total bytes returned\n"
-	       " -qry  preserve the query string for per-URL statistics\n",
+	       "       -uba, -ubt: average bytes returned, total bytes returned\n",
                SOURCE_FIELD,SOURCE_FIELD
 	       );
 	exit(0);
-- 
2.33.1

>From 50d5f579bc35fcffbf06a2fa2dbc8a4fd6944135 Mon Sep 17 00:00:00 2001
From: Tim Duesterhus <[email protected]>
Date: Thu, 28 Oct 2021 16:36:03 +0200
Subject: [PATCH 2/5] MINOR: halog: Rename -qry to -query
To: [email protected]
Cc: [email protected]

With the query flag moved into the correct help section, there is enough space
for two additional characters.
---
 admin/halog/halog.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/admin/halog/halog.c b/admin/halog/halog.c
index 76e68cebf..89590cfed 100644
--- a/admin/halog/halog.c
+++ b/admin/halog/halog.c
@@ -148,7 +148,7 @@ void usage(FILE *output, const char *msg)
 		"Usage: halog [-h|--help] for long help\n"
 		"       halog [-q] [-c] [-m <lines>]\n"
 		"       {-cc|-gt|-pct|-st|-tc|-srv|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt|-ic}\n"
-		"       [-s <skip>] [-e|-E] [-H] [-rt|-RT <time>] [-ad <delay>] [-ac <count>] [-qry]\n"
+		"       [-s <skip>] [-e|-E] [-H] [-rt|-RT <time>] [-ad <delay>] [-ac <count>] [-query]\n"
 		"       [-v] [-Q|-QS] [-tcn|-TCN <termcode>] [ -hs|-HS [min][:[max]] ] [ -time [min][:[max]] ] < log\n"
 		"\n",
 		msg ? msg : ""
@@ -183,7 +183,7 @@ void help()
 	       " -m <lines>              limit output to the first <lines> lines\n"
                " -s <skip_n_fields>      skip n fields from the beginning of a line (default %d)\n"
                "                         you can also use -n to start from earlier then field %d\n"
-	       " -qry                    preserve the query string for per-URL (-u*) statistics\n"
+	       " -query                  preserve the query string for per-URL (-u*) statistics\n"
                "\n"
 	       "Output filters - only one may be used at a time\n"
 	       " -c    only report the number of lines that would have been printed\n"
@@ -833,7 +833,7 @@ int main(int argc, char **argv)
 			filter |= FILT_COUNT_URL_BAVG;
 		else if (strcmp(argv[0], "-ubt") == 0)
 			filter |= FILT_COUNT_URL_BTOT;
-		else if (strcmp(argv[0], "-qry") == 0)
+		else if (strcmp(argv[0], "-query") == 0)
 			filter2 |= FILT2_PRESERVE_QUERY;
 		else if (strcmp(argv[0], "-ic") == 0)
 			filter |= FILT_COUNT_IP_COUNT;
-- 
2.33.1

>From d0cdbf68f41bfd43f5cbb1bb0cc1b745fe261dfd Mon Sep 17 00:00:00 2001
From: Tim Duesterhus <[email protected]>
Date: Thu, 28 Oct 2021 15:55:49 +0200
Subject: [PATCH 3/5] CLEANUP: halog: Use consistent indentation in help()
To: [email protected]
Cc: [email protected]

Consistently use 1 Tab per line.
---
 admin/halog/halog.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/admin/halog/halog.c b/admin/halog/halog.c
index 89590cfed..47f10a741 100644
--- a/admin/halog/halog.c
+++ b/admin/halog/halog.c
@@ -181,10 +181,10 @@ void help()
 	       " -v                      invert the input filtering condition\n"
 	       " -q                      don't report errors/warnings\n"
 	       " -m <lines>              limit output to the first <lines> lines\n"
-               " -s <skip_n_fields>      skip n fields from the beginning of a line (default %d)\n"
-               "                         you can also use -n to start from earlier then field %d\n"
+	       " -s <skip_n_fields>      skip n fields from the beginning of a line (default %d)\n"
+	       "                         you can also use -n to start from earlier then field %d\n"
 	       " -query                  preserve the query string for per-URL (-u*) statistics\n"
-               "\n"
+	       "\n"
 	       "Output filters - only one may be used at a time\n"
 	       " -c    only report the number of lines that would have been printed\n"
 	       " -pct  output connect and response times percentiles\n"
@@ -199,7 +199,7 @@ void help()
 	       "       -ua : average response time, -ut : average total time\n"
 	       "       -uao, -uto: average times computed on valid ('OK') requests\n"
 	       "       -uba, -ubt: average bytes returned, total bytes returned\n",
-               SOURCE_FIELD,SOURCE_FIELD
+	       SOURCE_FIELD,SOURCE_FIELD
 	       );
 	exit(0);
 }
-- 
2.33.1

>From af9a71765c2e81b433b29fd98b529470d8db502d Mon Sep 17 00:00:00 2001
From: Tim Duesterhus <[email protected]>
Date: Thu, 28 Oct 2021 17:06:23 +0200
Subject: [PATCH 4/5] BUG/MINOR: halog: Add missing newlines in die() messages
To: [email protected]
Cc: [email protected]

This newline is required to correctly print the usage.
---
 admin/halog/halog.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/admin/halog/halog.c b/admin/halog/halog.c
index 47f10a741..fe11aa81d 100644
--- a/admin/halog/halog.c
+++ b/admin/halog/halog.c
@@ -709,36 +709,36 @@ int main(int argc, char **argv)
 			break;
 
 		if (strcmp(argv[0], "-ad") == 0) {
-			if (argc < 2) die("missing option for -ad");
+			if (argc < 2) die("missing option for -ad\n");
 			argc--; argv++;
 			filter |= FILT_ACC_DELAY;
 			filter_acc_delay = atol(*argv);
 		}
 		else if (strcmp(argv[0], "-ac") == 0) {
-			if (argc < 2) die("missing option for -ac");
+			if (argc < 2) die("missing option for -ac\n");
 			argc--; argv++;
 			filter |= FILT_ACC_COUNT;
 			filter_acc_count = atol(*argv);
 		}
 		else if (strcmp(argv[0], "-rt") == 0) {
-			if (argc < 2) die("missing option for -rt");
+			if (argc < 2) die("missing option for -rt\n");
 			argc--; argv++;
 			filter |= FILT_TIME_RESP;
 			filter_time_resp = atol(*argv);
 		}
 		else if (strcmp(argv[0], "-RT") == 0) {
-			if (argc < 2) die("missing option for -RT");
+			if (argc < 2) die("missing option for -RT\n");
 			argc--; argv++;
 			filter |= FILT_TIME_RESP | FILT_INVERT_TIME_RESP;
 			filter_time_resp = atol(*argv);
 		}
 		else if (strcmp(argv[0], "-s") == 0) {
-			if (argc < 2) die("missing option for -s");
+			if (argc < 2) die("missing option for -s\n");
 			argc--; argv++;
 			skip_fields = atol(*argv);
 		}
 		else if (strcmp(argv[0], "-m") == 0) {
-			if (argc < 2) die("missing option for -m");
+			if (argc < 2) die("missing option for -m\n");
 			argc--; argv++;
 			lines_max = atol(*argv);
 		}
@@ -771,13 +771,13 @@ int main(int argc, char **argv)
 		else if (strcmp(argv[0], "-tc") == 0)
 			filter |= FILT_COUNT_TERM_CODES;
 		else if (strcmp(argv[0], "-tcn") == 0) {
-			if (argc < 2) die("missing option for -tcn");
+			if (argc < 2) die("missing option for -tcn\n");
 			argc--; argv++;
 			filter |= FILT_TERM_CODE_NAME;
 			filter_term_code_name = *argv;
 		}
 		else if (strcmp(argv[0], "-TCN") == 0) {
-			if (argc < 2) die("missing option for -TCN");
+			if (argc < 2) die("missing option for -TCN\n");
 			argc--; argv++;
 			filter |= FILT_TERM_CODE_NAME | FILT_INVERT_TERM_CODE_NAME;
 			filter_term_code_name = *argv;
@@ -785,7 +785,7 @@ int main(int argc, char **argv)
 		else if (strcmp(argv[0], "-hs") == 0 || strcmp(argv[0], "-HS") == 0) {
 			char *sep, *str;
 
-			if (argc < 2) die("missing option for -hs/-HS ([min]:[max])");
+			if (argc < 2) die("missing option for -hs/-HS ([min]:[max])\n");
 			filter |= FILT_HTTP_STATUS;
 			if (argv[0][1] == 'H')
 				filter |= FILT_INVERT_HTTP_STATUS;
@@ -803,7 +803,7 @@ int main(int argc, char **argv)
 		else if (strcmp(argv[0], "-time") == 0) {
 			char *sep, *str;
 
-			if (argc < 2) die("missing option for -time ([min]:[max])");
+			if (argc < 2) die("missing option for -time ([min]:[max])\n");
 			filter2 |= FILT2_TIMESTAMP;
 
 			argc--; argv++;
-- 
2.33.1

>From 2e09f62a989dfe3ef2a0965c8f42681db9af937e Mon Sep 17 00:00:00 2001
From: Tim Duesterhus <[email protected]>
Date: Thu, 28 Oct 2021 17:24:02 +0200
Subject: [PATCH 5/5] MINOR: halog: Add support for extracting captures using
 -hdr
To: [email protected]
Cc: [email protected]

This resolves GitHub issue #1146.
---
 admin/halog/halog.c | 109 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 105 insertions(+), 4 deletions(-)

diff --git a/admin/halog/halog.c b/admin/halog/halog.c
index fe11aa81d..9b202d731 100644
--- a/admin/halog/halog.c
+++ b/admin/halog/halog.c
@@ -119,6 +119,7 @@ struct url_stat {
 
 #define FILT2_TIMESTAMP         0x01
 #define FILT2_PRESERVE_QUERY    0x02
+#define FILT2_EXTRACT_CAPTURE   0x04
 
 unsigned int filter = 0;
 unsigned int filter2 = 0;
@@ -139,6 +140,7 @@ void filter_count_term_codes(const char *accept_field, const char *time_field, s
 void filter_count_status(const char *accept_field, const char *time_field, struct timer **tptr);
 void filter_graphs(const char *accept_field, const char *time_field, struct timer **tptr);
 void filter_output_line(const char *accept_field, const char *time_field, struct timer **tptr);
+void filter_extract_capture(const char *accept_field, const char *time_field, unsigned int, unsigned int);
 void filter_accept_holes(const char *accept_field, const char *time_field, struct timer **tptr);
 
 void usage(FILE *output, const char *msg)
@@ -147,9 +149,11 @@ void usage(FILE *output, const char *msg)
 		"%s"
 		"Usage: halog [-h|--help] for long help\n"
 		"       halog [-q] [-c] [-m <lines>]\n"
-		"       {-cc|-gt|-pct|-st|-tc|-srv|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt|-ic}\n"
+		"       {-cc|-gt|-pct|-st|-tc|-srv|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt|-ic\n"
+		"           |-hdr <block>:<field>\n"
+		"       }\n"
 		"       [-s <skip>] [-e|-E] [-H] [-rt|-RT <time>] [-ad <delay>] [-ac <count>] [-query]\n"
-		"       [-v] [-Q|-QS] [-tcn|-TCN <termcode>] [ -hs|-HS [min][:[max]] ] [ -time [min][:[max]] ] < log\n"
+		"       [-v] [-Q|-QS] [-tcn|-TCN <termcode>] [ -hs|-HS [min][:[max]] ] [ -time [min][:[max]] ] < log\n"
 		"\n",
 		msg ? msg : ""
 		);
@@ -198,7 +202,8 @@ void help()
 	       "       -u : by URL, -uc : request count, -ue : error count\n"
 	       "       -ua : average response time, -ut : average total time\n"
 	       "       -uao, -uto: average times computed on valid ('OK') requests\n"
-	       "       -uba, -ubt: average bytes returned, total bytes returned\n",
+	       "       -uba, -ubt: average bytes returned, total bytes returned\n"
+	       " -hdr  output captured header at the given <block>:<field>\n",
 	       SOURCE_FIELD,SOURCE_FIELD
 	       );
 	exit(0);
@@ -699,6 +704,7 @@ int main(int argc, char **argv)
 	int filter_time_resp = 0;
 	int filt_http_status_low = 0, filt_http_status_high = 0;
 	unsigned int filt2_timestamp_low = 0, filt2_timestamp_high = 0;
+	unsigned int filt2_capture_block = 0, filt2_capture_field = 0;
 	int skip_fields = 1;
 
 	void (*line_filter)(const char *accept_field, const char *time_field, struct timer **tptr) = NULL;
@@ -837,6 +843,26 @@ int main(int argc, char **argv)
 			filter2 |= FILT2_PRESERVE_QUERY;
 		else if (strcmp(argv[0], "-ic") == 0)
 			filter |= FILT_COUNT_IP_COUNT;
+		else if (strcmp(argv[0], "-hdr") == 0) {
+			char *sep, *str;
+
+			if (argc < 2) die("missing option for -hdr (<block>:<field>)\n");
+			filter2 |= FILT2_EXTRACT_CAPTURE;
+
+			argc--; argv++;
+			str = *argv;
+			sep = strchr(str, ':');
+			if (!sep)
+				die("missing colon in -hdr (<block>:<field>)\n");
+			else
+				*sep++ = 0;
+			
+			filt2_capture_block = *str ? atol(str) : 1;
+			filt2_capture_field = *sep ? atol(sep) : 1;
+
+			if (filt2_capture_block < 1 || filt2_capture_field < 1)
+				die("block and field must be at least 1 for -hdr (<block>:<field>)\n");
+		}
 		else if (strcmp(argv[0], "-o") == 0) {
 			if (output_file)
 				die("Fatal: output file name already specified.\n");
@@ -850,7 +876,7 @@ int main(int argc, char **argv)
 		argv++;
 	}
 
-	if (!filter)
+	if (!filter && !filter2)
 		die("No action specified.\n");
 
 	if (filter & FILT_ACC_COUNT && !filter_acc_count)
@@ -1064,6 +1090,8 @@ int main(int argc, char **argv)
 		if (line_filter) {
 			if (filter & FILT_COUNT_IP_COUNT)
 				filter_count_ip(source_field, accept_field, time_field, &t);
+			else if (filter2 & FILT2_EXTRACT_CAPTURE)
+				filter_extract_capture(accept_field, time_field, filt2_capture_block, filt2_capture_field);
 			else
 				line_filter(accept_field, time_field, &t);
 		}
@@ -1323,6 +1351,79 @@ void filter_output_line(const char *accept_field, const char *time_field, struct
 	lines_out++;
 }
 
+void filter_extract_capture(const char *accept_field, const char *time_field, unsigned int block, unsigned int field)
+{
+	const char *e, *f;
+
+	if (time_field)
+		e = field_start(time_field, METH_FIELD - TIME_FIELD + 1);
+	else
+		e = field_start(accept_field, METH_FIELD - ACCEPT_FIELD + 1);
+
+	while (block-- > 0) {
+		/* Scan until the start of a capture block ('{') or until the URL ('"'). */
+		while ((*e != '"' && *e != '{') && *e) {
+			/* Note: some syslog servers escape quotes ! */
+			if (*e == '\\' && e[1] == '"')
+				break;
+
+			e = field_start(e, 2);
+		}
+
+		if (unlikely(!*e)) {
+			truncated_line(linenum, line);
+			return;
+		}
+
+		/* We reached the URL: no more captures will follow, so emit an empty line. */
+		if (*e != '{') {
+			puts("");
+			lines_out++;
+			return;
+		}
+
+		/* e points to the opening brace of the capture block. */
+
+		e++;
+	}
+
+	/* We are in the first field of the selected capture block. */
+
+	while (--field > 0) {
+		while ((*e != '|' && *e != '}') && *e)
+			e++;
+
+		if (unlikely(!*e)) {
+			truncated_line(linenum, line);
+			return;
+		}
+
+		if (*e != '|') {
+			puts("");
+			lines_out++;
+			return;
+		}
+
+		/* e points to the pipe. */
+
+		e++;
+	}
+
+	f = e;
+
+	while ((*f != '|' && *f != '}') && *f)
+		f++;
+
+	if (unlikely(!*f)) {
+		truncated_line(linenum, line);
+		return;
+	}
+
+	fwrite(e, f - e, 1, stdout);
+	putchar('\n');
+	lines_out++;
+}
+
 void filter_accept_holes(const char *accept_field, const char *time_field, struct timer **tptr)
 {
 	struct timer *t2;
-- 
2.33.1

Reply via email to