Hi,

This patch extends the functionality of the field/word converters so that
it is possible to count fields/words from either the beginning or the end
of the string, and to extract multiple fields/words if needed. The change
is backward compatible and should apply cleanly to both the 1.8 and 1.9
branches.
Regards,

Marcin Deranek

>From 3393f952788e26e6e8add5b6ca472d3e765b57ca Mon Sep 17 00:00:00 2001
From: Marcin Deranek <marcin.dera...@booking.com>
Date: Mon, 16 Apr 2018 14:30:46 +0200
Subject: [PATCH] Extend functionality for field/word converters

Extend the functionality of the field/word converters so that it is
possible to extract field(s)/word(s) counting from the beginning or the
end, and/or to extract multiple fields/words (including separators), e.g.:

str(f1_f2_f3__f5),field(2,_,2)  # f2_f3
str(f1_f2_f3__f5),field(2,_,0)  # f2_f3__f5
str(f1_f2_f3__f5),field(-2,_,3) # f2_f3_
str(f1_f2_f3__f5),field(-3,_,0) # f1_f2_f3

str(w1_w2_w3___w4),word(3,_,2)  # w3___w4
str(w1_w2_w3___w4),word(2,_,0)  # w2_w3___w4
str(w1_w2_w3___w4),word(-2,_,3) # w1_w2_w3
str(w1_w2_w3___w4),word(-3,_,0) # w1_w2

Change is backward compatible.
---
 doc/configuration.txt |  34 ++++++++++---
 src/sample.c          | 132 +++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 125 insertions(+), 41 deletions(-)

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 1e0b26f8..a9f75579 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -12907,10 +12907,20 @@ even
   Returns a boolean TRUE if the input value of type signed integer is even
   otherwise returns FALSE. It is functionally equivalent to "not,and(1),bool".
 
-field(<index>,<delimiters>)
-  Extracts the substring at the given index considering given delimiters from
-  an input string. Indexes start at 1 and delimiters are a string formatted
-  list of chars.
+field(<index>,<delimiters>[,<count>])
+  Extracts the substring at the given index counting from the beginning
+  (positive index) or from the end (negative index) considering given delimiters
+  from an input string. Indexes start at 1 or -1 and delimiters are a string
+  formatted list of chars. Optionally, <count> sets the number of fields to
+  extract (default: 1). A value of 0 extracts all remaining fields, up to the
+  end (positive index) or the beginning (negative index) of the string.
+
+  Example :
+      str(f1_f2_f3__f5),field(5,_)    # f5
+      str(f1_f2_f3__f5),field(2,_,0)  # f2_f3__f5
+      str(f1_f2_f3__f5),field(2,_,2)  # f2_f3
+      str(f1_f2_f3__f5),field(-2,_,3) # f2_f3_
+      str(f1_f2_f3__f5),field(-3,_,0) # f1_f2_f3
 
 hex
   Converts a binary input sample to a hex string containing two hex digits per
@@ -13440,9 +13450,19 @@ utime(<format>[,<offset>])
       # e.g.  20140710162350 127.0.0.1:57325
       log-format %[date,utime(%Y%m%d%H%M%S)]\ %ci:%cp
 
-word(<index>,<delimiters>)
-  Extracts the nth word considering given delimiters from an input string.
-  Indexes start at 1 and delimiters are a string formatted list of chars.
+word(<index>,<delimiters>[,<count>])
+  Extracts the nth word counting from the beginning (positive index) or from
+  the end (negative index) considering given delimiters from an input string.
+  Indexes start at 1 or -1 and delimiters are a string formatted list of chars.
+  Optionally, <count> sets the number of words to extract (default: 1).
+  A value of 0 extracts all remaining words in the counting direction.
+
+  Example :
+      str(f1_f2_f3__f5),word(4,_)    # f5
+      str(f1_f2_f3__f5),word(2,_,0)  # f2_f3__f5
+      str(f1_f2_f3__f5),word(3,_,2)  # f3__f5
+      str(f1_f2_f3__f5),word(-2,_,3) # f1_f2_f3
+      str(f1_f2_f3__f5),word(-3,_,0) # f1_f2
 
 wt6([<avalanche>])
   Hashes a binary input sample into an unsigned 32-bit quantity using the WT6
diff --git a/src/sample.c b/src/sample.c
index 71ee59f0..154beb5c 100644
--- a/src/sample.c
+++ b/src/sample.c
@@ -1997,27 +1997,54 @@ static int sample_conv_field_check(struct arg *args, struct sample_conv *conv,
  */
 static int sample_conv_field(const struct arg *arg_p, struct sample *smp, void *private)
 {
-	unsigned int field;
+	int field;
 	char *start, *end;
 	int i;
+	int count = (arg_p[2].type == ARGT_SINT) ? arg_p[2].data.sint : 1;
 
 	if (!arg_p[0].data.sint)
 		return 0;
 
-	field = 1;
-	end = start = smp->data.u.str.str;
-	while (end - smp->data.u.str.str < smp->data.u.str.len) {
-
-		for (i = 0 ; i < arg_p[1].data.str.len ; i++) {
-			if (*end == arg_p[1].data.str.str[i]) {
-				if (field == arg_p[0].data.sint)
-					goto found;
-				start = end+1;
-				field++;
-				break;
+	if (arg_p[0].data.sint < 0) {
+		field = -1;
+		end = start = smp->data.u.str.str + smp->data.u.str.len;
+		while (start > smp->data.u.str.str) {
+			for (i = 0 ; i < arg_p[1].data.str.len ; i++) {
+				if (*(start-1) == arg_p[1].data.str.str[i]) {
+					if (field == arg_p[0].data.sint) {
+						if (count == 1)
+							goto found;
+						else if (count > 1)
+							count--;
+					} else {
+						end = start-1;
+						field--;
+					}
+					break;
+				}
 			}
+			start--;
+		}
+	} else {
+		field = 1;
+		end = start = smp->data.u.str.str;
+		while (end - smp->data.u.str.str < smp->data.u.str.len) {
+			for (i = 0 ; i < arg_p[1].data.str.len ; i++) {
+				if (*end == arg_p[1].data.str.str[i]) {
+					if (field == arg_p[0].data.sint) {
+						if (count == 1)
+							goto found;
+						else if (count > 1)
+							count--;
+					} else {
+						start = end+1;
+						field++;
+					}
+					break;
+				}
+			}
+			end++;
 		}
-		end++;
 	}
 
 	/* Field not found */
@@ -2048,37 +2075,74 @@ found:
  */
 static int sample_conv_word(const struct arg *arg_p, struct sample *smp, void *private)
 {
-	unsigned int word;
+	int word;
 	char *start, *end;
 	int i, issep, inword;
+	int count = (arg_p[2].type == ARGT_SINT) ? arg_p[2].data.sint : 1;
 
 	if (!arg_p[0].data.sint)
 		return 0;
 
 	word = 0;
 	inword = 0;
-	end = start = smp->data.u.str.str;
-	while (end - smp->data.u.str.str < smp->data.u.str.len) {
-		issep = 0;
-		for (i = 0 ; i < arg_p[1].data.str.len ; i++) {
-			if (*end == arg_p[1].data.str.str[i]) {
-				issep = 1;
-				break;
+	if (arg_p[0].data.sint < 0) {
+		end = start = smp->data.u.str.str + smp->data.u.str.len;
+		while (start > smp->data.u.str.str) {
+			issep = 0;
+			for (i = 0 ; i < arg_p[1].data.str.len ; i++) {
+				if (*(start-1) == arg_p[1].data.str.str[i]) {
+					issep = 1;
+					break;
+				}
 			}
-		}
-		if (!inword) {
-			if (!issep) {
-				word++;
-				start = end;
-				inword = 1;
+			if (!inword) {
+				if (!issep) {
+					if (word != arg_p[0].data.sint) {
+						word--;
+						end = start;
+					}
+					inword = 1;
+				}
 			}
+			else if (issep) {
+				if (word == arg_p[0].data.sint)
+					if (count == 1)
+						goto found;
+					else if (count > 1)
+						count--;
+				inword = 0;
+			}
+			start--;
 		}
-		else if (issep) {
-			if (word == arg_p[0].data.sint)
-				goto found;
-			inword = 0;
+	} else {
+		end = start = smp->data.u.str.str;
+		while (end - smp->data.u.str.str < smp->data.u.str.len) {
+			issep = 0;
+			for (i = 0 ; i < arg_p[1].data.str.len ; i++) {
+				if (*end == arg_p[1].data.str.str[i]) {
+					issep = 1;
+					break;
+				}
+			}
+			if (!inword) {
+				if (!issep) {
+					if (word != arg_p[0].data.sint) {
+						word++;
+						start = end;
+					}
+					inword = 1;
+				}
+			}
+			else if (issep) {
+				if (word == arg_p[0].data.sint)
+					if (count == 1)
+						goto found;
+					else if (count > 1)
+						count--;
+				inword = 0;
+			}
+			end++;
 		}
-		end++;
 	}
 
 	/* Field not found */
@@ -2928,8 +2992,8 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, {
 	{ "xxh64",  sample_conv_xxh64,     ARG1(0,SINT), NULL, SMP_T_BIN,  SMP_T_SINT  },
 	{ "json",   sample_conv_json,      ARG1(1,STR),  sample_conv_json_check, SMP_T_STR,  SMP_T_STR },
 	{ "bytes",  sample_conv_bytes,     ARG2(1,SINT,SINT), NULL, SMP_T_BIN,  SMP_T_BIN },
-	{ "field",  sample_conv_field,     ARG2(2,SINT,STR), sample_conv_field_check, SMP_T_STR,  SMP_T_STR },
-	{ "word",   sample_conv_word,      ARG2(2,SINT,STR), sample_conv_field_check, SMP_T_STR,  SMP_T_STR },
+	{ "field",  sample_conv_field,     ARG3(2,SINT,STR,SINT), sample_conv_field_check, SMP_T_STR,  SMP_T_STR },
+	{ "word",   sample_conv_word,      ARG3(2,SINT,STR,SINT), sample_conv_field_check, SMP_T_STR,  SMP_T_STR },
 	{ "regsub", sample_conv_regsub,    ARG3(2,REG,STR,STR), sample_conv_regsub_check, SMP_T_STR, SMP_T_STR },
 	{ "sha1",   sample_conv_sha1,      0,            NULL, SMP_T_BIN,  SMP_T_BIN  },
 	{ "concat", sample_conv_concat,    ARG3(1,STR,STR,STR), smp_check_concat, SMP_T_STR,  SMP_T_STR },
-- 
2.16.1

Reply via email to