Hi, This patch extends the functionality of the field/word converters so that it is possible to count fields/words from either the beginning or the end of the string, and to extract multiple fields/words if needed. The change is backward compatible and should apply cleanly to both the 1.8 and 1.9 branches. Regards,
Marcin Deranek
>From 3393f952788e26e6e8add5b6ca472d3e765b57ca Mon Sep 17 00:00:00 2001 From: Marcin Deranek <marcin.dera...@booking.com> Date: Mon, 16 Apr 2018 14:30:46 +0200 Subject: [PATCH] Extend functionality for field/word converters Extend functionality of field/word converters, so it's possible to extract field(s)/word(s) counting from the beginning/end and/or extract multiple fields/words (including separators) eg. str(f1_f2_f3__f5),field(2,_,2) # f2_f3 str(f1_f2_f3__f5),field(2,_,0) # f2_f3__f5 str(f1_f2_f3__f5),field(-2,_,3) # f2_f3_ str(f1_f2_f3__f5),field(-3,_,0) # f1_f2_f3 str(w1_w2_w3___w4),word(3,_,2) # w3___w4 str(w1_w2_w3___w4),word(2,_,0) # w2_w3___w4 str(w1_w2_w3___w4),word(-2,_,3) # w1_w2_w3 str(w1_w2_w3___w4),word(-3,_,0) # w1_w2 Change is backward compatible. --- doc/configuration.txt | 34 ++++++++++--- src/sample.c | 132 +++++++++++++++++++++++++++++++++++++------------- 2 files changed, 125 insertions(+), 41 deletions(-) diff --git a/doc/configuration.txt b/doc/configuration.txt index 1e0b26f8..a9f75579 100644 --- a/doc/configuration.txt +++ b/doc/configuration.txt @@ -12907,10 +12907,20 @@ even Returns a boolean TRUE if the input value of type signed integer is even otherwise returns FALSE. It is functionally equivalent to "not,and(1),bool". -field(<index>,<delimiters>) - Extracts the substring at the given index considering given delimiters from - an input string. Indexes start at 1 and delimiters are a string formatted - list of chars. +field(<index>,<delimiters>[,<count>]) + Extracts the substring at the given index counting from the beginning + (positive index) or from the end (negative index) considering given delimiters + from an input string. Indexes start at 1 or -1 and delimiters are a string + formatted list of chars. Optionally you can specify <count> of fields to + extract (default: 1). Value of 0 indicates extraction of all remaining + fields. 
+ + Example : + str(f1_f2_f3__f5),field(5,_) # f5 + str(f1_f2_f3__f5),field(2,_,0) # f2_f3__f5 + str(f1_f2_f3__f5),field(2,_,2) # f2_f3 + str(f1_f2_f3__f5),field(-2,_,3) # f2_f3_ + str(f1_f2_f3__f5),field(-3,_,0) # f1_f2_f3 hex Converts a binary input sample to a hex string containing two hex digits per @@ -13440,9 +13450,19 @@ utime(<format>[,<offset>]) # e.g. 20140710162350 127.0.0.1:57325 log-format %[date,utime(%Y%m%d%H%M%S)]\ %ci:%cp -word(<index>,<delimiters>) - Extracts the nth word considering given delimiters from an input string. - Indexes start at 1 and delimiters are a string formatted list of chars. +word(<index>,<delimiters>[,<count>]) + Extracts the nth word counting from the beginning (positive index) or from + the end (negative index) considering given delimiters from an input string. + Indexes start at 1 or -1 and delimiters are a string formatted list of chars. + Optionally you can specify <count> of words to extract (default: 1). + Value of 0 indicates extraction of all remaining words. + + Example : + str(f1_f2_f3__f5),word(4,_) # f5 + str(f1_f2_f3__f5),word(2,_,0) # f2_f3__f5 + str(f1_f2_f3__f5),word(3,_,2) # f3__f5 + str(f1_f2_f3__f5),word(-2,_,3) # f1_f2_f3 + str(f1_f2_f3__f5),word(-3,_,0) # f1_f2 wt6([<avalanche>]) Hashes a binary input sample into an unsigned 32-bit quantity using the WT6 diff --git a/src/sample.c b/src/sample.c index 71ee59f0..154beb5c 100644 --- a/src/sample.c +++ b/src/sample.c @@ -1997,27 +1997,54 @@ static int sample_conv_field_check(struct arg *args, struct sample_conv *conv, */ static int sample_conv_field(const struct arg *arg_p, struct sample *smp, void *private) { - unsigned int field; + int field; char *start, *end; int i; + int count = (arg_p[2].type == ARGT_SINT) ? 
arg_p[2].data.sint : 1; if (!arg_p[0].data.sint) return 0; - field = 1; - end = start = smp->data.u.str.str; - while (end - smp->data.u.str.str < smp->data.u.str.len) { - - for (i = 0 ; i < arg_p[1].data.str.len ; i++) { - if (*end == arg_p[1].data.str.str[i]) { - if (field == arg_p[0].data.sint) - goto found; - start = end+1; - field++; - break; + if (arg_p[0].data.sint < 0) { + field = -1; + end = start = smp->data.u.str.str + smp->data.u.str.len; + while (start > smp->data.u.str.str) { + for (i = 0 ; i < arg_p[1].data.str.len ; i++) { + if (*(start-1) == arg_p[1].data.str.str[i]) { + if (field == arg_p[0].data.sint) { + if (count == 1) + goto found; + else if (count > 1) + count--; + } else { + end = start-1; + field--; + } + break; + } } + start--; + } + } else { + field = 1; + end = start = smp->data.u.str.str; + while (end - smp->data.u.str.str < smp->data.u.str.len) { + for (i = 0 ; i < arg_p[1].data.str.len ; i++) { + if (*end == arg_p[1].data.str.str[i]) { + if (field == arg_p[0].data.sint) { + if (count == 1) + goto found; + else if (count > 1) + count--; + } else { + start = end+1; + field++; + } + break; + } + } + end++; } - end++; } /* Field not found */ @@ -2048,37 +2075,74 @@ found: */ static int sample_conv_word(const struct arg *arg_p, struct sample *smp, void *private) { - unsigned int word; + int word; char *start, *end; int i, issep, inword; + int count = (arg_p[2].type == ARGT_SINT) ? 
arg_p[2].data.sint : 1; if (!arg_p[0].data.sint) return 0; word = 0; inword = 0; - end = start = smp->data.u.str.str; - while (end - smp->data.u.str.str < smp->data.u.str.len) { - issep = 0; - for (i = 0 ; i < arg_p[1].data.str.len ; i++) { - if (*end == arg_p[1].data.str.str[i]) { - issep = 1; - break; + if (arg_p[0].data.sint < 0) { + end = start = smp->data.u.str.str + smp->data.u.str.len; + while (start > smp->data.u.str.str) { + issep = 0; + for (i = 0 ; i < arg_p[1].data.str.len ; i++) { + if (*(start-1) == arg_p[1].data.str.str[i]) { + issep = 1; + break; + } } - } - if (!inword) { - if (!issep) { - word++; - start = end; - inword = 1; + if (!inword) { + if (!issep) { + if (word != arg_p[0].data.sint) { + word--; + end = start; + } + inword = 1; + } } + else if (issep) { + if (word == arg_p[0].data.sint) + if (count == 1) + goto found; + else if (count > 1) + count--; + inword = 0; + } + start--; } - else if (issep) { - if (word == arg_p[0].data.sint) - goto found; - inword = 0; + } else { + end = start = smp->data.u.str.str; + while (end - smp->data.u.str.str < smp->data.u.str.len) { + issep = 0; + for (i = 0 ; i < arg_p[1].data.str.len ; i++) { + if (*end == arg_p[1].data.str.str[i]) { + issep = 1; + break; + } + } + if (!inword) { + if (!issep) { + if (word != arg_p[0].data.sint) { + word++; + start = end; + } + inword = 1; + } + } + else if (issep) { + if (word == arg_p[0].data.sint) + if (count == 1) + goto found; + else if (count > 1) + count--; + inword = 0; + } + end++; } - end++; } /* Field not found */ @@ -2928,8 +2992,8 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, { { "xxh64", sample_conv_xxh64, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT }, { "json", sample_conv_json, ARG1(1,STR), sample_conv_json_check, SMP_T_STR, SMP_T_STR }, { "bytes", sample_conv_bytes, ARG2(1,SINT,SINT), NULL, SMP_T_BIN, SMP_T_BIN }, - { "field", sample_conv_field, ARG2(2,SINT,STR), sample_conv_field_check, SMP_T_STR, SMP_T_STR }, - { "word", sample_conv_word, 
ARG2(2,SINT,STR), sample_conv_field_check, SMP_T_STR, SMP_T_STR }, + { "field", sample_conv_field, ARG3(2,SINT,STR,SINT), sample_conv_field_check, SMP_T_STR, SMP_T_STR }, + { "word", sample_conv_word, ARG3(2,SINT,STR,SINT), sample_conv_field_check, SMP_T_STR, SMP_T_STR }, { "regsub", sample_conv_regsub, ARG3(2,REG,STR,STR), sample_conv_regsub_check, SMP_T_STR, SMP_T_STR }, { "sha1", sample_conv_sha1, 0, NULL, SMP_T_BIN, SMP_T_BIN }, { "concat", sample_conv_concat, ARG3(1,STR,STR,STR), smp_check_concat, SMP_T_STR, SMP_T_STR }, -- 2.16.1