Repository: incubator-metron Updated Branches: refs/heads/Metron_0.4.0 3e16292d9 -> 68bd6c520 (forced update)
METRON-830 Adding StringFunctions to Stellar - chop, prependifmissing, appendifmissing and countmatches (anandsubbu via mattf-horton) closes apache/incubator-metron#516 Project: http://git-wip-us.apache.org/repos/asf/incubator-metron/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-metron/commit/e6992d56 Tree: http://git-wip-us.apache.org/repos/asf/incubator-metron/tree/e6992d56 Diff: http://git-wip-us.apache.org/repos/asf/incubator-metron/diff/e6992d56 Branch: refs/heads/Metron_0.4.0 Commit: e6992d56816c37f8f63274a5a1ce33d37d69dc68 Parents: d30d8b9 Author: anandsubbu <asubraman...@hortonworks.com> Authored: Sun Apr 16 23:13:38 2017 -0700 Committer: mattf <ma...@apache.org> Committed: Sun Apr 16 23:13:38 2017 -0700 ---------------------------------------------------------------------- metron-platform/metron-common/README.md | 44 ++- .../common/dsl/functions/StringFunctions.java | 132 ++++++++- .../dsl/functions/StringFunctionsTest.java | 267 +++++++++++++++++-- 3 files changed, 417 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-metron/blob/e6992d56/metron-platform/metron-common/README.md ---------------------------------------------------------------------- diff --git a/metron-platform/metron-common/README.md b/metron-platform/metron-common/README.md index 8b910f8..de3a1b7 100644 --- a/metron-platform/metron-common/README.md +++ b/metron-platform/metron-common/README.md @@ -99,11 +99,15 @@ In the core language functions, we support basic functional programming primitiv | | | ---------- | | [ `ABS`](../../metron-analytics/metron-statistics#abs) | +| [ `APPEND_IF_MISSING`](#append_if_missing) | | [ `BIN`](../../metron-analytics/metron-statistics#bin) | | [ `BLOOM_ADD`](#bloom_add) | | [ `BLOOM_EXISTS`](#bloom_exists) | | [ `BLOOM_INIT`](#bloom_init) | | [ `BLOOM_MERGE`](#bloom_merge) | +| [ `CHOP`](#chop) | +| [ `CHOMP`](#chomp) | +| [ 
`COUNT_MATCHES`](#count_matches) | | [ `DAY_OF_MONTH`](#day_of_month) | | [ `DAY_OF_WEEK`](#day_of_week) | | [ `DAY_OF_YEAR`](#day_of_year) | @@ -145,9 +149,10 @@ In the core language functions, we support basic functional programming primitiv | [ `MAP`](#map) | | [ `MAP_EXISTS`](#map_exists) | | [ `MONTH`](#month) | +| [ `PREPEND_IF_MISSING`](#prepend_if_missing) | | [ `PROFILE_GET`](#profile_get) | -| [ `PROFILE_FIXED`](#profile_fixed) | -| [ `PROFILE_WINDOW`](#profile_window) | +| [ `PROFILE_FIXED`](#profile_fixed) | +| [ `PROFILE_WINDOW`](#profile_window) | | [ `PROTOCOL_TO_NAME`](#protocol_to_name) | | [ `REDUCE`](#reduce) | | [ `REGEXP_MATCH`](#regexp_match) | @@ -192,6 +197,14 @@ In the core language functions, we support basic functional programming primitiv | [ `WEEK_OF_YEAR`](#week_of_year) | | [ `YEAR`](#year) | +### `APPEND_IF_MISSING` + * Description: Appends the suffix to the end of the string if the string does not already end with any of the suffixes. + * Input: + * string - The string to be appended. + * suffix - The string suffix to append to the end of the string. + * additionalsuffix - Optional - Additional string suffix that is a valid terminator. + * Returns: A new String if suffix was appended, the same string otherwise. + ### `BLOOM_ADD` * Description: Adds an element to the bloom filter passed in * Input: @@ -219,6 +232,25 @@ In the core language functions, we support basic functional programming primitiv * bloomfilters - A list of bloom filters to merge * Returns: Bloom Filter or null if the list is empty +### `CHOP` + * Description: Remove the last character from a String + * Input: + * string - the String to chop last character from, may be null + * Returns: String without last character, null if null String input + +### `CHOMP` + * Description: Removes one newline from end of a String if it's there, otherwise leave it alone. 
A newline is "\n", "\r", or "\r\n" + * Input: + * string - the String to chomp a newline from, may be null + * Returns: String without newline, null if null String input + +### `COUNT_MATCHES` + * Description: Counts how many times the substring appears in the larger string. + * Input: + * string - the CharSequence to check, may be null. + * substring/character - the substring or character to count, may be null. + * Returns: the number of non-overlapping occurrences, 0 if either CharSequence is null. + ### `DAY_OF_MONTH` * Description: The numbered day within the month. The first day within the month has a value of 1. * Input: @@ -480,6 +512,14 @@ In the core language functions, we support basic functional programming primitiv * dateTime - The datetime as a long representing the milliseconds since unix epoch * Returns: The current month (0-based). +### `PREPEND_IF_MISSING` + * Description: Prepends the prefix to the start of the string if the string does not already start with any of the prefixes. + * Input: + * string - The string to be prepended. + * prefix - The string prefix to prepend to the start of the string. + * additionalprefix - Optional - Additional string prefix that is valid. + * Returns: A new String if prefix was prepended, the same string otherwise. + ### `PROFILE_GET` * Description: Retrieves a series of values from a stored profile. 
* Input: http://git-wip-us.apache.org/repos/asf/incubator-metron/blob/e6992d56/metron-platform/metron-common/src/main/java/org/apache/metron/common/dsl/functions/StringFunctions.java ---------------------------------------------------------------------- diff --git a/metron-platform/metron-common/src/main/java/org/apache/metron/common/dsl/functions/StringFunctions.java b/metron-platform/metron-common/src/main/java/org/apache/metron/common/dsl/functions/StringFunctions.java index 1086da3..1ae790d 100644 --- a/metron-platform/metron-common/src/main/java/org/apache/metron/common/dsl/functions/StringFunctions.java +++ b/metron-platform/metron-common/src/main/java/org/apache/metron/common/dsl/functions/StringFunctions.java @@ -21,7 +21,7 @@ package org.apache.metron.common.dsl.functions; import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.Iterables; -import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.metron.common.dsl.BaseStellarFunction; import org.apache.metron.common.dsl.ParseException; import org.apache.metron.common.dsl.Stellar; @@ -343,4 +343,134 @@ public class StringFunctions { return String.format(format, formatArgs); } } + + @Stellar( name="CHOMP" + , description = "Removes one newline from end of a String if it's there, otherwise leave it alone. 
A newline is \"\\n\", \"\\r\", or \"\\r\\n\"" + , params = { "the String to chomp a newline from, may be null"} + , returns = "String without newline, null if null String input" + ) + public static class Chomp extends BaseStellarFunction { + + @Override + public Object apply(List<Object> strings) { + + if(strings == null || strings.size() == 0 ) { + throw new IllegalArgumentException("[CHOMP] missing argument: string to be chopped"); + } + String var = strings.get(0) == null?null: (String) strings.get(0); + if(var == null) { + return null; + } + else if(var.length() == 0) { + return var; + } + else { + return StringUtils.chomp(var); + } + } + } + @Stellar( name="CHOP" + , description = "Remove the last character from a String" + , params = { "the String to chop last character from, may be null"} + , returns = "String without last character, null if null String input" + ) + public static class Chop extends BaseStellarFunction { + + @Override + public Object apply(List<Object> strings) { + + if(strings == null || strings.size() == 0 ) { + throw new IllegalArgumentException("[CHOP] missing argument: string to be chopped"); + } + String var = strings.get(0) == null?null: (String) strings.get(0); + if(var == null) { + return null; + } + else if(var.length() == 0) { + return var; + } + else { + return StringUtils.chop(var); + } + } + } + + @Stellar( name = "PREPEND_IF_MISSING" + , description = "Prepends the prefix to the start of the string if the string does not already start with any of the prefixes" + , params = { + "str - The string." + , "prefix - The string prefix to prepend to the start of the string" + , "additionalprefix - Optional - Additional string prefix that is valid" + } + , returns = "A new String if prefix was prepended, the same string otherwise." 
+ ) + public static class PrependIfMissing extends BaseStellarFunction { + + @Override + public Object apply(List<Object> strings) { + + String prefixed; + switch (strings.size()) { + case 2: prefixed = StringUtils.prependIfMissing((String) strings.get(0), (String) strings.get(1)); + break; + case 3: prefixed = StringUtils.prependIfMissing((String) strings.get(0), (String) strings.get(1), (String) strings.get(2)); + break; + default: throw new IllegalArgumentException("[PREPEND_IF_MISSING] incorrect arguments: " + strings.toString() + "\nUsage: PREPEND_IF_MISSING <String> <prefix> [<prefix>...]"); + } + return prefixed; + } + } + + @Stellar( name = "APPEND_IF_MISSING" + , description = "Appends the suffix to the end of the string if the string does not already end with any of the suffixes" + , params = { + "str - The string." + , "suffix - The string suffix to append to the end of the string" + , "additionalsuffix - Optional - Additional string suffix that is a valid terminator" + } + , returns = "A new String if suffix was appended, the same string otherwise." + ) + public static class AppendIfMissing extends BaseStellarFunction { + + @Override + public Object apply(List<Object> strings) { + + String suffixed; + switch (strings.size()) { + case 2: + suffixed = StringUtils.appendIfMissing((String) strings.get(0), (String) strings.get(1)); + break; + case 3: + suffixed = StringUtils.appendIfMissing((String) strings.get(0), (String) strings.get(1), (String) strings.get(2)); + break; + default: + throw new IllegalArgumentException("[APPEND_IF_MISSING] incorrect arguments. 
Usage: APPEND_IF_MISSING <String> <prefix> [<prefix>...]"); + } + return suffixed; + } + } + + @Stellar( name = "COUNT_MATCHES" + , description = "Counts how many times the substring appears in the larger string" + , params = { + "str - the CharSequence to check, may be null" + , "sub - the substring to count, may be null" + } + , returns = "the number of non-overlapping occurrences, 0 if either CharSequence is null" + ) + public static class CountMatches extends BaseStellarFunction { + + @Override + public Object apply(List<Object> strings) { + + if(strings.size() != 2) { + throw new IllegalArgumentException("[COUNT_MATCHES] incorrect arguments. Usage: COUNT_MATCHES <String> <substring>"); + } + + int matchcount; + matchcount = StringUtils.countMatches((String) strings.get(0), (String) strings.get(1)); + return matchcount; + } + } + } http://git-wip-us.apache.org/repos/asf/incubator-metron/blob/e6992d56/metron-platform/metron-common/src/test/java/org/apache/metron/common/dsl/functions/StringFunctionsTest.java ---------------------------------------------------------------------- diff --git a/metron-platform/metron-common/src/test/java/org/apache/metron/common/dsl/functions/StringFunctionsTest.java b/metron-platform/metron-common/src/test/java/org/apache/metron/common/dsl/functions/StringFunctionsTest.java index d8854f8..8899535 100644 --- a/metron-platform/metron-common/src/test/java/org/apache/metron/common/dsl/functions/StringFunctionsTest.java +++ b/metron-platform/metron-common/src/test/java/org/apache/metron/common/dsl/functions/StringFunctionsTest.java @@ -67,40 +67,40 @@ public class StringFunctionsTest { } @Test - public void testLeftRightFills() throws Exception{ + public void testLeftRightFills() throws Exception { final Map<String, Object> variableMap = new HashMap<String, Object>() {{ put("foo", null); put("bar", null); - put("notInt","oh my"); + put("notInt", "oh my"); }}; //LEFT - Object left = run("FILL_LEFT('123','X', 10)",new HashedMap()); + 
Object left = run("FILL_LEFT('123','X', 10)", new HashedMap()); Assert.assertNotNull(left); - Assert.assertEquals(10,((String)left).length()); - Assert.assertEquals("XXXXXXX123",(String)left); + Assert.assertEquals(10, ((String) left).length()); + Assert.assertEquals("XXXXXXX123", (String) left); //RIGHT Object right = run("FILL_RIGHT('123','X', 10)", new HashedMap()); Assert.assertNotNull(right); - Assert.assertEquals(10,((String)right).length()); - Assert.assertEquals("123XXXXXXX",(String)right); + Assert.assertEquals(10, ((String) right).length()); + Assert.assertEquals("123XXXXXXX", (String) right); //INPUT ALREADY LENGTH Object same = run("FILL_RIGHT('123','X', 3)", new HashedMap()); - Assert.assertEquals(3,((String)same).length()); - Assert.assertEquals("123",(String)same); + Assert.assertEquals(3, ((String) same).length()); + Assert.assertEquals("123", (String) same); //INPUT BIGGER THAN LENGTH Object tooBig = run("FILL_RIGHT('1234567890','X', 3)", new HashedMap()); - Assert.assertEquals(10,((String)tooBig).length()); - Assert.assertEquals("1234567890",(String)tooBig); + Assert.assertEquals(10, ((String) tooBig).length()); + Assert.assertEquals("1234567890", (String) tooBig); //NULL VARIABLES boolean thrown = false; - try{ + try { run("FILL_RIGHT('123',foo,bar)", variableMap); - }catch(ParseException pe) { + } catch (ParseException pe) { thrown = true; Assert.assertTrue(pe.getMessage().contains("are both required")); } @@ -108,9 +108,9 @@ public class StringFunctionsTest { thrown = false; // NULL LENGTH - try{ + try { run("FILL_RIGHT('123','X',bar)", variableMap); - }catch(ParseException pe) { + } catch (ParseException pe) { thrown = true; Assert.assertTrue(pe.getMessage().contains("are both required")); } @@ -118,9 +118,9 @@ public class StringFunctionsTest { thrown = false; // NULL FILL - try{ + try { run("FILL_RIGHT('123',foo, 7)", variableMap); - }catch(ParseException pe) { + } catch (ParseException pe) { thrown = true; 
Assert.assertTrue(pe.getMessage().contains("are both required")); } @@ -130,7 +130,7 @@ public class StringFunctionsTest { // NON INTEGER LENGTH try { run("FILL_RIGHT('123','X', 'z' )", new HashedMap()); - }catch(ParseException pe){ + } catch (ParseException pe) { thrown = true; Assert.assertTrue(pe.getMessage().contains("not a valid Integer")); } @@ -140,7 +140,7 @@ public class StringFunctionsTest { // EMPTY STRING PAD try { Object returnValue = run("FILL_RIGHT('123','', 10 )", new HashedMap()); - }catch(ParseException pe) { + } catch (ParseException pe) { thrown = true; Assert.assertTrue(pe.getMessage().contains("cannot be an empty")); } @@ -150,7 +150,7 @@ public class StringFunctionsTest { //MISSING LENGTH PARAMETER try { run("FILL_RIGHT('123',foo)", variableMap); - }catch(ParseException pe){ + } catch (ParseException pe) { thrown = true; Assert.assertTrue(pe.getMessage().contains("expects three")); } @@ -160,8 +160,8 @@ public class StringFunctionsTest { @Test public void shannonEntropyTest() throws Exception { //test empty string - Assert.assertEquals(0.0, (Double)run("STRING_ENTROPY('')", new HashMap<>()), 0.0); - Assert.assertEquals(0.0, (Double)run("STRING_ENTROPY(foo)", ImmutableMap.of("foo", "")), 0.0); + Assert.assertEquals(0.0, (Double) run("STRING_ENTROPY('')", new HashMap<>()), 0.0); + Assert.assertEquals(0.0, (Double) run("STRING_ENTROPY(foo)", ImmutableMap.of("foo", "")), 0.0); /* Now consider the string aaaaaaaaaabbbbbccccc or 10 a's followed by 5 b's and 5 c's. 
@@ -173,7 +173,7 @@ public class StringFunctionsTest { -p(a)*log_2(p(a)) - p(b)*log_2(p(b)) - p(c)*log_2(p(c)) = -0.5*-1 - 0.25*-2 - 0.25*-2 = 1.5 */ - Assert.assertEquals(1.5, (Double)run("STRING_ENTROPY(foo)", ImmutableMap.of("foo", "aaaaaaaaaabbbbbccccc")), 0.0); + Assert.assertEquals(1.5, (Double) run("STRING_ENTROPY(foo)", ImmutableMap.of("foo", "aaaaaaaaaabbbbbccccc")), 0.0); } @Test @@ -207,4 +207,225 @@ public class StringFunctionsTest { public void testFormatWithMissingArguments() throws Exception { run("FORMAT('missing arg: %d')", Collections.emptyMap()); } + + + /** + * CHOMP StringFunction + * + * @throws Exception + */ + @Test + public void testChomp() throws Exception { + Assert.assertEquals("abc", run("CHOMP('abc')", new HashedMap())); + Assert.assertEquals("abc", run("CHOMP(msg)", ImmutableMap.of("msg", "abc\r\n"))); + Assert.assertEquals("", run("CHOMP(msg)", ImmutableMap.of("msg", "\n"))); + Assert.assertEquals("", run("CHOMP('')", new HashedMap())); + Assert.assertEquals(null, run("CHOMP(msg)", new HashedMap())); + Assert.assertEquals(null, run("CHOMP(null)", new HashedMap())); + + // No input + boolean thrown = false; + try { + run("CHOMP()", Collections.emptyMap()); + } catch (ParseException pe) { + thrown = true; + Assert.assertTrue(pe.getMessage().contains("missing argument")); + } + Assert.assertTrue(thrown); + thrown = false; + + // Integer input + try { + run("CHOMP(123)", Collections.emptyMap()); + } catch (ParseException pe) { + thrown = true; + Assert.assertTrue(pe.getMessage().contains("cannot be cast")); + } + Assert.assertTrue(thrown); + + } + + /** + * CHOP StringFunction + * + * @throws Exception + */ + @Test + public void testChop() throws Exception { + Assert.assertEquals("ab", run("CHOP('abc')", new HashedMap())); + Assert.assertEquals(null, run("CHOP(null)", new HashedMap())); + Assert.assertEquals(null, run("CHOP(msg)", new HashedMap())); + Assert.assertEquals("abc", run("CHOP(msg)", ImmutableMap.of("msg", "abc\r\n"))); + 
Assert.assertEquals("", run("CHOP(msg)", ImmutableMap.of("msg", ""))); + Assert.assertEquals("", run("CHOP(msg)", ImmutableMap.of("msg", "\n"))); + Assert.assertEquals("", run("CHOP('')", new HashedMap())); + + // No input + boolean thrown = false; + try { + run("CHOP()", Collections.emptyMap()); + } catch (ParseException pe) { + thrown = true; + Assert.assertTrue(pe.getMessage().contains("missing argument")); + } + Assert.assertTrue(thrown); + thrown = false; + + // Integer input + try { + run("CHOP(123)", Collections.emptyMap()); + } catch (ParseException pe) { + thrown = true; + Assert.assertTrue(pe.getMessage().contains("cannot be cast")); + } + Assert.assertTrue(thrown); + + } + + /** + * PREPEND_IF_MISSING StringFunction + */ + @Test + public void testPrependIfMissing() throws Exception { + Assert.assertEquals("xyzabc", run("PREPEND_IF_MISSING('abc', 'xyz')", new HashedMap())); + Assert.assertEquals("xyzXYZabc", run("PREPEND_IF_MISSING('XYZabc', 'xyz', 'mno')", new HashedMap())); + Assert.assertEquals("mnoXYZabc", run("PREPEND_IF_MISSING('mnoXYZabc', 'xyz', 'mno')", new HashedMap())); + Assert.assertEquals(null, run("PREPEND_IF_MISSING(null, null, null)", new HashedMap())); + Assert.assertEquals("xyz", run("PREPEND_IF_MISSING('', 'xyz', null)", new HashedMap())); + + // No input + boolean thrown = false; + try { + run("PREPEND_IF_MISSING()", Collections.emptyMap()); + } catch (ParseException pe) { + thrown = true; + Assert.assertTrue(pe.getMessage().contains("incorrect arguments")); + } + Assert.assertTrue(thrown); + thrown = false; + + // Incorrect number of arguments - 1 + try { + run("PREPEND_IF_MISSING('abc')", Collections.emptyMap()); + } catch (ParseException pe) { + thrown = true; + Assert.assertTrue(pe.getMessage().contains("incorrect arguments")); + } + Assert.assertTrue(thrown); + thrown = false; + + // Incorrect number of arguments - 2 + try { + run("PREPEND_IF_MISSING('abc', 'def', 'ghi', 'jkl')", Collections.emptyMap()); + } catch (ParseException 
pe) { + thrown = true; + Assert.assertTrue(pe.getMessage().contains("incorrect arguments")); + } + Assert.assertTrue(thrown); + thrown = false; + + // Integer input + try { + run("PREPEND_IF_MISSING(123, 'abc')", Collections.emptyMap()); + } catch (ParseException pe) { + thrown = true; + Assert.assertTrue(pe.getMessage().contains("cannot be cast")); + } + Assert.assertTrue(thrown); + + } + + /** + * APPEND_IF_MISSING StringFunction + */ + @Test + public void testAppendIfMissing() throws Exception { + Assert.assertEquals("apachemetron", run("APPEND_IF_MISSING('apache', 'metron')", new HashedMap())); + Assert.assertEquals("abcXYZxyz", run("APPEND_IF_MISSING('abcXYZ', 'xyz', 'mno')", new HashedMap())); + Assert.assertEquals(null, run("APPEND_IF_MISSING(null, null, null)", new HashedMap())); + Assert.assertEquals("xyz", run("APPEND_IF_MISSING('', 'xyz', null)", new HashedMap())); + + // No input + boolean thrown = false; + try { + run("APPEND_IF_MISSING()", Collections.emptyMap()); + } catch (ParseException pe) { + thrown = true; + Assert.assertTrue(pe.getMessage().contains("incorrect arguments")); + } + Assert.assertTrue(thrown); + thrown = false; + + // Incorrect number of arguments - 1 + try { + run("APPEND_IF_MISSING('abc')", Collections.emptyMap()); + } catch (ParseException pe) { + thrown = true; + Assert.assertTrue(pe.getMessage().contains("incorrect arguments")); + } + Assert.assertTrue(thrown); + thrown = false; + + // Incorrect number of arguments - 2 + try { + run("APPEND_IF_MISSING('abc', 'def', 'ghi', 'jkl')", Collections.emptyMap()); + } catch (ParseException pe) { + thrown = true; + Assert.assertTrue(pe.getMessage().contains("incorrect arguments")); + } + Assert.assertTrue(thrown); + thrown = false; + + // Integer input + try { + run("APPEND_IF_MISSING(123, 'abc')", Collections.emptyMap()); + } catch (ParseException pe) { + thrown = true; + Assert.assertTrue(pe.getMessage().contains("cannot be cast")); + } + Assert.assertTrue(thrown); + + } + + /** + * 
COUNT_MATCHES StringFunction + */ + @Test + public void testCountMatches() throws Exception { + Assert.assertEquals(0, (int) run("COUNT_MATCHES(null, '*')", new HashedMap())); + Assert.assertEquals(2, (int) run("COUNT_MATCHES('apachemetron', 'e')", new HashedMap())); + Assert.assertEquals(2, (int) run("COUNT_MATCHES('anand', 'an')", new HashedMap())); + Assert.assertEquals(0, (int) run("COUNT_MATCHES('abcd', null)", new HashedMap())); + + // No input + boolean thrown = false; + try { + run("COUNT_MATCHES()", Collections.emptyMap()); + } catch (ParseException pe) { + thrown = true; + Assert.assertTrue(pe.getMessage().contains("incorrect arguments")); + } + Assert.assertTrue(thrown); + thrown = false; + + // Incorrect number of arguments - 1 + try { + run("COUNT_MATCHES('abc')", Collections.emptyMap()); + } catch (ParseException pe) { + thrown = true; + Assert.assertTrue(pe.getMessage().contains("incorrect arguments")); + } + Assert.assertTrue(thrown); + thrown = false; + + // Integer input + try { + run("COUNT_MATCHES(123, 456)", Collections.emptyMap()); + } catch (ParseException pe) { + thrown = true; + Assert.assertTrue(pe.getMessage().contains("cannot be cast")); + } + Assert.assertTrue(thrown); + + } }