Repository: metron Updated Branches: refs/heads/master dcec5a7cf -> cf165ff30
METRON-933 STELLAR support for setting fields to regex match captures (ottobackwards) closes apache/metron#638 Project: http://git-wip-us.apache.org/repos/asf/metron/repo Commit: http://git-wip-us.apache.org/repos/asf/metron/commit/cf165ff3 Tree: http://git-wip-us.apache.org/repos/asf/metron/tree/cf165ff3 Diff: http://git-wip-us.apache.org/repos/asf/metron/diff/cf165ff3 Branch: refs/heads/master Commit: cf165ff306764641a2c7991ff93cf48228d7910d Parents: dcec5a7 Author: ottobackwards <ottobackwa...@gmail.com> Authored: Tue Jul 11 15:11:12 2017 -0400 Committer: otto <o...@apache.org> Committed: Tue Jul 11 15:11:12 2017 -0400 ---------------------------------------------------------------------- metron-stellar/stellar-common/README.md | 9 ++ .../stellar/common/utils/PatternCache.java | 37 +++++++ .../stellar/dsl/functions/RegExFunctions.java | 103 +++++++++++++++++++ .../stellar/dsl/functions/StringFunctions.java | 23 ----- .../dsl/functions/RegExFunctionsTest.java | 70 +++++++++++++ 5 files changed, 219 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/metron/blob/cf165ff3/metron-stellar/stellar-common/README.md ---------------------------------------------------------------------- diff --git a/metron-stellar/stellar-common/README.md b/metron-stellar/stellar-common/README.md index 2d2ffdd..8a2699d 100644 --- a/metron-stellar/stellar-common/README.md +++ b/metron-stellar/stellar-common/README.md @@ -161,6 +161,7 @@ In the core language functions, we support basic functional programming primitiv | [ `PROTOCOL_TO_NAME`](#protocol_to_name) | | [ `REDUCE`](#reduce) | | [ `REGEXP_MATCH`](#regexp_match) | +| [ `REGEXP_GROUP_VAL`](#regexp_group_val) | | [ `SPLIT`](#split) | | [ `STARTS_WITH`](#starts_with) | | [ `STATS_ADD`](../../metron-analytics/metron-statistics#stats_add) | @@ -571,6 +572,14 @@ In the core language functions, we support basic functional programming primitiv * string - The string to test * pattern - The proposed regex pattern * Returns: True if the regex pattern matches the string and false if otherwise. + +### `REGEXP_GROUP_VAL` + * Description: Returns the value of a group in a regex against a string + * Input: + * string - The string to test + * pattern - The proposed regex pattern + * group - The integer that selects what group to select, starting at 1 + * Returns: The value of the group, or null if not matched or no group at index. ### `STRING_ENTROPY` * Description: Computes the base-2 shannon entropy of a string. http://git-wip-us.apache.org/repos/asf/metron/blob/cf165ff3/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/common/utils/PatternCache.java ---------------------------------------------------------------------- diff --git a/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/common/utils/PatternCache.java b/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/common/utils/PatternCache.java new file mode 100644 index 0000000..f0fcee9 --- /dev/null +++ b/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/common/utils/PatternCache.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.metron.stellar.common.utils; + +import java.util.HashMap; +import java.util.regex.Pattern; + +public enum PatternCache { + INSTANCE; + + private static final ThreadLocal<HashMap<String,Pattern>> _cache = ThreadLocal.withInitial(() -> + new HashMap<>()); + + public Pattern getPattern(String patternString){ + Pattern pattern = _cache.get().get(patternString); + if(pattern == null){ + pattern = Pattern.compile(patternString); + _cache.get().put(patternString,pattern); + } + return pattern; + } +} http://git-wip-us.apache.org/repos/asf/metron/blob/cf165ff3/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/RegExFunctions.java ---------------------------------------------------------------------- diff --git a/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/RegExFunctions.java b/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/RegExFunctions.java new file mode 100644 index 0000000..a1ea229 --- /dev/null +++ b/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/RegExFunctions.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.metron.stellar.dsl.functions; + +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.metron.stellar.common.utils.ConversionUtils; +import org.apache.metron.stellar.common.utils.PatternCache; +import org.apache.metron.stellar.dsl.BaseStellarFunction; +import org.apache.metron.stellar.dsl.Stellar; + +public class RegExFunctions { + + @Stellar(name = "REGEXP_MATCH", + description = "Determines whether a regex matches a string", + params = { + "string - The string to test", + "pattern - The proposed regex pattern" + }, + returns = "True if the regex pattern matches the string and false if otherwise.") + public static class RegexpMatch extends BaseStellarFunction { + + @Override + public Object apply(List<Object> list) { + if (list.size() < 2) { + throw new IllegalStateException( + "REGEXP_MATCH expects two args: [string, pattern] where pattern is a regexp pattern"); + } + String patternString = (String) list.get(1); + String str = (String) list.get(0); + if (str == null || patternString == null) { + return false; + } + return PatternCache.INSTANCE.getPattern(patternString).matcher(str).matches(); + } + } + + @Stellar(name = "REGEXP_GROUP_VAL", + description = "Returns the value of a group in a regex against a string", + params = { + "string - The string to test", + "pattern - The proposed regex pattern", + "group - integer that selects what group to select, starting at 1" + }, + returns = "The value of the group, or null if not matched or no group at index") + public static class RegexpGroupValue extends BaseStellarFunction { + + @Override + public Object apply(List<Object> list) { + if (list.size() != 3) { + throw new IllegalStateException( + "REGEXP_GROUP_VAL expects three args: [string, pattern, int]" + "" + + "where pattern is a regexp pattern"); + } + String stringPattern = (String) list.get(1); + String str = (String) list.get(0); + Integer groupNumber = ConversionUtils.convert(list.get(2), Integer.class); + + if (groupNumber == null) { + // group number was not a valid integer + return null; + } + + if (groupNumber == 0) { + // 0, by default is the entire input + // default to returning a non-null + return str; + } + + if (str == null || stringPattern == null) { + return null; + } + Pattern pattern = PatternCache.INSTANCE.getPattern(stringPattern); + Matcher matcher = pattern.matcher(str); + if (!matcher.matches()) { + return null; + } + + int groupCount = matcher.groupCount(); + if (groupCount == 0 || groupCount < groupNumber) { + return null; + } + return matcher.group(groupNumber); + } + } +} http://git-wip-us.apache.org/repos/asf/metron/blob/cf165ff3/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/StringFunctions.java ---------------------------------------------------------------------- diff --git a/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/StringFunctions.java b/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/StringFunctions.java index 83099bd..289fa7f 100644 --- a/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/StringFunctions.java +++ b/metron-stellar/stellar-common/src/main/java/org/apache/metron/stellar/dsl/functions/StringFunctions.java @@ -34,29 +34,6 @@ import java.util.Map; public class StringFunctions { - @Stellar(name="REGEXP_MATCH" - ,description = "Determines whether a regex matches a string" - , params = { - "string - The string to test" - ,"pattern - The proposed regex pattern" - } - , returns = "True if the regex pattern matches the string and false if otherwise.") - public static class RegexpMatch extends BaseStellarFunction { - - @Override - public Object apply(List<Object> list) { - if(list.size() < 2) { - throw new IllegalStateException("REGEXP_MATCH expects two args: [string, pattern] where pattern is a regexp pattern"); - } - String pattern = (String) list.get(1); - String str = (String) list.get(0); - if(str == null || pattern == null) { - return false; - } - return str.matches(pattern); - } - } - @Stellar(name="ENDS_WITH" ,description = "Determines whether a string ends with a specified suffix" , params = { http://git-wip-us.apache.org/repos/asf/metron/blob/cf165ff3/metron-stellar/stellar-common/src/test/java/org/apache/metron/stellar/dsl/functions/RegExFunctionsTest.java ---------------------------------------------------------------------- diff --git a/metron-stellar/stellar-common/src/test/java/org/apache/metron/stellar/dsl/functions/RegExFunctionsTest.java b/metron-stellar/stellar-common/src/test/java/org/apache/metron/stellar/dsl/functions/RegExFunctionsTest.java new file mode 100644 index 0000000..9728bd2 --- /dev/null +++ b/metron-stellar/stellar-common/src/test/java/org/apache/metron/stellar/dsl/functions/RegExFunctionsTest.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.metron.stellar.dsl.functions; + +import org.apache.metron.stellar.dsl.ParseException; +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; + +import static org.apache.metron.stellar.common.utils.StellarProcessorUtils.runPredicate; + +public class RegExFunctionsTest { + + // test RegExMatch + @Test + public void testRegExMatch() throws Exception { + final Map<String, String> variableMap = new HashMap<String, String>() {{ + put("numbers", "12345"); + put("numberPattern", "\\d(\\d)(\\d).*"); + put("letters", "abcde"); + put("empty", ""); + }}; + + Assert.assertTrue(runPredicate("REGEXP_MATCH(numbers,numberPattern)", v -> variableMap.get(v))); + Assert.assertFalse(runPredicate("REGEXP_MATCH(letters,numberPattern)", v -> variableMap.get(v))); + } + + @Test + public void testRegExGroupVal() throws Exception { + final Map<String, String> variableMap = new HashMap<String, String>() {{ + put("numbers", "12345"); + put("numberPattern", "\\d(\\d)(\\d).*"); + put("numberPatternNoCaptures", "\\d\\d\\d.*"); + put("letters", "abcde"); + put("empty", ""); + }}; + Assert.assertTrue(runPredicate("REGEXP_GROUP_VAL(numbers,numberPattern,2) == '3'", v -> variableMap.get(v))); + Assert.assertTrue(runPredicate("REGEXP_GROUP_VAL(letters,numberPattern,2) == null", v -> variableMap.get(v))); + Assert.assertTrue(runPredicate("REGEXP_GROUP_VAL(empty,numberPattern,2) == null", v -> variableMap.get(v))); + Assert.assertTrue(runPredicate("REGEXP_GROUP_VAL(numbers,numberPatternNoCaptures,2) == null", v -> variableMap.get(v))); + + boolean caught = false; + try{ + runPredicate("REGEXP_GROUP_VAL(2) == null", v -> variableMap.get(v)); + }catch(ParseException | IllegalStateException ise){ + caught = true; + } + if(!caught){ + Assert.assertTrue("Did not fail on wrong number of parameters",false); + } + } +}