This is an automated email from the ASF dual-hosted git repository. mmiklavcic pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/metron.git
The following commit(s) were added to refs/heads/master by this push: new 3754ff3 METRON-2112 Normalize parser original_string handling (mmiklavc) closes apache/metron#1409 3754ff3 is described below commit 3754ff33f6cd149ffca57474d744e0298d4c172a Author: mmiklavc <michael.miklav...@gmail.com> AuthorDate: Thu May 30 14:54:17 2019 -0600 METRON-2112 Normalize parser original_string handling (mmiklavc) closes apache/metron#1409 --- metron-platform/metron-common/README.md | 4 +- .../java/org/apache/metron/common/Constants.java | 6 ++- .../data/jsonMapQuery/parsed/jsonMapExampleParsed | 20 +++---- .../parsed/jsonMapExampleParsed | 12 ++--- metron-platform/metron-parsing/README.md | 18 +++++-- .../org/apache/metron/parsers/ParserComponent.java | 3 ++ .../apache/metron/parsers/ParserRunnerImpl.java | 2 + .../apache/metron/parsers/json/JSONMapParser.java | 21 +++++--- .../metron/parsers/ParserRunnerImplTest.java | 45 +++++++++++++++- .../parsers/json/JSONMapParserQueryTest.java | 61 +++++++++++++++++++--- .../metron/parsers/json/JSONMapParserTest.java | 14 +++-- .../json/JSONMapParserWrappedQueryTest.java | 12 ++--- .../metron-parsing/metron-parsing-storm/README.md | 8 ++- 13 files changed, 169 insertions(+), 57 deletions(-) diff --git a/metron-platform/metron-common/README.md b/metron-platform/metron-common/README.md index 4d19769..f3082a5 100644 --- a/metron-platform/metron-common/README.md +++ b/metron-platform/metron-common/README.md @@ -87,7 +87,7 @@ but a convenient index is provided here: | [`es.port`](../metron-elasticsearch#esport) | Indexing | String | N/A | | [`es.date.format`](../metron-elasticsearch#esdateformat) | Indexing | String | `es_date_format` | | [`es.client.settings`](../metron-elasticsearch#esclientsettings) | Indexing | Object | N/A | -| [`indexing.writer.elasticsearch.setDocumentId`](../metron-indexing#elasticsearch) | Indexing | Boolean | N/A | +| [`indexing.writer.elasticsearch.setDocumentId`](../metron-indexing#elasticsearch) | Indexing | 
Boolean | N/A | | [`solr.zookeeper`](../metron-solr#configuration) | Indexing | String | `solr_zookeeper_url` | | [`solr.commitPerBatch`](../metron-solr#configuration) | Indexing | String | N/A | | [`solr.commit.soft`](../metron-solr#configuration) | Indexing | String | N/A | @@ -96,7 +96,7 @@ but a convenient index is provided here: | [`solr.collection`](../metron-solr#configuration) | Indexing | String | N/A | | [`solr.http.config`](../metron-solr#configuration) | Indexing | String | N/A | | [`fieldValidations`](#validation-framework) | Parsing | Object | N/A | -| [`parser.error.topic`](../metron-parsers#parsererrortopic) | Parsing | String | `parser_error_topic` | +| [`parser.error.topic`](../metron-parsing#parsererrortopic) | Parsing | String | `parser_error_topic` | | [`stellar.function.paths`](../../metron-stellar/stellar-common#stellarfunctionpaths) | Stellar | CSV String | N/A | | [`stellar.function.resolver.includes`](../../metron-stellar/stellar-common#stellarfunctionresolverincludesexcludes) | Stellar | CSV String | N/A | | [`stellar.function.resolver.excludes`](../../metron-stellar/stellar-common#stellarfunctionresolverincludesexcludes) | Stellar | CSV String | N/A | diff --git a/metron-platform/metron-common/src/main/java/org/apache/metron/common/Constants.java b/metron-platform/metron-common/src/main/java/org/apache/metron/common/Constants.java index 75d7caf..0dd1ca9 100644 --- a/metron-platform/metron-common/src/main/java/org/apache/metron/common/Constants.java +++ b/metron-platform/metron-common/src/main/java/org/apache/metron/common/Constants.java @@ -25,7 +25,7 @@ public class Constants { public static final String ZOOKEEPER_ROOT = "/metron"; public static final String ZOOKEEPER_TOPOLOGY_ROOT = ZOOKEEPER_ROOT + "/topology"; public static final long DEFAULT_CONFIGURED_BOLT_TIMEOUT = 5000; - public static final String SENSOR_TYPE = "source.type"; + public static final String SENSOR_TYPE = Fields.SENSOR_TYPE.getName(); public static final String 
SENSOR_TYPE_FIELD_PROPERTY = "source.type.field"; public static final String THREAT_SCORE_FIELD_PROPERTY = "threat.triage.score.field"; public static final String ENRICHMENT_TOPIC = "enrichments"; @@ -35,7 +35,7 @@ public class Constants { public static final String SIMPLE_HBASE_ENRICHMENT = "hbaseEnrichment"; public static final String SIMPLE_HBASE_THREAT_INTEL = "hbaseThreatIntel"; public static final String STELLAR_CONTEXT_CONF = "stellarContext"; - public static final String GUID = "guid"; + public static final String GUID = Fields.GUID.getName(); /** * The key in the global configuration that defines the global parser error topic. @@ -56,6 +56,8 @@ public class Constants { ,PROTOCOL("protocol") ,TIMESTAMP("timestamp") ,ORIGINAL("original_string") + ,GUID("guid") + ,SENSOR_TYPE("source.type") ,INCLUDES_REVERSE_TRAFFIC("includes_reverse_traffic") ; private static Map<String, Fields> nameToField; diff --git a/metron-platform/metron-integration-test/src/main/sample/data/jsonMapQuery/parsed/jsonMapExampleParsed b/metron-platform/metron-integration-test/src/main/sample/data/jsonMapQuery/parsed/jsonMapExampleParsed index b5658d5..812b95d 100644 --- a/metron-platform/metron-integration-test/src/main/sample/data/jsonMapQuery/parsed/jsonMapExampleParsed +++ b/metron-platform/metron-integration-test/src/main/sample/data/jsonMapQuery/parsed/jsonMapExampleParsed @@ -1,10 +1,10 @@ -{ "number" : 1, "ignored" : [ "blah" ], "original_string":"{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] }","string" : "bar","timestamp":1000000000000, "source.type":"jsonMapQuery","guid":"this-is-random-uuid-will-be-36-chars" } -{ "number" : 2 , "original_string" : "{ \"number\" : 2 }", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} -{ "number" : 3 , "original_string" : "{ \"number\" : 3 }", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} -{ "number" : 4 , 
"original_string" : "{ \"number\" : 4 }", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} -{ "number" : 5 , "original_string" : "{ \"number\" : 5 }", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} -{ "number" : 6 , "original_string" : "{ \"number\" : 6 }", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} -{ "number" : 7 , "original_string" : "{ \"number\" : 7 }", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} -{ "number" : 8 , "original_string" : "{ \"number\" : 8 }", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} -{ "number" : 9 , "original_string" : "{ \"number\" : 9 }", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} -{ "number" : 10 , "original_string" : "{ \"number\" : 10 }", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} \ No newline at end of file +{ "number" : 1, "ignored" : [ "blah" ], "original_string":"{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}","string" : "bar","timestamp":1000000000000, "source.type":"jsonMapQuery","guid":"this-is-random-uuid-will-be-36-chars" } +{ "number" : 2, "original_string":"{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} +{ "number" : 3, 
"original_string":"{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} +{ "number" : 4, "original_string":"{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} +{ "number" : 5, "original_string":"{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} +{ "number" : 6, "original_string":"{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} +{ "number" : 7, "original_string":"{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} +{ "number" : 8, "original_string":"{\"foo\":[{ 
\"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} +{ "number" : 9, "original_string":"{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} +{ "number" : 10, "original_string":"{\"foo\":[{ \"string\" : \"bar\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 2 },{ \"number\" : 3 },{ \"number\" : 4 },{ \"number\" : 5 },{ \"number\" : 6 },{ \"number\" : 7 },{ \"number\" : 8 },{ \"number\" : 9 },{ \"number\" : 10 }]}", "source.type":"jsonMapQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} diff --git a/metron-platform/metron-integration-test/src/main/sample/data/jsonMapWrappedQuery/parsed/jsonMapExampleParsed b/metron-platform/metron-integration-test/src/main/sample/data/jsonMapWrappedQuery/parsed/jsonMapExampleParsed index c6aac78..1476a96 100644 --- a/metron-platform/metron-integration-test/src/main/sample/data/jsonMapWrappedQuery/parsed/jsonMapExampleParsed +++ b/metron-platform/metron-integration-test/src/main/sample/data/jsonMapWrappedQuery/parsed/jsonMapExampleParsed @@ -1,6 +1,6 @@ -{ "string" : "foo", "number" : 1, "ignored" : [ "blah" ], "original_string":"{ \"string\" : \"foo\", \"number\" : 1, \"ignored\" : [ \"blah\" ] }","timestamp":1000000000000, "source.type":"jsonMapWrappedQuery","guid":"this-is-random-uuid-will-be-36-chars" } -{ "number" : 4 , "original_string" : "{ \"number\" : 4 }", 
"source.type":"jsonMapWrappedQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} -{ "string" : "bar", "number" : 2, "ignored" : [ "blah" ], "original_string":"{ \"string\" : \"bar\", \"number\" : 2, \"ignored\" : [ \"blah\" ] }","timestamp":1000000000000, "source.type":"jsonMapWrappedQuery","guid":"this-is-random-uuid-will-be-36-chars" } -{ "number" : 5 , "original_string" : "{ \"number\" : 5 }", "source.type":"jsonMapWrappedQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} -{ "string" : "baz", "number" : 3, "ignored" : [ "blah" ], "original_string":"{ \"string\" : \"baz\", \"number\" : 3, \"ignored\" : [ \"blah\" ] }","timestamp":1000000000000, "source.type":"jsonMapWrappedQuery","guid":"this-is-random-uuid-will-be-36-chars" } -{ "number" : 6 , "original_string" : "{ \"number\" : 6 }", "source.type":"jsonMapWrappedQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} +{ "string" : "foo", "number" : 1, "ignored" : [ "blah" ], "original_string":"{ \"string\" : \"foo\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 4 },","timestamp":1000000000000, "source.type":"jsonMapWrappedQuery","guid":"this-is-random-uuid-will-be-36-chars" } +{ "number" : 4 , "original_string":"{ \"string\" : \"foo\", \"number\" : 1, \"ignored\" : [ \"blah\" ] },{ \"number\" : 4 },", "source.type":"jsonMapWrappedQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} +{ "string" : "bar", "number" : 2, "ignored" : [ "blah" ], "original_string":"{ \"string\" : \"bar\", \"number\" : 2, \"ignored\" : [ \"blah\" ] },{ \"number\" : 5 },","timestamp":1000000000000, "source.type":"jsonMapWrappedQuery","guid":"this-is-random-uuid-will-be-36-chars" } +{ "number" : 5 , "original_string":"{ \"string\" : \"bar\", \"number\" : 2, \"ignored\" : [ \"blah\" ] },{ \"number\" : 5 },", "source.type":"jsonMapWrappedQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} +{ 
"string" : "baz", "number" : 3, "ignored" : [ "blah" ], "original_string":"{ \"string\" : \"baz\", \"number\" : 3, \"ignored\" : [ \"blah\" ] },{ \"number\" : 6 }","timestamp":1000000000000, "source.type":"jsonMapWrappedQuery","guid":"this-is-random-uuid-will-be-36-chars" } +{ "number" : 6 , "original_string":"{ \"string\" : \"baz\", \"number\" : 3, \"ignored\" : [ \"blah\" ] },{ \"number\" : 6 }", "source.type":"jsonMapWrappedQuery","timestamp":1000000000000,"guid":"this-is-random-uuid-will-be-36-chars"} diff --git a/metron-platform/metron-parsing/README.md b/metron-platform/metron-parsing/README.md index 3c6d26a..1a885f9 100644 --- a/metron-platform/metron-parsing/README.md +++ b/metron-platform/metron-parsing/README.md @@ -61,11 +61,21 @@ There are two general types types of parsers: * `ERROR` : Throw an error when a multidimensional map is encountered * `jsonpQuery` : A [JSON Path](#json_path) query string. If present, the result of the JSON Path query should be a list of messages. This is useful if you have a JSON document which contains a list or array of messages embedded in it, and you do not have another means of splitting the message. * `wrapInEntityArray` : `"true" or "false"`. If `jsonQuery` is present and this flag is present and set to `"true"`, the incoming message will be wrapped in a JSON entity and array. - for example: - `{"name":"value"},{"name2","value2}` will be wrapped as `{"message" : [{"name":"value"},{"name2","value2}]}`. - This is using the default value for `wrapEntityName` if that property is not set. - * `wrapEntityName` : Sets the name to use when wrapping JSON using `wrapInEntityArray`. The `jsonpQuery` should reference this name. + for example: + `{"name":"value"},{"name2","value2"}` will be wrapped as `{"message" : [{"name":"value"},{"name2","value2"}]}`. + This is using the default value for `wrapEntityName` if that property is not set. + * `wrapEntityName` : Sets the name to use when wrapping JSON using `wrapInEntityArray`. 
The `jsonpQuery` should reference this name. Only applicable if `jsonpQuery` and `wrapInEntityArray` are specified. * A field called `timestamp` is expected to exist and, if it does not, then current time is inserted. + * `overrideOriginalString` : A boolean setting that will change the way `original_string` is handled by the parser. The default value of `false` uses the global functionality that will append the unmodified original raw source message as an `original_string` field. + This is the recommended setting. Setting this option to `true` will use the individual substrings returned by the JSON Path query as the `original_string`. For example, a wrapped map such as `{"foo" : [{"name":"value"},{"name2":"value2"}]}` + that uses the jsonpQuery, `$.foo`, will result in 2 messages returned. Using the default global `original_string` strategy, the messages returned would be: + * `{ "name" : "value", "original_string" : "{\"foo\" : [{\"name\":\"value\"},{\"name2\":\"value2\"}]}"}` + * `{ "name2" : "value2", "original_string" : "{\"foo\" : [{\"name\":\"value\"},{\"name2\":\"value2\"}]}"}` + Setting this value to `true` would result in messages with `original_string` set as follows: + * `{ "name" : "value", "original_string" : "{\"name\":\"value\"}"}` + * `{ "name2" : "value2", "original_string" : "{\"name2\":\"value2\"}"}` + One final important point to note, and word of caution about setting this property to `true`, is about how the JSON Path query handles parsing and searching the source raw message - it will **NOT** retain a pure raw sub-message. This is due to the JSON libraries under + the hood that normalize the JSON. The resulting generated `original_string` values may have a different property order and spacing. e.g. `{ "foo" :"bar" , "baz":"bang"}` would end up with an `original_string` that looks more like `{ "baz" : "bang", "foo" : "bar" }`. * Regular Expressions Parser * `recordTypeRegex` : A regular expression to uniquely identify a record type.
* `messageHeaderRegex` : A regular expression used to extract fields from a message part which is common across all the messages. diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/ParserComponent.java b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/ParserComponent.java index c040acb..7e08924 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/ParserComponent.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/ParserComponent.java @@ -22,6 +22,9 @@ import org.apache.metron.parsers.interfaces.MessageFilter; import org.apache.metron.parsers.interfaces.MessageParser; import org.json.simple.JSONObject; +/** + * Wrapper class to couple a MessageParser with a MessageFilter. + */ public class ParserComponent implements Serializable { private static final long serialVersionUID = 7880346740026374665L; diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/ParserRunnerImpl.java b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/ParserRunnerImpl.java index dfad188..cb6c0c4 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/ParserRunnerImpl.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/ParserRunnerImpl.java @@ -33,6 +33,7 @@ import java.util.function.Supplier; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.apache.metron.common.Constants; +import org.apache.metron.common.Constants.Fields; import org.apache.metron.common.configuration.FieldTransformer; import org.apache.metron.common.configuration.FieldValidator; import org.apache.metron.common.configuration.ParserConfigurations; @@ -254,6 +255,7 @@ public class 
ParserRunnerImpl implements ParserRunner<JSONObject>, Serializable if (!message.containsKey(Constants.GUID)) { message.put(Constants.GUID, UUID.randomUUID().toString()); } + message.putIfAbsent(Fields.ORIGINAL.getName(), new String(rawMessage.getMessage())); MessageFilter<JSONObject> filter = sensorToParserComponentMap.get(sensorType).getFilter(); if (filter == null || filter.emit(message, stellarContext)) { boolean isInvalid = !parser.validate(message); diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/json/JSONMapParser.java b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/json/JSONMapParser.java index 0acc96c..bb9bb54 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/json/JSONMapParser.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/json/JSONMapParser.java @@ -35,7 +35,6 @@ import java.util.EnumSet; import java.util.List; import java.util.Map; import java.util.Set; - import org.apache.commons.lang3.StringUtils; import org.apache.metron.common.utils.JSONUtils; import org.apache.metron.parsers.BasicParser; @@ -91,6 +90,7 @@ public class JSONMapParser extends BasicParser { public static final String WRAP_JSON = "wrapInEntityArray"; public static final String WRAP_ENTITY_NAME = "wrapEntityName"; public static final String DEFAULT_WRAP_ENTITY_NAME = "messages"; + public static final String OVERRIDE_ORIGINAL_STRING = "overrideOriginalString"; private static final String WRAP_START_FMT = "{ \"%s\" : ["; private static final String WRAP_END = "]}"; @@ -100,12 +100,14 @@ public class JSONMapParser extends BasicParser { private String jsonpQuery = null; private String wrapEntityName = DEFAULT_WRAP_ENTITY_NAME; private boolean wrapJson = false; + private boolean overrideOriginalString = false; // adds original string values per sub-map @Override public void 
configure(Map<String, Object> config) { String strategyStr = (String) config.getOrDefault(MAP_STRATEGY_CONFIG, MapStrategy.DROP.name()); mapStrategy = MapStrategy.valueOf(strategyStr); + overrideOriginalString = (Boolean) config.getOrDefault(OVERRIDE_ORIGINAL_STRING, false); if (config.containsKey(JSONP_QUERY)) { typeRef = new TypeRef<List<Map<String, Object>>>() { }; jsonpQuery = (String) config.get(JSONP_QUERY); @@ -167,29 +169,32 @@ public class JSONMapParser extends BasicParser { @SuppressWarnings("unchecked") public List<JSONObject> parse(byte[] rawMessage) { try { - String originalString = new String(rawMessage); + String rawString = new String(rawMessage); List<Map<String, Object>> messages = new ArrayList<>(); // if configured, wrap the json in an entity and array if (wrapJson) { - originalString = wrapMessageJson(originalString); + rawString = wrapMessageJson(rawString); } if (!StringUtils.isEmpty(jsonpQuery)) { - Object parsedObject = JsonPath.parse(originalString).read(jsonpQuery, typeRef); + Object parsedObject = JsonPath.parse(rawString).read(jsonpQuery, typeRef); if (parsedObject != null) { messages.addAll((List<Map<String,Object>>)parsedObject); } } else { - messages.add(JSONUtils.INSTANCE.load(originalString, JSONUtils.MAP_SUPPLIER)); + messages.add(JSONUtils.INSTANCE.load(rawString, JSONUtils.MAP_SUPPLIER)); } ArrayList<JSONObject> parsedMessages = new ArrayList<>(); for (Map<String, Object> rawMessageMap : messages) { - JSONObject originalJsonObject = new JSONObject(rawMessageMap); JSONObject ret = normalizeJson(rawMessageMap); - // the original string is the original for THIS sub message - ret.put("original_string", originalJsonObject.toJSONString()); + if (overrideOriginalString) { + // override the global system default, which is to add the raw message as original_string + // the original string is the original for THIS sub message + JSONObject originalJsonObject = new JSONObject(rawMessageMap); + ret.put("original_string", 
originalJsonObject.toJSONString()); + } if (!ret.containsKey("timestamp")) { ret.put("timestamp", System.currentTimeMillis()); } diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/ParserRunnerImplTest.java b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/ParserRunnerImplTest.java index 2d04d40..c0a85da 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/ParserRunnerImplTest.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/ParserRunnerImplTest.java @@ -36,6 +36,7 @@ import java.util.Map; import java.util.Optional; import org.adrianwalker.multilinestring.Multiline; import org.apache.metron.common.Constants; +import org.apache.metron.common.Constants.Fields; import org.apache.metron.common.configuration.ParserConfigurations; import org.apache.metron.common.configuration.SensorParserConfig; import org.apache.metron.common.error.MetronError; @@ -213,6 +214,9 @@ public class ParserRunnerImplTest { } } + /** + * This is only testing the execute method. It mocks out processMessage().
+ */ @Test public void shouldExecute() { parserRunner = spy(parserRunner); @@ -296,20 +300,23 @@ public class ParserRunnerImplTest { Assert.assertTrue(parserRunnerResults.getErrors().contains(expectedError)); } + /** + * This is only testing the processMessage method + */ @Test public void shouldPopulateMessagesOnProcessMessage() { JSONObject inputMessage = new JSONObject(); inputMessage.put("guid", "guid"); inputMessage.put("ip_src_addr", "192.168.1.1"); inputMessage.put("ip_dst_addr", "192.168.1.2"); - inputMessage.put("field1", "value"); - RawMessage rawMessage = new RawMessage("raw_message".getBytes(), new HashMap<>()); + RawMessage rawMessage = new RawMessage("raw_message_for_testing".getBytes(), new HashMap<>()); JSONObject expectedOutput = new JSONObject(); expectedOutput.put("guid", "guid"); expectedOutput.put("source.type", "bro"); expectedOutput.put("ip_src_addr", "192.168.1.1"); expectedOutput.put("ip_dst_addr", "192.168.1.2"); + expectedOutput.put(Fields.ORIGINAL.getName(), "raw_message_for_testing"); when(stellarFilter.emit(expectedOutput, parserRunner.getStellarContext())).thenReturn(true); when(broParser.validate(expectedOutput)).thenReturn(true); @@ -319,7 +326,38 @@ public class ParserRunnerImplTest { }}); Optional<ParserRunnerImpl.ProcessResult> processResult = parserRunner.processMessage("bro", inputMessage, rawMessage, broParser, parserConfigurations); + Assert.assertTrue(processResult.isPresent()); + Assert.assertFalse(processResult.get().isError()); + Assert.assertEquals(expectedOutput, processResult.get().getMessage()); + } + /** + * This is only testing the processMessage method + */ + @Test + public void shouldNotOverwriteOriginalStringAddedByParser() { + JSONObject inputMessage = new JSONObject(); + inputMessage.put("guid", "guid"); + inputMessage.put("ip_src_addr", "192.168.1.1"); + inputMessage.put("ip_dst_addr", "192.168.1.2"); + inputMessage.put(Fields.ORIGINAL.getName(), "original_string_added_by_parser"); + RawMessage rawMessage = 
new RawMessage("raw_message_for_testing".getBytes(), new HashMap<>()); + + JSONObject expectedOutput = new JSONObject(); + expectedOutput.put("guid", "guid"); + expectedOutput.put("source.type", "bro"); + expectedOutput.put("ip_src_addr", "192.168.1.1"); + expectedOutput.put("ip_dst_addr", "192.168.1.2"); + expectedOutput.put(Fields.ORIGINAL.getName(), "original_string_added_by_parser"); + + when(stellarFilter.emit(expectedOutput, parserRunner.getStellarContext())).thenReturn(true); + when(broParser.validate(expectedOutput)).thenReturn(true); + + parserRunner.setSensorToParserComponentMap(new HashMap<String, ParserComponent>() {{ + put("bro", new ParserComponent(broParser, stellarFilter)); + }}); + + Optional<ParserRunnerImpl.ProcessResult> processResult = parserRunner.processMessage("bro", inputMessage, rawMessage, broParser, parserConfigurations); Assert.assertTrue(processResult.isPresent()); Assert.assertFalse(processResult.get().isError()); Assert.assertEquals(expectedOutput, processResult.get().getMessage()); @@ -339,6 +377,7 @@ public class ParserRunnerImplTest { JSONObject expectedOutput = new JSONObject(); expectedOutput.put("guid", "guid"); expectedOutput.put("source.type", "bro"); + expectedOutput.put(Fields.ORIGINAL.getName(), "raw_message"); MetronError expectedMetronError = new MetronError() .withErrorType(Constants.ErrorType.PARSER_INVALID) .withSensorType(Collections.singleton("bro")) @@ -346,6 +385,7 @@ public class ParserRunnerImplTest { .addRawMessage(inputMessage); when(stellarFilter.emit(expectedOutput, parserRunner.getStellarContext())).thenReturn(true); + // This is the important switch. Not to be confused with field validators. 
when(broParser.validate(expectedOutput)).thenReturn(false); parserRunner.setSensorToParserComponentMap(new HashMap<String, ParserComponent>() {{ @@ -377,6 +417,7 @@ public class ParserRunnerImplTest { expectedOutput.put("ip_src_addr", "test"); expectedOutput.put("ip_dst_addr", "test"); expectedOutput.put("source.type", "bro"); + expectedOutput.put(Fields.ORIGINAL.getName(), "raw_message"); MetronError expectedMetronError = new MetronError() .withErrorType(Constants.ErrorType.PARSER_INVALID) .withSensorType(Collections.singleton("bro")) diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/json/JSONMapParserQueryTest.java b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/json/JSONMapParserQueryTest.java index 9f8c26b..babb0e2 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/json/JSONMapParserQueryTest.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/json/JSONMapParserQueryTest.java @@ -17,11 +17,14 @@ */ package org.apache.metron.parsers.json; +import static org.hamcrest.CoreMatchers.equalTo; + import com.google.common.collect.ImmutableMap; import java.util.HashMap; import java.util.List; import org.adrianwalker.multilinestring.Multiline; import org.apache.log4j.Level; +import org.apache.metron.common.Constants.Fields; import org.apache.metron.parsers.BasicParser; import org.apache.metron.test.utils.UnitTestHelper; import org.json.simple.JSONObject; @@ -61,10 +64,10 @@ public class JSONMapParserQueryTest { put(JSONMapParser.JSONP_QUERY, "$.foo"); }}); List<JSONObject> output = parser.parse(JSON_LIST.getBytes()); - Assert.assertEquals(output.size(), 2); - //don't forget the timestamp field! 
- Assert.assertEquals(output.get(0).size(), 5); + Assert.assertEquals(2, output.size()); JSONObject message = output.get(0); + // account for timestamp field in the size + Assert.assertEquals(4, message.size()); Assert.assertEquals("foo1", message.get("name")); Assert.assertEquals("bar", message.get("value")); Assert.assertEquals(1.0, message.get("number")); @@ -72,8 +75,12 @@ public class JSONMapParserQueryTest { Assert.assertTrue(message.get("timestamp") instanceof Number); Assert.assertNotNull(message.get("number")); Assert.assertTrue(message.get("number") instanceof Number); + Assert.assertThat("original_string should be handled external to the parser by default", + message.containsKey(Fields.ORIGINAL.getName()), equalTo(false)); message = output.get(1); + // account for timestamp field in the size + Assert.assertEquals(4, message.size()); Assert.assertEquals("foo2", message.get("name")); Assert.assertEquals("baz", message.get("value")); Assert.assertEquals(2.0, message.get("number")); @@ -81,7 +88,45 @@ public class JSONMapParserQueryTest { Assert.assertTrue(message.get("timestamp") instanceof Number); Assert.assertNotNull(message.get("number")); Assert.assertTrue(message.get("number") instanceof Number); + Assert.assertThat("original_string should be handled external to the parser by default", + message.containsKey(Fields.ORIGINAL.getName()), equalTo(false)); + } + + @Test + public void testOriginalStringHandledByParser() { + JSONMapParser parser = new JSONMapParser(); + parser.configure(new HashMap<String, Object>() {{ + put(JSONMapParser.JSONP_QUERY, "$.foo"); + put(JSONMapParser.OVERRIDE_ORIGINAL_STRING, true); + }}); + List<JSONObject> output = parser.parse(JSON_LIST.getBytes()); + Assert.assertEquals(2, output.size()); + + JSONObject message = output.get(0); + // account for timestamp field in the size + Assert.assertEquals(5, message.size()); + Assert.assertEquals("foo1", message.get("name")); + Assert.assertEquals("bar", message.get("value")); + 
Assert.assertEquals(1.0, message.get("number")); + Assert.assertNotNull(message.get("timestamp")); + Assert.assertTrue(message.get("timestamp") instanceof Number); + Assert.assertNotNull(message.get("number")); + Assert.assertTrue(message.get("number") instanceof Number); + Assert.assertThat("original_string should have been handled by the parser", + message.get(Fields.ORIGINAL.getName()), equalTo("{\"name\":\"foo1\",\"number\":1.0,\"value\":\"bar\"}")); + message = output.get(1); + // account for timestamp field in the size + Assert.assertEquals(5, message.size()); + Assert.assertEquals("foo2", message.get("name")); + Assert.assertEquals("baz", message.get("value")); + Assert.assertEquals(2.0, message.get("number")); + Assert.assertNotNull(message.get("timestamp")); + Assert.assertTrue(message.get("timestamp") instanceof Number); + Assert.assertNotNull(message.get("number")); + Assert.assertTrue(message.get("number") instanceof Number); + Assert.assertThat("original_string should have been handled by the parser", + message.get(Fields.ORIGINAL.getName()), equalTo("{\"name\":\"foo2\",\"number\":2.0,\"value\":\"baz\"}")); } @Test(expected = IllegalStateException.class) @@ -130,7 +175,7 @@ public class JSONMapParserQueryTest { Assert.assertEquals(output.size(), 2); //don't forget the timestamp field! 
- Assert.assertEquals(output.get(0).size(), 2); + Assert.assertEquals(output.get(0).size(), 1); JSONObject message = output.get(0); Assert.assertNotNull(message.get("timestamp")); @@ -161,12 +206,12 @@ public class JSONMapParserQueryTest { JSONMapParser.JSONP_QUERY, "$.foo")); List<JSONObject> output = parser.parse(collectionHandlingJSON.getBytes()); Assert.assertEquals(output.size(), 2); - Assert.assertEquals(output.get(0).size(), 3); + Assert.assertEquals(output.get(0).size(), 2); JSONObject message = output.get(0); Assert.assertNotNull(message.get("timestamp")); Assert.assertTrue(message.get("timestamp") instanceof Number); - Assert.assertEquals(output.get(1).size(), 3); + Assert.assertEquals(output.get(1).size(), 2); message = output.get(1); Assert.assertNotNull(message.get("timestamp")); Assert.assertTrue(message.get("timestamp") instanceof Number); @@ -180,7 +225,7 @@ public class JSONMapParserQueryTest { JSONMapParser.JSONP_QUERY, "$.foo")); List<JSONObject> output = parser.parse(collectionHandlingJSON.getBytes()); Assert.assertEquals(output.size(), 2); - Assert.assertEquals(output.get(0).size(), 6); + Assert.assertEquals(output.get(0).size(), 5); JSONObject message = output.get(0); Assert.assertEquals(message.get("collection.blah"), 7); Assert.assertEquals(message.get("collection.blah2"), "foo"); @@ -189,7 +234,7 @@ public class JSONMapParserQueryTest { Assert.assertNotNull(message.get("timestamp")); Assert.assertTrue(message.get("timestamp") instanceof Number); - Assert.assertEquals(output.get(1).size(), 6); + Assert.assertEquals(output.get(1).size(), 5); message = output.get(1); Assert.assertEquals(message.get("collection.blah"), 8); Assert.assertEquals(message.get("collection.blah2"), "bar"); diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/json/JSONMapParserTest.java b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/json/JSONMapParserTest.java index 
ff73b50..3cdc346 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/json/JSONMapParserTest.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/json/JSONMapParserTest.java @@ -18,6 +18,7 @@ package org.apache.metron.parsers.json; import com.google.common.collect.ImmutableMap; +import java.util.List; import org.adrianwalker.multilinestring.Multiline; import org.apache.log4j.Level; import org.apache.metron.parsers.BasicParser; @@ -26,9 +27,6 @@ import org.json.simple.JSONObject; import org.junit.Assert; import org.junit.Test; -import java.util.List; -import java.util.Map; - public class JSONMapParserTest { /** @@ -47,7 +45,7 @@ public class JSONMapParserTest { List<JSONObject> output = parser.parse(happyPathJSON.getBytes()); Assert.assertEquals(output.size(), 1); //don't forget the timestamp field! - Assert.assertEquals(output.get(0).size(), 5); + Assert.assertEquals(output.get(0).size(), 4); JSONObject message = output.get(0); Assert.assertEquals("bar", message.get("foo")); Assert.assertEquals("blah", message.get("blah")); @@ -82,7 +80,7 @@ public class JSONMapParserTest { List<JSONObject> output = parser.parse(collectionHandlingJSON.getBytes()); Assert.assertEquals(output.size(), 1); //don't forget the timestamp field! - Assert.assertEquals(output.get(0).size(), 2); + Assert.assertEquals(output.get(0).size(), 1); JSONObject message = output.get(0); Assert.assertNotNull(message.get("timestamp")); Assert.assertTrue(message.get("timestamp") instanceof Number); @@ -105,7 +103,7 @@ public class JSONMapParserTest { List<JSONObject> output = parser.parse(collectionHandlingJSON.getBytes()); Assert.assertEquals(output.size(), 1); //don't forget the timestamp field! 
- Assert.assertEquals(output.get(0).size(), 3); + Assert.assertEquals(output.get(0).size(), 2); JSONObject message = output.get(0); Assert.assertNotNull(message.get("timestamp")); Assert.assertTrue(message.get("timestamp") instanceof Number); @@ -118,7 +116,7 @@ public class JSONMapParserTest { List<JSONObject> output = parser.parse(collectionHandlingJSON.getBytes()); Assert.assertEquals(output.size(), 1); //don't forget the timestamp field! - Assert.assertEquals(output.get(0).size(), 6); + Assert.assertEquals(output.get(0).size(), 5); JSONObject message = output.get(0); Assert.assertEquals(message.get("collection.blah"), 7); Assert.assertEquals(message.get("collection.blah2"), "foo"); @@ -133,7 +131,7 @@ public class JSONMapParserTest { JSONMapParser parser = new JSONMapParser(); parser.configure(ImmutableMap.of(JSONMapParser.MAP_STRATEGY_CONFIG,JSONMapParser.MapStrategy.UNFOLD.name())); List<JSONObject> output = parser.parse(mixCollectionHandlingJSON.getBytes()); - Assert.assertEquals(output.get(0).size(), 4); + Assert.assertEquals(output.get(0).size(), 3); JSONObject message = output.get(0); Assert.assertEquals(message.get("collection.key"), "value"); Assert.assertEquals(message.get("key"),"value"); diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/json/JSONMapParserWrappedQueryTest.java b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/json/JSONMapParserWrappedQueryTest.java index 0da45dd..d5399d5 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/json/JSONMapParserWrappedQueryTest.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/json/JSONMapParserWrappedQueryTest.java @@ -61,7 +61,7 @@ public class JSONMapParserWrappedQueryTest { List<JSONObject> output = parser.parse(JSON_LIST.getBytes()); Assert.assertEquals(output.size(), 2); //don't forget the 
timestamp field! - Assert.assertEquals(output.get(0).size(), 5); + Assert.assertEquals(output.get(0).size(), 4); JSONObject message = output.get(0); Assert.assertEquals("foo1", message.get("name")); Assert.assertEquals("bar", message.get("value")); @@ -128,7 +128,7 @@ public class JSONMapParserWrappedQueryTest { Assert.assertEquals(output.size(), 2); //don't forget the timestamp field! - Assert.assertEquals(output.get(0).size(), 2); + Assert.assertEquals(output.get(0).size(), 1); JSONObject message = output.get(0); Assert.assertNotNull(message.get("timestamp")); @@ -159,12 +159,12 @@ public class JSONMapParserWrappedQueryTest { JSONMapParser.JSONP_QUERY, "$.foo")); List<JSONObject> output = parser.parse(collectionHandlingJSON.getBytes()); Assert.assertEquals(output.size(), 2); - Assert.assertEquals(output.get(0).size(), 3); + Assert.assertEquals(output.get(0).size(), 2); JSONObject message = output.get(0); Assert.assertNotNull(message.get("timestamp")); Assert.assertTrue(message.get("timestamp") instanceof Number); - Assert.assertEquals(output.get(1).size(), 3); + Assert.assertEquals(output.get(1).size(), 2); message = output.get(1); Assert.assertNotNull(message.get("timestamp")); Assert.assertTrue(message.get("timestamp") instanceof Number); @@ -178,7 +178,7 @@ public class JSONMapParserWrappedQueryTest { JSONMapParser.JSONP_QUERY, "$.foo")); List<JSONObject> output = parser.parse(collectionHandlingJSON.getBytes()); Assert.assertEquals(output.size(), 2); - Assert.assertEquals(output.get(0).size(), 6); + Assert.assertEquals(output.get(0).size(), 5); JSONObject message = output.get(0); Assert.assertEquals(message.get("collection.blah"), 7); Assert.assertEquals(message.get("collection.blah2"), "foo"); @@ -187,7 +187,7 @@ public class JSONMapParserWrappedQueryTest { Assert.assertNotNull(message.get("timestamp")); Assert.assertTrue(message.get("timestamp") instanceof Number); - Assert.assertEquals(output.get(1).size(), 6); + Assert.assertEquals(output.get(1).size(), 
5); message = output.get(1); Assert.assertEquals(message.get("collection.blah"), 8); Assert.assertEquals(message.get("collection.blah2"), "bar"); diff --git a/metron-platform/metron-parsing/metron-parsing-storm/README.md b/metron-platform/metron-parsing/metron-parsing-storm/README.md index 69a09f4..5c16ca5 100644 --- a/metron-platform/metron-parsing/metron-parsing-storm/README.md +++ b/metron-platform/metron-parsing/metron-parsing-storm/README.md @@ -33,6 +33,12 @@ Metron's parsers can be run in Storm topologies, complete with their own set of * `spoutConfig` : A map representing a custom spout config (this is a map). If there are multiple sensors, the configs will be merged with the last specified taking precedence. This can be overridden on the command line. * `stormConfig` : The storm config to use (this is a map). This can be overridden on the command line. If both are specified, they are merged with CLI properties taking precedence. +**Note on dynamic vs static configuration** + +Field transformations configuration (see [Parser Configuration](../metron-parsing#parser-configuration)) is loaded dynamically from Zookeeper, so any updates pushed to Zookeeper will automatically be reflected in the parser without restarting it. +`parserConfig` (see [Parser Configuration](../metron-parsing#parser-configuration)) is provided one time at topology startup via a parser's configure() method. Any changes to the static parser config will require a restart. Storm-specific +configuration settings, such as those above, are also static and require a topology restart. + # Starting the Parser Topology Starting a particular parser topology on a running Metron deployment is @@ -121,7 +127,7 @@ you could create a file called `custom_config.json` containing and pass `--extra_topology_options custom_config.json` to `start_parser_topology.sh`. 
## Parser Topology -The enrichment topology as started by the `$METRON_HOME/bin/start_parser_topology.sh` +The parser topology as started by the `$METRON_HOME/bin/start_parser_topology.sh` script uses a default of one executor per bolt. In a real production system, this should be customized by modifying the arguments sent to this utility. * Topology Wide