This is an automated email from the ASF dual-hosted git repository. leet pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/metron.git
commit 88f4d2cefe4bbb389732da3b4f5cbcf02b7b949a Author: mmiklavc <michael.miklav...@gmail.com> AuthorDate: Fri Aug 16 15:14:13 2019 -0400 METRON-614: Eliminate use of the default Charset (mmiklavc via justinleet) closes apache/metron#1341 --- metron-platform/metron-parsing/README.md | 7 +- .../metron-parsers-common/3rdPartyParser.md | 17 +++ .../org/apache/metron/parsers/BasicParser.java | 39 ++++- .../java/org/apache/metron/parsers/GrokParser.java | 22 ++- .../apache/metron/parsers/ParserRunnerImpl.java | 4 +- .../org/apache/metron/parsers/csv/CSVParser.java | 9 +- .../metron/parsers/interfaces/MessageParser.java | 21 ++- .../apache/metron/parsers/json/JSONMapParser.java | 6 +- .../parsers/regex/RegularExpressionsParser.java | 24 +-- .../metron/parsers/syslog/BaseSyslogParser.java | 43 ++++-- .../org/apache/metron/parsers/BasicParserTest.java | 167 +++++++++++++++++++++ .../metron/parsers/ParserRunnerImplTest.java | 3 +- .../apache/metron/parsers/csv/CSVParserTest.java | 29 +++- .../metron/parsers/json/JSONMapParserTest.java | 37 ++++- .../regex/RegularExpressionsParserTest.java | 28 +++- .../apache/metron/parsers/asa/BasicAsaParser.java | 2 +- .../apache/metron/parsers/bro/BasicBroParser.java | 5 +- .../org/apache/metron/parsers/cef/CEFParser.java | 5 +- .../metron/parsers/fireeye/BasicFireEyeParser.java | 6 +- .../apache/metron/parsers/ise/BasicIseParser.java | 14 +- .../metron/parsers/lancope/BasicLancopeParser.java | 5 +- .../org/apache/metron/parsers/leef/LEEFParser.java | 6 +- .../paloalto/BasicPaloAltoFirewallParser.java | 14 +- .../metron/parsers/snort/BasicSnortParser.java | 3 +- .../parsers/sourcefire/BasicSourcefireParser.java | 5 +- .../org/apache/metron/parsers/SnortParserTest.java | 34 +++-- .../metron/parsers/asa/BasicAsaParserTest.java | 46 ++++-- .../metron/parsers/bro/BasicBroParserTest.java | 23 ++- .../apache/metron/parsers/cef/CEFParserTest.java | 33 +++- .../parsers/fireeye/BasicFireEyeParserTest.java | 27 +++- .../metron/parsers/ise/BasicIseParserTest.java | 20 +++ .../parsers/lancope/BasicLancopeParserTest.java | 20 +++ .../apache/metron/parsers/leef/LEEFParserTest.java | 38 ++++- .../paloalto/BasicPaloAltoFirewallParserTest.java | 23 ++- .../sourcefire/BasicSourcefireParserTest.java | 20 +++ .../parsers/websphere/GrokWebSphereParserTest.java | 60 +++----- 36 files changed, 681 insertions(+), 184 deletions(-) diff --git a/metron-platform/metron-parsing/README.md b/metron-platform/metron-parsing/README.md index 1a885f9..9c4cd58 100644 --- a/metron-platform/metron-parsing/README.md +++ b/metron-platform/metron-parsing/README.md @@ -49,10 +49,12 @@ There are two general types types of parsers: * `dateFormat` : The date format to use to parse the time fields. Default is "yyyy-MM-dd HH:mm:ss.S z". * `timezone` : The timezone to use. `UTC` is default. * The Grok parser supports either 1 line to parse per incoming message, or incoming messages with multiple log lines, and will produce a json message per line + * `readCharset` : Optional. Specifies what charset the parser should use when reading sensor data from the sensor topic. "`UTF_8`" is default. * CSV Parser: `org.apache.metron.parsers.csv.CSVParser` with possible `parserConfig` entries of * `timestampFormat` : The date format of the timestamp to use. If unspecified, the parser assumes the timestamp is ms since unix epoch. * `columns` : A map of column names you wish to extract from the CSV to their offsets (e.g. `{ 'name' : 1, 'profession' : 3}` would be a column map for extracting the 2nd and 4th columns from a CSV) * `separator` : The column separator, `,` by default. + * `readCharset` : Optional. Specifies what charset the parser should use when reading sensor data from the sensor topic. "`UTF_8`" is default. * JSON Map Parser: `org.apache.metron.parsers.json.JSONMapParser` with possible `parserConfig` entries of * `mapStrategy` : A strategy to indicate how to handle multi-dimensional Maps. This is one of * `DROP` : Drop fields which contain maps @@ -76,6 +78,7 @@ There are two general types types of parsers: * `{ "name" : "value", "original_string" : "{\"name2\":\"value2\"}}` One final important point to note, and word of caution about setting this property to `true`, is about how JSON PQuery handles parsing and searching the source raw message - it will **NOT** retain a pure raw sub-message. This is due to the JSON libraries under the hood that normalize the JSON. The resulting generated `original_string` values may have a different property order and spacing. e.g. `{ "foo" :"bar" , "baz":"bang"}` would end up with an `original_string` that looks more like `{ "baz" : "bang", "foo" : "bar" }`. + * `readCharset` : Optional. Specifies what charset the parser should use when reading sensor data from the sensor topic. "`UTF_8`" is default. * Regular Expressions Parser * `recordTypeRegex` : A regular expression to uniquely identify a record type. * `messageHeaderRegex` : A regular expression used to extract fields from a message part which is common across all the messages. @@ -90,6 +93,7 @@ There are two general types types of parsers: Note this property may be necessary, because java does not support underscores in the named group names. So in case your property naming conventions requires underscores in property names, use this property. * `fields` : A json list of maps contaning a record type to regular expression mapping. + * `readCharset` : Optional. Specifies what charset the parser should use when reading sensor data from the sensor topic. "`UTF_8`" is default. A complete configuration example would look like: @@ -308,12 +312,13 @@ then it is assumed to be a regex and will match any topic matching the pattern ( * `mergeMetadata` : Boolean indicating whether to merge metadata with the message or not (The default is raw message strategy dependent). See below for a discussion about metadata. * `rawMessageStrategy` : The strategy to use when reading the raw data and metadata. See below for a discussion about message reading strategies. * `rawMessageStrategyConfig` : The raw message strategy configuration map. See below for a discussion about message reading strategies. -* `parserConfig` : A JSON Map representing the parser implementation specific configuration. Also include batch sizing and timeout for writer configuration here. +* `parserConfig` : A JSON Map representing the parser implementation specific configuration. Also include batch sizing and timeout for writer configuration here. The character set to use for reading inbound sensor data is also set here. * `batchSize` : Integer indicating number of records to batch together before sending to the writer. (default to `15`) * `batchTimeout` : The timeout after which a batch will be flushed even if batchSize has not been met. Optional. If unspecified, or set to `0`, it defaults to a system-determined duration which is a fraction of the Storm parameter `topology.message.timeout.secs`. Ignored if batchSize is `1`, since this disables batching. * The kafka writer can be configured within the parser config as well. (This is all configured a priori, but this is convenient for overriding the settings). See [here](../../metron-writer/README.md#kafka-writer) + * `readCharset` : Optional. Specifies what charset the parser should use when reading sensor data from the sensor topic. "`UTF_8`" is default. * `fieldTransformations` : An array of complex objects representing the transformations to be done on the message generated from the parser before writing out to the kafka topic. * `securityProtocol` : The security protocol to use for reading from kafka (this is a string). This can be overridden on the command line and also specified in the spout config via the `security.protocol` key. If both are specified, then they are merged and the CLI will take precedence. If multiple sensors are used, any non "PLAINTEXT" value will be used. * `cacheConfig` : Cache config for stellar field transformations. This configures a least frequently used cache. This is a map with the following keys. If not explicitly configured (the default), then no cache will be used. diff --git a/metron-platform/metron-parsing/metron-parsers-common/3rdPartyParser.md b/metron-platform/metron-parsing/metron-parsers-common/3rdPartyParser.md index 04cbc73..b66bb9e 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/3rdPartyParser.md +++ b/metron-platform/metron-parsing/metron-parsers-common/3rdPartyParser.md @@ -39,6 +39,23 @@ In order to do create a custom parser, we need to do one of the following: * Write a class which extends `org.apache.metron.parsers.BasicParser` * Provides convenience implementations to `validate` which ensures `timestamp` and `original_string` fields exist. +Also note that it is possible to specify a configuration option for the charset you would like your parser to use to read data. In order to do so, +you would call the `setReadCharset` method in your `configure` method when extending `BasicParser`. And then when you're specifying the charset +to use in the `parse` method, you would use `getReadCharset` as follows `rawMessage = new String(msg, getReadCharset());`. The common configuration +option key is "`readCharset`" and is passed via a key/value pair in the `parserConfig` JSON section of your overall parser configuration file, e.g. +``` +{ + ... + "parserConfig" : { + "readCharset" : "UTF_8" + ... + } + ... +} +``` + +If implementing the MessageParser interface directly, you would need to handle reading and setting the configuration on your own. Override the `default Charset getReadCharset()` method provided in the `MessageParser` interface. + ## Example In order to illustrate how this might be done, let's create a very diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/BasicParser.java b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/BasicParser.java index 49157d5..372c9f5 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/BasicParser.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/BasicParser.java @@ -17,8 +17,15 @@ */ package org.apache.metron.parsers; +import static org.apache.metron.common.Constants.Fields.DST_ADDR; +import static org.apache.metron.common.Constants.Fields.ORIGINAL; +import static org.apache.metron.common.Constants.Fields.SRC_ADDR; +import static org.apache.metron.common.Constants.Fields.TIMESTAMP; + import java.io.Serializable; import java.lang.invoke.MethodHandles; +import java.nio.charset.Charset; +import java.util.Map; import org.apache.metron.parsers.interfaces.MessageParser; import org.json.simple.JSONObject; import org.slf4j.Logger; @@ -30,14 +37,17 @@ public abstract class BasicParser implements protected static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private Charset readCharset; + @Override public boolean validate(JSONObject message) { JSONObject value = message; - if (!(value.containsKey("original_string"))) { - LOG.trace("[Metron] Message does not have original_string: {}", message); + final String invalidMessageTemplate = "[Metron] Message does not have {}: {}"; + if (!(value.containsKey(ORIGINAL.getName()))) { + LOG.trace(invalidMessageTemplate, ORIGINAL.getName(), message); return false; - } else if (!(value.containsKey("timestamp"))) { - LOG.trace("[Metron] Message does not have timestamp: {}", message); + } else if (!(value.containsKey(TIMESTAMP.getName()))) { + LOG.trace(invalidMessageTemplate, TIMESTAMP.getName(), message); return false; } else { LOG.trace("[Metron] Message conforms to schema: {}", message); @@ -49,10 +59,10 @@ public abstract class BasicParser implements try { String ipSrcAddr = null; String ipDstAddr = null; - if (value.containsKey("ip_src_addr")) - ipSrcAddr = value.get("ip_src_addr").toString(); - if (value.containsKey("ip_dst_addr")) - ipDstAddr = value.get("ip_dst_addr").toString(); + if (value.containsKey(SRC_ADDR.getName())) + ipSrcAddr = value.get(SRC_ADDR.getName()).toString(); + if (value.containsKey(DST_ADDR.getName())) + ipDstAddr = value.get(DST_ADDR.getName()).toString(); if (ipSrcAddr == null && ipDstAddr == null) return "0"; if (ipSrcAddr == null || ipSrcAddr.length() == 0) @@ -66,4 +76,17 @@ public abstract class BasicParser implements return "0"; } } + + public void setReadCharset(Map<String, Object> config) { + if (config.containsKey(READ_CHARSET)) { + readCharset = Charset.forName((String) config.get(READ_CHARSET)); + } else { + readCharset = MessageParser.super.getReadCharset(); + } + } + + @Override + public Charset getReadCharset() { + return null == this.readCharset ? MessageParser.super.getReadCharset() : this.readCharset; + } } diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/GrokParser.java b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/GrokParser.java index 14873e3..338286b 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/GrokParser.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/GrokParser.java @@ -27,6 +27,7 @@ import java.io.InputStreamReader; import java.io.Serializable; import java.io.StringReader; import java.lang.invoke.MethodHandles; +import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.text.SimpleDateFormat; @@ -63,10 +64,12 @@ public class GrokParser implements MessageParser<JSONObject>, Serializable { protected String timestampField; protected SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S z"); protected String patternsCommonDir = "/patterns/common"; + private Charset readCharset; @Override @SuppressWarnings("unchecked") public void configure(Map<String, Object> parserConfig) { + setReadCharset(parserConfig); this.grokPath = (String) parserConfig.get("grokPath"); String multiLineString = (String) parserConfig.get("multiLine"); if (!StringUtils.isBlank(multiLineString)) { @@ -116,7 +119,7 @@ public class GrokParser implements MessageParser<JSONObject>, Serializable { "Unable to initialize grok parser: Unable to load " + patternsCommonDir + " from either classpath or HDFS"); } - grok.addPatternFromReader(new InputStreamReader(commonInputStream, StandardCharsets.UTF_8)); + grok.addPatternFromReader(new InputStreamReader(commonInputStream, getReadCharset())); LOG.info("Loading parser-specific patterns from: {}", grokPath); InputStream patterInputStream = openInputStream(grokPath); @@ -124,7 +127,7 @@ public class GrokParser implements MessageParser<JSONObject>, Serializable { throw new RuntimeException("Grok parser unable to initialize grok parser: Unable to load " + grokPath + " from either classpath or HDFS"); } - grok.addPatternFromReader(new InputStreamReader(patterInputStream, StandardCharsets.UTF_8)); + grok.addPatternFromReader(new InputStreamReader(patterInputStream, getReadCharset())); LOG.info("Grok parser set the following grok expression for '{}': {}", () ->patternLabel, () -> grok.getPatterns().get(patternLabel)); @@ -159,7 +162,7 @@ public class GrokParser implements MessageParser<JSONObject>, Serializable { String originalMessage = null; // read the incoming raw data as if it may have multiple lines of logs // if there is only only one line, it will just get processed. - try (BufferedReader reader = new BufferedReader(new StringReader(new String(rawMessage, StandardCharsets.UTF_8)))) { + try (BufferedReader reader = new BufferedReader(new StringReader(new String(rawMessage, getReadCharset())))) { while ((originalMessage = reader.readLine()) != null) { LOG.debug("Grok parser parsing message: {}", originalMessage); try { @@ -292,4 +295,17 @@ public class GrokParser implements MessageParser<JSONObject>, Serializable { } } + public void setReadCharset(Map<String, Object> config) { + if (config.containsKey(READ_CHARSET)) { + readCharset = Charset.forName((String) config.get(READ_CHARSET)); + } else { + readCharset = MessageParser.super.getReadCharset(); + } + } + + @Override + public Charset getReadCharset() { + return null == this.readCharset ? MessageParser.super.getReadCharset() : this.readCharset; + } + } diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/ParserRunnerImpl.java b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/ParserRunnerImpl.java index d76205d..edab2b9 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/ParserRunnerImpl.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/ParserRunnerImpl.java @@ -19,7 +19,6 @@ package org.apache.metron.parsers; import java.io.Serializable; import java.lang.invoke.MethodHandles; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -256,7 +255,8 @@ public class ParserRunnerImpl implements ParserRunner<JSONObject>, Serializable if (!message.containsKey(Constants.GUID)) { message.put(Constants.GUID, UUID.randomUUID().toString()); } - message.putIfAbsent(Fields.ORIGINAL.getName(), new String(rawMessage.getMessage(), StandardCharsets.UTF_8)); + message.putIfAbsent(Fields.ORIGINAL.getName(), + new String(rawMessage.getMessage(), parser.getReadCharset())); MessageFilter<JSONObject> filter = sensorToParserComponentMap.get(sensorType).getFilter(); if (filter == null || filter.emit(message, stellarContext)) { boolean isInvalid = !parser.validate(message); diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/csv/CSVParser.java b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/csv/CSVParser.java index 35e23d3..88567d6 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/csv/CSVParser.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/csv/CSVParser.java @@ -20,14 +20,13 @@ package org.apache.metron.parsers.csv; import com.google.common.collect.ImmutableList; import java.lang.invoke.MethodHandles; -import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; import java.util.Collections; import java.util.List; import java.util.Map; import org.apache.metron.common.csv.CSVConverter; -import org.apache.metron.stellar.common.utils.ConversionUtils; import org.apache.metron.parsers.BasicParser; +import org.apache.metron.stellar.common.utils.ConversionUtils; import org.json.simple.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,8 +36,10 @@ public class CSVParser extends BasicParser { public static final String TIMESTAMP_FORMAT_CONF = "timestampFormat"; private transient CSVConverter converter; private SimpleDateFormat timestampFormat; + @Override public void configure(Map<String, Object> parserConfig) { + setReadCharset(parserConfig); converter = new CSVConverter(); converter.initialize(parserConfig); Object tsFormatObj = parserConfig.get(TIMESTAMP_FORMAT_CONF); @@ -56,7 +57,7 @@ public class CSVParser extends BasicParser { @Override public List<JSONObject> parse(byte[] rawMessage) { try { - String msg = new String(rawMessage, StandardCharsets.UTF_8); + String msg = new String(rawMessage, getReadCharset()); Map<String, String> value = converter.toMap(msg); if(value != null) { value.put("original_string", msg); @@ -88,7 +89,7 @@ public class CSVParser extends BasicParser { return Collections.emptyList(); } } catch (Throwable e) { - String message = "Unable to parse " + new String(rawMessage, StandardCharsets.UTF_8) + ": " + e.getMessage(); + String message = "Unable to parse " + new String(rawMessage, getReadCharset()) + ": " + e.getMessage(); LOG.error(message, e); throw new IllegalStateException(message, e); } diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/interfaces/MessageParser.java b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/interfaces/MessageParser.java index c9f8351..50e9f15 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/interfaces/MessageParser.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/interfaces/MessageParser.java @@ -17,17 +17,17 @@ */ package org.apache.metron.parsers.interfaces; -import org.apache.commons.lang3.NotImplementedException; -import org.apache.metron.parsers.DefaultMessageParserResult; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.List; -import java.util.Map; import java.util.Optional; +import org.apache.commons.lang3.NotImplementedException; +import org.apache.metron.parsers.DefaultMessageParserResult; public interface MessageParser<T> extends Configurable { + + String READ_CHARSET = "readCharset"; // property to use for getting the read charset from parser config + /** * Initialize the message parser. This is done once. */ @@ -81,4 +81,11 @@ public interface MessageParser<T> extends Configurable { */ boolean validate(T message); + /** + * Provides a hook to override the default charset parsers use to read data. + * @return Charset to use for for reading + */ + default Charset getReadCharset() { + return StandardCharsets.UTF_8; + } } diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/json/JSONMapParser.java b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/json/JSONMapParser.java index 5da7c05..6f8d1de 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/json/JSONMapParser.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/json/JSONMapParser.java @@ -29,7 +29,6 @@ import com.jayway.jsonpath.spi.json.JacksonJsonProvider; import com.jayway.jsonpath.spi.json.JsonProvider; import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider; import com.jayway.jsonpath.spi.mapper.MappingProvider; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.EnumSet; @@ -106,6 +105,7 @@ public class JSONMapParser extends BasicParser { @Override public void configure(Map<String, Object> config) { + setReadCharset(config); String strategyStr = (String) config.getOrDefault(MAP_STRATEGY_CONFIG, MapStrategy.DROP.name()); mapStrategy = MapStrategy.valueOf(strategyStr); overrideOriginalString = (Boolean) config.getOrDefault(OVERRIDE_ORIGINAL_STRING, false); @@ -170,7 +170,7 @@ public class JSONMapParser extends BasicParser { @SuppressWarnings("unchecked") public List<JSONObject> parse(byte[] rawMessage) { try { - String rawString = new String(rawMessage, StandardCharsets.UTF_8); + String rawString = new String(rawMessage, getReadCharset()); List<Map<String, Object>> messages = new ArrayList<>(); // if configured, wrap the json in an entity and array @@ -203,7 +203,7 @@ public class JSONMapParser extends BasicParser { } return Collections.unmodifiableList(parsedMessages); } catch (Throwable e) { - String message = "Unable to parse " + new String(rawMessage, StandardCharsets.UTF_8) + ": " + e.getMessage(); + String message = "Unable to parse " + new String(rawMessage, getReadCharset()) + ": " + e.getMessage(); LOG.error(message, e); throw new IllegalStateException(message, e); } diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/regex/RegularExpressionsParser.java b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/regex/RegularExpressionsParser.java index d375451..2f7d798 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/regex/RegularExpressionsParser.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/regex/RegularExpressionsParser.java @@ -16,6 +16,19 @@ package org.apache.metron.parsers.regex; import com.google.common.base.CaseFormat; +import java.lang.invoke.MethodHandles; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.TreeSet; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; import org.apache.commons.lang3.StringUtils; import org.apache.metron.common.Constants; import org.apache.metron.parsers.BasicParser; @@ -23,14 +36,6 @@ import org.json.simple.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.lang.invoke.MethodHandles; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; - //@formatter:off /** * General purpose class to parse unstructured text message into a json object. This class parses @@ -154,7 +159,7 @@ public class RegularExpressionsParser extends BasicParser { public List<JSONObject> parse(byte[] rawMessage) { String originalMessage = null; try { - originalMessage = new String(rawMessage, StandardCharsets.UTF_8).trim(); + originalMessage = new String(rawMessage, getReadCharset()).trim(); LOG.debug(" raw message. {}", originalMessage); if (originalMessage.isEmpty()) { LOG.warn("Message is empty."); @@ -201,6 +206,7 @@ public class RegularExpressionsParser extends BasicParser { // @formatter:on @Override public void configure(Map<String, Object> parserConfig) { + setReadCharset(parserConfig); setParserConfig(parserConfig); setFields((List<Map<String, Object>>) getParserConfig() .get(ParserConfigConstants.FIELDS.getName())); diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/syslog/BaseSyslogParser.java b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/syslog/BaseSyslogParser.java index c349fd8..c154c7c 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/syslog/BaseSyslogParser.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/main/java/org/apache/metron/parsers/syslog/BaseSyslogParser.java @@ -20,48 +20,47 @@ package org.apache.metron.parsers.syslog; import com.github.palindromicity.syslog.SyslogParser; import com.github.palindromicity.syslog.dsl.SyslogFieldKeys; -import java.nio.charset.StandardCharsets; -import org.apache.commons.lang3.StringUtils; -import org.apache.metron.parsers.DefaultMessageParserResult; -import org.apache.metron.parsers.ParseException; -import org.apache.metron.parsers.interfaces.MessageParser; -import org.apache.metron.parsers.interfaces.MessageParserResult; -import org.apache.metron.parsers.utils.SyslogUtils; -import org.json.simple.JSONObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; import java.io.Serializable; import java.io.StringReader; import java.lang.invoke.MethodHandles; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.time.Clock; import java.time.LocalDateTime; import java.time.ZoneId; import java.time.ZoneOffset; -import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.function.Consumer; +import org.apache.commons.lang3.StringUtils; +import org.apache.metron.parsers.DefaultMessageParserResult; +import org.apache.metron.parsers.ParseException; +import org.apache.metron.parsers.interfaces.MessageParser; +import org.apache.metron.parsers.interfaces.MessageParserResult; +import org.apache.metron.parsers.utils.SyslogUtils; +import org.json.simple.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Parser for well structured RFC 5424 messages. */ public abstract class BaseSyslogParser implements MessageParser<JSONObject>, Serializable { + protected static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private Charset readCharset; private Optional<Consumer<JSONObject>> messageProcessorOptional = Optional.empty(); private transient SyslogParser syslogParser; - protected Clock deviceClock; - protected void setSyslogParser(SyslogParser syslogParser) { this.syslogParser = syslogParser; } @@ -74,6 +73,7 @@ public abstract class BaseSyslogParser implements MessageParser<JSONObject>, Ser @Override public void configure(Map<String, Object> parserConfig) { + setReadCharset(parserConfig); // we'll pull out the clock stuff ourselves String timeZone = (String) parserConfig.get("deviceTimeZone"); if (timeZone != null) @@ -110,7 +110,7 @@ public abstract class BaseSyslogParser implements MessageParser<JSONObject>, Ser return Optional.empty(); } - String originalString = new String(rawMessage, StandardCharsets.UTF_8); + String originalString = new String(rawMessage, getReadCharset()); final List<JSONObject> returnList = new ArrayList<>(); Map<Object,Throwable> errorMap = new HashMap<>(); try (Reader reader = new BufferedReader(new StringReader(originalString))) { @@ -150,4 +150,17 @@ public abstract class BaseSyslogParser implements MessageParser<JSONObject>, Ser .toEpochSecond(ZoneOffset.UTC)); } } + + public void setReadCharset(Map<String, Object> config) { + if (config.containsKey(READ_CHARSET)) { + readCharset = Charset.forName((String) config.get(READ_CHARSET)); + } else { + readCharset = MessageParser.super.getReadCharset(); + } + } + + @Override + public Charset getReadCharset() { + return null == this.readCharset ? MessageParser.super.getReadCharset() : this.readCharset; + } } diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/BasicParserTest.java b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/BasicParserTest.java new file mode 100644 index 0000000..05ac15c --- /dev/null +++ b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/BasicParserTest.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.metron.parsers; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.not; +import static org.junit.Assert.assertThat; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import org.apache.commons.io.FileUtils; +import org.apache.metron.parsers.interfaces.MessageParser; +import org.apache.metron.parsers.interfaces.MessageParserResult; +import org.json.simple.JSONObject; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +public class BasicParserTest { + + private static final String KEY1 = "key1"; + + private static class SomeParserWithCharset extends BasicParser { + + @Override + public void init() { + + } + + @Override + public void configure(Map<String, Object> config) { + setReadCharset(config); + } + + @Override + public Optional<MessageParserResult<JSONObject>> parseOptionalResult(byte[] parseMessage) { + String message = new String(parseMessage, getReadCharset()); + Map<String, Object> out = new HashMap<>(); + out.put(KEY1, message); + MessageParserResult<JSONObject> result = new DefaultMessageParserResult<JSONObject>( + Arrays.asList(new JSONObject(out))); + return Optional.of(result); + } + } + + private static class SomeParserNoCharset extends SomeParserWithCharset { + @Override + public void configure(Map<String, Object> config) { + // don't set the charset + } + } + + + private static final String SAMPLE_DATA = "Here is some sample data"; + private SomeParserWithCharset parserWithCharset; + private SomeParserNoCharset parserNoCharset; + private Map<String, Object> parserConfig; + private File fileUTF_16; + private File fileUTF_8; + + @Rule + public TemporaryFolder tempFolder = new TemporaryFolder(); + + @Before + public void setup() throws IOException, InterruptedException { + tempFolder.create(); + parserWithCharset = new SomeParserWithCharset(); + parserNoCharset = new SomeParserNoCharset(); + parserConfig = new HashMap<>(); + fileUTF_16 = new File(tempFolder.getRoot(), "fileUTF-16"); + fileUTF_8 = new File(tempFolder.getRoot(), "fileUTF-8"); + writeDataEncodedAs(fileUTF_16, SAMPLE_DATA, StandardCharsets.UTF_16); + writeDataEncodedAs(fileUTF_8, SAMPLE_DATA, StandardCharsets.UTF_8); + } + + private void writeDataEncodedAs(File file, String data, Charset charset) throws IOException { + byte[] bytes = data.getBytes(charset); + FileUtils.writeByteArrayToFile(file, bytes); + } + + @Test + public void verify_encoding_translation_assumptions() throws IOException { + // read in file encoded as UTF_16 bytes to a String using UTF_8 and UTF_16 encoding + // the UTF_8 translation here should be a garbled mess because UTF_16 needs to have a + // translation step for it to be correct in UTF_8 + String utf16_8 = readDataEncodedAs(fileUTF_16, StandardCharsets.UTF_8); + String utf16_16 = readDataEncodedAs(fileUTF_16, StandardCharsets.UTF_16); + File utf16_16_8 = new File(tempFolder.getRoot(), "outUTF-8"); + writeDataEncodedAs(utf16_16_8, utf16_16, StandardCharsets.UTF_8); + String utf8_8 = readDataEncodedAs(utf16_16_8, StandardCharsets.UTF_8); + assertThat(utf8_8, equalTo(utf16_16)); + assertThat(utf8_8, not(equalTo(utf16_8))); + + assertThat(utf8_8, equalTo(utf16_16)); + assertThat(utf8_8, not(equalTo(utf16_8))); + } + + private String readDataEncodedAs(File file, Charset charset) throws IOException { + return FileUtils.readFileToString(file, charset); + } + + @Test + public void parses_with_specified_encoding() { + parserConfig.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_16.toString()); + parserWithCharset.configure(parserConfig); + Optional<MessageParserResult<JSONObject>> result = parserWithCharset + .parseOptionalResult(SAMPLE_DATA.getBytes(StandardCharsets.UTF_16)); + MessageParserResult<JSONObject> json = result.get(); + assertThat(json.getMessages().size(), equalTo(1)); + assertThat(json.getMessages().get(0).get(KEY1), equalTo(SAMPLE_DATA)); + } + + @Test + public void values_will_not_match_when_specified_encoding_is_wrong() { + parserConfig.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_8.toString()); + parserWithCharset.configure(parserConfig); + Optional<MessageParserResult<JSONObject>> result = parserWithCharset + .parseOptionalResult(SAMPLE_DATA.getBytes(StandardCharsets.UTF_16)); + MessageParserResult<JSONObject> json = result.get(); + assertThat(json.getMessages().size(), equalTo(1)); + assertThat(json.getMessages().get(0).get(KEY1), not(equalTo(SAMPLE_DATA))); + } + + @Test + public void parses_with_default_encoding_when_not_configured() { + parserWithCharset.configure(parserConfig); + Optional<MessageParserResult<JSONObject>> result = parserWithCharset + .parseOptionalResult(SAMPLE_DATA.getBytes(StandardCharsets.UTF_8)); + MessageParserResult<JSONObject> json = result.get(); + assertThat(json.getMessages().size(), equalTo(1)); + assertThat(json.getMessages().get(0).get(KEY1), equalTo(SAMPLE_DATA)); + } + + @Test + public void parses_with_default_encoding_from_basic_parser() { + parserNoCharset.configure(parserConfig); + Optional<MessageParserResult<JSONObject>> result = parserNoCharset + .parseOptionalResult(SAMPLE_DATA.getBytes(StandardCharsets.UTF_8)); + MessageParserResult<JSONObject> json = result.get(); + assertThat(json.getMessages().size(), equalTo(1)); + assertThat(json.getMessages().get(0).get(KEY1), equalTo(SAMPLE_DATA)); + } + +} diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/ParserRunnerImplTest.java b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/ParserRunnerImplTest.java index 486cda3..88996d8 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/ParserRunnerImplTest.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/ParserRunnerImplTest.java @@ -133,6 +133,7 @@ public class ParserRunnerImplTest { stellarFilter = mock(StellarFilter.class); mockStatic(ReflectionUtils.class); mockStatic(Filters.class); + when(broParser.getReadCharset()).thenReturn(StandardCharsets.UTF_8); when(ReflectionUtils.createInstance("org.apache.metron.parsers.bro.BasicBroParser")).thenReturn(broParser); when(ReflectionUtils.createInstance("org.apache.metron.parsers.snort.BasicSnortParser")).thenReturn(snortParser); @@ -312,7 +313,7 @@ public class ParserRunnerImplTest { inputMessage.put("guid", "guid"); inputMessage.put("ip_src_addr", "192.168.1.1"); inputMessage.put("ip_dst_addr", "192.168.1.2"); - RawMessage rawMessage = new RawMessage("raw_message".getBytes(StandardCharsets.UTF_8), new HashMap<>()); + RawMessage rawMessage = new RawMessage("raw_message_for_testing".getBytes(StandardCharsets.UTF_8), new HashMap<>()); JSONObject expectedOutput = new JSONObject(); expectedOutput.put("guid", "guid"); diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/csv/CSVParserTest.java b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/csv/CSVParserTest.java index 4ac60d0..21a7d26 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/csv/CSVParserTest.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/csv/CSVParserTest.java @@ -18,19 +18,23 @@ package org.apache.metron.parsers.csv; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertThat; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.List; import org.adrianwalker.multilinestring.Multiline; import org.apache.hadoop.hbase.util.Bytes; import org.apache.log4j.Level; import org.apache.metron.common.configuration.SensorParserConfig; import org.apache.metron.common.utils.JSONUtils; +import org.apache.metron.parsers.interfaces.MessageParser; import org.apache.metron.test.utils.UnitTestHelper; import org.json.simple.JSONObject; import org.junit.Assert; import org.junit.Test; -import java.io.IOException; -import java.util.List; - public class CSVParserTest { /** { @@ -121,4 +125,23 @@ public class CSVParserTest { UnitTestHelper.setLog4jLevel(CSVParser.class, Level.ERROR); } } + + @Test + public void getsReadCharsetFromConfig() throws IOException { + SensorParserConfig config = JSONUtils.INSTANCE.load(parserConfig, SensorParserConfig.class); + CSVParser parser = new CSVParser(); + parser.init(); + config.getParserConfig().put(MessageParser.READ_CHARSET, StandardCharsets.UTF_16.toString()); + parser.configure(config.getParserConfig()); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_16)); + } + + @Test + public void getsReadCharsetFromDefault() throws IOException { + SensorParserConfig config = JSONUtils.INSTANCE.load(parserConfig, SensorParserConfig.class); + CSVParser parser = new CSVParser(); + parser.init(); + parser.configure(config.getParserConfig()); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_8)); + } } diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/json/JSONMapParserTest.java b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/json/JSONMapParserTest.java index f10e930..221bf74 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/json/JSONMapParserTest.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/json/JSONMapParserTest.java @@ -17,19 +17,33 @@ */ package org.apache.metron.parsers.json; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertThat; + import com.google.common.collect.ImmutableMap; -import java.util.List; import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.adrianwalker.multilinestring.Multiline; import org.apache.log4j.Level; import org.apache.metron.parsers.BasicParser; +import org.apache.metron.parsers.interfaces.MessageParser; import org.apache.metron.test.utils.UnitTestHelper; import org.json.simple.JSONObject; import org.junit.Assert; +import org.junit.Before; import org.junit.Test; public class JSONMapParserTest { + private JSONMapParser parser; + + @Before + public void setup() { + parser = new JSONMapParser(); + } + /** { "foo" : "bar" @@ -42,7 +56,6 @@ public class JSONMapParserTest { @Test public void testHappyPath() { - JSONMapParser parser = new JSONMapParser(); List<JSONObject> output = parser.parse(happyPathJSON.getBytes(StandardCharsets.UTF_8)); Assert.assertEquals(output.size(), 1); //don't forget the timestamp field! @@ -77,7 +90,6 @@ public class JSONMapParserTest { @Test public void testCollectionHandlingDrop() { - JSONMapParser parser = new JSONMapParser(); List<JSONObject> output = parser.parse(collectionHandlingJSON.getBytes(StandardCharsets.UTF_8)); Assert.assertEquals(output.size(), 1); //don't forget the timestamp field! @@ -89,7 +101,6 @@ public class JSONMapParserTest { @Test(expected=IllegalStateException.class) public void testCollectionHandlingError() { - JSONMapParser parser = new JSONMapParser(); parser.configure(ImmutableMap.of(JSONMapParser.MAP_STRATEGY_CONFIG, JSONMapParser.MapStrategy.ERROR.name())); UnitTestHelper.setLog4jLevel(BasicParser.class, Level.FATAL); parser.parse(collectionHandlingJSON.getBytes(StandardCharsets.UTF_8)); @@ -99,7 +110,6 @@ public class JSONMapParserTest { @Test public void testCollectionHandlingAllow() { - JSONMapParser parser = new JSONMapParser(); parser.configure(ImmutableMap.of(JSONMapParser.MAP_STRATEGY_CONFIG, JSONMapParser.MapStrategy.ALLOW.name())); List<JSONObject> output = parser.parse(collectionHandlingJSON.getBytes(StandardCharsets.UTF_8)); Assert.assertEquals(output.size(), 1); @@ -112,7 +122,6 @@ public class JSONMapParserTest { @Test public void testCollectionHandlingUnfold() { - JSONMapParser parser = new JSONMapParser(); parser.configure(ImmutableMap.of(JSONMapParser.MAP_STRATEGY_CONFIG, JSONMapParser.MapStrategy.UNFOLD.name())); List<JSONObject> output = parser.parse(collectionHandlingJSON.getBytes(StandardCharsets.UTF_8)); Assert.assertEquals(output.size(), 1); @@ -129,7 +138,6 @@ public class JSONMapParserTest { @Test public void testMixedCollectionHandlingUnfold() { - JSONMapParser parser = new JSONMapParser(); parser.configure(ImmutableMap.of(JSONMapParser.MAP_STRATEGY_CONFIG,JSONMapParser.MapStrategy.UNFOLD.name())); List<JSONObject> output = parser.parse(mixCollectionHandlingJSON.getBytes( StandardCharsets.UTF_8)); @@ -140,4 +148,19 @@ public class JSONMapParserTest { Assert.assertNotNull(message.get("timestamp")); Assert.assertTrue(message.get("timestamp") instanceof Number ); } + + @Test + public void getsReadCharsetFromConfig() { + Map<String, Object> config = new HashMap<>(); + config.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_16.toString()); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_16)); + } + + @Test + public void getsReadCharsetFromDefault() { + Map<String, Object> config = new HashMap<>(); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_8)); + } } diff --git a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/regex/RegularExpressionsParserTest.java b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/regex/RegularExpressionsParserTest.java index 079d813..e320be8 100644 --- a/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/regex/RegularExpressionsParserTest.java +++ b/metron-platform/metron-parsing/metron-parsers-common/src/test/java/org/apache/metron/parsers/regex/RegularExpressionsParserTest.java @@ -14,18 +14,22 @@ */ package org.apache.metron.parsers.regex; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertThat; + import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.adrianwalker.multilinestring.Multiline; +import org.apache.metron.parsers.interfaces.MessageParser; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; +import org.json.simple.parser.ParseException; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - public class RegularExpressionsParserTest { private RegularExpressionsParser regularExpressionsParser; @@ -274,4 +278,20 @@ public class RegularExpressionsParserTest { } throw new Exception("Could not parse : " + message); } + + @Test + public void getsReadCharsetFromConfig() throws ParseException { + JSONObject config = (JSONObject) new JSONParser().parse(parserConfig1); + config.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_16.toString()); + regularExpressionsParser.configure(config); + assertThat(regularExpressionsParser.getReadCharset(), equalTo(StandardCharsets.UTF_16)); + } + + @Test + public void getsReadCharsetFromDefault() throws ParseException { + JSONObject config = (JSONObject) new JSONParser().parse(parserConfig1); + regularExpressionsParser.configure(config); + assertThat(regularExpressionsParser.getReadCharset(), equalTo(StandardCharsets.UTF_8)); + } + } diff --git a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/asa/BasicAsaParser.java b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/asa/BasicAsaParser.java index 2fd32a6..b43f50b 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/asa/BasicAsaParser.java +++ b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/asa/BasicAsaParser.java @@ -20,7 +20,6 @@ package org.apache.metron.parsers.asa; import com.google.common.collect.ImmutableMap; import java.io.InputStream; import java.io.InputStreamReader; -import java.io.UnsupportedEncodingException; import java.lang.invoke.MethodHandles; import java.nio.charset.StandardCharsets; import java.time.Clock; @@ -97,6 +96,7 @@ public class BasicAsaParser extends BasicParser { @Override public void configure(Map<String, Object> parserConfig) { + setReadCharset(parserConfig); String timeZone = (String) parserConfig.get("deviceTimeZone"); if (timeZone != null) deviceClock = Clock.system(ZoneId.of(timeZone)); diff --git a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/bro/BasicBroParser.java b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/bro/BasicBroParser.java index b60049f..33304bd 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/bro/BasicBroParser.java +++ b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/bro/BasicBroParser.java @@ -19,7 +19,6 @@ package org.apache.metron.parsers.bro; import java.lang.invoke.MethodHandles; -import java.nio.charset.StandardCharsets; import java.text.DecimalFormat; import java.text.NumberFormat; import java.util.ArrayList; @@ -46,7 +45,7 @@ public class BasicBroParser extends BasicParser { @Override public void configure(Map<String, Object> parserConfig) { - + setReadCharset(parserConfig); } @Override @@ -63,7 +62,7 @@ public class BasicBroParser extends BasicParser { String rawMessage = null; List<JSONObject> messages = new ArrayList<>(); try { - rawMessage = new String(msg, StandardCharsets.UTF_8); + rawMessage = new String(msg, getReadCharset()); _LOG.trace("[Metron] Received message: {}", rawMessage); JSONObject cleanedMessage = cleaner.clean(rawMessage); diff --git a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/cef/CEFParser.java b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/cef/CEFParser.java index ea1eab7..147c7a5 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/cef/CEFParser.java +++ b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/cef/CEFParser.java @@ -19,8 +19,6 @@ package org.apache.metron.parsers.cef; import java.lang.invoke.MethodHandles; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; import java.time.Clock; import java.util.ArrayList; import java.util.HashMap; @@ -144,7 +142,7 @@ public class CEFParser extends BasicParser { public List<JSONObject> parse(byte[] rawMessage) { List<JSONObject> messages = new ArrayList<>(); - String cefString = new String(rawMessage, StandardCharsets.UTF_8); + String cefString = new String(rawMessage, getReadCharset()); Matcher matcher = p.matcher(cefString); @@ -260,6 +258,7 @@ public class CEFParser extends BasicParser { @Override public void configure(Map<String, Object> config) { + setReadCharset(config); } @SuppressWarnings("unchecked") diff --git a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/fireeye/BasicFireEyeParser.java b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/fireeye/BasicFireEyeParser.java index 0b2c73e..4316836 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/fireeye/BasicFireEyeParser.java +++ b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/fireeye/BasicFireEyeParser.java @@ -55,7 +55,9 @@ public class BasicFireEyeParser extends BasicParser { private static final Pattern nvPattern = Pattern.compile(nvRegex); @Override - public void configure(Map<String, Object> parserConfig) {} + public void configure(Map<String, Object> parserConfig) { + setReadCharset(parserConfig); + } @Override public void init() {} @@ -68,7 +70,7 @@ public class BasicFireEyeParser extends BasicParser { List<JSONObject> messages = new ArrayList<>(); try { - toParse = new String(rawMessage, StandardCharsets.UTF_8); + toParse = new String(rawMessage, getReadCharset()); // because we support what is basically a malformed syslog 3164 message having // some form of text before the PRIORITY, we need to use the priority as diff --git a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/ise/BasicIseParser.java b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/ise/BasicIseParser.java index a396eaf..2fcc795 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/ise/BasicIseParser.java +++ b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/ise/BasicIseParser.java @@ -20,16 +20,14 @@ package org.apache.metron.parsers.ise; import com.esotericsoftware.minlog.Log; -import java.nio.charset.StandardCharsets; -import org.apache.metron.parsers.BasicParser; -import org.json.simple.JSONObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.io.StringReader; import java.util.ArrayList; import java.util.List; import java.util.Map; +import org.apache.metron.parsers.BasicParser; +import org.json.simple.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; @SuppressWarnings("serial") public class BasicIseParser extends BasicParser { @@ -40,7 +38,7 @@ public class BasicIseParser extends BasicParser { @Override public void configure(Map<String, Object> parserConfig) { - + setReadCharset(parserConfig); } @Override @@ -56,7 +54,7 @@ public class BasicIseParser extends BasicParser { List<JSONObject> messages = new ArrayList<>(); try { - raw_message = new String(msg, StandardCharsets.UTF_8); + raw_message = new String(msg, getReadCharset()); _LOG.debug("Received message: {}", raw_message); /* diff --git a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/lancope/BasicLancopeParser.java b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/lancope/BasicLancopeParser.java index 671833c..f893796 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/lancope/BasicLancopeParser.java +++ b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/lancope/BasicLancopeParser.java @@ -19,7 +19,6 @@ package org.apache.metron.parsers.lancope; import java.lang.invoke.MethodHandles; -import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; @@ -40,7 +39,7 @@ public class BasicLancopeParser extends BasicParser { @Override public void configure(Map<String, Object> parserConfig) { - + setReadCharset(parserConfig); } @Override @@ -56,7 +55,7 @@ public class BasicLancopeParser extends BasicParser { List<JSONObject> messages = new ArrayList<>(); try { - String raw_message = new String(msg, StandardCharsets.UTF_8); + String raw_message = new String(msg, getReadCharset()); payload = (JSONObject) JSONValue.parse(raw_message); diff --git a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/leef/LEEFParser.java b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/leef/LEEFParser.java index f5d96b2..86ec043 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/leef/LEEFParser.java +++ b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/leef/LEEFParser.java @@ -21,8 +21,6 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.StringReader; import java.lang.invoke.MethodHandles; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; import java.time.Clock; import java.util.ArrayList; @@ -32,7 +30,6 @@ import java.util.Map; import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern; - import org.apache.metron.common.Constants.Fields; import org.apache.metron.parsers.BasicParser; import org.apache.metron.parsers.DefaultMessageParserResult; @@ -137,7 +134,7 @@ public class LEEFParser extends BasicParser { Map<Object,Throwable> errors = new HashMap<>(); String originalMessage = null; - try (BufferedReader reader = new BufferedReader(new StringReader(new String(rawMessage, StandardCharsets.UTF_8)))) { + try (BufferedReader reader = new BufferedReader(new StringReader(new String(rawMessage, getReadCharset())))) { while ((originalMessage = reader.readLine()) != null) { Matcher matcher = pattern.matcher(originalMessage); while (matcher.find()) { @@ -272,6 +269,7 @@ public class LEEFParser extends BasicParser { @Override public void configure(Map<String, Object> config) { + setReadCharset(config); } @SuppressWarnings("unchecked") diff --git a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/paloalto/BasicPaloAltoFirewallParser.java b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/paloalto/BasicPaloAltoFirewallParser.java index c0a6efe..238eb32 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/paloalto/BasicPaloAltoFirewallParser.java +++ b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/paloalto/BasicPaloAltoFirewallParser.java @@ -20,18 +20,16 @@ package org.apache.metron.parsers.paloalto; import com.google.common.base.Splitter; import com.google.common.collect.Iterables; -import java.nio.charset.StandardCharsets; -import org.apache.metron.parsers.BasicParser; -import org.json.simple.JSONObject; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.regex.Pattern; +import org.apache.metron.parsers.BasicParser; +import org.json.simple.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class BasicPaloAltoFirewallParser extends BasicParser { @@ -161,7 +159,7 @@ public class BasicPaloAltoFirewallParser extends BasicParser { @Override public void configure(Map<String, Object> parserConfig) { - + setReadCharset(parserConfig); } @Override @@ -178,7 +176,7 @@ public class BasicPaloAltoFirewallParser extends BasicParser { List<JSONObject> messages = new ArrayList<>(); try { - toParse = new String(msg, StandardCharsets.UTF_8); + toParse = new String(msg, getReadCharset()); _LOG.debug("Received message: {}", toParse); parseMessage(toParse, outputMessage); long timestamp = System.currentTimeMillis(); diff --git a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/snort/BasicSnortParser.java b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/snort/BasicSnortParser.java index fd9e23d..d4c5170 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/snort/BasicSnortParser.java +++ b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/snort/BasicSnortParser.java @@ -91,6 +91,7 @@ public class BasicSnortParser extends BasicParser { @Override public void configure(Map<String, Object> parserConfig) { + setReadCharset(parserConfig); dateTimeFormatter = getDateFormatter(parserConfig); dateTimeFormatter = getDateFormatterWithZone(dateTimeFormatter, parserConfig); init(); @@ -140,7 +141,7 @@ public class BasicSnortParser extends BasicParser { List<JSONObject> messages = new ArrayList<>(); try { // snort alerts expected as csv records - String csvMessage = new String(rawMessage, StandardCharsets.UTF_8); + String csvMessage = new String(rawMessage, getReadCharset()); Map<String, String> records = null; try { records = converter.toMap(csvMessage); diff --git a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/sourcefire/BasicSourcefireParser.java b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/sourcefire/BasicSourcefireParser.java index f8e09ff..9781e50 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/sourcefire/BasicSourcefireParser.java +++ b/metron-platform/metron-parsing/metron-parsers/src/main/java/org/apache/metron/parsers/sourcefire/BasicSourcefireParser.java @@ -18,7 +18,6 @@ package org.apache.metron.parsers.sourcefire; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -44,7 +43,7 @@ public class BasicSourcefireParser extends BasicParser { @Override public void configure(Map<String, Object> parserConfig) { - + setReadCharset(parserConfig); } @Override @@ -61,7 +60,7 @@ public class BasicSourcefireParser extends BasicParser { List<JSONObject> messages = new ArrayList<>(); try { - toParse = new String(msg, StandardCharsets.UTF_8); + toParse = new String(msg, getReadCharset()); _LOG.debug("Received message: {}", toParse); String tmp = toParse.substring(toParse.lastIndexOf("{")); diff --git a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/SnortParserTest.java b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/SnortParserTest.java index 2264770..9224b0d 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/SnortParserTest.java +++ b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/SnortParserTest.java @@ -18,10 +18,19 @@ package org.apache.metron.parsers; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.startsWith; +import static org.junit.Assert.assertThat; + import java.nio.charset.StandardCharsets; +import java.time.ZoneId; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; import org.adrianwalker.multilinestring.Multiline; import org.apache.log4j.Level; import org.apache.metron.common.Constants; +import org.apache.metron.parsers.interfaces.MessageParser; import org.apache.metron.parsers.snort.BasicSnortParser; import org.apache.metron.test.utils.UnitTestHelper; import org.junit.Assert; @@ -29,15 +38,6 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; -import java.time.ZoneId; -import java.util.HashMap; -import java.util.Map; -import java.util.TimeZone; - -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.startsWith; -import static org.junit.Assert.assertThat; - public class SnortParserTest { @Rule @@ -148,4 +148,20 @@ public class SnortParserTest { parser.configure(parserConfig); } + @Test + public void getsReadCharsetFromConfig() { + Map<String, Object> config = new HashMap<>(); + config.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_16.toString()); + BasicSnortParser parser = new BasicSnortParser(); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_16)); + } + + @Test + public void getsReadCharsetFromDefault() { + Map<String, Object> config = new HashMap<>(); + BasicSnortParser parser = new BasicSnortParser(); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_8)); + } } diff --git a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/asa/BasicAsaParserTest.java b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/asa/BasicAsaParserTest.java index 1dd2e79..f6419f2 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/asa/BasicAsaParserTest.java +++ b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/asa/BasicAsaParserTest.java @@ -17,29 +17,37 @@ */ package org.apache.metron.parsers.asa; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.startsWith; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + import java.nio.charset.StandardCharsets; +import java.time.Clock; +import java.time.Instant; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.util.HashMap; +import java.util.Map; import org.apache.log4j.Level; +import org.apache.metron.parsers.interfaces.MessageParser; import org.apache.metron.test.utils.UnitTestHelper; import org.json.simple.JSONObject; -import org.junit.BeforeClass; +import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; -import java.time.*; -import java.util.HashMap; -import java.util.Map; - -import static org.hamcrest.CoreMatchers.startsWith; -import static org.junit.Assert.*; - public class BasicAsaParserTest { private static BasicAsaParser asaParser; + private Map<String, Object> parserConfig; - @BeforeClass - public static void setUpOnce() throws Exception { - Map<String, Object> parserConfig = new HashMap<>(); + @Before + public void setUp() throws Exception { + parserConfig = new HashMap<>(); asaParser = new BasicAsaParser(); asaParser.configure(parserConfig); asaParser.init(); @@ -47,7 +55,6 @@ public class BasicAsaParserTest { @Test public void testConfigureDefault() { - Map<String, Object> parserConfig = new HashMap<>(); BasicAsaParser testParser = new BasicAsaParser(); testParser.configure(parserConfig); testParser.init(); @@ -56,7 +63,6 @@ public class BasicAsaParserTest { @Test public void testConfigureTimeZoneOffset() { - Map<String, Object> parserConfig = new HashMap<>(); parserConfig.put("deviceTimeZone", "UTC-05:00"); BasicAsaParser testParser = new BasicAsaParser(); testParser.configure(parserConfig); @@ -68,7 +74,6 @@ public class BasicAsaParserTest { @Test public void testConfigureTimeZoneText() { - Map<String, Object> parserConfig = new HashMap<>(); parserConfig.put("deviceTimeZone", "America/New_York"); BasicAsaParser testParser = new BasicAsaParser(); testParser.configure(parserConfig); @@ -186,4 +191,17 @@ public class BasicAsaParserTest { JSONObject asaJson = asaParser.parse(rawMessage.getBytes(StandardCharsets.UTF_8)).get(0); UnitTestHelper.setLog4jLevel(BasicAsaParser.class, Level.ERROR); } + + @Test + public void getsReadCharsetFromConfig() { + parserConfig.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_16.toString()); + asaParser.configure(parserConfig); + assertThat(asaParser.getReadCharset(), equalTo(StandardCharsets.UTF_16)); + } + + @Test + public void getsReadCharsetFromDefault() { + asaParser.configure(parserConfig); + assertThat(asaParser.getReadCharset(), equalTo(StandardCharsets.UTF_8)); + } } diff --git a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/bro/BasicBroParserTest.java b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/bro/BasicBroParserTest.java index f5adbe4..6c04ba9 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/bro/BasicBroParserTest.java +++ b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/bro/BasicBroParserTest.java @@ -17,10 +17,16 @@ */ package org.apache.metron.parsers.bro; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertThat; + import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; import org.adrianwalker.multilinestring.Multiline; import org.apache.commons.lang3.tuple.Pair; import org.apache.log4j.Level; +import org.apache.metron.parsers.interfaces.MessageParser; import org.apache.metron.test.utils.UnitTestHelper; import org.json.simple.JSONArray; import org.json.simple.JSONObject; @@ -28,8 +34,6 @@ import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; import org.junit.AfterClass; import org.junit.Assert; - -import java.util.Map; import org.junit.BeforeClass; import org.junit.Test; @@ -1423,4 +1427,19 @@ public class BasicBroParserTest { public void testBadMessageNonJson() { broParser.parse("foo bar".getBytes(StandardCharsets.UTF_8)); } + + @Test + public void getsReadCharsetFromConfig() { + Map<String, Object> config = new HashMap<>(); + config.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_16.toString()); + broParser.configure(config); + assertThat(broParser.getReadCharset(), equalTo(StandardCharsets.UTF_16)); + } + + @Test + public void getsReadCharsetFromDefault() { + Map<String, Object> config = new HashMap<>(); + broParser.configure(config); + assertThat(broParser.getReadCharset(), equalTo(StandardCharsets.UTF_8)); + } } diff --git a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/cef/CEFParserTest.java b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/cef/CEFParserTest.java index c3a569a..2a5d463 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/cef/CEFParserTest.java +++ b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/cef/CEFParserTest.java @@ -18,6 +18,9 @@ package org.apache.metron.parsers.cef; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertThat; + import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; @@ -26,21 +29,22 @@ import com.github.fge.jsonschema.core.report.ProcessingReport; import com.github.fge.jsonschema.main.JsonSchemaFactory; import com.github.fge.jsonschema.main.JsonValidator; import com.google.common.io.Resources; -import org.apache.metron.common.Constants.Fields; -import org.json.simple.JSONObject; -import org.json.simple.parser.JSONParser; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - import java.io.IOException; import java.net.URL; import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; +import java.util.HashMap; import java.util.List; import java.util.Map; +import org.apache.metron.common.Constants.Fields; +import org.apache.metron.parsers.interfaces.MessageParser; +import org.json.simple.JSONObject; +import org.json.simple.parser.JSONParser; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; public class CEFParserTest { private CEFParser parser; @@ -276,4 +280,19 @@ public class CEFParserTest { return parse; } + @Test + public void getsReadCharsetFromConfig() { + Map<String, Object> config = new HashMap<>(); + config.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_16.toString()); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_16)); + } + + @Test + public void getsReadCharsetFromDefault() { + Map<String, Object> config = new HashMap<>(); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_8)); + } + } diff --git a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/fireeye/BasicFireEyeParserTest.java b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/fireeye/BasicFireEyeParserTest.java index bde7c0a..6a2e3ea 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/fireeye/BasicFireEyeParserTest.java +++ b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/fireeye/BasicFireEyeParserTest.java @@ -17,14 +17,18 @@ */ package org.apache.metron.parsers.fireeye; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertThat; + import java.nio.charset.StandardCharsets; -import java.util.Map; -import java.util.Map.Entry; import java.time.Year; -import java.time.ZonedDateTime; import java.time.ZoneOffset; - +import java.time.ZonedDateTime; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; import org.apache.metron.parsers.AbstractParserConfigTest; +import org.apache.metron.parsers.interfaces.MessageParser; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; @@ -74,4 +78,19 @@ public class BasicFireEyeParserTest extends AbstractParserConfigTest { long expectedTimestamp = ZonedDateTime.of(Year.now(ZoneOffset.UTC).getValue(), 3, 19, 5, 24, 39, 0, ZoneOffset.UTC).toInstant().toEpochMilli(); Assert.assertEquals(expectedTimestamp, json.get("timestamp")); } + + @Test + public void getsReadCharsetFromConfig() { + Map<String, Object> config = new HashMap<>(); + config.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_16.toString()); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_16)); + } + + @Test + public void getsReadCharsetFromDefault() { + Map<String, Object> config = new HashMap<>(); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_8)); + } } diff --git a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/ise/BasicIseParserTest.java b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/ise/BasicIseParserTest.java index 0a7ece4..c67f574 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/ise/BasicIseParserTest.java +++ b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/ise/BasicIseParserTest.java @@ -17,12 +17,17 @@ */ package org.apache.metron.parsers.ise; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertThat; + import com.github.fge.jsonschema.core.exceptions.ProcessingException; import java.io.IOException; import java.net.URL; import java.nio.charset.StandardCharsets; +import java.util.HashMap; import java.util.Map; import org.apache.metron.parsers.AbstractParserConfigTest; +import org.apache.metron.parsers.interfaces.MessageParser; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.junit.Assert; @@ -54,4 +59,19 @@ public class BasicIseParserTest extends AbstractParserConfigTest { Assert.assertTrue(validateJsonData(getSchemaJsonString(), json.toString())); } } + + @Test + public void getsReadCharsetFromConfig() { + Map<String, Object> config = new HashMap<>(); + config.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_16.toString()); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_16)); + } + + @Test + public void getsReadCharsetFromDefault() { + Map<String, Object> config = new HashMap<>(); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_8)); + } } diff --git a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/lancope/BasicLancopeParserTest.java b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/lancope/BasicLancopeParserTest.java index ab4613f..380a635 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/lancope/BasicLancopeParserTest.java +++ b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/lancope/BasicLancopeParserTest.java @@ -17,12 +17,17 @@ */ package org.apache.metron.parsers.lancope; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertThat; + import com.github.fge.jsonschema.core.exceptions.ProcessingException; import java.io.IOException; import java.net.URL; import java.nio.charset.StandardCharsets; +import java.util.HashMap; import java.util.Map; import org.apache.metron.parsers.AbstractParserConfigTest; +import org.apache.metron.parsers.interfaces.MessageParser; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; @@ -54,5 +59,20 @@ public class BasicLancopeParserTest extends AbstractParserConfigTest { Assert.assertTrue(validateJsonData(getSchemaJsonString(), json.toString())); } } + + @Test + public void getsReadCharsetFromConfig() { + Map<String, Object> config = new HashMap<>(); + config.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_16.toString()); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_16)); + } + + @Test + public void getsReadCharsetFromDefault() { + Map<String, Object> config = new HashMap<>(); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_8)); + } } diff --git a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/leef/LEEFParserTest.java b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/leef/LEEFParserTest.java index f50fe37..65cbb9b 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/leef/LEEFParserTest.java +++ b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/leef/LEEFParserTest.java @@ -17,13 +17,28 @@ */ package org.apache.metron.parsers.leef; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; + import com.fasterxml.jackson.databind.JsonNode; import com.github.fge.jackson.JsonLoader; import com.github.fge.jsonschema.core.report.ProcessingReport; import com.github.fge.jsonschema.main.JsonSchemaFactory; import com.github.fge.jsonschema.main.JsonValidator; import com.google.common.io.Resources; +import java.nio.charset.StandardCharsets; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; import org.apache.metron.common.Constants.Fields; +import org.apache.metron.parsers.interfaces.MessageParser; import org.apache.metron.parsers.interfaces.MessageParserResult; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; @@ -31,14 +46,6 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import java.nio.charset.StandardCharsets; -import java.text.DateFormat; -import java.text.SimpleDateFormat; -import java.util.*; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; - public class LEEFParserTest { private LEEFParser parser; @@ -237,4 +244,19 @@ public class LEEFParserTest { Assert.assertTrue(parse.isPresent()); return parse.get().getMessages(); } + + @Test + public void getsReadCharsetFromConfig() { + Map<String, Object> config = new HashMap<>(); + config.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_16.toString()); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_16)); + } + + @Test + public void getsReadCharsetFromDefault() { + Map<String, Object> config = new HashMap<>(); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_8)); + } } diff --git a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/paloalto/BasicPaloAltoFirewallParserTest.java b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/paloalto/BasicPaloAltoFirewallParserTest.java index f49cab4..f0cadbf 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/paloalto/BasicPaloAltoFirewallParserTest.java +++ b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/paloalto/BasicPaloAltoFirewallParserTest.java @@ -17,18 +17,22 @@ */ package org.apache.metron.parsers.paloalto; +import static org.hamcrest.CoreMatchers.equalTo; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThat; import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.apache.metron.parsers.AbstractParserConfigTest; +import org.apache.metron.parsers.interfaces.MessageParser; import org.json.simple.JSONObject; import org.json.simple.parser.ParseException; import org.junit.Before; import org.junit.Test; -import java.util.List; - public class BasicPaloAltoFirewallParserTest extends AbstractParserConfigTest { @Before @@ -753,4 +757,19 @@ public class BasicPaloAltoFirewallParserTest extends AbstractParserConfigTest { String expectedParserVersion = actual.get(BasicPaloAltoFirewallParser.ParserVersion).toString(); assertEquals(expectedParserVersion, "0"); } + + @Test + public void getsReadCharsetFromConfig() { + Map<String, Object> config = new HashMap<>(); + config.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_16.toString()); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_16)); + } + + @Test + public void getsReadCharsetFromDefault() { + Map<String, Object> config = new HashMap<>(); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_8)); + } } diff --git a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/sourcefire/BasicSourcefireParserTest.java b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/sourcefire/BasicSourcefireParserTest.java index 578b93f..4215250 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/sourcefire/BasicSourcefireParserTest.java +++ b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/sourcefire/BasicSourcefireParserTest.java @@ -17,10 +17,15 @@ */ package org.apache.metron.parsers.sourcefire; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.Assert.assertThat; + import java.nio.charset.StandardCharsets; +import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; import org.apache.metron.parsers.AbstractParserConfigTest; +import org.apache.metron.parsers.interfaces.MessageParser; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; @@ -56,4 +61,19 @@ public class BasicSourcefireParserTest extends AbstractParserConfigTest { } } } + + @Test + public void getsReadCharsetFromConfig() { + Map<String, Object> config = new HashMap<>(); + config.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_16.toString()); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_16)); + } + + @Test + public void getsReadCharsetFromDefault() { + Map<String, Object> config = new HashMap<>(); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_8)); + } } diff --git a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/websphere/GrokWebSphereParserTest.java b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/websphere/GrokWebSphereParserTest.java index 469fb73..798cb53 100644 --- a/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/websphere/GrokWebSphereParserTest.java +++ b/metron-platform/metron-parsing/metron-parsers/src/test/java/org/apache/metron/parsers/websphere/GrokWebSphereParserTest.java @@ -18,15 +18,19 @@ package org.apache.metron.parsers.websphere; +import static org.hamcrest.CoreMatchers.equalTo; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; import java.nio.charset.StandardCharsets; -import java.time.*; +import java.time.Year; +import java.time.ZoneId; +import java.time.ZonedDateTime; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; - +import org.apache.metron.parsers.interfaces.MessageParser; import org.apache.metron.parsers.interfaces.MessageParserResult; import org.json.simple.JSONObject; import org.junit.Assert; @@ -37,6 +41,7 @@ public class GrokWebSphereParserTest { private static final ZoneId UTC = ZoneId.of("UTC"); private Map<String, Object> parserConfig; + private GrokWebSphereParser parser; @Before public void setup() { @@ -45,14 +50,12 @@ public class GrokWebSphereParserTest { parserConfig.put("patternLabel", "WEBSPHERE"); parserConfig.put("timestampField", "timestamp_string"); parserConfig.put("dateFormat", "yyyy MMM dd HH:mm:ss"); + parser = new GrokWebSphereParser(); + parser.configure(parserConfig); } @Test public void testParseLoginLine() throws Exception { - - //Set up parser, parse message - GrokWebSphereParser parser = new GrokWebSphereParser(); - parser.configure(parserConfig); String testString = "<133>Apr 15 17:47:28 ABCXML1413 [rojOut][0x81000033][auth][notice] user(rick007): " + "[120.43.200.6]: User logged into 'cohlOut'."; Optional<MessageParserResult<JSONObject>> resultOptional = parser.parseOptionalResult(testString.getBytes( @@ -79,10 +82,6 @@ public class GrokWebSphereParserTest { @Test public void testParseLogoutLine() throws Exception { - - //Set up parser, parse message - GrokWebSphereParser parser = new GrokWebSphereParser(); - parser.configure(parserConfig); String testString = "<134>Apr 15 18:02:27 PHIXML3RWD [0x81000019][auth][info] [14.122.2.201]: " + "User 'hjpotter' logged out from 'default'."; Optional<MessageParserResult<JSONObject>> resultOptional = parser.parseOptionalResult(testString.getBytes( @@ -108,10 +107,6 @@ public class GrokWebSphereParserTest { @Test public void testParseRBMLine() throws Exception { - - //Set up parser, parse message - GrokWebSphereParser parser = new GrokWebSphereParser(); - parser.configure(parserConfig); String testString = "<131>Apr 15 17:36:35 ROBXML3QRS [0x80800018][auth][error] rbm(RBM-Settings): " + "trans(3502888135)[request] gtid(3502888135): RBM: Resource access denied."; Optional<MessageParserResult<JSONObject>> resultOptional = parser.parseOptionalResult(testString.getBytes( @@ -136,10 +131,6 @@ public class GrokWebSphereParserTest { @Test public void testParseOtherLine() throws Exception { - - //Set up parser, parse message - GrokWebSphereParser parser = new GrokWebSphereParser(); - parser.configure(parserConfig); String testString = "<134>Apr 15 17:17:34 SAGPXMLQA333 [0x8240001c][audit][info] trans(191): (admin:default:system:*): " + "ntp-service 'NTP Service' - Operational state down"; Optional<MessageParserResult<JSONObject>> resultOptional = parser.parseOptionalResult(testString.getBytes( @@ -163,10 +154,6 @@ public class GrokWebSphereParserTest { @Test public void testParseMalformedLoginLine() throws Exception { - - //Set up parser, attempt to parse malformed message - GrokWebSphereParser parser = new GrokWebSphereParser(); - parser.configure(parserConfig); String testString = "<133>Apr 15 17:47:28 ABCXML1413 [rojOut][0x81000033][auth][notice] rick007): " + "[120.43.200. User logged into 'cohlOut'."; Optional<MessageParserResult<JSONObject>> resultOptional = parser.parseOptionalResult(testString.getBytes( @@ -193,10 +180,6 @@ public class GrokWebSphereParserTest { @Test public void testParseMalformedLogoutLine() throws Exception { - - //Set up parser, attempt to parse malformed message - GrokWebSphereParser parser = new GrokWebSphereParser(); - parser.configure(parserConfig); String testString = "<134>Apr 15 18:02:27 PHIXML3RWD [0x81000019][auth][info] [14.122.2.201: " + "User 'hjpotter' logged out from 'default."; Optional<MessageParserResult<JSONObject>> resultOptional = parser.parseOptionalResult(testString.getBytes( @@ -222,10 +205,6 @@ public class GrokWebSphereParserTest { @Test public void testParseMalformedRBMLine() throws Exception { - - //Set up parser, parse message - GrokWebSphereParser parser = new GrokWebSphereParser(); - parser.configure(parserConfig); String testString = "<131>Apr 15 17:36:35 ROBXML3QRS [0x80800018][auth][error] rbmRBM-Settings): " + "trans3502888135)[request] gtid3502888135) RBM: Resource access denied."; Optional<MessageParserResult<JSONObject>> resultOptional = parser.parseOptionalResult(testString.getBytes( @@ -250,10 +229,6 @@ public class GrokWebSphereParserTest { @Test public void testParseMalformedOtherLine() throws Exception { - - //Set up parser, parse message - GrokWebSphereParser parser = new GrokWebSphereParser(); - parser.configure(parserConfig); String testString = "<134>Apr 15 17:17:34 SAGPXMLQA333 [0x8240001c][audit][info] trans 191) admindefaultsystem*): " + "ntp-service 'NTP Service' - Operational state down:"; Optional<MessageParserResult<JSONObject>> resultOptional = parser.parseOptionalResult(testString.getBytes( @@ -275,5 +250,20 @@ public class GrokWebSphereParserTest { assertEquals(null, parsedJSON.get("process")); assertEquals("trans 191) admindefaultsystem*): ntp-service 'NTP Service' - Operational state down:", parsedJSON.get("message")); } - + + @Test + public void getsReadCharsetFromConfig() { + Map<String, Object> config = new HashMap<>(); + config.put(MessageParser.READ_CHARSET, StandardCharsets.UTF_16.toString()); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_16)); + } + + @Test + public void getsReadCharsetFromDefault() { + Map<String, Object> config = new HashMap<>(); + parser.configure(config); + assertThat(parser.getReadCharset(), equalTo(StandardCharsets.UTF_8)); + } + }