DRILL-5919: Add non-numeric support for JSON processing 1. Added two session options store.json.reader.allow_nan_inf and store.json.writer.allow_nan_inf that allow reading/writing NaN and Infinity as numbers. By default these options are set to true.
2. Extended signature of convert_toJSON and convert_fromJSON functions by adding second optional parameter that enables/disables read/write NaN and Infinity. By default it is set to true. 3. Added unit tests with nan, infinity values for math and aggregate functions 4. Replaced JsonReader's constructors with builder. This closes #1026 Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/9bb93703 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/9bb93703 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/9bb93703 Branch: refs/heads/master Commit: 9bb93703ec571b0ffa08b17fd31a00d6eeb6b4e9 Parents: bf56cd9 Author: Volodymyr Tkach <vovatkac...@gmail.com> Authored: Mon Nov 13 14:45:33 2017 +0000 Committer: Parth Chandra <par...@apache.org> Committed: Thu Jan 11 17:04:20 2018 -0800 ---------------------------------------------------------------------- .../store/kafka/decoders/JsonMessageReader.java | 6 +- .../exec/store/mongo/MongoRecordReader.java | 11 +- .../main/codegen/templates/MathFunctions.java | 10 + .../org/apache/drill/exec/ExecConstants.java | 10 + .../exec/expr/fn/impl/conv/JsonConvertFrom.java | 10 +- .../exec/expr/fn/impl/conv/JsonConvertTo.java | 2 + .../exec/expr/fn/impl/conv/RoundFunctions.java | 20 +- .../server/options/SystemOptionManager.java | 2 + .../exec/store/easy/json/JSONFormatPlugin.java | 2 +- .../exec/store/easy/json/JSONRecordReader.java | 15 +- .../exec/store/easy/json/JsonRecordWriter.java | 4 +- .../easy/json/reader/BaseJsonProcessor.java | 29 +- .../easy/json/reader/CountingJsonReader.java | 4 +- .../exec/vector/complex/fn/JsonReader.java | 91 ++- .../exec/vector/complex/fn/JsonWriter.java | 2 +- .../src/main/resources/drill-module.conf | 2 + .../fn/impl/TestMathFunctionsWithNanInf.java | 551 +++++++++++++++++++ .../vector/complex/writer/TestJsonNanInf.java | 274 +++++++++ 18 files changed, 1004 insertions(+), 41 deletions(-) 
---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/contrib/storage-kafka/src/main/java/org/apache/drill/exec/store/kafka/decoders/JsonMessageReader.java ---------------------------------------------------------------------- diff --git a/contrib/storage-kafka/src/main/java/org/apache/drill/exec/store/kafka/decoders/JsonMessageReader.java b/contrib/storage-kafka/src/main/java/org/apache/drill/exec/store/kafka/decoders/JsonMessageReader.java index 9ad6107..3239664 100644 --- a/contrib/storage-kafka/src/main/java/org/apache/drill/exec/store/kafka/decoders/JsonMessageReader.java +++ b/contrib/storage-kafka/src/main/java/org/apache/drill/exec/store/kafka/decoders/JsonMessageReader.java @@ -58,7 +58,11 @@ public class JsonMessageReader implements MessageReader { public void init(DrillBuf buf, List<SchemaPath> columns, VectorContainerWriter writer, boolean allTextMode, boolean readNumbersAsDouble) { // set skipOuterList to false as it doesn't applicable for JSON records and it's only applicable for JSON files. 
- this.jsonReader = new JsonReader(buf, columns, allTextMode, false, readNumbersAsDouble); + this.jsonReader = new JsonReader.Builder(buf) + .schemaPathColumns(columns) + .allTextMode(allTextMode) + .readNumbersAsDouble(readNumbersAsDouble) + .build(); this.writer = writer; } http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java ---------------------------------------------------------------------- diff --git a/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java b/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java index 77def0a..cacb318 100644 --- a/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java +++ b/contrib/storage-mongo/src/main/java/org/apache/drill/exec/store/mongo/MongoRecordReader.java @@ -25,6 +25,7 @@ import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.TimeUnit; +import com.google.common.collect.ImmutableList; import org.apache.drill.common.exceptions.DrillRuntimeException; import org.apache.drill.common.exceptions.ExecutionSetupException; import org.apache.drill.common.expression.SchemaPath; @@ -73,6 +74,7 @@ public class MongoRecordReader extends AbstractRecordReader { private final MongoStoragePlugin plugin; private final boolean enableAllTextMode; + private final boolean enableNanInf; private final boolean readNumbersAsDouble; private boolean unionEnabled; private final boolean isBsonRecordReader; @@ -92,6 +94,7 @@ public class MongoRecordReader extends AbstractRecordReader { buildFilters(subScanSpec.getFilter(), mergedFilters); enableAllTextMode = fragmentContext.getOptions().getOption(ExecConstants.MONGO_ALL_TEXT_MODE).bool_val; + enableNanInf = fragmentContext.getOptions().getOption(ExecConstants.JSON_READER_NAN_INF_NUMBERS).bool_val; readNumbersAsDouble = 
fragmentContext.getOptions().getOption(ExecConstants.MONGO_READER_READ_NUMBERS_AS_DOUBLE).bool_val; isBsonRecordReader = fragmentContext.getOptions().getOption(ExecConstants.MONGO_BSON_RECORD_READER).bool_val; logger.debug("BsonRecordReader is enabled? " + isBsonRecordReader); @@ -159,8 +162,12 @@ public class MongoRecordReader extends AbstractRecordReader { readNumbersAsDouble); logger.debug("Initialized BsonRecordReader. "); } else { - this.jsonReader = new JsonReader(fragmentContext.getManagedBuffer(), Lists.newArrayList(getColumns()), - enableAllTextMode, false, readNumbersAsDouble); + this.jsonReader = new JsonReader.Builder(fragmentContext.getManagedBuffer()) + .schemaPathColumns(Lists.newArrayList(getColumns())) + .allTextMode(enableAllTextMode) + .readNumbersAsDouble(readNumbersAsDouble) + .enableNanInf(enableNanInf) + .build(); logger.debug(" Intialized JsonRecordReader. "); } } http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/exec/java-exec/src/main/codegen/templates/MathFunctions.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/codegen/templates/MathFunctions.java b/exec/java-exec/src/main/codegen/templates/MathFunctions.java index 50ee6e6..548c10e 100644 --- a/exec/java-exec/src/main/codegen/templates/MathFunctions.java +++ b/exec/java-exec/src/main/codegen/templates/MathFunctions.java @@ -139,8 +139,18 @@ public class GMathFunctions{ } public void eval() { + + <#if func.funcName=='trunc' && (type.dataType=='Float4' || type.dataType=='Float8')> + if (Double.isInfinite(input1.value) || Double.isNaN(input1.value)){ + out.value = Double.NaN; + } else { + java.math.BigDecimal temp = new java.math.BigDecimal(input1.value); + out.value = temp.setScale(input2.value, java.math.RoundingMode.${func.mode}).doubleValue(); + } + <#else> java.math.BigDecimal temp = new java.math.BigDecimal(input1.value); out.value = temp.setScale(input2.value, java.math.RoundingMode.${func.mode}).doubleValue(); 
+ </#if> } } </#list> http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java index d155466..c3e9d46 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java @@ -278,6 +278,16 @@ public final class ExecConstants { public static final DoubleValidator TEXT_ESTIMATED_ROW_SIZE = new RangeDoubleValidator("store.text.estimated_row_size_bytes", 1, Long.MAX_VALUE); /** + * Json writer option for writing `NaN` and `Infinity` tokens as numbers (not enclosed with double quotes) + */ + public static final String JSON_WRITER_NAN_INF_NUMBERS = "store.json.writer.allow_nan_inf"; + public static final BooleanValidator JSON_WRITER_NAN_INF_NUMBERS_VALIDATOR = new BooleanValidator(JSON_WRITER_NAN_INF_NUMBERS); + /** + * Json reader option that enables parser to read `NaN` and `Infinity` tokens as numbers + */ + public static final String JSON_READER_NAN_INF_NUMBERS = "store.json.reader.allow_nan_inf"; + public static final BooleanValidator JSON_READER_NAN_INF_NUMBERS_VALIDATOR = new BooleanValidator(JSON_READER_NAN_INF_NUMBERS); + /** * The column label (for directory levels) in results when querying files in a directory * E.g. 
labels: dir0 dir1 * structure: foo http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java index 19cbdab..8a379a0 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java @@ -30,6 +30,7 @@ import org.apache.drill.exec.expr.annotations.FunctionTemplate.NullHandling; import org.apache.drill.exec.expr.annotations.Output; import org.apache.drill.exec.expr.annotations.Param; import org.apache.drill.exec.expr.annotations.Workspace; +import org.apache.drill.exec.expr.holders.BitHolder; import org.apache.drill.exec.expr.holders.VarBinaryHolder; import org.apache.drill.exec.expr.holders.VarCharHolder; import org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter; @@ -50,7 +51,9 @@ public class JsonConvertFrom { @Output ComplexWriter writer; public void setup(){ - jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader(buffer, false, false, false /* do not read numbers as doubles */); + jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader.Builder(buffer) + .defaultSchemaPathColumns() + .build(); } public void eval(){ @@ -76,7 +79,9 @@ public class JsonConvertFrom { @Output ComplexWriter writer; public void setup(){ - jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader(buffer, false, false, false /* do not read numbers as doubles */); + jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader.Builder(buffer) + .defaultSchemaPathColumns() + .build(); } public void eval(){ @@ -91,4 +96,5 @@ public class JsonConvertFrom 
{ } } + } http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertTo.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertTo.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertTo.java index 772999d..379af20 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertTo.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertTo.java @@ -28,6 +28,7 @@ import org.apache.drill.exec.expr.annotations.FunctionTemplate.FunctionScope; import org.apache.drill.exec.expr.annotations.FunctionTemplate.NullHandling; import org.apache.drill.exec.expr.annotations.Output; import org.apache.drill.exec.expr.annotations.Param; +import org.apache.drill.exec.expr.holders.BitHolder; import org.apache.drill.exec.expr.holders.VarBinaryHolder; import org.apache.drill.exec.vector.complex.reader.FieldReader; @@ -104,4 +105,5 @@ public class JsonConvertTo { out.end = bytea.length; } } + } http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/RoundFunctions.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/RoundFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/RoundFunctions.java index db39d15..9c5db1e 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/RoundFunctions.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/RoundFunctions.java @@ -171,8 +171,14 @@ public class RoundFunctions { } public void eval() { - java.math.BigDecimal input = java.math.BigDecimal.valueOf(in.value); - out.value = input.setScale(0, 
java.math.RoundingMode.HALF_UP).floatValue(); + if (Float.isNaN(in.value)) { + out.value = 0; + } else if(Float.isInfinite(in.value)) { + out.value = Math.signum(in.value) > 0 ? Integer.MAX_VALUE : Integer.MIN_VALUE; + } else { + java.math.BigDecimal input = java.math.BigDecimal.valueOf(in.value); + out.value = input.setScale(0, java.math.RoundingMode.HALF_UP).floatValue(); + } } } @@ -186,8 +192,14 @@ public class RoundFunctions { } public void eval() { - java.math.BigDecimal input = java.math.BigDecimal.valueOf(in.value); - out.value = input.setScale(0, java.math.RoundingMode.HALF_UP).doubleValue(); + if (Double.isNaN(in.value)) { + out.value = 0; + } else if(Double.isInfinite(in.value)) { + out.value = Math.signum(in.value) > 0 ? Long.MAX_VALUE : Long.MIN_VALUE; + } else { + java.math.BigDecimal input = java.math.BigDecimal.valueOf(in.value); + out.value = input.setScale(0, java.math.RoundingMode.HALF_UP).doubleValue(); + } } } } http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java index a1ddb30..369f3bc 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java @@ -140,6 +140,8 @@ public class SystemOptionManager extends BaseOptionManager implements AutoClosea new OptionDefinition(ExecConstants.PARQUET_PAGEREADER_USE_FADVISE_VALIDATOR), new OptionDefinition(ExecConstants.PARQUET_READER_INT96_AS_TIMESTAMP_VALIDATOR), new OptionDefinition(ExecConstants.JSON_READER_ALL_TEXT_MODE_VALIDATOR), + new OptionDefinition(ExecConstants.JSON_WRITER_NAN_INF_NUMBERS_VALIDATOR), 
+ new OptionDefinition(ExecConstants.JSON_READER_NAN_INF_NUMBERS_VALIDATOR), new OptionDefinition(ExecConstants.ENABLE_UNION_TYPE), new OptionDefinition(ExecConstants.TEXT_ESTIMATED_ROW_SIZE), new OptionDefinition(ExecConstants.JSON_EXTENDED_TYPES), http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java index 73cb616..095e09a 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java @@ -82,7 +82,7 @@ public class JSONFormatPlugin extends EasyFormatPlugin<JSONFormatConfig> { options.put("extended", Boolean.toString(context.getOptions().getOption(ExecConstants.JSON_EXTENDED_TYPES))); options.put("uglify", Boolean.toString(context.getOptions().getOption(ExecConstants.JSON_WRITER_UGLIFY))); options.put("skipnulls", Boolean.toString(context.getOptions().getOption(ExecConstants.JSON_WRITER_SKIPNULLFIELDS))); - + options.put("enableNanInf", Boolean.toString(context.getOptions().getOption(ExecConstants.JSON_WRITER_NAN_INF_NUMBERS_VALIDATOR))); RecordWriter recordWriter = new JsonRecordWriter(writer.getStorageStrategy()); recordWriter.init(options); http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java index c406bb3..4abf7a5 100644 --- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java @@ -20,7 +20,6 @@ package org.apache.drill.exec.store.easy.json; import java.io.IOException; import java.io.InputStream; import java.util.List; -import com.google.common.collect.Lists; import org.apache.drill.common.exceptions.ExecutionSetupException; import org.apache.drill.common.exceptions.UserException; @@ -61,6 +60,7 @@ public class JSONRecordReader extends AbstractRecordReader { private long runningRecordCount = 0; private final FragmentContext fragmentContext; private final boolean enableAllTextMode; + private final boolean enableNanInf; private final boolean readNumbersAsDouble; private final boolean unionEnabled; private long parseErrorCount; @@ -114,6 +114,7 @@ public class JSONRecordReader extends AbstractRecordReader { this.fragmentContext = fragmentContext; // only enable all text mode if we aren't using embedded content mode. 
this.enableAllTextMode = embeddedContent == null && fragmentContext.getOptions().getOption(ExecConstants.JSON_READER_ALL_TEXT_MODE_VALIDATOR); + this.enableNanInf = fragmentContext.getOptions().getOption(ExecConstants.JSON_READER_NAN_INF_NUMBERS_VALIDATOR); this.readNumbersAsDouble = embeddedContent == null && fragmentContext.getOptions().getOption(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE_VALIDATOR); this.unionEnabled = embeddedContent == null && fragmentContext.getOptions().getOption(ExecConstants.ENABLE_UNION_TYPE); this.skipMalformedJSONRecords = fragmentContext.getOptions().getOption(ExecConstants.JSON_SKIP_MALFORMED_RECORDS_VALIDATOR); @@ -139,12 +140,18 @@ public class JSONRecordReader extends AbstractRecordReader { this.writer = new VectorContainerWriter(output, unionEnabled); if (isSkipQuery()) { - this.jsonReader = new CountingJsonReader(fragmentContext.getManagedBuffer()); + this.jsonReader = new CountingJsonReader(fragmentContext.getManagedBuffer(), enableNanInf); } else { - this.jsonReader = new JsonReader(fragmentContext.getManagedBuffer(), ImmutableList.copyOf(getColumns()), enableAllTextMode, true, readNumbersAsDouble); + this.jsonReader = new JsonReader.Builder(fragmentContext.getManagedBuffer()) + .schemaPathColumns(ImmutableList.copyOf(getColumns())) + .allTextMode(enableAllTextMode) + .skipOuterList(true) + .readNumbersAsDouble(readNumbersAsDouble) + .enableNanInf(enableNanInf) + .build(); } setupParser(); - }catch(final Exception e){ + } catch (final Exception e){ handleAndRaise("Failure reading JSON file", e); } } http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java index 345c056..b350d57 
100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JsonRecordWriter.java @@ -95,7 +95,9 @@ public class JsonRecordWriter extends JSONOutputRecordWriter implements RecordWr stream = fs.create(fileName); storageStrategy.applyToFile(fs, fileName); - JsonGenerator generator = factory.createGenerator(stream).useDefaultPrettyPrinter(); + JsonGenerator generator = factory.createGenerator(stream).useDefaultPrettyPrinter() + .configure(JsonGenerator.Feature.QUOTE_NON_NUMERIC_NUMBERS, + !Boolean.parseBoolean(writerOptions.get("enableNanInf"))); if (uglify) { generator = generator.setPrettyPrinter(new MinimalPrettyPrinter(LINE_FEED)); } http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java index 95ebe6e..49e0f50 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java @@ -35,13 +35,15 @@ import org.apache.drill.common.exceptions.UserException; public abstract class BaseJsonProcessor implements JsonProcessor { - private static final ObjectMapper MAPPER = new ObjectMapper().configure( - JsonParser.Feature.ALLOW_COMMENTS, true).configure( - JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true); + private static final ObjectMapper DEFAULT_MAPPER = getDefaultMapper(); + + private static final ObjectMapper NAN_INF_MAPPER = getDefaultMapper() + .configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true); private static final String 
JACKSON_PARSER_EOF_FILE_MSG = "Unexpected end-of-input:"; + private final boolean enableNanInf; - public static enum JsonExceptionProcessingState { + public enum JsonExceptionProcessingState { END_OF_STREAM, PROC_SUCCEED } @@ -50,6 +52,16 @@ public abstract class BaseJsonProcessor implements JsonProcessor { protected JsonToken lastSeenJsonToken = null; boolean ignoreJSONParseErrors = false; // default False + /** + * + * @return Default json mapper + */ + public static ObjectMapper getDefaultMapper() { + return new ObjectMapper().configure( + JsonParser.Feature.ALLOW_COMMENTS, true).configure( + JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true); + } + public boolean ignoreJSONParseError() { return ignoreJSONParseErrors; } @@ -58,13 +70,18 @@ public abstract class BaseJsonProcessor implements JsonProcessor { this.ignoreJSONParseErrors = ignoreJSONParseErrors; } - public BaseJsonProcessor(DrillBuf workBuf) { + public BaseJsonProcessor(DrillBuf workBuf, boolean enableNanInf) { + this.enableNanInf = enableNanInf; workBuf = Preconditions.checkNotNull(workBuf); } @Override public void setSource(InputStream is) throws IOException { - parser = MAPPER.getFactory().createParser(is); + if (enableNanInf) { + parser = NAN_INF_MAPPER.getFactory().createParser(is); + } else { + parser = DEFAULT_MAPPER.getFactory().createParser(is); + } } @Override http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java index 5f7a7a4..cfbe2d7 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java +++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java @@ -29,8 +29,8 @@ import org.apache.drill.exec.vector.complex.writer.BaseWriter; public class CountingJsonReader extends BaseJsonProcessor { - public CountingJsonReader(DrillBuf workBuf) { - super(workBuf); + public CountingJsonReader(DrillBuf workBuf, boolean enableNanInf) { + super(workBuf, enableNanInf); } @Override http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java index 4ffbb26..9c77fbf 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java @@ -80,27 +80,84 @@ public class JsonReader extends BaseJsonProcessor { private FieldSelection selection; - public JsonReader(DrillBuf managedBuf, boolean allTextMode, - boolean skipOuterList, boolean readNumbersAsDouble) { - this(managedBuf, GroupScan.ALL_COLUMNS, allTextMode, skipOuterList, - readNumbersAsDouble); + private JsonReader(Builder builder) { + super(builder.managedBuf, builder.enableNanInf); + selection = FieldSelection.getFieldSelection(builder.columns); + workingBuffer = builder.workingBuffer; + skipOuterList = builder.skipOuterList; + allTextMode = builder.allTextMode; + columns = builder.columns; + mapOutput = builder.mapOutput; + listOutput = builder.listOutput; + currentFieldName = builder.currentFieldName; + readNumbersAsDouble = builder.readNumbersAsDouble; } - public JsonReader(DrillBuf managedBuf, List<SchemaPath> columns, - boolean allTextMode, boolean skipOuterList, boolean readNumbersAsDouble) { - super(managedBuf); - assert 
Preconditions.checkNotNull(columns).size() > 0 : "JSON record reader requires at least one column"; - this.selection = FieldSelection.getFieldSelection(columns); - this.workingBuffer = new WorkingBuffer(managedBuf); - this.skipOuterList = skipOuterList; - this.allTextMode = allTextMode; - this.columns = columns; - this.mapOutput = new MapVectorOutput(workingBuffer); - this.listOutput = new ListVectorOutput(workingBuffer); - this.currentFieldName = "<none>"; - this.readNumbersAsDouble = readNumbersAsDouble; + public static class Builder { + private DrillBuf managedBuf; + private WorkingBuffer workingBuffer; + private List<SchemaPath> columns; + private MapVectorOutput mapOutput; + private ListVectorOutput listOutput; + private String currentFieldName = "<none>"; + private boolean readNumbersAsDouble; + private boolean skipOuterList; + private boolean allTextMode; + private boolean enableNanInf; + + + public Builder(DrillBuf managedBuf) { + this.managedBuf = managedBuf; + this.workingBuffer = new WorkingBuffer(managedBuf); + this.mapOutput = new MapVectorOutput(workingBuffer); + this.listOutput = new ListVectorOutput(workingBuffer); + this.readNumbersAsDouble = false; + this.skipOuterList = false; + this.allTextMode = false; + this.enableNanInf = true; + } + + public Builder readNumbersAsDouble(boolean readNumbersAsDouble) { + this.readNumbersAsDouble = readNumbersAsDouble; + return this; + } + + public Builder skipOuterList(boolean skipOuterList) { + this.skipOuterList = skipOuterList; + return this; + } + + public Builder allTextMode(boolean allTextMode) { + this.allTextMode = allTextMode; + return this; + } + + public Builder enableNanInf(boolean enableNanInf) { + this.enableNanInf = enableNanInf; + return this; + } + + public Builder defaultSchemaPathColumns() { + this.columns = GroupScan.ALL_COLUMNS; + return this; + } + + public Builder schemaPathColumns(List<SchemaPath> columns) { + this.columns = columns; + return this; + } + + public JsonReader build() { + 
if (columns == null) { + throw new IllegalStateException("You need to set SchemaPath columns in order to build JsonReader"); + } + assert Preconditions.checkNotNull(columns).size() > 0 : "JSON record reader requires at least one column"; + return new JsonReader(this); + } } + + @SuppressWarnings("resource") @Override public void ensureAtLeastOneField(ComplexWriter writer) { http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonWriter.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonWriter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonWriter.java index fe43f82..498a206 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonWriter.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonWriter.java @@ -37,7 +37,7 @@ public class JsonWriter { private final JsonOutput gen; public JsonWriter(OutputStream out, boolean pretty, boolean useExtendedOutput) throws IOException{ - JsonGenerator writer = factory.createJsonGenerator(out); + JsonGenerator writer = factory.configure(JsonGenerator.Feature.QUOTE_NON_NUMERIC_NUMBERS, false).createJsonGenerator(out); if(pretty){ writer = writer.useDefaultPrettyPrinter(); } http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/exec/java-exec/src/main/resources/drill-module.conf ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/resources/drill-module.conf b/exec/java-exec/src/main/resources/drill-module.conf index 97ac19d..c6a7203 100644 --- a/exec/java-exec/src/main/resources/drill-module.conf +++ b/exec/java-exec/src/main/resources/drill-module.conf @@ -517,6 +517,8 @@ drill.exec.options: { store.format: "parquet", store.hive.optimize_scan_with_native_readers: false, store.json.all_text_mode: 
false, + store.json.writer.allow_nan_inf: true, + store.json.reader.allow_nan_inf: true, store.json.extended_types: false, store.json.read_numbers_as_double: false, store.json.reader.print_skipped_invalid_record_number: false, http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java new file mode 100644 index 0000000..c82689e --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestMathFunctionsWithNanInf.java @@ -0,0 +1,551 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to you under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
*/

package org.apache.drill.exec.fn.impl;

import org.apache.commons.io.FileUtils;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.test.BaseTestQuery;
import org.junit.Test;

import java.io.File;

/**
 * Verifies IEEE-754 semantics of math, comparison and aggregate functions when
 * their inputs are the non-numeric JSON tokens {@code NaN} and {@code Infinity}
 * (DRILL-5919). Every test reads the tokens as doubles by enabling
 * {@code store.json.read_numbers_as_double} for the duration of the query.
 */
public class TestMathFunctionsWithNanInf extends BaseTestQuery {

  private static final String TABLE_NAME = "nan_test.json";
  // Single row with a NaN column and a positive-Infinity column.
  private static final String NAN_INF_JSON = "{\"nan_col\":NaN, \"inf_col\":Infinity}";
  // Two rows (NaN/Infinity plus ordinary numbers) for the aggregate-function tests.
  private static final String NAN_INF_NUMBERS_JSON = "[{\"nan_col\":NaN, \"inf_col\":Infinity}," +
      "{\"nan_col\":5.0, \"inf_col\":5.0}]";
  private static final String[] COLUMNS = {"nan_col", "inf_col"};

  /** Builds {@code select <nanExpr> as nan_col, <infExpr> as inf_col from dfs.`nan_test.json`}. */
  private static String query(String nanExpression, String infExpression) {
    return String.format("select %s as nan_col, %s as inf_col from dfs.`%s`",
        nanExpression, infExpression, TABLE_NAME);
  }

  @Test
  public void testIsNullFunction() throws Exception {
    // NaN and Infinity are values, not SQL NULL.
    evalTest(NAN_INF_JSON, query("isnull(nan_col)", "isnull(inf_col)"), false, false);
  }

  @Test
  public void testIsNotNullFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("isnotnull(nan_col)", "isnotnull(inf_col)"), true, true);
  }

  @Test
  public void testEqualFunction() throws Exception {
    // Per IEEE 754, NaN is not equal even to itself; Infinity equals itself.
    evalTest(NAN_INF_JSON, query("equal(nan_col, nan_col)", "equal(inf_col, inf_col)"), false, true);
  }

  @Test
  public void testNotEqualFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("not_equal(nan_col, nan_col)", "not_equal(inf_col, inf_col)"), true, false);
  }

  @Test
  public void testLessThanFunction() throws Exception {
    // Every ordered comparison with NaN is false.
    evalTest(NAN_INF_JSON, query("less_than(nan_col, 5)", "less_than(inf_col, 5)"), false, false);
  }

  @Test
  public void testGreaterThanFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("greater_than(nan_col, 5)", "greater_than(inf_col, 5)"), false, true);
  }

  @Test
  public void testGreaterThanOrEqualToFunction() throws Exception {
    evalTest(NAN_INF_JSON,
        query("greater_than_or_equal_to(nan_col, 5)",
            "greater_than_or_equal_to(inf_col, cast('Infinity' as float))"),
        false, true);
  }

  @Test
  public void testLessThanOrEqualToFunction() throws Exception {
    evalTest(NAN_INF_JSON,
        query("less_than_or_equal_to(nan_col, 5)",
            "less_than_or_equal_to(inf_col, cast('Infinity' as float))"),
        false, true);
  }

  @Test
  public void testHashFunctions() throws Exception {
    // Only checks that hashing NaN/Infinity succeeds; the hash values themselves are not pinned.
    for (String token : new String[] {"NaN", "Infinity"}) {
      test("select hash(cast('%s' as double)) from (values(1))", token);
      test("select hash32(cast('%s' as double)) from (values(1))", token);
      test("select hash32(cast('%s' as double), 4) from (values(1))", token);
      test("select hash64AsDouble(cast('%s' as float)) from (values(1))", token);
      test("select hash64AsDouble(cast('%s' as float), 4) from (values(1))", token);
      test("select hash32AsDouble(cast('%s' as float)) from (values(1))", token);
      test("select hash32AsDouble(cast('%s' as float), 4) from (values(1))", token);
      test("select hash64(cast('%s' as float)) from (values(1))", token);
      test("select hash64(cast('%s' as float), 4) from (values(1))", token);
    }
  }

  @Test
  public void testSignFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("sign(nan_col)", "sign(inf_col)"), 0, 1);
  }

  @Test
  public void testLogFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("log(nan_col)", "log(inf_col)"), Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testAddFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("add(nan_col, 3)", "add(inf_col, 3)"), Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testSubtractFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("subtract(nan_col, 3)", "subtract(inf_col, 3)"),
        Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testDivideFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("divide(nan_col, 3)", "divide(inf_col, 3)"),
        Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testMultiplyFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("multiply(nan_col, 3)", "multiply(inf_col, 3)"),
        Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testTanhFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("tanh(nan_col)", "tanh(inf_col)"), Double.NaN, 1.0);
  }

  @Test
  public void testTanFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("tan(nan_col)", "tan(inf_col)"), Double.NaN, Double.NaN);
  }

  @Test
  public void testAtanFunction() throws Exception {
    // atan(Infinity) == pi/2
    evalTest(NAN_INF_JSON, query("atan(nan_col)", "atan(inf_col)"), Double.NaN, 1.5707963267948966);
  }

  @Test
  public void testSinFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("sin(nan_col)", "sin(inf_col)"), Double.NaN, Double.NaN);
  }

  @Test
  public void testAsinFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("asin(nan_col)", "asin(inf_col)"), Double.NaN, Double.NaN);
  }

  @Test
  public void testSinhFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("sinh(nan_col)", "sinh(inf_col)"), Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testCosFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("cos(nan_col)", "cos(inf_col)"), Double.NaN, Double.NaN);
  }

  @Test
  public void testAcosFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("acos(nan_col)", "acos(inf_col)"), Double.NaN, Double.NaN);
  }

  @Test
  public void testCotFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("cot(nan_col)", "cot(inf_col)"), Double.NaN, Double.NaN);
  }

  @Test
  public void testCoshFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("cosh(nan_col)", "cosh(inf_col)"), Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testSqrtFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("sqrt(nan_col)", "sqrt(inf_col)"), Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testCeilFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("ceil(nan_col)", "ceil(inf_col)"), Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testNegativeFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("negative(nan_col)", "negative(inf_col)"),
        Double.NaN, Double.NEGATIVE_INFINITY);
  }

  @Test
  public void testAbsFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("abs(nan_col)", "abs(inf_col)"), Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testFloorFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("floor(nan_col)", "floor(inf_col)"), Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testExpFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("exp(nan_col)", "exp(inf_col)"), Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testCbrtFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("cbrt(nan_col)", "cbrt(inf_col)"), Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testModFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("mod(nan_col,1)", "mod(inf_col,1)"), Double.NaN, Double.NaN);
  }

  @Test
  public void testDegreesFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("degrees(nan_col)", "degrees(inf_col)"),
        Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testTruncFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("trunc(nan_col,3)", "trunc(inf_col,3)"), Double.NaN, Double.NaN);
  }

  @Test
  public void testPowerFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("power(nan_col, 2)", "power(inf_col, 2)"),
        Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testRadiansFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("radians(nan_col)", "radians(inf_col)"),
        Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testRoundFunction() throws Exception {
    // round() converts to a long internally: NaN rounds to 0, Infinity saturates at Long.MAX_VALUE.
    evalTest(NAN_INF_JSON, query("round(nan_col)", "round(inf_col)"), 0.0, Double.valueOf(Long.MAX_VALUE));
  }

  @Test
  public void testCasthighFunction() throws Exception {
    evalTest(NAN_INF_JSON, query("casthigh(nan_col)", "casthigh(inf_col)"),
        Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testCastfloat4Function() throws Exception {
    evalTest(NAN_INF_JSON, query("castfloat4(nan_col)", "castfloat4(inf_col)"),
        Float.NaN, Float.POSITIVE_INFINITY);
  }

  @Test
  public void testVarpopFunction() throws Exception {
    evalTest(NAN_INF_NUMBERS_JSON, query("var_pop(nan_col)", "var_pop(inf_col)"), Double.NaN, Double.NaN);
  }

  @Test
  public void testStddevsampFunction() throws Exception {
    evalTest(NAN_INF_NUMBERS_JSON, query("stddev_samp(nan_col)", "stddev_samp(inf_col)"),
        Double.NaN, Double.NaN);
  }

  @Test
  public void testVarsampFunction() throws Exception {
    evalTest(NAN_INF_NUMBERS_JSON, query("var_samp(nan_col)", "var_samp(inf_col)"), Double.NaN, Double.NaN);
  }

  @Test
  public void testStddevpopFunction() throws Exception {
    evalTest(NAN_INF_NUMBERS_JSON, query("stddev_pop(nan_col)", "stddev_pop(inf_col)"),
        Double.NaN, Double.NaN);
  }

  @Test
  public void testMinFunction() throws Exception {
    // min/max treat NaN as larger than any number, so min over {NaN, 5.0} is NaN only for nan_col.
    evalTest(NAN_INF_NUMBERS_JSON, query("min(nan_col)", "min(inf_col)"), Double.NaN, 5.0);
  }

  @Test
  public void testMaxFunction() throws Exception {
    evalTest(NAN_INF_NUMBERS_JSON, query("max(nan_col)", "max(inf_col)"),
        Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testSumFunction() throws Exception {
    evalTest(NAN_INF_NUMBERS_JSON, query("sum(nan_col)", "sum(inf_col)"),
        Double.NaN, Double.POSITIVE_INFINITY);
  }

  @Test
  public void testAvgFunction() throws Exception {
    evalTest(NAN_INF_NUMBERS_JSON, query("avg(nan_col)", "avg(inf_col)"),
        Double.NaN, Double.POSITIVE_INFINITY);
  }

  /**
   * Writes {@code json} into the test workspace, runs {@code query} with
   * {@code store.json.read_numbers_as_double} enabled and compares the single
   * result row against {@code expectedValues} (one value per column, in
   * {@link #COLUMNS} order). The file is always removed and the session option
   * is reset (not forced to false) afterwards.
   */
  private void evalTest(String json, String query, Object... expectedValues) throws Exception {
    File file = new File(dirTestWatcher.getRootDir(), TABLE_NAME);
    try {
      FileUtils.writeStringToFile(file, json);
      test("alter session set `%s` = true", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
      testBuilder()
          .sqlQuery(query)
          .ordered()
          .baselineColumns(COLUMNS)
          .baselineValues(expectedValues)
          .build()
          .run();
    } finally {
      // Reset restores the pre-test default instead of unconditionally forcing `false`.
      test("alter session reset `%s`", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
      FileUtils.deleteQuietly(file);
    }
  }

}
http://git-wip-us.apache.org/repos/asf/drill/blob/9bb93703/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java new file mode 100644 index 0000000..60d4b7a --- /dev/null +++
b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonNanInf.java @@ -0,0 +1,274 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.drill.exec.vector.complex.writer;

import org.apache.commons.io.FileUtils;
import org.apache.drill.test.BaseTestQuery;
import org.apache.drill.common.exceptions.UserRemoteException;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.ExecConstants;
import org.apache.drill.exec.record.RecordBatchLoader;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.drill.exec.rpc.user.QueryDataBatch;
import org.apache.drill.exec.vector.VarCharVector;
import org.junit.Ignore;
import org.junit.Test;

import java.io.File;
import java.util.List;

import static org.apache.drill.test.TestBuilder.mapOf;
import static org.hamcrest.CoreMatchers.containsString;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;

/**
 * End-to-end tests for reading and writing the non-numeric JSON tokens
 * {@code NaN} and {@code Infinity} (DRILL-5919): plain selects, filters,
 * CTAS round-trips and the convert_to/from JSON functions.
 */
public class TestJsonNanInf extends BaseTestQuery {

  private static final String TABLE = "nan_test.json";
  private static final String NAN_INF_ROW = "{\"nan_col\":NaN, \"inf_col\":Infinity}";
  // Two-row tables used by the filter tests: one NaN/Infinity row plus a plain numeric row.
  private static final String TWO_ROWS_POS_INF = "[{\"nan_col\":NaN, \"inf_col\":Infinity}," +
      "{\"nan_col\":5.0, \"inf_col\":5.0}]";
  private static final String TWO_ROWS_NEG_INF = "[{\"nan_col\":NaN, \"inf_col\":-Infinity}," +
      "{\"nan_col\":5.0, \"inf_col\":5.0}]";

  @Test
  public void testNanInfSelect() throws Exception {
    runJsonQueryTest(NAN_INF_ROW,
        String.format("select * from dfs.`%s`", TABLE),
        new String[] {"nan_col", "inf_col"},
        new Object[] {Double.NaN, Double.POSITIVE_INFINITY});
  }

  @Test
  public void testExcludePositiveInfinity() throws Exception {
    runJsonQueryTest(TWO_ROWS_POS_INF,
        String.format("select inf_col from dfs.`%s` where inf_col <> cast('Infinity' as double)", TABLE),
        new String[] {"inf_col"},
        new Object[] {5.0});
  }

  @Test
  public void testExcludeNegativeInfinity() throws Exception {
    runJsonQueryTest(TWO_ROWS_NEG_INF,
        String.format("select inf_col from dfs.`%s` where inf_col <> cast('-Infinity' as double)", TABLE),
        new String[] {"inf_col"},
        new Object[] {5.0});
  }

  @Test
  public void testIncludePositiveInfinity() throws Exception {
    runJsonQueryTest(TWO_ROWS_POS_INF,
        String.format("select inf_col from dfs.`%s` where inf_col = cast('Infinity' as double)", TABLE),
        new String[] {"inf_col"},
        new Object[] {Double.POSITIVE_INFINITY});
  }

  @Test
  public void testExcludeNan() throws Exception {
    // NaN cannot be filtered with numeric comparison (NaN <> x is always false), so compare as varchar.
    runJsonQueryTest(TWO_ROWS_NEG_INF,
        String.format("select nan_col from dfs.`%s` where cast(nan_col as varchar) <> 'NaN'", TABLE),
        new String[] {"nan_col"},
        new Object[] {5.0});
  }

  @Test
  public void testIncludeNan() throws Exception {
    runJsonQueryTest(TWO_ROWS_NEG_INF,
        String.format("select nan_col from dfs.`%s` where cast(nan_col as varchar) = 'NaN'", TABLE),
        new String[] {"nan_col"},
        new Object[] {Double.NaN});
  }

  @Test(expected = UserRemoteException.class)
  public void testNanInfFailure() throws Exception {
    // With store.json.reader.allow_nan_inf disabled, bare NaN/Infinity tokens must fail to parse.
    File file = new File(dirTestWatcher.getRootDir(), TABLE);
    test("alter session set `%s` = false", ExecConstants.JSON_READER_NAN_INF_NUMBERS);
    try {
      FileUtils.writeStringToFile(file, NAN_INF_ROW);
      test("select * from dfs.`%s`;", TABLE);
    } catch (UserRemoteException e) {
      assertThat(e.getMessage(), containsString("Error parsing JSON"));
      throw e;
    } finally {
      test("alter session reset `%s`", ExecConstants.JSON_READER_NAN_INF_NUMBERS);
      FileUtils.deleteQuietly(file);
    }
  }

  @Test
  public void testCreateTableNanInf() throws Exception {
    File file = new File(dirTestWatcher.getRootDir(), TABLE);
    String newTable = "ctas_test";
    try {
      FileUtils.writeStringToFile(file, NAN_INF_ROW);
      test("alter session set `store.format`='json'");
      test("create table dfs.`%s` as select * from dfs.`%s`;", newTable, TABLE);

      // The JSON writer must emit bare NaN/Infinity tokens, not quoted strings.
      File resultFile = new File(new File(file.getParent(), newTable), "0_0_0.json");
      String resultJson = FileUtils.readFileToString(resultFile);
      assertNotQuoted(resultJson, "NaN");
      assertNotQuoted(resultJson, "Infinity");
    } finally {
      test("drop table if exists dfs.`%s`", newTable);
      FileUtils.deleteQuietly(file);
    }
  }

  @Test
  public void testConvertFromJsonFunction() throws Exception {
    String table = "nan_test.csv";
    File file = new File(dirTestWatcher.getRootDir(), table);
    String csv = "col_0, {\"nan_col\":NaN}";
    try {
      FileUtils.writeStringToFile(file, csv);
      testBuilder()
          .sqlQuery(String.format("select convert_fromJSON(columns[1]) as col from dfs.`%s`", table))
          .unOrdered()
          .baselineColumns("col")
          .baselineValues(mapOf("nan_col", Double.NaN))
          .build()
          .run();
    } finally {
      FileUtils.deleteQuietly(file);
    }
  }

  @Test
  public void testConvertToJsonFunction() throws Exception {
    String table = "nan_test.csv";
    File file = new File(dirTestWatcher.getRootDir(), table);
    String csv = "col_0, {\"nan_col\":NaN}";
    String query = String.format("select string_binary(convert_toJSON(convert_fromJSON(columns[1]))) as col " +
        "from dfs.`%s` where columns[0]='col_0'", table);
    try {
      FileUtils.writeStringToFile(file, csv);
      List<QueryDataBatch> results = testSqlWithResults(query);
      RecordBatchLoader batchLoader = new RecordBatchLoader(getAllocator());
      assertEquals("Query result must contain 1 row", 1, results.size());
      QueryDataBatch batch = results.get(0);
      try {
        batchLoader.load(batch.getHeader().getDef(), batch.getData());
        VectorWrapper<?> vw = batchLoader.getValueAccessorById(VarCharVector.class,
            batchLoader.getValueVectorId(SchemaPath.getCompoundPath("col")).getFieldIds());
        // convert_toJSON must emit `NaN` as a bare token, not a quoted string.
        String resultJson = vw.getValueVector().getAccessor().getObject(0).toString();
        assertNotQuoted(resultJson, "NaN");
      } finally {
        // Release resources even when an assertion above fails.
        batch.release();
        batchLoader.clear();
      }
    } finally {
      FileUtils.deleteQuietly(file);
    }
  }

  @Test
  @Ignore("DRILL-6018")
  public void testNanInfLiterals() throws Exception {
    testBuilder()
        .sqlQuery(" select sin(cast('NaN' as double)) as sin_col, " +
            "cast('Infinity' as double)+1 as sum_col from (values(1))")
        .unOrdered()
        .baselineColumns("sin_col", "sum_col")
        .baselineValues(Double.NaN, Double.POSITIVE_INFINITY)
        .build()
        .run();
  }

  /**
   * Writes {@code json} to the {@link #TABLE} workspace file, runs {@code query}
   * and compares its single result row against the expected columns/values.
   * The file is always deleted afterwards.
   */
  private void runJsonQueryTest(String json, String query, String[] columns, Object[] values) throws Exception {
    File file = new File(dirTestWatcher.getRootDir(), TABLE);
    try {
      FileUtils.writeStringToFile(file, json);
      testBuilder()
          .sqlQuery(query)
          .unOrdered()
          .baselineColumns(columns)
          .baselineValues(values)
          .build()
          .run();
    } finally {
      FileUtils.deleteQuietly(file);
    }
  }

  /**
   * Asserts that {@code token} appears in {@code json} and is not enclosed in
   * double quotes. Guards against {@code indexOf} returning -1, which would
   * otherwise throw {@link StringIndexOutOfBoundsException} instead of a clear
   * assertion failure.
   */
  private static void assertNotQuoted(String json, String token) {
    int index = json.indexOf(token);
    assertTrue("`" + token + "` must be present in the output", index >= 0);
    assertFalse("`" + token + "` must not be enclosed with \"\" ",
        index > 0 && json.charAt(index - 1) == '"');
    assertFalse("`" + token + "` must not be enclosed with \"\" ",
        json.charAt(index + token.length()) == '"');
  }

}