This is an automated email from the ASF dual-hosted git repository.

mthomsen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nifi.git


The following commit(s) were added to refs/heads/master by this push:
     new e5fa18d  NIFI-6088: Widen type inference for BIGINT and DOUBLE
e5fa18d is described below

commit e5fa18d63cda60f0b10a8a1a2abff1cf69e976bd
Author: Matthew Burgess <mattyb...@apache.org>
AuthorDate: Wed Feb 27 20:25:25 2019 -0500

    NIFI-6088: Widen type inference for BIGINT and DOUBLE
    
    This closes #3342
    
    Signed-off-by: Mike Thomsen <mikerthom...@gmail.com>
---
 .../nifi-record-serialization-services/pom.xml     |  1 +
 .../org/apache/nifi/json/JsonSchemaInference.java  |  6 +-
 .../apache/nifi/json/TestJsonSchemaInference.java  | 84 ++++++++++++++++++++++
 .../src/test/resources/json/data-types.json        | 24 +++++++
 4 files changed, 112 insertions(+), 3 deletions(-)

diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
index d65f471..27d4da7 100755
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/pom.xml
@@ -152,6 +152,7 @@
                         <exclude>src/test/resources/json/bank-account-multiarray.json</exclude>
                         <exclude>src/test/resources/json/bank-account-multiline.json</exclude>
                         <exclude>src/test/resources/json/bank-account-oneline.json</exclude>
+                        <exclude>src/test/resources/json/data-types.json</exclude>
                         <exclude>src/test/resources/json/json-with-unicode.json</exclude>
                         <exclude>src/test/resources/json/primitive-type-array.json</exclude>
                         <exclude>src/test/resources/json/single-bank-account.json</exclude>
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonSchemaInference.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonSchemaInference.java
index b09c79f..02587cc 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonSchemaInference.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonSchemaInference.java
@@ -56,13 +56,13 @@ public class JsonSchemaInference extends HierarchicalSchemaInference<JsonNode> {
         }
 
         if (jsonNode.isIntegralNumber()) {
+            if (jsonNode.isBigInteger()) {
+                return RecordFieldType.BIGINT.getDataType();
+            }
             return RecordFieldType.LONG.getDataType();
         }
 
         if (jsonNode.isFloatingPointNumber()) {
-            return RecordFieldType.FLOAT.getDataType();
-        }
-        if (jsonNode.isDouble()) {
             return RecordFieldType.DOUBLE.getDataType();
         }
         if (jsonNode.isBinary()) {
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/json/TestJsonSchemaInference.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/json/TestJsonSchemaInference.java
new file mode 100644
index 0000000..0e50764
--- /dev/null
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/json/TestJsonSchemaInference.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.json;
+
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.schema.inference.InferSchemaAccessStrategy;
+import org.apache.nifi.schema.inference.TimeValueInference;
+import org.apache.nifi.serialization.record.RecordFieldType;
+import org.apache.nifi.serialization.record.RecordSchema;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertSame;
+
+public class TestJsonSchemaInference {
+
+    private final TimeValueInference timestampInference = new TimeValueInference("yyyy-MM-dd", "HH:mm:ss", "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
+
+    @Test
+    public void testInferenceIncludesAllRecords() throws IOException {
+        final File file = new File("src/test/resources/json/data-types.json");
+
+        final RecordSchema schema;
+        try (final InputStream in = new FileInputStream(file);
+             final InputStream bufferedIn = new BufferedInputStream(in)) {
+
+            final InferSchemaAccessStrategy<?> accessStrategy = new InferSchemaAccessStrategy<>(
+                    (var, content) -> new JsonRecordSource(content),
+                    new JsonSchemaInference(timestampInference), Mockito.mock(ComponentLog.class));
+            schema = accessStrategy.getSchema(null, bufferedIn, null);
+        }
+
+        assertSame(RecordFieldType.STRING, schema.getDataType("varcharc").get().getFieldType());
+        assertSame(RecordFieldType.LONG, schema.getDataType("uuid").get().getFieldType());
+        assertSame(RecordFieldType.LONG, schema.getDataType("tinyintc").get().getFieldType());
+        assertSame(RecordFieldType.STRING, schema.getDataType("textc").get().getFieldType());
+        assertEquals(RecordFieldType.DATE.getDataType("yyyy-MM-dd"), schema.getDataType("datec").get());
+        assertSame(RecordFieldType.LONG, schema.getDataType("smallintc").get().getFieldType());
+        assertSame(RecordFieldType.LONG, schema.getDataType("mediumintc").get().getFieldType());
+        assertSame(RecordFieldType.LONG, schema.getDataType("intc").get().getFieldType());
+        assertSame(RecordFieldType.BIGINT, schema.getDataType("bigintc").get().getFieldType());
+        assertSame(RecordFieldType.DOUBLE, schema.getDataType("floatc").get().getFieldType());
+        assertSame(RecordFieldType.DOUBLE, schema.getDataType("doublec").get().getFieldType());
+        assertSame(RecordFieldType.DOUBLE, schema.getDataType("decimalc").get().getFieldType());
+        assertEquals(RecordFieldType.TIMESTAMP.getDataType("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"), schema.getDataType("timestampc").get());
+        assertEquals(RecordFieldType.TIME.getDataType("HH:mm:ss"), schema.getDataType("timec").get());
+        assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("charc").get());
+        assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("tinytextc").get());
+        assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("blobc").get());
+        assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("mediumtextc").get());
+        assertSame(RecordFieldType.LONG, schema.getDataType("enumc").get().getFieldType());
+        assertSame(RecordFieldType.LONG, schema.getDataType("setc").get().getFieldType());
+        assertSame(RecordFieldType.LONG, schema.getDataType("boolc").get().getFieldType());
+        assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("binaryc").get());
+
+        final List<String> fieldNames = schema.getFieldNames();
+        assertEquals(Arrays.asList("varcharc", "uuid", "tinyintc", "textc", "datec", "smallintc", "mediumintc", "intc", "bigintc",
+                "floatc", "doublec", "decimalc", "timestampc", "timec", "charc", "tinytextc", "blobc", "mediumtextc", "enumc", "setc", "boolc", "binaryc"), fieldNames);
+    }
+
+}
\ No newline at end of file
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/json/data-types.json b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/json/data-types.json
new file mode 100644
index 0000000..343288e
--- /dev/null
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/resources/json/data-types.json
@@ -0,0 +1,24 @@
+[{
+  "varcharc": "Nam penatibus in neque.",
+  "uuid": 1,
+  "tinyintc": -81,
+  "textc": "A faucibus volutpat placerat euismod mollis, quis semper quis 
ultrices aliquam massa vestibulum a lacus hendrerit turpis nullam, tincidunt 
ullamcorper ad ridiculus habitasse tristique vivamus elit.  Ac id montes erat 
accumsan rhoncus consectetur leo condimentum.\n\nConubia lectus et viverra 
taciti, mollis molestie phasellus, fermentum accumsan sem nisi sit dapibus 
interdum ridiculus blandit blandit.  Volutpat nullam orci cras.  Justo nullam 
penatibus non fusce vivamus integer [...]
+  "datec": "2019-02-27",
+  "smallintc": -8423,
+  "mediumintc": 6008538,
+  "intc": -1130599020,
+  "bigintc": 171234567890123456789,
+  "floatc": 182.33,
+  "doublec": 149.67382865705562,
+  "decimalc": 109.88,
+  "timestampc": "2019-02-27T20:40:53.000Z",
+  "timec": "20:40:53",
+  "charc": "DBDDGpPz",
+  "tinytextc": "hgFuypClmWWMNsDXEFJJOhdsljdBP",
+  "blobc": "Wc5YvvF8fUsOgejKPsOa",
+  "mediumtextc": "Torquent aliquet malesuada adipiscing, eget himenaeos 
facilisi ridiculus eros netus, nisi semper eleifend dolor nisi sapien phasellus 
luctus libero aenean suscipit pulvinar, lacus posuere id hendrerit feugiat 
vitae purus ac blandit euismod pharetra.  Adipiscing lectus primis eros 
pellentesque porta blandit dictum fermentum lectus tortor nam, fusce est dis 
class ornare neque est enim quisque a.\n\nScelerisque aptent etiam non 
imperdiet volutpat.  Quisque est fusce purus  [...]
+  "enumc": 1,
+  "setc": 4,
+  "boolc": 0,
+  "binaryc": "ehynfnybBfxmxgkMVrVt"
+}]
\ No newline at end of file

Reply via email to