This is an automated email from the ASF dual-hosted git repository.

exceptionfactory pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi.git


The following commit(s) were added to refs/heads/main by this push:
     new 7681054cf7 NIFI-12697 Allow disabling scientific notation when writing 
JSON
7681054cf7 is described below

commit 7681054cf7ad82b20f0d589957f37b1b5aeff4d1
Author: Mark Payne <marka...@hotmail.com>
AuthorDate: Tue Jan 30 09:50:32 2024 -0500

    NIFI-12697 Allow disabling scientific notation when writing JSON
    
    Enable Jackson's fast parser implementations (USE_FAST_DOUBLE_PARSER and
    USE_FAST_BIG_NUMBER_PARSER) to parse floating-point numbers and big
    integers more efficiently
    
    Addressed the case where the JSON Writer is configured to not allow
    scientific notation but receives a record whose SerializedForm already
    uses scientific notation.
    
    This closes #8319
    
    Signed-off-by: David Handermann <exceptionfact...@apache.org>
---
 .../nifi/json/AbstractJsonRowRecordReader.java     |  3 +
 .../java/org/apache/nifi/json/WriteJsonResult.java | 79 +++++++++++++++++-----
 .../org/apache/nifi/json/JsonRecordSetWriter.java  | 39 ++++++++---
 .../org/apache/nifi/json/TestWriteJsonResult.java  | 79 +++++++++++++++++++++-
 4 files changed, 173 insertions(+), 27 deletions(-)

diff --git 
a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/AbstractJsonRowRecordReader.java
 
b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/AbstractJsonRowRecordReader.java
index 6daf71a491..58a41cdfb8 100644
--- 
a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/AbstractJsonRowRecordReader.java
+++ 
b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/AbstractJsonRowRecordReader.java
@@ -19,6 +19,7 @@ package org.apache.nifi.json;
 
 import com.fasterxml.jackson.core.JsonParseException;
 import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonParser.Feature;
 import com.fasterxml.jackson.core.JsonToken;
 import com.fasterxml.jackson.core.StreamReadConstraints;
 import com.fasterxml.jackson.databind.JsonNode;
@@ -138,6 +139,8 @@ public abstract class AbstractJsonRowRecordReader 
implements RecordReader {
         try {
             final StreamReadConstraints configuredStreamReadConstraints = 
streamReadConstraints == null ? DEFAULT_STREAM_READ_CONSTRAINTS : 
streamReadConstraints;
             jsonParser = tokenParserFactory.getJsonParser(in, 
configuredStreamReadConstraints, allowComments);
+            jsonParser.enable(Feature.USE_FAST_DOUBLE_PARSER);
+            jsonParser.enable(Feature.USE_FAST_BIG_NUMBER_PARSER);
 
             if (strategy == StartingFieldStrategy.NESTED_FIELD) {
                 while (jsonParser.nextToken() != null) {
diff --git 
a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/WriteJsonResult.java
 
b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/WriteJsonResult.java
index 065c43823e..11408f237f 100644
--- 
a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/WriteJsonResult.java
+++ 
b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-json-record-utils/src/main/java/org/apache/nifi/json/WriteJsonResult.java
@@ -19,6 +19,7 @@ package org.apache.nifi.json;
 
 import com.fasterxml.jackson.core.JsonFactory;
 import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.JsonGenerator.Feature;
 import com.fasterxml.jackson.core.util.MinimalPrettyPrinter;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import org.apache.nifi.NullSuppression;
@@ -48,9 +49,11 @@ import java.math.BigInteger;
 import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
+import java.util.regex.Pattern;
 
 public class WriteJsonResult extends AbstractRecordSetWriter implements 
RecordSetWriter, RawRecordWriter {
     private static final FieldConverter<Object, String> STRING_FIELD_CONVERTER 
= StandardFieldConverterRegistry.getRegistry().getFieldConverter(String.class);
+    private static final Pattern SCIENTIFIC_NOTATION_PATTERN = 
Pattern.compile("[0-9]([eE][-+]?)[0-9]");
 
     private final ComponentLog logger;
     private final SchemaAccessWriter schemaAccess;
@@ -63,17 +66,18 @@ public class WriteJsonResult extends 
AbstractRecordSetWriter implements RecordSe
     private final String timestampFormat;
     private final String mimeType;
     private final boolean prettyPrint;
+    private final boolean allowScientificNotation;
 
     private static final ObjectMapper objectMapper = new ObjectMapper();
 
     public WriteJsonResult(final ComponentLog logger, final RecordSchema 
recordSchema, final SchemaAccessWriter schemaAccess, final OutputStream out, 
final boolean prettyPrint,
             final NullSuppression nullSuppression, final OutputGrouping 
outputGrouping, final String dateFormat, final String timeFormat, final String 
timestampFormat) throws IOException {
-        this(logger, recordSchema, schemaAccess, out, prettyPrint, 
nullSuppression, outputGrouping, dateFormat, timeFormat, timestampFormat, 
"application/json");
+        this(logger, recordSchema, schemaAccess, out, prettyPrint, 
nullSuppression, outputGrouping, dateFormat, timeFormat, timestampFormat, 
"application/json", false);
     }
 
     public WriteJsonResult(final ComponentLog logger, final RecordSchema 
recordSchema, final SchemaAccessWriter schemaAccess, final OutputStream out, 
final boolean prettyPrint,
         final NullSuppression nullSuppression, final OutputGrouping 
outputGrouping, final String dateFormat, final String timeFormat, final String 
timestampFormat,
-        final String mimeType) throws IOException {
+        final String mimeType, final boolean allowScientificNotation) throws 
IOException {
 
         super(out);
         this.logger = logger;
@@ -82,6 +86,7 @@ public class WriteJsonResult extends AbstractRecordSetWriter 
implements RecordSe
         this.nullSuppression = nullSuppression;
         this.outputGrouping = outputGrouping;
         this.mimeType = mimeType;
+        this.allowScientificNotation = allowScientificNotation;
 
         this.dateFormat = dateFormat;
         this.timeFormat = timeFormat;
@@ -91,6 +96,10 @@ public class WriteJsonResult extends AbstractRecordSetWriter 
implements RecordSe
         factory.setCodec(objectMapper);
 
         this.generator = factory.createGenerator(out);
+        if (!allowScientificNotation) {
+            generator.enable(Feature.WRITE_BIGDECIMAL_AS_PLAIN);
+        }
+
         this.prettyPrint = prettyPrint;
         if (prettyPrint) {
             generator.useDefaultPrettyPrinter();
@@ -163,22 +172,44 @@ public class WriteJsonResult extends 
AbstractRecordSetWriter implements RecordSe
         return WriteResult.of(incrementRecordCount(), attributes);
     }
 
+    private boolean isUseSerializeForm(final Record record, final RecordSchema 
writeSchema) {
+        final Optional<SerializedForm> serializedForm = 
record.getSerializedForm();
+        if (serializedForm.isEmpty()) {
+            return false;
+        }
+
+        final SerializedForm form = serializedForm.get();
+        if (!form.getMimeType().equals(getMimeType()) || 
!record.getSchema().equals(writeSchema)) {
+            return false;
+        }
+
+        final Object serialized = form.getSerialized();
+        if (!(serialized instanceof final String serializedString)) {
+            return false;
+        }
+        final boolean serializedPretty = serializedString.contains("\n");
+        if (serializedPretty != this.prettyPrint) {
+            return false;
+        }
+
+        if (!allowScientificNotation && 
hasScientificNotation(serializedString)) {
+            return false;
+        }
+
+        return true;
+    }
+
+    private boolean hasScientificNotation(final String value) {
+        return SCIENTIFIC_NOTATION_PATTERN.matcher(value).find();
+    }
+
     private void writeRecord(final Record record, final RecordSchema 
writeSchema, final JsonGenerator generator,
         final GeneratorTask startTask, final GeneratorTask endTask, final 
boolean schemaAware) throws IOException {
 
-        final Optional<SerializedForm> serializedForm = 
record.getSerializedForm();
-        if (serializedForm.isPresent()) {
-            final SerializedForm form = serializedForm.get();
-            if (form.getMimeType().equals(getMimeType()) && 
record.getSchema().equals(writeSchema)) {
-                final Object serialized = form.getSerialized();
-                if (serialized instanceof final String serializedString) {
-                    final boolean serializedPretty = 
serializedString.contains("\n");
-                    if (serializedPretty == this.prettyPrint) {
-                        generator.writeRawValue((String) serialized);
-                        return;
-                    }
-                }
-            }
+        if (isUseSerializeForm(record, writeSchema)) {
+            final String serialized = (String) 
record.getSerializedForm().get().getSerialized();
+            generator.writeRawValue(serialized);
+            return;
         }
 
         try {
@@ -291,6 +322,12 @@ public class WriteJsonResult extends 
AbstractRecordSetWriter implements RecordSe
             generator.writeObject(formatted);
             return;
         }
+        if (!allowScientificNotation) {
+            if (value instanceof Double || value instanceof Float) {
+                generator.writeNumber(DataTypeUtils.toBigDecimal(value, 
fieldName));
+                return;
+            }
+        }
 
         generator.writeObject(value);
     }
@@ -346,10 +383,18 @@ public class WriteJsonResult extends 
AbstractRecordSetWriter implements RecordSe
                 break;
             }
             case DOUBLE:
-                generator.writeNumber(DataTypeUtils.toDouble(coercedValue, 
fieldName));
+                if (allowScientificNotation) {
+                    generator.writeNumber(DataTypeUtils.toDouble(coercedValue, 
fieldName));
+                } else {
+                    
generator.writeNumber(DataTypeUtils.toBigDecimal(coercedValue, fieldName));
+                }
                 break;
             case FLOAT:
-                generator.writeNumber(DataTypeUtils.toFloat(coercedValue, 
fieldName));
+                if (allowScientificNotation) {
+                    generator.writeNumber(DataTypeUtils.toFloat(coercedValue, 
fieldName));
+                } else {
+                    
generator.writeNumber(DataTypeUtils.toBigDecimal(coercedValue, fieldName));
+                }
                 break;
             case LONG:
                 generator.writeNumber(DataTypeUtils.toLong(coercedValue, 
fieldName));
diff --git 
a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonRecordSetWriter.java
 
b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonRecordSetWriter.java
index 83547e890d..3242f89b93 100644
--- 
a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonRecordSetWriter.java
+++ 
b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonRecordSetWriter.java
@@ -30,6 +30,7 @@ import org.apache.nifi.components.ValidationResult;
 import org.apache.nifi.controller.ConfigurationContext;
 import org.apache.nifi.expression.ExpressionLanguageScope;
 import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.migration.PropertyConfiguration;
 import org.apache.nifi.schema.access.SchemaNotFoundException;
 import org.apache.nifi.serialization.DateTimeTextRecordSetWriter;
 import org.apache.nifi.serialization.RecordSetWriter;
@@ -90,6 +91,14 @@ public class JsonRecordSetWriter extends 
DateTimeTextRecordSetWriter implements
             .defaultValue("false")
             .required(true)
             .build();
+    public static final PropertyDescriptor ALLOW_SCIENTIFIC_NOTATION = new 
PropertyDescriptor.Builder()
+            .name("Allow Scientific Notation")
+            .description("Specifies whether or not scientific notation should 
be used when writing numbers")
+            .expressionLanguageSupported(ExpressionLanguageScope.NONE)
+            .allowableValues("true", "false")
+            .defaultValue("false")
+            .required(true)
+            .build();
     public static final PropertyDescriptor OUTPUT_GROUPING = new 
PropertyDescriptor.Builder()
             .name("output-grouping")
             .displayName("Output Grouping")
@@ -120,6 +129,7 @@ public class JsonRecordSetWriter extends 
DateTimeTextRecordSetWriter implements
             .build();
 
     private volatile boolean prettyPrint;
+    private volatile boolean allowScientificNotation;
     private volatile NullSuppression nullSuppression;
     private volatile OutputGrouping outputGrouping;
     private volatile String compressionFormat;
@@ -130,12 +140,22 @@ public class JsonRecordSetWriter extends 
DateTimeTextRecordSetWriter implements
         final List<PropertyDescriptor> properties = new 
ArrayList<>(super.getSupportedPropertyDescriptors());
         properties.add(PRETTY_PRINT_JSON);
         properties.add(SUPPRESS_NULLS);
+        properties.add(ALLOW_SCIENTIFIC_NOTATION);
         properties.add(OUTPUT_GROUPING);
         properties.add(COMPRESSION_FORMAT);
         properties.add(COMPRESSION_LEVEL);
         return properties;
     }
 
+    @Override
+    public void migrateProperties(final PropertyConfiguration 
propertyConfiguration) {
+        // We added the ALLOW_SCIENTIFIC_NOTATION property with a default of 
'false'. However, we don't want to change the behavior
+        // of existing services. So we migrate existing services to use a 
value of 'true' to maintain backward compatibility.
+        if 
(!propertyConfiguration.hasProperty(ALLOW_SCIENTIFIC_NOTATION.getName())) {
+            propertyConfiguration.setProperty(ALLOW_SCIENTIFIC_NOTATION, 
"true");
+        }
+    }
+
     @Override
     protected Collection<ValidationResult> customValidate(ValidationContext 
context) {
         final List<ValidationResult> problems = new 
ArrayList<>(super.customValidate(context));
@@ -150,6 +170,7 @@ public class JsonRecordSetWriter extends 
DateTimeTextRecordSetWriter implements
     @OnEnabled
     public void onEnabled(final ConfigurationContext context) {
         prettyPrint = context.getProperty(PRETTY_PRINT_JSON).asBoolean();
+        allowScientificNotation = 
context.getProperty(ALLOW_SCIENTIFIC_NOTATION).asBoolean();
 
         final NullSuppression suppression;
         final String suppressNullValue = 
context.getProperty(SUPPRESS_NULLS).getValue();
@@ -180,36 +201,36 @@ public class JsonRecordSetWriter extends 
DateTimeTextRecordSetWriter implements
 
         final OutputStream bufferedOut = new BufferedOutputStream(out, 65536);
         final OutputStream compressionOut;
-        String mimeTypeRef;
+        String mimeType;
 
         try {
             switch (compressionFormat.toLowerCase()) {
                 case COMPRESSION_FORMAT_GZIP:
                     compressionOut = new GZIPOutputStream(bufferedOut, 
compressionLevel);
-                    mimeTypeRef = "application/gzip";
+                    mimeType = "application/gzip";
                     break;
                 case COMPRESSION_FORMAT_XZ_LZMA2:
                     compressionOut = new XZOutputStream(bufferedOut, new 
LZMA2Options());
-                    mimeTypeRef = "application/x-xz";
+                    mimeType = "application/x-xz";
                     break;
                 case COMPRESSION_FORMAT_SNAPPY:
                     compressionOut = new SnappyOutputStream(bufferedOut);
-                    mimeTypeRef = "application/x-snappy";
+                    mimeType = "application/x-snappy";
                     break;
                 case COMPRESSION_FORMAT_SNAPPY_FRAMED:
                     compressionOut = new SnappyFramedOutputStream(bufferedOut);
-                    mimeTypeRef = "application/x-snappy-framed";
+                    mimeType = "application/x-snappy-framed";
                     break;
                 case COMPRESSION_FORMAT_BZIP2:
-                    mimeTypeRef = "application/x-bzip2";
+                    mimeType = "application/x-bzip2";
                     compressionOut = new 
CompressorStreamFactory().createCompressorOutputStream(compressionFormat.toLowerCase(),
 bufferedOut);
                     break;
                 case COMPRESSION_FORMAT_ZSTD:
-                    mimeTypeRef = "application/zstd";
+                    mimeType = "application/zstd";
                     compressionOut = new 
CompressorStreamFactory().createCompressorOutputStream(compressionFormat.toLowerCase(),
 bufferedOut);
                     break;
                 default:
-                    mimeTypeRef = "application/json";
+                    mimeType = "application/json";
                     compressionOut = out;
             }
         } catch (CompressorException e) {
@@ -217,7 +238,7 @@ public class JsonRecordSetWriter extends 
DateTimeTextRecordSetWriter implements
         }
 
         return new WriteJsonResult(logger, schema, 
getSchemaAccessWriter(schema, variables), compressionOut, prettyPrint, 
nullSuppression, outputGrouping,
-                getDateFormat().orElse(null), getTimeFormat().orElse(null), 
getTimestampFormat().orElse(null), mimeTypeRef);
+                getDateFormat().orElse(null), getTimeFormat().orElse(null), 
getTimestampFormat().orElse(null), mimeType, allowScientificNotation);
     }
 
 }
diff --git 
a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/json/TestWriteJsonResult.java
 
b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/json/TestWriteJsonResult.java
index a2f1829b49..f2f83037fe 100644
--- 
a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/json/TestWriteJsonResult.java
+++ 
b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/json/TestWriteJsonResult.java
@@ -52,9 +52,86 @@ import java.util.List;
 import java.util.Map;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 class TestWriteJsonResult {
 
+    @Test
+    public void testScientificNotationUsage() throws IOException {
+        final List<RecordField> fields = new ArrayList<>();
+        fields.add(new RecordField("float", 
RecordFieldType.FLOAT.getDataType()));
+        fields.add(new RecordField("double", 
RecordFieldType.DOUBLE.getDataType()));
+        fields.add(new RecordField("decimal", 
RecordFieldType.DECIMAL.getDecimalDataType(5, 10)));
+        final RecordSchema schema = new SimpleRecordSchema(fields);
+
+        final String expectedWithScientificNotation = """
+            {"float":-4.2910323,"double":4.51E-7,"decimal":8.0E-8}
+            """.trim();
+        final String expectedWithScientificNotationArray = "[" + 
expectedWithScientificNotation + "]";
+        final String expectedWithoutScientificNotation = """
+            {"float":-4.2910323,"double":0.000000451,"decimal":0.000000080}
+            """.trim();
+        final String expectedWithoutScientificNotationArray = "[" + 
expectedWithoutScientificNotation + "]";
+
+        final Map<String, Object> values = Map.of(
+            "float", -4.291032244F,
+            "double", 0.000000451D,
+            "decimal", new BigDecimal("0.000000080")
+        );
+        final Record record = new MapRecord(schema, values);
+
+        final String withScientificNotation = writeRecord(record, true, false);
+        assertEquals(expectedWithScientificNotationArray, 
withScientificNotation);
+
+        // We cannot be sure of the ordering when writing the raw record
+        final String rawWithScientificNotation = writeRecord(record, true, 
true);
+        assertTrue(rawWithScientificNotation.contains("\"float\":-4.2910323"));
+        assertTrue(rawWithScientificNotation.contains("\"double\":4.51E-7"));
+        assertTrue(rawWithScientificNotation.contains("\"decimal\":8.0E-8"));
+
+        final String withoutScientificNotation = writeRecord(record, false, 
false);
+        assertEquals(expectedWithoutScientificNotationArray, 
withoutScientificNotation);
+
+        // We cannot be sure of the ordering when writing the raw record
+        final String rawWithoutScientificNotation = writeRecord(record, false, 
true);
+        
assertTrue(rawWithoutScientificNotation.contains("\"float\":-4.2910323"));
+        
assertTrue(rawWithoutScientificNotation.contains("\"double\":0.000000451"));
+        
assertTrue(rawWithoutScientificNotation.contains("\"decimal\":0.000000080"));
+
+        final Record recordWithSerializedForm = new MapRecord(schema, values, 
SerializedForm.of(expectedWithScientificNotation, "application/json"));
+        final String writtenWith = writeRecord(recordWithSerializedForm, true, 
false);
+        assertEquals(expectedWithScientificNotationArray, writtenWith);
+
+        final String writtenWithout = writeRecord(recordWithSerializedForm, 
false, false);
+        assertEquals(expectedWithoutScientificNotationArray, writtenWithout);
+
+        // We cannot be sure of the ordering when writing the raw record
+        final String writtenWithoutRaw = writeRecord(recordWithSerializedForm, 
false, true);
+        assertTrue(writtenWithoutRaw.contains("\"float\":-4.2910323"));
+        assertTrue(writtenWithoutRaw.contains("\"double\":0.000000451"));
+        assertTrue(writtenWithoutRaw.contains("\"decimal\":0.000000080"));
+    }
+
+    private String writeRecord(final Record record, final boolean 
allowScientificNotation, final boolean writeRawRecord) throws IOException {
+        try (final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+             final WriteJsonResult writer = new 
WriteJsonResult(Mockito.mock(ComponentLog.class), record.getSchema(), new 
SchemaNameAsAttribute(), baos, false,
+                 NullSuppression.NEVER_SUPPRESS, OutputGrouping.OUTPUT_ARRAY, 
RecordFieldType.DATE.getDefaultFormat(),
+                 RecordFieldType.TIME.getDefaultFormat(), 
RecordFieldType.TIMESTAMP.getDefaultFormat(), "application/json", 
allowScientificNotation)) {
+
+            writer.beginRecordSet();
+            if (writeRawRecord) {
+                writer.writeRawRecord(record);
+            } else {
+                writer.write(record);
+            }
+
+            writer.finishRecordSet();
+            writer.flush();
+
+            return baos.toString(StandardCharsets.UTF_8);
+        }
+    }
+
     @Test
     void testDataTypes() throws IOException {
         final List<RecordField> fields = new ArrayList<>();
@@ -111,7 +188,7 @@ class TestWriteJsonResult {
             writer.write(rs);
         }
 
-        final String output = baos.toString("UTF-8");
+        final String output = baos.toString(StandardCharsets.UTF_8);
 
         final String expected = new 
String(Files.readAllBytes(Paths.get("src/test/resources/json/output/dataTypes.json")));
         assertEquals(StringUtils.deleteWhitespace(expected), 
StringUtils.deleteWhitespace(output));

Reply via email to