[ https://issues.apache.org/jira/browse/NIFI-4882?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16401691#comment-16401691 ]
ASF GitHub Bot commented on NIFI-4882: -------------------------------------- Github user ijokarumawak commented on a diff in the pull request: https://github.com/apache/nifi/pull/2473#discussion_r175003250 --- Diff: nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/AbstractCSVRecordReader.java --- @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.csv; + + +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.serialization.RecordReader; +import org.apache.nifi.serialization.record.DataType; +import org.apache.nifi.serialization.record.RecordSchema; +import org.apache.nifi.serialization.record.util.DataTypeUtils; +import java.text.DateFormat; +import java.util.function.Supplier; + +abstract public class AbstractCSVRecordReader implements RecordReader { + + protected final ComponentLog logger; + protected final boolean hasHeader; + protected final boolean ignoreHeader; + + protected final Supplier<DateFormat> LAZY_DATE_FORMAT; + protected final Supplier<DateFormat> LAZY_TIME_FORMAT; + protected final Supplier<DateFormat> LAZY_TIMESTAMP_FORMAT; + + protected final String dateFormat; + protected final String timeFormat; + protected final String timestampFormat; + + protected final RecordSchema schema; + + AbstractCSVRecordReader(final ComponentLog logger, final RecordSchema schema, final boolean hasHeader, final boolean ignoreHeader, + final String dateFormat, final String timeFormat, final String timestampFormat) { + this.logger = logger; + this.schema = schema; + this.hasHeader = hasHeader; + this.ignoreHeader = ignoreHeader; + + if (dateFormat == null || dateFormat.isEmpty()) { + this.dateFormat = null; + LAZY_DATE_FORMAT = () -> null; --- End diff -- Trivial, but LAZY_DATE_FORMAT can simply be `null` instead of a lambda returning null, because DataTypeUtils handles a null supplier. The same goes for the time and timestamp formats. 
> CSVRecordReader should utilize specified date/time/timestamp format at its > convertSimpleIfPossible method > --------------------------------------------------------------------------------------------------------- > > Key: NIFI-4882 > URL: https://issues.apache.org/jira/browse/NIFI-4882 > Project: Apache NiFi > Issue Type: Bug > Components: Extensions > Reporter: Koji Kawamura > Assignee: Derek Straka > Priority: Major > > The CSVRecordReader.convertSimpleIfPossible method is used by ValidateRecord. The > method does not coerce values to the target schema field type if the raw > string representation in the input CSV file is not compatible. > The type compatibility check is implemented as follows, but it does not use > the user-specified date/time/timestamp format: > {code} > // This will return 'false' for input '01/01/1900' when user > specified custom format 'MM/dd/YYYY' > if (DataTypeUtils.isCompatibleDataType(trimmed, dataType)) { > // The LAZY_DATE_FORMAT should be used to check > compatibility, too. > return DataTypeUtils.convertType(trimmed, dataType, > LAZY_DATE_FORMAT, LAZY_TIME_FORMAT, LAZY_TIMESTAMP_FORMAT, fieldName); > } else { > return value; > } > {code} > If input date strings have a different format than the default format > 'yyyy-MM-dd', then the ValidateRecord processor cannot validate input records. > JacksonCSVRecordReader has methods identical to those of CSVRecordReader. Those > classes should share a common abstract base class. -- This message was sent by Atlassian JIRA (v7.6.3#76005)