szaboferee commented on a change in pull request #3724: NIFI-6640 - UNION/CHOICE types not handled correctly URL: https://github.com/apache/nifi/pull/3724#discussion_r324566272
########## File path: nifi-commons/nifi-record/src/main/java/org/apache/nifi/serialization/record/util/DataTypeUtils.java ########## @@ -225,17 +232,109 @@ public static boolean isCompatibleDataType(final Object value, final DataType da } public static DataType chooseDataType(final Object value, final ChoiceDataType choiceType) { - for (final DataType subType : choiceType.getPossibleSubTypes()) { - if (isCompatibleDataType(value, subType)) { - if (subType.getFieldType() == RecordFieldType.CHOICE) { - return chooseDataType(value, (ChoiceDataType) subType); - } + Queue<DataType> possibleSubTypes = new LinkedList<>(choiceType.getPossibleSubTypes()); + Set<DataType> possibleSimpleSubTypes = new HashSet<>(); - return subType; + while (possibleSubTypes.peek() != null) { + DataType subType = possibleSubTypes.poll(); + if (subType instanceof ChoiceDataType) { + possibleSubTypes.addAll(((ChoiceDataType) subType).getPossibleSubTypes()); + } else { + possibleSimpleSubTypes.add(subType); } } - return null; + List<DataType> compatibleSimpleSubTypes = possibleSimpleSubTypes.stream() + .filter(subType -> isCompatibleDataType(value, subType)) + .collect(Collectors.toList()); + + int nrOfCompatibleSimpleSubTypes = compatibleSimpleSubTypes.size(); + + DataType chosenSimpleType; + if (nrOfCompatibleSimpleSubTypes == 0) { + chosenSimpleType = null; + } else if (nrOfCompatibleSimpleSubTypes == 1) { + chosenSimpleType = compatibleSimpleSubTypes.get(0); + } else { + chosenSimpleType = findMostSuitableType(value, compatibleSimpleSubTypes, Function.identity()) + .orElse(compatibleSimpleSubTypes.get(0)); + } + + return chosenSimpleType; + } + + public static <T> Optional<T> findMostSuitableType(Object value, List<T> types, Function<T, DataType> dataTypeMapper) { + final Optional<T> mostSuitableType; + + Optional<DataType> inferredDataTypeOptional = Optional.ofNullable(inferDataType(value, null)) Review comment: This needs to be created only if the value is not String, so I would move it behind that condition. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services