This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 36333ef5848 [HUDI-8748] Fix check for whether legacy mode is enabled for file (#12481)
36333ef5848 is described below
commit 36333ef5848e8f16ae78b03354a8a086f1a448a1
Author: Tim Brown <[email protected]>
AuthorDate: Tue Dec 17 02:04:43 2024 -0600
[HUDI-8748] Fix check for whether legacy mode is enabled for file (#12481)
---
.../apache/parquet/avro/HoodieAvroReadSupport.java | 20 ++--
.../parquet/avro/TestHoodieAvroReadSupport.java | 122 +++++++++++++++++++++
2 files changed, 131 insertions(+), 11 deletions(-)
diff --git a/hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java b/hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java
index 07015209435..2dccb0888c4 100644
--- a/hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java
+++ b/hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java
@@ -102,25 +102,23 @@ public class HoodieAvroReadSupport<T> extends AvroReadSupport<T> {
* }
* }
*/
- private boolean checkLegacyMode(List<Type> parquetFields) {
- for (Type type : parquetFields) {
+ private static boolean checkLegacyMode(List<Type> parquetFields) {
+ return parquetFields.stream().anyMatch(type -> {
if (!type.isPrimitive()) {
GroupType groupType = type.asGroupType();
OriginalType originalType = groupType.getOriginalType();
if (originalType == OriginalType.MAP
- && groupType.getFields().get(0).getOriginalType() !=
OriginalType.MAP_KEY_VALUE) {
- return false;
+ && !groupType.getFields().get(0).getName().equals("key_value")) {
+ return true;
}
if (originalType == OriginalType.LIST
- && !groupType.getType(0).getName().equals("array")) {
- return false;
- }
- if (!checkLegacyMode(groupType.getFields())) {
- return false;
+ && !groupType.getType(0).getName().equals("list")) {
+ return true;
}
+ return checkLegacyMode(groupType.getFields());
}
- }
- return true;
+ return false;
+ });
}
/**
diff --git a/hudi-hadoop-common/src/test/java/org/apache/parquet/avro/TestHoodieAvroReadSupport.java b/hudi-hadoop-common/src/test/java/org/apache/parquet/avro/TestHoodieAvroReadSupport.java
new file mode 100644
index 00000000000..54cd6f30144
--- /dev/null
+++ b/hudi-hadoop-common/src/test/java/org/apache/parquet/avro/TestHoodieAvroReadSupport.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.parquet.avro;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.parquet.schema.ConversionPatterns;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
+import org.apache.parquet.schema.Types;
+import org.junit.jupiter.api.Test;
+
+import java.util.Collections;
+
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+class TestHoodieAvroReadSupport {
+ private final Type legacyListType =
ConversionPatterns.listType(Type.Repetition.REQUIRED, "legacyList",
+ Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE,
Type.Repetition.REPEATED).named("double_field"));
+ private final Type legacyListTypeWithObject =
ConversionPatterns.listType(Type.Repetition.REQUIRED, "legacyList",
+ new MessageType("foo",
Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE,
Type.Repetition.REPEATED).named("double_field")));
+ private final Type listType =
ConversionPatterns.listOfElements(Type.Repetition.REQUIRED, "newList",
+ Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE,
Type.Repetition.REPEATED).named("element"));
+ private final Type integerField =
Types.primitive(PrimitiveType.PrimitiveTypeName.INT32,
Type.Repetition.REQUIRED).named("int_field");
+ private final Type legacyMapType = new GroupType(Type.Repetition.OPTIONAL,
"my_map", OriginalType.MAP, new GroupType(Type.Repetition.REPEATED, "map",
+ Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY,
Type.Repetition.REQUIRED).named("key"),
+ Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY,
Type.Repetition.REQUIRED).named("value")));
+ private final Type mapType =
ConversionPatterns.stringKeyMapType(Type.Repetition.OPTIONAL, "newMap",
+ Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY,
Type.Repetition.REQUIRED).named("value"));
+ private final Configuration configuration = mock(Configuration.class);
+
+ @Test
+ void fileContainsLegacyList() {
+ when(configuration.getBoolean(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE,
AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE_DEFAULT)).thenReturn(false);
+ MessageType messageType = new MessageType("LegacyList", integerField,
legacyListType, mapType);
+ new HoodieAvroReadSupport<>().init(configuration, Collections.emptyMap(),
messageType);
+ verify(configuration).set(eq(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE),
eq("true"), anyString());
+ }
+
+ @Test
+ void fileContainsLegacyListWithElements() {
+ when(configuration.getBoolean(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE,
AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE_DEFAULT)).thenReturn(false);
+ MessageType messageType = new MessageType("LegacyList", integerField,
legacyListTypeWithObject, mapType);
+ new HoodieAvroReadSupport<>().init(configuration, Collections.emptyMap(),
messageType);
+ verify(configuration).set(eq(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE),
eq("true"), anyString());
+ }
+
+ @Test
+ void fileContainsLegacyMap() {
+ when(configuration.getBoolean(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE,
AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE_DEFAULT)).thenReturn(false);
+ MessageType messageType = new MessageType("LegacyList", integerField,
legacyMapType, listType);
+ new HoodieAvroReadSupport<>().init(configuration, Collections.emptyMap(),
messageType);
+ verify(configuration).set(eq(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE),
eq("true"), anyString());
+ }
+
+ @Test
+ void fileContainsNewListAndMap() {
+
when(configuration.get(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE)).thenReturn(null);
+ MessageType messageType = new MessageType("newFieldTypes", listType,
mapType, integerField);
+ new HoodieAvroReadSupport<>().init(configuration, Collections.emptyMap(),
messageType);
+ verify(configuration).set(eq(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE),
eq("false"), anyString());
+ }
+
+ @Test
+ void fileContainsNoListOrMap() {
+ MessageType messageType = new MessageType("noListOrMap", integerField);
+ new HoodieAvroReadSupport<>().init(configuration, Collections.emptyMap(),
messageType);
+ verify(configuration, never()).set(anyString(), anyString());
+ }
+
+ @Test
+ void nestedLegacyList() {
+ when(configuration.getBoolean(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE,
AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE_DEFAULT)).thenReturn(false);
+ MessageType nested = new MessageType("Nested", integerField,
legacyListType);
+ MessageType messageType = new MessageType("NestedList", integerField,
nested);
+ new HoodieAvroReadSupport<>().init(configuration, Collections.emptyMap(),
messageType);
+ verify(configuration).set(eq(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE),
eq("true"), anyString());
+ }
+
+ @Test
+ void nestedLegacyMap() {
+ when(configuration.getBoolean(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE,
AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE_DEFAULT)).thenReturn(false);
+ MessageType nested = new MessageType("Nested", integerField,
legacyMapType);
+ MessageType messageType = new MessageType("NestedList", integerField,
nested);
+ new HoodieAvroReadSupport<>().init(configuration, Collections.emptyMap(),
messageType);
+ verify(configuration).set(eq(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE),
eq("true"), anyString());
+ }
+
+ @Test
+ void mapWithLegacyList() {
+ when(configuration.getBoolean(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE,
AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE_DEFAULT)).thenReturn(false);
+ Type listValue = ConversionPatterns.listType(Type.Repetition.REQUIRED,
"value",
+ Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE,
Type.Repetition.REPEATED).named("double_field"));
+ Type mapWithList =
ConversionPatterns.stringKeyMapType(Type.Repetition.OPTIONAL, "newMap",
listValue);
+ MessageType messageType = new MessageType("NestedList", integerField,
mapWithList);
+ new HoodieAvroReadSupport<>().init(configuration, Collections.emptyMap(),
messageType);
+ verify(configuration).set(eq(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE),
eq("true"), anyString());
+ }
+}