This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 36333ef5848 [HUDI-8748] Fix check for whether legacy mode is enabled for file (#12481)
36333ef5848 is described below

commit 36333ef5848e8f16ae78b03354a8a086f1a448a1
Author: Tim Brown <[email protected]>
AuthorDate: Tue Dec 17 02:04:43 2024 -0600

    [HUDI-8748] Fix check for whether legacy mode is enabled for file (#12481)
---
 .../apache/parquet/avro/HoodieAvroReadSupport.java |  20 ++--
 .../parquet/avro/TestHoodieAvroReadSupport.java    | 122 +++++++++++++++++++++
 2 files changed, 131 insertions(+), 11 deletions(-)

diff --git a/hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java b/hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java
index 07015209435..2dccb0888c4 100644
--- a/hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java
+++ b/hudi-hadoop-common/src/main/java/org/apache/parquet/avro/HoodieAvroReadSupport.java
@@ -102,25 +102,23 @@ public class HoodieAvroReadSupport<T> extends AvroReadSupport<T> {
    *      }
    *    }
    */
-  private boolean checkLegacyMode(List<Type> parquetFields) {
-    for (Type type : parquetFields) {
+  private static boolean checkLegacyMode(List<Type> parquetFields) {
+    return parquetFields.stream().anyMatch(type -> {
       if (!type.isPrimitive()) {
         GroupType groupType = type.asGroupType();
         OriginalType originalType = groupType.getOriginalType();
         if (originalType == OriginalType.MAP
-            && groupType.getFields().get(0).getOriginalType() != 
OriginalType.MAP_KEY_VALUE) {
-          return false;
+            && !groupType.getFields().get(0).getName().equals("key_value")) {
+          return true;
         }
         if (originalType == OriginalType.LIST
-            && !groupType.getType(0).getName().equals("array")) {
-          return false;
-        }
-        if (!checkLegacyMode(groupType.getFields())) {
-          return false;
+            && !groupType.getType(0).getName().equals("list")) {
+          return true;
         }
+        return checkLegacyMode(groupType.getFields());
       }
-    }
-    return true;
+      return false;
+    });
   }
 
   /**
diff --git a/hudi-hadoop-common/src/test/java/org/apache/parquet/avro/TestHoodieAvroReadSupport.java b/hudi-hadoop-common/src/test/java/org/apache/parquet/avro/TestHoodieAvroReadSupport.java
new file mode 100644
index 00000000000..54cd6f30144
--- /dev/null
+++ b/hudi-hadoop-common/src/test/java/org/apache/parquet/avro/TestHoodieAvroReadSupport.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.parquet.avro;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.parquet.schema.ConversionPatterns;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Type;
+import org.apache.parquet.schema.Types;
+import org.junit.jupiter.api.Test;
+
+import java.util.Collections;
+
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+class TestHoodieAvroReadSupport {
+  private final Type legacyListType = 
ConversionPatterns.listType(Type.Repetition.REQUIRED, "legacyList",
+      Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE, 
Type.Repetition.REPEATED).named("double_field"));
+  private final Type legacyListTypeWithObject = 
ConversionPatterns.listType(Type.Repetition.REQUIRED, "legacyList",
+      new MessageType("foo", 
Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE, 
Type.Repetition.REPEATED).named("double_field")));
+  private final Type listType = 
ConversionPatterns.listOfElements(Type.Repetition.REQUIRED, "newList",
+      Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE, 
Type.Repetition.REPEATED).named("element"));
+  private final Type integerField = 
Types.primitive(PrimitiveType.PrimitiveTypeName.INT32, 
Type.Repetition.REQUIRED).named("int_field");
+  private final Type legacyMapType = new GroupType(Type.Repetition.OPTIONAL, 
"my_map", OriginalType.MAP, new GroupType(Type.Repetition.REPEATED, "map",
+      Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, 
Type.Repetition.REQUIRED).named("key"),
+      Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, 
Type.Repetition.REQUIRED).named("value")));
+  private final Type mapType = 
ConversionPatterns.stringKeyMapType(Type.Repetition.OPTIONAL, "newMap",
+      Types.primitive(PrimitiveType.PrimitiveTypeName.BINARY, 
Type.Repetition.REQUIRED).named("value"));
+  private final Configuration configuration = mock(Configuration.class);
+
+  @Test
+  void fileContainsLegacyList() {
+    when(configuration.getBoolean(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE, 
AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE_DEFAULT)).thenReturn(false);
+    MessageType messageType = new MessageType("LegacyList", integerField, 
legacyListType, mapType);
+    new HoodieAvroReadSupport<>().init(configuration, Collections.emptyMap(), 
messageType);
+    verify(configuration).set(eq(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE), 
eq("true"), anyString());
+  }
+
+  @Test
+  void fileContainsLegacyListWithElements() {
+    when(configuration.getBoolean(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE, 
AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE_DEFAULT)).thenReturn(false);
+    MessageType messageType = new MessageType("LegacyList", integerField, 
legacyListTypeWithObject, mapType);
+    new HoodieAvroReadSupport<>().init(configuration, Collections.emptyMap(), 
messageType);
+    verify(configuration).set(eq(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE), 
eq("true"), anyString());
+  }
+
+  @Test
+  void fileContainsLegacyMap() {
+    when(configuration.getBoolean(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE, 
AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE_DEFAULT)).thenReturn(false);
+    MessageType messageType = new MessageType("LegacyList", integerField, 
legacyMapType, listType);
+    new HoodieAvroReadSupport<>().init(configuration, Collections.emptyMap(), 
messageType);
+    verify(configuration).set(eq(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE), 
eq("true"), anyString());
+  }
+
+  @Test
+  void fileContainsNewListAndMap() {
+    
when(configuration.get(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE)).thenReturn(null);
+    MessageType messageType = new MessageType("newFieldTypes", listType, 
mapType, integerField);
+    new HoodieAvroReadSupport<>().init(configuration, Collections.emptyMap(), 
messageType);
+    verify(configuration).set(eq(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE), 
eq("false"), anyString());
+  }
+
+  @Test
+  void fileContainsNoListOrMap() {
+    MessageType messageType = new MessageType("noListOrMap", integerField);
+    new HoodieAvroReadSupport<>().init(configuration, Collections.emptyMap(), 
messageType);
+    verify(configuration, never()).set(anyString(), anyString());
+  }
+
+  @Test
+  void nestedLegacyList() {
+    when(configuration.getBoolean(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE, 
AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE_DEFAULT)).thenReturn(false);
+    MessageType nested = new MessageType("Nested", integerField, 
legacyListType);
+    MessageType messageType = new MessageType("NestedList", integerField, 
nested);
+    new HoodieAvroReadSupport<>().init(configuration, Collections.emptyMap(), 
messageType);
+    verify(configuration).set(eq(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE), 
eq("true"), anyString());
+  }
+
+  @Test
+  void nestedLegacyMap() {
+    when(configuration.getBoolean(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE, 
AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE_DEFAULT)).thenReturn(false);
+    MessageType nested = new MessageType("Nested", integerField, 
legacyMapType);
+    MessageType messageType = new MessageType("NestedList", integerField, 
nested);
+    new HoodieAvroReadSupport<>().init(configuration, Collections.emptyMap(), 
messageType);
+    verify(configuration).set(eq(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE), 
eq("true"), anyString());
+  }
+
+  @Test
+  void mapWithLegacyList() {
+    when(configuration.getBoolean(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE, 
AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE_DEFAULT)).thenReturn(false);
+    Type listValue = ConversionPatterns.listType(Type.Repetition.REQUIRED, 
"value",
+        Types.primitive(PrimitiveType.PrimitiveTypeName.DOUBLE, 
Type.Repetition.REPEATED).named("double_field"));
+    Type mapWithList = 
ConversionPatterns.stringKeyMapType(Type.Repetition.OPTIONAL, "newMap", 
listValue);
+    MessageType messageType = new MessageType("NestedList", integerField, 
mapWithList);
+    new HoodieAvroReadSupport<>().init(configuration, Collections.emptyMap(), 
messageType);
+    verify(configuration).set(eq(AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE), 
eq("true"), anyString());
+  }
+}

Reply via email to