wgtmac commented on code in PR #1102:
URL: https://github.com/apache/parquet-mr/pull/1102#discussion_r1222352757


##########
parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java:
##########
@@ -31,10 +32,12 @@
 import org.apache.parquet.io.api.Converter;
 import org.apache.parquet.io.api.GroupConverter;
 import org.apache.parquet.io.api.PrimitiveConverter;
+
 import org.apache.parquet.schema.GroupType;

Review Comment:
   ```suggestion
   import org.apache.parquet.io.api.PrimitiveConverter;
   import org.apache.parquet.schema.GroupType;
   ```



##########
parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoConstants.java:
##########
@@ -26,6 +26,7 @@ public final class ProtoConstants {
   public static final String METADATA_ENUM_PREFIX = "parquet.proto.enum.";
   public static final String METADATA_ENUM_KEY_VALUE_SEPARATOR = ":";
   public static final String METADATA_ENUM_ITEM_SEPARATOR = ",";
+  public static final String CONFIG_IGNORE_UNKNOWN_FIELDS = 
"parquet.proto.ignore.unknwon.fields";

Review Comment:
   ```suggestion
     public static final String CONFIG_IGNORE_UNKNOWN_FIELDS = 
"parquet.proto.ignore.unknown.fields";
   ```



##########
parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java:
##########
@@ -80,35 +91,77 @@ class ProtoMessageConverter extends GroupConverter {
 
   // For usage in message arrays
   ProtoMessageConverter(Configuration conf, ParentValueContainer pvc, 
Message.Builder builder, GroupType parquetSchema, Map<String, String> 
extraMetadata) {
+    if (pvc == null) {
+      throw new IllegalStateException("Missing parent value container");
+    }
 
     int schemaSize = parquetSchema.getFieldCount();
     converters = new Converter[schemaSize];
     this.conf = conf;
     this.parent = pvc;
     this.extraMetadata = extraMetadata;
-    int parquetFieldIndex = 1;
-
-    if (pvc == null) {
-      throw new IllegalStateException("Missing parent value container");
-    }
+    boolean ignoreUnknownFields = conf.getBoolean(IGNORE_UNKNOWN_FIELDS, 
false);
 
     myBuilder = builder;
 
-    Descriptors.Descriptor protoDescriptor = builder.getDescriptorForType();
+    if (builder == null && ignoreUnknownFields) {
+      IntStream.range(0, parquetSchema.getFieldCount())
+        .forEach(i-> converters[i] = dummyScalarConverter(DUMMY_PVC, 
parquetSchema.getType(i), conf, extraMetadata));
+
+    } else {
 
-    for (Type parquetField : parquetSchema.getFields()) {
-      Descriptors.FieldDescriptor protoField = 
protoDescriptor.findFieldByName(parquetField.getName());
+      int parquetFieldIndex = 0;
+      Descriptors.Descriptor protoDescriptor =  builder.getDescriptorForType();
 
-      if (protoField == null) {
-        String description = "Scheme mismatch \n\"" + parquetField + "\"" +
-                "\n proto descriptor:\n" + protoDescriptor.toProto();
-        throw new IncompatibleSchemaModificationException("Cant find \"" + 
parquetField.getName() + "\" " + description);
+      for (Type parquetField : parquetSchema.getFields()) {
+
+        Descriptors.FieldDescriptor protoField = 
protoDescriptor.findFieldByName(parquetField.getName());

Review Comment:
   ```suggestion
         for (Type parquetField : parquetSchema.getFields()) {
           Descriptors.FieldDescriptor protoField = 
protoDescriptor.findFieldByName(parquetField.getName());
   ```



##########
parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoConstants.java:
##########
@@ -26,6 +26,7 @@ public final class ProtoConstants {
   public static final String METADATA_ENUM_PREFIX = "parquet.proto.enum.";
   public static final String METADATA_ENUM_KEY_VALUE_SEPARATOR = ":";
   public static final String METADATA_ENUM_ITEM_SEPARATOR = ",";
+  public static final String CONFIG_IGNORE_UNKNOWN_FIELDS = 
"parquet.proto.ignore.unknwon.fields";

Review Comment:
   Could you please add a comment for `CONFIG_IGNORE_UNKNOWN_FIELDS`? Like what 
we did for `CONFIG_ACCEPT_UNKNOWN_ENUM`.



##########
parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java:
##########
@@ -80,35 +91,77 @@ class ProtoMessageConverter extends GroupConverter {
 
   // For usage in message arrays
   ProtoMessageConverter(Configuration conf, ParentValueContainer pvc, 
Message.Builder builder, GroupType parquetSchema, Map<String, String> 
extraMetadata) {
+    if (pvc == null) {
+      throw new IllegalStateException("Missing parent value container");
+    }
 
     int schemaSize = parquetSchema.getFieldCount();
     converters = new Converter[schemaSize];
     this.conf = conf;
     this.parent = pvc;
     this.extraMetadata = extraMetadata;
-    int parquetFieldIndex = 1;
-
-    if (pvc == null) {
-      throw new IllegalStateException("Missing parent value container");
-    }
+    boolean ignoreUnknownFields = conf.getBoolean(IGNORE_UNKNOWN_FIELDS, 
false);
 
     myBuilder = builder;
 
-    Descriptors.Descriptor protoDescriptor = builder.getDescriptorForType();
+    if (builder == null && ignoreUnknownFields) {
+      IntStream.range(0, parquetSchema.getFieldCount())
+        .forEach(i-> converters[i] = dummyScalarConverter(DUMMY_PVC, 
parquetSchema.getType(i), conf, extraMetadata));
+
+    } else {
 
-    for (Type parquetField : parquetSchema.getFields()) {
-      Descriptors.FieldDescriptor protoField = 
protoDescriptor.findFieldByName(parquetField.getName());
+      int parquetFieldIndex = 0;
+      Descriptors.Descriptor protoDescriptor =  builder.getDescriptorForType();
 
-      if (protoField == null) {
-        String description = "Scheme mismatch \n\"" + parquetField + "\"" +
-                "\n proto descriptor:\n" + protoDescriptor.toProto();
-        throw new IncompatibleSchemaModificationException("Cant find \"" + 
parquetField.getName() + "\" " + description);
+      for (Type parquetField : parquetSchema.getFields()) {
+
+        Descriptors.FieldDescriptor protoField = 
protoDescriptor.findFieldByName(parquetField.getName());
+
+        validateProtoField(ignoreUnknownFields, protoDescriptor.toProto(), 
parquetField, protoField);
+
+        converters[parquetFieldIndex] = protoField != null ?
+            newMessageConverter(myBuilder, protoField, parquetField) :
+            dummyScalarConverter(DUMMY_PVC, parquetField, conf, extraMetadata);
+
+        parquetFieldIndex++;
       }
+    }
+  }
+
+  private void validateProtoField(boolean ignoreUnknownFields,
+                                  DescriptorProtos.DescriptorProto 
protoDescriptor,
+                                  Type parquetField,
+                                  Descriptors.FieldDescriptor protoField) {
+    if (protoField == null && !ignoreUnknownFields) {
+      String description = "Schema mismatch \n\"" + parquetField + "\"" +
+        "\n proto descriptor:\n" + protoDescriptor;
+      throw new IncompatibleSchemaModificationException("Cant find \"" + 
parquetField.getName() + "\" " + description);
+    }
+  }
 
-      converters[parquetFieldIndex - 1] = newMessageConverter(myBuilder, 
protoField, parquetField);
 
-      parquetFieldIndex++;
+  private Converter dummyScalarConverter(ParentValueContainer pvc,
+                                         Type parquetField, Configuration conf,
+                                         Map<String, String> extraMetadata) {
+
+    if(parquetField.isPrimitive()) {

Review Comment:
   ```suggestion
       if (parquetField.isPrimitive()) {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to