xiarixiaoyao commented on code in PR #8851:
URL: https://github.com/apache/hudi/pull/8851#discussion_r1211598407


##########
hudi-spark-datasource/hudi-spark3.0.x/src/main/java/org/apache/spark/sql/execution/datasources/parquet/Spark30HoodieVectorizedParquetRecordReader.java:
##########
@@ -184,4 +209,62 @@ public boolean nextKeyValue() throws IOException {
     ++batchIdx;
     return true;
   }
+
+  private void initializeInternal() throws IOException, 
UnsupportedOperationException {
+    // Check that the requested schema is supported.
+    missingColumns = new HashMap<>();
+    List<ColumnDescriptor> columns = requestedSchema.getColumns();
+    List<String[]> paths = requestedSchema.getPaths();
+    for (int i = 0; i < requestedSchema.getFieldCount(); ++i) {
+      String[] colPath = paths.get(i);
+      if (!fileSchema.containsPath(colPath)) {
+        if (columns.get(i).getMaxDefinitionLevel() == 0) {
+          // Column is missing in data but the required data is non-nullable. 
This file is invalid.
+          throw new IOException("Required column is missing in data file. Col: 
" + Arrays.toString(colPath));
+        }
+        missingColumns.put(i, requestedSchema.getFields().get(i).getName());
+      }
+    }
+    missed = schema != null && missingColumns.keySet().stream()
+        .allMatch(columnIndex -> 
Objects.nonNull(schema.findField(missingColumns.get(columnIndex)).getDefaultValue()));
+  }
+
+  private void setColumnDefaultValue(int columnIndex) {

Review Comment:
   ditto



##########
hudi-spark-datasource/hudi-spark3.0.x/src/main/java/org/apache/spark/sql/execution/datasources/parquet/Spark30HoodieVectorizedParquetRecordReader.java:
##########
@@ -184,4 +209,62 @@ public boolean nextKeyValue() throws IOException {
     ++batchIdx;
     return true;
   }
+
+  private void initializeInternal() throws IOException, 
UnsupportedOperationException {

Review Comment:
   Please extract this method so that it can be reused by the different versions of parquetRecordReader.



##########
hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/SerDeHelper.java:
##########
@@ -144,6 +148,11 @@ private static void toJson(Types.RecordType record, 
Integer maxColumnId, Long ve
       if (field.doc() != null) {
         generator.writeStringField(DOC, field.doc());
       }
+      // NOTE: The value of null field is JsonProperties.NULL_VALUE.
+      if (field.getDefaultValue() != null && field.getDefaultValue() != 
JsonProperties.NULL_VALUE) {
+        generator.writeFieldName(DEFAULT_VALUE);
+        generator.writeObject(field.getDefaultValue());

Review Comment:
   Should the correctness of the defaultValue type be verified before 
serialization, rather than just serializing it directly? For example:
     private static void writeDefaultValue(Types.Field field, JsonGenerator 
generator) throws IOException {
       if (field.getDefaultValue() == null || field.getDefaultValue() == 
JsonProperties.NULL_VALUE) {
         return;
       }
       switch (field.type().typeId()) {
         case RECORD:
         case ARRAY:
         case MAP:
           JsonNode defaultNode = 
JacksonUtils.toJsonNode(field.getDefaultValue());
           generator.writeObjectField(DEFAULT_VALUE, defaultNode);
           break;
         case STRING:
           generator.writeStringField(DEFAULT_VALUE, 
field.getDefaultValue().toString());
           break;
         case INT:
           generator.writeNumberField(DEFAULT_VALUE, 
Integer.valueOf(field.getDefaultValue().toString()));
           break;
         case LONG:
           generator.writeNumberField(DEFAULT_VALUE, 
Long.valueOf(field.getDefaultValue().toString()));
           break;
         case FLOAT:
           generator.writeNumberField(DEFAULT_VALUE, 
Double.valueOf(field.getDefaultValue().toString()));
           break;
         case DOUBLE:
           generator.writeNumberField(DEFAULT_VALUE, 
Double.valueOf(field.getDefaultValue().toString()));
           break;
         case BOOLEAN:
           generator.writeBooleanField(DEFAULT_VALUE, 
Boolean.valueOf(field.getDefaultValue().toString()));
           break;
         case DECIMAL:
           generator.writeBinaryField(DEFAULT_VALUE, 
(byte[])field.getDefaultValue());
           break;
         case FIXED:
           generator.writeBinaryField(DEFAULT_VALUE, 
(byte[])field.getDefaultValue());
           break;
         case BINARY:
           generator.writeBinaryField(DEFAULT_VALUE, 
(byte[])field.getDefaultValue());
           break;
         case DATE:
           generator.writeNumberField(DEFAULT_VALUE, 
Integer.valueOf(field.getDefaultValue().toString()));
           break;
         case TIME:
           generator.writeNumberField(DEFAULT_VALUE, 
Long.valueOf(field.getDefaultValue().toString()));
           break;
         case TIMESTAMP:
           generator.writeNumberField(DEFAULT_VALUE, 
Long.valueOf(field.getDefaultValue().toString()));
           break;
         case UUID:
           generator.writeStringField(DEFAULT_VALUE, 
field.getDefaultValue().toString());
           break;
         default:
           throw new RuntimeException("not implemented");
       }
     }



##########
hudi-common/src/main/java/org/apache/hudi/internal/schema/utils/SerDeHelper.java:
##########
@@ -263,6 +273,31 @@ private static Type parseTypeFromJson(JsonNode jsonNode) {
     throw new IllegalArgumentException(String.format("cannot parse type from 
jsonNode: %s", jsonNode));
   }
 
+  private static Object parseDefaultValueFromJson(JsonNode jsonNode, Type 
type) {
+    if (!jsonNode.has(DEFAULT_VALUE)) {
+      return null;
+    }
+    String defaultValue = jsonNode.get(DEFAULT_VALUE).asText();
+    switch (type.typeId()) {
+      case BOOLEAN:
+        return Boolean.parseBoolean(defaultValue);
+      case INT:
+        return Integer.parseInt(defaultValue);
+      case LONG:
+        return Long.parseLong(defaultValue);
+      case FLOAT:
+        return Float.parseFloat(defaultValue);
+      case DOUBLE:
+        return Double.parseDouble(defaultValue);
+      case BINARY:
+        return Base64.getDecoder().decode(defaultValue);
+      case DECIMAL:
+        return new BigDecimal(defaultValue);
+      default:
+        return defaultValue;

Review Comment:
   Where are the other types? For example:
     private static Object parseDefaultValue(Type type, JsonNode field) {
       if (!field.has(DEFAULT_VALUE)) {
         return null;
       }
       Object defaultValue = null;
       switch (type.typeId()) {
         case RECORD:
         case ARRAY:
         case MAP:
           JsonNode defaultNode = field.get(DEFAULT_VALUE);
           defaultValue = JacksonUtils.toObject(defaultNode);
           break;
         case STRING:
           defaultValue = field.get(DEFAULT_VALUE).asText();
           break;
         case INT:
           defaultValue = field.get(DEFAULT_VALUE).asInt();
           break;
         case LONG:
           defaultValue = field.get(DEFAULT_VALUE).asLong();
           break;
         case FLOAT:
           defaultValue = field.get(DEFAULT_VALUE).asDouble();
           break;
         case DOUBLE:
           defaultValue = field.get(DEFAULT_VALUE).asDouble();
           break;
         case BOOLEAN:
           defaultValue = field.get(DEFAULT_VALUE).asBoolean();
           break;
         case DECIMAL:
           defaultValue = toBytes(field.get(DEFAULT_VALUE));
           break;
         case FIXED:
           defaultValue = toBytes(field.get(DEFAULT_VALUE));
           break;
         case BINARY:
           defaultValue = toBytes(field.get(DEFAULT_VALUE));
           break;
         case DATE:
           defaultValue = field.get(DEFAULT_VALUE).asInt();
           break;
         case TIME:
           defaultValue = field.get(DEFAULT_VALUE).asLong();
           break;
         case TIMESTAMP:
           defaultValue = field.get(DEFAULT_VALUE).asLong();
           break;
         case UUID:
           defaultValue = field.get(DEFAULT_VALUE).asText();
           break;
         default:
           throw new RuntimeException("not implemented");
       }
       return defaultValue;
     }



##########
hudi-common/src/main/java/org/apache/hudi/internal/schema/action/InternalSchemaChangeApplier.java:
##########
@@ -47,12 +47,13 @@ public InternalSchema applyAddChange(
       String colName,
       Type colType,
       String doc,
+      Object defaultValue,

Review Comment:
   Please add this parameter to the Javadoc.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to