jonvex commented on code in PR #13663:
URL: https://github.com/apache/hudi/pull/13663#discussion_r2246425278
##########
hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java:
##########
@@ -1066,41 +1066,34 @@ private static Object
rewriteRecordWithNewSchemaInternal(Object oldRecord, Schem
switch (newSchema.getType()) {
case RECORD:
if (!(oldRecord instanceof IndexedRecord)) {
- throw new SchemaCompatibilityException("cannot rewrite record with
different type");
+ throw new SchemaCompatibilityException(String.format("Cannot rewrite
%s as a record", oldRecord.getClass().getName()));
}
IndexedRecord indexedRecord = (IndexedRecord) oldRecord;
- List<Schema.Field> fields = newSchema.getFields();
GenericData.Record newRecord = new GenericData.Record(newSchema);
- for (int i = 0; i < fields.size(); i++) {
- Schema.Field field = fields.get(i);
- String fieldName = field.name();
- if (!skipMetadataFields || !isMetadataField(fieldName)) {
- fieldNames.push(fieldName);
- Schema.Field oldField = oldSchema.getField(field.name());
- if (oldField != null && !renameCols.containsKey(field.name())) {
- newRecord.put(i,
rewriteRecordWithNewSchema(indexedRecord.get(oldField.pos()),
oldField.schema(), field.schema(), renameCols, fieldNames, false));
- } else {
- String fieldFullName = createFullName(fieldNames);
- String fieldNameFromOldSchema = renameCols.get(fieldFullName);
- // deal with rename
- Schema.Field oldFieldRenamed = fieldNameFromOldSchema == null ?
null : oldSchema.getField(fieldNameFromOldSchema);
- if (oldFieldRenamed != null) {
- // find rename
- newRecord.put(i,
rewriteRecordWithNewSchema(indexedRecord.get(oldFieldRenamed.pos()),
oldFieldRenamed.schema(), field.schema(), renameCols, fieldNames, false));
- } else {
- // deal with default value
- if (field.defaultVal() instanceof JsonProperties.Null) {
- newRecord.put(i, null);
- } else {
- if (!isNullable(field.schema()) && field.defaultVal() ==
null) {
- throw new SchemaCompatibilityException("Field " +
fieldFullName + " has no default value and is non-nullable");
- }
- newRecord.put(i, field.defaultVal());
- }
- }
- }
- fieldNames.pop();
+
+ for (int i = 0; i < newSchema.getFields().size(); i++) {
+ Schema.Field newSchemaField = newSchema.getFields().get(i);
+ String newSchemaFieldName = newSchemaField.name();
+ if (skipMetadataFields && isMetadataField(newSchemaFieldName)) {
+ continue;
+ }
+ fieldNames.push(newSchemaFieldName);
+ String fullFieldName = createFullName(fieldNames);
Review Comment:
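Is this going to be too much of a perf issue? The code is cleaner this way,
but createFullName(fieldNames) now runs on every loop iteration, whereas the
old code only called it on the rename/default-value fallback path. There are
two improvements we could make:
1. Build the parent prefix once, outside the loop, and just append
newSchemaFieldName to it inside the loop.
2. If renameCols is empty, skip constructing the full name entirely.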
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org