nsivabalan commented on a change in pull request #2762: URL: https://github.com/apache/hudi/pull/2762#discussion_r607153471
########## File path: hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala ########## @@ -49,13 +50,45 @@ object AvroConversionUtils { def convertStructTypeToAvroSchema(structType: StructType, structName: String, recordNamespace: String): Schema = { - SchemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace) + getAvroSchemaWithDefaults(SchemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace)) } def convertAvroSchemaToStructType(avroSchema: Schema): StructType = { SchemaConverters.toSqlType(avroSchema).dataType.asInstanceOf[StructType] } + /** + * Regenerate Avro schema with proper nullable default values. Avro expects null to be first entry in case of UNION so that + * default value can be set to null. + * @param writeSchema original writer schema. + * @return the regenerated schema with proper defaults set. + */ + def getAvroSchemaWithDefaults(writeSchema: Schema): Schema = { + val modifiedFields = writeSchema.getFields.map(field => { + field.schema().getType match { + case Schema.Type.RECORD => { + val newSchema = getAvroSchemaWithDefaults(field.schema()) + new Schema.Field(field.name(), newSchema, field.doc(), JsonProperties.NULL_VALUE) Review comment: Sure, feel free to fix it. The one you had in your draft commit was using a deprecated constructor, and I thought of fixing it. We could probably do field.defaultValue() -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org