aditiwari01 commented on a change in pull request #2762: URL: https://github.com/apache/hudi/pull/2762#discussion_r606761470
########## File path: hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala ########## @@ -49,13 +50,45 @@ object AvroConversionUtils { def convertStructTypeToAvroSchema(structType: StructType, structName: String, recordNamespace: String): Schema = { - SchemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace) + getAvroSchemaWithDefaults(SchemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace)) } def convertAvroSchemaToStructType(avroSchema: Schema): StructType = { SchemaConverters.toSqlType(avroSchema).dataType.asInstanceOf[StructType] } + /** + * Regenerate Avro schema with proper nullable default values. Avro expects null to be first entry in case of UNION so that + * default value can be set to null. + * @param writeSchema original writer schema. + * @return the regenerated schema with proper defaults set. + */ + def getAvroSchemaWithDefaults(writeSchema: Schema): Schema = { + val modifiedFields = writeSchema.getFields.map(field => { + field.schema().getType match { + case Schema.Type.RECORD => { + val newSchema = getAvroSchemaWithDefaults(field.schema()) + new Schema.Field(field.name(), newSchema, field.doc(), JsonProperties.NULL_VALUE) Review comment: Why are we setting a null default here? If a field is nullable, it would be of type Union. IMO a Record field must not have a default. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org