giaosudau commented on a change in pull request #2012: URL: https://github.com/apache/hudi/pull/2012#discussion_r543093343
########## File path: hudi-spark/src/main/scala/org/apache/hudi/AvroConversionHelper.scala ########## @@ -364,4 +366,40 @@ object AvroConversionHelper { } } } + + /** + * Remove namespace from fixed field. + * org.apache.spark.sql.avro.SchemaConverters.toAvroType method adds namespace to fixed avro field + * https://github.com/apache/spark/blob/master/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala#L177 + * So, we need to remove that namespace so that reader schema without namespace do not throw erorr like this one + * org.apache.avro.AvroTypeException: Found hoodie.source.hoodie_source.height.fixed, expecting fixed + * + * @param schema Schema from which namespace needs to be removed for fixed fields + * @return input schema with namespace removed for fixed fields, if any + */ + def removeNamespaceFromFixedFields(schema: Schema): Schema ={ Review comment: It didn't work with a table that has two or more **fixed type fields.** ```{ "name": "fixed_type_field", "type": { "type": "fixed", "name": "fixed", "namespace": "hoodie.source.hoodie_source.fixed_type_field", "size": 7, "logicalType": "decimal", "precision": 15, "scale": 8 } }, "name": "fixed_type_field2", "type": { "type": "fixed", "name": "fixed", "namespace": "hoodie.source.hoodie_source.fixed_type_field2", "size": 7, "logicalType": "decimal", "precision": 15, "scale": 8 } } ``` ``` Exception in thread "main" org.apache.avro.SchemaParseException: Can't redefine: fixed at org.apache.avro.Schema$Names.put(Schema.java:1128) at org.apache.avro.Schema$NamedSchema.writeNameRef(Schema.java:562) at org.apache.avro.Schema$FixedSchema.toJson(Schema.java:907) at org.apache.avro.Schema$RecordSchema.fieldsToJson(Schema.java:716) at org.apache.avro.Schema$RecordSchema.toJson(Schema.java:701) at org.apache.avro.Schema.toString(Schema.java:324) at org.apache.hudi.utilities.schema.DebeziumSchemaRegistryProvider.getTargetSchema(DebeziumSchemaRegistryProvider.java:77) ``` With the namespace kept, it fails with another 
error ``` Caused by: java.lang.ClassCastException: java.nio.HeapByteBuffer cannot be cast to org.apache.avro.generic.GenericFixed at org.apache.hudi.AvroConversionHelper$.$anonfun$createConverterToRow$5(AvroConversionHelper.scala:98) at org.apache.hudi.AvroConversionHelper$.$anonfun$createConverterToRow$9(AvroConversionHelper.scala:173) at org.apache.hudi.AvroConversionHelper$.$anonfun$createConverterToRow$9(AvroConversionHelper.scala:173) at org.apache.hudi.AvroConversionUtils$.$anonfun$createDataFrame$2(AvroConversionUtils.scala:43) ``` I fixed it by changing the `FIXED` case as follows: ``` case (d: DecimalType, FIXED) => (item: AnyRef) => if (item == null) { null } else { val decimalConversion = new DecimalConversion // val bigDecimal = decimalConversion.fromFixed(item.asInstanceOf[GenericFixed], avroSchema, // LogicalTypes.decimal(d.precision, d.scale)) // createDecimal(bigDecimal, d.precision, d.scale) val bigDecimal = decimalConversion.fromBytes(item.asInstanceOf[ByteBuffer], avroSchema, LogicalTypes.decimal(d.precision, d.scale)) createDecimal(bigDecimal, d.precision, d.scale) } ``` i.e. reusing the conversion from the `case (d: DecimalType, BYTES) =>` branch. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org