nsivabalan commented on code in PR #8107: URL: https://github.com/apache/hudi/pull/8107#discussion_r1131845834
########## hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala: ########## @@ -1096,31 +1104,47 @@ object HoodieSparkSqlWriter { Some(writerSchema)) avroRecords.mapPartitions(it => { + val sparkPartitionId = TaskContext.getPartitionId() + val dataFileSchema = new Schema.Parser().parse(dataFileSchemaStr) val consistentLogicalTimestampEnabled = parameters.getOrElse( DataSourceWriteOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.key(), DataSourceWriteOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.defaultValue()).toBoolean - it.map { avroRecord => + // generate record keys if auto generation is enabled. + val recordsWithRecordKeyOverride = mayBeAutoGenerateRecordKeys(autoGenerateRecordKeys, it, instantTime) + + // handle dropping partition columns + recordsWithRecordKeyOverride.map { avroRecordRecordKeyOverRide => val processedRecord = if (shouldDropPartitionColumns) { - HoodieAvroUtils.rewriteRecord(avroRecord, dataFileSchema) + HoodieAvroUtils.rewriteRecord(avroRecordRecordKeyOverRide._1, dataFileSchema) + } else { + avroRecordRecordKeyOverRide._1 + } + + // Generate HoodieKey for records + val hoodieKey = if (autoGenerateRecordKeys) { + // fetch record key from the recordKeyOverride if auto generation is enabled. + new HoodieKey(avroRecordRecordKeyOverRide._2.get, keyGenerator.getKey(avroRecordRecordKeyOverRide._1).getPartitionPath) Review Comment: yes. https://github.com/apache/hudi/pull/7699 HUDI-5535 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org