xushiyan commented on a change in pull request #4565: URL: https://github.com/apache/hudi/pull/4565#discussion_r785484692
########## File path: hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala ########## @@ -118,6 +123,18 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext, rows } + private def extractRequiredSchema(iter: Iterator[InternalRow]): Iterator[InternalRow] = { + val tableAvroSchema = new Schema.Parser().parse(tableState.tableAvroSchema) + val requiredAvroSchema = new Schema.Parser().parse(tableState.requiredAvroSchema) Review comment: this variable appears to be unused — can it be removed? ########## File path: hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala ########## @@ -118,6 +123,18 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext, rows } + private def extractRequiredSchema(iter: Iterator[InternalRow]): Iterator[InternalRow] = { Review comment: the name could imply that this method returns a schema ```suggestion private def toRowsWithRequiredSchema(iter: Iterator[InternalRow]): Iterator[InternalRow] = { ``` ########## File path: hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala ########## @@ -54,15 +54,20 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext, private val preCombineField = tableState.preCombineField private val recordKeyFieldOpt = tableState.recordKeyFieldOpt private val payloadProps = if (preCombineField.isDefined) { - Some(HoodiePayloadConfig.newBuilder.withPayloadOrderingField(preCombineField.get).build.getProps) + val properties = HoodiePayloadConfig.newBuilder + .withPayloadOrderingField(preCombineField.get) + .withPayloadEventTimeField(preCombineField.get) Review comment: > setting returnNullIfNotFound to true does not take effect when call HoodieAvroUtils.getNestedFieldVal This looks like a bug in `getNestedFieldVal()`, doesn't it? Is it fixable in Spark 3.2? 
########## File path: hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala ########## @@ -729,6 +733,12 @@ object HoodieSparkSqlWriter { mergedParams(key) = value } } + + // use preCombineField to fill in PAYLOAD_ORDERING_FIELD_PROP_KEY and PAYLOAD_EVENT_TIME_FIELD_PROP_KEY + if (mergedParams.contains(PRECOMBINE_FIELD.key())) { + mergedParams.put(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP_KEY, mergedParams(PRECOMBINE_FIELD.key())) + mergedParams.put(HoodiePayloadProps.PAYLOAD_EVENT_TIME_FIELD_PROP_KEY, mergedParams(PRECOMBINE_FIELD.key())) + } Review comment: the expected scenario should be: when `preCombineField` is set, `PAYLOAD_ORDERING_FIELD_PROP_KEY` is set automatically, and `PAYLOAD_EVENT_TIME_FIELD_PROP_KEY` should never be required. It would be good to fix these along the way. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org