anishshri-db commented on code in PR #47104: URL: https://github.com/apache/spark/pull/47104#discussion_r1663210224
########## sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala: ########## @@ -187,23 +187,33 @@ class IncrementalExecution( } } - object WriteStatefulOperatorMetadataRule extends SparkPlanPartialRule { + // Planning rule used to record the state schema for the first run and validate state schema + // changes across query runs. + object StateSchemaAndOperatorMetadataRule extends SparkPlanPartialRule { override val rule: PartialFunction[SparkPlan, SparkPlan] = { + // In the case of TransformWithStateExec, we want to collect this StateSchema + // filepath, and write this path out in the OperatorStateMetadata file case stateStoreWriter: StateStoreWriter if isFirstBatch => + val stateSchemaVersion = stateStoreWriter match { + case _: TransformWithStateExec => sparkSession.sessionState.conf. + getConf(SQLConf.STREAMING_TRANSFORM_WITH_STATE_OP_STATE_SCHEMA_VERSION) + case _ => 2 + } + val stateSchemaPaths = + stateStoreWriter.validateAndMaybeEvolveStateSchema( + hadoopConf, + currentBatchId, + stateSchemaVersion) + // write out the state schema paths to the metadata file val metadata = stateStoreWriter.operatorStateMetadata() + // TODO: Populate metadata with stateSchemaPaths if is v2 Review Comment: nit: `if the version is v2` ? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org