Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20020#discussion_r158019546

    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala ---
    @@ -87,6 +87,51 @@ case class ExecutedCommandExec(cmd: RunnableCommand) extends LeafExecNode {
       }
     }

    +/**
    + * A physical operator that executes the run method of a `RunnableCommand` and
    + * saves the result to prevent multiple executions.
    + *
    + * @param cmd the `RunnableCommand` this operator will run.
    + * @param children the children physical plans ran by the `RunnableCommand`.
    + */
    +case class DataWritingCommandExec(cmd: DataWritingCommand, children: Seq[SparkPlan])
    +  extends SparkPlan {
    +
    +  override lazy val metrics: Map[String, SQLMetric] = cmd.metrics
    +
    +  /**
    +   * A concrete command should override this lazy field to wrap up any side effects caused by the
    +   * command or any other computation that should be evaluated exactly once. The value of this field
    +   * can be used as the contents of the corresponding RDD generated from the physical plan of this
    +   * command.
    +   *
    +   * The `execute()` method of all the physical command classes should reference `sideEffectResult`
    +   * so that the command can be executed eagerly right after the command query is created.
    +   */
    +  protected[sql] lazy val sideEffectResult: Seq[InternalRow] = {
    +    val converter = CatalystTypeConverters.createToCatalystConverter(schema)
    +    val rows = cmd.run(sqlContext.sparkSession, children)
    +
    +    rows.map(converter(_).asInstanceOf[InternalRow])
    +  }
    +
    +  override def innerChildren: Seq[QueryPlan[_]] = cmd.children
    --- End diff --

    why do we need this?
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org