Repository: spark
Updated Branches:
  refs/heads/master d8e825e3b -> 64fbd1cef

[SPARK-22124][SQL] Sample and Limit should also defer input evaluation under codegen

## What changes were proposed in this pull request?

We can override `usedInputs` to claim that an operator defers input evaluation. `Sample` and `Limit` are two operators which should claim it but don't. We should do it.

## How was this patch tested?

Existing tests.

Author: Liang-Chi Hsieh <vii...@gmail.com>

Closes #19345 from viirya/SPARK-22124.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/64fbd1ce
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/64fbd1ce
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/64fbd1ce

Branch: refs/heads/master
Commit: 64fbd1cef30033efd83570a752980ea658ee12bb
Parents: d8e825e
Author: Liang-Chi Hsieh <vii...@gmail.com>
Authored: Tue Sep 26 15:23:13 2017 +0800
Committer: Wenchen Fan <wenc...@databricks.com>
Committed: Tue Sep 26 15:23:13 2017 +0800

----------------------------------------------------------------------
 .../org/apache/spark/sql/execution/basicPhysicalOperators.scala | 4 ++++
 .../src/main/scala/org/apache/spark/sql/execution/limit.scala | 4 ++++
 2 files changed, 8 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/64fbd1ce/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
index 18142c4..8389e2f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
@@ -267,6 +267,10 @@ case class SampleExec(
     }
   }
 
+  // Mark this as empty. This plan doesn't need to evaluate any inputs and can defer the evaluation
+  // to the parent operator.
+  override def usedInputs: AttributeSet = AttributeSet.empty
+
   override def inputRDDs(): Seq[RDD[InternalRow]] = {
     child.asInstanceOf[CodegenSupport].inputRDDs()
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/64fbd1ce/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
index 73a0f87..1f515e2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala
@@ -62,6 +62,10 @@ trait BaseLimitExec extends UnaryExecNode with CodegenSupport {
     child.asInstanceOf[CodegenSupport].inputRDDs()
   }
 
+  // Mark this as empty. This plan doesn't need to evaluate any inputs and can defer the evaluation
+  // to the parent operator.
+  override def usedInputs: AttributeSet = AttributeSet.empty
+
   protected override def doProduce(ctx: CodegenContext): String = {
     child.asInstanceOf[CodegenSupport].produce(ctx, this)
   }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org