This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new e8965a12beb [SPARK-41116][CONNECT] Input relation can be optional for Project in Connect proto e8965a12beb is described below commit e8965a12beb067e25c02baf08987616987608545 Author: Rui Wang <rui.w...@databricks.com> AuthorDate: Mon Nov 14 12:15:17 2022 +0800 [SPARK-41116][CONNECT] Input relation can be optional for Project in Connect proto ### What changes were proposed in this pull request? I was writing test cases to test expressions and realized that we can allow `Project` without an input plan. For example, `SELECT 1` is a valid query. For SQL, it will generate `OneRowRelation` to make up the input plan, but Connect users shouldn't need to bother appending that relation. Instead, they can just submit a Project with expressions. Per our design, Proto is also an API layer and anyone can draft a proto plan without using built-in clients. This PR will improve the proto usability for `Project`. ### Why are the changes needed? 1. Improve usability. 2. Help write test cases for expressions. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? UT Closes #38632 from amaliujia/SPARK-41116. 
Authored-by: Rui Wang <rui.w...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../connect/src/main/protobuf/spark/connect/relations.proto | 3 +++ .../main/scala/org/apache/spark/sql/connect/dsl/package.scala | 11 +++++++++++ .../spark/sql/connect/planner/SparkConnectPlanner.scala | 6 +++++- .../spark/sql/connect/planner/SparkConnectProtoSuite.scala | 4 ++++ python/pyspark/sql/connect/proto/relations_pb2.pyi | 6 +++++- 5 files changed, 28 insertions(+), 2 deletions(-) diff --git a/connector/connect/src/main/protobuf/spark/connect/relations.proto b/connector/connect/src/main/protobuf/spark/connect/relations.proto index 4f30b5bfbde..759e9c04e63 100644 --- a/connector/connect/src/main/protobuf/spark/connect/relations.proto +++ b/connector/connect/src/main/protobuf/spark/connect/relations.proto @@ -98,6 +98,9 @@ message Read { // The input relation must be specified. // The projected expression can be an arbitrary expression. message Project { + // (Optional) Input relation is optional for Project. + // + // For example, `SELECT ABS(-1)` is valid plan without an input plan. 
Relation input = 1; repeated Expression expressions = 3; } diff --git a/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala b/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala index f55ed835d23..eeffd054c7b 100644 --- a/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala +++ b/connector/connect/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala @@ -241,6 +241,17 @@ package object dsl { } } + def select(exprs: Expression*): Relation = { + Relation + .newBuilder() + .setProject( + Project + .newBuilder() + .addAllExpressions(exprs.toIterable.asJava) + .build()) + .build() + } + implicit class DslLogicalPlan(val logicalPlan: Relation) { def select(exprs: Expression*): Relation = { Relation diff --git a/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index f8ccc7b62e7..98660c32c4c 100644 --- a/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -235,7 +235,11 @@ class SparkConnectPlanner(session: SparkSession) { } private def transformProject(rel: proto.Project): LogicalPlan = { - val baseRel = transformRelation(rel.getInput) + val baseRel = if (rel.hasInput) { + transformRelation(rel.getInput) + } else { + logical.OneRowRelation() + } // TODO: support the target field for *. 
val projection = if (rel.getExpressionsCount == 1 && rel.getExpressions(0).hasUnresolvedStar) { diff --git a/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala b/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala index 53ea1988809..111c6386c52 100644 --- a/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala +++ b/connector/connect/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala @@ -429,6 +429,10 @@ class SparkConnectProtoSuite extends PlanTest with SparkConnectPlanTest { } } + test("Project does not require an input") { + comparePlans(select(1), spark.sql("SELECT 1")) + } + private def createLocalRelationProtoByQualifiedAttributes( attrs: Seq[proto.Expression.QualifiedAttribute]): proto.Relation = { val localRelationBuilder = proto.LocalRelation.newBuilder() diff --git a/python/pyspark/sql/connect/proto/relations_pb2.pyi b/python/pyspark/sql/connect/proto/relations_pb2.pyi index e706fa3e11d..ea7ef02249e 100644 --- a/python/pyspark/sql/connect/proto/relations_pb2.pyi +++ b/python/pyspark/sql/connect/proto/relations_pb2.pyi @@ -437,7 +437,11 @@ class Project(google.protobuf.message.Message): INPUT_FIELD_NUMBER: builtins.int EXPRESSIONS_FIELD_NUMBER: builtins.int @property - def input(self) -> global___Relation: ... + def input(self) -> global___Relation: + """(Optional) Input relation is optional for Project. + + For example, `SELECT ABS(-1)` is valid plan without an input plan. + """ @property def expressions( self, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org