This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 747953eb5c4 [SPARK-44071] Define and use Unresolved[Leaf|Unary]Node traits 747953eb5c4 is described below commit 747953eb5c46e121faf476a060049f1423ae7e91 Author: Ryan Johnson <ryan.john...@databricks.com> AuthorDate: Fri Jun 16 23:30:08 2023 +0300 [SPARK-44071] Define and use Unresolved[Leaf|Unary]Node traits ### What changes were proposed in this pull request? Looking at [unresolved.scala](https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala), catalyst would benefit from an `UnresolvedNode` trait that various `UnresolvedFoo` classes could inherit: ```scala trait UnresolvedNode extends LogicalPlan { override def output: Seq[Attribute] = Nil override lazy val resolved = false } ``` Today, the code is duplicated in ~20 locations (7 of them in that one file). ### Why are the changes needed? Reduces redundancy, improves readability, documents programmer intent better. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Mild refactor, existing unit tests suffice. Closes #41617 from ryan-johnson-databricks/unresolved-node-trait. Authored-by: Ryan Johnson <ryan.john...@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../sql/catalyst/analysis/RelationTimeTravel.scala | 8 ++-- .../spark/sql/catalyst/analysis/parameters.scala | 10 ++--- .../spark/sql/catalyst/analysis/unresolved.scala | 48 +++++++++------------- .../sql/catalyst/analysis/v2ResolutionPlans.scala | 32 +++------------ .../spark/sql/catalyst/catalog/interface.scala | 11 ++--- .../plans/logical/basicLogicalOperators.scala | 6 +-- .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 5 +-- 7 files changed, 39 insertions(+), 81 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationTimeTravel.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationTimeTravel.scala index 4daefa816a5..6e0d0998883 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationTimeTravel.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationTimeTravel.scala @@ -17,8 +17,8 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} -import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.trees.TreePattern.{RELATION_TIME_TRAVEL, TreePattern} /** @@ -29,8 +29,6 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.{RELATION_TIME_TRAVEL, Tr case class RelationTimeTravel( relation: LogicalPlan, timestamp: Option[Expression], - version: Option[String]) extends LeafNode { - override def output: Seq[Attribute] = Nil - override lazy val resolved: Boolean = false + version: Option[String]) extends UnresolvedLeafNode { override val nodePatterns: Seq[TreePattern] = Seq(RELATION_TIME_TRAVEL) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala index 2a31e90465c..a00f9cec92c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala @@ -18,8 +18,8 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.SparkException -import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, LeafExpression, Literal, SubqueryExpression, Unevaluable} -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} +import org.apache.spark.sql.catalyst.expressions.{Expression, LeafExpression, Literal, SubqueryExpression, Unevaluable} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreePattern.{PARAMETER, PARAMETERIZED_QUERY, TreePattern, UNRESOLVED_WITH} import org.apache.spark.sql.errors.QueryErrorsBase @@ -47,10 +47,10 @@ case class Parameter(name: String) extends LeafExpression with Unevaluable { * The logical plan representing a parameterized query. It will be removed during analysis after * the parameters are bind. */ -case class ParameterizedQuery(child: LogicalPlan, args: Map[String, Expression]) extends UnaryNode { +case class ParameterizedQuery(child: LogicalPlan, args: Map[String, Expression]) + extends UnresolvedUnaryNode { + assert(args.nonEmpty) - override def output: Seq[Attribute] = Nil - override lazy val resolved = false final override val nodePatterns: Seq[TreePattern] = Seq(PARAMETERIZED_QUERY) override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(child = newChild) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 6637f550aa7..079d7564623 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -36,6 +36,18 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap class UnresolvedException(function: String) extends AnalysisException(s"Invalid call to $function on unresolved object") +/** Parent trait for unresolved node types */ +trait UnresolvedNode extends LogicalPlan { + override def output: Seq[Attribute] = Nil + override lazy val resolved: Boolean = false +} + +/** Parent trait for unresolved leaf node types */ +trait UnresolvedLeafNode extends LeafNode with UnresolvedNode + +/** Parent trait for unresolved unary node types */ +trait UnresolvedUnaryNode extends UnaryNode with UnresolvedNode + /** * A logical plan placeholder that holds the identifier clause string expression. It will be * replaced by the actual logical plan with the evaluated identifier string. @@ -43,9 +55,7 @@ class UnresolvedException(function: String) case class PlanWithUnresolvedIdentifier( identifierExpr: Expression, planBuilder: Seq[String] => LogicalPlan) - extends LeafNode { - override def output: Seq[Attribute] = Nil - override lazy val resolved = false + extends UnresolvedLeafNode { final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_IDENTIFIER) } @@ -77,7 +87,7 @@ case class UnresolvedRelation( multipartIdentifier: Seq[String], options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty(), override val isStreaming: Boolean = false) - extends LeafNode with NamedRelation { + extends UnresolvedLeafNode with NamedRelation { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ /** Returns a `.` separated name for this relation. */ @@ -85,10 +95,6 @@ case class UnresolvedRelation( override def name: String = tableName - override def output: Seq[Attribute] = Nil - - override lazy val resolved = false - final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_RELATION) } @@ -115,11 +121,9 @@ object UnresolvedRelation { case class UnresolvedInlineTable( names: Seq[String], rows: Seq[Seq[Expression]]) - extends LeafNode { + extends UnresolvedLeafNode { lazy val expressionsResolved: Boolean = rows.forall(_.forall(_.resolved)) - override lazy val resolved = false - override def output: Seq[Attribute] = Nil } /** @@ -134,11 +138,7 @@ case class UnresolvedInlineTable( case class UnresolvedTableValuedFunction( name: Seq[String], functionArgs: Seq[Expression]) - extends LeafNode { - - override def output: Seq[Attribute] = Nil - - override lazy val resolved = false + extends UnresolvedLeafNode { final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_TABLE_VALUED_FUNCTION) } @@ -174,11 +174,7 @@ object UnresolvedTableValuedFunction { case class UnresolvedTVFAliases( name: Seq[String], child: LogicalPlan, - outputNames: Seq[String]) extends UnaryNode { - - override def output: Seq[Attribute] = Nil - - override lazy val resolved = false + outputNames: Seq[String]) extends UnresolvedUnaryNode { final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_TVF_ALIASES) @@ -634,11 +630,7 @@ case class UnresolvedAlias( case class UnresolvedSubqueryColumnAliases( outputColumnNames: Seq[String], child: LogicalPlan) - extends UnaryNode { - - override def output: Seq[Attribute] = Nil - - override lazy val resolved = false + extends UnresolvedUnaryNode { final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_SUBQUERY_COLUMN_ALIAS) @@ -717,9 +709,7 @@ case class UnresolvedOrdinal(ordinal: Int) case class UnresolvedHaving( havingCondition: Expression, child: LogicalPlan) - extends UnaryNode { - override lazy val resolved: Boolean = false - override def output: Seq[Attribute] = child.output + extends UnresolvedUnaryNode { override protected def withNewChildInternal(newChild: LogicalPlan): UnresolvedHaving = copy(child = newChild) final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_HAVING) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 2d26e281607..cbc08f760fb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -34,11 +34,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap * Holds the name of a namespace that has yet to be looked up in a catalog. It will be resolved to * [[ResolvedNamespace]] during analysis. */ -case class UnresolvedNamespace(multipartIdentifier: Seq[String]) extends LeafNode { - override lazy val resolved: Boolean = false - - override def output: Seq[Attribute] = Nil -} +case class UnresolvedNamespace(multipartIdentifier: Seq[String]) extends UnresolvedLeafNode /** * Holds the name of a table that has yet to be looked up in a catalog. It will be resolved to @@ -47,11 +43,7 @@ case class UnresolvedNamespace(multipartIdentifier: Seq[String]) extends LeafNod case class UnresolvedTable( multipartIdentifier: Seq[String], commandName: String, - relationTypeMismatchHint: Option[String]) extends LeafNode { - override lazy val resolved: Boolean = false - - override def output: Seq[Attribute] = Nil -} + relationTypeMismatchHint: Option[String]) extends UnresolvedLeafNode /** * Holds the name of a view that has yet to be looked up. It will be resolved to @@ -61,11 +53,7 @@ case class UnresolvedView( multipartIdentifier: Seq[String], commandName: String, allowTemp: Boolean, - relationTypeMismatchHint: Option[String]) extends LeafNode { - override lazy val resolved: Boolean = false - - override def output: Seq[Attribute] = Nil -} + relationTypeMismatchHint: Option[String]) extends UnresolvedLeafNode /** * Holds the name of a table or view that has yet to be looked up in a catalog. It will @@ -75,10 +63,7 @@ case class UnresolvedView( case class UnresolvedTableOrView( multipartIdentifier: Seq[String], commandName: String, - allowTempView: Boolean) extends LeafNode { - override lazy val resolved: Boolean = false - override def output: Seq[Attribute] = Nil -} + allowTempView: Boolean) extends UnresolvedLeafNode sealed trait PartitionSpec extends LeafExpression with Unevaluable { override def dataType: DataType = throw new IllegalStateException( @@ -127,9 +112,7 @@ case class UnresolvedFunctionName( commandName: String, requirePersistent: Boolean, funcTypeMismatchHint: Option[String], - possibleQualifiedName: Option[Seq[String]] = None) extends LeafNode { - override lazy val resolved: Boolean = false - override def output: Seq[Attribute] = Nil + possibleQualifiedName: Option[Seq[String]] = None) extends UnresolvedLeafNode { final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_FUNC) } @@ -138,10 +121,7 @@ case class UnresolvedFunctionName( * be resolved to [[ResolvedIdentifier]] during analysis. */ case class UnresolvedIdentifier(nameParts: Seq[String], allowTemp: Boolean = false) - extends LeafNode { - override lazy val resolved: Boolean = false - override def output: Seq[Attribute] = Nil -} + extends UnresolvedLeafNode /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index fce820c4927..6b72500f3f6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -30,7 +30,7 @@ import org.json4s.jackson.JsonMethods._ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow, SQLConfHelper, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation +import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, UnresolvedLeafNode} import org.apache.spark.sql.catalyst.catalog.CatalogTable.VIEW_STORING_ANALYZED_PLAN import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, Cast, ExprId, Literal} import org.apache.spark.sql.catalyst.plans.logical._ @@ -790,10 +790,8 @@ object CatalogTypes { case class UnresolvedCatalogRelation( tableMeta: CatalogTable, options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty(), - override val isStreaming: Boolean = false) extends LeafNode { + override val isStreaming: Boolean = false) extends UnresolvedLeafNode { assert(tableMeta.identifier.database.isDefined) - override lazy val resolved: Boolean = false - override def output: Seq[Attribute] = Nil } /** @@ -803,12 +801,9 @@ case class UnresolvedCatalogRelation( */ case class TemporaryViewRelation( tableMeta: CatalogTable, - plan: Option[LogicalPlan] = None) extends LeafNode { + plan: Option[LogicalPlan] = None) extends UnresolvedLeafNode { require(plan.isEmpty || (plan.get.resolved && tableMeta.properties.contains(VIEW_STORING_ANALYZED_PLAN))) - - override lazy val resolved: Boolean = false - override def output: Seq[Attribute] = Nil } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index a90510fe681..e23966775e9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.{AliasIdentifier, SQLConfHelper} -import org.apache.spark.sql.catalyst.analysis.{AnsiTypeCoercion, MultiInstanceRelation, Resolver, TypeCoercion, TypeCoercionBase} +import org.apache.spark.sql.catalyst.analysis.{AnsiTypeCoercion, MultiInstanceRelation, Resolver, TypeCoercion, TypeCoercionBase, UnresolvedUnaryNode} import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} import org.apache.spark.sql.catalyst.catalog.CatalogTable.VIEW_STORING_ANALYZED_PLAN import org.apache.spark.sql.catalyst.expressions._ @@ -1498,12 +1498,10 @@ case class Unpivot( aliases: Option[Seq[Option[String]]], variableColumnName: String, valueColumnNames: Seq[String], - child: LogicalPlan) extends UnaryNode { + child: LogicalPlan) extends UnresolvedUnaryNode { // There should be no code path that creates `Unpivot` with both set None assert(ids.isDefined || values.isDefined, "at least one of `ids` and `values` must be defined") - override lazy val resolved = false // Unpivot will be replaced after being resolved. - override def output: Seq[Attribute] = Nil override def metadataOutput: Seq[Attribute] = Nil final override val nodePatterns: Seq[TreePattern] = Seq(UNPIVOT) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index b657dd55eb7..94bbb9e0caa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -95,10 +95,7 @@ case class TestFunction( copy(children = newChildren) } -case class UnresolvedTestPlan() extends LeafNode { - override lazy val resolved = false - override def output: Seq[Attribute] = Nil -} +case class UnresolvedTestPlan() extends UnresolvedLeafNode class AnalysisErrorSuite extends AnalysisTest { import TestRelations._ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org