This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new f4febd077631 [SPARK-51849][SQL] Refactoring
`ResolveDDLCommandStringTypes`
f4febd077631 is described below
commit f4febd0776319e1b51513ee1760c736928fb6643
Author: ilicmarkodb <[email protected]>
AuthorDate: Tue Apr 22 23:51:32 2025 +0800
[SPARK-51849][SQL] Refactoring `ResolveDDLCommandStringTypes`
### What changes were proposed in this pull request?
`ResolveDDLCommandStringTypes` is renamed to
`ApplyDefaultCollationToStringType`.
### Why are the changes needed?
This is needed because the rule also applies to non-DDL plans (for example,
when querying a view).
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #50609 from ilicmarkodb/split_resolve_ddl_and_view.
Authored-by: ilicmarkodb <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit c86f617b2f1a2813c099f5a98eb5732b9c3f86b2)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/catalyst/analysis/Analyzer.scala | 2 +-
...ala => ApplyDefaultCollationToStringType.scala} | 80 +++++++++-------------
2 files changed, 33 insertions(+), 49 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index ca84d02430b2..f50bbc79e70d 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -396,7 +396,7 @@ class Analyzer(override val catalogManager: CatalogManager)
extends RuleExecutor
ResolveAliases ::
ResolveSubquery ::
ResolveSubqueryColumnAliases ::
- ResolveDDLCommandStringTypes ::
+ ApplyDefaultCollationToStringType ::
ResolveWindowOrder ::
ResolveWindowFrame ::
ResolveNaturalAndUsingJoin ::
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDDLCommandStringTypes.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollationToStringType.scala
similarity index 74%
rename from
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDDLCommandStringTypes.scala
rename to
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollationToStringType.scala
index 2f7156a80389..cea2988badf4 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDDLCommandStringTypes.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollationToStringType.scala
@@ -24,88 +24,72 @@ import org.apache.spark.sql.connector.catalog.TableCatalog
import org.apache.spark.sql.types.{DataType, StringType}
/**
- * Resolves string types in DDL commands, where the string type inherits the
- * collation from the corresponding object (table/view -> schema -> catalog).
+ * Resolves string types in logical plans by assigning them the appropriate
collation. The
+ * collation is inherited from the relevant object in the hierarchy (e.g.,
table/view -> schema ->
+ * catalog). This rule is primarily applied to DDL commands, but it can also
be triggered in other
+ * scenarios. For example, when querying a view, its query is re-resolved each
time, and that query
+ * can take various forms.
*/
-object ResolveDDLCommandStringTypes extends Rule[LogicalPlan] {
+object ApplyDefaultCollationToStringType extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = {
- if (isDDLCommand(plan)) {
- transformDDL(plan)
- } else {
- // For non-DDL commands no need to do any further resolution of string
types
- plan
+ fetchDefaultCollation(plan) match {
+ case Some(collation) =>
+ transform(plan, StringType(collation))
+ case None => plan
}
}
- /** Default collation used, if object level collation is not provided */
- private def defaultCollation: String = "UTF8_BINARY"
-
- /** Returns the string type that should be used in a given DDL command */
- private def stringTypeForDDLCommand(table: LogicalPlan): StringType = {
- table match {
- case createTable: CreateTable if
createTable.tableSpec.collation.isDefined =>
- StringType(createTable.tableSpec.collation.get)
+ /** Returns the default collation that should be applied to the plan
+ * if specified; otherwise, returns None.
+ */
+ private def fetchDefaultCollation(plan: LogicalPlan): Option[String] = {
+ plan match {
+ case createTable: CreateTable =>
+ createTable.tableSpec.collation
// CreateView also handles CREATE OR REPLACE VIEW
// Unlike for tables, CreateView also handles CREATE OR REPLACE VIEW
- case createView: CreateView if createView.collation.isDefined =>
- StringType(createView.collation.get)
+ case createView: CreateView =>
+ createView.collation
- case replaceTable: ReplaceTable if
replaceTable.tableSpec.collation.isDefined =>
- StringType(replaceTable.tableSpec.collation.get)
+ case replaceTable: ReplaceTable =>
+ replaceTable.tableSpec.collation
case alterTable: AlterTableCommand if alterTable.table.resolved =>
alterTable.table match {
- case resolvedTbl: ResolvedTable =>
- val collation = resolvedTbl.table.properties.getOrDefault(
- TableCatalog.PROP_COLLATION, defaultCollation)
- StringType(collation)
-
- case _ =>
- // As a safeguard, use the default collation for unknown cases.
- StringType(defaultCollation)
+ case resolvedTbl: ResolvedTable
+ if
resolvedTbl.table.properties.containsKey(TableCatalog.PROP_COLLATION ) =>
+
Some(resolvedTbl.table.properties.get(TableCatalog.PROP_COLLATION))
+ case _ => None
}
case alterViewAs: AlterViewAs =>
alterViewAs.child match {
case resolvedPersistentView: ResolvedPersistentView =>
- val collation =
resolvedPersistentView.metadata.collation.getOrElse(defaultCollation)
- StringType(collation)
+ resolvedPersistentView.metadata.collation
case resolvedTempView: ResolvedTempView =>
- val collation =
resolvedTempView.metadata.collation.getOrElse(defaultCollation)
- StringType(collation)
- case _ =>
- // As a safeguard, use the default collation for unknown cases.
- StringType(defaultCollation)
+ resolvedTempView.metadata.collation
+ case _ => None
}
// Check if view has default collation
case _ if AnalysisContext.get.collation.isDefined =>
- StringType(AnalysisContext.get.collation.get)
+ AnalysisContext.get.collation
- case _ => StringType(defaultCollation)
+ case _ => None
}
}
- private def isDDLCommand(plan: LogicalPlan): Boolean = plan exists {
- case _: AddColumns | _: ReplaceColumns | _: AlterColumns => true
- case _ => isCreateOrAlterPlan(plan)
- }
-
private def isCreateOrAlterPlan(plan: LogicalPlan): Boolean = plan match {
// For CREATE TABLE, only v2 CREATE TABLE command is supported.
// Also, table DEFAULT COLLATION cannot be specified through CREATE TABLE
AS SELECT command.
case _: V2CreateTablePlan | _: ReplaceTable | _: CreateView | _:
AlterViewAs => true
- // Check if view has default collation
- case _ if AnalysisContext.get.collation.isDefined => true
case _ => false
}
- private def transformDDL(plan: LogicalPlan): LogicalPlan = {
- val newType = stringTypeForDDLCommand(plan)
-
+ private def transform(plan: LogicalPlan, newType: StringType): LogicalPlan =
{
plan resolveOperators {
- case p if isCreateOrAlterPlan(p) =>
+ case p if isCreateOrAlterPlan(p) ||
AnalysisContext.get.collation.isDefined =>
transformPlan(p, newType)
case addCols: AddColumns =>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]