rdblue commented on code in PR #9332:
URL: https://github.com/apache/iceberg/pull/9332#discussion_r1430719038
##########
spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSparkSqlExtensionsParser.scala:
##########
@@ -122,37 +147,132 @@ class IcebergSparkSqlExtensionsParser(delegate:
ParserInterface) extends ParserI
if (isIcebergCommand(sqlTextAfterSubstitution)) {
parse(sqlTextAfterSubstitution) { parser =>
astBuilder.visit(parser.singleStatement()) }.asInstanceOf[LogicalPlan]
} else {
- delegate.parsePlan(sqlText)
+ ViewSubstitutionExecutor.execute(delegate.parsePlan(sqlText))
}
}
- object UnresolvedIcebergTable {
+ private object ViewSubstitutionExecutor extends RuleExecutor[LogicalPlan] {
+ private val fixedPoint = FixedPoint(
+ maxIterations,
+ errorOnExceed = true,
+ maxIterationsSetting = SQLConf.ANALYZER_MAX_ITERATIONS.key)
- def unapply(plan: LogicalPlan): Option[LogicalPlan] = {
- EliminateSubqueryAliases(plan) match {
- case UnresolvedRelation(multipartIdentifier, _, _) if
isIcebergTable(multipartIdentifier) =>
- Some(plan)
- case _ =>
+ override protected def batches: Seq[Batch] = Seq(Batch("pre-substitution",
fixedPoint, V2ViewSubstitution))
+ }
+
+ private object V2ViewSubstitution extends Rule[LogicalPlan] {
+ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
+
+ // the reason for handling these cases here is because
ResolveSessionCatalog exits early for v2 commands
+ override def apply(plan: LogicalPlan): LogicalPlan =
plan.resolveOperatorsUp {
+ case u@UnresolvedView(identifier, _, _, _) =>
+ lookupTableOrView(identifier, viewOnly = true).getOrElse(u)
+
+ case u@UnresolvedTableOrView(identifier, _, _) =>
+ lookupTableOrView(identifier).getOrElse(u)
+
+ case CreateView(UnresolvedIdentifier(nameParts, allowTemp),
userSpecifiedColumns,
+ comment, properties, originalText, query, allowExisting, replace) =>
+ CreateIcebergView(UnresolvedIdentifier(nameParts, allowTemp),
userSpecifiedColumns,
+ comment, properties, originalText, query, allowExisting, replace)
+
+ case ShowViews(UnresolvedNamespace(multipartIdentifier), pattern,
output) =>
+ ShowIcebergViews(UnresolvedNamespace(multipartIdentifier), pattern,
output)
+
+ case DropView(UnresolvedIdentifier(nameParts, allowTemp), ifExists) =>
+ DropIcebergView(UnresolvedIdentifier(nameParts, allowTemp), ifExists)
+ }
+
+ private def expandIdentifier(nameParts: Seq[String]): Seq[String] = {
+ if (!isResolvingView || isReferredTempViewName(nameParts)) return
nameParts
+
+ if (nameParts.length == 1) {
+ AnalysisContext.get.catalogAndNamespace :+ nameParts.head
+ } else if
(SparkSession.active.sessionState.catalogManager.isCatalogRegistered(nameParts.head))
{
+ nameParts
+ } else {
+ AnalysisContext.get.catalogAndNamespace.head +: nameParts
+ }
+ }
+
+ /**
+ * Resolves relations to `ResolvedTable` or
`Resolved[Temp/Persistent]View`. This is
+ * for resolving DDL and misc commands. Code is copied from Spark's
Analyzer, but performs
+ * a view lookup before performing a table lookup.
+ */
+ private def lookupTableOrView(
+ identifier: Seq[String],
+ viewOnly: Boolean = false):
Option[LogicalPlan] = {
+ lookupTempView(identifier).map { tempView =>
+ ResolvedTempView(identifier.asIdentifier, tempView.tableMeta.schema)
+ }.orElse {
+ val multipartIdent = expandIdentifier(identifier)
+ val catalogAndIdentifier =
Spark3Util.catalogAndIdentifier(SparkSession.active, multipartIdent.asJava)
+ if (null != catalogAndIdentifier) {
+
lookupView(SparkSession.active.sessionState.catalogManager.currentCatalog,
+ catalogAndIdentifier.identifier())
+
.orElse(lookupTable(SparkSession.active.sessionState.catalogManager.currentCatalog,
+ catalogAndIdentifier.identifier()))
+ } else {
None
+ }
}
}
- private def isIcebergTable(multipartIdent: Seq[String]): Boolean = {
- val catalogAndIdentifier =
Spark3Util.catalogAndIdentifier(SparkSession.active, multipartIdent.asJava)
- catalogAndIdentifier.catalog match {
- case tableCatalog: TableCatalog =>
- Try(tableCatalog.loadTable(catalogAndIdentifier.identifier))
- .map(isIcebergTable)
- .getOrElse(false)
+ private def isResolvingView: Boolean =
AnalysisContext.get.catalogAndNamespace.nonEmpty
- case _ =>
- false
+ private def isReferredTempViewName(nameParts: Seq[String]): Boolean = {
+ AnalysisContext.get.referredTempViewNames.exists { n =>
+ (n.length == nameParts.length) && n.zip(nameParts).forall {
+ case (a, b) => resolver(a, b)
+ }
}
}
- private def isIcebergTable(table: Table): Boolean = table match {
- case _: SparkTable => true
- case _ => false
+ private def lookupTempView(identifier: Seq[String]):
Option[TemporaryViewRelation] = {
+ // We are resolving a view and this name is not a temp view when that
view was created. We
+ // return None earlier here.
+ if (isResolvingView && !isReferredTempViewName(identifier)) return None
Review Comment:
Because an Iceberg view must not reference a temporary view, this check
isn't needed, and I don't think we will need `isResolvingView` either, since
it references the `AnalysisContext`.
I think that the only time this should resolve temporary views is when the
original query (as parsed) references a temporary view. Any time there is a
single-part identifier in a view, it should be resolved using the view's
default catalog and namespace.
In addition, we need to ensure that there is no conflicting temporary view
definition when a view is created. We'll need to check that any single-part
identifier in the view SQL does not represent a temporary view at creation time.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]