Re: [PR] Spark: Add support for Iceberg views [iceberg]

via GitHub Mon, 18 Dec 2023 14:41:10 -0800


rdblue commented on code in PR #9332:
URL: https://github.com/apache/iceberg/pull/9332#discussion_r1430719038



##########
spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSparkSqlExtensionsParser.scala:
##########
@@ -122,37 +147,132 @@ class IcebergSparkSqlExtensionsParser(delegate: 
ParserInterface) extends ParserI
     if (isIcebergCommand(sqlTextAfterSubstitution)) {
       parse(sqlTextAfterSubstitution) { parser => 
astBuilder.visit(parser.singleStatement()) }.asInstanceOf[LogicalPlan]
     } else {
-      delegate.parsePlan(sqlText)
+      ViewSubstitutionExecutor.execute(delegate.parsePlan(sqlText))
     }
   }
 
-  object UnresolvedIcebergTable {
+  private object ViewSubstitutionExecutor extends RuleExecutor[LogicalPlan] {
+    private val fixedPoint = FixedPoint(
+      maxIterations,
+      errorOnExceed = true,
+      maxIterationsSetting = SQLConf.ANALYZER_MAX_ITERATIONS.key)
 
-    def unapply(plan: LogicalPlan): Option[LogicalPlan] = {
-      EliminateSubqueryAliases(plan) match {
-        case UnresolvedRelation(multipartIdentifier, _, _) if 
isIcebergTable(multipartIdentifier) =>
-          Some(plan)
-        case _ =>
+    override protected def batches: Seq[Batch] = Seq(Batch("pre-substitution", 
fixedPoint, V2ViewSubstitution))
+  }
+
+  private object V2ViewSubstitution extends Rule[LogicalPlan] {
+    import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
+
+    // the reason for handling these cases here is because 
ResolveSessionCatalog exits early for v2 commands
+    override def apply(plan: LogicalPlan): LogicalPlan = 
plan.resolveOperatorsUp {
+      case u@UnresolvedView(identifier, _, _, _) =>
+        lookupTableOrView(identifier, viewOnly = true).getOrElse(u)
+
+      case u@UnresolvedTableOrView(identifier, _, _) =>
+        lookupTableOrView(identifier).getOrElse(u)
+
+      case CreateView(UnresolvedIdentifier(nameParts, allowTemp), 
userSpecifiedColumns,
+      comment, properties, originalText, query, allowExisting, replace) =>
+        CreateIcebergView(UnresolvedIdentifier(nameParts, allowTemp), 
userSpecifiedColumns,
+          comment, properties, originalText, query, allowExisting, replace)
+
+      case ShowViews(UnresolvedNamespace(multipartIdentifier), pattern, 
output) =>
+        ShowIcebergViews(UnresolvedNamespace(multipartIdentifier), pattern, 
output)
+
+      case DropView(UnresolvedIdentifier(nameParts, allowTemp), ifExists) =>
+        DropIcebergView(UnresolvedIdentifier(nameParts, allowTemp), ifExists)
+    }
+
+    private def expandIdentifier(nameParts: Seq[String]): Seq[String] = {
+      if (!isResolvingView || isReferredTempViewName(nameParts)) return 
nameParts
+
+      if (nameParts.length == 1) {
+        AnalysisContext.get.catalogAndNamespace :+ nameParts.head
+      } else if 
(SparkSession.active.sessionState.catalogManager.isCatalogRegistered(nameParts.head))
 {
+        nameParts
+      } else {
+        AnalysisContext.get.catalogAndNamespace.head +: nameParts
+      }
+    }
+
+    /**
+     * Resolves relations to `ResolvedTable` or 
`Resolved[Temp/Persistent]View`. This is
+     * for resolving DDL and misc commands. Code is copied from Spark's 
Analyzer, but performs
+     * a view lookup before performing a table lookup.
+     */
+    private def lookupTableOrView(
+                                   identifier: Seq[String],
+                                   viewOnly: Boolean = false): 
Option[LogicalPlan] = {
+      lookupTempView(identifier).map { tempView =>
+        ResolvedTempView(identifier.asIdentifier, tempView.tableMeta.schema)
+      }.orElse {
+        val multipartIdent = expandIdentifier(identifier)
+        val catalogAndIdentifier = 
Spark3Util.catalogAndIdentifier(SparkSession.active, multipartIdent.asJava)
+        if (null != catalogAndIdentifier) {
+          
lookupView(SparkSession.active.sessionState.catalogManager.currentCatalog,
+            catalogAndIdentifier.identifier())
+            
.orElse(lookupTable(SparkSession.active.sessionState.catalogManager.currentCatalog,
+              catalogAndIdentifier.identifier()))
+        } else {
           None
+        }
       }
     }
 
-    private def isIcebergTable(multipartIdent: Seq[String]): Boolean = {
-      val catalogAndIdentifier = 
Spark3Util.catalogAndIdentifier(SparkSession.active, multipartIdent.asJava)
-      catalogAndIdentifier.catalog match {
-        case tableCatalog: TableCatalog =>
-          Try(tableCatalog.loadTable(catalogAndIdentifier.identifier))
-            .map(isIcebergTable)
-            .getOrElse(false)
+    private def isResolvingView: Boolean = 
AnalysisContext.get.catalogAndNamespace.nonEmpty
 
-        case _ =>
-          false
+    private def isReferredTempViewName(nameParts: Seq[String]): Boolean = {
+      AnalysisContext.get.referredTempViewNames.exists { n =>
+        (n.length == nameParts.length) && n.zip(nameParts).forall {
+          case (a, b) => resolver(a, b)
+        }
       }
     }
 
-    private def isIcebergTable(table: Table): Boolean = table match {
-      case _: SparkTable => true
-      case _ => false
+    private def lookupTempView(identifier: Seq[String]): 
Option[TemporaryViewRelation] = {
+      // We are resolving a view and this name is not a temp view when that 
view was created. We
+      // return None earlier here.
+      if (isResolvingView && !isReferredTempViewName(identifier)) return None

Review Comment:
   Because an Iceberg view must not reference a temporary view, this check 
isn't needed, and I don't think we will need `isResolvingView` either, since 
`isResolvingView` reads from the `AnalysisContext`.
   
   I think that the only time this should resolve temporary views is when the 
original query (as parsed) references a temporary view. Any time there is a 
single-part identifier in a view, it should be resolved using the view's 
default catalog and namespace.
   
   In addition, we need to ensure that there is no conflicting temporary view 
definition when a view is created. We'll need to check that any single-part 
identifier in the view SQL does not resolve to a temporary view at creation time.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Spark: Add support for Iceberg views [iceberg]

Reply via email to