Github user jiangxb1987 commented on a diff in the pull request: https://github.com/apache/spark/pull/16233#discussion_r95421031 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala --- @@ -510,32 +542,90 @@ class Analyzer( * Replaces [[UnresolvedRelation]]s with concrete relations from the catalog. */ object ResolveRelations extends Rule[LogicalPlan] { - private def lookupTableFromCatalog(u: UnresolvedRelation): LogicalPlan = { + + // If the unresolved relation is running directly on files, we just return the original + // UnresolvedRelation, the plan will get resolved later. Else we look up the table from catalog + // and change the default database name if it is a view. + // We usually look up a table from the default database if the table identifier has an empty + // database part, for a view the default database should be the currentDb when the view was + // created. When the case comes to resolving a nested view, the view may have different default + // database with that the referenced view has, so we need to use the variable `defaultDatabase` + // to track the current default database. + // When the relation we resolve is a view, we fetch the view.desc(which is a CatalogTable), and + // then set the value of `CatalogTable.viewDefaultDatabase` to the variable `defaultDatabase`, + // we look up the relations that the view references using the default database. + // For example: + // |- view1 (defaultDatabase = db1) + // |- operator + // |- table2 (defaultDatabase = db1) + // |- view2 (defaultDatabase = db2) + // |- view3 (defaultDatabase = db3) + // |- view4 (defaultDatabase = db4) + // In this case, the view `view1` is a nested view, it directly references `table2`, `view2` + // and `view4`, the view `view2` references `view3`. On resolving the table, we look up the + // relations `table2`, `view2`, `view4` using the default database `db1`, and look up the + // relation `view3` using the default database `db2`. 
+ // + // Note this is compatible with the views defined by older versions of Spark(before 2.2), which + // have empty defaultDatabase and all the relations in viewText have database part defined. + def resolveRelation( + plan: LogicalPlan, + defaultDatabase: Option[String] = None): LogicalPlan = plan match { + case u: UnresolvedRelation if !isRunningDirectlyOnFiles(u.tableIdentifier) => + val defaultDatabase = AnalysisContext.get.defaultDatabase + val relation = lookupTableFromCatalog(u, defaultDatabase) + resolveRelation(relation, defaultDatabase) + // The view's child should be a logical plan parsed from the `desc.viewText`, the variable + // `viewText` should be defined, or else we throw an error on the generation of the View + // operator. + case view @ View(desc, _, child) if !child.resolved => + val nestedViewLevel = AnalysisContext.get.nestedViewLevel + 1 + val context = AnalysisContext(defaultDatabase = desc.viewDefaultDatabase, + nestedViewLevel = nestedViewLevel) + // Resolve all the UnresolvedRelations and Views in the child. + val newChild = AnalysisContext.withAnalysisContext(context) { + execute(child) + } + view.copy(child = newChild) + case p @ SubqueryAlias(_, view: View, _) => + val newChild = resolveRelation(view, defaultDatabase) + p.copy(child = newChild) + case _ => plan + } + + def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { + case i @ InsertIntoTable(u: UnresolvedRelation, parts, child, _, _) if child.resolved => + i.copy(table = EliminateSubqueryAliases(lookupTableFromCatalog(u))) + case u: UnresolvedRelation => resolveRelation(u) + } + + // Look up the table with the given name from catalog. The database we look up the table from + // is decided follow the steps: + // 1. If the database part is defined in the table identifier, use that database name; + // 2. 
Else If the defaultDatabase is defined, use the default database name(In this case, no + // temporary objects can be used, and the default database is only used to look up a view); + // 3. Else use the currentDb of the SessionCatalog. + private def lookupTableFromCatalog( + u: UnresolvedRelation, + defaultDatabase: Option[String] = None): LogicalPlan = { try { - catalog.lookupRelation(u.tableIdentifier, u.alias) + val tableIdentWithDb = u.tableIdentifier.withDatabase(defaultDatabase) --- End diff -- I will update the comment, but since the `withDatabase` function is general-purpose, perhaps it's better to keep that in `TableIdentifier`.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org