Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/16613#discussion_r96560759 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala --- @@ -275,21 +276,93 @@ case class AlterViewAsCommand( throw new AnalysisException(s"${viewMeta.identifier} is not a view.") } - val viewSQL: String = new SQLBuilder(analyzedPlan).toSQL - // Validate the view SQL - make sure we can parse it and analyze it. - // If we cannot analyze the generated query, there is probably a bug in SQL generation. - try { - session.sql(viewSQL).queryExecution.assertAnalyzed() - } catch { - case NonFatal(e) => - throw new RuntimeException(s"Failed to analyze the canonicalized SQL: $viewSQL", e) - } + val newProperties = generateViewProperties(viewMeta.properties, session, originalText) val updatedViewMeta = viewMeta.copy( schema = analyzedPlan.schema, + properties = newProperties, viewOriginalText = Some(originalText), - viewText = Some(viewSQL)) + viewText = Some(originalText)) session.sessionState.catalog.alterTable(updatedViewMeta) } } + +object ViewHelper { + + import CatalogTable._ + + /** + * Generate the view default database in `properties`. + */ + def generateViewDefaultDatabase(databaseName: String): Map[String, String] = { + Map(VIEW_DEFAULT_DATABASE -> databaseName) + } + + /** + * Generate the view query output column names in `properties`. + */ + def generateQueryColumnNames(columns: Seq[String]): Map[String, String] = { + val props = new mutable.HashMap[String, String] + if (columns.nonEmpty) { + props.put(VIEW_QUERY_OUTPUT_NUM_COLUMNS, columns.length.toString) + columns.zipWithIndex.foreach { case (colName, index) => + props.put(s"$VIEW_QUERY_OUTPUT_COLUMN_NAME_PREFIX$index", colName) + } + } + props.toMap + } + + /** + * Remove the view query output column names in `properties`. + */ + def removeQueryColumnNames(properties: Map[String, String]): Map[String, String] = { + // We can't use `filterKeys` here, as the map returned by `filterKeys` is not serializable, + // while `CatalogTable` should be serializable. + properties.filterNot { case (key, _) => + key.startsWith(VIEW_QUERY_OUTPUT_PREFIX) + } + } + + /** + * Generate the view properties in CatalogTable, including: + * 1. view default database that is used to provide the default database name on view resolution. + * 2. the output column names of the query that creates a view, this is used to map the output of + * the view child to the view output during view resolution. + * + * @param properties the `properties` in CatalogTable. + * @param session the spark session. + * @param viewText the query string used to create the child plan on view resolution. + * @return new view properties including view default database and query column names properties. + */ + def generateViewProperties( + properties: Map[String, String], + session: SparkSession, + viewText: String): Map[String, String] = { + // Try to analyze the viewText, throw an AnalysisException if the query is invalid. + val queryPlan = try { + val unresolvedPlan = session.sessionState.sqlParser.parsePlan(viewText) + val resolvedPlan = session.sessionState.analyzer.execute(unresolvedPlan) + session.sessionState.analyzer.checkAnalysis(resolvedPlan) + + resolvedPlan + } catch { + case e: AnalysisException => + throw new AnalysisException(s"Failed to analyze the view query SQL: $viewText", + cause = Some(e)) + } + + // Generate the query column names, throw an AnalysisException if there exists duplicate column + // names. + val queryOutput = queryPlan.schema.fieldNames + assert(queryOutput.toSet.size == queryOutput.size, --- End diff -- nit: `queryOutput.distinct.size == queryOutput.size`
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org