[GitHub] spark pull request #16613: [SPARK-19024][SQL] Implement new approach to writ...

cloud-fan Tue, 17 Jan 2017 19:14:53 -0800

Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/16613#discussion_r96560759
  
    --- Diff: 
sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala ---
    @@ -275,21 +276,93 @@ case class AlterViewAsCommand(
           throw new AnalysisException(s"${viewMeta.identifier} is not a view.")
         }
     
    -    val viewSQL: String = new SQLBuilder(analyzedPlan).toSQL
    -    // Validate the view SQL - make sure we can parse it and analyze it.
    -    // If we cannot analyze the generated query, there is probably a bug 
in SQL generation.
    -    try {
    -      session.sql(viewSQL).queryExecution.assertAnalyzed()
    -    } catch {
    -      case NonFatal(e) =>
    -        throw new RuntimeException(s"Failed to analyze the canonicalized 
SQL: $viewSQL", e)
    -    }
    +    val newProperties = generateViewProperties(viewMeta.properties, 
session, originalText)
     
         val updatedViewMeta = viewMeta.copy(
           schema = analyzedPlan.schema,
    +      properties = newProperties,
           viewOriginalText = Some(originalText),
    -      viewText = Some(viewSQL))
    +      viewText = Some(originalText))
     
         session.sessionState.catalog.alterTable(updatedViewMeta)
       }
     }
    +
    +object ViewHelper {
    +
    +  import CatalogTable._
    +
    +  /**
    +   * Generate the view default database in `properties`.
    +   */
    +  def generateViewDefaultDatabase(databaseName: String): Map[String, 
String] = {
    +    Map(VIEW_DEFAULT_DATABASE -> databaseName)
    +  }
    +
    +  /**
    +   * Generate the view query output column names in `properties`.
    +   */
    +  def generateQueryColumnNames(columns: Seq[String]): Map[String, String] 
= {
    +    val props = new mutable.HashMap[String, String]
    +    if (columns.nonEmpty) {
    +      props.put(VIEW_QUERY_OUTPUT_NUM_COLUMNS, columns.length.toString)
    +      columns.zipWithIndex.foreach { case (colName, index) =>
    +        props.put(s"$VIEW_QUERY_OUTPUT_COLUMN_NAME_PREFIX$index", colName)
    +      }
    +    }
    +    props.toMap
    +  }
    +
    +  /**
    +   * Remove the view query output column names in `properties`.
    +   */
    +  def removeQueryColumnNames(properties: Map[String, String]): Map[String, 
String] = {
    +    // We can't use `filterKeys` here, as the map returned by `filterKeys` 
is not serializable,
    +    // while `CatalogTable` should be serializable.
    +    properties.filterNot { case (key, _) =>
    +      key.startsWith(VIEW_QUERY_OUTPUT_PREFIX)
    +    }
    +  }
    +
    +  /**
    +   * Generate the view properties in CatalogTable, including:
    +   * 1. view default database that is used to provide the default database 
name on view resolution.
    +   * 2. the output column names of the query that creates a view, this is 
used to map the output of
    +   *    the view child to the view output during view resolution.
    +   *
    +   * @param properties the `properties` in CatalogTable.
    +   * @param session the spark session.
    +   * @param viewText the query string used to create the child plan on 
view resolution.
    +   * @return new view properties including view default database and query 
column names properties.
    +   */
    +  def generateViewProperties(
    +      properties: Map[String, String],
    +      session: SparkSession,
    +      viewText: String): Map[String, String] = {
    +    // Try to analyze the viewText, throw an AnalysisException if the 
query is invalid.
    +    val queryPlan = try {
    +      val unresolvedPlan = 
session.sessionState.sqlParser.parsePlan(viewText)
    +      val resolvedPlan = 
session.sessionState.analyzer.execute(unresolvedPlan)
    +      session.sessionState.analyzer.checkAnalysis(resolvedPlan)
    +
    +      resolvedPlan
    +    } catch {
    +      case e: AnalysisException =>
    +        throw new AnalysisException(s"Failed to analyze the view query 
SQL: $viewText",
    +          cause = Some(e))
    +    }
    +
    +    // Generate the query column names, throw an AnalysisException if 
there exists duplicate column
    +    // names.
    +    val queryOutput = queryPlan.schema.fieldNames
    +    assert(queryOutput.toSet.size == queryOutput.size,
    --- End diff --
    
    nit: `queryOutput.distinct.size == queryOutput.size`



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #16613: [SPARK-19024][SQL] Implement new approach to writ...

Reply via email to