cloud-fan commented on code in PR #46267: URL: https://github.com/apache/spark/pull/46267#discussion_r1596852480
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala: ########## @@ -945,54 +945,87 @@ class SessionCatalog( throw QueryCompilationErrors.invalidViewText(viewText, metadata.qualifiedName) } } - val projectList = if (!isHiveCreatedView(metadata)) { - val viewColumnNames = if (metadata.viewQueryColumnNames.isEmpty) { - // For view created before Spark 2.2.0, the view text is already fully qualified, the plan - // output is the same with the view output. - metadata.schema.fieldNames.toImmutableArraySeq - } else { - assert(metadata.viewQueryColumnNames.length == metadata.schema.length) - metadata.viewQueryColumnNames - } + val schemaMode = metadata.viewSchemaMode + if (schemaMode == SchemaEvolution) { + View(desc = metadata, isTempView = isTempView, child = parsedPlan) + } else { + val projectList = if (!isHiveCreatedView(metadata)) { + val viewColumnNames = if (metadata.viewQueryColumnNames.isEmpty) { + // For view created before Spark 2.2.0, the view text is already fully qualified, the plan + // output is the same with the view output. + metadata.schema.fieldNames.toImmutableArraySeq + } else { + assert(metadata.viewQueryColumnNames.length == metadata.schema.length) + metadata.viewQueryColumnNames + } - // For view queries like `SELECT * FROM t`, the schema of the referenced table/view may - // change after the view has been created. We need to add an extra SELECT to pick the columns - // according to the recorded column names (to get the correct view column ordering and omit - // the extra columns that we don't require), with UpCast (to make sure the type change is - // safe) and Alias (to respect user-specified view column names) according to the view schema - // in the catalog. - // Note that, the column names may have duplication, e.g. `CREATE VIEW v(x, y) AS - // SELECT 1 col, 2 col`. 
We need to make sure that the matching attributes have the same - // number of duplications, and pick the corresponding attribute by ordinal. - val viewConf = View.effectiveSQLConf(metadata.viewSQLConfigs, isTempView) - val normalizeColName: String => String = if (viewConf.caseSensitiveAnalysis) { - identity + // For view queries like `SELECT * FROM t`, the schema of the referenced table/view may + // change after the view has been created. We need to add an extra SELECT to pick the + // columns according to the recorded column names (to get the correct view column ordering + // and omit the extra columns that we don't require), with UpCast (to make sure the type + // change is safe) and Alias (to respect user-specified view column names) according to the + // view schema in the catalog. + // Note that, the column names may have duplication, e.g. `CREATE VIEW v(x, y) AS + // SELECT 1 col, 2 col`. We need to make sure that the matching attributes have the same + // number of duplications, and pick the corresponding attribute by ordinal. + val viewConf = View.effectiveSQLConf(metadata.viewSQLConfigs, isTempView) + val normalizeColName: String => String = if (viewConf.caseSensitiveAnalysis) { + identity + } else { + _.toLowerCase(Locale.ROOT) + } + val nameToCounts = viewColumnNames.groupBy(normalizeColName).transform((_, v) => v.length) + val nameToCurrentOrdinal = scala.collection.mutable.HashMap.empty[String, Int] + val viewDDL = buildViewDDL(metadata, isTempView) + + viewColumnNames.zip(metadata.schema).map { case (name, field) => + val normalizedName = normalizeColName(name) + val count = nameToCounts(normalizedName) + val ordinal = nameToCurrentOrdinal.getOrElse(normalizedName, 0) + nameToCurrentOrdinal(normalizedName) = ordinal + 1 + val col = GetViewColumnByNameAndOrdinal( + metadata.identifier.toString, name, ordinal, count, viewDDL) + val cast = schemaMode match { + /* + ** For schema binding, we cast the column to the expected type using safe cast only. 
+ ** For legacy behavior, we cast the column to the expected type using safe cast only. + ** For schema compensation, we cast the column to the expected type using any cast + * in ansi mode. + ** For schema (type) evolution, we take the column as is. + */ + case SchemaBinding => UpCast(col, field.dataType) + case SchemaUnsupported => UpCast(col, field.dataType) + case SchemaCompensation => Cast(col, field.dataType, ansiEnabled = true) + case SchemaTypeEvolution => col + case other => throw SparkException.internalError("Unexpected ViewSchemaMode") + } + Alias(cast, field.name)(explicitMetadata = Some(field.metadata)) + } } else { - _.toLowerCase(Locale.ROOT) - } - val nameToCounts = viewColumnNames.groupBy(normalizeColName).transform((_, v) => v.length) - val nameToCurrentOrdinal = scala.collection.mutable.HashMap.empty[String, Int] - val viewDDL = buildViewDDL(metadata, isTempView) - - viewColumnNames.zip(metadata.schema).map { case (name, field) => - val normalizedName = normalizeColName(name) - val count = nameToCounts(normalizedName) - val ordinal = nameToCurrentOrdinal.getOrElse(normalizedName, 0) - nameToCurrentOrdinal(normalizedName) = ordinal + 1 - val col = GetViewColumnByNameAndOrdinal( - metadata.identifier.toString, name, ordinal, count, viewDDL) - Alias(UpCast(col, field.dataType), field.name)(explicitMetadata = Some(field.metadata)) - } - } else { - // For view created by hive, the parsed view plan may have different output columns with - // the schema stored in metadata. For example: `CREATE VIEW v AS SELECT 1 FROM t` - // the schema in metadata will be `_c0` while the parsed view plan has column named `1` - metadata.schema.zipWithIndex.map { case (field, index) => - val col = GetColumnByOrdinal(index, field.dataType) - Alias(UpCast(col, field.dataType), field.name)(explicitMetadata = Some(field.metadata)) + // For view created by hive, the parsed view plan may have different output columns with + // the schema stored in metadata. 
For example: `CREATE VIEW v AS SELECT 1 FROM t` + // the schema in metadata will be `_c0` while the parsed view plan has column named `1` + metadata.schema.zipWithIndex.map { case (field, index) => + val col = GetColumnByOrdinal(index, field.dataType) + val cast = schemaMode match { + /* + ** For schema binding, we cast the column to the expected type using safe cast only. + ** For legacy behavior, we cast the column to the expected type using safe cast only. + ** For schema compensation, we cast the column to the expected type using any cast + * in ansi mode. + ** For schema (type) evolution, we take the column as is. + */ + case SchemaBinding => UpCast(col, field.dataType) + case SchemaUnsupported => UpCast(col, field.dataType) + case SchemaCompensation => Cast(col, field.dataType, ansiEnabled = true) + case SchemaTypeEvolution => col + case other => throw SparkException.internalError("Unexpected ViewSchemaMode") + } + Alias(cast, field.name)(explicitMetadata = Some(field.metadata)) Review Comment: nit: let's create a method to do the cast, to avoid code duplication ########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala: ########## @@ -945,54 +945,87 @@ class SessionCatalog( throw QueryCompilationErrors.invalidViewText(viewText, metadata.qualifiedName) } } - val projectList = if (!isHiveCreatedView(metadata)) { - val viewColumnNames = if (metadata.viewQueryColumnNames.isEmpty) { - // For view created before Spark 2.2.0, the view text is already fully qualified, the plan - // output is the same with the view output. 
- metadata.schema.fieldNames.toImmutableArraySeq - } else { - assert(metadata.viewQueryColumnNames.length == metadata.schema.length) - metadata.viewQueryColumnNames - } + val schemaMode = metadata.viewSchemaMode + if (schemaMode == SchemaEvolution) { + View(desc = metadata, isTempView = isTempView, child = parsedPlan) + } else { + val projectList = if (!isHiveCreatedView(metadata)) { + val viewColumnNames = if (metadata.viewQueryColumnNames.isEmpty) { + // For view created before Spark 2.2.0, the view text is already fully qualified, the plan + // output is the same with the view output. + metadata.schema.fieldNames.toImmutableArraySeq + } else { + assert(metadata.viewQueryColumnNames.length == metadata.schema.length) + metadata.viewQueryColumnNames + } - // For view queries like `SELECT * FROM t`, the schema of the referenced table/view may - // change after the view has been created. We need to add an extra SELECT to pick the columns - // according to the recorded column names (to get the correct view column ordering and omit - // the extra columns that we don't require), with UpCast (to make sure the type change is - // safe) and Alias (to respect user-specified view column names) according to the view schema - // in the catalog. - // Note that, the column names may have duplication, e.g. `CREATE VIEW v(x, y) AS - // SELECT 1 col, 2 col`. We need to make sure that the matching attributes have the same - // number of duplications, and pick the corresponding attribute by ordinal. - val viewConf = View.effectiveSQLConf(metadata.viewSQLConfigs, isTempView) - val normalizeColName: String => String = if (viewConf.caseSensitiveAnalysis) { - identity + // For view queries like `SELECT * FROM t`, the schema of the referenced table/view may + // change after the view has been created. 
We need to add an extra SELECT to pick the + // columns according to the recorded column names (to get the correct view column ordering + // and omit the extra columns that we don't require), with UpCast (to make sure the type + // change is safe) and Alias (to respect user-specified view column names) according to the + // view schema in the catalog. + // Note that, the column names may have duplication, e.g. `CREATE VIEW v(x, y) AS + // SELECT 1 col, 2 col`. We need to make sure that the matching attributes have the same + // number of duplications, and pick the corresponding attribute by ordinal. + val viewConf = View.effectiveSQLConf(metadata.viewSQLConfigs, isTempView) + val normalizeColName: String => String = if (viewConf.caseSensitiveAnalysis) { + identity + } else { + _.toLowerCase(Locale.ROOT) + } + val nameToCounts = viewColumnNames.groupBy(normalizeColName).transform((_, v) => v.length) + val nameToCurrentOrdinal = scala.collection.mutable.HashMap.empty[String, Int] + val viewDDL = buildViewDDL(metadata, isTempView) + + viewColumnNames.zip(metadata.schema).map { case (name, field) => + val normalizedName = normalizeColName(name) + val count = nameToCounts(normalizedName) + val ordinal = nameToCurrentOrdinal.getOrElse(normalizedName, 0) + nameToCurrentOrdinal(normalizedName) = ordinal + 1 + val col = GetViewColumnByNameAndOrdinal( + metadata.identifier.toString, name, ordinal, count, viewDDL) + val cast = schemaMode match { + /* + ** For schema binding, we cast the column to the expected type using safe cast only. + ** For legacy behavior, we cast the column to the expected type using safe cast only. + ** For schema compensation, we cast the column to the expected type using any cast + * in ansi mode. + ** For schema (type) evolution, we take the column as is. 
+ */ + case SchemaBinding => UpCast(col, field.dataType) + case SchemaUnsupported => UpCast(col, field.dataType) + case SchemaCompensation => Cast(col, field.dataType, ansiEnabled = true) + case SchemaTypeEvolution => col + case other => throw SparkException.internalError("Unexpected ViewSchemaMode") + } + Alias(cast, field.name)(explicitMetadata = Some(field.metadata)) + } } else { - _.toLowerCase(Locale.ROOT) - } - val nameToCounts = viewColumnNames.groupBy(normalizeColName).transform((_, v) => v.length) - val nameToCurrentOrdinal = scala.collection.mutable.HashMap.empty[String, Int] - val viewDDL = buildViewDDL(metadata, isTempView) - - viewColumnNames.zip(metadata.schema).map { case (name, field) => - val normalizedName = normalizeColName(name) - val count = nameToCounts(normalizedName) - val ordinal = nameToCurrentOrdinal.getOrElse(normalizedName, 0) - nameToCurrentOrdinal(normalizedName) = ordinal + 1 - val col = GetViewColumnByNameAndOrdinal( - metadata.identifier.toString, name, ordinal, count, viewDDL) - Alias(UpCast(col, field.dataType), field.name)(explicitMetadata = Some(field.metadata)) - } - } else { - // For view created by hive, the parsed view plan may have different output columns with - // the schema stored in metadata. For example: `CREATE VIEW v AS SELECT 1 FROM t` - // the schema in metadata will be `_c0` while the parsed view plan has column named `1` - metadata.schema.zipWithIndex.map { case (field, index) => - val col = GetColumnByOrdinal(index, field.dataType) - Alias(UpCast(col, field.dataType), field.name)(explicitMetadata = Some(field.metadata)) + // For view created by hive, the parsed view plan may have different output columns with + // the schema stored in metadata. 
For example: `CREATE VIEW v AS SELECT 1 FROM t` + // the schema in metadata will be `_c0` while the parsed view plan has column named `1` + metadata.schema.zipWithIndex.map { case (field, index) => + val col = GetColumnByOrdinal(index, field.dataType) + val cast = schemaMode match { + /* + ** For schema binding, we cast the column to the expected type using safe cast only. + ** For legacy behavior, we cast the column to the expected type using safe cast only. + ** For schema compensation, we cast the column to the expected type using any cast + * in ansi mode. + ** For schema (type) evolution, we take the column as is. + */ + case SchemaBinding => UpCast(col, field.dataType) + case SchemaUnsupported => UpCast(col, field.dataType) + case SchemaCompensation => Cast(col, field.dataType, ansiEnabled = true) + case SchemaTypeEvolution => col + case other => throw SparkException.internalError("Unexpected ViewSchemaMode") + } + Alias(cast, field.name)(explicitMetadata = Some(field.metadata)) Review Comment: nit: let's create a method to do the cast, to avoid code duplication -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org