GitHub user wangyum commented on a diff in the pull request:

https://github.com/apache/spark/pull/22124#discussion_r211447728

--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala ---
@@ -384,7 +384,12 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] {
     }
   }

-  def apply(plan: LogicalPlan): LogicalPlan = removeRedundantAliases(plan, AttributeSet.empty)
+  def apply(plan: LogicalPlan): LogicalPlan = {
+    plan match {
+      case c: Command => c
+      case _ => removeRedundantAliases(plan, AttributeSet.empty)
--- End diff --

For example:
```scala
val path = "/tmp/spark/parquet"
val cnt = 30
spark.range(cnt).selectExpr("id as col1").write.mode("overwrite").parquet(path)
spark.sql(s"CREATE TABLE table1(col1 bigint) using parquet location '$path'")
spark.sql("create view view1 as select col1 from table1 where col1 > -20")
// The column name of table2 is inconsistent with the column name of view1.
spark.sql("create table table2 (COL1 BIGINT) using parquet")
// When querying the view, ensure that the column name of the query matches the column name of the target table.
spark.sql("insert overwrite table table2 select COL1 from view1") ``` The execution plan change track: ```scala === Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences === !'Project ['id AS col1#2] Project [id#0L AS col1#2L] +- Range (0, 30, step=1, splits=Some(1)) +- Range (0, 30, step=1, splits=Some(1)) 17:02:55.061 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.catalyst.analysis.CleanupAliases === Project [id#0L AS col1#2L] Project [id#0L AS col1#2L] +- Range (0, 30, step=1, splits=Some(1)) +- Range (0, 30, step=1, splits=Some(1)) 17:02:59.174 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.execution.datasources.DataSourceAnalysis === !'CreateTable `table1`, ErrorIfExists CreateDataSourceTableCommand `table1`, false 17:02:59.909 WARN org.apache.hadoop.hive.metastore.ObjectStore: Failed to get database global_temp, returning NoSuchObjectException 17:03:00.094 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations === 'Project ['col1] 'Project ['col1] +- 'Filter ('col1 > -20) +- 'Filter ('col1 > -20) ! +- 'UnresolvedRelation `table1` +- 'SubqueryAlias `default`.`table1` ! +- 'UnresolvedCatalogRelation `default`.`table1`, org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe 17:03:00.254 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.execution.datasources.FindDataSourceTable === 'Project ['col1] 'Project ['col1] +- 'Filter ('col1 > -20) +- 'Filter ('col1 > -20) ! +- 'SubqueryAlias `default`.`table1` +- SubqueryAlias `default`.`table1` ! 
+- 'UnresolvedCatalogRelation `default`.`table1`, org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +- Relation[col1#5L] parquet 17:03:00.267 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences === 'Project ['col1] 'Project ['col1] !+- 'Filter ('col1 > -20) +- 'Filter (col1#5L > -20) +- SubqueryAlias `default`.`table1` +- SubqueryAlias `default`.`table1` +- Relation[col1#5L] parquet +- Relation[col1#5L] parquet 17:03:00.306 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.catalyst.analysis.TypeCoercion$ImplicitTypeCasts === 'Project ['col1] 'Project ['col1] !+- 'Filter (col1#5L > -20) +- Filter (col1#5L > cast(-20 as bigint)) +- SubqueryAlias `default`.`table1` +- SubqueryAlias `default`.`table1` +- Relation[col1#5L] parquet +- Relation[col1#5L] parquet 17:03:00.309 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences === !'Project ['col1] Project [col1#5L] +- Filter (col1#5L > cast(-20 as bigint)) +- Filter (col1#5L > cast(-20 as bigint)) +- SubqueryAlias `default`.`table1` +- SubqueryAlias `default`.`table1` +- Relation[col1#5L] parquet +- Relation[col1#5L] parquet 17:03:00.314 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.catalyst.analysis.ResolveTimeZone === Project [col1#5L] Project [col1#5L] +- Filter (col1#5L > cast(-20 as bigint)) +- Filter (col1#5L > cast(-20 as bigint)) +- SubqueryAlias `default`.`table1` +- SubqueryAlias `default`.`table1` +- Relation[col1#5L] parquet +- Relation[col1#5L] parquet 17:03:00.383 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.execution.datasources.DataSourceAnalysis === !'CreateTable `table2`, ErrorIfExists CreateDataSourceTableCommand `table2`, false 17:03:00.729 
WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations === 'Project ['col1] 'Project ['col1] +- 'Filter ('col1 > -20) +- 'Filter ('col1 > -20) ! +- 'UnresolvedRelation `table1` +- 'SubqueryAlias `default`.`table1` ! +- 'UnresolvedCatalogRelation `default`.`table1`, org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe 17:03:00.730 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.execution.datasources.FindDataSourceTable === 'Project ['col1] 'Project ['col1] +- 'Filter ('col1 > -20) +- 'Filter ('col1 > -20) ! +- 'SubqueryAlias `default`.`table1` +- SubqueryAlias `default`.`table1` ! +- 'UnresolvedCatalogRelation `default`.`table1`, org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +- Relation[col1#5L] parquet 17:03:00.731 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences === 'Project ['col1] 'Project ['col1] !+- 'Filter ('col1 > -20) +- 'Filter (col1#5L > -20) +- SubqueryAlias `default`.`table1` +- SubqueryAlias `default`.`table1` +- Relation[col1#5L] parquet +- Relation[col1#5L] parquet 17:03:00.734 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.catalyst.analysis.TypeCoercion$ImplicitTypeCasts === 'Project ['col1] 'Project ['col1] !+- 'Filter (col1#5L > -20) +- Filter (col1#5L > cast(-20 as bigint)) +- SubqueryAlias `default`.`table1` +- SubqueryAlias `default`.`table1` +- Relation[col1#5L] parquet +- Relation[col1#5L] parquet 17:03:00.735 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences === !'Project ['col1] Project [col1#5L] +- Filter (col1#5L > cast(-20 as bigint)) +- Filter (col1#5L > cast(-20 as bigint)) +- SubqueryAlias `default`.`table1` +- 
SubqueryAlias `default`.`table1` +- Relation[col1#5L] parquet +- Relation[col1#5L] parquet 17:03:00.737 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.catalyst.analysis.ResolveTimeZone === Project [col1#5L] Project [col1#5L] +- Filter (col1#5L > cast(-20 as bigint)) +- Filter (col1#5L > cast(-20 as bigint)) +- SubqueryAlias `default`.`table1` +- SubqueryAlias `default`.`table1` +- Relation[col1#5L] parquet +- Relation[col1#5L] parquet 17:03:00.742 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations === 'InsertIntoTable 'UnresolvedRelation `table2`, true, false 'InsertIntoTable 'UnresolvedRelation `table2`, true, false +- 'Project ['COL1] +- 'Project ['COL1] ! +- 'UnresolvedRelation `view1` +- SubqueryAlias `default`.`view1` ! +- View (`default`.`view1`, [col1#6L]) ! +- Project [col1#5L] ! +- Filter (col1#5L > cast(-20 as bigint)) ! +- SubqueryAlias `default`.`table1` ! 
+- Relation[col1#5L] parquet 17:03:00.744 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences === 'InsertIntoTable 'UnresolvedRelation `table2`, true, false 'InsertIntoTable 'UnresolvedRelation `table2`, true, false !+- 'Project ['COL1] +- Project [COL1#6L] +- SubqueryAlias `default`.`view1` +- SubqueryAlias `default`.`view1` +- View (`default`.`view1`, [col1#6L]) +- View (`default`.`view1`, [col1#6L]) +- Project [col1#5L] +- Project [col1#5L] +- Filter (col1#5L > cast(-20 as bigint)) +- Filter (col1#5L > cast(-20 as bigint)) +- SubqueryAlias `default`.`table1` +- SubqueryAlias `default`.`table1` +- Relation[col1#5L] parquet +- Relation[col1#5L] parquet 17:03:00.768 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations === !'InsertIntoTable 'UnresolvedRelation `table2`, true, false 'InsertIntoTable 'UnresolvedCatalogRelation `default`.`table2`, org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe, true, false +- Project [COL1#6L] +- Project [COL1#6L] +- SubqueryAlias `default`.`view1` +- SubqueryAlias `default`.`view1` +- View (`default`.`view1`, [col1#6L]) +- View (`default`.`view1`, [col1#6L]) +- Project [col1#5L] +- Project [col1#5L] +- Filter (col1#5L > cast(-20 as bigint)) +- Filter (col1#5L > cast(-20 as bigint)) +- SubqueryAlias `default`.`table1` +- SubqueryAlias `default`.`table1` +- Relation[col1#5L] parquet +- Relation[col1#5L] parquet 17:03:00.852 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.execution.datasources.FindDataSourceTable === !'InsertIntoTable 'UnresolvedCatalogRelation `default`.`table2`, org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe, true, false 'InsertIntoTable Relation[COL1#7L] parquet, true, false +- Project [COL1#6L] +- Project [COL1#6L] +- SubqueryAlias `default`.`view1` 
+- SubqueryAlias `default`.`view1` +- View (`default`.`view1`, [col1#6L]) +- View (`default`.`view1`, [col1#6L]) +- Project [col1#5L] +- Project [col1#5L] +- Filter (col1#5L > cast(-20 as bigint)) +- Filter (col1#5L > cast(-20 as bigint)) +- SubqueryAlias `default`.`table1` +- SubqueryAlias `default`.`table1` +- Relation[col1#5L] parquet +- Relation[col1#5L] parquet DataSourceStrategy 1:COL1#8L DataSourceStrategy 2:COL1#6L 17:03:00.896 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.execution.datasources.DataSourceAnalysis === !'InsertIntoTable Relation[COL1#7L] parquet, true, false InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( !+- Project [COL1#6L] Database: default ! +- SubqueryAlias `default`.`view1` Table: table2 ! +- View (`default`.`view1`, [col1#6L]) Owner: yumwang ! +- Project [col1#5L] Created Time: Mon Aug 20 17:03:00 PDT 2018 ! +- Filter (col1#5L > cast(-20 as bigint)) Last Access: Wed Dec 31 16:00:00 PST 1969 ! +- SubqueryAlias `default`.`table1` Created By: Spark 2.4.0-SNAPSHOT ! +- Relation[col1#5L] parquet Type: MANAGED ! Provider: parquet ! Table Properties: [transient_lastDdlTime=1534809780] ! Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 ! Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe ! InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat ! OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat ! Storage Properties: [serialization.format=1] ! Schema: root ! |-- COL1: long (nullable = true) ! 
), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] ! +- Project [COL1#6L] ! +- SubqueryAlias `default`.`view1` ! +- View (`default`.`view1`, [col1#6L]) ! +- Project [col1#5L] ! +- Filter (col1#5L > cast(-20 as bigint)) ! +- SubqueryAlias `default`.`table1` ! +- Relation[col1#5L] parquet 17:03:00.916 WARN org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1: === Applying Rule org.apache.spark.sql.catalyst.analysis.AliasViewChild === InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( Database: default Database: default Table: table2 Table: table2 Owner: yumwang Owner: yumwang Created Time: Mon Aug 20 17:03:00 PDT 2018 Created Time: Mon Aug 20 17:03:00 PDT 2018 Last Access: Wed Dec 31 16:00:00 PST 1969 Last Access: Wed Dec 31 16:00:00 PST 1969 Created By: Spark 2.4.0-SNAPSHOT Created By: Spark 2.4.0-SNAPSHOT Type: MANAGED Type: MANAGED Provider: parquet Provider: parquet Table Properties: [transient_lastDdlTime=1534809780] Table Properties: [transient_lastDdlTime=1534809780] Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe 
Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1] Storage Properties: [serialization.format=1] Schema: root Schema: root -- COL1: long (nullable = true) |-- COL1: long (nullable = true) ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] +- Project [COL1#6L] +- Project [COL1#6L] +- SubqueryAlias `default`.`view1` +- SubqueryAlias `default`.`view1` +- View (`default`.`view1`, [col1#6L]) +- View (`default`.`view1`, [col1#6L]) ! +- Project [col1#5L] +- Project [cast(col1#5L as bigint) AS col1#6L] ! +- Filter (col1#5L > cast(-20 as bigint)) +- Project [col1#5L] ! +- SubqueryAlias `default`.`table1` +- Filter (col1#5L > cast(-20 as bigint)) ! +- Relation[col1#5L] parquet +- SubqueryAlias `default`.`table1` ! 
+- Relation[col1#5L] parquet yumwang123:COL1#6L 17:03:00.949 WARN org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2: === Applying Rule org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases === InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( Database: default Database: default Table: table2 Table: table2 Owner: yumwang Owner: yumwang Created Time: Mon Aug 20 17:03:00 PDT 2018 Created Time: Mon Aug 20 17:03:00 PDT 2018 Last Access: Wed Dec 31 16:00:00 PST 1969 Last Access: Wed Dec 31 16:00:00 PST 1969 Created By: Spark 2.4.0-SNAPSHOT Created By: Spark 2.4.0-SNAPSHOT Type: MANAGED Type: MANAGED Provider: parquet Provider: parquet Table Properties: [transient_lastDdlTime=1534809780] Table Properties: [transient_lastDdlTime=1534809780] Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: 
org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1] Storage Properties: [serialization.format=1] Schema: root Schema: root -- COL1: long (nullable = true) |-- COL1: long (nullable = true) ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] +- Project [COL1#6L] +- Project [COL1#6L] ! +- SubqueryAlias `default`.`view1` +- View (`default`.`view1`, [col1#6L]) ! +- View (`default`.`view1`, [col1#6L]) +- Project [cast(col1#5L as bigint) AS col1#6L] ! +- Project [cast(col1#5L as bigint) AS col1#6L] +- Project [col1#5L] ! +- Project [col1#5L] +- Filter (col1#5L > cast(-20 as bigint)) ! +- Filter (col1#5L > cast(-20 as bigint)) +- Relation[col1#5L] parquet ! +- SubqueryAlias `default`.`table1` ! +- Relation[col1#5L] parquet 17:03:00.959 WARN org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2: === Applying Rule org.apache.spark.sql.catalyst.analysis.EliminateView === InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( Database: default Database: default Table: table2 Table: table2 Owner: yumwang Owner: yumwang Created Time: Mon Aug 20 17:03:00 PDT 2018 Created Time: Mon Aug 20 17:03:00 
PDT 2018 Last Access: Wed Dec 31 16:00:00 PST 1969 Last Access: Wed Dec 31 16:00:00 PST 1969 Created By: Spark 2.4.0-SNAPSHOT Created By: Spark 2.4.0-SNAPSHOT Type: MANAGED Type: MANAGED Provider: parquet Provider: parquet Table Properties: [transient_lastDdlTime=1534809780] Table Properties: [transient_lastDdlTime=1534809780] Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1] Storage Properties: [serialization.format=1] Schema: root Schema: root -- COL1: long (nullable = true) |-- COL1: long (nullable = true) ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] +- Project [COL1#6L] +- Project [COL1#6L] ! +- View (`default`.`view1`, [col1#6L]) +- Project [cast(col1#5L as bigint) AS col1#6L] ! +- Project [cast(col1#5L as bigint) AS col1#6L] +- Project [col1#5L] ! +- Project [col1#5L] +- Filter (col1#5L > cast(-20 as bigint)) ! +- Filter (col1#5L > cast(-20 as bigint)) +- Relation[col1#5L] parquet ! 
+- Relation[col1#5L] parquet 17:03:00.975 WARN org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2: === Applying Rule org.apache.spark.sql.catalyst.optimizer.ColumnPruning === InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( Database: default Database: default Table: table2 Table: table2 Owner: yumwang Owner: yumwang Created Time: Mon Aug 20 17:03:00 PDT 2018 Created Time: Mon Aug 20 17:03:00 PDT 2018 Last Access: Wed Dec 31 16:00:00 PST 1969 Last Access: Wed Dec 31 16:00:00 PST 1969 Created By: Spark 2.4.0-SNAPSHOT Created By: Spark 2.4.0-SNAPSHOT Type: MANAGED Type: MANAGED Provider: parquet Provider: parquet Table Properties: [transient_lastDdlTime=1534809780] Table Properties: [transient_lastDdlTime=1534809780] Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: 
org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1] Storage Properties: [serialization.format=1] Schema: root Schema: root -- COL1: long (nullable = true) |-- COL1: long (nullable = true) ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] !+- Project [COL1#6L] +- Project [cast(col1#5L as bigint) AS col1#6L] ! +- Project [cast(col1#5L as bigint) AS col1#6L] +- Filter (col1#5L > cast(-20 as bigint)) ! +- Project [col1#5L] +- Relation[col1#5L] parquet ! +- Filter (col1#5L > cast(-20 as bigint)) ! +- Relation[col1#5L] parquet 17:03:00.980 WARN org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2: === Applying Rule org.apache.spark.sql.catalyst.optimizer.ConstantFolding === InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( Database: default Database: default Table: table2 Table: table2 Owner: yumwang Owner: yumwang Created Time: Mon Aug 20 17:03:00 PDT 2018 Created Time: Mon Aug 20 17:03:00 PDT 2018 Last Access: Wed Dec 31 16:00:00 PST 1969 Last Access: Wed Dec 31 16:00:00 PST 1969 Created By: Spark 2.4.0-SNAPSHOT Created By: Spark 2.4.0-SNAPSHOT Type: MANAGED Type: MANAGED 
Provider: parquet Provider: parquet Table Properties: [transient_lastDdlTime=1534809780] Table Properties: [transient_lastDdlTime=1534809780] Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1] Storage Properties: [serialization.format=1] Schema: root Schema: root -- COL1: long (nullable = true) |-- COL1: long (nullable = true) ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] +- Project [cast(col1#5L as bigint) AS col1#6L] +- Project [cast(col1#5L as bigint) AS col1#6L] ! 
+- Filter (col1#5L > cast(-20 as bigint)) +- Filter (col1#5L > -20) +- Relation[col1#5L] parquet +- Relation[col1#5L] parquet 17:03:01.047 WARN org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2: === Applying Rule org.apache.spark.sql.catalyst.optimizer.SimplifyCasts === InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( Database: default Database: default Table: table2 Table: table2 Owner: yumwang Owner: yumwang Created Time: Mon Aug 20 17:03:00 PDT 2018 Created Time: Mon Aug 20 17:03:00 PDT 2018 Last Access: Wed Dec 31 16:00:00 PST 1969 Last Access: Wed Dec 31 16:00:00 PST 1969 Created By: Spark 2.4.0-SNAPSHOT Created By: Spark 2.4.0-SNAPSHOT Type: MANAGED Type: MANAGED Provider: parquet Provider: parquet Table Properties: [transient_lastDdlTime=1534809780] Table Properties: [transient_lastDdlTime=1534809780] Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat InputFormat: 
org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1] Storage Properties: [serialization.format=1] Schema: root Schema: root -- COL1: long (nullable = true) |-- COL1: long (nullable = true) ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] !+- Project [cast(col1#5L as bigint) AS col1#6L] +- Project [col1#5L AS col1#6L] +- Filter (col1#5L > -20) +- Filter (col1#5L > -20) +- Relation[col1#5L] parquet +- Relation[col1#5L] parquet 17:03:01.058 WARN org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2: === Applying Rule org.apache.spark.sql.catalyst.optimizer.RemoveRedundantAliases === InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( Database: default Database: default Table: table2 Table: table2 Owner: yumwang Owner: yumwang Created Time: Mon Aug 20 17:03:00 PDT 2018 Created Time: Mon Aug 20 17:03:00 PDT 2018 Last Access: Wed Dec 31 16:00:00 PST 1969 Last Access: Wed Dec 31 16:00:00 PST 1969 Created By: Spark 2.4.0-SNAPSHOT Created By: Spark 2.4.0-SNAPSHOT Type: MANAGED Type: MANAGED Provider: parquet 
Provider: parquet Table Properties: [transient_lastDdlTime=1534809780] Table Properties: [transient_lastDdlTime=1534809780] Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1] Storage Properties: [serialization.format=1] Schema: root Schema: root -- COL1: long (nullable = true) |-- COL1: long (nullable = true) !), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [col1#5L] !+- Project [col1#5L AS col1#6L] +- Project [col1#5L] +- Filter (col1#5L > -20) +- Filter (col1#5L > -20) +- Relation[col1#5L] parquet +- Relation[col1#5L] parquet 17:03:01.061 WARN org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2: === Applying Rule org.apache.spark.sql.catalyst.optimizer.ColumnPruning === InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( InsertIntoHadoopFsRelationCommand 
file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( Database: default Database: default Table: table2 Table: table2 Owner: yumwang Owner: yumwang Created Time: Mon Aug 20 17:03:00 PDT 2018 Created Time: Mon Aug 20 17:03:00 PDT 2018 Last Access: Wed Dec 31 16:00:00 PST 1969 Last Access: Wed Dec 31 16:00:00 PST 1969 Created By: Spark 2.4.0-SNAPSHOT Created By: Spark 2.4.0-SNAPSHOT Type: MANAGED Type: MANAGED Provider: parquet Provider: parquet Table Properties: [transient_lastDdlTime=1534809780] Table Properties: [transient_lastDdlTime=1534809780] Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1] Storage Properties: [serialization.format=1] Schema: root Schema: root -- COL1: long (nullable = true) |-- COL1: long (nullable = true) ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [col1#5L] ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [col1#5L] !+- Project [col1#5L] +- Filter (col1#5L > -20) ! +- Filter (col1#5L > -20) +- Relation[col1#5L] parquet ! 
+- Relation[col1#5L] parquet 17:03:01.116 WARN org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2: === Applying Rule org.apache.spark.sql.catalyst.optimizer.InferFiltersFromConstraints === InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( Database: default Database: default Table: table2 Table: table2 Owner: yumwang Owner: yumwang Created Time: Mon Aug 20 17:03:00 PDT 2018 Created Time: Mon Aug 20 17:03:00 PDT 2018 Last Access: Wed Dec 31 16:00:00 PST 1969 Last Access: Wed Dec 31 16:00:00 PST 1969 Created By: Spark 2.4.0-SNAPSHOT Created By: Spark 2.4.0-SNAPSHOT Type: MANAGED Type: MANAGED Provider: parquet Provider: parquet Table Properties: [transient_lastDdlTime=1534809780] Table Properties: [transient_lastDdlTime=1534809780] Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: 
org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1] Storage Properties: [serialization.format=1] Schema: root Schema: root -- COL1: long (nullable = true) |-- COL1: long (nullable = true) ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [col1#5L] ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [col1#5L] !+- Filter (col1#5L > -20) +- Filter (isnotnull(col1#5L) && (col1#5L > -20)) +- Relation[col1#5L] parquet +- Relation[col1#5L] parquet queryExecution:== Parsed Logical Plan == 'InsertIntoTable 'UnresolvedRelation `table2`, true, false +- 'Project ['COL1] +- 'UnresolvedRelation `view1` == Analyzed Logical Plan == InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( Database: default Table: table2 Owner: yumwang Created Time: Mon Aug 20 17:03:00 PDT 2018 Last Access: Wed Dec 31 16:00:00 PST 1969 Created By: Spark 2.4.0-SNAPSHOT Type: MANAGED Provider: parquet Table Properties: [transient_lastDdlTime=1534809780] Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1] Schema: root -- COL1: long (nullable = true) ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] +- Project [COL1#6L] +- SubqueryAlias `default`.`view1` +- View 
(`default`.`view1`, [col1#6L]) +- Project [cast(col1#5L as bigint) AS col1#6L] +- Project [col1#5L] +- Filter (col1#5L > cast(-20 as bigint)) +- SubqueryAlias `default`.`table1` +- Relation[col1#5L] parquet == Optimized Logical Plan == InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( Database: default Table: table2 Owner: yumwang Created Time: Mon Aug 20 17:03:00 PDT 2018 Last Access: Wed Dec 31 16:00:00 PST 1969 Created By: Spark 2.4.0-SNAPSHOT Type: MANAGED Provider: parquet Table Properties: [transient_lastDdlTime=1534809780] Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1] Schema: root -- COL1: long (nullable = true) ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [col1#5L] +- Filter (isnotnull(col1#5L) && (col1#5L > -20)) +- Relation[col1#5L] parquet == Physical Plan == Execute InsertIntoHadoopFsRelationCommand InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( Database: default Table: table2 Owner: yumwang Created Time: Mon Aug 20 17:03:00 PDT 2018 Last Access: Wed Dec 31 16:00:00 PST 1969 Created By: 
Spark 2.4.0-SNAPSHOT Type: MANAGED Provider: parquet Table Properties: [transient_lastDdlTime=1534809780] Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1] Schema: root -- COL1: long (nullable = true) ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [col1#5L] +- *(1) Project [col1#5L] +- *(1) Filter (isnotnull(col1#5L) && (col1#5L > -20)) +- *(1) FileScan parquet default.table1[col1#5L] Batched: true, Format: Parquet, Location: InMemoryFileIndex[file:/tmp/spark/parquet], PartitionFilters: [], PushedFilters: [IsNotNull(col1), GreaterThan(col1,-20)], ReadSchema: struct<col1:bigint> ``` The main 3 changes are: ```scala === Applying Rule org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences === 'InsertIntoTable 'UnresolvedRelation `table2`, true, false 'InsertIntoTable 'UnresolvedRelation `table2`, true, false !+- 'Project ['COL1] +- Project [COL1#6L] +- SubqueryAlias `default`.`view1` +- SubqueryAlias `default`.`view1` +- View (`default`.`view1`, [col1#6L]) +- View (`default`.`view1`, [col1#6L]) +- Project [col1#5L] +- Project [col1#5L] +- Filter (col1#5L > cast(-20 as bigint)) +- Filter (col1#5L > cast(-20 as bigint)) +- SubqueryAlias `default`.`table1` +- SubqueryAlias `default`.`table1` +- Relation[col1#5L] parquet +- Relation[col1#5L] parquet ``` ```scala === Applying Rule org.apache.spark.sql.catalyst.analysis.AliasViewChild === InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> 
file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( Database: default Database: default Table: table2 Table: table2 Owner: yumwang Owner: yumwang Created Time: Mon Aug 20 17:03:00 PDT 2018 Created Time: Mon Aug 20 17:03:00 PDT 2018 Last Access: Wed Dec 31 16:00:00 PST 1969 Last Access: Wed Dec 31 16:00:00 PST 1969 Created By: Spark 2.4.0-SNAPSHOT Created By: Spark 2.4.0-SNAPSHOT Type: MANAGED Type: MANAGED Provider: parquet Provider: parquet Table Properties: [transient_lastDdlTime=1534809780] Table Properties: [transient_lastDdlTime=1534809780] Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1] Storage Properties: [serialization.format=1] Schema: root Schema: root -- COL1: long (nullable = true) |-- COL1: long (nullable = true) ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] ), 
org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] +- Project [COL1#6L] +- Project [COL1#6L] +- SubqueryAlias `default`.`view1` +- SubqueryAlias `default`.`view1` +- View (`default`.`view1`, [col1#6L]) +- View (`default`.`view1`, [col1#6L]) ! +- Project [col1#5L] +- Project [cast(col1#5L as bigint) AS col1#6L] ! +- Filter (col1#5L > cast(-20 as bigint)) +- Project [col1#5L] ! +- SubqueryAlias `default`.`table1` +- Filter (col1#5L > cast(-20 as bigint)) ! +- Relation[col1#5L] parquet +- SubqueryAlias `default`.`table1` ! +- Relation[col1#5L] parquet ``` ```scala === Applying Rule org.apache.spark.sql.catalyst.optimizer.RemoveRedundantAliases === InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( InsertIntoHadoopFsRelationCommand file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2, false, Parquet, Map(serialization.format -> 1, path -> file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2), Overwrite, CatalogTable( Database: default Database: default Table: table2 Table: table2 Owner: yumwang Owner: yumwang Created Time: Mon Aug 20 17:03:00 PDT 2018 Created Time: Mon Aug 20 17:03:00 PDT 2018 Last Access: Wed Dec 31 16:00:00 PST 1969 Last Access: Wed Dec 31 16:00:00 PST 1969 Created By: Spark 2.4.0-SNAPSHOT Created By: Spark 2.4.0-SNAPSHOT Type: MANAGED Type: MANAGED Provider: parquet Provider: parquet Table Properties: [transient_lastDdlTime=1534809780] Table Properties: [transient_lastDdlTime=1534809780] Location: 
file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Location: file:/private/var/folders/tg/f5mz46090wg7swzgdc69f8q03965_0/T/warehouse-04d554d2-7ddb-4e13-b065-164afe065972/table2 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1] Storage Properties: [serialization.format=1] Schema: root Schema: root -- COL1: long (nullable = true) |-- COL1: long (nullable = true) !), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [COL1#6L] ), org.apache.spark.sql.execution.datasources.InMemoryFileIndex@c613921e, [col1#5L] !+- Project [col1#5L AS col1#6L] +- Project [col1#5L] +- Filter (col1#5L > -20) +- Filter (col1#5L > -20) +- Relation[col1#5L] parquet ``` The insert command needs its output column to stay `COL1#6L`, but after the `RemoveRedundantAliases` rule is applied, the [`outputColumns`](https://github.com/apache/spark/blob/9f558601e822b7596e4bcc141d5c91a5a8859628/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala#L59) are changed to `col1#5L`.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org