Repository: spark
Updated Branches:
refs/heads/master 34283de16 -> 42279bff6
[SPARK-16374][SQL] Remove Alias from MetastoreRelation and SimpleCatalogRelation
#### What changes were proposed in this pull request?
Unlike the other leaf nodes, `MetastoreRelation` and
`SimpleCatalogRelation` have a pre-defined `alias`, which is used to change the
qualifier of the node. However, based on the existing alias handling, the alias
should instead be carried by a `SubqueryAlias` node.
This PR separates alias handling from `MetastoreRelation` and
`SimpleCatalogRelation`, making them consistent with the other leaf nodes. It
also simplifies their signatures and the conversion to a `BaseRelation`.
For example, below is an example query for `MetastoreRelation`, which is
converted to a `LogicalRelation`:
```SQL
SELECT tmp.a + 1 FROM test_parquet_ctas tmp WHERE tmp.a > 2
```
Before changes, the analyzed plan is
```
== Analyzed Logical Plan ==
(a + 1): int
Project [(a#951 + 1) AS (a + 1)#952]
+- Filter (a#951 > 2)
+- SubqueryAlias tmp
+- Relation[a#951] parquet
```
After changes, the analyzed plan becomes
```
== Analyzed Logical Plan ==
(a + 1): int
Project [(a#951 + 1) AS (a + 1)#952]
+- Filter (a#951 > 2)
+- SubqueryAlias tmp
+- SubqueryAlias test_parquet_ctas
+- Relation[a#951] parquet
```
**Note: the optimized plans are the same.**
For `SimpleCatalogRelation`, the existing code already generates two nested
`SubqueryAlias` nodes, so no change to the resulting plan shape is needed.
#### How was this patch tested?
Added test cases.
Author: gatorsmile <[email protected]>
Closes #14053 from gatorsmile/removeAliasFromMetastoreRelation.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/42279bff
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/42279bff
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/42279bff
Branch: refs/heads/master
Commit: 42279bff686f9808ec7a9e8f4da95c717edc6026
Parents: 34283de
Author: gatorsmile <[email protected]>
Authored: Thu Jul 7 12:07:19 2016 +0800
Committer: Wenchen Fan <[email protected]>
Committed: Thu Jul 7 12:07:19 2016 +0800
----------------------------------------------------------------------
.../spark/sql/catalyst/catalog/SessionCatalog.scala | 2 +-
.../org/apache/spark/sql/catalyst/catalog/interface.scala | 5 ++---
.../spark/sql/catalyst/catalog/SessionCatalogSuite.scala | 2 +-
.../org/apache/spark/sql/hive/HiveMetastoreCatalog.scala | 10 ++++++----
.../org/apache/spark/sql/hive/MetastoreRelation.scala | 10 ++++------
5 files changed, 14 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/42279bff/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index e1d4991..ffaefeb 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -403,7 +403,7 @@ class SessionCatalog(
val relation =
if (name.database.isDefined || !tempTables.contains(table)) {
val metadata = externalCatalog.getTable(db, table)
- SimpleCatalogRelation(db, metadata, alias)
+ SimpleCatalogRelation(db, metadata)
} else {
tempTables(table)
}
http://git-wip-us.apache.org/repos/asf/spark/blob/42279bff/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 6197aca..b12606e 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -244,8 +244,7 @@ trait CatalogRelation {
*/
case class SimpleCatalogRelation(
databaseName: String,
- metadata: CatalogTable,
- alias: Option[String] = None)
+ metadata: CatalogTable)
extends LeafNode with CatalogRelation {
override def catalogTable: CatalogTable = metadata
@@ -261,7 +260,7 @@ case class SimpleCatalogRelation(
CatalystSqlParser.parseDataType(f.dataType),
// Since data can be dumped in randomly with no validation, everything
is nullable.
nullable = true
- )(qualifier = Some(alias.getOrElse(metadata.identifier.table)))
+ )(qualifier = Some(metadata.identifier.table))
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/42279bff/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
index c8e7c51..05eb302 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
@@ -407,7 +407,7 @@ class SessionCatalogSuite extends SparkFunSuite {
val relationWithAlias =
SubqueryAlias(alias,
SubqueryAlias("tbl1",
- SimpleCatalogRelation("db2", tableMetadata, Some(alias))))
+ SimpleCatalogRelation("db2", tableMetadata)))
assert(catalog.lookupRelation(
TableIdentifier("tbl1", Some("db2")), alias = None) == relation)
assert(catalog.lookupRelation(
http://git-wip-us.apache.org/repos/asf/spark/blob/42279bff/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
----------------------------------------------------------------------
diff --git
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 20e64a4..2be51ed 100644
---
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -180,8 +180,10 @@ private[hive] class HiveMetastoreCatalog(sparkSession:
SparkSession) extends Log
SubqueryAlias(aliasText, sessionState.sqlParser.parsePlan(viewText))
}
} else {
- MetastoreRelation(
- qualifiedTableName.database, qualifiedTableName.name, alias)(table,
client, sparkSession)
+ val qualifiedTable =
+ MetastoreRelation(
+ qualifiedTableName.database, qualifiedTableName.name)(table, client,
sparkSession)
+ alias.map(a => SubqueryAlias(a,
qualifiedTable)).getOrElse(qualifiedTable)
}
}
@@ -385,7 +387,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession:
SparkSession) extends Log
// Read path
case relation: MetastoreRelation if
shouldConvertMetastoreParquet(relation) =>
val parquetRelation = convertToParquetRelation(relation)
- SubqueryAlias(relation.alias.getOrElse(relation.tableName),
parquetRelation)
+ SubqueryAlias(relation.tableName, parquetRelation)
}
}
}
@@ -423,7 +425,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession:
SparkSession) extends Log
// Read path
case relation: MetastoreRelation if
shouldConvertMetastoreOrc(relation) =>
val orcRelation = convertToOrcRelation(relation)
- SubqueryAlias(relation.alias.getOrElse(relation.tableName),
orcRelation)
+ SubqueryAlias(relation.tableName, orcRelation)
}
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/42279bff/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
----------------------------------------------------------------------
diff --git
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
index 58bca20..3ab1bda 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala
@@ -41,8 +41,7 @@ import org.apache.spark.sql.hive.client.HiveClient
private[hive] case class MetastoreRelation(
databaseName: String,
- tableName: String,
- alias: Option[String])
+ tableName: String)
(val catalogTable: CatalogTable,
@transient private val client: HiveClient,
@transient private val sparkSession: SparkSession)
@@ -52,13 +51,12 @@ private[hive] case class MetastoreRelation(
case relation: MetastoreRelation =>
databaseName == relation.databaseName &&
tableName == relation.tableName &&
- alias == relation.alias &&
output == relation.output
case _ => false
}
override def hashCode(): Int = {
- Objects.hashCode(databaseName, tableName, alias, output)
+ Objects.hashCode(databaseName, tableName, output)
}
override protected def otherCopyArgs: Seq[AnyRef] = catalogTable ::
sparkSession :: Nil
@@ -208,7 +206,7 @@ private[hive] case class MetastoreRelation(
CatalystSqlParser.parseDataType(f.dataType),
// Since data can be dumped in randomly with no validation, everything
is nullable.
nullable = true
- )(qualifier = Some(alias.getOrElse(tableName)))
+ )(qualifier = Some(tableName))
}
/** PartitionKey attributes */
@@ -243,6 +241,6 @@ private[hive] case class MetastoreRelation(
}
override def newInstance(): MetastoreRelation = {
- MetastoreRelation(databaseName, tableName, alias)(catalogTable, client,
sparkSession)
+ MetastoreRelation(databaseName, tableName)(catalogTable, client,
sparkSession)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]