spark git commit: [SPARK-3299][SQL]Public API in SQLContext to list tables
Repository: spark
Updated Branches:
  refs/heads/branch-1.3 925fd84a1 -> edbac178d

[SPARK-3299][SQL]Public API in SQLContext to list tables

https://issues.apache.org/jira/browse/SPARK-3299

Author: Yin Huai yh...@databricks.com

Closes #4547 from yhuai/tables and squashes the following commits:

6c8f92e [Yin Huai] Add tableNames.
acbb281 [Yin Huai] Update Python test.
7793dcb [Yin Huai] Fix scala test.
572870d [Yin Huai] Address comments.
aba2e88 [Yin Huai] Format.
12c86df [Yin Huai] Add tables() to SQLContext to return a DataFrame containing existing tables.

(cherry picked from commit 1d0596a16e1d3add2631f5d8169aeec2876a1362)
Signed-off-by: Michael Armbrust mich...@databricks.com

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/edbac178
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/edbac178
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/edbac178

Branch: refs/heads/branch-1.3
Commit: edbac178d186f6936408b211385a5fea9e4f4603
Parents: 925fd84
Author: Yin Huai yh...@databricks.com
Authored: Thu Feb 12 18:08:01 2015 -0800
Committer: Michael Armbrust mich...@databricks.com
Committed: Thu Feb 12 18:08:19 2015 -0800

----------------------------------------------------------------------
 python/pyspark/sql/context.py                   | 34 ++
 .../spark/sql/catalyst/analysis/Catalog.scala   | 37 ++
 .../scala/org/apache/spark/sql/SQLContext.scala | 36 ++
 .../org/apache/spark/sql/ListTablesSuite.scala  | 76 ++++
 .../spark/sql/hive/HiveMetastoreCatalog.scala   |  5 +
 .../apache/spark/sql/hive/ListTablesSuite.scala | 77 ++++
 6 files changed, 265 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/edbac178/python/pyspark/sql/context.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index db4bcbe..082f1b6 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -621,6 +621,40 @@ class SQLContext(object):
 
         return DataFrame(self._ssql_ctx.table(tableName), self)
 
+    def tables(self, dbName=None):
+        """Returns a DataFrame containing names of tables in the given database.
+
+        If `dbName` is not specified, the current database will be used.
+
+        The returned DataFrame has two columns, tableName and isTemporary
+        (a column with BooleanType indicating if a table is a temporary one or not).
+
+        >>> sqlCtx.registerRDDAsTable(df, "table1")
+        >>> df2 = sqlCtx.tables()
+        >>> df2.filter("tableName = 'table1'").first()
+        Row(tableName=u'table1', isTemporary=True)
+        """
+        if dbName is None:
+            return DataFrame(self._ssql_ctx.tables(), self)
+        else:
+            return DataFrame(self._ssql_ctx.tables(dbName), self)
+
+    def tableNames(self, dbName=None):
+        """Returns a list of names of tables in the database `dbName`.
+
+        If `dbName` is not specified, the current database will be used.
+
+        >>> sqlCtx.registerRDDAsTable(df, "table1")
+        >>> "table1" in sqlCtx.tableNames()
+        True
+        >>> "table1" in sqlCtx.tableNames("db")
+        True
+        """
+        if dbName is None:
+            return [name for name in self._ssql_ctx.tableNames()]
+        else:
+            return [name for name in self._ssql_ctx.tableNames(dbName)]
+
     def cacheTable(self, tableName):
         """Caches the specified table in-memory."""
         self._ssql_ctx.cacheTable(tableName)
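The Scala side of the change (the SQLContext.scala additions counted in the diffstat but not reproduced in this excerpt) mirrors the Python methods above with tables()/tableNames() overloads. A minimal sketch of exercising that API on Spark 1.3 follows; the JSON path, app name, and table name are illustrative only, not part of this commit.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object ListTablesExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("list-tables").setMaster("local[*]"))
    val sqlContext = new SQLContext(sc)

    // Register a temporary table so there is something to list
    // (jsonFile and registerTempTable are the Spark 1.3-era APIs).
    val df = sqlContext.jsonFile("examples/src/main/resources/people.json")
    df.registerTempTable("people")

    // tables() returns a DataFrame with tableName and isTemporary columns,
    // so it can be filtered like any other DataFrame.
    sqlContext.tables().filter("tableName = 'people'").show()

    // tableNames() returns plain strings for quick membership checks.
    assert(sqlContext.tableNames().contains("people"))

    sc.stop()
  }
}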
http://git-wip-us.apache.org/repos/asf/spark/blob/edbac178/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
index df8d03b..f57eab2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
@@ -34,6 +34,12 @@ trait Catalog {
       tableIdentifier: Seq[String],
       alias: Option[String] = None): LogicalPlan
 
+  /**
+   * Returns tuples of (tableName, isTemporary) for all tables in the given database.
+   * isTemporary is a Boolean value indicating whether a table is temporary or not.
+   */
+  def getTables(databaseName: Option[String]): Seq[(String, Boolean)]
+
   def registerTable(tableIdentifier: Seq[String], plan: LogicalPlan): Unit
 
   def unregisterTable(tableIdentifier: Seq[String]): Unit
@@ -101,6 +107,12 @@ class SimpleCatalog(val caseSensitive: Boolean) extends Catalog {
     // properly qualified with this alias.
     alias.map(a => Subquery(a, tableWithQualifiers)).getOrElse(tableWithQualifiers)
   }
+
+  override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = {
+    tables.map {
+      case (name, _) => (name, true)
+    }.toSeq
+  }
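getTables is abstract on Catalog, so every catalog implementation has to supply it; SimpleCatalog above answers with a constant true because everything it holds is a temporary in-memory table. The following self-contained sketch illustrates that contract with a stripped-down stand-in trait rather than Spark's real Catalog (which has more abstract members than this excerpt shows); all names in it are hypothetical.

import scala.collection.mutable

// Stripped-down stand-in for Spark's Catalog trait, reduced to the members
// relevant here; the real trait has several more (lookupRelation, etc.).
trait MiniCatalog {
  def registerTable(name: String, plan: String): Unit
  def unregisterTable(name: String): Unit

  /** (tableName, isTemporary) pairs, matching the new Catalog.getTables contract. */
  def getTables(databaseName: Option[String]): Seq[(String, Boolean)]
}

// In-memory implementation in the spirit of SimpleCatalog: everything it holds
// is a temporary table, so isTemporary is uniformly true and databaseName is unused.
class MiniSimpleCatalog extends MiniCatalog {
  private val tables = new mutable.HashMap[String, String]()

  override def registerTable(name: String, plan: String): Unit = tables += name -> plan
  override def unregisterTable(name: String): Unit = tables -= name

  override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] =
    tables.keys.map(name => (name, true)).toSeq
}

object MiniCatalogDemo extends App {
  val catalog = new MiniSimpleCatalog
  catalog.registerTable("table1", "<logical plan placeholder>")
  println(catalog.getTables(None)) // e.g. List((table1,true))
}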