spark git commit: [SPARK-3299][SQL]Public API in SQLContext to list tables

2015-02-12 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/branch-1.3 925fd84a1 -> edbac178d


[SPARK-3299][SQL]Public API in SQLContext to list tables

https://issues.apache.org/jira/browse/SPARK-3299

Author: Yin Huai <yh...@databricks.com>

Closes #4547 from yhuai/tables and squashes the following commits:

6c8f92e [Yin Huai] Add tableNames.
acbb281 [Yin Huai] Update Python test.
7793dcb [Yin Huai] Fix scala test.
572870d [Yin Huai] Address comments.
aba2e88 [Yin Huai] Format.
12c86df [Yin Huai] Add tables() to SQLContext to return a DataFrame containing existing tables.

(cherry picked from commit 1d0596a16e1d3add2631f5d8169aeec2876a1362)
Signed-off-by: Michael Armbrust <mich...@databricks.com>
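
(For context: from the Scala side, the new public API can be exercised roughly as in the minimal sketch below. It assumes a spark-shell session where `sc` is an existing SparkContext and `df` is an existing DataFrame; those names are illustrative, not part of the patch.)

    import org.apache.spark.sql.SQLContext

    val sqlContext = new SQLContext(sc)   // assumes an existing SparkContext `sc`
    df.registerTempTable("table1")        // assumes an existing DataFrame `df`

    // tables() returns a DataFrame with columns tableName and isTemporary
    val allTables = sqlContext.tables()
    allTables.filter("isTemporary = true").show()

    // tableNames() returns plain names, optionally scoped to a database
    val names: Array[String] = sqlContext.tableNames()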


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/edbac178
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/edbac178
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/edbac178

Branch: refs/heads/branch-1.3
Commit: edbac178d186f6936408b211385a5fea9e4f4603
Parents: 925fd84
Author: Yin Huai <yh...@databricks.com>
Authored: Thu Feb 12 18:08:01 2015 -0800
Committer: Michael Armbrust <mich...@databricks.com>
Committed: Thu Feb 12 18:08:19 2015 -0800

--
 python/pyspark/sql/context.py   | 34 +
 .../spark/sql/catalyst/analysis/Catalog.scala   | 37 ++
 .../scala/org/apache/spark/sql/SQLContext.scala | 36 +
 .../org/apache/spark/sql/ListTablesSuite.scala  | 76 +++
 .../spark/sql/hive/HiveMetastoreCatalog.scala   |  5 ++
 .../apache/spark/sql/hive/ListTablesSuite.scala | 77 
 6 files changed, 265 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/edbac178/python/pyspark/sql/context.py
--
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index db4bcbe..082f1b6 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -621,6 +621,40 @@ class SQLContext(object):
 
 return DataFrame(self._ssql_ctx.table(tableName), self)
 
+    def tables(self, dbName=None):
+        """Returns a DataFrame containing names of tables in the given database.
+
+        If `dbName` is not specified, the current database will be used.
+
+        The returned DataFrame has two columns, tableName and isTemporary
+        (a column with BooleanType indicating if a table is a temporary one or not).
+
+        >>> sqlCtx.registerRDDAsTable(df, "table1")
+        >>> df2 = sqlCtx.tables()
+        >>> df2.filter("tableName = 'table1'").first()
+        Row(tableName=u'table1', isTemporary=True)
+        """
+        if dbName is None:
+            return DataFrame(self._ssql_ctx.tables(), self)
+        else:
+            return DataFrame(self._ssql_ctx.tables(dbName), self)
+
+    def tableNames(self, dbName=None):
+        """Returns a list of names of tables in the database `dbName`.
+
+        If `dbName` is not specified, the current database will be used.
+
+        >>> sqlCtx.registerRDDAsTable(df, "table1")
+        >>> "table1" in sqlCtx.tableNames()
+        True
+        >>> "table1" in sqlCtx.tableNames("db")
+        True
+        """
+        if dbName is None:
+            return [name for name in self._ssql_ctx.tableNames()]
+        else:
+            return [name for name in self._ssql_ctx.tableNames(dbName)]
+
     def cacheTable(self, tableName):
         """Caches the specified table in-memory."""
         self._ssql_ctx.cacheTable(tableName)

http://git-wip-us.apache.org/repos/asf/spark/blob/edbac178/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
index df8d03b..f57eab2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
@@ -34,6 +34,12 @@ trait Catalog {
   tableIdentifier: Seq[String],
   alias: Option[String] = None): LogicalPlan
 
+  /**
+   * Returns tuples of (tableName, isTemporary) for all tables in the given database.
+   * isTemporary is a Boolean value indicating whether a table is temporary or not.
+   */
+  def getTables(databaseName: Option[String]): Seq[(String, Boolean)]
+
   def registerTable(tableIdentifier: Seq[String], plan: LogicalPlan): Unit
 
   def unregisterTable(tableIdentifier: Seq[String]): Unit
@@ -101,6 +107,12 @@ class SimpleCatalog(val caseSensitive: Boolean) extends Catalog {
     // properly qualified with this alias.
     alias.map(a => Subquery(a, tableWithQualifiers)).getOrElse(tableWithQualifiers)
   }
+
+  override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = {
+    tables.map {
+      case (name, _) => (name, true)
+    }.toSeq
+  }

spark git commit: [SPARK-3299][SQL]Public API in SQLContext to list tables

2015-02-12 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/master c025a4688 -> 1d0596a16


[SPARK-3299][SQL]Public API in SQLContext to list tables

https://issues.apache.org/jira/browse/SPARK-3299

Author: Yin Huai <yh...@databricks.com>

Closes #4547 from yhuai/tables and squashes the following commits:

6c8f92e [Yin Huai] Add tableNames.
acbb281 [Yin Huai] Update Python test.
7793dcb [Yin Huai] Fix scala test.
572870d [Yin Huai] Address comments.
aba2e88 [Yin Huai] Format.
12c86df [Yin Huai] Add tables() to SQLContext to return a DataFrame containing existing tables.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1d0596a1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1d0596a1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1d0596a1

Branch: refs/heads/master
Commit: 1d0596a16e1d3add2631f5d8169aeec2876a1362
Parents: c025a46
Author: Yin Huai <yh...@databricks.com>
Authored: Thu Feb 12 18:08:01 2015 -0800
Committer: Michael Armbrust <mich...@databricks.com>
Committed: Thu Feb 12 18:08:01 2015 -0800

--
 python/pyspark/sql/context.py   | 34 +
 .../spark/sql/catalyst/analysis/Catalog.scala   | 37 ++
 .../scala/org/apache/spark/sql/SQLContext.scala | 36 +
 .../org/apache/spark/sql/ListTablesSuite.scala  | 76 +++
 .../spark/sql/hive/HiveMetastoreCatalog.scala   |  5 ++
 .../apache/spark/sql/hive/ListTablesSuite.scala | 77 
 6 files changed, 265 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1d0596a1/python/pyspark/sql/context.py
--
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index db4bcbe..082f1b6 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -621,6 +621,40 @@ class SQLContext(object):
 
 return DataFrame(self._ssql_ctx.table(tableName), self)
 
+    def tables(self, dbName=None):
+        """Returns a DataFrame containing names of tables in the given database.
+
+        If `dbName` is not specified, the current database will be used.
+
+        The returned DataFrame has two columns, tableName and isTemporary
+        (a column with BooleanType indicating if a table is a temporary one or not).
+
+        >>> sqlCtx.registerRDDAsTable(df, "table1")
+        >>> df2 = sqlCtx.tables()
+        >>> df2.filter("tableName = 'table1'").first()
+        Row(tableName=u'table1', isTemporary=True)
+        """
+        if dbName is None:
+            return DataFrame(self._ssql_ctx.tables(), self)
+        else:
+            return DataFrame(self._ssql_ctx.tables(dbName), self)
+
+    def tableNames(self, dbName=None):
+        """Returns a list of names of tables in the database `dbName`.
+
+        If `dbName` is not specified, the current database will be used.
+
+        >>> sqlCtx.registerRDDAsTable(df, "table1")
+        >>> "table1" in sqlCtx.tableNames()
+        True
+        >>> "table1" in sqlCtx.tableNames("db")
+        True
+        """
+        if dbName is None:
+            return [name for name in self._ssql_ctx.tableNames()]
+        else:
+            return [name for name in self._ssql_ctx.tableNames(dbName)]
+
     def cacheTable(self, tableName):
         """Caches the specified table in-memory."""
         self._ssql_ctx.cacheTable(tableName)

http://git-wip-us.apache.org/repos/asf/spark/blob/1d0596a1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
index df8d03b..f57eab2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
@@ -34,6 +34,12 @@ trait Catalog {
   tableIdentifier: Seq[String],
   alias: Option[String] = None): LogicalPlan
 
+  /**
+   * Returns tuples of (tableName, isTemporary) for all tables in the given database.
+   * isTemporary is a Boolean value indicating whether a table is temporary or not.
+   */
+  def getTables(databaseName: Option[String]): Seq[(String, Boolean)]
+
   def registerTable(tableIdentifier: Seq[String], plan: LogicalPlan): Unit
 
   def unregisterTable(tableIdentifier: Seq[String]): Unit
@@ -101,6 +107,12 @@ class SimpleCatalog(val caseSensitive: Boolean) extends Catalog {
     // properly qualified with this alias.
     alias.map(a => Subquery(a, tableWithQualifiers)).getOrElse(tableWithQualifiers)
   }
+
+  override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = {
+    tables.map {
+      case (name, _) => (name, true)
+    }.toSeq
+  }
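
(The SQLContext.scala side of the change is listed in the file summary above but not shown in this excerpt. The sketch below is a plausible illustration of how the catalog's Seq[(String, Boolean)] could become the documented two-column DataFrame; the method body and parameter shape are assumptions for illustration, not the actual code from this commit.)

    // Hypothetical sketch inside SQLContext, not the actual patched code:
    // convert the catalog's (tableName, isTemporary) pairs into a DataFrame
    // with the column names documented in the Python docstring.
    def tables(databaseName: Option[String] = None): DataFrame = {
      val tableInfo = catalog.getTables(databaseName)
      createDataFrame(tableInfo).toDF("tableName", "isTemporary")
    }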