[carbondata] branch master updated: [CARBONDATA-3735] Avoid listing all tables in metastore

liuzhi Thu, 05 Mar 2020 03:35:22 -0800

This is an automated email from the ASF dual-hosted git repository.

liuzhi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git



The following commit(s) were added to refs/heads/master by this push:
     new ed3610c  [CARBONDATA-3735] Avoid listing all tables in metastore
ed3610c is described below

commit ed3610caaaa070f6d174e5e6e84b6f9304c2684f
Author: Jacky Li <jacky.li...@qq.com>
AuthorDate: Wed Mar 4 16:30:44 2020 +0800

    [CARBONDATA-3735] Avoid listing all tables in metastore
    
    Why is this PR needed?
    In CarbonCreateDataSourceTableCommand.scala and
    RegisterIndexTableCommand.scala, Carbon lists all tables in a database
    only to check whether a given table exists.
    Listing is slow when the database contains many tables, so it should be
    avoided.
    
    What changes were proposed in this PR?
    This PR uses the catalog's tableExists API instead of listing all tables.
    It also changes a never-reassigned `var` to `val` in
    CarbonShowTablesCommand.scala.
    
    Does this PR introduce any user interface change?
    No
    
    Is any new testcase added?
    No
    
    This closes #3655
---
 .../command/table/CarbonCreateDataSourceTableCommand.scala   | 12 +-----------
 .../execution/command/table/CarbonShowTablesCommand.scala    |  2 +-
 .../secondaryindex/command/RegisterIndexTableCommand.scala   |  7 ++++---
 3 files changed, 6 insertions(+), 15 deletions(-)

diff --git 
a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateDataSourceTableCommand.scala
 
b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateDataSourceTableCommand.scala
index 0274bd4..c94835f 100644
--- 
a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateDataSourceTableCommand.scala
+++ 
b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateDataSourceTableCommand.scala
@@ -40,17 +40,7 @@ case class CarbonCreateDataSourceTableCommand(
     assert(table.tableType != CatalogTableType.VIEW)
     assert(table.provider.isDefined)
     val LOGGER = 
LogServiceFactory.getLogService(this.getClass.getCanonicalName)
-    val sessionState = sparkSession.sessionState
-    val db = 
table.identifier.database.getOrElse(sessionState.catalog.getCurrentDatabase)
-    val existingTables = sessionState.catalog.listTables(db)
-    var tableExist = false
-    existingTables.foreach { tid =>
-      if (tid.table.equalsIgnoreCase(table.identifier.table)
-          && tid.database.getOrElse("").equalsIgnoreCase(db)) {
-        tableExist = true
-      }
-    }
-    if (tableExist) {
+    if (sparkSession.sessionState.catalog.tableExists(table.identifier)) {
       if (ignoreIfExists) {
         return Seq.empty[Row]
       } else {
diff --git 
a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonShowTablesCommand.scala
 
b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonShowTablesCommand.scala
index d50b766..c14264d 100644
--- 
a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonShowTablesCommand.scala
+++ 
b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonShowTablesCommand.scala
@@ -39,7 +39,7 @@ private[sql] case class CarbonShowTablesCommand ( 
databaseName: Option[String],
     // instead of calling tables in sparkSession.
     val catalog = sparkSession.sessionState.catalog
     val db = databaseName.getOrElse(catalog.getCurrentDatabase)
-    var tables =
+    val tables =
       tableIdentifierPattern.map(catalog.listTables(db, 
_)).getOrElse(catalog.listTables(db))
     val externalCatalog = sparkSession.sharedState.externalCatalog
     // this method checks whether the table is mainTable or datamap based on 
property "isVisible"
diff --git 
a/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/command/RegisterIndexTableCommand.scala
 
b/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/command/RegisterIndexTableCommand.scala
index 1caaa34..3a8e595 100644
--- 
a/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/command/RegisterIndexTableCommand.scala
+++ 
b/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/command/RegisterIndexTableCommand.scala
@@ -55,15 +55,16 @@ case class RegisterIndexTableCommand(dbName: 
Option[String], indexTableName: Str
     setAuditTable(databaseName, indexTableName)
     setAuditInfo(Map("Parent TableName" -> parentTable))
     // 1. check if the main and index table exist
-    val tables: Seq[TableIdentifier] = 
sparkSession.sessionState.catalog.listTables(databaseName)
-    if (!tables.exists(_.table.equalsIgnoreCase(parentTable))) {
+    if (!sparkSession.sessionState.catalog.tableExists(
+      TableIdentifier(parentTable, Some(databaseName)))) {
       val message: String = s"Secondary Index Table registration for table 
[$indexTableName] with" +
         s" table" +
         s" [$databaseName.$parentTable] failed." +
         s"Table [$parentTable] does not exists under database [$databaseName]"
       CarbonException.analysisException(message)
     }
-    if (!tables.exists(_.table.equalsIgnoreCase(indexTableName))) {
+    if (!sparkSession.sessionState.catalog.tableExists(
+      TableIdentifier(indexTableName, Some(databaseName)))) {
       val message: String = s"Secondary Index Table registration for table 
[$indexTableName] with" +
         s" table" +
         s" [$databaseName.$parentTable] failed." +

[carbondata] branch master updated: [CARBONDATA-3735] Avoid listing all tables in metastore

Reply via email to