This is an automated email from the ASF dual-hosted git repository. liuzhi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push: new ed3610c [CARBONDATA-3735] Avoid listing all tables in metastore ed3610c is described below commit ed3610caaaa070f6d174e5e6e84b6f9304c2684f Author: Jacky Li <jacky.li...@qq.com> AuthorDate: Wed Mar 4 16:30:44 2020 +0800 [CARBONDATA-3735] Avoid listing all tables in metastore Why is this PR needed? In CarbonCreateDataSourceTableCommand.scala and RegisterIndexTableCommand.scala, Carbon lists all tables in a database just to check whether a single table exists. This is slow when the database contains many tables, so it should be avoided. What changes were proposed in this PR? This PR uses the catalog's tableExists API instead of listing all tables. Does this PR introduce any user interface change? No Is any new test case added? No This closes #3655 --- .../command/table/CarbonCreateDataSourceTableCommand.scala | 12 +----------- .../execution/command/table/CarbonShowTablesCommand.scala | 2 +- .../secondaryindex/command/RegisterIndexTableCommand.scala | 7 ++++--- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateDataSourceTableCommand.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateDataSourceTableCommand.scala index 0274bd4..c94835f 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateDataSourceTableCommand.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateDataSourceTableCommand.scala @@ -40,17 +40,7 @@ case class CarbonCreateDataSourceTableCommand( assert(table.tableType != CatalogTableType.VIEW) assert(table.provider.isDefined) val LOGGER = LogServiceFactory.getLogService(this.getClass.getCanonicalName) - val sessionState = sparkSession.sessionState - val db = table.identifier.database.getOrElse(sessionState.catalog.getCurrentDatabase) - val existingTables = 
sessionState.catalog.listTables(db) - var tableExist = false - existingTables.foreach { tid => - if (tid.table.equalsIgnoreCase(table.identifier.table) - && tid.database.getOrElse("").equalsIgnoreCase(db)) { - tableExist = true - } - } - if (tableExist) { + if (sparkSession.sessionState.catalog.tableExists(table.identifier)) { if (ignoreIfExists) { return Seq.empty[Row] } else { diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonShowTablesCommand.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonShowTablesCommand.scala index d50b766..c14264d 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonShowTablesCommand.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonShowTablesCommand.scala @@ -39,7 +39,7 @@ private[sql] case class CarbonShowTablesCommand ( databaseName: Option[String], // instead of calling tables in sparkSession. 
val catalog = sparkSession.sessionState.catalog val db = databaseName.getOrElse(catalog.getCurrentDatabase) - var tables = + val tables = tableIdentifierPattern.map(catalog.listTables(db, _)).getOrElse(catalog.listTables(db)) val externalCatalog = sparkSession.sharedState.externalCatalog // this method checks whether the table is mainTable or datamap based on property "isVisible" diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/command/RegisterIndexTableCommand.scala b/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/command/RegisterIndexTableCommand.scala index 1caaa34..3a8e595 100644 --- a/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/command/RegisterIndexTableCommand.scala +++ b/integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/command/RegisterIndexTableCommand.scala @@ -55,15 +55,16 @@ case class RegisterIndexTableCommand(dbName: Option[String], indexTableName: Str setAuditTable(databaseName, indexTableName) setAuditInfo(Map("Parent TableName" -> parentTable)) // 1. check if the main and index table exist - val tables: Seq[TableIdentifier] = sparkSession.sessionState.catalog.listTables(databaseName) - if (!tables.exists(_.table.equalsIgnoreCase(parentTable))) { + if (!sparkSession.sessionState.catalog.tableExists( + TableIdentifier(parentTable, Some(databaseName)))) { val message: String = s"Secondary Index Table registration for table [$indexTableName] with" + s" table" + s" [$databaseName.$parentTable] failed." + s"Table [$parentTable] does not exists under database [$databaseName]" CarbonException.analysisException(message) } - if (!tables.exists(_.table.equalsIgnoreCase(indexTableName))) { + if (!sparkSession.sessionState.catalog.tableExists( + TableIdentifier(indexTableName, Some(databaseName)))) { val message: String = s"Secondary Index Table registration for table [$indexTableName] with" + s" table" + s" [$databaseName.$parentTable] failed." +