[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r891887309 ## sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala: ## @@ -553,4 +571,103 @@ class CatalogSuite extends SharedSparkSession with AnalysisTest { }.getMessage assert(errMsg.contains("my_temp_table is a temp view. 'recoverPartitions()' expects a table")) } + + test("three layer namespace compatibility - create managed table") { +val catalogName = "testcat" +val dbName = "my_db" +val tableName = "my_table" +val tableSchema = new StructType().add("i", "int") +val description = "this is a test table" + +val df = spark.catalog.createTable( + tableName = Array(catalogName, dbName, tableName).mkString("."), + source = classOf[FakeV2Provider].getName, + schema = tableSchema, + description = description, + options = Map.empty[String, String]) +assert(df.schema.equals(tableSchema)) + +val testCatalog = + spark.sessionState.catalogManager.catalog(catalogName).asTableCatalog +val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName)) +assert(table.schema().equals(tableSchema)) + assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName)) +assert(table.properties().get("comment").equals(description)) + } + + test("three layer namespace compatibility - create external table") { +withTempDir { dir => + val catalogName = "testcat" + val dbName = "my_db" + val tableName = "my_table" + val tableSchema = new StructType().add("i", "int") + val description = "this is a test table" + + val df = spark.catalog.createTable( +tableName = Array(catalogName, dbName, tableName).mkString("."), +source = classOf[FakeV2Provider].getName, +schema = tableSchema, +description = description, +options = Map("path" -> dir.getAbsolutePath)) + assert(df.schema.equals(tableSchema)) + + val testCatalog = +spark.sessionState.catalogManager.catalog("testcat").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(dbName), 
tableName)) + assert(table.schema().equals(tableSchema)) + assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName)) + assert(table.properties().get("comment").equals(description)) + assert(table.properties().get("path").equals(dir.getAbsolutePath)) + assert(table.properties().get("external").equals("true")) + assert(table.properties().get("location").equals("file:" + dir.getAbsolutePath)) +} + } + + test("three layer namespace compatibility - list tables") { +withTempDir { dir => + val catalogName = "testcat" + val dbName = "my_db" + val tableName = "my_table" + val tableSchema = new StructType().add("i", "int") + val description = "this is a test managed table" + + spark.catalog.createTable( +tableName = Array(catalogName, dbName, tableName).mkString("."), +source = classOf[FakeV2Provider].getName, +schema = tableSchema, +description = description, +options = Map.empty[String, String]) + + val tableName2 = "my_table2" + val description2 = "this is a test external table" + + spark.catalog.createTable( +tableName = Array(catalogName, dbName, tableName2).mkString("."), +source = classOf[FakeV2Provider].getName, +schema = tableSchema, +description = description2, +options = Map("path" -> dir.getAbsolutePath)) + + val tables = spark.catalog.listTables("testcat.my_db").collect() + assert(tables.size == 2) + + val expectedTable1 = +new Table(tableName, catalogName, Array(dbName), description, + CatalogTableType.MANAGED.name, false) + assert(tables.exists(t => +expectedTable1.name.equals(t.name) && expectedTable1.database.equals(t.database) && +expectedTable1.description.equals(t.description) && +expectedTable1.tableType.equals(t.tableType) && +expectedTable1.isTemporary == t.isTemporary)) + + val expectedTable2 = +new Table(tableName2, catalogName, Array(dbName), description2, + CatalogTableType.EXTERNAL.name, false) + assert(tables.exists(t => +expectedTable2.name.equals(t.name) && expectedTable2.database.equals(t.database) && 
+expectedTable2.description.equals(t.description) && +expectedTable2.tableType.equals(t.tableType) && +expectedTable2.isTemporary == t.isTemporary)) +} Review Comment: Sure I will add this test case. I won't call this a test of `backward compatibility` though. `listTable` should always accept DB name: either it is a `database` or it is a `catalog_name.database`. This is more like a test of correctness. -- This is an automated message from the Apache Git Service. To respond to
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r891563533 ## sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala: ## @@ -117,14 +128,44 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { case NonFatal(_) => None } val isTemp = sessionCatalog.isTempView(tableIdent) +val qualifier = + Array(metadata.map(_.identifier.database).getOrElse(tableIdent.database).orNull) new Table( name = tableIdent.table, Review Comment: done ## sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala: ## @@ -117,14 +128,44 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { case NonFatal(_) => None } val isTemp = sessionCatalog.isTempView(tableIdent) +val qualifier = Review Comment: hm updated. ## sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala: ## @@ -97,8 +98,18 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { */ @throws[AnalysisException]("database does not exist") override def listTables(dbName: String): Dataset[Table] = { -val tables = sessionCatalog.listTables(dbName).map(makeTable) -CatalogImpl.makeDataset(tables, sparkSession) +if (sessionCatalog.databaseExists(dbName)) { Review Comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r891537461 ## sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala: ## @@ -553,4 +570,100 @@ class CatalogSuite extends SharedSparkSession with AnalysisTest { }.getMessage assert(errMsg.contains("my_temp_table is a temp view. 'recoverPartitions()' expects a table")) } + + test("three layer namespace compatibility - create managed table") { +spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryCatalog].getName) +val catalogName = "testcat" +val dbName = "my_db" +val tableName = "my_table" +val tableSchema = new StructType().add("i", "int") +val description = "this is a test table" + +val df = spark.catalog.createTable( + tableName = Array(catalogName, dbName, tableName).mkString("."), + source = classOf[FakeV2Provider].getName, + schema = tableSchema, + description = description, + options = Map.empty[String, String]) +assert(df.schema.equals(tableSchema)) + +val testCatalog = + spark.sessionState.catalogManager.catalog("testcat").asTableCatalog +val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName)) +assert(table.schema().equals(tableSchema)) + assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName)) +assert(table.properties().get("comment").equals(description)) + } + + test("three layer namespace compatibility - create external table") { +withTempDir { dir => + val catalogName = "testcat" + val dbName = "my_db" + val tableName = "my_table" + val tableSchema = new StructType().add("i", "int") + val description = "this is a test table" + + val df = spark.catalog.createTable( +tableName = Array(catalogName, dbName, tableName).mkString("."), +source = classOf[FakeV2Provider].getName, +schema = tableSchema, +description = description, +options = Map("path" -> dir.getAbsolutePath)) + assert(df.schema.equals(tableSchema)) + + val testCatalog = 
+spark.sessionState.catalogManager.catalog("testcat").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName)) + assert(table.schema().equals(tableSchema)) + assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName)) + assert(table.properties().get("comment").equals(description)) + assert(table.properties().get("path").equals(dir.getAbsolutePath)) +} + } + + test("three layer namespace compatibility - list tables") { Review Comment: Actually we still need the change in `listTables`: existing users do not use 3-part name and they just use 2 part name. In the case of the `listTables`, existing users use `dbname` directly (so just `b` but not `a.b`). In this case, there is a choice to decide which catalog it is, and the default catalog is hive metastore. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r891534128 ## sql/core/src/main/scala/org/apache/spark/sql/catalog/interface.scala: ## @@ -64,15 +65,34 @@ class Database( @Stable class Table( val name: String, -@Nullable val database: String, +@Nullable val catalog: String, +@Nullable val namespace: Array[String], @Nullable val description: String, val tableType: String, val isTemporary: Boolean) extends DefinedByConstructorParams { + def this(name: String, database: String, description: String, tableType: String, +isTemporary: Boolean) = { +this(name, null, Array(database), description, tableType, isTemporary) + } + + def database: String = { +if (namespace == null) { + null +} else if (namespace.length == 2) { Review Comment: Right I think we only care about array.length = 1 which is the database. ## sql/core/src/main/scala/org/apache/spark/sql/catalog/interface.scala: ## @@ -64,15 +65,34 @@ class Database( @Stable class Table( val name: String, -@Nullable val database: String, +@Nullable val catalog: String, +@Nullable val namespace: Array[String], @Nullable val description: String, val tableType: String, val isTemporary: Boolean) extends DefinedByConstructorParams { + def this(name: String, database: String, description: String, tableType: String, +isTemporary: Boolean) = { +this(name, null, Array(database), description, tableType, isTemporary) + } + + def database: String = { +if (namespace == null) { + null +} else if (namespace.length == 2) { Review Comment: Right I think we only care about array.length = 1 which is the database. Will remove it. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r891497676 ## sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala: ## @@ -553,4 +570,100 @@ class CatalogSuite extends SharedSparkSession with AnalysisTest { }.getMessage assert(errMsg.contains("my_temp_table is a temp view. 'recoverPartitions()' expects a table")) } + + test("three layer namespace compatibility - create managed table") { +spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryCatalog].getName) +val catalogName = "testcat" +val dbName = "my_db" +val tableName = "my_table" +val tableSchema = new StructType().add("i", "int") +val description = "this is a test table" + +val df = spark.catalog.createTable( + tableName = Array(catalogName, dbName, tableName).mkString("."), + source = classOf[FakeV2Provider].getName, + schema = tableSchema, + description = description, + options = Map.empty[String, String]) +assert(df.schema.equals(tableSchema)) + +val testCatalog = + spark.sessionState.catalogManager.catalog("testcat").asTableCatalog +val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName)) +assert(table.schema().equals(tableSchema)) + assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName)) +assert(table.properties().get("comment").equals(description)) + } + + test("three layer namespace compatibility - create external table") { +withTempDir { dir => + val catalogName = "testcat" + val dbName = "my_db" + val tableName = "my_table" + val tableSchema = new StructType().add("i", "int") + val description = "this is a test table" + + val df = spark.catalog.createTable( +tableName = Array(catalogName, dbName, tableName).mkString("."), +source = classOf[FakeV2Provider].getName, +schema = tableSchema, +description = description, +options = Map("path" -> dir.getAbsolutePath)) + assert(df.schema.equals(tableSchema)) + + val testCatalog = 
+spark.sessionState.catalogManager.catalog("testcat").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName)) + assert(table.schema().equals(tableSchema)) + assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName)) + assert(table.properties().get("comment").equals(description)) + assert(table.properties().get("path").equals(dir.getAbsolutePath)) +} + } + + test("three layer namespace compatibility - list tables") { Review Comment: yeah if you agree I will remove unnecessary change. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r889237990 ## sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala: ## @@ -553,4 +570,100 @@ class CatalogSuite extends SharedSparkSession with AnalysisTest { }.getMessage assert(errMsg.contains("my_temp_table is a temp view. 'recoverPartitions()' expects a table")) } + + test("three layer namespace compatibility - create managed table") { +spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryCatalog].getName) +val catalogName = "testcat" +val dbName = "my_db" +val tableName = "my_table" +val tableSchema = new StructType().add("i", "int") +val description = "this is a test table" + +val df = spark.catalog.createTable( + tableName = Array(catalogName, dbName, tableName).mkString("."), + source = classOf[FakeV2Provider].getName, + schema = tableSchema, + description = description, + options = Map.empty[String, String]) +assert(df.schema.equals(tableSchema)) + +val testCatalog = + spark.sessionState.catalogManager.catalog("testcat").asTableCatalog +val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName)) +assert(table.schema().equals(tableSchema)) + assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName)) +assert(table.properties().get("comment").equals(description)) + } + + test("three layer namespace compatibility - create external table") { +withTempDir { dir => + val catalogName = "testcat" + val dbName = "my_db" + val tableName = "my_table" + val tableSchema = new StructType().add("i", "int") + val description = "this is a test table" + + val df = spark.catalog.createTable( +tableName = Array(catalogName, dbName, tableName).mkString("."), +source = classOf[FakeV2Provider].getName, +schema = tableSchema, +description = description, +options = Map("path" -> dir.getAbsolutePath)) + assert(df.schema.equals(tableSchema)) + + val testCatalog = 
+spark.sessionState.catalogManager.catalog("testcat").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName)) + assert(table.schema().equals(tableSchema)) + assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName)) + assert(table.properties().get("comment").equals(description)) + assert(table.properties().get("path").equals(dir.getAbsolutePath)) +} + } + + test("three layer namespace compatibility - list tables") { Review Comment: It turns out our code does not support `a.b` as table/database name: ``` `also_table_name.my_db` is not a valid name for tables/databases. Valid names only contain alphabet characters, numbers and _. org.apache.spark.sql.AnalysisException: `also_table_name.my_db` is not a valid name for tables/databases. Valid names only contain alphabet characters, numbers and _. at org.apache.spark.sql.errors.QueryCompilationErrors$.invalidNameForTableOrDatabaseError(QueryCompilationErrors.scala:572) at org.apache.spark.sql.catalyst.catalog.SessionCatalog.validateName(SessionCatalog.scala:146) at org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:350) at org.apache.spark.sql.internal.CatalogSuite.createTable(CatalogSuite.scala:63) at org.apache.spark.sql.internal.CatalogSuite.$anonfun$new$101(CatalogSuite.scala:677) ``` In this case, we won't hit the ambiguity that a database name `a.b` could be either a database name or `a` is catalog name for `ListTables` API? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r889237990 ## sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala: ## @@ -553,4 +570,100 @@ class CatalogSuite extends SharedSparkSession with AnalysisTest { }.getMessage assert(errMsg.contains("my_temp_table is a temp view. 'recoverPartitions()' expects a table")) } + + test("three layer namespace compatibility - create managed table") { +spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryCatalog].getName) +val catalogName = "testcat" +val dbName = "my_db" +val tableName = "my_table" +val tableSchema = new StructType().add("i", "int") +val description = "this is a test table" + +val df = spark.catalog.createTable( + tableName = Array(catalogName, dbName, tableName).mkString("."), + source = classOf[FakeV2Provider].getName, + schema = tableSchema, + description = description, + options = Map.empty[String, String]) +assert(df.schema.equals(tableSchema)) + +val testCatalog = + spark.sessionState.catalogManager.catalog("testcat").asTableCatalog +val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName)) +assert(table.schema().equals(tableSchema)) + assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName)) +assert(table.properties().get("comment").equals(description)) + } + + test("three layer namespace compatibility - create external table") { +withTempDir { dir => + val catalogName = "testcat" + val dbName = "my_db" + val tableName = "my_table" + val tableSchema = new StructType().add("i", "int") + val description = "this is a test table" + + val df = spark.catalog.createTable( +tableName = Array(catalogName, dbName, tableName).mkString("."), +source = classOf[FakeV2Provider].getName, +schema = tableSchema, +description = description, +options = Map("path" -> dir.getAbsolutePath)) + assert(df.schema.equals(tableSchema)) + + val testCatalog = 
+spark.sessionState.catalogManager.catalog("testcat").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName)) + assert(table.schema().equals(tableSchema)) + assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName)) + assert(table.properties().get("comment").equals(description)) + assert(table.properties().get("path").equals(dir.getAbsolutePath)) +} + } + + test("three layer namespace compatibility - list tables") { Review Comment: It turns out our code does not support `a.b` as table/database name: ``` `also_name.my_db` is not a valid name for tables/databases. Valid names only contain alphabet characters, numbers and _. org.apache.spark.sql.AnalysisException: `also_name.my_db` is not a valid name for tables/databases. Valid names only contain alphabet characters, numbers and _. at org.apache.spark.sql.errors.QueryCompilationErrors$.invalidNameForTableOrDatabaseError(QueryCompilationErrors.scala:572) at org.apache.spark.sql.catalyst.catalog.SessionCatalog.validateName(SessionCatalog.scala:146) at org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:350) at org.apache.spark.sql.internal.CatalogSuite.createTable(CatalogSuite.scala:63) at org.apache.spark.sql.internal.CatalogSuite.$anonfun$new$101(CatalogSuite.scala:677) ``` In this case, we won't hit the ambiguity that a database name `a.b` could be either a database name or `a` is catalog name for `ListTables` API? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r889237990 ## sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala: ## @@ -553,4 +570,100 @@ class CatalogSuite extends SharedSparkSession with AnalysisTest { }.getMessage assert(errMsg.contains("my_temp_table is a temp view. 'recoverPartitions()' expects a table")) } + + test("three layer namespace compatibility - create managed table") { +spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryCatalog].getName) +val catalogName = "testcat" +val dbName = "my_db" +val tableName = "my_table" +val tableSchema = new StructType().add("i", "int") +val description = "this is a test table" + +val df = spark.catalog.createTable( + tableName = Array(catalogName, dbName, tableName).mkString("."), + source = classOf[FakeV2Provider].getName, + schema = tableSchema, + description = description, + options = Map.empty[String, String]) +assert(df.schema.equals(tableSchema)) + +val testCatalog = + spark.sessionState.catalogManager.catalog("testcat").asTableCatalog +val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName)) +assert(table.schema().equals(tableSchema)) + assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName)) +assert(table.properties().get("comment").equals(description)) + } + + test("three layer namespace compatibility - create external table") { +withTempDir { dir => + val catalogName = "testcat" + val dbName = "my_db" + val tableName = "my_table" + val tableSchema = new StructType().add("i", "int") + val description = "this is a test table" + + val df = spark.catalog.createTable( +tableName = Array(catalogName, dbName, tableName).mkString("."), +source = classOf[FakeV2Provider].getName, +schema = tableSchema, +description = description, +options = Map("path" -> dir.getAbsolutePath)) + assert(df.schema.equals(tableSchema)) + + val testCatalog = 
+spark.sessionState.catalogManager.catalog("testcat").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName)) + assert(table.schema().equals(tableSchema)) + assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName)) + assert(table.properties().get("comment").equals(description)) + assert(table.properties().get("path").equals(dir.getAbsolutePath)) +} + } + + test("three layer namespace compatibility - list tables") { Review Comment: It turns out our code does not support `a.b` as table/database name: ``` `also_table_name.my_table1` is not a valid name for tables/databases. Valid names only contain alphabet characters, numbers and _. org.apache.spark.sql.AnalysisException: `also_table_name.my_table1` is not a valid name for tables/databases. Valid names only contain alphabet characters, numbers and _. at org.apache.spark.sql.errors.QueryCompilationErrors$.invalidNameForTableOrDatabaseError(QueryCompilationErrors.scala:572) at org.apache.spark.sql.catalyst.catalog.SessionCatalog.validateName(SessionCatalog.scala:146) at org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:350) at org.apache.spark.sql.internal.CatalogSuite.createTable(CatalogSuite.scala:63) at org.apache.spark.sql.internal.CatalogSuite.$anonfun$new$101(CatalogSuite.scala:677) ``` In this case, we won't hit the ambiguity that a database name `a.b` could be either a database name or `a` is catalog name for `ListTables` API? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r889237990 ## sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala: ## @@ -553,4 +570,100 @@ class CatalogSuite extends SharedSparkSession with AnalysisTest { }.getMessage assert(errMsg.contains("my_temp_table is a temp view. 'recoverPartitions()' expects a table")) } + + test("three layer namespace compatibility - create managed table") { +spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryCatalog].getName) +val catalogName = "testcat" +val dbName = "my_db" +val tableName = "my_table" +val tableSchema = new StructType().add("i", "int") +val description = "this is a test table" + +val df = spark.catalog.createTable( + tableName = Array(catalogName, dbName, tableName).mkString("."), + source = classOf[FakeV2Provider].getName, + schema = tableSchema, + description = description, + options = Map.empty[String, String]) +assert(df.schema.equals(tableSchema)) + +val testCatalog = + spark.sessionState.catalogManager.catalog("testcat").asTableCatalog +val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName)) +assert(table.schema().equals(tableSchema)) + assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName)) +assert(table.properties().get("comment").equals(description)) + } + + test("three layer namespace compatibility - create external table") { +withTempDir { dir => + val catalogName = "testcat" + val dbName = "my_db" + val tableName = "my_table" + val tableSchema = new StructType().add("i", "int") + val description = "this is a test table" + + val df = spark.catalog.createTable( +tableName = Array(catalogName, dbName, tableName).mkString("."), +source = classOf[FakeV2Provider].getName, +schema = tableSchema, +description = description, +options = Map("path" -> dir.getAbsolutePath)) + assert(df.schema.equals(tableSchema)) + + val testCatalog = 
+spark.sessionState.catalogManager.catalog("testcat").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName)) + assert(table.schema().equals(tableSchema)) + assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName)) + assert(table.properties().get("comment").equals(description)) + assert(table.properties().get("path").equals(dir.getAbsolutePath)) +} + } + + test("three layer namespace compatibility - list tables") { Review Comment: It turns out our code does not support `a.b` as table/database name: ``` `also_table_name.my_table1` is not a valid name for tables/databases. Valid names only contain alphabet characters, numbers and _. org.apache.spark.sql.AnalysisException: `also_table_name.my_table1` is not a valid name for tables/databases. Valid names only contain alphabet characters, numbers and _. at org.apache.spark.sql.errors.QueryCompilationErrors$.invalidNameForTableOrDatabaseError(QueryCompilationErrors.scala:572) at org.apache.spark.sql.catalyst.catalog.SessionCatalog.validateName(SessionCatalog.scala:146) at org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:350) at org.apache.spark.sql.internal.CatalogSuite.createTable(CatalogSuite.scala:63) at org.apache.spark.sql.internal.CatalogSuite.$anonfun$new$101(CatalogSuite.scala:677) ``` In this case, we won't hit the ambiguity that a database name `a.b` could be either a database name or `a` is catalog name for `ListTables` API? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r889237990 ## sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala: ## @@ -553,4 +570,100 @@ class CatalogSuite extends SharedSparkSession with AnalysisTest { }.getMessage assert(errMsg.contains("my_temp_table is a temp view. 'recoverPartitions()' expects a table")) } + + test("three layer namespace compatibility - create managed table") { +spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryCatalog].getName) +val catalogName = "testcat" +val dbName = "my_db" +val tableName = "my_table" +val tableSchema = new StructType().add("i", "int") +val description = "this is a test table" + +val df = spark.catalog.createTable( + tableName = Array(catalogName, dbName, tableName).mkString("."), + source = classOf[FakeV2Provider].getName, + schema = tableSchema, + description = description, + options = Map.empty[String, String]) +assert(df.schema.equals(tableSchema)) + +val testCatalog = + spark.sessionState.catalogManager.catalog("testcat").asTableCatalog +val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName)) +assert(table.schema().equals(tableSchema)) + assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName)) +assert(table.properties().get("comment").equals(description)) + } + + test("three layer namespace compatibility - create external table") { +withTempDir { dir => + val catalogName = "testcat" + val dbName = "my_db" + val tableName = "my_table" + val tableSchema = new StructType().add("i", "int") + val description = "this is a test table" + + val df = spark.catalog.createTable( +tableName = Array(catalogName, dbName, tableName).mkString("."), +source = classOf[FakeV2Provider].getName, +schema = tableSchema, +description = description, +options = Map("path" -> dir.getAbsolutePath)) + assert(df.schema.equals(tableSchema)) + + val testCatalog = 
+spark.sessionState.catalogManager.catalog("testcat").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(dbName), tableName)) + assert(table.schema().equals(tableSchema)) + assert(table.properties().get("provider").equals(classOf[FakeV2Provider].getName)) + assert(table.properties().get("comment").equals(description)) + assert(table.properties().get("path").equals(dir.getAbsolutePath)) +} + } + + test("three layer namespace compatibility - list tables") { Review Comment: It turns out our code does not support `a.b` as table name: ``` `also_table_name.my_table1` is not a valid name for tables/databases. Valid names only contain alphabet characters, numbers and _. org.apache.spark.sql.AnalysisException: `also_table_name.my_table1` is not a valid name for tables/databases. Valid names only contain alphabet characters, numbers and _. at org.apache.spark.sql.errors.QueryCompilationErrors$.invalidNameForTableOrDatabaseError(QueryCompilationErrors.scala:572) at org.apache.spark.sql.catalyst.catalog.SessionCatalog.validateName(SessionCatalog.scala:146) at org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:350) at org.apache.spark.sql.internal.CatalogSuite.createTable(CatalogSuite.scala:63) at org.apache.spark.sql.internal.CatalogSuite.$anonfun$new$101(CatalogSuite.scala:677) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r888383953 ## sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala: ## @@ -299,15 +313,18 @@ class CatalogSuite extends SharedSparkSession with AnalysisTest { val functionFields = ScalaReflection.getConstructorParameterValues(function) val columnFields = ScalaReflection.getConstructorParameterValues(column) assert(dbFields == Seq("nama", "descripta", "locata")) -assert(tableFields == Seq("nama", "databasa", "descripta", "typa", false)) +assert(Seq(tableFields.apply(0), tableFields.apply(2), tableFields.apply(3), Review Comment: I didn't do this is because the 3rd parameter is the namespace as an Array thus `==` does not do deep compare. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r888374320 ## sql/core/src/main/scala/org/apache/spark/sql/catalog/interface.scala: ## @@ -64,12 +64,26 @@ class Database( @Stable class Table( val name: String, -@Nullable val database: String, +@Nullable val qualifier: Array[String], @Nullable val description: String, val tableType: String, val isTemporary: Boolean) extends DefinedByConstructorParams { + def database: String = parseQualifier + + def parseQualifier: String = { +if (qualifier == null) { + null +} else if (qualifier.length == 2) { + qualifier(1) +} else if (qualifier.length == 1) { Review Comment: for example if a table is `catalog1.db1.table1`, In this case `catalog_name=catalog1, qualifier=[db1], table_name=table1`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r881112118 ## sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala: ## @@ -117,14 +128,42 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { case NonFatal(_) => None } val isTemp = sessionCatalog.isTempView(tableIdent) +val qualifier = + Array(metadata.map(_.identifier.database).getOrElse(tableIdent.database).orNull) Review Comment: Will use `:String` to solve this issue. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r880816481 ## sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala: ## @@ -117,14 +128,42 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { case NonFatal(_) => None } val isTemp = sessionCatalog.isTempView(tableIdent) +val qualifier = + Array(metadata.map(_.identifier.database).getOrElse(tableIdent.database).orNull) Review Comment: ``` /** * Identifies a table in a database. * If `database` is not defined, the current database is used. * When we register a permanent function in the FunctionRegistry, we use * unquotedString as the function name. */ case class TableIdentifier(table: String, database: Option[String]) extends IdentifierWithDatabase { override val identifier: String = table def this(table: String) = this(table, None) } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r880815873 ## sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala: ## @@ -117,14 +128,42 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { case NonFatal(_) => None } val isTemp = sessionCatalog.isTempView(tableIdent) +val qualifier = + Array(metadata.map(_.identifier.database).getOrElse(tableIdent.database).orNull) Review Comment: This is `makeTable(tableIdent: TableIdentifier)`, and the `TableIdentifier` contains no catalog name. Are you suggesting that I use `getCurrrentCatalog` (not exists but we can add one) to get the catalog name? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r879816903 ## sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala: ## @@ -125,6 +135,31 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { isTemporary = isTemp) } + private def makeTable(ident: Seq[String]): Table = { +val plan = UnresolvedTableOrView(ident, "Catalog.listTables", true) +val node = sparkSession.sessionState.executePlan(plan).analyzed +node match { + case t: ResolvedTable => +val isExternal = t.table.properties().getOrDefault("external", "false").equals("true") +new Table( + name = t.identifier.name(), + database = t.identifier.namespace().head, + description = t.table.properties().get("comment"), + tableType = +if (isExternal) CatalogTableType.EXTERNAL.name +else CatalogTableType.MANAGED.name, + isTemporary = false) + case v: ResolvedView => +new Table( Review Comment: BTW this idea triggered a compatibility check issue ``` spark-sql: Failed binary compatibility check against org.apache.spark:spark-sql_2.12:3.2.0! Found 1 potential problems (filtered 586) [error] * method this(java.lang.String,java.lang.String,java.lang.String,java.lang.String,Boolean)Unit in class org.apache.spark.sql.catalog.Table's type is different in current version, where it is (java.lang.String,Array[java.lang.String],java.lang.String,java.lang.String,Boolean)Unit instead of (java.lang.String,java.lang.String,java.lang.String,java.lang.String,Boolean)Unit ``` How do we usually solve such issues? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r879739463 ## sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala: ## @@ -125,6 +135,31 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { isTemporary = isTemp) } + private def makeTable(ident: Seq[String]): Table = { +val plan = UnresolvedTableOrView(ident, "Catalog.listTables", true) +val node = sparkSession.sessionState.executePlan(plan).analyzed +node match { + case t: ResolvedTable => +val isExternal = t.table.properties().getOrDefault("external", "false").equals("true") +new Table( + name = t.identifier.name(), + database = t.identifier.namespace().head, + description = t.table.properties().get("comment"), + tableType = +if (isExternal) CatalogTableType.EXTERNAL.name +else CatalogTableType.MANAGED.name, + isTemporary = false) + case v: ResolvedView => +new Table( Review Comment: pushed a new commit. Can you take a look? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r878542464 ## sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala: ## @@ -125,6 +135,31 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { isTemporary = isTemp) } + private def makeTable(ident: Seq[String]): Table = { +val plan = UnresolvedTableOrView(ident, "Catalog.listTables", true) +val node = sparkSession.sessionState.executePlan(plan).analyzed +node match { + case t: ResolvedTable => +val isExternal = t.table.properties().getOrDefault("external", "false").equals("true") +new Table( + name = t.identifier.name(), + database = t.identifier.namespace().head, + description = t.table.properties().get("comment"), + tableType = +if (isExternal) CatalogTableType.EXTERNAL.name +else CatalogTableType.MANAGED.name, + isTemporary = false) + case v: ResolvedView => +new Table( Review Comment: Why `database` is null if `qualifier.length` != 2? `qualifier` could contain only one string which is a database name? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r878455439 ## sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala: ## @@ -125,6 +135,31 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { isTemporary = isTemp) } + private def makeTable(ident: Seq[String]): Table = { +val plan = UnresolvedTableOrView(ident, "Catalog.listTables", true) +val node = sparkSession.sessionState.executePlan(plan).analyzed +node match { + case t: ResolvedTable => +val isExternal = t.table.properties().getOrDefault("external", "false").equals("true") +new Table( + name = t.identifier.name(), + database = t.identifier.namespace().head, + description = t.table.properties().get("comment"), + tableType = +if (isExternal) CatalogTableType.EXTERNAL.name +else CatalogTableType.MANAGED.name, + isTemporary = false) + case v: ResolvedView => +new Table( + name = v.identifier.name(), + database = v.identifier.namespace().toString, + description = "", + tableType = "", Review Comment: Good catch! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r878354239 ## sql/core/src/main/scala/org/apache/spark/sql/catalog/interface.scala: ## @@ -78,7 +78,7 @@ class Table( s"tableType='$tableType', " + s"isTemporary='$isTemporary']" } - + Review Comment: I will revert this. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r877754283 ## sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala: ## @@ -97,8 +97,18 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { */ @throws[AnalysisException]("database does not exist") override def listTables(dbName: String): Dataset[Table] = { -val tables = sessionCatalog.listTables(dbName).map(makeTable) -CatalogImpl.makeDataset(tables, sparkSession) +if (sessionCatalog.databaseExists(dbName)) { + val tables = sessionCatalog.listTables(dbName).map(makeTable) + CatalogImpl.makeDataset(tables, sparkSession) +} else { + val multiParts = sparkSession.sessionState.sqlParser.parseMultipartIdentifier(dbName) + val plan = ShowTables(UnresolvedNamespace(multiParts), None) + val ret = sparkSession.sessionState.executePlan(plan).toRdd.collect() + val tables = ret +.map(row => TableIdentifier(row.getString(1), Some(row.getString(0)))) +.map(makeTable) Review Comment: done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r877706031 ## sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala: ## @@ -367,24 +377,40 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { schema: StructType, description: String, options: Map[String, String]): DataFrame = { -val tableIdent = sparkSession.sessionState.sqlParser.parseTableIdentifier(tableName) +val idents = sparkSession.sessionState.sqlParser.parseMultipartIdentifier(tableName) val storage = DataSource.buildStorageFormatFromOptions(options) val tableType = if (storage.locationUri.isDefined) { CatalogTableType.EXTERNAL } else { CatalogTableType.MANAGED } -val tableDesc = CatalogTable( - identifier = tableIdent, - tableType = tableType, - storage = storage, - schema = schema, - provider = Some(source), - comment = { if (description.isEmpty) None else Some(description) } -) -val plan = CreateTable(tableDesc, SaveMode.ErrorIfExists, None) +val location = if (storage.locationUri.isDefined) { + val locationStr = storage.locationUri.get.toString + Some(locationStr) +} else { + None +} + +val tableSpec = + TableSpec( +properties = Map(), +provider = Some(source), +options = options, +location = location, +comment = { if (description.isEmpty) None else Some(description) }, +serde = None, +external = tableType == CatalogTableType.EXTERNAL) + +val plan = + CreateTable( +name = UnresolvedDBObjectName(idents, isNamespace = true), Review Comment: oops yes it should be false. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] amaliujia commented on a diff in pull request #36586: [SPARK-39236][SQL] Make CreateTable and ListTables be compatible with 3 layer namespace
amaliujia commented on code in PR #36586: URL: https://github.com/apache/spark/pull/36586#discussion_r877705884 ## sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala: ## @@ -97,8 +97,18 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { */ @throws[AnalysisException]("database does not exist") override def listTables(dbName: String): Dataset[Table] = { -val tables = sessionCatalog.listTables(dbName).map(makeTable) -CatalogImpl.makeDataset(tables, sparkSession) +if (sessionCatalog.databaseExists(dbName)) { + val tables = sessionCatalog.listTables(dbName).map(makeTable) + CatalogImpl.makeDataset(tables, sparkSession) +} else { + val multiParts = sparkSession.sessionState.sqlParser.parseMultipartIdentifier(dbName) + val plan = ShowTables(UnresolvedNamespace(multiParts), None) + val ret = sparkSession.sessionState.executePlan(plan).toRdd.collect() + val tables = ret +.map(row => TableIdentifier(row.getString(1), Some(row.getString(0)))) +.map(makeTable) Review Comment: oh yes you are right. Let me add a version that looks up through analyzer. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org