asl3 commented on code in PR #54824:
URL: https://github.com/apache/spark/pull/54824#discussion_r3151756255
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExtendedExec.scala:
##########
@@ -73,6 +85,46 @@ case class ShowTablesExtendedExec(
rows.toSeq
}
+ private def runAsJson(): Seq[InternalRow] = {
+ val jsonRows = new ArrayBuffer[JObject]()
+
+ // fetch tables
+ val tables = catalog.listTables(namespace.toArray)
+ tables.foreach { tableIdent =>
+ if (StringUtils.filterPattern(Seq(tableIdent.name()), pattern).nonEmpty)
{
+ // V2 persistent tables are always TABLE
+ jsonRows += JObject(
+ "catalog" -> JString(catalog.name()),
+ "namespace" -> JArray(tableIdent.namespace().map(JString(_)).toList),
+ "name" -> JString(tableIdent.name()),
+ "type" -> JString("TABLE"),
+ "isTemporary" -> JBool(false)
+ )
+ }
+ }
+
+ // fetch temp views, includes: global temp view, local temp view
+ val sessionCatalog = session.sessionState.catalog
+ val db = namespace match {
+ case Seq(db) => Some(db)
+ case _ => None
+ }
+ val tempViews = sessionCatalog.listTempViews(db.getOrElse(""), pattern)
Review Comment:
For V2SessionCatalog, `catalog.listTables(namespace.toArray)` already
includes local temp views (and globals when ns is global_temp) via
`SessionCatalog.listTables(db, "*", includeLocalTempViews=true)`.
This block lists them again, so each temp view is emitted twice: once with
`type=TABLE, isTemporary=false`, once with `type=VIEW, isTemporary=true`. The
same duplicate path was removed from ShowTablesExec.
Should either skip this `listTempViews` block (matching ShowTablesExec) and
use an isTempView-style helper above, or filter temp views out of tables before
emission.
##########
sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala:
##########
@@ -461,4 +466,179 @@ trait ShowTablesSuiteBase extends QueryTest with
DDLCommandTestUtils {
}
}
}
+
+ test("SHOW TABLES AS JSON returns single row with json_metadata column") {
+ withNamespaceAndTable("ns", "tbl") { t =>
+ sql(s"CREATE TABLE $t (id INT, data STRING) $defaultUsing")
+ val df = sql(s"SHOW TABLES IN $catalog.ns AS JSON")
+ assert(df.schema.fieldNames === Seq("json_metadata"))
+ assert(df.count() == 1)
+
+ val jsonStr = df.collect()(0).getString(0)
+ val json = parse(jsonStr)
+ val tables = (json \ "tables").asInstanceOf[JArray].arr
+ val tblEntry = tables.find(t => (t \ "name").extract[String] == "tbl")
+ assert(tblEntry.isDefined)
+ assert((tblEntry.get \ "isTemporary").extract[Boolean] == false)
+ assert((tblEntry.get \ "namespace").isInstanceOf[JArray])
+ }
+ }
+
+ test("SHOW TABLES AS JSON with empty database") {
+ withNamespace(s"$catalog.ns_empty") {
+ sql(s"CREATE NAMESPACE $catalog.ns_empty")
+ val df = sql(s"SHOW TABLES IN $catalog.ns_empty AS JSON")
+ assert(df.count() == 1)
+
+ val jsonStr = df.collect()(0).getString(0)
+ val json = parse(jsonStr)
+ val tables = (json \ "tables").asInstanceOf[JArray].arr
+ assert(tables.isEmpty)
+ }
+ }
+
+ test("SHOW TABLE EXTENDED AS JSON returns single row with json_metadata
column") {
+ withNamespaceAndTable("ns", "tbl") { t =>
+ sql(s"CREATE TABLE $t (id INT, data STRING) $defaultUsing")
+ val df = sql(s"SHOW TABLE EXTENDED IN $catalog.ns LIKE 'tbl' AS JSON")
+ assert(df.schema.fieldNames === Seq("json_metadata"))
+ assert(df.count() == 1)
+
+ val jsonStr = df.collect()(0).getString(0)
+ val json = parse(jsonStr)
+ val tables = (json \ "tables").asInstanceOf[JArray].arr
+ assert(tables.length == 1)
+ val entry = tables.head
+ assert((entry \ "name").extract[String] == "tbl")
+ assert((entry \ "type").extract[String] == "TABLE")
+ assert((entry \ "isTemporary").extract[Boolean] == false)
+ assert((entry \ "catalog").isInstanceOf[JString])
+ assert((entry \ "namespace").isInstanceOf[JArray])
+ }
+ }
+
+ test("SHOW TABLES AS JSON includes temp views") {
+ withNamespaceAndTable("ns", "tbl") { t =>
+ sql(s"CREATE TABLE $t (id INT) $defaultUsing")
+ withTempView("tv") {
+ sql("CREATE TEMP VIEW tv AS SELECT 1 AS id")
+ val df = sql(s"SHOW TABLES IN $catalog.ns AS JSON")
+ val jsonStr = df.collect()(0).getString(0)
+ val json = parse(jsonStr)
+ val tables = (json \ "tables").asInstanceOf[JArray].arr
+ val tempView = tables.find(t => (t \ "name").extract[String] == "tv")
+ assert(tempView.isDefined)
+ assert((tempView.get \ "isTemporary").extract[Boolean] == true)
+ }
+ }
+ }
+
+ test("SHOW TABLE EXTENDED AS JSON with local temp view") {
+ withNamespaceAndTable("ns", "tbl") { t =>
+ sql(s"CREATE TABLE $t (id INT) $defaultUsing")
+ val localTmpViewName = "tbl_local_tmp"
+ withTempView(localTmpViewName) {
+ sql(s"CREATE TEMPORARY VIEW $localTmpViewName AS SELECT id FROM $t")
+
+ val df = sql(s"SHOW TABLE EXTENDED IN $catalog.ns LIKE 'tbl*' AS JSON")
+ assert(df.schema.fieldNames === Seq("json_metadata"))
+ assert(df.count() == 1)
+
+ val jsonStr = df.collect()(0).getString(0)
+ val json = parse(jsonStr)
+ val tables = (json \ "tables").asInstanceOf[JArray].arr
+
+ val tblEntry = tables.find(e => (e \ "name").extract[String] == "tbl")
+ assert(tblEntry.isDefined)
+ assert((tblEntry.get \ "isTemporary").extract[Boolean] == false)
+ assert((tblEntry.get \ "type").extract[String] == "TABLE")
+
+ val tempViewEntry = tables.find(e => (e \ "name").extract[String] ==
localTmpViewName)
+ assert(tempViewEntry.isDefined)
+ assert((tempViewEntry.get \ "isTemporary").extract[Boolean] == true)
+ assert((tempViewEntry.get \ "type").extract[String] == "VIEW")
+ }
+ }
+ }
+
+ test("SHOW TABLE EXTENDED AS JSON with global temp view") {
+ withNamespaceAndTable("ns", "tbl") { t =>
+ sql(s"CREATE TABLE $t (id INT) $defaultUsing")
+ val globalTmpViewName = "ext_json_gtv"
+ val globalNamespace = "global_temp"
+ withView(s"$globalNamespace.$globalTmpViewName") {
+ sql(s"CREATE OR REPLACE GLOBAL TEMP VIEW $globalTmpViewName AS SELECT
id FROM $t")
+
+ val df = sql(s"SHOW TABLE EXTENDED IN $globalNamespace LIKE
'ext_json*' AS JSON")
+ assert(df.schema.fieldNames === Seq("json_metadata"))
+ assert(df.count() == 1)
+
+ val jsonStr = df.collect()(0).getString(0)
+ val json = parse(jsonStr)
+ val tables = (json \ "tables").asInstanceOf[JArray].arr
+
+ val globalTempViewEntry =
+ tables.find(e => (e \ "name").extract[String] == globalTmpViewName)
+ assert(globalTempViewEntry.isDefined)
+ assert((globalTempViewEntry.get \ "isTemporary").extract[Boolean] ==
true)
+ assert((globalTempViewEntry.get \ "type").extract[String] == "VIEW")
+ }
+ }
+ }
+
+ test("SHOW TABLES AS JSON with global temp view") {
+ withNamespaceAndTable("ns", "tbl") { t =>
+ sql(s"CREATE TABLE $t (id INT) $defaultUsing")
+ val globalTmpViewName = "show_json_gtv"
+ val globalNamespace = "global_temp"
+ withView(s"$globalNamespace.$globalTmpViewName") {
+ sql(s"CREATE OR REPLACE GLOBAL TEMP VIEW $globalTmpViewName AS SELECT
id FROM $t")
+
+ val df = sql(s"SHOW TABLES IN $globalNamespace AS JSON")
+ assert(df.schema.fieldNames === Seq("json_metadata"))
+ assert(df.count() == 1)
+
+ val jsonStr = df.collect()(0).getString(0)
+ val json = parse(jsonStr)
+ val tables = (json \ "tables").asInstanceOf[JArray].arr
+
+ val globalTempViewEntry =
+ tables.find(e => (e \ "name").extract[String] == globalTmpViewName)
+ assert(globalTempViewEntry.isDefined)
+ assert((globalTempViewEntry.get \ "isTemporary").extract[Boolean] ==
true)
+ }
+ }
+ }
+
+ test("SHOW TABLE EXTENDED AS JSON with both local and global temp views") {
Review Comment:
seems @cloud-fan 's comment about .find() masking duplicates still needs to
be addressed here
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExtendedExec.scala:
##########
@@ -73,6 +85,46 @@ case class ShowTablesExtendedExec(
rows.toSeq
}
+ private def runAsJson(): Seq[InternalRow] = {
+ val jsonRows = new ArrayBuffer[JObject]()
+
+ // fetch tables
+ val tables = catalog.listTables(namespace.toArray)
+ tables.foreach { tableIdent =>
+ if (StringUtils.filterPattern(Seq(tableIdent.name()), pattern).nonEmpty)
{
+ // V2 persistent tables are always TABLE
Review Comment:
The comment "V2 persistent tables are always TABLE" and unconditional
type=TABLE, isTemporary=false is wrong when catalog is V2SessionCatalog. Can
use a per-row `isTempView` check (example: ShowTablesExec.isTempView)
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala:
##########
@@ -970,6 +982,48 @@ case class ShowTablesCommand(
Seq(Row(database, tableName, isTemp, s"$information\n"))
}
}
+
+ private def runAsJson(sparkSession: SparkSession): Seq[Row] = {
+ val catalog = sparkSession.sessionState.catalog
+ val db = databaseName.getOrElse(catalog.getCurrentDatabase)
+ val tables =
+ tableIdentifierPattern.map(catalog.listTables(db,
_)).getOrElse(catalog.listTables(db))
+
+ val jsonTables = tables.map { tableIdent =>
+ val isTemp = catalog.isTempView(tableIdent)
+ val ns = tableIdent.database.toList
+
+ if (isExtended) {
+ val tableType = if (isTemp) {
+ "VIEW"
+ } else {
+ Try(catalog.getTempViewOrPermanentTableMetadata(tableIdent)) match {
+ case Success(meta) if meta.tableType == CatalogTableType.VIEW =>
"VIEW"
+ case Success(_) => "TABLE"
+ case Failure(_) => "UNKNOWN"
Review Comment:
Failure(_) => "UNKNOWN" swallows any exception from
`getTempViewOrPermanentTableMetadata`. The text path lets exceptions propagate
- consider following the same here
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala:
##########
@@ -1313,6 +1313,7 @@ case class RenameTable(
case class ShowTables(
namespace: LogicalPlan,
pattern: Option[String],
+ asJson: Boolean = false,
Review Comment:
Currently have duplicated JSON emission code across Exec classes. Consider a
ShowTablesJsonCommand which takes the resolved namespace/catalog and handles
all variants centrally, similar to DescribeRelationJsonCommand
##########
sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala:
##########
@@ -461,4 +466,179 @@ trait ShowTablesSuiteBase extends QueryTest with
DDLCommandTestUtils {
}
}
}
+
+ test("SHOW TABLES AS JSON returns single row with json_metadata column") {
+ withNamespaceAndTable("ns", "tbl") { t =>
+ sql(s"CREATE TABLE $t (id INT, data STRING) $defaultUsing")
+ val df = sql(s"SHOW TABLES IN $catalog.ns AS JSON")
+ assert(df.schema.fieldNames === Seq("json_metadata"))
+ assert(df.count() == 1)
+
+ val jsonStr = df.collect()(0).getString(0)
+ val json = parse(jsonStr)
+ val tables = (json \ "tables").asInstanceOf[JArray].arr
+ val tblEntry = tables.find(t => (t \ "name").extract[String] == "tbl")
+ assert(tblEntry.isDefined)
+ assert((tblEntry.get \ "isTemporary").extract[Boolean] == false)
+ assert((tblEntry.get \ "namespace").isInstanceOf[JArray])
+ }
+ }
+
+ test("SHOW TABLES AS JSON with empty database") {
+ withNamespace(s"$catalog.ns_empty") {
+ sql(s"CREATE NAMESPACE $catalog.ns_empty")
+ val df = sql(s"SHOW TABLES IN $catalog.ns_empty AS JSON")
+ assert(df.count() == 1)
+
+ val jsonStr = df.collect()(0).getString(0)
+ val json = parse(jsonStr)
+ val tables = (json \ "tables").asInstanceOf[JArray].arr
+ assert(tables.isEmpty)
+ }
+ }
+
+ test("SHOW TABLE EXTENDED AS JSON returns single row with json_metadata
column") {
+ withNamespaceAndTable("ns", "tbl") { t =>
+ sql(s"CREATE TABLE $t (id INT, data STRING) $defaultUsing")
+ val df = sql(s"SHOW TABLE EXTENDED IN $catalog.ns LIKE 'tbl' AS JSON")
+ assert(df.schema.fieldNames === Seq("json_metadata"))
+ assert(df.count() == 1)
+
+ val jsonStr = df.collect()(0).getString(0)
+ val json = parse(jsonStr)
+ val tables = (json \ "tables").asInstanceOf[JArray].arr
+ assert(tables.length == 1)
+ val entry = tables.head
+ assert((entry \ "name").extract[String] == "tbl")
+ assert((entry \ "type").extract[String] == "TABLE")
+ assert((entry \ "isTemporary").extract[Boolean] == false)
+ assert((entry \ "catalog").isInstanceOf[JString])
+ assert((entry \ "namespace").isInstanceOf[JArray])
+ }
+ }
+
+ test("SHOW TABLES AS JSON includes temp views") {
+ withNamespaceAndTable("ns", "tbl") { t =>
+ sql(s"CREATE TABLE $t (id INT) $defaultUsing")
+ withTempView("tv") {
+ sql("CREATE TEMP VIEW tv AS SELECT 1 AS id")
+ val df = sql(s"SHOW TABLES IN $catalog.ns AS JSON")
+ val jsonStr = df.collect()(0).getString(0)
+ val json = parse(jsonStr)
+ val tables = (json \ "tables").asInstanceOf[JArray].arr
+ val tempView = tables.find(t => (t \ "name").extract[String] == "tv")
+ assert(tempView.isDefined)
+ assert((tempView.get \ "isTemporary").extract[Boolean] == true)
+ }
+ }
+ }
+
+ test("SHOW TABLE EXTENDED AS JSON with local temp view") {
+ withNamespaceAndTable("ns", "tbl") { t =>
+ sql(s"CREATE TABLE $t (id INT) $defaultUsing")
+ val localTmpViewName = "tbl_local_tmp"
+ withTempView(localTmpViewName) {
+ sql(s"CREATE TEMPORARY VIEW $localTmpViewName AS SELECT id FROM $t")
+
+ val df = sql(s"SHOW TABLE EXTENDED IN $catalog.ns LIKE 'tbl*' AS JSON")
+ assert(df.schema.fieldNames === Seq("json_metadata"))
+ assert(df.count() == 1)
+
+ val jsonStr = df.collect()(0).getString(0)
+ val json = parse(jsonStr)
+ val tables = (json \ "tables").asInstanceOf[JArray].arr
+
+ val tblEntry = tables.find(e => (e \ "name").extract[String] == "tbl")
+ assert(tblEntry.isDefined)
+ assert((tblEntry.get \ "isTemporary").extract[Boolean] == false)
+ assert((tblEntry.get \ "type").extract[String] == "TABLE")
+
+ val tempViewEntry = tables.find(e => (e \ "name").extract[String] ==
localTmpViewName)
+ assert(tempViewEntry.isDefined)
+ assert((tempViewEntry.get \ "isTemporary").extract[Boolean] == true)
+ assert((tempViewEntry.get \ "type").extract[String] == "VIEW")
+ }
+ }
+ }
+
+ test("SHOW TABLE EXTENDED AS JSON with global temp view") {
+ withNamespaceAndTable("ns", "tbl") { t =>
+ sql(s"CREATE TABLE $t (id INT) $defaultUsing")
+ val globalTmpViewName = "ext_json_gtv"
+ val globalNamespace = "global_temp"
+ withView(s"$globalNamespace.$globalTmpViewName") {
+ sql(s"CREATE OR REPLACE GLOBAL TEMP VIEW $globalTmpViewName AS SELECT
id FROM $t")
+
+ val df = sql(s"SHOW TABLE EXTENDED IN $globalNamespace LIKE
'ext_json*' AS JSON")
+ assert(df.schema.fieldNames === Seq("json_metadata"))
+ assert(df.count() == 1)
+
+ val jsonStr = df.collect()(0).getString(0)
+ val json = parse(jsonStr)
+ val tables = (json \ "tables").asInstanceOf[JArray].arr
+
+ val globalTempViewEntry =
+ tables.find(e => (e \ "name").extract[String] == globalTmpViewName)
+ assert(globalTempViewEntry.isDefined)
+ assert((globalTempViewEntry.get \ "isTemporary").extract[Boolean] ==
true)
+ assert((globalTempViewEntry.get \ "type").extract[String] == "VIEW")
+ }
+ }
+ }
+
+ test("SHOW TABLES AS JSON with global temp view") {
+ withNamespaceAndTable("ns", "tbl") { t =>
+ sql(s"CREATE TABLE $t (id INT) $defaultUsing")
+ val globalTmpViewName = "show_json_gtv"
+ val globalNamespace = "global_temp"
+ withView(s"$globalNamespace.$globalTmpViewName") {
+ sql(s"CREATE OR REPLACE GLOBAL TEMP VIEW $globalTmpViewName AS SELECT
id FROM $t")
+
+ val df = sql(s"SHOW TABLES IN $globalNamespace AS JSON")
+ assert(df.schema.fieldNames === Seq("json_metadata"))
+ assert(df.count() == 1)
+
+ val jsonStr = df.collect()(0).getString(0)
+ val json = parse(jsonStr)
+ val tables = (json \ "tables").asInstanceOf[JArray].arr
+
+ val globalTempViewEntry =
+ tables.find(e => (e \ "name").extract[String] == globalTmpViewName)
+ assert(globalTempViewEntry.isDefined)
+ assert((globalTempViewEntry.get \ "isTemporary").extract[Boolean] ==
true)
+ }
+ }
+ }
+
+ test("SHOW TABLE EXTENDED AS JSON with both local and global temp views") {
+ withNamespaceAndTable("ns", "tbl") { t =>
+ sql(s"CREATE TABLE $t (id INT) $defaultUsing")
+ val localTmpViewName = "both_json_ltv"
+ val globalTmpViewName = "both_json_gtv"
+ val globalNamespace = "global_temp"
+ withView(localTmpViewName, s"$globalNamespace.$globalTmpViewName") {
+ sql(s"CREATE OR REPLACE TEMP VIEW $localTmpViewName AS SELECT id FROM
$t")
+ sql(s"CREATE OR REPLACE GLOBAL TEMP VIEW $globalTmpViewName AS SELECT
id FROM $t")
+
+ val df = sql(s"SHOW TABLE EXTENDED IN $globalNamespace LIKE
'both_json*' AS JSON")
+ assert(df.count() == 1)
+
+ val jsonStr = df.collect()(0).getString(0)
+ val json = parse(jsonStr)
+ val tables = (json \ "tables").asInstanceOf[JArray].arr
+
+ assert(tables.length == 2)
Review Comment:
this test seems failing in CI?
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala:
##########
@@ -33,18 +36,32 @@ case class ShowTablesExec(
output: Seq[Attribute],
catalog: TableCatalog,
namespace: Seq[String],
- pattern: Option[String]) extends V2CommandExec with LeafExecNode {
+ pattern: Option[String],
+ asJson: Boolean = false) extends V2CommandExec with LeafExecNode {
override protected def run(): Seq[InternalRow] = {
- val rows = new ArrayBuffer[InternalRow]()
-
val tables = catalog.listTables(namespace.toArray)
- tables.map { table =>
- if (pattern.map(StringUtils.filterPattern(Seq(table.name()),
_).nonEmpty).getOrElse(true)) {
+ val filteredTables = tables.filter { table =>
+ pattern.map(StringUtils.filterPattern(Seq(table.name()),
_).nonEmpty).getOrElse(true)
+ }
+
+ if (asJson) {
+ val jsonTables = filteredTables.map { table =>
+ JObject(
+ "name" -> JString(table.name()),
Review Comment:
field order should be consistent between non-extended `{name, namespace,
isTemporary}` and extended `{catalog, namespace, name, type, isTemporary}`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]