cloud-fan commented on code in PR #43949: URL: https://github.com/apache/spark/pull/43949#discussion_r1411884756
########## sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala: ########## @@ -114,10 +154,46 @@ class V2SessionCatalog(catalog: SessionCatalog) schema: StructType, partitions: Array[Transform], properties: util.Map[String, String]): Table = { - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.TransformHelper - val (partitionColumns, maybeBucketSpec, maybeClusterBySpec) = - partitions.toImmutableArraySeq.convertTransforms + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ val provider = properties.getOrDefault(TableCatalog.PROP_PROVIDER, conf.defaultDataSourceName) + + val (newSchema, newPartitions) = DataSourceV2Utils.getTableProvider(provider, conf) match { + // If the provider does not support external metadata, users should not be allowed to + // specify custom schema when creating the data source table, since the schema will not + // be used when loading the table. + case Some(p) if !p.supportsExternalMetadata() => + if (schema.nonEmpty) { + throw new SparkUnsupportedOperationException( + errorClass = "CANNOT_CREATE_DATA_SOURCE_TABLE.EXTERNAL_METADATA_UNSUPPORTED", + messageParameters = Map("tableName" -> ident.quoted, "provider" -> provider)) Review Comment: `ident.quoted` only quotes when necessary, but in error messages we require fully quoted identifiers. You can call `toSQLId(ident.asMultipartIdentifier)`, but maybe it's better to add a `def fullyQuoted` in `implicit class IdentifierHelper` and use it here. ########## sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala: ########## @@ -633,6 +634,95 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS } } } + + test("SPARK-46043: create table in SQL using a DSv2 source") { + Seq(classOf[SimpleDataSourceV2], classOf[JavaSimpleDataSourceV2]).foreach { cls => + withClue(cls.getName) { + // Create a table with empty schema.
+ withTable("test") { + sql(s"CREATE TABLE test USING ${cls.getName}") + checkAnswer( + sql(s"SELECT * FROM test WHERE i < 3"), + Seq(Row(0, 0), Row(1, -1), Row(2, -2))) + } + // Create a table with non-empty schema is not allowed. + checkError( + exception = intercept[SparkUnsupportedOperationException] { + sql(s"CREATE TABLE test(a INT, b INT) USING ${cls.getName}") + }, + errorClass = "CANNOT_CREATE_DATA_SOURCE_TABLE.EXTERNAL_METADATA_UNSUPPORTED", + parameters = Map("tableName" -> "default.test", "provider" -> cls.getName) Review Comment: it should be ``"`default`.`test`"`` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org