This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 308996b [SPARK-26716][SPARK-26765][FOLLOWUP][SQL] Clean up schema validation methods and override toString method in Avro 308996b is described below commit 308996bc72c95582577843d22fcca5f1051d242a Author: Gengliang Wang <gengliang.w...@databricks.com> AuthorDate: Thu Jan 31 15:44:44 2019 +0800 [SPARK-26716][SPARK-26765][FOLLOWUP][SQL] Clean up schema validation methods and override toString method in Avro ## What changes were proposed in this pull request? In #23639, the API `supportDataType` is refactored. We should also remove the methods `verifyWriteSchema` and `verifyReadSchema` in `DataSourceUtils`. Since the error message uses `FileFormat.toString` to specify the data source naming, this PR also overrides the `toString` method in `AvroFileFormat`. ## How was this patch tested? Unit test. Closes #23699 from gengliangwang/SPARK-26716-followup. Authored-by: Gengliang Wang <gengliang.w...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../org/apache/spark/sql/avro/AvroFileFormat.scala | 2 ++ .../scala/org/apache/spark/sql/avro/AvroSuite.scala | 2 +- .../spark/sql/execution/datasources/DataSource.scala | 2 +- .../sql/execution/datasources/DataSourceUtils.scala | 17 +---------------- .../sql/execution/datasources/FileFormatWriter.scala | 2 +- 5 files changed, 6 insertions(+), 19 deletions(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala index 7391665..c2a7f31 100755 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala @@ -124,6 +124,8 @@ private[avro] class AvroFileFormat extends FileFormat override def shortName(): String = "avro" + override def toString(): String = "Avro" + override def isSplitable( sparkSession: SparkSession, options: 
Map[String, String], diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index d803537..81a5cb7 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -896,7 +896,7 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils { sql("select testType()").write.format("avro").mode("overwrite").save(tempDir) }.getMessage assert(msg.toLowerCase(Locale.ROOT) - .contains(s"data source does not support calendarinterval data type.")) + .contains(s"avro data source does not support calendarinterval data type.")) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index d48261e..db81fbd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -412,7 +412,7 @@ case class DataSource( hs.partitionSchema.map(_.name), "in the partition schema", equality) - DataSourceUtils.verifyReadSchema(hs.fileFormat, hs.dataSchema) + DataSourceUtils.verifySchema(hs.fileFormat, hs.dataSchema) case _ => SchemaUtils.checkColumnNameDuplication( relation.schema.map(_.name), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala index a32a940..74eae94 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala @@ -24,26 +24,11 @@ import org.apache.spark.sql.types._ object DataSourceUtils { - - /** - * Verify if the schema is supported in 
datasource in write path. - */ - def verifyWriteSchema(format: FileFormat, schema: StructType): Unit = { - verifySchema(format, schema, isReadPath = false) - } - - /** - * Verify if the schema is supported in datasource in read path. - */ - def verifyReadSchema(format: FileFormat, schema: StructType): Unit = { - verifySchema(format, schema, isReadPath = true) - } - /** * Verify if the schema is supported in datasource. This verification should be done * in a driver side. */ - private def verifySchema(format: FileFormat, schema: StructType, isReadPath: Boolean): Unit = { + def verifySchema(format: FileFormat, schema: StructType): Unit = { schema.foreach { field => if (!format.supportDataType(field.dataType)) { throw new AnalysisException( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala index 91e92d8..2232da4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala @@ -98,7 +98,7 @@ object FileFormatWriter extends Logging { val caseInsensitiveOptions = CaseInsensitiveMap(options) val dataSchema = dataColumns.toStructType - DataSourceUtils.verifyWriteSchema(fileFormat, dataSchema) + DataSourceUtils.verifySchema(fileFormat, dataSchema) // Note: prepareWrite has side effect. It sets "job". val outputWriterFactory = fileFormat.prepareWrite(sparkSession, job, caseInsensitiveOptions, dataSchema) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org