Github user gengliangwang commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21389#discussion_r198341922

    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala ---
    @@ -202,4 +204,230 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext with Befo
           }
         }
       }
    +
    +  // Unsupported data types of csv, json, orc, and parquet are as follows;
    +  //  csv -> R/W: Interval, Null, Array, Map, Struct
    +  //  json -> W: Interval
    +  //  orc -> W: Interval, Null
    +  //  parquet -> R/W: Interval, Null
    +  test("SPARK-24204 error handling for unsupported Array/Map/Struct types - csv") {
    +    withTempDir { dir =>
    +      val csvDir = new File(dir, "csv").getCanonicalPath
    +      var msg = intercept[UnsupportedOperationException] {
    +        Seq((1, "Tesla")).toDF("a", "b").selectExpr("struct(a, b)").write.csv(csvDir)
    +      }.getMessage
    +      assert(msg.contains("CSV data source does not support struct<a:int,b:string> data type"))
    +
    +      msg = intercept[UnsupportedOperationException] {
    +        val schema = StructType.fromDDL("a struct<b: Int>")
    +        spark.range(1).write.mode("overwrite").csv(csvDir)
    +        spark.read.schema(schema).csv(csvDir).collect()
    +      }.getMessage
    +      assert(msg.contains("CSV data source does not support struct<b:int> data type"))
    +
    +      msg = intercept[UnsupportedOperationException] {
    +        Seq((1, Map("Tesla" -> 3))).toDF("id", "cars").write.mode("overwrite").csv(csvDir)
    +      }.getMessage
    +      assert(msg.contains("CSV data source does not support map<string,int> data type"))
    +
    +      msg = intercept[UnsupportedOperationException] {
    +        val schema = StructType.fromDDL("a map<int, int>")
    +        spark.range(1).write.mode("overwrite").csv(csvDir)
    +        spark.read.schema(schema).csv(csvDir).collect()
    +      }.getMessage
    +      assert(msg.contains("CSV data source does not support map<int,int> data type"))
    +
    +      msg = intercept[UnsupportedOperationException] {
    +        Seq((1, Array("Tesla", "Chevy", "Ford"))).toDF("id", "brands")
    +          .write.mode("overwrite").csv(csvDir)
    +      }.getMessage
    +      assert(msg.contains("CSV data source does not support array<string> data type"))
    +
    +      msg = intercept[UnsupportedOperationException] {
    +        val schema = StructType.fromDDL("a array<int>")
    +        spark.range(1).write.mode("overwrite").csv(csvDir)
    +        spark.read.schema(schema).csv(csvDir).collect()
    +      }.getMessage
    +      assert(msg.contains("CSV data source does not support array<int> data type"))
    +
    +      msg = intercept[UnsupportedOperationException] {
    +        Seq((1, new UDT.MyDenseVector(Array(0.25, 2.25, 4.25)))).toDF("id", "vectors")
    +          .write.mode("overwrite").csv(csvDir)
    +      }.getMessage
    +      assert(msg.contains("CSV data source does not support array<double> data type"))
    +
    +      msg = intercept[UnsupportedOperationException] {
    +        val schema = StructType(StructField("a", new UDT.MyDenseVectorUDT(), true) :: Nil)
    +        spark.range(1).write.mode("overwrite").csv(csvDir)
    +        spark.read.schema(schema).csv(csvDir).collect()
    +      }.getMessage
    +      assert(msg.contains("CSV data source does not support array<double> data type."))
    +    }
    +  }
    +
    +  test("SPARK-24204 error handling for unsupported Interval data types - csv, json, parquet, orc") {
    +    withTempDir { dir =>
    +      val tempDir = new File(dir, "files").getCanonicalPath
    +
    +      Seq("orc", "json").foreach { format =>
    --- End diff --

    Nit: we can put all the write paths together to reduce duplicated code
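    For illustration, one way that consolidation might look in the csv test above: the dummy write that each read-path check repeats could be done once, with the unsupported read schemas checked against the same directory. This is only a rough sketch, assuming the existing test fixtures (spark, csvDir, StructType, and the intercept helper) are in scope; the DDL/type-name pairs are just the examples already in the diff, not a complete list.

        // Sketch: write the dummy data once, then exercise each unsupported
        // read schema against the same csv directory.
        spark.range(1).write.mode("overwrite").csv(csvDir)

        Seq(
          "a struct<b: Int>" -> "struct<b:int>",
          "a map<int, int>"  -> "map<int,int>",
          "a array<int>"     -> "array<int>"
        ).foreach { case (ddl, unsupportedType) =>
          val msg = intercept[UnsupportedOperationException] {
            spark.read.schema(StructType.fromDDL(ddl)).csv(csvDir).collect()
          }.getMessage
          assert(msg.contains(s"CSV data source does not support $unsupportedType data type"))
        }

    The write-path checks (the cases that fail inside .write.csv itself) would stay as separate intercepts, since they never reach the read side.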