This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new f1d08791 chore: Add ignored tests for reading complex types from
Parquet (#1167)
f1d08791 is described below
commit f1d08791e0603e5543702fa952365d4e61f8df4c
Author: Andy Grove <[email protected]>
AuthorDate: Thu Dec 12 16:41:12 2024 -0700
chore: Add ignored tests for reading complex types from Parquet (#1167)
* Add ignored tests for reading structs from Parquet
* add basic map test
* add tests for Map and Array
---
.../org/apache/comet/CometExpressionSuite.scala | 127 +++++++++++++++++++++
1 file changed, 127 insertions(+)
diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
index 35f374bf..cce7cb20 100644
--- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -2195,6 +2195,133 @@ class CometExpressionSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
}
}
+ ignore("get_struct_field - select primitive fields") {
+ withTempPath { dir =>
+ // create input file with Comet disabled
+ withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+ val df = spark
+ .range(5)
+ // Add both a null struct and null inner value
+ .select(when(col("id") > 1, struct(when(col("id") > 2,
col("id")).alias("id")))
+ .alias("nested1"))
+
+ df.write.parquet(dir.toString())
+ }
+
+ Seq("", "parquet").foreach { v1List =>
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) {
+ val df = spark.read.parquet(dir.toString())
+ checkSparkAnswerAndOperator(df.select("nested1.id"))
+ }
+ }
+ }
+ }
+
+ ignore("get_struct_field - select subset of struct") {
+ withTempPath { dir =>
+ // create input file with Comet disabled
+ withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+ val df = spark
+ .range(5)
+ // Add both a null struct and null inner value
+ .select(
+ when(
+ col("id") > 1,
+ struct(
+ when(col("id") > 2, col("id")).alias("id"),
+ when(col("id") > 2, struct(when(col("id") > 3,
col("id")).alias("id")))
+ .as("nested2")))
+ .alias("nested1"))
+
+ df.write.parquet(dir.toString())
+ }
+
+ Seq("", "parquet").foreach { v1List =>
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) {
+ val df = spark.read.parquet(dir.toString())
+ checkSparkAnswerAndOperator(df.select("nested1.id"))
+ checkSparkAnswerAndOperator(df.select("nested1.nested2"))
+ checkSparkAnswerAndOperator(df.select("nested1.nested2.id"))
+ checkSparkAnswerAndOperator(df.select("nested1.id",
"nested1.nested2.id"))
+ }
+ }
+ }
+ }
+
+ ignore("get_struct_field - read entire struct") {
+ withTempPath { dir =>
+ // create input file with Comet disabled
+ withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+ val df = spark
+ .range(5)
+ // Add both a null struct and null inner value
+ .select(
+ when(
+ col("id") > 1,
+ struct(
+ when(col("id") > 2, col("id")).alias("id"),
+ when(col("id") > 2, struct(when(col("id") > 3,
col("id")).alias("id")))
+ .as("nested2")))
+ .alias("nested1"))
+
+ df.write.parquet(dir.toString())
+ }
+
+ Seq("", "parquet").foreach { v1List =>
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) {
+ val df = spark.read.parquet(dir.toString())
+ checkSparkAnswerAndOperator(df.select("nested1"))
+ }
+ }
+ }
+ }
+
+ ignore("read map[int, int] from parquet") {
+ withTempPath { dir =>
+ // create input file with Comet disabled
+ withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+ val df = spark
+ .range(5)
+        // Spark does not allow null as a key but does allow null as a
+        // value, and the entire map can be null
+ .select(
+ when(col("id") > 1, map(col("id"), when(col("id") > 2,
col("id")))).alias("map1"))
+ df.write.parquet(dir.toString())
+ }
+
+ Seq("", "parquet").foreach { v1List =>
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) {
+ val df = spark.read.parquet(dir.toString())
+ checkSparkAnswerAndOperator(df.select("map1"))
+ checkSparkAnswerAndOperator(df.select(map_keys(col("map1"))))
+ checkSparkAnswerAndOperator(df.select(map_values(col("map1"))))
+ }
+ }
+ }
+ }
+
+ ignore("read array[int] from parquet") {
+ withTempPath { dir =>
+ // create input file with Comet disabled
+ withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+ val df = spark
+ .range(5)
+        // Rows where id <= 1 produce a null array; the remaining rows
+        // contain generated integer sequences
+ .select(when(col("id") > 1, sequence(lit(0), col("id") *
2)).alias("array1"))
+ df.write.parquet(dir.toString())
+ }
+
+ Seq("", "parquet").foreach { v1List =>
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) {
+ val df = spark.read.parquet(dir.toString())
+ checkSparkAnswerAndOperator(df.select("array1"))
+ checkSparkAnswerAndOperator(df.select(element_at(col("array1"),
lit(1))))
+ }
+ }
+ }
+ }
+
test("CreateArray") {
Seq(true, false).foreach { dictionaryEnabled =>
withTempDir { dir =>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]