This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new f1d08791 chore: Add ignored tests for reading complex types from
Parquet (#1167)
f1d08791 is described below
commit f1d08791e0603e5543702fa952365d4e61f8df4c
Author: Andy Grove <[email protected]>
AuthorDate: Thu Dec 12 16:41:12 2024 -0700
chore: Add ignored tests for reading complex types from Parquet (#1167)
* Add ignored tests for reading structs from Parquet
* add basic map test
* add tests for Map and Array
---
.../org/apache/comet/CometExpressionSuite.scala | 127 +++++++++++++++++++++
1 file changed, 127 insertions(+)
diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
index 35f374bf..cce7cb20 100644
--- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -2195,6 +2195,133 @@ class CometExpressionSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
}
}
+ ignore("get_struct_field - select primitive fields") {
+ withTempPath { dir =>
+ // create input file with Comet disabled
+ withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+ val df = spark
+ .range(5)
+ // Add both a null struct and null inner value
+ .select(when(col("id") > 1, struct(when(col("id") > 2,
col("id")).alias("id")))
+ .alias("nested1"))
+
+ df.write.parquet(dir.toString())
+ }
+
+ Seq("", "parquet").foreach { v1List =>
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) {
+ val df = spark.read.parquet(dir.toString())
+ checkSparkAnswerAndOperator(df.select("nested1.id"))
+ }
+ }
+ }
+ }
+
+ ignore("get_struct_field - select subset of struct") {
+ withTempPath { dir =>
+ // create input file with Comet disabled
+ withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+ val df = spark
+ .range(5)
+ // Add both a null struct and null inner value
+ .select(
+ when(
+ col("id") > 1,
+ struct(
+ when(col("id") > 2, col("id")).alias("id"),
+ when(col("id") > 2, struct(when(col("id") > 3,
col("id")).alias("id")))
+ .as("nested2")))
+ .alias("nested1"))
+
+ df.write.parquet(dir.toString())
+ }
+
+ Seq("", "parquet").foreach { v1List =>
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) {
+ val df = spark.read.parquet(dir.toString())
+ checkSparkAnswerAndOperator(df.select("nested1.id"))
+ checkSparkAnswerAndOperator(df.select("nested1.nested2"))
+ checkSparkAnswerAndOperator(df.select("nested1.nested2.id"))
+ checkSparkAnswerAndOperator(df.select("nested1.id",
"nested1.nested2.id"))
+ }
+ }
+ }
+ }
+
+ ignore("get_struct_field - read entire struct") {
+ withTempPath { dir =>
+ // create input file with Comet disabled
+ withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+ val df = spark
+ .range(5)
+ // Add both a null struct and null inner value
+ .select(
+ when(
+ col("id") > 1,
+ struct(
+ when(col("id") > 2, col("id")).alias("id"),
+ when(col("id") > 2, struct(when(col("id") > 3,
col("id")).alias("id")))
+ .as("nested2")))
+ .alias("nested1"))
+
+ df.write.parquet(dir.toString())
+ }
+
+ Seq("", "parquet").foreach { v1List =>
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) {
+ val df = spark.read.parquet(dir.toString())
+ checkSparkAnswerAndOperator(df.select("nested1"))
+ }
+ }
+ }
+ }
+
+ ignore("read map[int, int] from parquet") {
+ withTempPath { dir =>
+ // create input file with Comet disabled
+ withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+ val df = spark
+ .range(5)
+        // Spark does not allow null as a key but does allow null as a
+        // value, and the entire map can be null
+ .select(
+ when(col("id") > 1, map(col("id"), when(col("id") > 2,
col("id")))).alias("map1"))
+ df.write.parquet(dir.toString())
+ }
+
+ Seq("", "parquet").foreach { v1List =>
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) {
+ val df = spark.read.parquet(dir.toString())
+ checkSparkAnswerAndOperator(df.select("map1"))
+ checkSparkAnswerAndOperator(df.select(map_keys(col("map1"))))
+ checkSparkAnswerAndOperator(df.select(map_values(col("map1"))))
+ }
+ }
+ }
+ }
+
+ ignore("read array[int] from parquet") {
+ withTempPath { dir =>
+ // create input file with Comet disabled
+ withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+ val df = spark
+ .range(5)
+        // Rows where id <= 1 produce a null array; the remaining rows
+        // contain generated integer sequences
+ .select(when(col("id") > 1, sequence(lit(0), col("id") *
2)).alias("array1"))
+ df.write.parquet(dir.toString())
+ }
+
+ Seq("", "parquet").foreach { v1List =>
+ withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> v1List) {
+ val df = spark.read.parquet(dir.toString())
+ checkSparkAnswerAndOperator(df.select("array1"))
+ checkSparkAnswerAndOperator(df.select(element_at(col("array1"),
lit(1))))
+ }
+ }
+ }
+ }
+
test("CreateArray") {
Seq(true, false).foreach { dictionaryEnabled =>
withTempDir { dir =>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]