Github user dbtsai commented on a diff in the pull request: https://github.com/apache/spark/pull/22357#discussion_r216204022 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala --- @@ -155,6 +161,47 @@ class ParquetSchemaPruningSuite Row(null) :: Row(null) :: Nil) } + testSchemaPruning("select a single complex field and in where clause") { + val query1 = sql("select name.first from contacts where name.first = 'Jane'") + checkScan(query1, "struct<name:struct<first:string>>") + checkAnswer(query1, Row("Jane") :: Nil) + + val query2 = sql("select name.first, name.last from contacts where name.first = 'Jane'") + checkScan(query2, "struct<name:struct<first:string,last:string>>") + checkAnswer(query2, Row("Jane", "Doe") :: Nil) + + val query3 = sql("select name.first from contacts " + + "where employer.company.name = 'abc' and p = 1") --- End diff -- Let's say a user adds `where employer.company is not null`, can we still read schema with `employer:struct<company:struct<name:string>>>` as we only mark `contentAccessed = false` when `IsNotNull` is on an attribute?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org