Github user viirya commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22197#discussion_r212812744
  
    --- Diff: 
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
 ---
    @@ -1021,6 +1022,116 @@ class ParquetFilterSuite extends QueryTest with 
ParquetTest with SharedSQLContex
           }
         }
       }
    +
    +  test("SPARK-25207: Case-insensitive field resolution for pushdown when 
reading parquet") {
    +    val caseSensitiveParquetFilters =
    +      new ParquetFilters(conf.parquetFilterPushDownDate, 
conf.parquetFilterPushDownTimestamp,
    +        conf.parquetFilterPushDownDecimal, 
conf.parquetFilterPushDownStringStartWith,
    +        conf.parquetFilterPushDownInFilterThreshold, caseSensitive = true)
    +
    +    val caseInsensitiveParquetFilters =
    +      new ParquetFilters(conf.parquetFilterPushDownDate, 
conf.parquetFilterPushDownTimestamp,
    +        conf.parquetFilterPushDownDecimal, 
conf.parquetFilterPushDownStringStartWith,
    +        conf.parquetFilterPushDownInFilterThreshold, caseSensitive = false)
    +
    +    def testCaseInsensitiveResolution(
    +        schema: StructType,
    +        expected: FilterPredicate,
    +        filter: sources.Filter): Unit = {
    +      val parquetSchema = new 
SparkToParquetSchemaConverter(conf).convert(schema)
    +
    +      assertResult(Some(expected)) {
    +        caseInsensitiveParquetFilters.createFilter(parquetSchema, filter)
    +      }
    +      assertResult(None) {
    +        caseSensitiveParquetFilters.createFilter(parquetSchema, filter)
    +      }
    +    }
    +
    +    val schema = StructType(Seq(StructField("cint", IntegerType)))
    +
    +    testCaseInsensitiveResolution(
    +      schema, FilterApi.eq(intColumn("cint"), null.asInstanceOf[Integer]), 
sources.IsNull("CINT"))
    +
    +    testCaseInsensitiveResolution(
    +      schema,
    +      FilterApi.notEq(intColumn("cint"), null.asInstanceOf[Integer]),
    +      sources.IsNotNull("CINT"))
    +
    +    testCaseInsensitiveResolution(
    +      schema, FilterApi.eq(intColumn("cint"), 1000: Integer), 
sources.EqualTo("CINT", 1000))
    +
    +    testCaseInsensitiveResolution(
    +      schema,
    +      FilterApi.notEq(intColumn("cint"), 1000: Integer),
    +      sources.Not(sources.EqualTo("CINT", 1000)))
    +
    +    testCaseInsensitiveResolution(
    +      schema, FilterApi.eq(intColumn("cint"), 1000: Integer), 
sources.EqualNullSafe("CINT", 1000))
    +
    +    testCaseInsensitiveResolution(
    +      schema,
    +      FilterApi.notEq(intColumn("cint"), 1000: Integer),
    +      sources.Not(sources.EqualNullSafe("CINT", 1000)))
    +
    +    testCaseInsensitiveResolution(
    +      schema,
    +      FilterApi.lt(intColumn("cint"), 1000: Integer), 
sources.LessThan("CINT", 1000))
    +
    +    testCaseInsensitiveResolution(
    +      schema,
    +      FilterApi.ltEq(intColumn("cint"), 1000: Integer),
    +      sources.LessThanOrEqual("CINT", 1000))
    +
    +    testCaseInsensitiveResolution(
    +      schema, FilterApi.gt(intColumn("cint"), 1000: Integer), 
sources.GreaterThan("CINT", 1000))
    +
    +    testCaseInsensitiveResolution(
    +      schema,
    +      FilterApi.gtEq(intColumn("cint"), 1000: Integer),
    +      sources.GreaterThanOrEqual("CINT", 1000))
    --- End diff --
    
    nit: maybe we don't need to test against so many predicates. We just want to 
make sure case-insensitive resolution works.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to