[ 
https://issues.apache.org/jira/browse/SPARK-26865?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Dongjoon Hyun updated SPARK-26865:
----------------------------------
    Description: 
DSv1 and DSv2 pass different filters, and DSv2 doesn't guarantee that filter 
expressions match the underlying schema in terms of case sensitivity.

{code}
buildReaderWithPartitionValues(..., filters: Seq[Filter], ...)
- IsNotNull(ID)

DataSourceV2Strategy.pushFilters
- IsNotNull(id)
{code}

steps to reproduce:
{code}
spark.range(10).write.orc("/tmp/o1")
spark.read.schema("ID long").orc("/tmp/o1").filter("id > 5").show

java.util.NoSuchElementException: key not found: id
  at scala.collection.immutable.Map$Map1.apply(Map.scala:114)
  at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.createBuilder(OrcFilters.scala:263)
  at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.buildSearchArgument(OrcFilters.scala:153)
  at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.$anonfun$convertibleFilters$1(OrcFilters.scala:99)
  at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:244)
  at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
  at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
  at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:39)
  at scala.collection.TraversableLike.flatMap(TraversableLike.scala:244)
  at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:241)
  at scala.collection.AbstractTraversable.flatMap(Traversable.scala:108)
  at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.convertibleFilters(OrcFilters.scala:98)
  at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.createFilter(OrcFilters.scala:87)
  at org.apache.spark.sql.execution.datasources.v2.orc.OrcScanBuilder.pushFilters(OrcScanBuilder.scala:50)
{code}

  was:
steps to reproduce:
{code}
spark.range(10).write.orc("/tmp/o1")
spark.read.schema("ID long").orc("/tmp/o1").filter("id > 5").show

java.util.NoSuchElementException: key not found: id
  at scala.collection.immutable.Map$Map1.apply(Map.scala:114)
  at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.createBuilder(OrcFilters.scala:263)
  at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.buildSearchArgument(OrcFilters.scala:153)
  at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.$anonfun$convertibleFilters$1(OrcFilters.scala:99)
  at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:244)
  at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
  at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
  at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:39)
  at scala.collection.TraversableLike.flatMap(TraversableLike.scala:244)
  at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:241)
  at scala.collection.AbstractTraversable.flatMap(Traversable.scala:108)
  at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.convertibleFilters(OrcFilters.scala:98)
  at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.createFilter(OrcFilters.scala:87)
  at org.apache.spark.sql.execution.datasources.v2.orc.OrcScanBuilder.pushFilters(OrcScanBuilder.scala:50)
{code}


> ORC filter pushdown should be case insensitive by default
> ---------------------------------------------------------
>
>                 Key: SPARK-26865
>                 URL: https://issues.apache.org/jira/browse/SPARK-26865
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 3.0.0
>            Reporter: Wenchen Fan
>            Priority: Major
>
> DSv1 and DSv2 pass different filters, and DSv2 doesn't guarantee that 
> filter expressions match the underlying schema in terms of case sensitivity.
> {code}
> buildReaderWithPartitionValues(..., filters: Seq[Filter], ...)
> - IsNotNull(ID)
> DataSourceV2Strategy.pushFilters
> - IsNotNull(id)
> {code}
> steps to reproduce:
> {code}
> spark.range(10).write.orc("/tmp/o1")
> spark.read.schema("ID long").orc("/tmp/o1").filter("id > 5").show
> java.util.NoSuchElementException: key not found: id
>   at scala.collection.immutable.Map$Map1.apply(Map.scala:114)
>   at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.createBuilder(OrcFilters.scala:263)
>   at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.buildSearchArgument(OrcFilters.scala:153)
>   at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.$anonfun$convertibleFilters$1(OrcFilters.scala:99)
>   at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:244)
>   at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
>   at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
>   at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:39)
>   at scala.collection.TraversableLike.flatMap(TraversableLike.scala:244)
>   at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:241)
>   at scala.collection.AbstractTraversable.flatMap(Traversable.scala:108)
>   at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.convertibleFilters(OrcFilters.scala:98)
>   at org.apache.spark.sql.execution.datasources.orc.OrcFilters$.createFilter(OrcFilters.scala:87)
>   at org.apache.spark.sql.execution.datasources.v2.orc.OrcScanBuilder.pushFilters(OrcScanBuilder.scala:50)
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to