[ https://issues.apache.org/jira/browse/PARQUET-1745?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Gabor Szadovszky resolved PARQUET-1745. --------------------------------------- Resolution: Not A Bug Closing this issue as "Not a Bug". See my previous comment and the referenced mail thread. > No result for partition key included in Parquet file > ---------------------------------------------------- > > Key: PARQUET-1745 > URL: https://issues.apache.org/jira/browse/PARQUET-1745 > Project: Parquet > Issue Type: Sub-task > Components: parquet-mr > Affects Versions: 1.11.0 > Reporter: Yuming Wang > Priority: Major > Attachments: FilterByColumnIndex.png > > > How to reproduce: > {code:sh} > git clone https://github.com/apache/spark.git && cd spark > git fetch origin pull/26804/head:PARQUET-1745 > git checkout PARQUET-1745 > build/sbt "sql/test-only *ParquetV2PartitionDiscoverySuite" > {code} > output: > {noformat} > [info] - read partitioned table - partition key included in Parquet file *** > FAILED *** (1 second, 57 milliseconds) > [info] Results do not match for query: > [info] Timezone: > sun.util.calendar.ZoneInfo[id="America/Los_Angeles",offset=-28800000,dstSavings=3600000,useDaylight=true,transitions=185,lastRule=java.util.SimpleTimeZone[id=America/Los_Angeles,offset=-28800000,dstSavings=3600000,useDaylight=true,startYear=0,startMode=3,startMonth=2,startDay=8,startDayOfWeek=1,startTime=7200000,startTimeMode=0,endMode=3,endMonth=10,endDay=1,endDayOfWeek=1,endTime=7200000,endTimeMode=0]] > [info] Timezone Env: > [info] > [info] == Parsed Logical Plan == > [info] 'Project [*] > [info] +- 'Filter ('pi = 1) > [info] +- 'UnresolvedRelation [t] > [info] > [info] == Analyzed Logical Plan == > [info] intField: int, stringField: string, pi: int, ps: string > [info] Project [intField#1788, stringField#1789, pi#1790, ps#1791] > [info] +- Filter (pi#1790 = 1) > [info] +- SubqueryAlias `t` > [info] +- RelationV2[intField#1788, stringField#1789, pi#1790, > ps#1791] parquet > file:/root/opensource/apache-spark/target/tmp/spark-c7e85130-3e1f-4137-ac7c-32f48be3b74a > [info] > [info] == Optimized Logical Plan == > [info] Filter (isnotnull(pi#1790) AND (pi#1790 = 1)) > [info] +- RelationV2[intField#1788, stringField#1789, pi#1790, ps#1791] > parquet > file:/root/opensource/apache-spark/target/tmp/spark-c7e85130-3e1f-4137-ac7c-32f48be3b74a > [info] > [info] == Physical Plan == > [info] *(1) Project [intField#1788, stringField#1789, pi#1790, ps#1791] > [info] +- *(1) Filter (isnotnull(pi#1790) AND (pi#1790 = 1)) > [info] +- *(1) ColumnarToRow > [info] +- BatchScan[intField#1788, stringField#1789, pi#1790, > ps#1791] ParquetScan Location: > InMemoryFileIndex[file:/root/opensource/apache-spark/target/tmp/spark-c7e85130-3e1f-4137-ac7c-32f..., > ReadSchema: struct<intField:int,stringField:string>, PushedFilters: > [IsNotNull(pi), EqualTo(pi,1)] > [info] > [info] == Results == > [info] > [info] == Results == > [info] !== Correct Answer - 20 == == Spark Answer - 0 == > [info] struct<> struct<> > [info] ![1,1,1,bar] > [info] ![1,1,1,foo] > [info] ![10,10,1,bar] > [info] ![10,10,1,foo] > [info] ![2,2,1,bar] > [info] ![2,2,1,foo] > [info] ![3,3,1,bar] > [info] ![3,3,1,foo] > [info] ![4,4,1,bar] > [info] ![4,4,1,foo] > [info] ![5,5,1,bar] > [info] ![5,5,1,foo] > [info] ![6,6,1,bar] > [info] ![6,6,1,foo] > [info] ![7,7,1,bar] > [info] ![7,7,1,foo] > [info] ![8,8,1,bar] > [info] ![8,8,1,foo] > [info] ![9,9,1,bar] > [info] ![9,9,1,foo] (QueryTest.scala:248) > [info] org.scalatest.exceptions.TestFailedException: > [info] at > org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:530) > [info] at > org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:529) > [info] at > org.apache.spark.sql.QueryTest$.newAssertionFailedException(QueryTest.scala:238) > [info] at org.scalatest.Assertions.fail(Assertions.scala:1091) > [info] at org.scalatest.Assertions.fail$(Assertions.scala:1087) > [info] at org.apache.spark.sql.QueryTest$.fail(QueryTest.scala:238) > [info] at org.apache.spark.sql.QueryTest$.checkAnswer(QueryTest.scala:248) > [info] at org.apache.spark.sql.QueryTest.checkAnswer(QueryTest.scala:156) > [info] at > org.apache.spark.sql.execution.datasources.parquet.ParquetV2PartitionDiscoverySuite.$anonfun$new$194(ParquetPartitionDiscoverySuite.scala:1232) > [info] at > scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) > [info] at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377) > [info] at > org.apache.spark.sql.test.SQLTestUtilsBase.withTempView(SQLTestUtils.scala:260) > [info] at > org.apache.spark.sql.test.SQLTestUtilsBase.withTempView$(SQLTestUtils.scala:258) > [info] at > org.apache.spark.sql.execution.datasources.parquet.ParquetPartitionDiscoverySuite.withTempView(ParquetPartitionDiscoverySuite.scala:53) > [info] at > org.apache.spark.sql.execution.datasources.parquet.ParquetV2PartitionDiscoverySuite.$anonfun$new$190(ParquetPartitionDiscoverySuite.scala:1212) > [info] at > org.apache.spark.sql.execution.datasources.parquet.ParquetV2PartitionDiscoverySuite.$anonfun$new$190$adapted(ParquetPartitionDiscoverySuite.scala:1200) > [info] at > org.apache.spark.sql.test.SQLTestUtils.$anonfun$withTempDir$1(SQLTestUtils.scala:76) > [info] at > org.apache.spark.sql.test.SQLTestUtils.$anonfun$withTempDir$1$adapted(SQLTestUtils.scala:75) > [info] at > org.apache.spark.SparkFunSuite.withTempDir(SparkFunSuite.scala:161) > [info] at > org.apache.spark.sql.execution.datasources.parquet.ParquetPartitionDiscoverySuite.org$apache$spark$sql$test$SQLTestUtils$$super$withTempDir(ParquetPartitionDiscoverySuite.scala:53) > [info] at > org.apache.spark.sql.test.SQLTestUtils.withTempDir(SQLTestUtils.scala:75) > [info] at > org.apache.spark.sql.test.SQLTestUtils.withTempDir$(SQLTestUtils.scala:74) > [info] at > org.apache.spark.sql.execution.datasources.parquet.ParquetPartitionDiscoverySuite.withTempDir(ParquetPartitionDiscoverySuite.scala:53) > [info] at > org.apache.spark.sql.execution.datasources.parquet.ParquetV2PartitionDiscoverySuite.$anonfun$new$189(ParquetPartitionDiscoverySuite.scala:1200) > [info] at > scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) > [info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) > [info] at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) > [info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) > [info] at org.scalatest.Transformer.apply(Transformer.scala:22) > [info] at org.scalatest.Transformer.apply(Transformer.scala:20) > [info] at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186) > [info] at > org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:149) > [info] at > org.scalatest.FunSuiteLike.invokeWithFixture$1(FunSuiteLike.scala:184) > [info] at > org.scalatest.FunSuiteLike.$anonfun$runTest$1(FunSuiteLike.scala:196) > [info] at org.scalatest.SuperEngine.runTestImpl(Engine.scala:286) > [info] at org.scalatest.FunSuiteLike.runTest(FunSuiteLike.scala:196) > [info] at org.scalatest.FunSuiteLike.runTest$(FunSuiteLike.scala:178) > [info] at > org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:56) > [info] at > org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:221) > [info] at > org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:214) > [info] at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:56) > [info] at > org.scalatest.FunSuiteLike.$anonfun$runTests$1(FunSuiteLike.scala:229) > [info] at > org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:393) > [info] at scala.collection.immutable.List.foreach(List.scala:392) > [info] at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:381) > [info] at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:376) > [info] at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:458) > [info] at org.scalatest.FunSuiteLike.runTests(FunSuiteLike.scala:229) > [info] at org.scalatest.FunSuiteLike.runTests$(FunSuiteLike.scala:228) > [info] at org.scalatest.FunSuite.runTests(FunSuite.scala:1560) > [info] at org.scalatest.Suite.run(Suite.scala:1124) > [info] at org.scalatest.Suite.run$(Suite.scala:1106) > [info] at > org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560) > [info] at org.scalatest.FunSuiteLike.$anonfun$run$1(FunSuiteLike.scala:233) > [info] at org.scalatest.SuperEngine.runImpl(Engine.scala:518) > [info] at org.scalatest.FunSuiteLike.run(FunSuiteLike.scala:233) > [info] at org.scalatest.FunSuiteLike.run$(FunSuiteLike.scala:232) > [info] at > org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:56) > [info] at > org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213) > [info] at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210) > [info] at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208) > [info] at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:56) > [info] at > org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:317) > [info] at > org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:510) > [info] at sbt.ForkMain$Run$2.call(ForkMain.java:296) > [info] at sbt.ForkMain$Run$2.call(ForkMain.java:286) > [info] at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264) > [info] at > java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) > [info] at > java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) > [info] at java.base/java.lang.Thread.run(Thread.java:834) > {noformat} -- This message was sent by Atlassian Jira (v8.3.4#803005)