[ https://issues.apache.org/jira/browse/PARQUET-1744?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Gabor Szadovszky reassigned PARQUET-1744:
-----------------------------------------

    Assignee: Gabor Szadovszky

> Some filters throw ArrayIndexOutOfBoundsException
> --------------------------------------------------
>
>                 Key: PARQUET-1744
>                 URL: https://issues.apache.org/jira/browse/PARQUET-1744
>             Project: Parquet
>          Issue Type: Sub-task
>          Components: parquet-mr
>    Affects Versions: 1.11.0
>            Reporter: Yuming Wang
>            Assignee: Gabor Szadovszky
>            Priority: Major
>
> How to reproduce:
> * Build Spark
> {code:sh}
> git clone https://github.com/apache/spark.git && cd spark
> git fetch origin pull/26804/head:PARQUET-1744
> git checkout PARQUET-1744
> build/sbt package
> bin/spark-shell
> {code}
> * Prepare data (each INSERT below writes a separate Parquet file, so the files for rows 5-7 contain only null values in column b, and row 7's also in column c):
> {code:scala}
> spark.sql("create table t1(a int, b int, c int) using parquet")
> spark.sql("insert into t1 values(1,0,0)")
> spark.sql("insert into t1 values(2,0,1)")
> spark.sql("insert into t1 values(3,1,0)")
> spark.sql("insert into t1 values(4,1,1)")
> spark.sql("insert into t1 values(5,null,0)")
> spark.sql("insert into t1 values(6,null,1)")
> spark.sql("insert into t1 values(7,null,null)")
> {code}
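> The WHERE clause of test 1 below is pushed down to parquet-mr as the filter {{b < 10 OR c = 1}}. As a minimal sketch of driving the same code path without Spark (the class name and file path are placeholders, and {{GroupReadSupport}} from parquet-hadoop's example package is assumed), one could read one of the null-only files directly; column-index filtering, where the failure occurs, is enabled by default in 1.11.0:
> {code:java}
> import static org.apache.parquet.filter2.predicate.FilterApi.*;
>
> import org.apache.hadoop.fs.Path;
> import org.apache.parquet.example.data.Group;
> import org.apache.parquet.filter2.compat.FilterCompat;
> import org.apache.parquet.filter2.predicate.FilterPredicate;
> import org.apache.parquet.hadoop.ParquetReader;
> import org.apache.parquet.hadoop.example.GroupReadSupport;
>
> public class Parquet1744Repro {
>   public static void main(String[] args) throws Exception {
>     // b < 10 OR c = 1 -- the predicate Spark pushes down for test 1
>     FilterPredicate pred = or(lt(intColumn("b"), 10), eq(intColumn("c"), 1));
>
>     // Placeholder path: point this at one of the files written above whose
>     // b column is null in every row (rows 5-7).
>     Path file = new Path("/path/to/t1/part-00000.parquet");
>
>     try (ParquetReader<Group> reader = ParquetReader
>         .builder(new GroupReadSupport(), file)
>         .withFilter(FilterCompat.get(pred))
>         .useColumnIndexFilter() // explicit here; on by default in 1.11.0
>         .build()) {
>       for (Group g = reader.read(); g != null; g = reader.read()) {
>         System.out.println(g);
>       }
>     }
>   }
> }
> {code}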
> * Run test 1
> {code:scala}
> scala> spark.sql("select a+120 from t1 where b<10 OR c=1").show
> java.lang.reflect.InvocationTargetException
>     at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>     at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>     at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>     at org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase.initialize(SpecificParquetRecordReaderBase.java:155)
>     at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.initialize(VectorizedParquetRecordReader.java:131)
>     at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat.$anonfun$buildReaderWithPartitionValues$2(ParquetFileFormat.scala:319)
>     at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:116)
>     at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:169)
>     at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
>     at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:486)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
>     at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
>     at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:726)
>     at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:339)
>     at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:872)
>     at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:872)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
>     at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>     at org.apache.spark.scheduler.Task.run(Task.scala:127)
>     at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:441)
>     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:444)
>     at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
>     at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
>     at java.base/java.lang.Thread.run(Thread.java:834)
> Caused by: java.lang.ArrayIndexOutOfBoundsException: Index -1 out of bounds for length 0
>     at org.apache.parquet.internal.column.columnindex.IntColumnIndexBuilder$IntColumnIndex$1.compareValueToMin(IntColumnIndexBuilder.java:74)
>     at org.apache.parquet.internal.column.columnindex.BoundaryOrder$2.lt(BoundaryOrder.java:123)
>     at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder$ColumnIndexBase.visit(ColumnIndexBuilder.java:262)
>     at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder$ColumnIndexBase.visit(ColumnIndexBuilder.java:64)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.lambda$visit$2(ColumnIndexFilter.java:131)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.applyPredicate(ColumnIndexFilter.java:176)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:131)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
>     at org.apache.parquet.filter2.predicate.Operators$Lt.accept(Operators.java:209)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:186)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
>     at org.apache.parquet.filter2.predicate.Operators$Or.accept(Operators.java:321)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:86)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:81)
>     at org.apache.parquet.filter2.compat.FilterCompat$FilterPredicateCompat.accept(FilterCompat.java:137)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.calculateRowRanges(ColumnIndexFilter.java:81)
>     at org.apache.parquet.hadoop.ParquetFileReader.getRowRanges(ParquetFileReader.java:961)
>     at org.apache.parquet.hadoop.ParquetFileReader.getFilteredRecordCount(ParquetFileReader.java:766)
>     ... 29 more
> +---------+
> |(a + 120)|
> +---------+
> |      124|
> |      121|
> |      122|
> |      123|
> +---------+
> {code}
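> The {{Caused by}} frames point at {{IntColumnIndexBuilder}} indexing into an empty array: {{compareValueToMin}} with index -1 here, {{compareValueToMax}} with index 0 in tests 2 and 3 below. A plausible reading, sketched as a minimal model rather than parquet-mr's actual code: a page containing only nulls is recorded in the column index's null-pages list and contributes no min/max entry, so for a chunk whose only page is all-null (the files for rows 5-7) the min/max arrays have length 0, yet the {{BoundaryOrder}} search still asks the comparator about that page:
> {code:java}
> // Minimal model of the failing pattern (not parquet-mr's actual code).
> public class NullOnlyPageModel {
>   public static void main(String[] args) {
>     boolean[] nullPages = { true }; // one page, every value in it null
>     int[] minValues = new int[0];   // no min is recorded for a null-only page
>
>     int pageIndex = 0;              // page probed by the boundary-order search
>     int literal = 10;               // from the pushed-down predicate b < 10
>     // Throws ArrayIndexOutOfBoundsException: Index 0 out of bounds for length 0
>     int cmp = Integer.compare(literal, minValues[pageIndex]);
>     System.out.println(cmp);        // never reached
>   }
> }
> {code}
> A fix would presumably have the search consult the null-pages list ({{ColumnIndex.getNullPages()}}) and skip null-only pages before comparing values.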
> * Run test 2
> {code:scala}
> scala> spark.sql("select a+140 from t1 where not (b<10 AND c=1)").show
> java.lang.reflect.InvocationTargetException
>     at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>     at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>     at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>     at org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase.initialize(SpecificParquetRecordReaderBase.java:155)
>     at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.initialize(VectorizedParquetRecordReader.java:131)
>     at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat.$anonfun$buildReaderWithPartitionValues$2(ParquetFileFormat.scala:319)
>     at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:116)
>     at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:169)
>     at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
>     at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:486)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
>     at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
>     at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:726)
>     at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:339)
>     at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:872)
>     at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:872)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
>     at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>     at org.apache.spark.scheduler.Task.run(Task.scala:127)
>     at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:441)
>     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:444)
>     at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
>     at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
>     at java.base/java.lang.Thread.run(Thread.java:834)
> Caused by: java.lang.ArrayIndexOutOfBoundsException: Index 0 out of bounds for length 0
>     at org.apache.parquet.internal.column.columnindex.IntColumnIndexBuilder$IntColumnIndex$1.compareValueToMax(IntColumnIndexBuilder.java:79)
>     at org.apache.parquet.internal.column.columnindex.BoundaryOrder$2.gtEq(BoundaryOrder.java:107)
>     at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder$ColumnIndexBase.visit(ColumnIndexBuilder.java:257)
>     at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder$ColumnIndexBase.visit(ColumnIndexBuilder.java:64)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.lambda$visit$5(ColumnIndexFilter.java:146)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.applyPredicate(ColumnIndexFilter.java:176)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:146)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
>     at org.apache.parquet.filter2.predicate.Operators$GtEq.accept(Operators.java:249)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:186)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
>     at org.apache.parquet.filter2.predicate.Operators$Or.accept(Operators.java:321)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:86)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:81)
>     at org.apache.parquet.filter2.compat.FilterCompat$FilterPredicateCompat.accept(FilterCompat.java:137)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.calculateRowRanges(ColumnIndexFilter.java:81)
>     at org.apache.parquet.hadoop.ParquetFileReader.getRowRanges(ParquetFileReader.java:961)
>     at org.apache.parquet.hadoop.ParquetFileReader.getFilteredRecordCount(ParquetFileReader.java:766)
>     ... 29 more
> +---------+
> |(a + 140)|
> +---------+
> |      141|
> |      143|
> +---------+
> {code}
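> Note that test 2 above and test 3 below fail through {{Operators$GtEq}} under {{Operators$Or}} even though the queries are written with {{not}}, {{<}} and {{AND}}: the negation is rewritten by De Morgan's laws before the predicate reaches the column-index filter, so the same empty min/max arrays are probed via {{gtEq}}/{{notEq}} instead of {{lt}}/{{eq}}. A small sketch of the equivalent rewrite using parquet-mr's {{LogicalInverseRewriter}} (the actual rewrite may equally happen on the Spark side during pushdown; the class name is a placeholder):
> {code:java}
> import static org.apache.parquet.filter2.predicate.FilterApi.*;
>
> import org.apache.parquet.filter2.predicate.FilterPredicate;
> import org.apache.parquet.filter2.predicate.LogicalInverseRewriter;
>
> public class DeMorganSketch {
>   public static void main(String[] args) {
>     // not(b < 10 AND c = 1)  ==>  b >= 10 OR c != 1
>     FilterPredicate rewritten = LogicalInverseRewriter.rewrite(
>         not(and(lt(intColumn("b"), 10), eq(intColumn("c"), 1))));
>     System.out.println(rewritten); // roughly: or(gteq(b, 10), noteq(c, 1))
>   }
> }
> {code}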
> * Run test 3
> {code:scala}
> scala> spark.sql("select a+150 from t1 where not (c=1 AND b<10)").show
> java.lang.reflect.InvocationTargetException
>     at jdk.internal.reflect.GeneratedMethodAccessor59.invoke(Unknown Source)
>     at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.base/java.lang.reflect.Method.invoke(Method.java:566)
>     at org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase.initialize(SpecificParquetRecordReaderBase.java:155)
>     at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.initialize(VectorizedParquetRecordReader.java:131)
>     at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat.$anonfun$buildReaderWithPartitionValues$2(ParquetFileFormat.scala:319)
>     at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:116)
>     at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:169)
>     at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
>     at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:486)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source)
>     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
>     at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
>     at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:726)
>     at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:339)
>     at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:872)
>     at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:872)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
>     at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>     at org.apache.spark.scheduler.Task.run(Task.scala:127)
>     at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:441)
>     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:444)
>     at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
>     at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
>     at java.base/java.lang.Thread.run(Thread.java:834)
> Caused by: java.lang.ArrayIndexOutOfBoundsException: Index 0 out of bounds for length 0
>     at org.apache.parquet.internal.column.columnindex.IntColumnIndexBuilder$IntColumnIndex$1.compareValueToMax(IntColumnIndexBuilder.java:79)
>     at org.apache.parquet.internal.column.columnindex.BoundaryOrder$2.gtEq(BoundaryOrder.java:107)
>     at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder$ColumnIndexBase.visit(ColumnIndexBuilder.java:257)
>     at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder$ColumnIndexBase.visit(ColumnIndexBuilder.java:64)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.lambda$visit$5(ColumnIndexFilter.java:146)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.applyPredicate(ColumnIndexFilter.java:176)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:146)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
>     at org.apache.parquet.filter2.predicate.Operators$GtEq.accept(Operators.java:249)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:186)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
>     at org.apache.parquet.filter2.predicate.Operators$Or.accept(Operators.java:321)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:86)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:81)
>     at org.apache.parquet.filter2.compat.FilterCompat$FilterPredicateCompat.accept(FilterCompat.java:137)
>     at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.calculateRowRanges(ColumnIndexFilter.java:81)
>     at org.apache.parquet.hadoop.ParquetFileReader.getRowRanges(ParquetFileReader.java:961)
>     at org.apache.parquet.hadoop.ParquetFileReader.getFilteredRecordCount(ParquetFileReader.java:766)
>     ... 28 more
> +---------+
> |(a + 150)|
> +---------+
> |      151|
> |      153|
> +---------+
> {code}

--
This message was sent by Atlassian Jira
(v8.3.4#803005)