PARQUET-511: Integer overflow when counting values in column. This commit fixes an issue that occurs when the total number of values across the pages of a column chunk exceeds the maximum value of a 32-bit int (Integer.MAX_VALUE). No exception is thrown directly, but the value counter overflows and the definition level is set incorrectly, which causes a null value to be returned during read.
Author: Michal Gorecki <gorec...@amazon.com> Closes #321 from goreckm/int-overflow and squashes the following commits: d224815 [Michal Gorecki] enhancing exception message 7334be2 [Michal Gorecki] PARQUET-511: Integer overflow when counting values in column. Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/aced0eb3 Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/aced0eb3 Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/aced0eb3 Branch: refs/heads/parquet-1.8.x Commit: aced0eb3770b82d9bde95ac499f902bed372fd39 Parents: 06567fa Author: Michal Gorecki <gorec...@amazon.com> Authored: Mon Aug 1 14:38:07 2016 -0700 Committer: Ryan Blue <b...@apache.org> Committed: Mon Jan 9 16:54:54 2017 -0800 ---------------------------------------------------------------------- .../java/org/apache/parquet/column/impl/ColumnReaderImpl.java | 6 +++--- .../org/apache/parquet/hadoop/ColumnChunkPageReadStore.java | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/aced0eb3/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java ---------------------------------------------------------------------- diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java index c53977f..6aafb78 100644 --- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java +++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java @@ -149,7 +149,7 @@ public class ColumnReaderImpl implements ColumnReader { private int dictionaryId; private long endOfPageValueCount; - private int readValues = 0; + private long readValues = 0; private int pageValueCount = 0; private final PrimitiveConverter 
converter; @@ -351,8 +351,8 @@ public class ColumnReaderImpl implements ColumnReader { this.dictionary = null; } this.totalValueCount = pageReader.getTotalValueCount(); - if (totalValueCount == 0) { - throw new ParquetDecodingException("totalValueCount == 0"); + if (totalValueCount <= 0) { + throw new ParquetDecodingException("totalValueCount '" + totalValueCount + "' <= 0"); } consume(); } http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/aced0eb3/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java ---------------------------------------------------------------------- diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java index ce10e64..2e8f84a 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java @@ -64,7 +64,7 @@ class ColumnChunkPageReadStore implements PageReadStore, DictionaryPageReadStore this.decompressor = decompressor; this.compressedPages = new LinkedList<DataPage>(compressedPages); this.compressedDictionaryPage = compressedDictionaryPage; - int count = 0; + long count = 0; for (DataPage p : compressedPages) { count += p.getValueCount(); }