This is an automated email from the ASF dual-hosted git repository.
pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new efddf73 Revert "HIVE-25845: Support ColumnIndexes for Parq files
(#3094)"
efddf73 is described below
commit efddf739ff1ff2962fc25c7e3edd660e67b8963f
Author: Peter Vary
AuthorDate: Mon Mar 14 21:48:58 2022 +0100
Revert "HIVE-25845: Support ColumnIndexes for Parq files (#3094)"
This reverts commit 88054ce553604a6d939149faf4d1c3b037706aba.
---
pom.xml | 2 +-
.../hive/ql/io/parquet/ParquetRecordReaderBase.java | 3 ---
.../vector/VectorizedParquetRecordReader.java | 20 +---
3 files changed, 10 insertions(+), 15 deletions(-)
diff --git a/pom.xml b/pom.xml
index db34aa9..804aadd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -189,7 +189,7 @@
4.0.3
2.8
-1.11.2
+1.11.1
0.16.0
1.5.6
2.5.0
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
index 1227d52..5235edc 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
@@ -107,19 +107,16 @@ public class ParquetRecordReaderBase {
final List splitGroup = new ArrayList();
final long splitStart = ((FileSplit) oldSplit).getStart();
final long splitLength = ((FileSplit) oldSplit).getLength();
- long blockRowCount = 0;
for (final BlockMetaData block : blocks) {
final long firstDataPage =
block.getColumns().get(0).getFirstDataPageOffset();
if (firstDataPage >= splitStart && firstDataPage < splitStart +
splitLength) {
splitGroup.add(block);
- blockRowCount += block.getRowCount();
}
}
if (splitGroup.isEmpty()) {
LOG.warn("Skipping split, could not find row group in: " + oldSplit);
return null;
}
- LOG.debug("split group size: {}, row count: {}", splitGroup.size(),
blockRowCount);
FilterCompat.Filter filter = setFilter(jobConf,
fileMetaData.getSchema());
if (filter != null) {
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
index 26fb96c..d17ddd5 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
@@ -157,13 +157,12 @@ public class VectorizedParquetRecordReader extends
ParquetRecordReaderBase
jobConf = conf;
isReadCacheOnly = HiveConf.getBoolVar(jobConf,
ConfVars.LLAP_IO_CACHE_ONLY);
rbCtx = Utilities.getVectorizedRowBatchCtx(jobConf);
- ParquetInputSplit inputSplit = getSplit(oldInputSplit, jobConf);
- // use jobConf consistently throughout, as getSplit clones it & adds
filter to it.
+ ParquetInputSplit inputSplit = getSplit(oldInputSplit, conf);
if (inputSplit != null) {
-initialize(inputSplit, jobConf);
+initialize(inputSplit, conf);
}
FileSplit fileSplit = (FileSplit) oldInputSplit;
- initPartitionValues(fileSplit, jobConf);
+ initPartitionValues(fileSplit, conf);
bucketIdentifier = BucketIdentifier.from(conf, fileSplit.getPath());
} catch (Throwable e) {
LOG.error("Failed to create the vectorized reader due to exception " +
e);
@@ -271,6 +270,9 @@ public class VectorizedParquetRecordReader extends
ParquetRecordReaderBase
}
}
+for (BlockMetaData block : blocks) {
+ this.totalRowCount += block.getRowCount();
+}
this.fileSchema = footer.getFileMetaData().getSchema();
this.writerTimezone = DataWritableReadSupport
.getWriterTimeZoneId(footer.getFileMetaData().getKeyValueMetaData());
@@ -279,12 +281,9 @@ public class VectorizedParquetRecordReader extends
ParquetRecordReaderBase
requestedSchema = DataWritableReadSupport
.getRequestedSchema(indexAccess, columnNamesList, columnTypesList,
fileSchema, configuration);
-//TODO: For data cache this needs to be fixed and passed to reader.
-//Path path = wrapPathForCache(file, cacheKey, configuration, blocks,
cacheTag);
+Path path = wrapPathForCache(file, cacheKey, configuration, blocks,
cacheTag);
this.reader = new ParquetFileReader(
- configuration, footer.getFileMetaData(), file, blocks,
requestedSchema.getColumns());
-this.totalRowCount = this.reader.getFilteredRecordCount();
-LOG.debug("totalRowCount: {}", totalRowCount);
+ configuration, footer.getFileMetaData(), path, blocks,
requestedSchema.getColumns());