This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new f29600d48bd Bump parquet from 1.16.0 to 1.17.0 (#17504)
f29600d48bd is described below
commit f29600d48bd5b60c3522d200be7ef5c8baba248f
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Wed Jan 14 14:13:41 2026 -0800
Bump parquet from 1.16.0 to 1.17.0 (#17504)
---
.../parquet/ParquetAvroRecordExtractor.java | 20 ++++++++++++--------
pom.xml | 2 +-
2 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/pinot-plugins/pinot-input-format/pinot-parquet/src/main/java/org/apache/pinot/plugin/inputformat/parquet/ParquetAvroRecordExtractor.java b/pinot-plugins/pinot-input-format/pinot-parquet/src/main/java/org/apache/pinot/plugin/inputformat/parquet/ParquetAvroRecordExtractor.java
index 8f8ba25a1cd..8d8f282eef1 100644
--- a/pinot-plugins/pinot-input-format/pinot-parquet/src/main/java/org/apache/pinot/plugin/inputformat/parquet/ParquetAvroRecordExtractor.java
+++ b/pinot-plugins/pinot-input-format/pinot-parquet/src/main/java/org/apache/pinot/plugin/inputformat/parquet/ParquetAvroRecordExtractor.java
@@ -21,7 +21,6 @@ package org.apache.pinot.plugin.inputformat.parquet;
import java.util.Set;
import javax.annotation.Nullable;
import org.apache.avro.Schema;
-import org.apache.parquet.schema.PrimitiveType;
import org.apache.pinot.plugin.inputformat.avro.AvroRecordExtractor;
import org.apache.pinot.spi.data.readers.RecordExtractorConfig;
@@ -40,10 +39,10 @@ public class ParquetAvroRecordExtractor extends AvroRecordExtractor {
Object handleDeprecatedTypes(Object value, Schema.Field field) {
Schema.Type avroColumnType = field.schema().getType();
- if (avroColumnType == org.apache.avro.Schema.Type.UNION) {
- org.apache.avro.Schema nonNullSchema = null;
- for (org.apache.avro.Schema childFieldSchema : field.schema().getTypes()) {
- if (childFieldSchema.getType() != org.apache.avro.Schema.Type.NULL) {
+ if (avroColumnType == Schema.Type.UNION) {
+ Schema nonNullSchema = null;
+ for (Schema childFieldSchema : field.schema().getTypes()) {
+ if (childFieldSchema.getType() != Schema.Type.NULL) {
if (nonNullSchema == null) {
nonNullSchema = childFieldSchema;
} else {
@@ -51,10 +50,15 @@ public class ParquetAvroRecordExtractor extends AvroRecordExtractor {
}
}
}
+ assert nonNullSchema != null;
- //INT96 is deprecated. We convert to long as we do in the native parquet extractor.
- if (nonNullSchema.getName().equals(PrimitiveType.PrimitiveTypeName.INT96.name())) {
- return ParquetNativeRecordExtractor.convertInt96ToLong((byte[]) value);
+ // NOTE:
+ // INT96 is deprecated. We convert to long as we do in the native parquet extractor.
+ // See org.apache.parquet.avro.AvroSchemaConverter about how INT96 is converted into Avro schema.
+ // We have to rely on the doc to determine whether a field is INT96.
+ if (nonNullSchema.getType() == Schema.Type.FIXED && nonNullSchema.getFixedSize() == 12
+ && "INT96 represented as byte[12]".equals(nonNullSchema.getDoc())) {
+ return ParquetNativeRecordExtractor.convertInt96ToLong((byte[]) value);
}
}
return value;
diff --git a/pom.xml b/pom.xml
index c2789e28a5e..6e9638cfed3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -140,7 +140,7 @@
<arrow.version>18.3.0</arrow.version>
<avro.version>1.12.1</avro.version>
- <parquet.version>1.16.0</parquet.version>
+ <parquet.version>1.17.0</parquet.version>
<orc.version>1.9.8</orc.version>
<hive.version>2.8.1</hive.version>
<helix.version>1.3.2</helix.version>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]