This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 0f509d2d67 Parquet: Add system config for unsafe Parquet ID fallback. (#9324)
0f509d2d67 is described below
commit 0f509d2d678db2d7322dafded58ec0ca6d7fb268
Author: Ryan Blue <[email protected]>
AuthorDate: Mon Jan 22 10:00:21 2024 -0800
Parquet: Add system config for unsafe Parquet ID fallback. (#9324)
Co-authored-by: Fokko Driesprong <[email protected]>
---
.../main/java/org/apache/iceberg/SystemConfigs.java | 17 +++++++++++++++--
.../org/apache/iceberg/mapping/NameMapping.java | 5 +++++
.../java/org/apache/iceberg/parquet/Parquet.java | 21 ++++++++++++---------
3 files changed, 32 insertions(+), 11 deletions(-)
diff --git a/core/src/main/java/org/apache/iceberg/SystemConfigs.java b/core/src/main/java/org/apache/iceberg/SystemConfigs.java
index feac1f61a1..9cb345b444 100644
--- a/core/src/main/java/org/apache/iceberg/SystemConfigs.java
+++ b/core/src/main/java/org/apache/iceberg/SystemConfigs.java
@@ -72,6 +72,19 @@ public class SystemConfigs {
8,
Integer::parseUnsignedInt);
+ /** @deprecated will be removed in 2.0.0; use name mapping instead */
+ @Deprecated
+ public static final ConfigEntry<Boolean> NETFLIX_UNSAFE_PARQUET_ID_FALLBACK_ENABLED =
+ new ConfigEntry<>(
+ "iceberg.netflix.unsafe-parquet-id-fallback.enabled",
+ "ICEBERG_NETFLIX_UNSAFE_PARQUET_ID_FALLBACK_ENABLED",
+ true,
+ s -> {
+ LOG.warn(
+ "Fallback ID assignment in Parquet is UNSAFE and will be removed in 2.0.0. Use name mapping instead.");
+ return Boolean.parseBoolean(s);
+ });
+
public static class ConfigEntry<T> {
private final String propertyKey;
private final String envKey;
@@ -101,13 +114,13 @@ public class SystemConfigs {
public final T value() {
if (lazyValue == null) {
- lazyValue = getValue();
+ lazyValue = produceValue();
}
return lazyValue;
}
- private T getValue() {
+ private T produceValue() {
String value = System.getProperty(propertyKey);
if (value == null) {
value = System.getenv(envKey);
diff --git a/core/src/main/java/org/apache/iceberg/mapping/NameMapping.java b/core/src/main/java/org/apache/iceberg/mapping/NameMapping.java
index 642a77a4f2..5ca2f75793 100644
--- a/core/src/main/java/org/apache/iceberg/mapping/NameMapping.java
+++ b/core/src/main/java/org/apache/iceberg/mapping/NameMapping.java
@@ -28,6 +28,11 @@ import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
/** Represents a mapping from external schema names to Iceberg type IDs. */
public class NameMapping implements Serializable {
private static final Joiner DOT = Joiner.on('.');
+ private static final NameMapping EMPTY = NameMapping.of();
+
+ public static NameMapping empty() {
+ return EMPTY;
+ }
public static NameMapping of(MappedField... fields) {
return new NameMapping(MappedFields.of(ImmutableList.copyOf(fields)));
diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java b/parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java
index a19556c369..d591041d19 100644
--- a/parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java
+++ b/parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java
@@ -66,6 +66,7 @@ import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.StructLike;
+import org.apache.iceberg.SystemConfigs;
import org.apache.iceberg.Table;
import org.apache.iceberg.avro.AvroSchemaUtil;
import org.apache.iceberg.data.parquet.GenericParquetWriter;
@@ -1166,27 +1167,29 @@ public class Parquet {
ParquetReadOptions options = optionsBuilder.build();
+ NameMapping mapping;
+ if (nameMapping != null) {
+ mapping = nameMapping;
+ } else if (SystemConfigs.NETFLIX_UNSAFE_PARQUET_ID_FALLBACK_ENABLED.value()) {
+ mapping = null;
+ } else {
+ mapping = NameMapping.empty();
+ }
+
if (batchedReaderFunc != null) {
return new VectorizedParquetReader<>(
file,
schema,
options,
batchedReaderFunc,
- nameMapping,
+ mapping,
filter,
reuseContainers,
caseSensitive,
maxRecordsPerBatch);
} else {
return new org.apache.iceberg.parquet.ParquetReader<>(
- file,
- schema,
- options,
- readerFunc,
- nameMapping,
- filter,
- reuseContainers,
- caseSensitive);
+ file, schema, options, readerFunc, mapping, filter, reuseContainers, caseSensitive);
}
}