This is an automated email from the ASF dual-hosted git repository.
amoghj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 345140e9f7 AWS, Core, Data, Spark: Remove deprecations for 1.11.0
(#14059)
345140e9f7 is described below
commit 345140e9f7d428fcc3b4fcc639a690084a698b76
Author: Doğukan Çağatay <[email protected]>
AuthorDate: Fri Sep 19 00:35:17 2025 +0200
AWS, Core, Data, Spark: Remove deprecations for 1.11.0 (#14059)
Co-authored-by: Eduard Tudenhoefner <[email protected]>
---
.palantir/revapi.yml | 57 ++
.../java/org/apache/iceberg/aws/s3/S3FileIO.java | 39 +-
.../org/apache/iceberg/aws/s3/S3InputFile.java | 88 ---
.../org/apache/iceberg/aws/s3/S3OutputFile.java | 41 --
.../org/apache/iceberg/PartitionStatsHandler.java | 19 -
.../org/apache/iceberg/PartitionStatsUtil.java | 144 -----
.../org/apache/iceberg/RewriteTablePathUtil.java | 91 ----
.../org/apache/iceberg/TableMetadataParser.java | 8 -
.../apache/iceberg/encryption/EncryptionUtil.java | 12 -
.../org/apache/iceberg/rest/auth/OAuth2Util.java | 91 +---
.../iceberg/PartitionStatsHandlerTestBase.java | 4 +-
.../org/apache/iceberg/TestPartitionStatsUtil.java | 588 ---------------------
.../apache/iceberg/TestRewriteTablePathUtil.java | 11 +-
.../iceberg/PartitionStatsHandlerBenchmark.java | 2 +-
.../apache/iceberg/data/PartitionStatsHandler.java | 282 ----------
.../java/org/apache/iceberg/gcp/gcs/GCSFileIO.java | 18 -
.../actions/TestComputePartitionStatsAction.java | 4 +-
.../extensions/TestRewriteDataFilesProcedure.java | 2 +-
.../extensions/TestRewriteManifestsProcedure.java | 2 +-
.../actions/TestComputePartitionStatsAction.java | 4 +-
.../extensions/TestRewriteDataFilesProcedure.java | 2 +-
.../extensions/TestRewriteManifestsProcedure.java | 2 +-
.../actions/TestComputePartitionStatsAction.java | 4 +-
23 files changed, 82 insertions(+), 1433 deletions(-)
diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml
index 1c871ba30c..8605745ae7 100644
--- a/.palantir/revapi.yml
+++ b/.palantir/revapi.yml
@@ -1363,6 +1363,63 @@ acceptedBreaks:
old: "method
org.apache.iceberg.parquet.ParquetValueWriters.StructWriter<org.apache.iceberg.data.Record>\
\
org.apache.iceberg.data.parquet.GenericParquetWriter::createStructWriter(java.util.List<org.apache.iceberg.parquet.ParquetValueWriter<?>>)"
justification: "Removing deprecations for 1.10.0"
+ "1.10.0":
+ org.apache.iceberg:iceberg-core:
+ - code: "java.class.removed"
+ old: "class org.apache.iceberg.PartitionStatsUtil"
+ justification: "Removing deprecated code for 1.11.0"
+ - code: "java.method.removed"
+ old: "method java.lang.String
org.apache.iceberg.RewriteTablePathUtil::stagingPath(java.lang.String,\
+ \ java.lang.String)"
+ justification: "Removing deprecated code for 1.11.0"
+ - code: "java.method.removed"
+ old: "method
org.apache.iceberg.RewriteTablePathUtil.RewriteResult<org.apache.iceberg.DataFile>\
+ \
org.apache.iceberg.RewriteTablePathUtil::rewriteDataManifest(org.apache.iceberg.ManifestFile,\
+ \ org.apache.iceberg.io.OutputFile, org.apache.iceberg.io.FileIO, int,
java.util.Map<java.lang.Integer,\
+ \ org.apache.iceberg.PartitionSpec>, java.lang.String,
java.lang.String) throws\
+ \ java.io.IOException"
+ justification: "Removing deprecated code for 1.11.0"
+ - code: "java.method.removed"
+ old: "method
org.apache.iceberg.RewriteTablePathUtil.RewriteResult<org.apache.iceberg.DeleteFile>\
+ \
org.apache.iceberg.RewriteTablePathUtil::rewriteDeleteManifest(org.apache.iceberg.ManifestFile,\
+ \ org.apache.iceberg.io.OutputFile, org.apache.iceberg.io.FileIO, int,
java.util.Map<java.lang.Integer,\
+ \ org.apache.iceberg.PartitionSpec>, java.lang.String,
java.lang.String, java.lang.String)\
+ \ throws java.io.IOException"
+ justification: "Removing deprecated code for 1.11.0"
+ - code: "java.method.removed"
+ old: "method org.apache.iceberg.Schema
org.apache.iceberg.PartitionStatsHandler::schema(org.apache.iceberg.types.Types.StructType)"
+ justification: "Removing deprecated code for 1.11.0"
+ - code: "java.method.removed"
+ old: "method org.apache.iceberg.TableMetadata
org.apache.iceberg.TableMetadataParser::read(org.apache.iceberg.io.FileIO,\
+ \ org.apache.iceberg.io.InputFile)"
+ justification: "Removing deprecated code for 1.11.0"
+ - code: "java.method.removed"
+ old: "method org.apache.iceberg.encryption.EncryptionManager
org.apache.iceberg.encryption.EncryptionUtil::createEncryptionManager(java.util.Map<java.lang.String,\
+ \ java.lang.String>,
org.apache.iceberg.encryption.KeyManagementClient)"
+ justification: "Removing deprecated code for 1.11.0"
+ - code: "java.method.removed"
+ old: "method org.apache.iceberg.rest.responses.OAuthTokenResponse
org.apache.iceberg.rest.auth.OAuth2Util::exchangeToken(org.apache.iceberg.rest.RESTClient,\
+ \ java.util.Map<java.lang.String, java.lang.String>, java.lang.String,
java.lang.String,\
+ \ java.lang.String, java.lang.String, java.lang.String)"
+ justification: "Removing deprecated code for 1.11.0"
+ - code: "java.method.removed"
+ old: "method org.apache.iceberg.rest.responses.OAuthTokenResponse
org.apache.iceberg.rest.auth.OAuth2Util::exchangeToken(org.apache.iceberg.rest.RESTClient,\
+ \ java.util.Map<java.lang.String, java.lang.String>, java.lang.String,
java.lang.String,\
+ \ java.lang.String, java.lang.String, java.lang.String,
java.lang.String)"
+ justification: "Removing deprecated code for 1.11.0"
+ - code: "java.method.removed"
+ old: "method org.apache.iceberg.rest.responses.OAuthTokenResponse
org.apache.iceberg.rest.auth.OAuth2Util::fetchToken(org.apache.iceberg.rest.RESTClient,\
+ \ java.util.Map<java.lang.String, java.lang.String>, java.lang.String,
java.lang.String)"
+ justification: "Removing deprecated code for 1.11.0"
+ - code: "java.method.removed"
+ old: "method org.apache.iceberg.rest.responses.OAuthTokenResponse
org.apache.iceberg.rest.auth.OAuth2Util::fetchToken(org.apache.iceberg.rest.RESTClient,\
+ \ java.util.Map<java.lang.String, java.lang.String>, java.lang.String,
java.lang.String,\
+ \ java.lang.String)"
+ justification: "Removing deprecated code for 1.11.0"
+ org.apache.iceberg:iceberg-data:
+ - code: "java.class.removed"
+ old: "class org.apache.iceberg.data.PartitionStatsHandler"
+ justification: "Removing deprecated code for 1.11.0"
apache-iceberg-0.14.0:
org.apache.iceberg:iceberg-api:
- code: "java.class.defaultSerializationChanged"
diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIO.java
b/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIO.java
index 41d45db357..d5e51ed74a 100644
--- a/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIO.java
+++ b/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIO.java
@@ -122,7 +122,7 @@ public class S3FileIO
* @param s3 s3 supplier
*/
public S3FileIO(SerializableSupplier<S3Client> s3) {
- this(s3, null, new S3FileIOProperties());
+ this(s3, null);
}
/**
@@ -134,45 +134,10 @@ public class S3FileIO
* @param s3Async s3Async supplier
*/
public S3FileIO(SerializableSupplier<S3Client> s3,
SerializableSupplier<S3AsyncClient> s3Async) {
- this(s3, s3Async, new S3FileIOProperties());
- }
-
- /**
- * Constructor with custom s3 supplier and S3FileIO properties.
- *
- * <p>Calling {@link S3FileIO#initialize(Map)} will overwrite information
set in this constructor.
- *
- * @param s3 s3 supplier
- * @param s3FileIOProperties S3 FileIO properties
- * @deprecated since 1.10.0, will be removed in 1.11.0; use {@link
- * S3FileIO#S3FileIO(SerializableSupplier)} with {@link
S3FileIO#initialize(Map)} instead
- */
- @Deprecated
- public S3FileIO(SerializableSupplier<S3Client> s3, S3FileIOProperties
s3FileIOProperties) {
- this(s3, null, s3FileIOProperties);
- }
-
- /**
- * Constructor with custom s3 supplier, s3Async supplier and S3FileIO
properties.
- *
- * <p>Calling {@link S3FileIO#initialize(Map)} will overwrite information
set in this constructor.
- *
- * @param s3 s3 supplier
- * @param s3Async s3Async supplier
- * @param s3FileIOProperties S3 FileIO properties
- * @deprecated since 1.10.0, will be removed in 1.11.0; use {@link
- * S3FileIO#S3FileIO(SerializableSupplier, SerializableSupplier)} with
{@link
- * S3FileIO#initialize(Map)} instead
- */
- @Deprecated
- public S3FileIO(
- SerializableSupplier<S3Client> s3,
- SerializableSupplier<S3AsyncClient> s3Async,
- S3FileIOProperties s3FileIOProperties) {
this.s3 = s3;
this.s3Async = s3Async;
this.createStack = Thread.currentThread().getStackTrace();
- this.properties = SerializableMap.copyOf(s3FileIOProperties.properties());
+ this.properties = SerializableMap.copyOf(Maps.newHashMap());
}
@Override
diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/S3InputFile.java
b/aws/src/main/java/org/apache/iceberg/aws/s3/S3InputFile.java
index 63ce84e2fe..5e4346fe9f 100644
--- a/aws/src/main/java/org/apache/iceberg/aws/s3/S3InputFile.java
+++ b/aws/src/main/java/org/apache/iceberg/aws/s3/S3InputFile.java
@@ -30,94 +30,6 @@ public class S3InputFile extends BaseS3File implements
InputFile, NativelyEncryp
private NativeFileCryptoParameters nativeDecryptionParameters;
private Long length;
- /**
- * Creates a {@link S3InputFile} from the given parameters.
- *
- * @deprecated since 1.10.0, will be removed in 1.11.0; use {@link
- * S3InputFile#fromLocation(String, PrefixedS3Client, MetricsContext)}
instead.
- */
- @Deprecated
- public static S3InputFile fromLocation(
- String location,
- S3Client client,
- S3FileIOProperties s3FileIOProperties,
- MetricsContext metrics) {
- return new S3InputFile(
- client,
- null,
- new S3URI(location, s3FileIOProperties.bucketToAccessPointMapping()),
- null,
- s3FileIOProperties,
- metrics);
- }
-
- /**
- * Creates a {@link S3InputFile} from the given parameters.
- *
- * @deprecated since 1.10.0, will be removed in 1.11.0; use {@link
- * S3InputFile#fromLocation(String, PrefixedS3Client, MetricsContext)}
instead.
- */
- @Deprecated
- public static S3InputFile fromLocation(
- String location,
- S3Client client,
- S3AsyncClient asyncClient,
- S3FileIOProperties s3FileIOProperties,
- MetricsContext metrics) {
- return new S3InputFile(
- client,
- asyncClient,
- new S3URI(location, s3FileIOProperties.bucketToAccessPointMapping()),
- null,
- s3FileIOProperties,
- metrics);
- }
-
- /**
- * Creates a {@link S3InputFile} from the given parameters.
- *
- * @deprecated since 1.10.0, will be removed in 1.11.0; use {@link
- * S3InputFile#fromLocation(String, long, PrefixedS3Client,
MetricsContext)} instead.
- */
- @Deprecated
- public static S3InputFile fromLocation(
- String location,
- long length,
- S3Client client,
- S3FileIOProperties s3FileIOProperties,
- MetricsContext metrics) {
- return new S3InputFile(
- client,
- null,
- new S3URI(location, s3FileIOProperties.bucketToAccessPointMapping()),
- length > 0 ? length : null,
- s3FileIOProperties,
- metrics);
- }
-
- /**
- * Creates a {@link S3InputFile} from the given parameters.
- *
- * @deprecated since 1.10.0, will be removed in 1.11.0; use {@link
- * S3InputFile#fromLocation(String, long, PrefixedS3Client,
MetricsContext)} instead.
- */
- @Deprecated
- public static S3InputFile fromLocation(
- String location,
- long length,
- S3Client client,
- S3AsyncClient asyncClient,
- S3FileIOProperties s3FileIOProperties,
- MetricsContext metrics) {
- return new S3InputFile(
- client,
- asyncClient,
- new S3URI(location, s3FileIOProperties.bucketToAccessPointMapping()),
- length > 0 ? length : null,
- s3FileIOProperties,
- metrics);
- }
-
static S3InputFile fromLocation(
String location, PrefixedS3Client client, MetricsContext metrics) {
return fromLocation(location, 0, client, metrics);
diff --git a/aws/src/main/java/org/apache/iceberg/aws/s3/S3OutputFile.java
b/aws/src/main/java/org/apache/iceberg/aws/s3/S3OutputFile.java
index fa12bac311..3fcd3cdbd5 100644
--- a/aws/src/main/java/org/apache/iceberg/aws/s3/S3OutputFile.java
+++ b/aws/src/main/java/org/apache/iceberg/aws/s3/S3OutputFile.java
@@ -33,47 +33,6 @@ import software.amazon.awssdk.services.s3.S3Client;
public class S3OutputFile extends BaseS3File implements OutputFile,
NativelyEncryptedFile {
private NativeFileCryptoParameters nativeEncryptionParameters;
- /**
- * Creates a {@link S3OutputFile} from the given parameters.
- *
- * @deprecated since 1.10.0, will be removed in 1.11.0; use {@link
- * S3OutputFile#fromLocation(String, PrefixedS3Client, MetricsContext)}
instead.
- */
- @Deprecated
- public static S3OutputFile fromLocation(
- String location,
- S3Client client,
- S3FileIOProperties s3FileIOProperties,
- MetricsContext metrics) {
- return new S3OutputFile(
- client,
- null,
- new S3URI(location, s3FileIOProperties.bucketToAccessPointMapping()),
- s3FileIOProperties,
- metrics);
- }
-
- /**
- * Creates a {@link S3OutputFile} from the given parameters.
- *
- * @deprecated since 1.10.0, will be removed in 1.11.0; use {@link
- * S3OutputFile#fromLocation(String, PrefixedS3Client, MetricsContext)}
instead.
- */
- @Deprecated
- public static S3OutputFile fromLocation(
- String location,
- S3Client client,
- S3AsyncClient asyncClient,
- S3FileIOProperties s3FileIOProperties,
- MetricsContext metrics) {
- return new S3OutputFile(
- client,
- asyncClient,
- new S3URI(location, s3FileIOProperties.bucketToAccessPointMapping()),
- s3FileIOProperties,
- metrics);
- }
-
static S3OutputFile fromLocation(
String location, PrefixedS3Client client, MetricsContext metrics) {
return new S3OutputFile(
diff --git a/core/src/main/java/org/apache/iceberg/PartitionStatsHandler.java
b/core/src/main/java/org/apache/iceberg/PartitionStatsHandler.java
index 6d2fc96c08..4e7c1b104e 100644
--- a/core/src/main/java/org/apache/iceberg/PartitionStatsHandler.java
+++ b/core/src/main/java/org/apache/iceberg/PartitionStatsHandler.java
@@ -100,25 +100,6 @@ public class PartitionStatsHandler {
.withWriteDefault(Literal.of(0))
.build();
- /**
- * Generates the partition stats file schema based on a combined partition
type which considers
- * all specs in a table.
- *
- * <p>Use this only for format version 1 and 2. For version 3 and above use
{@link
- * #schema(StructType, int)}
- *
- * @param unifiedPartitionType unified partition schema type. Could be
calculated by {@link
- * Partitioning#partitionType(Table)}.
- * @return a schema that corresponds to the provided unified partition type.
- * @deprecated since 1.10.0, will be removed in 1.11.0. Use {@link
#schema(StructType, int)}
- * instead.
- */
- @Deprecated
- public static Schema schema(StructType unifiedPartitionType) {
- Preconditions.checkState(!unifiedPartitionType.fields().isEmpty(), "Table
must be partitioned");
- return v2Schema(unifiedPartitionType);
- }
-
/**
* Generates the partition stats file schema for a given format version
based on a combined
* partition type which considers all specs in a table.
diff --git a/core/src/main/java/org/apache/iceberg/PartitionStatsUtil.java
b/core/src/main/java/org/apache/iceberg/PartitionStatsUtil.java
deleted file mode 100644
index ceb0fb9005..0000000000
--- a/core/src/main/java/org/apache/iceberg/PartitionStatsUtil.java
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg;
-
-import java.io.IOException;
-import java.io.UncheckedIOException;
-import java.util.Collection;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Map;
-import java.util.Queue;
-import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
-import org.apache.iceberg.relocated.com.google.common.collect.Lists;
-import org.apache.iceberg.relocated.com.google.common.collect.Queues;
-import org.apache.iceberg.types.Comparators;
-import org.apache.iceberg.types.Types.StructType;
-import org.apache.iceberg.util.PartitionMap;
-import org.apache.iceberg.util.PartitionUtil;
-import org.apache.iceberg.util.Tasks;
-import org.apache.iceberg.util.ThreadPools;
-
-/**
- * @deprecated since 1.10.0, will be removed in 1.11.0; use {@link
- * org.apache.iceberg.PartitionStatsHandler} directly
- */
-@Deprecated
-public class PartitionStatsUtil {
-
- private PartitionStatsUtil() {}
-
- /**
- * Computes the partition stats for the given snapshot of the table.
- *
- * @param table the table for which partition stats to be computed.
- * @param snapshot the snapshot for which partition stats is computed.
- * @return the collection of {@link PartitionStats}
- */
- public static Collection<PartitionStats> computeStats(Table table, Snapshot
snapshot) {
- Preconditions.checkArgument(table != null, "table cannot be null");
- Preconditions.checkArgument(Partitioning.isPartitioned(table), "table must
be partitioned");
- Preconditions.checkArgument(snapshot != null, "snapshot cannot be null");
-
- StructType partitionType = Partitioning.partitionType(table);
- List<ManifestFile> manifests = snapshot.allManifests(table.io());
- Queue<PartitionMap<PartitionStats>> statsByManifest =
Queues.newConcurrentLinkedQueue();
- Tasks.foreach(manifests)
- .stopOnFailure()
- .throwFailureWhenFinished()
- .executeWith(ThreadPools.getWorkerPool())
- .run(manifest -> statsByManifest.add(collectStats(table, manifest,
partitionType)));
-
- return mergeStats(statsByManifest, table.specs());
- }
-
- /**
- * Sorts the {@link PartitionStats} based on the partition data.
- *
- * @param stats collection of {@link PartitionStats} which needs to be
sorted.
- * @param partitionType unified partition schema.
- * @return the list of {@link PartitionStats}
- */
- public static List<PartitionStats> sortStats(
- Collection<PartitionStats> stats, StructType partitionType) {
- List<PartitionStats> entries = Lists.newArrayList(stats);
- entries.sort(partitionStatsCmp(partitionType));
- return entries;
- }
-
- private static Comparator<PartitionStats> partitionStatsCmp(StructType
partitionType) {
- return Comparator.comparing(PartitionStats::partition,
Comparators.forType(partitionType));
- }
-
- private static PartitionMap<PartitionStats> collectStats(
- Table table, ManifestFile manifest, StructType partitionType) {
- try (ManifestReader<?> reader = openManifest(table, manifest)) {
- PartitionMap<PartitionStats> statsMap =
PartitionMap.create(table.specs());
- int specId = manifest.partitionSpecId();
- PartitionSpec spec = table.specs().get(specId);
- PartitionData keyTemplate = new PartitionData(partitionType);
-
- for (ManifestEntry<?> entry : reader.entries()) {
- ContentFile<?> file = entry.file();
- StructLike coercedPartition =
- PartitionUtil.coercePartition(partitionType, spec,
file.partition());
- StructLike key = keyTemplate.copyFor(coercedPartition);
- Snapshot snapshot = table.snapshot(entry.snapshotId());
- PartitionStats stats =
- statsMap.computeIfAbsent(
- specId,
- ((PartitionData) file.partition()).copy(),
- () -> new PartitionStats(key, specId));
- if (entry.isLive()) {
- stats.liveEntry(file, snapshot);
- } else {
- stats.deletedEntry(snapshot);
- }
- }
-
- return statsMap;
- } catch (IOException e) {
- throw new UncheckedIOException(e);
- }
- }
-
- private static ManifestReader<?> openManifest(Table table, ManifestFile
manifest) {
- List<String> projection = BaseScan.scanColumns(manifest.content());
- return ManifestFiles.open(manifest, table.io()).select(projection);
- }
-
- private static Collection<PartitionStats> mergeStats(
- Queue<PartitionMap<PartitionStats>> statsByManifest, Map<Integer,
PartitionSpec> specs) {
- PartitionMap<PartitionStats> statsMap = PartitionMap.create(specs);
-
- for (PartitionMap<PartitionStats> stats : statsByManifest) {
- stats.forEach(
- (key, value) ->
- statsMap.merge(
- key,
- value,
- (existingEntry, newEntry) -> {
- existingEntry.appendStats(newEntry);
- return existingEntry;
- }));
- }
-
- return statsMap.values();
- }
-}
diff --git a/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java
b/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java
index e947135124..133a156af0 100644
--- a/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java
+++ b/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java
@@ -312,42 +312,6 @@ public class RewriteTablePathUtil {
return manifestFiles;
}
- /**
- * Rewrite a data manifest, replacing path references.
- *
- * @param manifestFile source manifest file to rewrite
- * @param outputFile output file to rewrite manifest file to
- * @param io file io
- * @param format format of the manifest file
- * @param specsById map of partition specs by id
- * @param sourcePrefix source prefix that will be replaced
- * @param targetPrefix target prefix that will replace it
- * @return a copy plan of content files in the manifest that was rewritten
- * @deprecated since 1.10.0, will be removed in 1.11.0
- */
- @Deprecated
- public static RewriteResult<DataFile> rewriteDataManifest(
- ManifestFile manifestFile,
- OutputFile outputFile,
- FileIO io,
- int format,
- Map<Integer, PartitionSpec> specsById,
- String sourcePrefix,
- String targetPrefix)
- throws IOException {
- PartitionSpec spec = specsById.get(manifestFile.partitionSpecId());
- try (ManifestWriter<DataFile> writer =
- ManifestFiles.write(format, spec, outputFile,
manifestFile.snapshotId());
- ManifestReader<DataFile> reader =
- ManifestFiles.read(manifestFile, io,
specsById).select(Arrays.asList("*"))) {
- return StreamSupport.stream(reader.entries().spliterator(), false)
- .map(
- entry ->
- writeDataFileEntry(entry, Set.of(), spec, sourcePrefix,
targetPrefix, writer))
- .reduce(new RewriteResult<>(), RewriteResult::append);
- }
- }
-
/**
* Rewrite a data manifest, replacing path references.
*
@@ -384,47 +348,6 @@ public class RewriteTablePathUtil {
}
}
- /**
- * Rewrite a delete manifest, replacing path references.
- *
- * @param manifestFile source delete manifest to rewrite
- * @param outputFile output file to rewrite manifest file to
- * @param io file io
- * @param format format of the manifest file
- * @param specsById map of partition specs by id
- * @param sourcePrefix source prefix that will be replaced
- * @param targetPrefix target prefix that will replace it
- * @param stagingLocation staging location for rewritten files (referred
delete file will be
- * rewritten here)
- * @return a copy plan of content files in the manifest that was rewritten
- * @deprecated since 1.10.0, will be removed in 1.11.0
- */
- @Deprecated
- public static RewriteResult<DeleteFile> rewriteDeleteManifest(
- ManifestFile manifestFile,
- OutputFile outputFile,
- FileIO io,
- int format,
- Map<Integer, PartitionSpec> specsById,
- String sourcePrefix,
- String targetPrefix,
- String stagingLocation)
- throws IOException {
- PartitionSpec spec = specsById.get(manifestFile.partitionSpecId());
- try (ManifestWriter<DeleteFile> writer =
- ManifestFiles.writeDeleteManifest(format, spec, outputFile,
manifestFile.snapshotId());
- ManifestReader<DeleteFile> reader =
- ManifestFiles.readDeleteManifest(manifestFile, io, specsById)
- .select(Arrays.asList("*"))) {
- return StreamSupport.stream(reader.entries().spliterator(), false)
- .map(
- entry ->
- writeDeleteFileEntry(
- entry, Set.of(), spec, sourcePrefix, targetPrefix,
stagingLocation, writer))
- .reduce(new RewriteResult<>(), RewriteResult::append);
- }
- }
-
/**
* Rewrite a delete manifest, replacing path references.
*
@@ -717,20 +640,6 @@ public class RewriteTablePathUtil {
return path.endsWith(FILE_SEPARATOR) ? path : path + FILE_SEPARATOR;
}
- /**
- * Construct a staging path under a given staging directory
- *
- * @param originalPath source path
- * @param stagingDir staging directory
- * @return a staging path under the staging directory, based on the original
path
- * @deprecated since 1.10.0, will be removed in 1.11.0. Use {@link
#stagingPath(String, String,
- * String)} instead to avoid filename conflicts
- */
- @Deprecated
- public static String stagingPath(String originalPath, String stagingDir) {
- return stagingDir + fileName(originalPath);
- }
-
/**
* Construct a staging path under a given staging directory, preserving
relative directory
* structure to avoid conflicts when multiple files have the same name but
different paths.
diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java
b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java
index 396396f41b..eeeeeab8a6 100644
--- a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java
+++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java
@@ -294,14 +294,6 @@ public class TableMetadataParser {
return read(io.newInputFile(path));
}
- /**
- * @deprecated since 1.10.0, will be removed in 1.11.0; use {@link
#read(InputFile)} instead.
- */
- @Deprecated
- public static TableMetadata read(FileIO io, InputFile file) {
- return read(file);
- }
-
public static TableMetadata read(InputFile file) {
Codec codec = Codec.fromFileName(file.location());
try (InputStream is =
diff --git
a/core/src/main/java/org/apache/iceberg/encryption/EncryptionUtil.java
b/core/src/main/java/org/apache/iceberg/encryption/EncryptionUtil.java
index 922cac455d..36efde6299 100644
--- a/core/src/main/java/org/apache/iceberg/encryption/EncryptionUtil.java
+++ b/core/src/main/java/org/apache/iceberg/encryption/EncryptionUtil.java
@@ -71,18 +71,6 @@ public class EncryptionUtil {
return kmsClient;
}
- /**
- * Create a standard encryption manager.
- *
- * @deprecated will be removed in 1.11.0; use {@link
#createEncryptionManager(List, Map,
- * KeyManagementClient)} instead.
- */
- @Deprecated
- public static EncryptionManager createEncryptionManager(
- Map<String, String> tableProperties, KeyManagementClient kmsClient) {
- return createEncryptionManager(List.of(), tableProperties, kmsClient);
- }
-
static EncryptionManager createEncryptionManager(
List<EncryptedKey> keys, Map<String, String> tableProperties,
KeyManagementClient kmsClient) {
Preconditions.checkArgument(kmsClient != null, "Invalid KMS client: null");
diff --git a/core/src/main/java/org/apache/iceberg/rest/auth/OAuth2Util.java
b/core/src/main/java/org/apache/iceberg/rest/auth/OAuth2Util.java
index d931b49610..c2b47e6e94 100644
--- a/core/src/main/java/org/apache/iceberg/rest/auth/OAuth2Util.java
+++ b/core/src/main/java/org/apache/iceberg/rest/auth/OAuth2Util.java
@@ -48,7 +48,6 @@ import org.apache.iceberg.rest.HTTPRequest;
import org.apache.iceberg.rest.ImmutableHTTPRequest;
import org.apache.iceberg.rest.RESTClient;
import org.apache.iceberg.rest.RESTUtil;
-import org.apache.iceberg.rest.ResourcePaths;
import org.apache.iceberg.rest.responses.OAuthTokenResponse;
import org.apache.iceberg.util.JsonUtil;
import org.apache.iceberg.util.Pair;
@@ -186,7 +185,12 @@ public class OAuth2Util {
}
return fetchToken(
- client, Map.of(), config.credential(), config.scope(),
config.oauth2ServerUri());
+ client,
+ Map.of(),
+ config.credential(),
+ config.scope(),
+ config.oauth2ServerUri(),
+ ImmutableMap.of());
}
}
@@ -221,59 +225,6 @@ public class OAuth2Util {
return response;
}
- /**
- * @deprecated since 1.10.0, will be removed in 1.11.0; use {@link
- * OAuth2Util#exchangeToken(RESTClient, Map, String, String, String,
String, String, String,
- * Map)} instead.
- */
- @Deprecated
- public static OAuthTokenResponse exchangeToken(
- RESTClient client,
- Map<String, String> headers,
- String subjectToken,
- String subjectTokenType,
- String actorToken,
- String actorTokenType,
- String scope) {
- return exchangeToken(
- client,
- headers,
- subjectToken,
- subjectTokenType,
- actorToken,
- actorTokenType,
- scope,
- ResourcePaths.tokens(),
- ImmutableMap.of());
- }
-
- /**
- * @deprecated since 1.10.0, will be removed in 1.11.0; use {@link
- * OAuth2Util#exchangeToken(RESTClient, Map, String, String, String,
String, String, String,
- * Map)} instead.
- */
- @Deprecated
- public static OAuthTokenResponse exchangeToken(
- RESTClient client,
- Map<String, String> headers,
- String subjectToken,
- String subjectTokenType,
- String actorToken,
- String actorTokenType,
- String scope,
- String oauth2ServerUri) {
- return exchangeToken(
- client,
- headers,
- subjectToken,
- subjectTokenType,
- actorToken,
- actorTokenType,
- scope,
- oauth2ServerUri,
- ImmutableMap.of());
- }
-
public static OAuthTokenResponse fetchToken(
RESTClient client,
Map<String, String> headers,
@@ -299,33 +250,6 @@ public class OAuth2Util {
return response;
}
- /**
- * @deprecated since 1.10.0, will be removed in 1.11.0; use {@link
- * OAuth2Util#fetchToken(RESTClient, Map, String, String, String, Map)}
instead.
- */
- @Deprecated
- public static OAuthTokenResponse fetchToken(
- RESTClient client, Map<String, String> headers, String credential,
String scope) {
-
- return fetchToken(
- client, headers, credential, scope, ResourcePaths.tokens(),
ImmutableMap.of());
- }
-
- /**
- * @deprecated since 1.10.0, will be removed in 1.11.0; use {@link
- * OAuth2Util#fetchToken(RESTClient, Map, String, String, String, Map)}
instead.
- */
- @Deprecated
- public static OAuthTokenResponse fetchToken(
- RESTClient client,
- Map<String, String> headers,
- String credential,
- String scope,
- String oauth2ServerUri) {
-
- return fetchToken(client, headers, credential, scope, oauth2ServerUri,
ImmutableMap.of());
- }
-
private static Map<String, String> tokenExchangeRequest(
String subjectToken,
String subjectTokenType,
@@ -638,7 +562,8 @@ public class OAuth2Util {
return refreshToken(
client, config, basicHeaders, token(), tokenType(),
optionalOAuthParams());
} else {
- return fetchToken(client, Map.of(), credential(), scope(),
oauth2ServerUri());
+ return fetchToken(
+ client, Map.of(), credential(), scope(), oauth2ServerUri(),
ImmutableMap.of());
}
}
diff --git
a/core/src/test/java/org/apache/iceberg/PartitionStatsHandlerTestBase.java
b/core/src/test/java/org/apache/iceberg/PartitionStatsHandlerTestBase.java
index cf39e4611b..71fdc9507d 100644
--- a/core/src/test/java/org/apache/iceberg/PartitionStatsHandlerTestBase.java
+++ b/core/src/test/java/org/apache/iceberg/PartitionStatsHandlerTestBase.java
@@ -617,7 +617,7 @@ public abstract class PartitionStatsHandlerTestBase {
Table testTable =
TestTables.create(tempDir("old_schema"), "old_schema", SCHEMA, spec,
2, fileFormatProperty);
Types.StructType partitionType = Partitioning.partitionType(testTable);
- Schema newSchema = PartitionStatsHandler.schema(partitionType);
+ Schema newSchema = PartitionStatsHandler.schema(partitionType, 2);
Schema oldSchema = invalidOldSchema(partitionType);
PartitionStatisticsFile invalidStatisticsFile =
@@ -667,7 +667,7 @@ public abstract class PartitionStatsHandlerTestBase {
List<PartitionStats> partitionStats;
try (CloseableIterable<PartitionStats> recordIterator =
PartitionStatsHandler.readPartitionStatsFile(
- PartitionStatsHandler.schema(partitionType),
+ PartitionStatsHandler.schema(partitionType, 2),
testTable.io().newInputFile(statisticsFile.path()))) {
partitionStats = Lists.newArrayList(recordIterator);
}
diff --git a/core/src/test/java/org/apache/iceberg/TestPartitionStatsUtil.java
b/core/src/test/java/org/apache/iceberg/TestPartitionStatsUtil.java
deleted file mode 100644
index 98e75f2626..0000000000
--- a/core/src/test/java/org/apache/iceberg/TestPartitionStatsUtil.java
+++ /dev/null
@@ -1,588 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg;
-
-import static org.apache.iceberg.types.Types.NestedField.optional;
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.assertj.core.api.Assertions.assertThatThrownBy;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-import org.apache.iceberg.expressions.Expressions;
-import org.apache.iceberg.relocated.com.google.common.collect.Lists;
-import org.apache.iceberg.types.Types;
-import org.assertj.core.groups.Tuple;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-
-/**
- * @deprecated since 1.10.0, will be removed in 1.11.0; covered by
`PartitionStatsHandlerTestBase`.
- */
-@Deprecated
-public class TestPartitionStatsUtil {
- private static final Schema SCHEMA =
- new Schema(
- optional(1, "c1", Types.IntegerType.get()),
- optional(2, "c2", Types.StringType.get()),
- optional(3, "c3", Types.StringType.get()));
-
- protected static final PartitionSpec SPEC =
- PartitionSpec.builderFor(SCHEMA).identity("c2").identity("c3").build();
-
- @TempDir public File temp;
-
- @Test
- public void testPartitionStatsOnEmptyTable() throws Exception {
- Table testTable = TestTables.create(tempDir("empty_table"), "empty_table",
SCHEMA, SPEC, 2);
- assertThatThrownBy(
- () -> PartitionStatsUtil.computeStats(testTable,
testTable.currentSnapshot()))
- .isInstanceOf(IllegalArgumentException.class)
- .hasMessage("snapshot cannot be null");
- }
-
- @Test
- public void testPartitionStatsOnUnPartitionedTable() throws Exception {
- Table testTable =
- TestTables.create(
- tempDir("unpartitioned_table"),
- "unpartitioned_table",
- SCHEMA,
- PartitionSpec.unpartitioned(),
- 2);
-
- List<DataFile> files = prepareDataFiles(testTable);
- AppendFiles appendFiles = testTable.newAppend();
- files.forEach(appendFiles::appendFile);
- appendFiles.commit();
-
- assertThatThrownBy(
- () -> PartitionStatsUtil.computeStats(testTable,
testTable.currentSnapshot()))
- .isInstanceOf(IllegalArgumentException.class)
- .hasMessage("table must be partitioned");
- }
-
- @Test
- public void testPartitionStats() throws Exception {
- Table testTable =
- TestTables.create(
- tempDir("partition_stats_compute"), "partition_stats_compute",
SCHEMA, SPEC, 2);
-
- List<DataFile> files = prepareDataFiles(testTable);
- for (int i = 0; i < 3; i++) {
- // insert same set of records thrice to have a new manifest files
- AppendFiles appendFiles = testTable.newAppend();
- files.forEach(appendFiles::appendFile);
- appendFiles.commit();
- }
-
- Snapshot snapshot1 = testTable.currentSnapshot();
- Types.StructType partitionType = Partitioning.partitionType(testTable);
- computeAndValidatePartitionStats(
- testTable,
- Tuple.tuple(
- partitionData(partitionType, "foo", "A"),
- 0,
- 3 * files.get(0).recordCount(),
- 3,
- 3 * files.get(0).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "foo", "B"),
- 0,
- 3 * files.get(1).recordCount(),
- 3,
- 3 * files.get(1).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "bar", "A"),
- 0,
- 3 * files.get(2).recordCount(),
- 3,
- 3 * files.get(2).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "bar", "B"),
- 0,
- 3 * files.get(3).recordCount(),
- 3,
- 3 * files.get(3).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId()));
-
- DeleteFile deleteFile =
- FileGenerationUtil.generatePositionDeleteFile(testTable,
TestHelpers.Row.of("foo", "A"));
- testTable.newRowDelta().addDeletes(deleteFile).commit();
- Snapshot snapshot2 = testTable.currentSnapshot();
-
- DeleteFile eqDelete =
- FileGenerationUtil.generateEqualityDeleteFile(testTable,
TestHelpers.Row.of("bar", "B"));
- testTable.newRowDelta().addDeletes(eqDelete).commit();
- Snapshot snapshot3 = testTable.currentSnapshot();
-
- computeAndValidatePartitionStats(
- testTable,
- Tuple.tuple(
- partitionData(partitionType, "foo", "A"),
- 0,
- 3 * files.get(0).recordCount(),
- 3,
- 3 * files.get(0).fileSizeInBytes(),
- deleteFile.recordCount(), // position delete file count
- 1, // one position delete file
- 0L,
- 0,
- null,
- snapshot2.timestampMillis(), // new snapshot from pos delete commit
- snapshot2.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "foo", "B"),
- 0,
- 3 * files.get(1).recordCount(),
- 3,
- 3 * files.get(1).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "bar", "A"),
- 0,
- 3 * files.get(2).recordCount(),
- 3,
- 3 * files.get(2).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "bar", "B"),
- 0,
- 3 * files.get(3).recordCount(),
- 3,
- 3 * files.get(3).fileSizeInBytes(),
- 0L,
- 0,
- eqDelete.recordCount(),
- 1, // one equality delete file
- null,
- snapshot3.timestampMillis(), // new snapshot from equality delete
commit
- snapshot3.snapshotId()));
- }
-
- @Test
- @SuppressWarnings("MethodLength")
- public void testPartitionStatsWithSchemaEvolution() throws Exception {
- final PartitionSpec specBefore =
PartitionSpec.builderFor(SCHEMA).identity("c2").build();
-
- Table testTable =
- TestTables.create(
- tempDir("partition_stats_schema_evolve"),
- "partition_stats_schema_evolve",
- SCHEMA,
- specBefore,
- SortOrder.unsorted(),
- 2);
-
- List<DataFile> dataFiles = prepareDataFilesOnePart(testTable);
- for (int i = 0; i < 2; i++) {
- AppendFiles appendFiles = testTable.newAppend();
- dataFiles.forEach(appendFiles::appendFile);
- appendFiles.commit();
- }
- Snapshot snapshot1 = testTable.currentSnapshot();
- Types.StructType partitionType = Partitioning.partitionType(testTable);
-
- computeAndValidatePartitionStats(
- testTable,
- Tuple.tuple(
- partitionData(partitionType, "foo"),
- 0,
- 2 * dataFiles.get(0).recordCount(),
- 2,
- 2 * dataFiles.get(0).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "bar"),
- 0,
- 2 * dataFiles.get(1).recordCount(),
- 2,
- 2 * dataFiles.get(1).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId()));
-
- // Evolve the partition spec to include c3
- testTable.updateSpec().addField("c3").commit();
- List<DataFile> filesWithNewSpec = prepareDataFiles(testTable);
- filesWithNewSpec.add(
- FileGenerationUtil.generateDataFile(testTable,
TestHelpers.Row.of("bar", null)));
- partitionType = Partitioning.partitionType(testTable);
-
- AppendFiles appendFiles = testTable.newAppend();
- filesWithNewSpec.forEach(appendFiles::appendFile);
- appendFiles.commit();
- Snapshot snapshot2 = testTable.currentSnapshot();
-
- computeAndValidatePartitionStats(
- testTable,
- Tuple.tuple(
- partitionData(partitionType, "foo", null), // unified tuple
- 0, // old spec id as the record is unmodified
- 2 * dataFiles.get(0).recordCount(),
- 2,
- 2 * dataFiles.get(0).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "bar", null),
- 0, // old spec id for "bar, null" before evolution
- 2 * dataFiles.get(1).recordCount(),
- 2,
- 2 * dataFiles.get(1).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "bar", null),
- 1, // new spec id for "bar, null" after evolution
- filesWithNewSpec.get(4).recordCount(),
- 1,
- filesWithNewSpec.get(4).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot2.timestampMillis(), // new snapshot
- snapshot2.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "foo", "A"),
- 1, // new spec id
- filesWithNewSpec.get(0).recordCount(),
- 1,
- filesWithNewSpec.get(0).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot2.timestampMillis(), // new snapshot
- snapshot2.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "foo", "B"),
- 1,
- filesWithNewSpec.get(1).recordCount(),
- 1,
- filesWithNewSpec.get(1).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot2.timestampMillis(),
- snapshot2.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "bar", "A"),
- 1,
- filesWithNewSpec.get(2).recordCount(),
- 1,
- filesWithNewSpec.get(2).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot2.timestampMillis(),
- snapshot2.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "bar", "B"),
- 1,
- filesWithNewSpec.get(3).recordCount(),
- 1,
- filesWithNewSpec.get(3).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot2.timestampMillis(),
- snapshot2.snapshotId()));
- }
-
- @Test
- @SuppressWarnings("MethodLength")
- public void testPartitionStatsWithBucketTransformSchemaEvolution() throws
Exception {
- PartitionSpec specBefore =
- PartitionSpec.builderFor(SCHEMA).identity("c2").bucket("c1",
2).build();
-
- Table testTable =
- TestTables.create(
- tempDir("partition_stats_schema_evolve2"),
- "partition_stats_schema_evolve2",
- SCHEMA,
- specBefore,
- SortOrder.unsorted(),
- 2);
-
- List<DataFile> dataFiles = Lists.newArrayList();
- for (int i = 0; i < 2; i++) {
- dataFiles.add(FileGenerationUtil.generateDataFile(testTable,
TestHelpers.Row.of("foo", i)));
- }
-
- AppendFiles appendFiles = testTable.newAppend();
- dataFiles.forEach(appendFiles::appendFile);
- appendFiles.commit();
-
- Snapshot snapshot1 = testTable.currentSnapshot();
- Types.StructType partitionType = Partitioning.partitionType(testTable);
-
- computeAndValidatePartitionStats(
- testTable,
- Tuple.tuple(
- partitionData(partitionType, "foo", 0),
- 0,
- dataFiles.get(0).recordCount(),
- 1,
- dataFiles.get(0).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "foo", 1),
- 0,
- dataFiles.get(1).recordCount(),
- 1,
- dataFiles.get(1).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId()));
-
- // Evolve the partition spec
- testTable
- .updateSpec()
- .removeField(Expressions.bucket("c1", 2))
- .addField(Expressions.bucket("c1", 4))
- .commit();
-
- List<DataFile> filesWithNewSpec = Lists.newArrayList();
- for (int i = 0; i < 4; i++) {
- filesWithNewSpec.add(
- FileGenerationUtil.generateDataFile(testTable,
TestHelpers.Row.of("bar", i)));
- }
-
- appendFiles = testTable.newAppend();
- filesWithNewSpec.forEach(appendFiles::appendFile);
- appendFiles.commit();
-
- Snapshot snapshot2 = testTable.currentSnapshot();
- partitionType = Partitioning.partitionType(testTable);
-
- computeAndValidatePartitionStats(
- testTable,
- Tuple.tuple(
- partitionData(partitionType, "foo", 0, null),
- 0,
- dataFiles.get(0).recordCount(),
- 1,
- dataFiles.get(0).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "foo", 1, null),
- 0,
- dataFiles.get(1).recordCount(),
- 1,
- dataFiles.get(1).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "bar", null, 0),
- 1,
- filesWithNewSpec.get(0).recordCount(),
- 1,
- filesWithNewSpec.get(0).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot2.timestampMillis(),
- snapshot2.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "bar", null, 1),
- 1,
- filesWithNewSpec.get(1).recordCount(),
- 1,
- filesWithNewSpec.get(1).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot2.timestampMillis(),
- snapshot2.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "bar", null, 2),
- 1,
- filesWithNewSpec.get(2).recordCount(),
- 1,
- filesWithNewSpec.get(2).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot2.timestampMillis(),
- snapshot2.snapshotId()),
- Tuple.tuple(
- partitionData(partitionType, "bar", null, 3),
- 1,
- filesWithNewSpec.get(3).recordCount(),
- 1,
- filesWithNewSpec.get(3).fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot2.timestampMillis(),
- snapshot2.snapshotId()));
- }
-
- private static PartitionData partitionData(Types.StructType partitionType,
Object... fields) {
- PartitionData partitionData = new PartitionData(partitionType);
- for (int i = 0; i < fields.length; i++) {
- partitionData.set(i, fields[i]);
- }
-
- return partitionData;
- }
-
- private static List<DataFile> prepareDataFiles(Table table) {
- List<DataFile> dataFiles = Lists.newArrayList();
- dataFiles.add(FileGenerationUtil.generateDataFile(table,
TestHelpers.Row.of("foo", "A")));
- dataFiles.add(FileGenerationUtil.generateDataFile(table,
TestHelpers.Row.of("foo", "B")));
- dataFiles.add(FileGenerationUtil.generateDataFile(table,
TestHelpers.Row.of("bar", "A")));
- dataFiles.add(FileGenerationUtil.generateDataFile(table,
TestHelpers.Row.of("bar", "B")));
-
- return dataFiles;
- }
-
- private static List<DataFile> prepareDataFilesOnePart(Table table) {
- List<DataFile> dataFiles = Lists.newArrayList();
- dataFiles.add(FileGenerationUtil.generateDataFile(table,
TestHelpers.Row.of("foo")));
- dataFiles.add(FileGenerationUtil.generateDataFile(table,
TestHelpers.Row.of("bar")));
-
- return dataFiles;
- }
-
- private static void computeAndValidatePartitionStats(Table testTable,
Tuple... expectedValues) {
- // compute and commit partition stats file
- Collection<PartitionStats> result =
- PartitionStatsUtil.computeStats(testTable,
testTable.currentSnapshot());
-
- assertThat(result)
- .extracting(
- PartitionStats::partition,
- PartitionStats::specId,
- PartitionStats::dataRecordCount,
- PartitionStats::dataFileCount,
- PartitionStats::totalDataFileSizeInBytes,
- PartitionStats::positionDeleteRecordCount,
- PartitionStats::positionDeleteFileCount,
- PartitionStats::equalityDeleteRecordCount,
- PartitionStats::equalityDeleteFileCount,
- PartitionStats::totalRecords,
- PartitionStats::lastUpdatedAt,
- PartitionStats::lastUpdatedSnapshotId)
- .containsExactlyInAnyOrder(expectedValues);
- }
-
- private File tempDir(String folderName) throws IOException {
- return java.nio.file.Files.createTempDirectory(temp.toPath(),
folderName).toFile();
- }
-}
diff --git
a/core/src/test/java/org/apache/iceberg/TestRewriteTablePathUtil.java
b/core/src/test/java/org/apache/iceberg/TestRewriteTablePathUtil.java
index 8a688bebf5..851e423fb6 100644
--- a/core/src/test/java/org/apache/iceberg/TestRewriteTablePathUtil.java
+++ b/core/src/test/java/org/apache/iceberg/TestRewriteTablePathUtil.java
@@ -50,9 +50,10 @@ public class TestRewriteTablePathUtil {
public void testStagingPathBackwardCompatibility() {
// Test that the deprecated method still works
String originalPath = "/some/path/file.parquet";
+ String sourcePrefix = "/some/path";
String stagingDir = "/staging/";
- String result = RewriteTablePathUtil.stagingPath(originalPath, stagingDir);
+ String result = RewriteTablePathUtil.stagingPath(originalPath,
sourcePrefix, stagingDir);
assertThat(result).isEqualTo("/staging/file.parquet");
}
@@ -75,16 +76,8 @@ public class TestRewriteTablePathUtil {
String stagingDir = "/staging/";
String fileDirectlyUnderPrefix = "/source/table/file.parquet";
- // Test new method
String newMethodResult =
RewriteTablePathUtil.stagingPath(fileDirectlyUnderPrefix,
sourcePrefix, stagingDir);
-
- // Test old deprecated method
- String oldMethodResult =
RewriteTablePathUtil.stagingPath(fileDirectlyUnderPrefix, stagingDir);
-
- // Both methods should behave the same when there's no middle part
assertThat(newMethodResult).isEqualTo("/staging/file.parquet");
- assertThat(oldMethodResult).isEqualTo("/staging/file.parquet");
- assertThat(newMethodResult).isEqualTo(oldMethodResult);
}
}
diff --git
a/data/src/jmh/java/org/apache/iceberg/PartitionStatsHandlerBenchmark.java
b/data/src/jmh/java/org/apache/iceberg/PartitionStatsHandlerBenchmark.java
index ec0bfa9306..938dc28637 100644
--- a/data/src/jmh/java/org/apache/iceberg/PartitionStatsHandlerBenchmark.java
+++ b/data/src/jmh/java/org/apache/iceberg/PartitionStatsHandlerBenchmark.java
@@ -104,7 +104,7 @@ public class PartitionStatsHandlerBenchmark {
List<PartitionStats> stats;
try (CloseableIterable<PartitionStats> recordIterator =
PartitionStatsHandler.readPartitionStatsFile(
- PartitionStatsHandler.schema(Partitioning.partitionType(table)),
+ PartitionStatsHandler.schema(Partitioning.partitionType(table), 2),
Files.localInput(statisticsFile.path()))) {
stats = Lists.newArrayList(recordIterator);
}
diff --git
a/data/src/main/java/org/apache/iceberg/data/PartitionStatsHandler.java
b/data/src/main/java/org/apache/iceberg/data/PartitionStatsHandler.java
deleted file mode 100644
index e4901f4e8c..0000000000
--- a/data/src/main/java/org/apache/iceberg/data/PartitionStatsHandler.java
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.data;
-
-import static org.apache.iceberg.TableProperties.DEFAULT_FILE_FORMAT;
-import static org.apache.iceberg.TableProperties.DEFAULT_FILE_FORMAT_DEFAULT;
-
-import java.io.IOException;
-import java.util.Collection;
-import java.util.List;
-import java.util.Locale;
-import java.util.UUID;
-import org.apache.iceberg.FileFormat;
-import org.apache.iceberg.HasTableOperations;
-import org.apache.iceberg.ImmutableGenericPartitionStatisticsFile;
-import org.apache.iceberg.PartitionSpec;
-import org.apache.iceberg.PartitionStatisticsFile;
-import org.apache.iceberg.PartitionStats;
-import org.apache.iceberg.PartitionStatsUtil;
-import org.apache.iceberg.Partitioning;
-import org.apache.iceberg.Schema;
-import org.apache.iceberg.Snapshot;
-import org.apache.iceberg.StructLike;
-import org.apache.iceberg.Table;
-import org.apache.iceberg.avro.Avro;
-import org.apache.iceberg.avro.InternalReader;
-import org.apache.iceberg.data.parquet.InternalWriter;
-import org.apache.iceberg.io.CloseableIterable;
-import org.apache.iceberg.io.DataWriter;
-import org.apache.iceberg.io.InputFile;
-import org.apache.iceberg.io.OutputFile;
-import org.apache.iceberg.parquet.Parquet;
-import
org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
-import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
-import org.apache.iceberg.types.Types.IntegerType;
-import org.apache.iceberg.types.Types.LongType;
-import org.apache.iceberg.types.Types.NestedField;
-import org.apache.iceberg.types.Types.StructType;
-
-/**
- * Computes, writes and reads the {@link PartitionStatisticsFile}. Uses
generic readers and writers
- * to support writing and reading of the stats in table default format.
- *
- * @deprecated since 1.10.0, will be removed in 1.11.0; use {@link
- * org.apache.iceberg.PartitionStatsHandler} from core module
- */
-@Deprecated
-public class PartitionStatsHandler {
-
- private PartitionStatsHandler() {}
-
- public static final int PARTITION_FIELD_ID = 0;
- public static final String PARTITION_FIELD_NAME = "partition";
- public static final NestedField SPEC_ID = NestedField.required(1, "spec_id",
IntegerType.get());
- public static final NestedField DATA_RECORD_COUNT =
- NestedField.required(2, "data_record_count", LongType.get());
- public static final NestedField DATA_FILE_COUNT =
- NestedField.required(3, "data_file_count", IntegerType.get());
- public static final NestedField TOTAL_DATA_FILE_SIZE_IN_BYTES =
- NestedField.required(4, "total_data_file_size_in_bytes", LongType.get());
- public static final NestedField POSITION_DELETE_RECORD_COUNT =
- NestedField.optional(5, "position_delete_record_count", LongType.get());
- public static final NestedField POSITION_DELETE_FILE_COUNT =
- NestedField.optional(6, "position_delete_file_count", IntegerType.get());
- public static final NestedField EQUALITY_DELETE_RECORD_COUNT =
- NestedField.optional(7, "equality_delete_record_count", LongType.get());
- public static final NestedField EQUALITY_DELETE_FILE_COUNT =
- NestedField.optional(8, "equality_delete_file_count", IntegerType.get());
- public static final NestedField TOTAL_RECORD_COUNT =
- NestedField.optional(9, "total_record_count", LongType.get());
- public static final NestedField LAST_UPDATED_AT =
- NestedField.optional(10, "last_updated_at", LongType.get());
- public static final NestedField LAST_UPDATED_SNAPSHOT_ID =
- NestedField.optional(11, "last_updated_snapshot_id", LongType.get());
-
- /**
- * Generates the partition stats file schema based on a combined partition
type which considers
- * all specs in a table.
- *
- * @param unifiedPartitionType unified partition schema type. Could be
calculated by {@link
- * Partitioning#partitionType(Table)}.
- * @return a schema that corresponds to the provided unified partition type.
- */
- public static Schema schema(StructType unifiedPartitionType) {
- Preconditions.checkState(!unifiedPartitionType.fields().isEmpty(), "Table
must be partitioned");
- return new Schema(
- NestedField.required(PARTITION_FIELD_ID, PARTITION_FIELD_NAME,
unifiedPartitionType),
- SPEC_ID,
- DATA_RECORD_COUNT,
- DATA_FILE_COUNT,
- TOTAL_DATA_FILE_SIZE_IN_BYTES,
- POSITION_DELETE_RECORD_COUNT,
- POSITION_DELETE_FILE_COUNT,
- EQUALITY_DELETE_RECORD_COUNT,
- EQUALITY_DELETE_FILE_COUNT,
- TOTAL_RECORD_COUNT,
- LAST_UPDATED_AT,
- LAST_UPDATED_SNAPSHOT_ID);
- }
-
- /**
- * Computes and writes the {@link PartitionStatisticsFile} for a given
table's current snapshot.
- *
- * @param table The {@link Table} for which the partition statistics is
computed.
- * @return {@link PartitionStatisticsFile} for the current snapshot, or null
if no statistics are
- * present.
- */
- public static PartitionStatisticsFile computeAndWriteStatsFile(Table table)
throws IOException {
- if (table.currentSnapshot() == null) {
- return null;
- }
-
- return computeAndWriteStatsFile(table,
table.currentSnapshot().snapshotId());
- }
-
- /**
- * Computes and writes the {@link PartitionStatisticsFile} for a given table
and snapshot.
- *
- * @param table The {@link Table} for which the partition statistics is
computed.
- * @param snapshotId snapshot for which partition statistics are computed.
- * @return {@link PartitionStatisticsFile} for the given snapshot, or null
if no statistics are
- * present.
- */
- public static PartitionStatisticsFile computeAndWriteStatsFile(Table table,
long snapshotId)
- throws IOException {
- Snapshot snapshot = table.snapshot(snapshotId);
- Preconditions.checkArgument(snapshot != null, "Snapshot not found: %s",
snapshotId);
-
- Collection<PartitionStats> stats = PartitionStatsUtil.computeStats(table,
snapshot);
- if (stats.isEmpty()) {
- return null;
- }
-
- StructType partitionType = Partitioning.partitionType(table);
- List<PartitionStats> sortedStats = PartitionStatsUtil.sortStats(stats,
partitionType);
- return writePartitionStatsFile(
- table, snapshot.snapshotId(), schema(partitionType), sortedStats);
- }
-
- @VisibleForTesting
- static PartitionStatisticsFile writePartitionStatsFile(
- Table table, long snapshotId, Schema dataSchema,
Iterable<PartitionStats> records)
- throws IOException {
- FileFormat fileFormat =
- FileFormat.fromString(
- table.properties().getOrDefault(DEFAULT_FILE_FORMAT,
DEFAULT_FILE_FORMAT_DEFAULT));
-
- OutputFile outputFile = newPartitionStatsFile(table, fileFormat,
snapshotId);
-
- try (DataWriter<StructLike> writer = dataWriter(dataSchema, outputFile,
fileFormat)) {
- records.iterator().forEachRemaining(writer::write);
- }
-
- return ImmutableGenericPartitionStatisticsFile.builder()
- .snapshotId(snapshotId)
- .path(outputFile.location())
- .fileSizeInBytes(outputFile.toInputFile().getLength())
- .build();
- }
-
- /**
- * Reads partition statistics from the specified {@link InputFile} using
given schema.
- *
- * @param schema The {@link Schema} of the partition statistics file.
- * @param inputFile An {@link InputFile} pointing to the partition stats
file.
- */
- public static CloseableIterable<PartitionStats> readPartitionStatsFile(
- Schema schema, InputFile inputFile) {
- CloseableIterable<StructLike> records = dataReader(schema, inputFile);
- return CloseableIterable.transform(records,
PartitionStatsHandler::recordToPartitionStats);
- }
-
- private static OutputFile newPartitionStatsFile(
- Table table, FileFormat fileFormat, long snapshotId) {
- Preconditions.checkArgument(
- table instanceof HasTableOperations,
- "Table must have operations to retrieve metadata location");
-
- return table
- .io()
- .newOutputFile(
- ((HasTableOperations) table)
- .operations()
- .metadataFileLocation(
- fileFormat.addExtension(
- String.format(
- Locale.ROOT, "partition-stats-%d-%s", snapshotId,
UUID.randomUUID()))));
- }
-
- private static DataWriter<StructLike> dataWriter(
- Schema dataSchema, OutputFile outputFile, FileFormat fileFormat) throws
IOException {
- switch (fileFormat) {
- case PARQUET:
- return Parquet.writeData(outputFile)
- .schema(dataSchema)
- .createWriterFunc(InternalWriter::createWriter)
- .withSpec(PartitionSpec.unpartitioned())
- .build();
- case AVRO:
- return Avro.writeData(outputFile)
- .schema(dataSchema)
- .createWriterFunc(org.apache.iceberg.avro.InternalWriter::create)
- .withSpec(PartitionSpec.unpartitioned())
- .build();
- case ORC:
- // Internal writers are not supported for ORC yet.
- default:
- throw new UnsupportedOperationException("Unsupported file format:" +
fileFormat.name());
- }
- }
-
- private static CloseableIterable<StructLike> dataReader(Schema schema,
InputFile inputFile) {
- FileFormat fileFormat = FileFormat.fromFileName(inputFile.location());
- Preconditions.checkArgument(
- fileFormat != null, "Unable to determine format of file: %s",
inputFile.location());
-
- switch (fileFormat) {
- case PARQUET:
- return Parquet.read(inputFile)
- .project(schema)
- .createReaderFunc(
- fileSchema ->
-
org.apache.iceberg.data.parquet.InternalReader.create(schema, fileSchema))
- .build();
- case AVRO:
- return Avro.read(inputFile)
- .project(schema)
- .createReaderFunc(fileSchema -> InternalReader.create(schema))
- .build();
- case ORC:
- // Internal readers are not supported for ORC yet.
- default:
- throw new UnsupportedOperationException("Unsupported file format:" +
fileFormat.name());
- }
- }
-
- private static PartitionStats recordToPartitionStats(StructLike record) {
- PartitionStats stats =
- new PartitionStats(
- record.get(PARTITION_FIELD_ID, StructLike.class),
- record.get(SPEC_ID.fieldId(), Integer.class));
- stats.set(DATA_RECORD_COUNT.fieldId(),
record.get(DATA_RECORD_COUNT.fieldId(), Long.class));
- stats.set(DATA_FILE_COUNT.fieldId(), record.get(DATA_FILE_COUNT.fieldId(),
Integer.class));
- stats.set(
- TOTAL_DATA_FILE_SIZE_IN_BYTES.fieldId(),
- record.get(TOTAL_DATA_FILE_SIZE_IN_BYTES.fieldId(), Long.class));
- stats.set(
- POSITION_DELETE_RECORD_COUNT.fieldId(),
- record.get(POSITION_DELETE_RECORD_COUNT.fieldId(), Long.class));
- stats.set(
- POSITION_DELETE_FILE_COUNT.fieldId(),
- record.get(POSITION_DELETE_FILE_COUNT.fieldId(), Integer.class));
- stats.set(
- EQUALITY_DELETE_RECORD_COUNT.fieldId(),
- record.get(EQUALITY_DELETE_RECORD_COUNT.fieldId(), Long.class));
- stats.set(
- EQUALITY_DELETE_FILE_COUNT.fieldId(),
- record.get(EQUALITY_DELETE_FILE_COUNT.fieldId(), Integer.class));
- stats.set(TOTAL_RECORD_COUNT.fieldId(),
record.get(TOTAL_RECORD_COUNT.fieldId(), Long.class));
- stats.set(LAST_UPDATED_AT.fieldId(), record.get(LAST_UPDATED_AT.fieldId(),
Long.class));
- stats.set(
- LAST_UPDATED_SNAPSHOT_ID.fieldId(),
- record.get(LAST_UPDATED_SNAPSHOT_ID.fieldId(), Long.class));
- return stats;
- }
-}
diff --git a/gcp/src/main/java/org/apache/iceberg/gcp/gcs/GCSFileIO.java
b/gcp/src/main/java/org/apache/iceberg/gcp/gcs/GCSFileIO.java
index c77e17ad08..188eddbfbb 100644
--- a/gcp/src/main/java/org/apache/iceberg/gcp/gcs/GCSFileIO.java
+++ b/gcp/src/main/java/org/apache/iceberg/gcp/gcs/GCSFileIO.java
@@ -29,7 +29,6 @@ import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.iceberg.common.DynConstructors;
-import org.apache.iceberg.gcp.GCPProperties;
import org.apache.iceberg.io.BulkDeletionFailureException;
import org.apache.iceberg.io.DelegateFileIO;
import org.apache.iceberg.io.FileInfo;
@@ -91,23 +90,6 @@ public class GCSFileIO implements DelegateFileIO,
SupportsStorageCredentials {
this.properties = SerializableMap.copyOf(Maps.newHashMap());
}
- /**
- * Constructor with custom storage supplier and GCP properties.
- *
- * <p>Calling {@link GCSFileIO#initialize(Map)} will overwrite information
set in this
- * constructor.
- *
- * @param storageSupplier storage supplier
- * @param gcpProperties gcp properties
- * @deprecated since 1.10.0, will be removed in 1.11.0; use {@link
- * GCSFileIO#GCSFileIO(SerializableSupplier)} with {@link
GCSFileIO#initialize(Map)} instead
- */
- @Deprecated
- public GCSFileIO(SerializableSupplier<Storage> storageSupplier,
GCPProperties gcpProperties) {
- this.storageSupplier = storageSupplier;
- this.properties = SerializableMap.copyOf(gcpProperties.properties());
- }
-
@Override
public InputFile newInputFile(String path) {
return GCSInputFile.fromLocation(path, clientForStoragePath(path),
metrics);
diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/actions/TestComputePartitionStatsAction.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/actions/TestComputePartitionStatsAction.java
index 3a1b71b380..303411eb7d 100644
--- a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/actions/TestComputePartitionStatsAction.java
+++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/actions/TestComputePartitionStatsAction.java
@@ -120,7 +120,7 @@ public class TestComputePartitionStatsAction extends CatalogTestBase {
 
     assertThat(table.partitionStatisticsFiles()).containsExactly(statisticsFile);
     Types.StructType partitionType = Partitioning.partitionType(table);
-    Schema dataSchema = PartitionStatsHandler.schema(partitionType);
+    Schema dataSchema = PartitionStatsHandler.schema(partitionType, 2);
     validatePartitionStats(
         statisticsFile,
         dataSchema,
@@ -209,7 +209,7 @@ public class TestComputePartitionStatsAction extends CatalogTestBase {
 
     assertThat(table.partitionStatisticsFiles()).containsExactly(statisticsFile);
     Types.StructType partitionType = Partitioning.partitionType(table);
-    Schema dataSchema = PartitionStatsHandler.schema(partitionType);
+    Schema dataSchema = PartitionStatsHandler.schema(partitionType, 2);
     // should contain stats for only partitions of snapshot1 (no entry for partition bar, A)
     validatePartitionStats(
         statisticsFile,
diff --git a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java
index 2d762a500f..ae88e04e44 100644
--- a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java
+++ b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java
@@ -158,7 +158,7 @@ public class TestRewriteDataFilesProcedure extends ExtensionsTestBase {
     PartitionStatisticsFile statisticsFile =
         PartitionStatsHandler.computeAndWriteStatsFile(table);
     table.updatePartitionStatistics().setPartitionStatistics(statisticsFile).commit();
-    Schema dataSchema = PartitionStatsHandler.schema(Partitioning.partitionType(table));
+    Schema dataSchema = PartitionStatsHandler.schema(Partitioning.partitionType(table), 2);
     List<PartitionStats> statsBeforeCompaction;
     try (CloseableIterable<PartitionStats> recordIterator =
         PartitionStatsHandler.readPartitionStatsFile(
diff --git a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteManifestsProcedure.java b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteManifestsProcedure.java
index a940186b97..1e41b3a042 100644
--- a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteManifestsProcedure.java
+++ b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteManifestsProcedure.java
@@ -411,7 +411,7 @@ public class TestRewriteManifestsProcedure extends ExtensionsTestBase {
     PartitionStatisticsFile statisticsFile =
         PartitionStatsHandler.computeAndWriteStatsFile(table);
     table.updatePartitionStatistics().setPartitionStatistics(statisticsFile).commit();
-    Schema dataSchema = PartitionStatsHandler.schema(Partitioning.partitionType(table));
+    Schema dataSchema = PartitionStatsHandler.schema(Partitioning.partitionType(table), 2);
     List<PartitionStats> statsBeforeRewrite;
     try (CloseableIterable<PartitionStats> recordIterator =
         PartitionStatsHandler.readPartitionStatsFile(
diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestComputePartitionStatsAction.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestComputePartitionStatsAction.java
index 3a1b71b380..303411eb7d 100644
--- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestComputePartitionStatsAction.java
+++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestComputePartitionStatsAction.java
@@ -120,7 +120,7 @@ public class TestComputePartitionStatsAction extends CatalogTestBase {
 
     assertThat(table.partitionStatisticsFiles()).containsExactly(statisticsFile);
     Types.StructType partitionType = Partitioning.partitionType(table);
-    Schema dataSchema = PartitionStatsHandler.schema(partitionType);
+    Schema dataSchema = PartitionStatsHandler.schema(partitionType, 2);
     validatePartitionStats(
         statisticsFile,
         dataSchema,
@@ -209,7 +209,7 @@ public class TestComputePartitionStatsAction extends CatalogTestBase {
 
     assertThat(table.partitionStatisticsFiles()).containsExactly(statisticsFile);
     Types.StructType partitionType = Partitioning.partitionType(table);
-    Schema dataSchema = PartitionStatsHandler.schema(partitionType);
+    Schema dataSchema = PartitionStatsHandler.schema(partitionType, 2);
     // should contain stats for only partitions of snapshot1 (no entry for partition bar, A)
     validatePartitionStats(
         statisticsFile,
diff --git a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java
index 1340e205dd..3aabd635bb 100644
--- a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java
+++ b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java
@@ -157,7 +157,7 @@ public class TestRewriteDataFilesProcedure extends ExtensionsTestBase {
     PartitionStatisticsFile statisticsFile =
         PartitionStatsHandler.computeAndWriteStatsFile(table);
     table.updatePartitionStatistics().setPartitionStatistics(statisticsFile).commit();
-    Schema dataSchema = PartitionStatsHandler.schema(Partitioning.partitionType(table));
+    Schema dataSchema = PartitionStatsHandler.schema(Partitioning.partitionType(table), 2);
     List<PartitionStats> statsBeforeCompaction;
     try (CloseableIterable<PartitionStats> recordIterator =
         PartitionStatsHandler.readPartitionStatsFile(
diff --git a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteManifestsProcedure.java b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteManifestsProcedure.java
index 1dd85f814f..a6896715ca 100644
--- a/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteManifestsProcedure.java
+++ b/spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteManifestsProcedure.java
@@ -411,7 +411,7 @@ public class TestRewriteManifestsProcedure extends ExtensionsTestBase {
     PartitionStatisticsFile statisticsFile =
         PartitionStatsHandler.computeAndWriteStatsFile(table);
     table.updatePartitionStatistics().setPartitionStatistics(statisticsFile).commit();
-    Schema dataSchema = PartitionStatsHandler.schema(Partitioning.partitionType(table));
+    Schema dataSchema = PartitionStatsHandler.schema(Partitioning.partitionType(table), 2);
     List<PartitionStats> statsBeforeRewrite;
     try (CloseableIterable<PartitionStats> recordIterator =
         PartitionStatsHandler.readPartitionStatsFile(
diff --git a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestComputePartitionStatsAction.java b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestComputePartitionStatsAction.java
index 3a1b71b380..303411eb7d 100644
--- a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestComputePartitionStatsAction.java
+++ b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/actions/TestComputePartitionStatsAction.java
@@ -120,7 +120,7 @@ public class TestComputePartitionStatsAction extends CatalogTestBase {
 
     assertThat(table.partitionStatisticsFiles()).containsExactly(statisticsFile);
     Types.StructType partitionType = Partitioning.partitionType(table);
-    Schema dataSchema = PartitionStatsHandler.schema(partitionType);
+    Schema dataSchema = PartitionStatsHandler.schema(partitionType, 2);
     validatePartitionStats(
         statisticsFile,
         dataSchema,
@@ -209,7 +209,7 @@ public class TestComputePartitionStatsAction extends CatalogTestBase {
 
     assertThat(table.partitionStatisticsFiles()).containsExactly(statisticsFile);
     Types.StructType partitionType = Partitioning.partitionType(table);
-    Schema dataSchema = PartitionStatsHandler.schema(partitionType);
+    Schema dataSchema = PartitionStatsHandler.schema(partitionType, 2);
     // should contain stats for only partitions of snapshot1 (no entry for partition bar, A)
     validatePartitionStats(
         statisticsFile,