This is an automated email from the ASF dual-hosted git repository.
pvary pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new f37a04b532 Core, Orc: Remove deprecated partition stats read
functionality (#14998)
f37a04b532 is described below
commit f37a04b532595354fdc85e127491b032f528e2e0
Author: gaborkaszab <[email protected]>
AuthorDate: Fri May 22 10:03:42 2026 +0200
Core, Orc: Remove deprecated partition stats read functionality (#14998)
---
.palantir/revapi.yml | 56 +++
.../java/org/apache/iceberg/PartitionStats.java | 317 ----------------
.../org/apache/iceberg/PartitionStatsHandler.java | 218 +----------
.../iceberg/PartitionStatsHandlerTestBase.java | 398 +++++----------------
.../org/apache/iceberg/TestPartitionStats.java | 135 -------
.../iceberg/orc/TestOrcPartitionStatsHandler.java | 21 --
6 files changed, 154 insertions(+), 991 deletions(-)
diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml
index 047be57415..80fa8f15f1 100644
--- a/.palantir/revapi.yml
+++ b/.palantir/revapi.yml
@@ -517,6 +517,62 @@ acceptedBreaks:
- code: "java.class.removed"
old: "class org.apache.iceberg.data.PartitionStatsHandler"
justification: "Removing deprecated code for 1.11.0"
+ "1.11.0":
+ org.apache.iceberg:iceberg-core:
+ - code: "java.class.removed"
+ old: "class org.apache.iceberg.PartitionStats"
+ justification: "Removed deprecated functionality for partition stats"
+ - code: "java.field.removed"
+ old: "field org.apache.iceberg.PartitionStatsHandler.DATA_FILE_COUNT"
+ justification: "Removed deprecated functionality for partition stats"
+ - code: "java.field.removed"
+ old: "field org.apache.iceberg.PartitionStatsHandler.DATA_RECORD_COUNT"
+ justification: "Removed deprecated functionality for partition stats"
+ - code: "java.field.removed"
+ old: "field org.apache.iceberg.PartitionStatsHandler.DV_COUNT"
+ justification: "Removed deprecated functionality for partition stats"
+ - code: "java.field.removed"
+ old: "field org.apache.iceberg.PartitionStatsHandler.EQUALITY_DELETE_FILE_COUNT"
+ justification: "Removed deprecated functionality for partition stats"
+ - code: "java.field.removed"
+ old: "field org.apache.iceberg.PartitionStatsHandler.EQUALITY_DELETE_RECORD_COUNT"
+ justification: "Removed deprecated functionality for partition stats"
+ - code: "java.field.removed"
+ old: "field org.apache.iceberg.PartitionStatsHandler.LAST_UPDATED_AT"
+ justification: "Removed deprecated functionality for partition stats"
+ - code: "java.field.removed"
+ old: "field org.apache.iceberg.PartitionStatsHandler.LAST_UPDATED_SNAPSHOT_ID"
+ justification: "Removed deprecated functionality for partition stats"
+ - code: "java.field.removed"
+ old: "field org.apache.iceberg.PartitionStatsHandler.POSITION_DELETE_FILE_COUNT"
+ justification: "Removed deprecated functionality for partition stats"
+ - code: "java.field.removed"
+ old: "field org.apache.iceberg.PartitionStatsHandler.POSITION_DELETE_RECORD_COUNT"
+ justification: "Removed deprecated functionality for partition stats"
+ - code: "java.field.removed"
+ old: "field org.apache.iceberg.PartitionStatsHandler.SPEC_ID"
+ justification: "Removed deprecated functionality for partition stats"
+ - code: "java.field.removed"
+ old: "field org.apache.iceberg.PartitionStatsHandler.TOTAL_DATA_FILE_SIZE_IN_BYTES"
+ justification: "Removed deprecated functionality for partition stats"
+ - code: "java.field.removed"
+ old: "field org.apache.iceberg.PartitionStatsHandler.TOTAL_RECORD_COUNT"
+ justification: "Removed deprecated functionality for partition stats"
+ - code: "java.field.removedWithConstant"
+ old: "field org.apache.iceberg.PartitionStatsHandler.PARTITION_FIELD_ID"
+ justification: "Removed deprecated functionality for partition stats"
+ - code: "java.field.removedWithConstant"
+ old: "field org.apache.iceberg.PartitionStatsHandler.PARTITION_FIELD_NAME"
+ justification: "Removed deprecated functionality for partition stats"
+ - code: "java.method.removed"
+ old: "method org.apache.iceberg.Schema org.apache.iceberg.PartitionStatsHandler::schema(org.apache.iceberg.types.Types.StructType,\
+ \ int)"
+ justification: "Removed deprecated functionality for partition stats"
+ - code: "java.method.removed"
+ old: "method org.apache.iceberg.io.CloseableIterable<org.apache.iceberg.PartitionStats>\
+ \ org.apache.iceberg.PartitionStatsHandler::readPartitionStatsFile(org.apache.iceberg.Schema,\
+ \ org.apache.iceberg.io.InputFile)"
+ justification: "Removed deprecated functionality for partition stats"
"1.2.0":
org.apache.iceberg:iceberg-api:
- code: "java.field.constantValueChanged"
diff --git a/core/src/main/java/org/apache/iceberg/PartitionStats.java b/core/src/main/java/org/apache/iceberg/PartitionStats.java
deleted file mode 100644
index e8a4e18916..0000000000
--- a/core/src/main/java/org/apache/iceberg/PartitionStats.java
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg;
-
-import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
-
-/**
- * Class to hold partition statistics values.
- *
- * @deprecated will be removed in 1.12.0. Use {@link BasePartitionStatistics
instead}
- */
-@Deprecated
-public class PartitionStats implements StructLike {
-
- private static final int STATS_COUNT = 13;
-
- private StructLike partition;
- private int specId;
- private long dataRecordCount;
- private int dataFileCount;
- private long totalDataFileSizeInBytes;
- private long positionDeleteRecordCount; // also includes dv record count as
per spec
- private int positionDeleteFileCount;
- private long equalityDeleteRecordCount;
- private int equalityDeleteFileCount;
- private Long totalRecordCount; // null by default
- private Long lastUpdatedAt; // null by default
- private Long lastUpdatedSnapshotId; // null by default
- private int dvCount;
-
- public PartitionStats(StructLike partition, int specId) {
- this.partition = partition;
- this.specId = specId;
- }
-
- public StructLike partition() {
- return partition;
- }
-
- public int specId() {
- return specId;
- }
-
- public long dataRecordCount() {
- return dataRecordCount;
- }
-
- public int dataFileCount() {
- return dataFileCount;
- }
-
- public long totalDataFileSizeInBytes() {
- return totalDataFileSizeInBytes;
- }
-
- public long positionDeleteRecordCount() {
- return positionDeleteRecordCount;
- }
-
- public int positionDeleteFileCount() {
- return positionDeleteFileCount;
- }
-
- public long equalityDeleteRecordCount() {
- return equalityDeleteRecordCount;
- }
-
- public int equalityDeleteFileCount() {
- return equalityDeleteFileCount;
- }
-
- public Long totalRecords() {
- return totalRecordCount;
- }
-
- public Long lastUpdatedAt() {
- return lastUpdatedAt;
- }
-
- public Long lastUpdatedSnapshotId() {
- return lastUpdatedSnapshotId;
- }
-
- public int dvCount() {
- return dvCount;
- }
-
- /**
- * Updates the partition stats from the data/delete file.
- *
- * @param file the {@link ContentFile} from the manifest entry.
- * @param snapshot the snapshot corresponding to the live entry.
- */
- void liveEntry(ContentFile<?> file, Snapshot snapshot) {
- Preconditions.checkArgument(specId == file.specId(), "Spec IDs must match");
-
- switch (file.content()) {
- case DATA:
- this.dataRecordCount += file.recordCount();
- this.dataFileCount += 1;
- this.totalDataFileSizeInBytes += file.fileSizeInBytes();
- break;
- case POSITION_DELETES:
- this.positionDeleteRecordCount += file.recordCount();
- if (file.format() == FileFormat.PUFFIN) {
- this.dvCount += 1;
- } else {
- this.positionDeleteFileCount += 1;
- }
-
- break;
- case EQUALITY_DELETES:
- this.equalityDeleteRecordCount += file.recordCount();
- this.equalityDeleteFileCount += 1;
- break;
- default:
- throw new UnsupportedOperationException("Unsupported file content type: " + file.content());
- }
-
- if (snapshot != null) {
- updateSnapshotInfo(snapshot.snapshotId(), snapshot.timestampMillis());
- }
-
- // Note: Not computing the `TOTAL_RECORD_COUNT` for now as it needs
scanning the data.
- }
-
- /**
- * Updates the modified time and snapshot ID for the deleted manifest entry.
- *
- * @param snapshot the snapshot corresponding to the deleted manifest entry.
- */
- void deletedEntry(Snapshot snapshot) {
- if (snapshot != null) {
- updateSnapshotInfo(snapshot.snapshotId(), snapshot.timestampMillis());
- }
- }
-
- /**
- * Decrement the counters as it was included in the previous stats and
updates the modified time
- * and snapshot ID for the deleted manifest entry.
- *
- * @param snapshot the snapshot corresponding to the deleted manifest entry.
- */
- void deletedEntryForIncrementalCompute(ContentFile<?> file, Snapshot
snapshot) {
- Preconditions.checkArgument(specId == file.specId(), "Spec IDs must
match");
-
- switch (file.content()) {
- case DATA:
- this.dataRecordCount -= file.recordCount();
- this.dataFileCount -= 1;
- this.totalDataFileSizeInBytes -= file.fileSizeInBytes();
- break;
- case POSITION_DELETES:
- this.positionDeleteRecordCount -= file.recordCount();
- if (file.format() == FileFormat.PUFFIN) {
- this.dvCount -= 1;
- } else {
- this.positionDeleteFileCount -= 1;
- }
-
- break;
- case EQUALITY_DELETES:
- this.equalityDeleteRecordCount -= file.recordCount();
- this.equalityDeleteFileCount -= 1;
- break;
- default:
- throw new UnsupportedOperationException("Unsupported file content
type: " + file.content());
- }
-
- if (snapshot != null) {
- updateSnapshotInfo(snapshot.snapshotId(), snapshot.timestampMillis());
- }
- }
-
- /**
- * Appends statistics from given entry to current entry.
- *
- * @param entry the entry from which statistics will be sourced.
- */
- void appendStats(PartitionStats entry) {
- Preconditions.checkArgument(specId == entry.specId(), "Spec IDs must
match");
-
- this.dataRecordCount += entry.dataRecordCount;
- this.dataFileCount += entry.dataFileCount;
- this.totalDataFileSizeInBytes += entry.totalDataFileSizeInBytes;
- this.positionDeleteRecordCount += entry.positionDeleteRecordCount;
- this.positionDeleteFileCount += entry.positionDeleteFileCount;
- this.equalityDeleteRecordCount += entry.equalityDeleteRecordCount;
- this.equalityDeleteFileCount += entry.equalityDeleteFileCount;
- this.dvCount += entry.dvCount;
-
- if (entry.totalRecordCount != null) {
- if (totalRecordCount == null) {
- this.totalRecordCount = entry.totalRecordCount;
- } else {
- this.totalRecordCount += entry.totalRecordCount;
- }
- }
-
- if (entry.lastUpdatedAt != null) {
- updateSnapshotInfo(entry.lastUpdatedSnapshotId, entry.lastUpdatedAt);
- }
- }
-
- private void updateSnapshotInfo(long snapshotId, long updatedAt) {
- if (lastUpdatedAt == null || lastUpdatedAt < updatedAt) {
- this.lastUpdatedAt = updatedAt;
- this.lastUpdatedSnapshotId = snapshotId;
- }
- }
-
- @Override
- public int size() {
- return STATS_COUNT;
- }
-
- @Override
- public <T> T get(int pos, Class<T> javaClass) {
- switch (pos) {
- case 0:
- return javaClass.cast(partition);
- case 1:
- return javaClass.cast(specId);
- case 2:
- return javaClass.cast(dataRecordCount);
- case 3:
- return javaClass.cast(dataFileCount);
- case 4:
- return javaClass.cast(totalDataFileSizeInBytes);
- case 5:
- return javaClass.cast(positionDeleteRecordCount);
- case 6:
- return javaClass.cast(positionDeleteFileCount);
- case 7:
- return javaClass.cast(equalityDeleteRecordCount);
- case 8:
- return javaClass.cast(equalityDeleteFileCount);
- case 9:
- return javaClass.cast(totalRecordCount);
- case 10:
- return javaClass.cast(lastUpdatedAt);
- case 11:
- return javaClass.cast(lastUpdatedSnapshotId);
- case 12:
- return javaClass.cast(dvCount);
- default:
- throw new UnsupportedOperationException("Unknown position: " + pos);
- }
- }
-
- @Override
- public <T> void set(int pos, T value) {
- switch (pos) {
- case 0:
- this.partition = (StructLike) value;
- break;
- case 1:
- this.specId = (int) value;
- break;
- case 2:
- this.dataRecordCount = (long) value;
- break;
- case 3:
- this.dataFileCount = (int) value;
- break;
- case 4:
- this.totalDataFileSizeInBytes = (long) value;
- break;
- case 5:
- // optional field as per spec, implementation initialize to 0 for
counters
- this.positionDeleteRecordCount = value == null ? 0L : (long) value;
- break;
- case 6:
- // optional field as per spec, implementation initialize to 0 for
counters
- this.positionDeleteFileCount = value == null ? 0 : (int) value;
- break;
- case 7:
- // optional field as per spec, implementation initialize to 0 for
counters
- this.equalityDeleteRecordCount = value == null ? 0L : (long) value;
- break;
- case 8:
- // optional field as per spec, implementation initialize to 0 for
counters
- this.equalityDeleteFileCount = value == null ? 0 : (int) value;
- break;
- case 9:
- this.totalRecordCount = (Long) value;
- break;
- case 10:
- this.lastUpdatedAt = (Long) value;
- break;
- case 11:
- this.lastUpdatedSnapshotId = (Long) value;
- break;
- case 12:
- this.dvCount = value == null ? 0 : (int) value;
- break;
- default:
- throw new UnsupportedOperationException("Unknown position: " + pos);
- }
- }
-}
diff --git a/core/src/main/java/org/apache/iceberg/PartitionStatsHandler.java b/core/src/main/java/org/apache/iceberg/PartitionStatsHandler.java
index a3e298d72b..29f7bcb53c 100644
--- a/core/src/main/java/org/apache/iceberg/PartitionStatsHandler.java
+++ b/core/src/main/java/org/apache/iceberg/PartitionStatsHandler.java
@@ -33,20 +33,14 @@ import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.apache.iceberg.data.GenericRecord;
-import org.apache.iceberg.expressions.Literal;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.FileAppender;
-import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.io.OutputFile;
import
org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Queues;
import org.apache.iceberg.types.Comparators;
-import org.apache.iceberg.types.Types;
-import org.apache.iceberg.types.Types.IntegerType;
-import org.apache.iceberg.types.Types.LongType;
-import org.apache.iceberg.types.Types.NestedField;
import org.apache.iceberg.types.Types.StructType;
import org.apache.iceberg.util.Pair;
import org.apache.iceberg.util.PartitionMap;
@@ -67,180 +61,6 @@ public class PartitionStatsHandler {
private static final Logger LOG =
LoggerFactory.getLogger(PartitionStatsHandler.class);
- // schema of the partition stats file as per spec
- /**
- * @deprecated will be removed in 1.12.0. Use {@link
PartitionStatistics#EMPTY_PARTITION_FIELD}
- */
- @Deprecated public static final int PARTITION_FIELD_ID = 1;
-
- /**
- * @deprecated will be removed in 1.12.0. Use {@link
PartitionStatistics#EMPTY_PARTITION_FIELD}
- */
- @Deprecated public static final String PARTITION_FIELD_NAME = "partition";
-
- /**
- * @deprecated will be removed in 1.12.0. Use {@link
PartitionStatistics#SPEC_ID}
- */
- @Deprecated
- public static final NestedField SPEC_ID = NestedField.required(2, "spec_id",
IntegerType.get());
-
- /**
- * @deprecated will be removed in 1.12.0. Use {@link
PartitionStatistics#DATA_RECORD_COUNT}
- */
- @Deprecated
- public static final NestedField DATA_RECORD_COUNT =
- NestedField.required(3, "data_record_count", LongType.get());
-
- /**
- * @deprecated will be removed in 1.12.0. Use {@link
PartitionStatistics#DATA_FILE_COUNT}
- */
- @Deprecated
- public static final NestedField DATA_FILE_COUNT =
- NestedField.required(4, "data_file_count", IntegerType.get());
-
- /**
- * @deprecated will be removed in 1.12.0. Use {@link
- * PartitionStatistics#TOTAL_DATA_FILE_SIZE_IN_BYTES}
- */
- @Deprecated
- public static final NestedField TOTAL_DATA_FILE_SIZE_IN_BYTES =
- NestedField.required(5, "total_data_file_size_in_bytes", LongType.get());
-
- /**
- * @deprecated will be removed in 1.12.0. Use {@link
- * PartitionStatistics#POSITION_DELETE_RECORD_COUNT}
- */
- @Deprecated
- public static final NestedField POSITION_DELETE_RECORD_COUNT =
- NestedField.optional(6, "position_delete_record_count", LongType.get());
-
- /**
- * @deprecated will be removed in 1.12.0. Use {@link
- * PartitionStatistics#POSITION_DELETE_FILE_COUNT}
- */
- @Deprecated
- public static final NestedField POSITION_DELETE_FILE_COUNT =
- NestedField.optional(7, "position_delete_file_count", IntegerType.get());
-
- /**
- * @deprecated will be removed in 1.12.0. Use {@link
- * PartitionStatistics#EQUALITY_DELETE_RECORD_COUNT}
- */
- @Deprecated
- public static final NestedField EQUALITY_DELETE_RECORD_COUNT =
- NestedField.optional(8, "equality_delete_record_count", LongType.get());
-
- /**
- * @deprecated will be removed in 1.12.0. Use {@link
- * PartitionStatistics#EQUALITY_DELETE_FILE_COUNT}
- */
- @Deprecated
- public static final NestedField EQUALITY_DELETE_FILE_COUNT =
- NestedField.optional(9, "equality_delete_file_count", IntegerType.get());
-
- /**
- * @deprecated will be removed in 1.12.0. Use {@link
PartitionStatistics#TOTAL_RECORD_COUNT}
- */
- @Deprecated
- public static final NestedField TOTAL_RECORD_COUNT =
- NestedField.optional(10, "total_record_count", LongType.get());
-
- /**
- * @deprecated will be removed in 1.12.0. Use {@link
PartitionStatistics#LAST_UPDATED_AT}
- */
- @Deprecated
- public static final NestedField LAST_UPDATED_AT =
- NestedField.optional(11, "last_updated_at", LongType.get());
-
- /**
- * @deprecated will be removed in 1.12.0. Use {@link
PartitionStatistics#LAST_UPDATED_SNAPSHOT_ID}
- */
- @Deprecated
- public static final NestedField LAST_UPDATED_SNAPSHOT_ID =
- NestedField.optional(12, "last_updated_snapshot_id", LongType.get());
-
- /**
- * @deprecated will be removed in 1.12.0. Use {@link
PartitionStatistics#DV_COUNT}
- */
- @Deprecated
- public static final NestedField DV_COUNT =
- NestedField.required("dv_count")
- .withId(13)
- .ofType(Types.IntegerType.get())
- .withInitialDefault(Literal.of(0))
- .withWriteDefault(Literal.of(0))
- .build();
-
- /**
- * Generates the partition stats file schema for a given format version
based on a combined
- * partition type which considers all specs in a table.
- *
- * @param unifiedPartitionType unified partition schema type. Could be
calculated by {@link
- * Partitioning#partitionType(Table)}.
- * @return a schema that corresponds to the provided unified partition type.
- * @deprecated will be removed in 1.12.0. Use {@link
PartitionStatistics#schema(StructType, int)}
- * instead.
- */
- @Deprecated
- public static Schema schema(StructType unifiedPartitionType, int
formatVersion) {
- Preconditions.checkState(!unifiedPartitionType.fields().isEmpty(), "Table
must be partitioned");
- Preconditions.checkState(
- formatVersion > 0 && formatVersion <=
TableMetadata.SUPPORTED_TABLE_FORMAT_VERSION,
- "Invalid format version: %s",
- formatVersion);
-
- if (formatVersion <= 2) {
- return v2Schema(unifiedPartitionType);
- }
-
- return v3Schema(unifiedPartitionType);
- }
-
- private static Schema v2Schema(StructType unifiedPartitionType) {
- return new Schema(
- NestedField.required(PARTITION_FIELD_ID, PARTITION_FIELD_NAME,
unifiedPartitionType),
- SPEC_ID,
- DATA_RECORD_COUNT,
- DATA_FILE_COUNT,
- TOTAL_DATA_FILE_SIZE_IN_BYTES,
- POSITION_DELETE_RECORD_COUNT,
- POSITION_DELETE_FILE_COUNT,
- EQUALITY_DELETE_RECORD_COUNT,
- EQUALITY_DELETE_FILE_COUNT,
- TOTAL_RECORD_COUNT,
- LAST_UPDATED_AT,
- LAST_UPDATED_SNAPSHOT_ID);
- }
-
- private static Schema v3Schema(StructType unifiedPartitionType) {
- return new Schema(
- NestedField.required(PARTITION_FIELD_ID, PARTITION_FIELD_NAME,
unifiedPartitionType),
- SPEC_ID,
- DATA_RECORD_COUNT,
- DATA_FILE_COUNT,
- TOTAL_DATA_FILE_SIZE_IN_BYTES,
- NestedField.required(
- POSITION_DELETE_RECORD_COUNT.fieldId(),
- POSITION_DELETE_RECORD_COUNT.name(),
- LongType.get()),
- NestedField.required(
- POSITION_DELETE_FILE_COUNT.fieldId(),
- POSITION_DELETE_FILE_COUNT.name(),
- IntegerType.get()),
- NestedField.required(
- EQUALITY_DELETE_RECORD_COUNT.fieldId(),
- EQUALITY_DELETE_RECORD_COUNT.name(),
- LongType.get()),
- NestedField.required(
- EQUALITY_DELETE_FILE_COUNT.fieldId(),
- EQUALITY_DELETE_FILE_COUNT.name(),
- IntegerType.get()),
- TOTAL_RECORD_COUNT,
- LAST_UPDATED_AT,
- LAST_UPDATED_SNAPSHOT_ID,
- DV_COUNT);
- }
-
/**
* Computes the stats incrementally after the snapshot that has partition
stats file till the
* current snapshot and writes the combined result into a {@link
PartitionStatisticsFile} after
@@ -343,28 +163,6 @@ public class PartitionStatsHandler {
.build();
}
- /**
- * Reads partition statistics from the specified {@link InputFile} using
given schema.
- *
- * @param schema The {@link Schema} of the partition statistics file.
- * @param inputFile An {@link InputFile} pointing to the partition stats
file.
- * @deprecated will be removed in 1.12.0, use {@link
PartitionStatisticsScan} instead
- */
- @Deprecated
- public static CloseableIterable<PartitionStats> readPartitionStatsFile(
- Schema schema, InputFile inputFile) {
- Preconditions.checkArgument(schema != null, "Invalid schema: null");
- Preconditions.checkArgument(inputFile != null, "Invalid input file: null");
-
- FileFormat fileFormat = FileFormat.fromFileName(inputFile.location());
- Preconditions.checkArgument(
- fileFormat != null, "Unable to determine format of file: %s",
inputFile.location());
-
- CloseableIterable<StructLike> records =
- InternalData.read(fileFormat, inputFile).project(schema).build();
- return CloseableIterable.transform(records,
PartitionStatsHandler::recordToPartitionStats);
- }
-
private static OutputFile newPartitionStatsFile(
Table table, FileFormat fileFormat, long snapshotId) {
Preconditions.checkArgument(
@@ -382,19 +180,6 @@ public class PartitionStatsHandler {
Locale.ROOT, "partition-stats-%d-%s", snapshotId,
UUID.randomUUID()))));
}
- private static PartitionStats recordToPartitionStats(StructLike record) {
- int pos = 0;
- PartitionStats stats =
- new PartitionStats(
- record.get(pos++, StructLike.class), // partition
- record.get(pos++, Integer.class)); // spec id
- for (; pos < record.size(); pos++) {
- stats.set(pos, record.get(pos, Object.class));
- }
-
- return stats;
- }
-
private static Collection<PartitionStatistics>
computeAndMergeStatsIncremental(
Table table, Snapshot snapshot, long lastSnapshotWithStats) {
PartitionMap<PartitionStatistics> statsMap =
PartitionMap.create(table.specs());
@@ -680,7 +465,8 @@ public class PartitionStatsHandler {
* @param targetStats partition statistics to be updated.
* @param inputStats the partition statistics used as input.
*/
- private static void appendStats(PartitionStatistics targetStats,
PartitionStatistics inputStats) {
+ @VisibleForTesting
+ static void appendStats(PartitionStatistics targetStats, PartitionStatistics
inputStats) {
Preconditions.checkArgument(targetStats.specId() != null, "Invalid spec
ID: null");
Preconditions.checkArgument(
targetStats.specId().equals(inputStats.specId()), "Spec IDs must
match");
diff --git a/core/src/test/java/org/apache/iceberg/PartitionStatsHandlerTestBase.java b/core/src/test/java/org/apache/iceberg/PartitionStatsHandlerTestBase.java
index 7b7e85bfff..54cd9cd433 100644
--- a/core/src/test/java/org/apache/iceberg/PartitionStatsHandlerTestBase.java
+++ b/core/src/test/java/org/apache/iceberg/PartitionStatsHandlerTestBase.java
@@ -24,7 +24,6 @@ import static
org.apache.iceberg.types.Types.NestedField.required;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
-import java.io.IOException;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.util.Arrays;
@@ -32,7 +31,6 @@ import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
-import java.util.Objects;
import java.util.UUID;
import org.apache.iceberg.expressions.Literal;
import org.apache.iceberg.io.CloseableIterable;
@@ -41,7 +39,6 @@ import
org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Comparators;
import org.apache.iceberg.types.Types;
-import org.assertj.core.groups.Tuple;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestTemplate;
import org.junit.jupiter.api.extension.ExtendWith;
@@ -61,6 +58,10 @@ public abstract class PartitionStatsHandlerTestBase extends PartitionStatisticsT
private final Map<String, String> fileFormatProperty =
ImmutableMap.of(TableProperties.DEFAULT_FILE_FORMAT, format().name());
+ private static final PartitionData PARTITION =
+ new PartitionData(
+ Types.StructType.of(Types.NestedField.required(1, "foo",
Types.IntegerType.get())));
+
@Test
public void testPartitionStatsOnEmptyTable() throws Exception {
Table testTable =
@@ -294,185 +295,6 @@ public abstract class PartitionStatsHandlerTestBase
extends PartitionStatisticsT
}
}
- /**
- * @deprecated will be removed in 1.12.0
- */
- @SuppressWarnings("checkstyle:MethodLength")
- @Test
- @Deprecated
- public void testPartitionStats() throws Exception {
- Table testTable =
- TestTables.create(
- tempDir("partition_stats_compute"),
- "partition_stats_compute",
- SCHEMA,
- SPEC,
- 2,
- fileFormatProperty);
-
- DataFile dataFile1 =
- FileGenerationUtil.generateDataFile(testTable,
TestHelpers.Row.of("foo", "A"));
- DataFile dataFile2 =
- FileGenerationUtil.generateDataFile(testTable,
TestHelpers.Row.of("foo", "B"));
- DataFile dataFile3 =
- FileGenerationUtil.generateDataFile(testTable,
TestHelpers.Row.of("bar", "A"));
- DataFile dataFile4 =
- FileGenerationUtil.generateDataFile(testTable,
TestHelpers.Row.of("bar", "B"));
-
- for (int i = 0; i < 3; i++) {
- // insert same set of seven records thrice to have a new manifest files
- testTable
- .newAppend()
- .appendFile(dataFile1)
- .appendFile(dataFile2)
- .appendFile(dataFile3)
- .appendFile(dataFile4)
- .commit();
- }
-
- Snapshot snapshot1 = testTable.currentSnapshot();
- Schema recordSchema =
PartitionStatistics.schema(Partitioning.partitionType(testTable), 2);
-
- Types.StructType partitionType =
-
recordSchema.findField(EMPTY_PARTITION_FIELD.fieldId()).type().asStructType();
- computeAndValidatePartitionStats(
- testTable,
- recordSchema,
- Tuple.tuple(
- partitionRecord(partitionType, "foo", "A"),
- 0,
- 3 * dataFile1.recordCount(),
- 3,
- 3 * dataFile1.fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId(),
- 0),
- Tuple.tuple(
- partitionRecord(partitionType, "foo", "B"),
- 0,
- 3 * dataFile2.recordCount(),
- 3,
- 3 * dataFile2.fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId(),
- 0),
- Tuple.tuple(
- partitionRecord(partitionType, "bar", "A"),
- 0,
- 3 * dataFile3.recordCount(),
- 3,
- 3 * dataFile3.fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId(),
- 0),
- Tuple.tuple(
- partitionRecord(partitionType, "bar", "B"),
- 0,
- 3 * dataFile4.recordCount(),
- 3,
- 3 * dataFile4.fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId(),
- 0));
-
- DeleteFile posDelete =
- FileGenerationUtil.generatePositionDeleteFile(testTable,
TestHelpers.Row.of("bar", "A"));
- testTable.newRowDelta().addDeletes(posDelete).commit();
- // snapshot2 is unused in the result as same partition was updated by
snapshot4
-
- DeleteFile eqDelete =
- FileGenerationUtil.generateEqualityDeleteFile(testTable,
TestHelpers.Row.of("foo", "A"));
- testTable.newRowDelta().addDeletes(eqDelete).commit();
- Snapshot snapshot3 = testTable.currentSnapshot();
-
- testTable.updateProperties().set(TableProperties.FORMAT_VERSION,
"3").commit();
- DeleteFile dv = FileGenerationUtil.generateDV(testTable, dataFile3);
- testTable.newRowDelta().addDeletes(dv).commit();
- Snapshot snapshot4 = testTable.currentSnapshot();
-
- recordSchema =
PartitionStatistics.schema(Partitioning.partitionType(testTable), 3);
-
- computeAndValidatePartitionStats(
- testTable,
- recordSchema,
- Tuple.tuple(
- partitionRecord(partitionType, "foo", "A"),
- 0,
- 3 * dataFile1.recordCount(),
- 3,
- 3 * dataFile1.fileSizeInBytes(),
- 0L,
- 0,
- eqDelete.recordCount(),
- 1,
- null,
- snapshot3.timestampMillis(),
- snapshot3.snapshotId(),
- 0),
- Tuple.tuple(
- partitionRecord(partitionType, "foo", "B"),
- 0,
- 3 * dataFile2.recordCount(),
- 3,
- 3 * dataFile2.fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId(),
- 0),
- Tuple.tuple(
- partitionRecord(partitionType, "bar", "A"),
- 0,
- 3 * dataFile3.recordCount(),
- 3,
- 3 * dataFile3.fileSizeInBytes(),
- posDelete.recordCount() + dv.recordCount(),
- 1,
- 0L,
- 0,
- null,
- snapshot4.timestampMillis(),
- snapshot4.snapshotId(),
- 1), // dv count
- Tuple.tuple(
- partitionRecord(partitionType, "bar", "B"),
- 0,
- 3 * dataFile4.recordCount(),
- 3,
- 3 * dataFile4.fileSizeInBytes(),
- 0L,
- 0,
- 0L,
- 0,
- null,
- snapshot1.timestampMillis(),
- snapshot1.snapshotId(),
- 0));
- }
-
@Test
public void testCopyOnWriteDelete() throws Exception {
Table testTable =
@@ -591,39 +413,6 @@ public abstract class PartitionStatsHandlerTestBase
extends PartitionStatisticsT
assertThat(PartitionStatsHandler.latestStatsFile(testTable,
snapshotBranchBId)).isNull();
}
- /**
- * @deprecated will be removed in 1.12.0
- */
- @Test
- @Deprecated
- public void testReadingStatsWithInvalidSchema() throws Exception {
- PartitionSpec spec =
PartitionSpec.builderFor(SCHEMA).identity("c1").build();
- Table testTable =
- TestTables.create(tempDir("old_schema"), "old_schema", SCHEMA, spec,
2, fileFormatProperty);
- Types.StructType partitionType = Partitioning.partitionType(testTable);
- Schema newSchema = PartitionStatistics.schema(partitionType, 2);
- Schema oldSchema = invalidOldSchema(partitionType);
-
- PartitionStatisticsFile invalidStatisticsFile =
- PartitionStatsHandler.writePartitionStatsFile(
- testTable, 42L, oldSchema,
Collections.singletonList(randomStats(partitionType)));
-
- try (CloseableIterable<PartitionStats> recordIterator =
- PartitionStatsHandler.readPartitionStatsFile(
- newSchema,
testTable.io().newInputFile(invalidStatisticsFile.path()))) {
-
- if (format() == FileFormat.PARQUET) {
- assertThatThrownBy(() -> Lists.newArrayList(recordIterator))
- .isInstanceOf(IllegalArgumentException.class)
- .hasMessageContaining("Not a primitive type: struct");
- } else if (format() == FileFormat.AVRO) {
- assertThatThrownBy(() -> Lists.newArrayList(recordIterator))
- .isInstanceOf(IllegalStateException.class)
- .hasMessageContaining("Not an instance of
org.apache.iceberg.StructLike");
- }
- }
- }
-
@Test
public void testFullComputeFallbackWithInvalidStats() throws Exception {
PartitionSpec spec =
PartitionSpec.builderFor(SCHEMA).identity("c1").build();
@@ -661,105 +450,110 @@ public abstract class PartitionStatsHandlerTestBase
extends PartitionStatisticsT
assertThat(partitionStats.get(0).dataFileCount()).isEqualTo(2);
}
- /**
- * @deprecated will be removed in 1.12.0
- */
@Test
- @Deprecated
- public void testV2toV3SchemaEvolution() throws Exception {
- Table testTable =
- TestTables.create(
- tempDir("schema_evolution"), "schema_evolution", SCHEMA, SPEC, 2,
fileFormatProperty);
+ public void testAppendWithAllValues() {
+ BasePartitionStatistics stats1 =
+ createStats(100L, 15, 1000L, 2L, 500, 1L, 200, 15L, 1625077800000L,
12345L);
+ BasePartitionStatistics stats2 =
+ createStats(200L, 7, 500L, 1L, 100, 0L, 50, 7L, 1625077900000L,
12346L);
- // write stats file using v2 schema
- DataFile dataFile =
- FileGenerationUtil.generateDataFile(testTable,
TestHelpers.Row.of("foo", "A"));
- testTable.newAppend().appendFile(dataFile).commit();
- PartitionStatisticsFile statisticsFile =
- PartitionStatsHandler.computeAndWriteStatsFile(
- testTable, testTable.currentSnapshot().snapshotId());
+ PartitionStatsHandler.appendStats(stats1, stats2);
- Types.StructType partitionSchema = Partitioning.partitionType(testTable);
+ validateStats(stats1, 300L, 22, 1500L, 3L, 600, 1L, 250, 22L,
1625077900000L, 12346L);
+ }
- // read with v2 schema
- Schema v2Schema = PartitionStatistics.schema(partitionSchema, 2);
- List<PartitionStats> partitionStatsV2;
- try (CloseableIterable<PartitionStats> recordIterator =
- PartitionStatsHandler.readPartitionStatsFile(
- v2Schema, testTable.io().newInputFile(statisticsFile.path()))) {
- partitionStatsV2 = Lists.newArrayList(recordIterator);
- }
+ @Test
+ public void testAppendWithThisNullOptionalField() {
+ BasePartitionStatistics stats1 =
+ createStats(100L, 15, 1000L, 2L, 500, 1L, 200, null, null, null);
+ BasePartitionStatistics stats2 =
+ createStats(100L, 7, 500L, 1L, 100, 0L, 50, 7L, 1625077900000L,
12346L);
- // read with v3 schema
- Schema v3Schema = PartitionStatistics.schema(partitionSchema, 3);
- List<PartitionStats> partitionStatsV3;
- try (CloseableIterable<PartitionStats> recordIterator =
- PartitionStatsHandler.readPartitionStatsFile(
- v3Schema, testTable.io().newInputFile(statisticsFile.path()))) {
- partitionStatsV3 = Lists.newArrayList(recordIterator);
- }
+ PartitionStatsHandler.appendStats(stats1, stats2);
- assertThat(partitionStatsV2).hasSameSizeAs(partitionStatsV3);
- Comparator<StructLike> comparator = Comparators.forType(partitionSchema);
- for (int i = 0; i < partitionStatsV2.size(); i++) {
- assertThat(isEqual(comparator, partitionStatsV2.get(i),
partitionStatsV3.get(i))).isTrue();
- }
+ validateStats(stats1, 200L, 22, 1500L, 3L, 600, 1L, 250, 7L,
1625077900000L, 12346L);
}
- private static void computeAndValidatePartitionStats(
- Table testTable, Schema recordSchema, Tuple... expectedValues) throws
IOException {
- // compute and commit partition stats file
- Snapshot currentSnapshot = testTable.currentSnapshot();
- PartitionStatisticsFile result =
PartitionStatsHandler.computeAndWriteStatsFile(testTable);
-
testTable.updatePartitionStatistics().setPartitionStatistics(result).commit();
- assertThat(result.snapshotId()).isEqualTo(currentSnapshot.snapshotId());
+ @Test
+ public void testAppendWithBothNullOptionalFields() {
+ BasePartitionStatistics stats1 =
+ createStats(100L, 15, 1000L, 2L, 500, 1L, 200, null, null, null);
+ BasePartitionStatistics stats2 = createStats(100L, 7, 500L, 1L, 100, 0L,
50, null, null, null);
- // read the partition entries from the stats file
- List<PartitionStats> partitionStats;
- try (CloseableIterable<PartitionStats> recordIterator =
- PartitionStatsHandler.readPartitionStatsFile(
- recordSchema, testTable.io().newInputFile(result.path()))) {
- partitionStats = Lists.newArrayList(recordIterator);
- }
+ PartitionStatsHandler.appendStats(stats1, stats2);
- assertThat(partitionStats)
- .extracting(
- PartitionStats::partition,
- PartitionStats::specId,
- PartitionStats::dataRecordCount,
- PartitionStats::dataFileCount,
- PartitionStats::totalDataFileSizeInBytes,
- PartitionStats::positionDeleteRecordCount,
- PartitionStats::positionDeleteFileCount,
- PartitionStats::equalityDeleteRecordCount,
- PartitionStats::equalityDeleteFileCount,
- PartitionStats::totalRecords,
- PartitionStats::lastUpdatedAt,
- PartitionStats::lastUpdatedSnapshotId,
- PartitionStats::dvCount)
- .containsExactlyInAnyOrder(expectedValues);
+ validateStats(stats1, 200L, 22, 1500L, 3L, 600, 1L, 250, null, null, null);
}
- @SuppressWarnings("checkstyle:CyclomaticComplexity")
- private static boolean isEqual(
- Comparator<StructLike> partitionComparator, PartitionStats stats1,
PartitionStats stats2) {
- if (stats1 == stats2) {
- return true;
- } else if (stats1 == null || stats2 == null) {
- return false;
- }
+ @Test
+ public void testAppendWithOtherNullOptionalFields() {
+ BasePartitionStatistics stats1 =
+ createStats(100L, 15, 1000L, 2L, 500, 1L, 200, 15L, 1625077900000L,
12346L);
+ BasePartitionStatistics stats2 = createStats(100L, 7, 500L, 1L, 100, 0L,
50, null, null, null);
- return partitionComparator.compare(stats1.partition(), stats2.partition())
== 0
- && stats1.specId() == stats2.specId()
- && stats1.dataRecordCount() == stats2.dataRecordCount()
- && stats1.dataFileCount() == stats2.dataFileCount()
- && stats1.totalDataFileSizeInBytes() ==
stats2.totalDataFileSizeInBytes()
- && stats1.positionDeleteRecordCount() ==
stats2.positionDeleteRecordCount()
- && stats1.positionDeleteFileCount() == stats2.positionDeleteFileCount()
- && stats1.equalityDeleteRecordCount() ==
stats2.equalityDeleteRecordCount()
- && stats1.equalityDeleteFileCount() == stats2.equalityDeleteFileCount()
- && Objects.equals(stats1.totalRecords(), stats2.totalRecords())
- && Objects.equals(stats1.lastUpdatedAt(), stats2.lastUpdatedAt())
- && Objects.equals(stats1.lastUpdatedSnapshotId(),
stats2.lastUpdatedSnapshotId());
+ PartitionStatsHandler.appendStats(stats1, stats2);
+
+ validateStats(stats1, 200L, 22, 1500L, 3L, 600, 1L, 250, 15L,
1625077900000L, 12346L);
+ }
+
+ @Test
+ public void testAppendEmptyStats() {
+ BasePartitionStatistics stats1 = new BasePartitionStatistics(PARTITION, 1);
+ BasePartitionStatistics stats2 = new BasePartitionStatistics(PARTITION, 1);
+
+ PartitionStatsHandler.appendStats(stats1, stats2);
+
+ validateStats(stats1, 0L, 0, 0L, 0L, 0, 0L, 0, null, null, null);
+ }
+
+ @Test
+ public void testAppendWithDifferentSpec() {
+ BasePartitionStatistics stats1 = new BasePartitionStatistics(PARTITION, 1);
+ BasePartitionStatistics stats2 = new BasePartitionStatistics(PARTITION, 2);
+
+ assertThatThrownBy(() -> PartitionStatsHandler.appendStats(stats1, stats2))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Spec IDs must match");
+ }
+
+ private BasePartitionStatistics createStats(
+ long dataRecordCount,
+ int dataFileCount,
+ long totalDataFileSizeInBytes,
+ long positionDeleteRecordCount,
+ int positionDeleteFileCount,
+ long equalityDeleteRecordCount,
+ int equalityDeleteFileCount,
+ Long totalRecordCount,
+ Long lastUpdatedAt,
+ Long lastUpdatedSnapshotId) {
+
+ BasePartitionStatistics stats = new BasePartitionStatistics(PARTITION, 1);
+ stats.set(2, dataRecordCount);
+ stats.set(3, dataFileCount);
+ stats.set(4, totalDataFileSizeInBytes);
+ stats.set(5, positionDeleteRecordCount);
+ stats.set(6, positionDeleteFileCount);
+ stats.set(7, equalityDeleteRecordCount);
+ stats.set(8, equalityDeleteFileCount);
+ stats.set(9, totalRecordCount);
+ stats.set(10, lastUpdatedAt);
+ stats.set(11, lastUpdatedSnapshotId);
+
+ return stats;
+ }
+
+ private void validateStats(PartitionStatistics stats, Object...
expectedValues) {
+ // Spec id and partition data should be unchanged
+ assertThat(stats.get(0, PartitionData.class)).isEqualTo(PARTITION);
+ assertThat(stats.get(1, Integer.class)).isEqualTo(1);
+
+ for (int i = 0; i < expectedValues.length; i++) {
+ if (expectedValues[i] == null) {
+ assertThat(stats.get(i + 2, Object.class)).isNull();
+ } else {
+ assertThat(stats.get(i + 2,
Object.class)).isEqualTo(expectedValues[i]);
+ }
+ }
}
}
diff --git a/core/src/test/java/org/apache/iceberg/TestPartitionStats.java
b/core/src/test/java/org/apache/iceberg/TestPartitionStats.java
deleted file mode 100644
index c215fbcb80..0000000000
--- a/core/src/test/java/org/apache/iceberg/TestPartitionStats.java
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg;
-
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.assertj.core.api.Assertions.assertThatThrownBy;
-
-import org.apache.iceberg.types.Types;
-import org.junit.jupiter.api.Test;
-
-public class TestPartitionStats {
-
- private static final PartitionData PARTITION =
- new PartitionData(
- Types.StructType.of(Types.NestedField.required(1, "foo",
Types.IntegerType.get())));
-
- @Test
- public void testAppendWithAllValues() {
- PartitionStats stats1 =
- createStats(100L, 15, 1000L, 2L, 500, 1L, 200, 15L, 1625077800000L,
12345L);
- PartitionStats stats2 = createStats(200L, 7, 500L, 1L, 100, 0L, 50, 7L,
1625077900000L, 12346L);
-
- stats1.appendStats(stats2);
-
- validateStats(stats1, 300L, 22, 1500L, 3L, 600, 1L, 250, 22L,
1625077900000L, 12346L);
- }
-
- @Test
- public void testAppendWithThisNullOptionalField() {
- PartitionStats stats1 = createStats(100L, 15, 1000L, 2L, 500, 1L, 200,
null, null, null);
- PartitionStats stats2 = createStats(100L, 7, 500L, 1L, 100, 0L, 50, 7L,
1625077900000L, 12346L);
-
- stats1.appendStats(stats2);
-
- validateStats(stats1, 200L, 22, 1500L, 3L, 600, 1L, 250, 7L,
1625077900000L, 12346L);
- }
-
- @Test
- public void testAppendWithBothNullOptionalFields() {
- PartitionStats stats1 = createStats(100L, 15, 1000L, 2L, 500, 1L, 200,
null, null, null);
- PartitionStats stats2 = createStats(100L, 7, 500L, 1L, 100, 0L, 50, null,
null, null);
-
- stats1.appendStats(stats2);
-
- validateStats(stats1, 200L, 22, 1500L, 3L, 600, 1L, 250, null, null, null);
- }
-
- @Test
- public void testAppendWithOtherNullOptionalFields() {
- PartitionStats stats1 =
- createStats(100L, 15, 1000L, 2L, 500, 1L, 200, 15L, 1625077900000L,
12346L);
- PartitionStats stats2 = createStats(100L, 7, 500L, 1L, 100, 0L, 50, null,
null, null);
-
- stats1.appendStats(stats2);
-
- validateStats(stats1, 200L, 22, 1500L, 3L, 600, 1L, 250, 15L,
1625077900000L, 12346L);
- }
-
- @Test
- public void testAppendEmptyStats() {
- PartitionStats stats1 = new PartitionStats(PARTITION, 1);
- PartitionStats stats2 = new PartitionStats(PARTITION, 1);
-
- stats1.appendStats(stats2);
-
- validateStats(stats1, 0L, 0, 0L, 0L, 0, 0L, 0, null, null, null);
- }
-
- @Test
- public void testAppendWithDifferentSpec() {
- PartitionStats stats1 = new PartitionStats(PARTITION, 1);
- PartitionStats stats2 = new PartitionStats(PARTITION, 2);
-
- assertThatThrownBy(() -> stats1.appendStats(stats2))
- .isInstanceOf(IllegalArgumentException.class)
- .hasMessage("Spec IDs must match");
- }
-
- private PartitionStats createStats(
- long dataRecordCount,
- int dataFileCount,
- long totalDataFileSizeInBytes,
- long positionDeleteRecordCount,
- int positionDeleteFileCount,
- long equalityDeleteRecordCount,
- int equalityDeleteFileCount,
- Long totalRecordCount,
- Long lastUpdatedAt,
- Long lastUpdatedSnapshotId) {
-
- PartitionStats stats = new PartitionStats(PARTITION, 1);
- stats.set(2, dataRecordCount);
- stats.set(3, dataFileCount);
- stats.set(4, totalDataFileSizeInBytes);
- stats.set(5, positionDeleteRecordCount);
- stats.set(6, positionDeleteFileCount);
- stats.set(7, equalityDeleteRecordCount);
- stats.set(8, equalityDeleteFileCount);
- stats.set(9, totalRecordCount);
- stats.set(10, lastUpdatedAt);
- stats.set(11, lastUpdatedSnapshotId);
-
- return stats;
- }
-
- private void validateStats(PartitionStats stats, Object... expectedValues) {
- // Spec id and partition data should be unchanged
- assertThat(stats.get(0, PartitionData.class)).isEqualTo(PARTITION);
- assertThat(stats.get(1, Integer.class)).isEqualTo(1);
-
- for (int i = 0; i < expectedValues.length; i++) {
- if (expectedValues[i] == null) {
- assertThat(stats.get(i + 2, Object.class)).isNull();
- } else {
- assertThat(stats.get(i + 2,
Object.class)).isEqualTo(expectedValues[i]);
- }
- }
- }
-}
diff --git
a/orc/src/test/java/org/apache/iceberg/orc/TestOrcPartitionStatsHandler.java
b/orc/src/test/java/org/apache/iceberg/orc/TestOrcPartitionStatsHandler.java
index 117293629f..04d1d212f7 100644
--- a/orc/src/test/java/org/apache/iceberg/orc/TestOrcPartitionStatsHandler.java
+++ b/orc/src/test/java/org/apache/iceberg/orc/TestOrcPartitionStatsHandler.java
@@ -43,13 +43,6 @@ public class TestOrcPartitionStatsHandler extends
PartitionStatsHandlerTestBase
.hasMessage("Cannot write using unregistered internal data format:
ORC");
}
- @Override
- public void testPartitionStats() throws Exception {
- assertThatThrownBy(super::testPartitionStats)
- .isInstanceOf(UnsupportedOperationException.class)
- .hasMessage("Cannot write using unregistered internal data format:
ORC");
- }
-
@Override
public void testLatestStatsFile() throws Exception {
assertThatThrownBy(super::testLatestStatsFile)
@@ -71,24 +64,10 @@ public class TestOrcPartitionStatsHandler extends
PartitionStatsHandlerTestBase
.hasMessage("Cannot write using unregistered internal data format:
ORC");
}
- @Override
- public void testReadingStatsWithInvalidSchema() {
- assertThatThrownBy(super::testReadingStatsWithInvalidSchema)
- .isInstanceOf(UnsupportedOperationException.class)
- .hasMessage("Cannot write using unregistered internal data format:
ORC");
- }
-
@Override
public void testFullComputeFallbackWithInvalidStats() {
assertThatThrownBy(super::testFullComputeFallbackWithInvalidStats)
.isInstanceOf(UnsupportedOperationException.class)
.hasMessage("Cannot write using unregistered internal data format:
ORC");
}
-
- @Override
- public void testV2toV3SchemaEvolution() {
- assertThatThrownBy(super::testV2toV3SchemaEvolution)
- .isInstanceOf(UnsupportedOperationException.class)
- .hasMessage("Cannot write using unregistered internal data format:
ORC");
- }
}