This is an automated email from the ASF dual-hosted git repository.
laskoviymishka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 51b3140b fix(manifest): stop writing deprecated distinct_counts
(field-id 111) (#1102)
51b3140b is described below
commit 51b3140b3a96826622f2a9bc10be357b75d509ba
Author: Krutika Dhananjay <[email protected]>
AuthorDate: Thu May 21 22:21:52 2026 +0530
fix(manifest): stop writing deprecated distinct_counts (field-id 111)
(#1102)
The Avro wire schema declared `distinct_counts` on data_file v1 and v2,
causing it to be emitted on every manifest entry. The Iceberg spec marks
this field as "Deprecated. Do not write."
(https://github.com/apache/iceberg/blob/main/format/spec.md?plain=1#L667),
so writers should not emit it. The field has been deprecated since 2020 --
https://github.com/apache/iceberg/issues/767#issuecomment-582192665
Strict readers that don't include field-id 111 in their data_file read
schema (e.g. PyIceberg 0.10) cannot resolve manifests written by this
library and fail with: ResolveError "File/read schema are not aligned for
map, got None".
---------
Co-authored-by: Krutika Dhananjay <[email protected]>
Co-authored-by: Matt Topol <[email protected]>
---
codec/data_file.go | 11 +-
codec/data_file_test.go | 32 ++-
data_file_codec.go | 12 +-
internal/avro_schemas.go | 6 -
manifest.go | 17 +-
manifest_test.go | 239 ++++++++++++++-------
table/pos_delete_partitioned_fanout_writer_test.go | 4 +-
table/table_test.go | 14 +-
8 files changed, 200 insertions(+), 135 deletions(-)
diff --git a/codec/data_file.go b/codec/data_file.go
index b1f78fb1..4df07d9d 100644
--- a/codec/data_file.go
+++ b/codec/data_file.go
@@ -62,11 +62,12 @@ import (
// usable as a standalone manifest entry — they only round-trip via
// [DecodeDataFile].
//
-// distinct_counts (field 111) is deprecated in the spec for all
-// versions. Already-set values round-trip on v1 and v2 as a
-// read-compatibility artifact; v3 omits the field entirely
-// (apache/iceberg#12182). New DataFiles should not set distinct
-// counts.
+// distinct_counts (field 111) is deprecated in the spec for every
+// version (apache/iceberg#12182). EncodeDataFile drops the field on
+// encode for v1, v2, and v3 alike — values populated on the source
+// DataFile are not transported. Legacy manifests that already carry
+// the field on the wire still decode correctly through
+// [DecodeDataFile]. New DataFiles should not set distinct counts.
func EncodeDataFile(df iceberg.DataFile, spec iceberg.PartitionSpec, schema
*iceberg.Schema, version int) ([]byte, error) {
if version < 1 || version > 3 {
return nil, fmt.Errorf("codec: EncodeDataFile: unsupported
format version %d", version)
diff --git a/codec/data_file_test.go b/codec/data_file_test.go
index 99796b4d..0acb0791 100644
--- a/codec/data_file_test.go
+++ b/codec/data_file_test.go
@@ -183,15 +183,13 @@ func fullyPopulatedDataFile(t *testing.T, version int)
(iceberg.PartitionSpec, *
SplitOffsets([]int64{0, 4096}).
SortOrderID(0).
KeyMetadata([]byte("kms-key-1"))
- if version < 3 {
- // distinct_counts is deprecated for all versions in the spec
- // (apache/iceberg#12182). The fixture sets it on v1/v2 to
- // exercise the read-compatibility round-trip path — counts
- // already present on a DataFile read from a legacy manifest
- // must survive an encode→decode cycle. New DataFiles should
- // not carry distinct counts.
- builder.DistinctValueCounts(map[int]int64{1: 64, 2: 128})
- }
+ // distinct_counts (field 111) is deprecated in the spec for every
+ // version (apache/iceberg#12182). The fixture populates it on every
+ // version to assert that the wire codec drops the field on encode
+ // regardless of what the in-memory DataFile carries — strict readers
+ // (e.g. PyIceberg) reject manifests that emit it. The deprecated
+ // setter is invoked here intentionally to exercise that drop path.
+ builder.DistinctValueCounts(map[int]int64{1: 64, 2: 128})
//nolint:staticcheck // SA1019: deliberate use of deprecated setter to assert
the encoder drops the field
if version >= 2 {
builder.EqualityFieldIDs([]int{1})
}
@@ -223,17 +221,11 @@ func assertDataFileEqual(t *testing.T, want, got
iceberg.DataFile, version int)
require.Equal(t, want.SortOrderID(), got.SortOrderID())
require.Equal(t, want.SpecID(), got.SpecID())
require.Equal(t, want.ContentType(), got.ContentType())
- if version < 3 {
- // distinct_counts (field 111) is deprecated for all versions
- // but still writable on v1/v2. The codec preserves it on
- // round-trip for read compatibility with legacy manifests; v3
- // drops it per spec (apache/iceberg#12182).
- require.Equal(t, want.DistinctValueCounts(),
got.DistinctValueCounts())
- } else {
- require.Empty(t, got.DistinctValueCounts(),
- "v3 manifest-entry schema omits distinct_counts
(deprecated in spec); "+
- "see internal/avro_schemas.go data_file_v3")
- }
+ require.Empty(t, got.DistinctValueCounts(),
+ "distinct_counts (field 111) is deprecated in every version "+
+ "(apache/iceberg#12182); the manifest-entry schemas in
"+
+ "internal/avro_schemas.go omit it for v1/v2/v3 so the
encoder "+
+ "drops it on the wire regardless of what the source
DataFile carries")
if version >= 2 {
require.Equal(t, want.EqualityFieldIDs(),
got.EqualityFieldIDs())
}
diff --git a/data_file_codec.go b/data_file_codec.go
index 15706b7e..7a7b3d39 100644
--- a/data_file_codec.go
+++ b/data_file_codec.go
@@ -91,11 +91,13 @@ type AvroEntryMarshaler interface {
// as a standalone manifest entry — they only round-trip via the
// matching decoder.
//
-// distinct_counts (field 111) is deprecated in the spec for all
-// versions. MarshalAvroEntry preserves any value already on the
-// source for v1 and v2 as a read-compatibility artifact; v3 omits
-// the field entirely (apache/iceberg#12182). New DataFiles should
-// not set distinct counts.
+// distinct_counts (field 111) is deprecated in the spec for every
+// version (apache/iceberg#12182). MarshalAvroEntry drops the field
+// on encode for v1, v2, and v3 alike — values populated on the
+// source DataFile are not transported. The Avro tag on the dataFile
+// struct is intentionally retained so legacy manifests that already
+// carry the field on the wire still decode through the matching
+// reader. New DataFiles should not set distinct counts.
func (d *dataFile) MarshalAvroEntry(spec PartitionSpec, schema *Schema,
version int) ([]byte, error) {
if version < 1 || version > 3 {
return nil, fmt.Errorf("iceberg: MarshalAvroEntry: unsupported
format version %d", version)
diff --git a/internal/avro_schemas.go b/internal/avro_schemas.go
index 2dc8ba8f..720f4be4 100644
--- a/internal/avro_schemas.go
+++ b/internal/avro_schemas.go
@@ -259,9 +259,6 @@ func init() {
fieldNode("nan_value_counts",
NullableNode(newMapNode("k138_v139", IntNode,
LongNode, 138, 139)),
137, withDoc("map of value to count")),
- fieldNode("distinct_counts",
- NullableNode(newMapNode("k123_v124", IntNode,
LongNode, 123, 124)),
- 111, withDoc("map of column id to distinct
value count")),
fieldNode("lower_bounds",
NullableNode(newMapNode("k126_v127", IntNode,
BytesNode, 126, 127)),
125, withDoc("map of column id to lower
bound")),
@@ -298,9 +295,6 @@ func init() {
fieldNode("nan_value_counts",
NullableNode(newMapNode("k138_v139", IntNode,
LongNode, 138, 139)),
137, withDoc("map of value to count")),
- fieldNode("distinct_counts",
- NullableNode(newMapNode("k123_v124", IntNode,
LongNode, 123, 124)),
- 111, withDoc("map of column id to distinct
value count")),
fieldNode("lower_bounds",
NullableNode(newMapNode("k126_v127", IntNode,
BytesNode, 126, 127)),
125, withDoc("map of column id to lower
bound")),
diff --git a/manifest.go b/manifest.go
index b6fb77f9..af0040cf 100644
--- a/manifest.go
+++ b/manifest.go
@@ -944,16 +944,6 @@ func (v3writerImpl) prepareEntry(entry *manifestEntry,
snapshotID int64) (Manife
}
}
- // v3 spec deprecates data_file.distinct_counts (Java parity:
- // apache/iceberg#12182). prepareEntry takes ownership of the entry's
data
- // file and clears the Avro-facing pointer; the cached distinctCntMap
and
- // DistinctValueCounts() getter are intentionally preserved so
in-process
- // readers keep their view. Best-effort: only the in-tree *dataFile is
- // cleared; third-party DataFile impls bypass this guard.
- if df, ok := entry.DataFile().(*dataFile); ok {
- df.DistinctCounts = nil
- }
-
return entry, nil
}
@@ -2248,6 +2238,13 @@ func (b *DataFileBuilder) NaNValueCounts(counts
map[int]int64) *DataFileBuilder
}
// DistinctValueCounts sets the distinct value counts for the data file.
+//
+// Deprecated: distinct_counts (field 111) is deprecated in every
+// version of the Iceberg spec (apache/iceberg#12182). The Avro
+// manifest-entry schemas omit the field for v1, v2, and v3, so values
+// set here are not transported in manifests written by this library.
+// The setter is retained for round-tripping legacy DataFiles read from
+// older manifests; new code should not call it.
func (b *DataFileBuilder) DistinctValueCounts(counts map[int]int64)
*DataFileBuilder {
b.d.DistinctCounts = mapToAvroColMap(counts)
diff --git a/manifest_test.go b/manifest_test.go
index 69e53cf0..eb0071a1 100644
--- a/manifest_test.go
+++ b/manifest_test.go
@@ -2115,10 +2115,21 @@ func (m *ManifestTestSuite)
TestManifestRoundTripSortOrderID() {
m.Equal(expectedSortOrderID, *got)
}
-// TestWriteManifestV3OmitsDistinctCounts verifies the v3 writer clears
-// data_file.distinct_counts (deprecated by v3 spec; Java parity:
-// apache/iceberg#12182). v2 round-trip will be added with #1038.
-func (m *ManifestTestSuite) TestWriteManifestV3OmitsDistinctCounts() {
+// TestWriteManifestOmitsDistinctCounts verifies that the manifest
+// writer drops the deprecated distinct_counts field (id 111) from the
+// wire for every format version, regardless of what the source
+// DataFile carries (apache/iceberg#12182). The data_file_v{1,2,3}
+// Avro schemas in internal/avro_schemas.go omit field 111, so the
+// encoder never emits it and a round-trip read returns an empty map.
+func (m *ManifestTestSuite) TestWriteManifestOmitsDistinctCounts() {
+ for _, version := range []int{1, 2, 3} {
+ m.Run("v"+strconv.Itoa(version), func() {
+ m.assertWriteOmitsDistinctCounts(version)
+ })
+ }
+}
+
+func (m *ManifestTestSuite) assertWriteOmitsDistinctCounts(version int) {
partitionSpec := NewPartitionSpecID(0)
snapshotID := int64(1)
seqNum := int64(1)
@@ -2137,29 +2148,158 @@ func (m *ManifestTestSuite)
TestWriteManifestV3OmitsDistinctCounts() {
m.Require().NoError(err)
dataFileBuilder.DistinctValueCounts(map[int]int64{1: 42})
- entry := NewManifestEntry(
- EntryStatusADDED,
- &snapshotID,
- &seqNum, &seqNum,
- dataFileBuilder.Build(),
- )
-
var buf bytes.Buffer
- _, err = WriteManifest(
- "s3://bucket/ns/table/metadata/distinct.avro", &buf, 3,
+ file, err := WriteManifest(
+ "s3://bucket/ns/table/metadata/distinct.avro", &buf, version,
partitionSpec,
- NewSchema(
- 0,
+ NewSchema(0,
NestedField{ID: 1, Name: "id", Type: Int64Type{},
Required: true},
),
snapshotID,
- []ManifestEntry{entry},
+ []ManifestEntry{NewManifestEntry(
+ EntryStatusADDED,
+ &snapshotID,
+ &seqNum, &seqNum,
+ dataFileBuilder.Build(),
+ )},
)
m.Require().NoError(err)
- df, ok := entry.DataFile().(*dataFile)
- m.Require().True(ok)
- m.Nil(df.DistinctCounts, "v3 writer must clear DistinctCounts on the
entry's *dataFile")
+ entries, err := ReadManifest(file, &buf, false)
+ m.Require().NoError(err)
+ m.Require().Len(entries, 1)
+ m.Empty(entries[0].DataFile().DistinctValueCounts(),
+ "manifest writer must drop distinct_counts on the wire for
every format version; "+
+ "see internal/avro_schemas.go
data_file_v"+strconv.Itoa(version))
+}
+
+// TestReadManifestLegacyDistinctCounts is a back-compat regression
+// guard. Older iceberg-go and other Iceberg writers (Java, PySpark)
+// embed distinct_counts (id 111) in the data_file record. PR #1102
+// dropped the field from this library's writer schema, but legacy
+// manifests already on disk must still decode correctly: the dataFile
+// struct's avro:"distinct_counts" tag is intentionally retained for
+// this read path.
+//
+// The test bypasses WriteManifest and writes a raw OCF using a
+// fixture schema that mirrors the pre-PR data_file_v2 (with field
+// 111 re-injected), then reads it through ReadManifest and asserts
+// the distinct counts come back populated.
+func (m *ManifestTestSuite) TestReadManifestLegacyDistinctCounts() {
+ partitionSpec := NewPartitionSpec()
+ tableSchema := NewSchema(0,
+ NestedField{ID: 1, Name: "id", Type: Int64Type{}, Required:
true},
+ )
+ partitionSchema, err :=
partitionTypeToAvroSchema(partitionSpec.PartitionType(tableSchema))
+ m.Require().NoError(err)
+
+ legacySchema := injectDistinctCountsIntoEntrySchema(m.T(),
partitionSchema, 2)
+
+ snapshotID := int64(42)
+ seqNum := int64(7)
+ df := &dataFile{
+ Content: EntryContentData,
+ Path: "s3://bucket/ns/table/data/legacy.parquet",
+ Format: ParquetFile,
+ PartitionData: map[string]any{},
+ RecordCount: 100,
+ FileSize: 4096,
+ DistinctCounts: mapToAvroColMap(map[int]int64{1: 99, 2: 88}),
+ }
+ entry := &manifestEntry{
+ EntryStatus: EntryStatusADDED,
+ Snapshot: &snapshotID,
+ SeqNum: &seqNum,
+ FileSeqNum: &seqNum,
+ Data: df,
+ }
+
+ var buf bytes.Buffer
+ wr, err := ocf.NewWriter(&buf, legacySchema,
+ ocf.WithSchema(legacySchema.String()),
+ ocf.WithMetadata(map[string][]byte{
+ "format-version": []byte("2"),
+ "content": []byte("data"),
+ }),
+ )
+ m.Require().NoError(err)
+ m.Require().NoError(wr.Encode(entry))
+ m.Require().NoError(wr.Close())
+
+ file := &manifestFile{
+ version: 2,
+ Path: "s3://bucket/ns/table/metadata/legacy.avro",
+ Content: ManifestContentData,
+ }
+ entries, err := ReadManifest(file, &buf, false)
+ m.Require().NoError(err)
+ m.Require().Len(entries, 1)
+ m.Equal(map[int]int64{1: 99, 2: 88},
entries[0].DataFile().DistinctValueCounts(),
+ "ReadManifest must decode distinct_counts (field 111) from
legacy "+
+ "manifests where the file's writer schema still carries
the field")
+}
+
+// injectDistinctCountsIntoEntrySchema returns a manifest_entry avro
+// schema for the given format version with the deprecated
+// distinct_counts field (id 111) re-added into the data_file record.
+// The shape mirrors the pre-PR-1102 data_file_v{version} schema
+// definition that was removed from internal/avro_schemas.go, so it
+// can be used to fixture-up legacy manifests for back-compat tests.
+func injectDistinctCountsIntoEntrySchema(t *testing.T, partitionSchema
*avro.Schema, version int) *avro.Schema {
+ t.Helper()
+
+ base, err := internal.NewManifestEntrySchema(partitionSchema, version)
+ if err != nil {
+ t.Fatalf("base entry schema for v%d: %v", version, err)
+ }
+ root := base.Root()
+
+ dfIdx := -1
+ for i := range root.Fields {
+ if root.Fields[i].Name == "data_file" {
+ dfIdx = i
+
+ break
+ }
+ }
+ if dfIdx == -1 {
+ t.Fatalf("data_file field not found on manifest_entry v%d
schema", version)
+ }
+
+ distinctCountsField := avro.SchemaField{
+ Name: "distinct_counts",
+ Type: avro.SchemaNode{
+ Type: "union",
+ Branches: []avro.SchemaNode{
+ {Type: "null"},
+ {
+ Type: "array",
+ Items: &avro.SchemaNode{
+ Type: "record",
+ Name: "k123_v124",
+ Fields: []avro.SchemaField{
+ {Name: "key", Type:
avro.SchemaNode{Type: "int"}, Props: map[string]any{"field-id": 123}},
+ {Name: "value", Type:
avro.SchemaNode{Type: "long"}, Props: map[string]any{"field-id": 124}},
+ },
+ },
+ Props: map[string]any{"logicalType":
"map"},
+ },
+ },
+ },
+ Props: map[string]any{"field-id": 111},
+ Doc: "map of column id to distinct value count",
+ }
+
+ dfField := root.Fields[dfIdx]
+ dfField.Type.Fields = append(append([]avro.SchemaField{},
dfField.Type.Fields...), distinctCountsField)
+ root.Fields[dfIdx] = dfField
+
+ legacy, err := root.Schema()
+ if err != nil {
+ t.Fatalf("recompiling legacy entry schema for v%d: %v",
version, err)
+ }
+
+ return legacy
}
func (m *ManifestTestSuite) TestWriteManifestClosesWriterOnEntryError() {
@@ -2334,64 +2474,3 @@ func (m *ManifestTestSuite)
TestEntriesCloseErrorAsFinalPair() {
"terminal error must equal or wrap the simulated close error")
m.Equal(1, file.closeCount, "file must be closed exactly once even when
Close returns an error")
}
-
-// TestWriteManifestV2KeepsDistinctCounts is a regression guard that v2
-// manifest writers preserve data_file.distinct_counts (id 111) per the
-// Iceberg v2 spec. Fixes #1038.
-func (m *ManifestTestSuite) TestWriteManifestV2KeepsDistinctCounts() {
- m.assertDistinctCountsRoundTrip(2)
-}
-
-// TestWriteManifestV1KeepsDistinctCounts is a regression guard that v1
-// manifest writers preserve data_file.distinct_counts (id 111) per the
-// Iceberg v1 spec. Fixes #1038.
-func (m *ManifestTestSuite) TestWriteManifestV1KeepsDistinctCounts() {
- m.assertDistinctCountsRoundTrip(1)
-}
-
-// assertDistinctCountsRoundTrip writes a manifest at the given format
-// version with distinct_counts populated for one column, round-trips it
-// through ReadManifest, and asserts the read side observes the same map.
-func (m *ManifestTestSuite) assertDistinctCountsRoundTrip(version int) {
- partitionSpec := NewPartitionSpecID(0)
- snapshotID := int64(1)
- seqNum := int64(1)
-
- dataFileBuilder, err := NewDataFileBuilder(
- partitionSpec,
- EntryContentData,
- "s3://bucket/ns/table/data/distinct.parquet",
- ParquetFile,
- map[int]any{},
- map[int]string{},
- map[int]int{},
- 1,
- 1,
- )
- m.Require().NoError(err)
- dataFileBuilder.DistinctValueCounts(map[int]int64{1: 42})
-
- var buf bytes.Buffer
- file, err := WriteManifest(
- "s3://bucket/ns/table/metadata/distinct.avro", &buf, version,
- partitionSpec,
- NewSchema(0,
- NestedField{ID: 1, Name: "id", Type: Int64Type{},
Required: true},
- ),
- snapshotID,
- []ManifestEntry{NewManifestEntry(
- EntryStatusADDED,
- &snapshotID,
- &seqNum, &seqNum,
- dataFileBuilder.Build(),
- )},
- )
- m.Require().NoError(err)
-
- entries, err := ReadManifest(file, &buf, false)
- m.Require().NoError(err)
- m.Require().Len(entries, 1)
-
- m.Equal(map[int]int64{1: 42},
entries[0].DataFile().DistinctValueCounts(),
- "manifest writer must preserve distinct_counts for the
requested format version")
-}
diff --git a/table/pos_delete_partitioned_fanout_writer_test.go
b/table/pos_delete_partitioned_fanout_writer_test.go
index 83b6fc42..7b6e5284 100644
--- a/table/pos_delete_partitioned_fanout_writer_test.go
+++ b/table/pos_delete_partitioned_fanout_writer_test.go
@@ -74,7 +74,7 @@ func TestPositionDeletePartitionedFanoutWriterProcessBatch(t
*testing.T) {
name: "success",
pathToPartitionContext:
map[string]partitionContext{"file://namespace/age_bucket=1/test.parquet":
{partitionData: map[int]any{iceberg.PartitionDataIDStart: 1}, specID: 0}},
input:
mustLoadRecordBatchFromJSON(PositionalDeleteArrowSchema, `[{"file_path":
"file://namespace/age_bucket=1/test.parquet", "pos": 100}]`),
- expectedDataFile: &mockDataFile{columnSizes:
map[int]int64{2147483545: 88, 2147483546: 174}, format: iceberg.ParquetFile,
partition: map[int]any{iceberg.PartitionDataIDStart: 1}, count: 1, specid: 0,
contentType: iceberg.EntryContentPosDeletes, sortOrderID: ptr(1)},
+ expectedDataFile: &mockDataFile{columnSizes:
map[int]int64{2147483545: 86, 2147483546: 172}, format: iceberg.ParquetFile,
partition: map[int]any{iceberg.PartitionDataIDStart: 1}, count: 1, specid: 0,
contentType: iceberg.EntryContentPosDeletes, sortOrderID: ptr(1)},
},
// This test case illustrates how the
positionDeletePartitionedFanoutWriter does not validate that all records
// in a batch have the same file path. Doing so would be
prohibitive in the current implementation and
@@ -84,7 +84,7 @@ func TestPositionDeletePartitionedFanoutWriterProcessBatch(t
*testing.T) {
name: "batch with records having
different file paths",
pathToPartitionContext:
map[string]partitionContext{"file://namespace/age_bucket=1/test.parquet":
{partitionData: map[int]any{iceberg.PartitionDataIDStart: 1}, specID: 0}},
input:
mustLoadRecordBatchFromJSON(PositionalDeleteArrowSchema, `[{"file_path":
"file://namespace/age_bucket=1/test.parquet", "pos": 100}, {"file_path":
"file://namespace/age_bucket=0/test.parquet", "pos": 10}]`),
- expectedDataFile: &mockDataFile{columnSizes:
map[int]int64{2147483545: 96, 2147483546: 187}, format: iceberg.ParquetFile,
partition: map[int]any{iceberg.PartitionDataIDStart: 1}, count: 2, specid: 0,
contentType: iceberg.EntryContentPosDeletes, sortOrderID: ptr(1)},
+ expectedDataFile: &mockDataFile{columnSizes:
map[int]int64{2147483545: 94, 2147483546: 185}, format: iceberg.ParquetFile,
partition: map[int]any{iceberg.PartitionDataIDStart: 1}, count: 2, specid: 0,
contentType: iceberg.EntryContentPosDeletes, sortOrderID: ptr(1)},
},
}
diff --git a/table/table_test.go b/table/table_test.go
index 17c5bd16..adf8c052 100644
--- a/table/table_test.go
+++ b/table/table_test.go
@@ -546,12 +546,12 @@ func (t *TableWritingTestSuite)
TestAddFilesUnpartitioned() {
Operation: table.OpAppend,
Properties: iceberg.Properties{
"added-data-files": "5",
- "added-files-size": "3590",
+ "added-files-size": "3070",
"added-records": "5",
"total-data-files": "5",
"total-delete-files": "0",
"total-equality-deletes": "0",
- "total-files-size": "3590",
+ "total-files-size": "3070",
"total-position-deletes": "0",
"total-records": "5",
},
@@ -767,13 +767,13 @@ func (t *TableWritingTestSuite)
TestAddFilesPartitionedTable() {
Operation: table.OpAppend,
Properties: iceberg.Properties{
"added-data-files": "5",
- "added-files-size": "3590",
+ "added-files-size": "3070",
"added-records": "5",
"changed-partition-count": "1",
"total-data-files": "5",
"total-delete-files": "0",
"total-equality-deletes": "0",
- "total-files-size": "3590",
+ "total-files-size": "3070",
"total-position-deletes": "0",
"total-records": "5",
},
@@ -1133,15 +1133,15 @@ func (t *TableWritingTestSuite) TestReplaceDataFiles() {
Operation: table.OpOverwrite,
Properties: iceberg.Properties{
"added-data-files": "1",
- "added-files-size": "1066",
+ "added-files-size": "963",
"added-records": "4",
"deleted-data-files": "2",
"deleted-records": "4",
- "removed-files-size": "2132",
+ "removed-files-size": "1816",
"total-data-files": "4",
"total-delete-files": "0",
"total-equality-deletes": "0",
- "total-files-size": "4264",
+ "total-files-size": "3687",
"total-position-deletes": "0",
"total-records": "10",
},