This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 6e679fc6 fix: remove unnecessary safe-guard (#775)
6e679fc6 is described below
commit 6e679fc689a81b439da751b54a1f7718ddede388
Author: Tobias Pütz <[email protected]>
AuthorDate: Thu Mar 19 17:30:38 2026 +0100
fix: remove unnecessary safe-guard (#775)
`ArrowSchemaToIceberg` has a `hasIDs` branch, which means add-files can now
support files with field_ids, so the guard rejecting them is unnecessary
---
table/arrow_utils.go | 14 ++++----
table/table_test.go | 99 +++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 100 insertions(+), 13 deletions(-)
diff --git a/table/arrow_utils.go b/table/arrow_utils.go
index d7f0ee89..16bbc008 100644
--- a/table/arrow_utils.go
+++ b/table/arrow_utils.go
@@ -1373,21 +1373,19 @@ func filesToDataFiles(ctx context.Context, fileIO
iceio.IO, meta *MetadataBuilde
arrSchema := must(rdr.Schema())
- if hasIDs := must(VisitArrowSchema(arrSchema,
hasIDs{})); hasIDs {
- yield(nil, fmt.Errorf("%w: cannot add file %s
because it has field-ids. add-files only supports the addition of files without
field_ids",
- iceberg.ErrNotImplemented, filePath))
-
- return
- }
-
if err := checkArrowSchemaCompat(currentSchema,
arrSchema, false); err != nil {
yield(nil, err)
return
}
+ pathToIDSchema := currentSchema
+ if fileHasIDs := must(VisitArrowSchema(arrSchema,
hasIDs{})); fileHasIDs {
+ pathToIDSchema =
must(ArrowSchemaToIceberg(arrSchema, false, nil))
+ }
+
statistics :=
format.DataFileStatsFromMeta(rdr.Metadata(),
must(computeStatsPlan(currentSchema, meta.props)),
- must(format.PathToIDMapping(currentSchema)))
+ must(format.PathToIDMapping(pathToIDSchema)))
partitionValues := make(map[int]any)
if !currentSpec.Equals(*iceberg.UnpartitionedSpec) {
diff --git a/table/table_test.go b/table/table_test.go
index 02e34482..d9949449 100644
--- a/table/table_test.go
+++ b/table/table_test.go
@@ -374,6 +374,14 @@ func (t *TableWritingTestSuite) writeParquet(fio
iceio.WriteFileIO, filePath str
nil, pqarrow.DefaultWriterProps()))
}
+func (t *TableWritingTestSuite) writeParquetWithStoredSchema(fio
iceio.WriteFileIO, filePath string, arrTbl arrow.Table) {
+ fo, err := fio.Create(filePath)
+ t.Require().NoError(err)
+
+ t.Require().NoError(pqarrow.WriteTable(arrTbl, fo, arrTbl.NumRows(),
+ nil,
pqarrow.NewArrowWriterProperties(pqarrow.WithStoreSchema())))
+}
+
func (t *TableWritingTestSuite) createTable(identifier table.Identifier,
formatVersion int, spec iceberg.PartitionSpec, sc *iceberg.Schema) *table.Table
{
meta, err := table.NewMetadata(sc, &spec, table.UnsortedSortOrder,
t.location, iceberg.Properties{table.PropertyFormatVersion:
strconv.Itoa(formatVersion)})
@@ -462,8 +470,7 @@ func (t *TableWritingTestSuite)
TestAddFilesUnpartitionedHasFieldIDs() {
ident := table.Identifier{"default", "unpartitioned_table_with_ids_v" +
strconv.Itoa(t.formatVersion)}
tbl := t.createTable(ident, t.formatVersion,
*iceberg.UnpartitionedSpec, t.tableSchema)
-
- t.NotNil(tbl)
+ t.Require().NotNil(tbl)
files := make([]string, 0)
for i := range 5 {
@@ -473,9 +480,91 @@ func (t *TableWritingTestSuite)
TestAddFilesUnpartitionedHasFieldIDs() {
}
tx := tbl.NewTransaction()
- err := tx.AddFiles(t.ctx, files, nil, false)
- t.Error(err)
- t.ErrorIs(err, iceberg.ErrNotImplemented)
+ t.Require().NoError(tx.AddFiles(t.ctx, files, nil, false))
+
+ stagedTbl, err := tx.StagedTable()
+ t.Require().NoError(err)
+ t.NotNil(stagedTbl.NameMapping())
+
+ scan, err := tx.Scan()
+ t.Require().NoError(err)
+
+ contents, err := scan.ToArrowTable(context.Background())
+ t.Require().NoError(err)
+ defer contents.Release()
+
+ t.EqualValues(5, contents.NumRows())
+}
+
+func (t *TableWritingTestSuite) TestAddFilesFieldIDsWithDifferentNames() {
+ ident := table.Identifier{"default",
"unpartitioned_table_ids_diff_names_v" + strconv.Itoa(t.formatVersion)}
+ tbl := t.createTable(ident, t.formatVersion,
+ *iceberg.UnpartitionedSpec, t.tableSchema)
+ t.Require().NotNil(tbl)
+
+ renamedSchema := arrow.NewSchema([]arrow.Field{
+ {
+ Name: "alpha", Type: arrow.FixedWidthTypes.Boolean,
+ Metadata:
arrow.MetadataFrom(map[string]string{"PARQUET:field_id": "1"}),
+ },
+ {
+ Name: "beta", Type: arrow.BinaryTypes.String,
+ Metadata:
arrow.MetadataFrom(map[string]string{"PARQUET:field_id": "2"}),
+ },
+ {
+ Name: "gamma", Type: arrow.PrimitiveTypes.Int32,
+ Metadata:
arrow.MetadataFrom(map[string]string{"PARQUET:field_id": "3"}),
+ },
+ {
+ Name: "delta", Type: arrow.PrimitiveTypes.Date32,
+ Metadata:
arrow.MetadataFrom(map[string]string{"PARQUET:field_id": "4"}),
+ },
+ }, nil)
+
+ renamedTbl, err := array.TableFromJSON(memory.DefaultAllocator,
renamedSchema, []string{
+ `[{"alpha": true, "beta": "bar_string", "gamma": 123, "delta":
"2024-03-07"}]`,
+ })
+ t.Require().NoError(err)
+ defer renamedTbl.Release()
+
+ files := make([]string, 0, 5)
+ for i := range 5 {
+ filePath :=
fmt.Sprintf("%s/unpartitioned_ids_diff_names/test-%d.parquet", t.location, i)
+ t.writeParquetWithStoredSchema(mustFS(t.T(),
tbl).(iceio.WriteFileIO), filePath, renamedTbl)
+ files = append(files, filePath)
+ }
+
+ tx := tbl.NewTransaction()
+ t.Require().NoError(tx.AddFiles(t.ctx, files, nil, false))
+
+ stagedTbl, err := tx.StagedTable()
+ t.Require().NoError(err)
+ t.NotNil(stagedTbl.NameMapping())
+
+ scan, err := tx.Scan()
+ t.Require().NoError(err)
+
+ contents, err := scan.ToArrowTable(context.Background())
+ t.Require().NoError(err)
+ defer contents.Release()
+
+ t.EqualValues(5, contents.NumRows())
+
+ expectedSchema := arrow.NewSchema([]arrow.Field{
+ {Name: "foo", Type: arrow.FixedWidthTypes.Boolean, Nullable:
true},
+ {Name: "bar", Type: arrow.BinaryTypes.String, Nullable: true},
+ {Name: "baz", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
+ {Name: "qux", Type: arrow.PrimitiveTypes.Date32, Nullable:
true},
+ }, nil)
+ t.Truef(expectedSchema.Equal(contents.Schema()),
+ "expected table schema names, got: %s", contents.Schema())
+
+ for i := range 5 {
+ t.Equal(true,
contents.Column(0).Data().Chunk(i).(*array.Boolean).Value(0))
+ t.Equal("bar_string",
contents.Column(1).Data().Chunk(i).(*array.String).Value(0))
+ t.Equal(int32(123),
contents.Column(2).Data().Chunk(i).(*array.Int32).Value(0))
+ t.Equal(arrow.Date32(19789),
contents.Column(3).Data().Chunk(i).(*array.Date32).Value(0))
+ }
}
func (t *TableWritingTestSuite) TestAddFilesFailsSchemaMismatch() {