This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 2b83eed3 fix(table): don't always set schema.name-mapping.default
(#803)
2b83eed3 is described below
commit 2b83eed356d68fa99bd0f3bafde8880f7e096617
Author: Tyler Rockwood <[email protected]>
AuthorDate: Thu Mar 19 12:05:30 2026 -0500
fix(table): don't always set schema.name-mapping.default (#803)
It seems Databricks Unity Catalog is unhappy with this, so don't always set it
by default; instead, let callers opt out of it when they use the sophisticated
method of supplying their own DataFiles.
Fixes: https://github.com/apache/iceberg-go/issues/802
---
table/table_test.go | 74 +++++++++++++++++++++++++++++++++++++++++++
table/transaction.go | 89 ++++++++++++++++++++++++++++++++++++++--------------
2 files changed, 140 insertions(+), 23 deletions(-)
diff --git a/table/table_test.go b/table/table_test.go
index d9949449..59927d1f 100644
--- a/table/table_test.go
+++ b/table/table_test.go
@@ -1046,6 +1046,80 @@ func (t *TableWritingTestSuite) TestAddDataFiles() {
t.Equal("1",
staged.CurrentSnapshot().Summary.Properties["added-data-files"])
}
+func (t *TableWritingTestSuite) TestAddDataFilesAutoNameMapping() {
+ for _, tc := range []struct {
+ name string
+ opts []table.WriteOption
+ expectSet bool
+ }{
+ {"default", nil, true},
+ {"disabled",
[]table.WriteOption{table.WithoutAutoNameMapping()}, false},
+ } {
+ t.Run(tc.name, func() {
+ ident := table.Identifier{"default",
fmt.Sprintf("add_data_files_name_mapping_%s_v%d", tc.name, t.formatVersion)}
+ tbl := t.createTable(ident, t.formatVersion,
*iceberg.UnpartitionedSpec, t.tableSchema)
+
+ _, hasMapping :=
tbl.Properties()[table.DefaultNameMappingKey]
+ t.False(hasMapping, "freshly created table should have
no name mapping")
+
+ filePath := fmt.Sprintf("%s/%s/test.parquet",
t.location, ident[1])
+ t.writeParquet(mustFS(t.T(), tbl).(iceio.WriteFileIO),
filePath, t.arrTbl)
+ df := mustDataFile(t.T(), *iceberg.UnpartitionedSpec,
filePath, nil, 1, mustFileSize(t.T(), filePath))
+
+ tx := tbl.NewTransaction()
+ t.Require().NoError(tx.AddDataFiles(t.ctx,
[]iceberg.DataFile{df}, nil, tc.opts...))
+
+ staged, err := tx.StagedTable()
+ t.Require().NoError(err)
+ _, hasMapping =
staged.Properties()[table.DefaultNameMappingKey]
+ t.Equal(tc.expectSet, hasMapping)
+ })
+ }
+}
+
+func (t *TableWritingTestSuite)
TestReplaceDataFilesWithDataFilesAutoNameMapping() {
+ for _, tc := range []struct {
+ name string
+ opts []table.WriteOption
+ expectSet bool
+ }{
+ {"default", nil, true},
+ {"disabled",
[]table.WriteOption{table.WithoutAutoNameMapping()}, false},
+ } {
+ t.Run(tc.name, func() {
+ ident := table.Identifier{"default",
fmt.Sprintf("replace_data_files_name_mapping_%s_v%d", tc.name, t.formatVersion)}
+ tbl := t.createTable(ident, t.formatVersion,
*iceberg.UnpartitionedSpec, t.tableSchema)
+
+ filePath := fmt.Sprintf("%s/%s/data.parquet",
t.location, ident[1])
+ t.writeParquet(mustFS(t.T(), tbl).(iceio.WriteFileIO),
filePath, t.arrTbl)
+ df := mustDataFile(t.T(), *iceberg.UnpartitionedSpec,
filePath, nil, 1, mustFileSize(t.T(), filePath))
+
+ // Seed the table without setting a name mapping.
+ tx := tbl.NewTransaction()
+ t.Require().NoError(tx.AddDataFiles(t.ctx,
[]iceberg.DataFile{df}, nil, table.WithoutAutoNameMapping()))
+ tbl, err := tx.Commit(t.ctx)
+ t.Require().NoError(err)
+
+ _, hasMapping :=
tbl.Properties()[table.DefaultNameMappingKey]
+ t.False(hasMapping, "setup: table should have no name
mapping")
+
+ replacementPath :=
fmt.Sprintf("%s/%s/replacement.parquet", t.location, ident[1])
+ t.writeParquet(mustFS(t.T(), tbl).(iceio.WriteFileIO),
replacementPath, t.arrTbl)
+
+ deleteFile := mustDataFile(t.T(),
*iceberg.UnpartitionedSpec, filePath, nil, 1, mustFileSize(t.T(), filePath))
+ addFile := mustDataFile(t.T(),
*iceberg.UnpartitionedSpec, replacementPath, nil, 1, mustFileSize(t.T(),
replacementPath))
+
+ tx = tbl.NewTransaction()
+
t.Require().NoError(tx.ReplaceDataFilesWithDataFiles(t.ctx,
[]iceberg.DataFile{deleteFile}, []iceberg.DataFile{addFile}, nil, tc.opts...))
+
+ staged, err := tx.StagedTable()
+ t.Require().NoError(err)
+ _, hasMapping =
staged.Properties()[table.DefaultNameMappingKey]
+ t.Equal(tc.expectSet, hasMapping)
+ })
+ }
+}
+
func (t *TableWritingTestSuite) TestReplaceDataFilesWithDataFiles() {
ident := table.Identifier{"default",
"replace_data_files_with_datafiles_v" + strconv.Itoa(t.formatVersion)}
tbl := t.createTable(ident, t.formatVersion,
*iceberg.UnpartitionedSpec, t.tableSchema)
diff --git a/table/transaction.go b/table/transaction.go
index 860b70e2..815d3459 100644
--- a/table/transaction.go
+++ b/table/transaction.go
@@ -511,6 +511,41 @@ func (t *Transaction) validateDataFilesToAdd(dataFiles
[]iceberg.DataFile, opera
return setToAdd, nil
}
+// WriteOption is an option for methods that operate on pre-built DataFile
objects.
+type WriteOption func(*dataFileCfg)
+
+type dataFileCfg struct {
+ skipAutoNameMapping bool
+}
+
+// WithoutAutoNameMapping disables the automatic setting of the schema name
+// mapping in table properties. By default, methods like
[Transaction.AddDataFiles]
+// and [Transaction.ReplaceDataFilesWithDataFiles] will set the name mapping if
+// one does not already exist. This option is useful when working with catalogs
+// (such as Databricks Unity Catalog) that reject the name mapping property.
+func WithoutAutoNameMapping() WriteOption {
+ return func(cfg *dataFileCfg) {
+ cfg.skipAutoNameMapping = true
+ }
+}
+
+// ensureNameMapping sets the schema name mapping in table properties if one
+// does not already exist. This is extracted as a helper so it can be called
+// from any method that accepts WriteOption.
+func (t *Transaction) ensureNameMapping() error {
+ if t.meta.NameMapping() == nil {
+ nameMapping := t.meta.CurrentSchema().NameMapping()
+ mappingJson, err := json.Marshal(nameMapping)
+ if err != nil {
+ return err
+ }
+
+ return
t.SetProperties(iceberg.Properties{DefaultNameMappingKey: string(mappingJson)})
+ }
+
+ return nil
+}
+
// AddDataFiles adds pre-built DataFiles to the table without scanning them
from storage.
// This is useful for clients who have already constructed DataFile objects
with metadata,
// avoiding the need to read files to extract schema and statistics.
@@ -518,18 +553,34 @@ func (t *Transaction) validateDataFilesToAdd(dataFiles
[]iceberg.DataFile, opera
// Unlike AddFiles, this method does not read files from storage. It validates
only metadata
// that can be checked without opening files (for example spec-id and
partition field IDs).
//
+// By default this method automatically sets the schema name mapping in table
+// properties if one does not already exist. Pass [WithoutAutoNameMapping] to
+// disable this behavior, for example when working with catalogs that reject
+// the name mapping property.
+//
// Callers are responsible for ensuring each DataFile is valid and consistent
with the table.
// Supplying incorrect DataFile metadata can produce an invalid snapshot and
break reads.
-func (t *Transaction) AddDataFiles(ctx context.Context, dataFiles
[]iceberg.DataFile, snapshotProps iceberg.Properties) error {
+func (t *Transaction) AddDataFiles(ctx context.Context, dataFiles
[]iceberg.DataFile, snapshotProps iceberg.Properties, opts ...WriteOption)
error {
if len(dataFiles) == 0 {
return nil
}
+ var cfg dataFileCfg
+ for _, o := range opts {
+ o(&cfg)
+ }
+
setToAdd, err := t.validateDataFilesToAdd(dataFiles, "AddDataFiles")
if err != nil {
return err
}
+ if !cfg.skipAutoNameMapping {
+ if err := t.ensureNameMapping(); err != nil {
+ return err
+ }
+ }
+
fs, err := t.tbl.fsF(ctx)
if err != nil {
return err
@@ -552,18 +603,6 @@ func (t *Transaction) AddDataFiles(ctx context.Context,
dataFiles []iceberg.Data
}
}
- if t.meta.NameMapping() == nil {
- nameMapping := t.meta.CurrentSchema().NameMapping()
- mappingJson, err := json.Marshal(nameMapping)
- if err != nil {
- return err
- }
- err = t.SetProperties(iceberg.Properties{DefaultNameMappingKey:
string(mappingJson)})
- if err != nil {
- return err
- }
- }
-
appendFiles := t.appendSnapshotProducer(fs, snapshotProps)
for _, df := range dataFiles {
appendFiles.appendDataFile(df)
@@ -587,6 +626,11 @@ func (t *Transaction) AddDataFiles(ctx context.Context,
dataFiles []iceberg.Data
// This method does not open files. It validates only metadata that can be
checked
// without reading file contents.
//
+// By default this method automatically sets the schema name mapping in table
+// properties if one does not already exist. Pass [WithoutAutoNameMapping] to
+// disable this behavior, for example when working with catalogs that reject
+// the name mapping property.
+//
// Callers are responsible for ensuring each DataFile is valid and consistent
with the table.
// Supplying incorrect DataFile metadata can produce an invalid snapshot and
break reads.
//
@@ -594,15 +638,20 @@ func (t *Transaction) AddDataFiles(ctx context.Context,
dataFiles []iceberg.Data
// - Files are written via a separate I/O path and metadata is already known
// - Avoiding file scanning improves performance or reliability
// - Working with storage systems where immediate file reads may be
unreliable
-func (t *Transaction) ReplaceDataFilesWithDataFiles(ctx context.Context,
filesToDelete, filesToAdd []iceberg.DataFile, snapshotProps iceberg.Properties)
error {
+func (t *Transaction) ReplaceDataFilesWithDataFiles(ctx context.Context,
filesToDelete, filesToAdd []iceberg.DataFile, snapshotProps iceberg.Properties,
opts ...WriteOption) error {
if len(filesToDelete) == 0 {
if len(filesToAdd) > 0 {
- return t.AddDataFiles(ctx, filesToAdd, snapshotProps)
+ return t.AddDataFiles(ctx, filesToAdd, snapshotProps,
opts...)
}
return nil
}
+ var cfg dataFileCfg
+ for _, o := range opts {
+ o(&cfg)
+ }
+
setToAdd, err := t.validateDataFilesToAdd(filesToAdd,
"ReplaceDataFilesWithDataFiles")
if err != nil {
return err
@@ -654,14 +703,8 @@ func (t *Transaction) ReplaceDataFilesWithDataFiles(ctx
context.Context, filesTo
return errors.New("cannot delete files that do not belong to
the table")
}
- if t.meta.NameMapping() == nil {
- nameMapping := t.meta.CurrentSchema().NameMapping()
- mappingJson, err := json.Marshal(nameMapping)
- if err != nil {
- return err
- }
- err = t.SetProperties(iceberg.Properties{DefaultNameMappingKey:
string(mappingJson)})
- if err != nil {
+ if !cfg.skipAutoNameMapping {
+ if err := t.ensureNameMapping(); err != nil {
return err
}
}