This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new d4d08e7b feat(metadata): Add support for source-ids (#651)
d4d08e7b is described below
commit d4d08e7b59796b5e43d73d21bc579328c4c5bef1
Author: Alex Stephen <[email protected]>
AuthorDate: Thu Dec 11 11:51:16 2025 -0800
feat(metadata): Add support for source-ids (#651)
Part of #589
We don't currently support the `source-ids` field on PartitionFields.
This will be more important when we have support for multi-arg
transforms.
Since we don't support multi-arg transforms yet, I'm just using the
UnmarshalJSON method to set SourceID. We'll need a (much larger) refactor
eventually to support an array of SourceIDs.
---
partitions.go | 19 +++++++++++++
partitions_test.go | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 100 insertions(+)
diff --git a/partitions.go b/partitions.go
index 40249429..ecba79ae 100644
--- a/partitions.go
+++ b/partitions.go
@@ -76,9 +76,21 @@ func (p *PartitionField) String() string {
}
func (p *PartitionField) UnmarshalJSON(b []byte) error {
+ var raw map[string]json.RawMessage
+ if err := json.Unmarshal(b, &raw); err != nil {
+ return fmt.Errorf("%w: failed to unmarshal partition field", err)
+ }
+
+ if _, ok := raw["source-id"]; ok {
+ if _, ok := raw["source-ids"]; ok {
+ return errors.New("partition field cannot contain both source-id and source-ids")
+ }
+ }
+
type Alias PartitionField
aux := struct {
TransformString string `json:"transform"`
+ SourceIDs []int `json:"source-ids,omitempty"`
*Alias
}{
Alias: (*Alias)(p),
@@ -89,6 +101,13 @@ func (p *PartitionField) UnmarshalJSON(b []byte) error {
return err
}
+ if len(aux.SourceIDs) > 0 {
+ if len(aux.SourceIDs) != 1 {
+ return errors.New("partition field source-ids must contain exactly one id")
+ }
+ p.SourceID = aux.SourceIDs[0]
+ }
+
if p.Transform, err = ParseTransform(aux.TransformString); err != nil {
return err
}
diff --git a/partitions_test.go b/partitions_test.go
index 8a04aed2..5ad96300 100644
--- a/partitions_test.go
+++ b/partitions_test.go
@@ -246,3 +246,84 @@ func TestGetPartitionFieldName(t *testing.T) {
})
}
}
+
+func TestPartitionFieldUnmarshalJSON(t *testing.T) {
+ t.Run("unmarshal with source-id", func(t *testing.T) {
+ jsonData := `
+ {
+ "source-id": 1,
+ "field-id": 1000,
+ "transform": "truncate[19]",
+ "name": "str_truncate"
+ }`
+ var field iceberg.PartitionField
+ err := json.Unmarshal([]byte(jsonData), &field)
+ require.NoError(t, err)
+ assert.Equal(t, 1, field.SourceID)
+ assert.Equal(t, 1000, field.FieldID)
+ assert.Equal(t, "str_truncate", field.Name)
+ assert.Equal(t, iceberg.TruncateTransform{Width: 19}, field.Transform)
+ })
+
+ t.Run("unmarshal with source-ids", func(t *testing.T) {
+ jsonData := `
+ {
+ "source-ids": [2],
+ "field-id": 1001,
+ "transform": "bucket[25]",
+ "name": "int_bucket"
+ }`
+ var field iceberg.PartitionField
+ err := json.Unmarshal([]byte(jsonData), &field)
+ require.NoError(t, err)
+ assert.Equal(t, 2, field.SourceID)
+ assert.Equal(t, 1001, field.FieldID)
+ assert.Equal(t, "int_bucket", field.Name)
+ assert.Equal(t, iceberg.BucketTransform{NumBuckets: 25}, field.Transform)
+ })
+
+ t.Run("unmarshal with multiple source-ids should fail", func(t *testing.T) {
+ jsonData := `
+ {
+ "source-ids": [2, 3],
+ "field-id": 1001,
+ "transform": "bucket[25]",
+ "name": "int_bucket"
+ }`
+ var field iceberg.PartitionField
+ err := json.Unmarshal([]byte(jsonData), &field)
+ require.Error(t, err)
+ assert.EqualError(t, err, "partition field source-ids must contain exactly one id")
+ })
+
+ t.Run("unmarshal with both source-id and source-ids", func(t *testing.T) {
+ jsonData := `
+ {
+ "source-id": 1,
+ "source-ids": [2],
+ "field-id": 1002,
+ "transform": "identity",
+ "name": "identity"
+ }`
+ var field iceberg.PartitionField
+ err := json.Unmarshal([]byte(jsonData), &field)
+ require.Error(t, err)
+ assert.EqualError(t, err, "partition field cannot contain both source-id and source-ids")
+ })
+
+ t.Run("unmarshal with no source id", func(t *testing.T) {
+ jsonData := `
+ {
+ "field-id": 1003,
+ "transform": "void",
+ "name": "void"
+ }`
+ var field iceberg.PartitionField
+ err := json.Unmarshal([]byte(jsonData), &field)
+ require.NoError(t, err)
+ assert.Zero(t, field.SourceID)
+ assert.Equal(t, 1003, field.FieldID)
+ assert.Equal(t, "void", field.Name)
+ assert.Equal(t, iceberg.VoidTransform{}, field.Transform)
+ })
+}