This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git


The following commit(s) were added to refs/heads/main by this push:
     new d4d08e7b feat(metadata): Add support for source-ids (#651)
d4d08e7b is described below

commit d4d08e7b59796b5e43d73d21bc579328c4c5bef1
Author: Alex Stephen <[email protected]>
AuthorDate: Thu Dec 11 11:51:16 2025 -0800

    feat(metadata): Add support for source-ids (#651)
    
    Part of #589
    
    We don't currently support the `source-ids` field on PartitionFields.
    This will be more important when we have support for multi-arg
    transforms.
    
    Since we don't have support for them, I'm just using the UnmarshalJSON
    field to set SourceID. We'll need a (much larger) refactor eventually to
    support an array of SourceIDs.
---
 partitions.go      | 19 +++++++++++++
 partitions_test.go | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)

diff --git a/partitions.go b/partitions.go
index 40249429..ecba79ae 100644
--- a/partitions.go
+++ b/partitions.go
@@ -76,9 +76,21 @@ func (p *PartitionField) String() string {
 }
 
 func (p *PartitionField) UnmarshalJSON(b []byte) error {
+       var raw map[string]json.RawMessage
+       if err := json.Unmarshal(b, &raw); err != nil {
+               return fmt.Errorf("%w: failed to unmarshal partition field", err)
+       }
+
+       if _, ok := raw["source-id"]; ok {
+               if _, ok := raw["source-ids"]; ok {
+                       return errors.New("partition field cannot contain both source-id and source-ids")
+               }
+       }
+
        type Alias PartitionField
        aux := struct {
                TransformString string `json:"transform"`
+               SourceIDs       []int  `json:"source-ids,omitempty"`
                *Alias
        }{
                Alias: (*Alias)(p),
@@ -89,6 +101,13 @@ func (p *PartitionField) UnmarshalJSON(b []byte) error {
                return err
        }
 
+       if len(aux.SourceIDs) > 0 {
+               if len(aux.SourceIDs) != 1 {
+                       return errors.New("partition field source-ids must contain exactly one id")
+               }
+               p.SourceID = aux.SourceIDs[0]
+       }
+
        if p.Transform, err = ParseTransform(aux.TransformString); err != nil {
                return err
        }
diff --git a/partitions_test.go b/partitions_test.go
index 8a04aed2..5ad96300 100644
--- a/partitions_test.go
+++ b/partitions_test.go
@@ -246,3 +246,84 @@ func TestGetPartitionFieldName(t *testing.T) {
                })
        }
 }
+
+func TestPartitionFieldUnmarshalJSON(t *testing.T) {
+       t.Run("unmarshal with source-id", func(t *testing.T) {
+               jsonData := `
+               {
+                       "source-id": 1,
+                       "field-id": 1000,
+                       "transform": "truncate[19]",
+                       "name": "str_truncate"
+               }`
+               var field iceberg.PartitionField
+               err := json.Unmarshal([]byte(jsonData), &field)
+               require.NoError(t, err)
+               assert.Equal(t, 1, field.SourceID)
+               assert.Equal(t, 1000, field.FieldID)
+               assert.Equal(t, "str_truncate", field.Name)
+               assert.Equal(t, iceberg.TruncateTransform{Width: 19}, field.Transform)
+       })
+
+       t.Run("unmarshal with source-ids", func(t *testing.T) {
+               jsonData := `
+               {
+                       "source-ids": [2],
+                       "field-id": 1001,
+                       "transform": "bucket[25]",
+                       "name": "int_bucket"
+               }`
+               var field iceberg.PartitionField
+               err := json.Unmarshal([]byte(jsonData), &field)
+               require.NoError(t, err)
+               assert.Equal(t, 2, field.SourceID)
+               assert.Equal(t, 1001, field.FieldID)
+               assert.Equal(t, "int_bucket", field.Name)
+               assert.Equal(t, iceberg.BucketTransform{NumBuckets: 25}, field.Transform)
+       })
+
+       t.Run("unmarshal with multiple source-ids should fail", func(t *testing.T) {
+               jsonData := `
+               {
+                       "source-ids": [2, 3],
+                       "field-id": 1001,
+                       "transform": "bucket[25]",
+                       "name": "int_bucket"
+               }`
+               var field iceberg.PartitionField
+               err := json.Unmarshal([]byte(jsonData), &field)
+               require.Error(t, err)
+               assert.EqualError(t, err, "partition field source-ids must contain exactly one id")
+       })
+
+       t.Run("unmarshal with both source-id and source-ids", func(t *testing.T) {
+               jsonData := `
+               {
+                       "source-id": 1,
+                       "source-ids": [2],
+                       "field-id": 1002,
+                       "transform": "identity",
+                       "name": "identity"
+               }`
+               var field iceberg.PartitionField
+               err := json.Unmarshal([]byte(jsonData), &field)
+               require.Error(t, err)
+               assert.EqualError(t, err, "partition field cannot contain both source-id and source-ids")
+       })
+
+       t.Run("unmarshal with no source id", func(t *testing.T) {
+               jsonData := `
+               {
+                       "field-id": 1003,
+                       "transform": "void",
+                       "name": "void"
+               }`
+               var field iceberg.PartitionField
+               err := json.Unmarshal([]byte(jsonData), &field)
+               require.NoError(t, err)
+               assert.Zero(t, field.SourceID)
+               assert.Equal(t, 1003, field.FieldID)
+               assert.Equal(t, "void", field.Name)
+               assert.Equal(t, iceberg.VoidTransform{}, field.Transform)
+       })
+}

Reply via email to