This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 1432d8a9 fix: handle LargeString columns in partition transforms (#780)
1432d8a9 is described below

commit 1432d8a963b5268e16931be8cc0a581b3f86088b
Author: Alex <[email protected]>
AuthorDate: Mon Mar 16 16:11:41 2026 -0600

    fix: handle LargeString columns in partition transforms (#780)
    
    `getArrowValueAsIcebergLiteral` handled `*array.String` (UTF8) but not
    `*array.LargeString` (LargeUTF8), causing writes to fail with
    `unsupported value type: string` whenever a schema produced by
    `SchemaToArrowSchema(..., useLargeTypes=true)` was used as a partition
    source field. This adds the missing `*array.LargeString` case, plus the
    analogous `*array.LargeBinary` case.
    
    Existing transform tests (identity, bucket, truncate) are extended with
    a `large_name` (`LargeString`) column to cover this path.
---
 table/partitioned_fanout_writer.go      |  6 ++++++
 table/partitioned_fanout_writer_test.go | 36 +++++++++++++++++++--------------
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/table/partitioned_fanout_writer.go b/table/partitioned_fanout_writer.go
index 81479085..2e6ea82e 100644
--- a/table/partitioned_fanout_writer.go
+++ b/table/partitioned_fanout_writer.go
@@ -394,6 +394,9 @@ func getArrowValueAsIcebergLiteral(column arrow.Array, row int) (iceberg.Literal
 
        case *array.String:
 
+               return iceberg.NewLiteral(arr.Value(row)), nil
+       case *array.LargeString:
+
                return iceberg.NewLiteral(arr.Value(row)), nil
        case *array.Int64:
 
@@ -431,6 +434,9 @@ func getArrowValueAsIcebergLiteral(column arrow.Array, row int) (iceberg.Literal
        case *array.Binary:
 
                return iceberg.NewLiteral(arr.Value(row)), nil
+       case *array.LargeBinary:
+
+               return iceberg.NewLiteral(arr.Value(row)), nil
 
        default:
                val := column.GetOneForMarshal(row)
diff --git a/table/partitioned_fanout_writer_test.go b/table/partitioned_fanout_writer_test.go
index 77727bf1..9c95885c 100644
--- a/table/partitioned_fanout_writer_test.go
+++ b/table/partitioned_fanout_writer_test.go
@@ -166,54 +166,60 @@ func (s *FanoutWriterTestSuite) TestIdentityTransform() {
        arrSchema := arrow.NewSchema([]arrow.Field{
                {Name: "id", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
                {Name: "name", Type: arrow.BinaryTypes.String, Nullable: true},
+               {Name: "large_name", Type: arrow.BinaryTypes.LargeString, Nullable: true},
        }, nil)
 
        testRecord := s.createCustomTestRecord(arrSchema, [][]any{
-               {int32(1), "partition_a"},
-               {int32(2), "partition_b"},
-               {int32(3), "partition_a"},
-               {int32(4), "partition_b"},
-               {nil, nil},
+               {int32(1), "partition_a", "partition_a"},
+               {int32(2), "partition_b", "partition_b"},
+               {int32(3), "partition_a", "partition_c"},
+               {int32(4), "partition_b", "partition_d"},
+               {nil, nil, nil},
        })
        defer testRecord.Release()
 
        s.testTransformPartition(iceberg.IdentityTransform{}, "name", "identity", testRecord, 3)
+       s.testTransformPartition(iceberg.IdentityTransform{}, "large_name", "identity_large_string", testRecord, 5)
 }
 
 func (s *FanoutWriterTestSuite) TestBucketTransform() {
        arrSchema := arrow.NewSchema([]arrow.Field{
                {Name: "id", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
                {Name: "name", Type: arrow.BinaryTypes.String, Nullable: true},
+               {Name: "large_name", Type: arrow.BinaryTypes.LargeString, Nullable: true},
        }, nil)
 
        testRecord := s.createCustomTestRecord(arrSchema, [][]any{
-               {int32(1), "partition_a"},
-               {int32(2), "partition_b"},
-               {int32(3), "partition_a"},
-               {int32(4), "partition_b"},
-               {nil, nil},
+               {int32(1), "partition_a", "partition_a"},
+               {int32(2), "partition_b", "partition_b"},
+               {int32(3), "partition_a", "partition_c"},
+               {int32(4), "partition_b", "partition_d"},
+               {nil, nil, nil},
        })
        defer testRecord.Release()
 
        s.testTransformPartition(iceberg.BucketTransform{NumBuckets: 3}, "id", "bucket", testRecord, 3)
+       s.testTransformPartition(iceberg.BucketTransform{NumBuckets: 3}, "large_name", "bucket_large_string", testRecord, 3)
 }
 
 func (s *FanoutWriterTestSuite) TestTruncateTransform() {
        arrSchema := arrow.NewSchema([]arrow.Field{
                {Name: "id", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
                {Name: "name", Type: arrow.BinaryTypes.String, Nullable: true},
+               {Name: "large_name", Type: arrow.BinaryTypes.LargeString, Nullable: true},
        }, nil)
 
        testRecord := s.createCustomTestRecord(arrSchema, [][]any{
-               {int32(1), "abcdef"},
-               {int32(2), "abcxyz"},
-               {int32(3), "abcuvw"},
-               {int32(4), "defghi"},
-               {nil, nil},
+               {int32(1), "abcdef", "abcdef"},
+               {int32(2), "abcxyz", "abcxyz"},
+               {int32(3), "abcuvw", "bcduvw"},
+               {int32(4), "defghi", "defghi"},
+               {nil, nil, nil},
        })
        defer testRecord.Release()
 
        s.testTransformPartition(iceberg.TruncateTransform{Width: 3}, "name", "truncate", testRecord, 3)
+       s.testTransformPartition(iceberg.TruncateTransform{Width: 3}, "large_name", "truncate_large_string", testRecord, 4)
 }
 
 func (s *FanoutWriterTestSuite) TestYearTransform() {

Reply via email to