This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 1432d8a9 fix: handle LargeString columns in partition transforms (#780)
1432d8a9 is described below
commit 1432d8a963b5268e16931be8cc0a581b3f86088b
Author: Alex <[email protected]>
AuthorDate: Mon Mar 16 16:11:41 2026 -0600
fix: handle LargeString columns in partition transforms (#780)
`getArrowValueAsIcebergLiteral` handled `*array.String` (UTF8) but not
`*array.LargeString` (LargeUTF8), causing writes to fail with
`unsupported value type: string` whenever a schema produced by
`SchemaToArrowSchema(..., useLargeTypes=true)` was used as a partition
source field. This adds the missing `*array.LargeString` case, along with the
analogous `*array.LargeBinary` case.
Existing transform tests (identity, bucket, truncate) are extended with
a `large_name` (`LargeString`) column to cover this path.
---
table/partitioned_fanout_writer.go | 6 ++++++
table/partitioned_fanout_writer_test.go | 36 +++++++++++++++++++--------------
2 files changed, 27 insertions(+), 15 deletions(-)
diff --git a/table/partitioned_fanout_writer.go b/table/partitioned_fanout_writer.go
index 81479085..2e6ea82e 100644
--- a/table/partitioned_fanout_writer.go
+++ b/table/partitioned_fanout_writer.go
@@ -394,6 +394,9 @@ func getArrowValueAsIcebergLiteral(column arrow.Array, row int) (iceberg.Literal
case *array.String:
+ return iceberg.NewLiteral(arr.Value(row)), nil
+ case *array.LargeString:
+
return iceberg.NewLiteral(arr.Value(row)), nil
case *array.Int64:
@@ -431,6 +434,9 @@ func getArrowValueAsIcebergLiteral(column arrow.Array, row int) (iceberg.Literal
case *array.Binary:
return iceberg.NewLiteral(arr.Value(row)), nil
+ case *array.LargeBinary:
+
+ return iceberg.NewLiteral(arr.Value(row)), nil
default:
val := column.GetOneForMarshal(row)
diff --git a/table/partitioned_fanout_writer_test.go b/table/partitioned_fanout_writer_test.go
index 77727bf1..9c95885c 100644
--- a/table/partitioned_fanout_writer_test.go
+++ b/table/partitioned_fanout_writer_test.go
@@ -166,54 +166,60 @@ func (s *FanoutWriterTestSuite) TestIdentityTransform() {
arrSchema := arrow.NewSchema([]arrow.Field{
{Name: "id", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
{Name: "name", Type: arrow.BinaryTypes.String, Nullable: true},
+ {Name: "large_name", Type: arrow.BinaryTypes.LargeString, Nullable: true},
}, nil)
testRecord := s.createCustomTestRecord(arrSchema, [][]any{
- {int32(1), "partition_a"},
- {int32(2), "partition_b"},
- {int32(3), "partition_a"},
- {int32(4), "partition_b"},
- {nil, nil},
+ {int32(1), "partition_a", "partition_a"},
+ {int32(2), "partition_b", "partition_b"},
+ {int32(3), "partition_a", "partition_c"},
+ {int32(4), "partition_b", "partition_d"},
+ {nil, nil, nil},
})
defer testRecord.Release()
s.testTransformPartition(iceberg.IdentityTransform{}, "name", "identity", testRecord, 3)
+ s.testTransformPartition(iceberg.IdentityTransform{}, "large_name", "identity_large_string", testRecord, 5)
}
func (s *FanoutWriterTestSuite) TestBucketTransform() {
arrSchema := arrow.NewSchema([]arrow.Field{
{Name: "id", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
{Name: "name", Type: arrow.BinaryTypes.String, Nullable: true},
+ {Name: "large_name", Type: arrow.BinaryTypes.LargeString, Nullable: true},
}, nil)
testRecord := s.createCustomTestRecord(arrSchema, [][]any{
- {int32(1), "partition_a"},
- {int32(2), "partition_b"},
- {int32(3), "partition_a"},
- {int32(4), "partition_b"},
- {nil, nil},
+ {int32(1), "partition_a", "partition_a"},
+ {int32(2), "partition_b", "partition_b"},
+ {int32(3), "partition_a", "partition_c"},
+ {int32(4), "partition_b", "partition_d"},
+ {nil, nil, nil},
})
defer testRecord.Release()
s.testTransformPartition(iceberg.BucketTransform{NumBuckets: 3}, "id", "bucket", testRecord, 3)
+ s.testTransformPartition(iceberg.BucketTransform{NumBuckets: 3}, "large_name", "bucket_large_string", testRecord, 3)
}
func (s *FanoutWriterTestSuite) TestTruncateTransform() {
arrSchema := arrow.NewSchema([]arrow.Field{
{Name: "id", Type: arrow.PrimitiveTypes.Int32, Nullable: true},
{Name: "name", Type: arrow.BinaryTypes.String, Nullable: true},
+ {Name: "large_name", Type: arrow.BinaryTypes.LargeString, Nullable: true},
}, nil)
testRecord := s.createCustomTestRecord(arrSchema, [][]any{
- {int32(1), "abcdef"},
- {int32(2), "abcxyz"},
- {int32(3), "abcuvw"},
- {int32(4), "defghi"},
- {nil, nil},
+ {int32(1), "abcdef", "abcdef"},
+ {int32(2), "abcxyz", "abcxyz"},
+ {int32(3), "abcuvw", "bcduvw"},
+ {int32(4), "defghi", "defghi"},
+ {nil, nil, nil},
})
defer testRecord.Release()
s.testTransformPartition(iceberg.TruncateTransform{Width: 3}, "name", "truncate", testRecord, 3)
+ s.testTransformPartition(iceberg.TruncateTransform{Width: 3}, "large_name", "truncate_large_string", testRecord, 4)
}
func (s *FanoutWriterTestSuite) TestYearTransform() {