laskoviymishka commented on code in PR #779:
URL: https://github.com/apache/iceberg-go/pull/779#discussion_r2928014398
##########
table/arrow_utils.go:
##########
@@ -715,6 +716,45 @@ func retOrPanic[T any](v T, err error) T {
return v
}
+// writeDefaultToScalar converts an Iceberg write-default value to an Arrow
scalar.
+func writeDefaultToScalar(v any, t iceberg.Type, dt arrow.DataType)
scalar.Scalar {
+ switch t.(type) {
+ case iceberg.DateType:
+ return scalar.NewDate32Scalar(arrow.Date32(v.(iceberg.Date)))
+ case iceberg.TimeType:
+ return scalar.NewTime64Scalar(arrow.Time64(v.(iceberg.Time)),
dt)
+ case iceberg.TimestampType, iceberg.TimestampTzType:
+ return
scalar.NewTimestampScalar(arrow.Timestamp(v.(iceberg.Timestamp)), dt)
+ case iceberg.TimestampNsType, iceberg.TimestampTzNsType:
+ return
scalar.NewTimestampScalar(arrow.Timestamp(v.(iceberg.TimestampNano)), dt)
+ case iceberg.UUIDType:
+ u := v.(uuid.UUID)
+
+ return retOrPanic(scalar.MakeScalarParam(u[:],
&arrow.FixedSizeBinaryType{ByteWidth: 16}))
+ case iceberg.DecimalType:
+ d := v.(iceberg.Decimal)
+
+ return scalar.NewDecimal128Scalar(d.Val, dt)
+ default:
+ return retOrPanic(scalar.MakeScalarParam(v, dt))
+ }
+}
+
+// writeDefaultToArray creates an Arrow array of length n filled with the
write-default value v.
+func writeDefaultToArray(v any, t iceberg.Type, dt arrow.DataType, n int,
alloc memory.Allocator) arrow.Array {
+ out := retOrPanic(scalar.MakeArrayFromScalar(writeDefaultToScalar(v, t,
dt), n, alloc))
+ if _, ok := dt.(arrow.ExtensionType); ok {
Review Comment:
This branch is UUID-specific but matches all extension types. A future
extension type will hit this path and silently produce wrong data (child arrays
set to nil).
Either narrow the check to *extensions.UUIDType or add a comment stating
this only applies to UUID.
##########
table/arrow_utils.go:
##########
@@ -715,6 +716,45 @@ func retOrPanic[T any](v T, err error) T {
return v
}
+// writeDefaultToScalar converts an Iceberg write-default value to an Arrow
scalar.
+func writeDefaultToScalar(v any, t iceberg.Type, dt arrow.DataType)
scalar.Scalar {
+ switch t.(type) {
+ case iceberg.DateType:
+ return scalar.NewDate32Scalar(arrow.Date32(v.(iceberg.Date)))
+ case iceberg.TimeType:
+ return scalar.NewTime64Scalar(arrow.Time64(v.(iceberg.Time)),
dt)
+ case iceberg.TimestampType, iceberg.TimestampTzType:
+ return
scalar.NewTimestampScalar(arrow.Timestamp(v.(iceberg.Timestamp)), dt)
+ case iceberg.TimestampNsType, iceberg.TimestampTzNsType:
+ return
scalar.NewTimestampScalar(arrow.Timestamp(v.(iceberg.TimestampNano)), dt)
+ case iceberg.UUIDType:
+ u := v.(uuid.UUID)
+
+ return retOrPanic(scalar.MakeScalarParam(u[:],
&arrow.FixedSizeBinaryType{ByteWidth: 16}))
+ case iceberg.DecimalType:
+ d := v.(iceberg.Decimal)
+
+ return scalar.NewDecimal128Scalar(d.Val, dt)
+ default:
+ return retOrPanic(scalar.MakeScalarParam(v, dt))
Review Comment:
retOrPanic converts the error into a panic with no context — no field name,
no type, no value — which makes failures hard to debug in production.
Avoid panicking at all costs; propagate an error with context instead.
##########
table/arrow_utils_test.go:
##########
@@ -585,3 +587,182 @@ func TestToRequestedSchema(t *testing.T) {
assert.True(t, array.RecordEqual(rec, rec2))
}
+
+func TestToRequestedSchemaWriteDefaults(t *testing.T) {
Review Comment:
It would be worth adding at least one round-trip through JSON.
Add at least one test that unmarshals a NestedField from a JSON snippet (as
it would arrive from a metadata file or REST catalog) and then projects it.
That test will expose the type-assertion panics described above.
##########
table/arrow_utils.go:
##########
@@ -832,7 +872,12 @@ func (a *arrowProjectionVisitor) Struct(st
iceberg.StructType, structArr arrow.A
} else if !field.Required {
dt := retOrPanic(TypeToArrowType(field.Type, false,
a.useLargeTypes))
- arr =
array.MakeArrayOfNull(compute.GetAllocator(a.ctx), dt, structArr.Len())
+ if field.WriteDefault != nil {
Review Comment:
Use field.InitialDefault instead. write-default is for new writes;
initial-default is what fills missing columns on the read path. Change the
condition and the value passed to writeDefaultToArray.
##########
table/arrow_utils.go:
##########
@@ -715,6 +716,45 @@ func retOrPanic[T any](v T, err error) T {
return v
}
+// writeDefaultToScalar converts an Iceberg write-default value to an Arrow
scalar.
+func writeDefaultToScalar(v any, t iceberg.Type, dt arrow.DataType)
scalar.Scalar {
+ switch t.(type) {
+ case iceberg.DateType:
+ return scalar.NewDate32Scalar(arrow.Date32(v.(iceberg.Date)))
Review Comment:
Panics if v is not exactly iceberg.Date. When this field comes from JSON
(REST catalog, metadata file), v will be float64 — not iceberg.Date. The
same problem applies to every assertion in this switch: iceberg.Time,
iceberg.Timestamp, iceberg.TimestampNano, and iceberg.Decimal.
##########
table/arrow_utils.go:
##########
@@ -715,6 +716,45 @@ func retOrPanic[T any](v T, err error) T {
return v
}
+// writeDefaultToScalar converts an Iceberg write-default value to an Arrow
scalar.
+func writeDefaultToScalar(v any, t iceberg.Type, dt arrow.DataType)
scalar.Scalar {
+ switch t.(type) {
+ case iceberg.DateType:
+ return scalar.NewDate32Scalar(arrow.Date32(v.(iceberg.Date)))
+ case iceberg.TimeType:
+ return scalar.NewTime64Scalar(arrow.Time64(v.(iceberg.Time)),
dt)
+ case iceberg.TimestampType, iceberg.TimestampTzType:
+ return
scalar.NewTimestampScalar(arrow.Timestamp(v.(iceberg.Timestamp)), dt)
+ case iceberg.TimestampNsType, iceberg.TimestampTzNsType:
+ return
scalar.NewTimestampScalar(arrow.Timestamp(v.(iceberg.TimestampNano)), dt)
+ case iceberg.UUIDType:
+ u := v.(uuid.UUID)
Review Comment:
Same issue: a UUID stored in JSON is a string, so v.(uuid.UUID) panics on
any deserialized metadata.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]