zeroshade commented on code in PR #830:
URL: https://github.com/apache/arrow-go/pull/830#discussion_r3461663003
##########
arrow/avro/reader_types.go:
##########
@@ -574,126 +539,108 @@ func mapFieldBuilders(b array.Builder, field
arrow.Field, parent *fieldPos) {
}
case *array.Time32Builder:
f.appendFunc = func(data interface{}) error {
- appendTime32Data(bt, data)
- return nil
+ return appendTime32Data(bt, data)
}
case *array.Time64Builder:
f.appendFunc = func(data interface{}) error {
- appendTime64Data(bt, data)
- return nil
+ return appendTime64Data(bt, data)
}
case *array.TimestampBuilder:
f.appendFunc = func(data interface{}) error {
- appendTimestampData(bt, data)
- return nil
+ return appendTimestampData(bt, data)
}
}
}
-func appendBinaryData(b *array.BinaryBuilder, data interface{}) {
+// appendUUIDData accepts the two shapes a UUID may arrive as: a [16]byte
+// (fixed(16)+uuid) or a hex-dash string (string+uuid). Other byte lengths
+// are rejected rather than re-interpreted.
+func appendUUIDData(b *extensions.UUIDBuilder, data any, fieldName string)
error {
switch dt := data.(type) {
case nil:
b.AppendNull()
- case map[string]any:
- switch ct := dt["bytes"].(type) {
- case nil:
- b.AppendNull()
+ case string:
+ return b.AppendValueFromString(dt)
+ case [16]byte:
+ b.AppendBytes(dt)
+ case []byte:
+ switch len(dt) {
+ case 16:
+ b.AppendBytes([16]byte(dt))
+ case 36:
+ return b.AppendValueFromString(string(dt))
Review Comment:
as far as I'm aware, I don't think `twmb` ever yields a 36-byte `[]byte` for
a uuid case, so what scenario is this testing? If this is unreachable can we
just remove it?
##########
arrow/avro/reader.go:
##########
Review Comment:
nit: should we change this to doing an actual schema check instead? just so
that we don't end up rejecting cases with different whitespace/formatting
##########
arrow/avro/reader_types.go:
##########
@@ -92,21 +90,10 @@ func (d *dataLoader) drawTree(field *fieldPos) {
// Since array.StructBuilder.AppendNull() will recursively append null to all
of the
// struct's fields, in the case of nil being passed to a struct's builderFunc
it will
// return a ErrNullStructData error to signal that all its sub-fields can be
skipped.
-// filterNullStruct drops ErrNullStructData, which signals a null struct
-// whose sub-fields can be skipped rather than a failure.
-func filterNullStruct(err error) error {
- if err == ErrNullStructData {
- return nil
- }
- return err
-}
-
func (d *dataLoader) loadDatum(data any) error {
Review Comment:
this currently propagates errors for top-level scalar fields, but discards
them everywhere else:
- mapValue struct loader (line 96)
- recursive calls to a child's `loadDatum` (lines 137, 155, 160, 162, 164,
195, 198, 222-223)
- list append and items: lines 172, 174, 177, 203, 206, 227-228
- map branch append/key/value: lines 234, 237, 239, 241, 243
The original test being replaced asserted that the map-value and list-item
type mismatches would have surfaced as errors. Can we re-wrap the nested
appendFunc/loadDatum calls with the same error/`ErrNullStructData` handling
used in the scalar loops? And can we restore a
`TestLoadDatumPropagatesNestedAppendErrors` unit test (using `twmb` of course)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]