emkornfield commented on a change in pull request #11359: URL: https://github.com/apache/arrow/pull/11359#discussion_r741389891
########## File path: go/arrow/array/util.go ########## @@ -16,9 +16,178 @@ package array +import ( + "errors" + "fmt" + "io" + + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/arrow/memory" + "github.com/goccy/go-json" +) + func min(a, b int) int { if a < b { return a } return b } + +type fromJSONCfg struct { + multiDocument bool + startOffset int64 +} + +type FromJSONOption func(*fromJSONCfg) + +func WithMultipleDocs() FromJSONOption { + return func(c *fromJSONCfg) { + c.multiDocument = true + } +} + +// WithStartOffset attempts to start decoding from the reader at the offset +// passed in. If using this option the reader must fulfill the io.ReadSeeker +// interface, or else an error will be returned. +// +// It will call Seek(off, io.SeekStart) on the reader +func WithStartOffset(off int64) FromJSONOption { + return func(c *fromJSONCfg) { + c.startOffset = off + } +} + +// FromJSON creates an array.Interface from a corresponding JSON stream and defined data type. If the types in the +// json do not match the type provided, it will return errors. This is *not* the integration test format +// and should not be used as such. This is intended to be used by consumers more similarly to the current exposing of +// the csv reader/writer. It also returns the input offset in the reader where it finished decoding since buffering +// by the decoder could leave the reader's cursor past where the parsing finished if attempting to parse multiple json +// arrays from one stream. 
+// +// All the Array types implement json.Marshaler and thus can be written to json +// using the json.Marshal function +// +// The JSON provided must be formatted in one of two ways: +// Default: the top level of the json must be a list which matches the type specified exactly +// Example: `[1, 2, 3, 4, 5]` for any integer type or `[[...], null, [], .....]` for a List type +// Struct arrays are represented as a list of objects: `[{"foo": 1, "bar": "moo"}, {"foo": 5, "bar": "baz"}]` Review comment: Maybe also document time/interval/duration types? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org