emkornfield commented on a change in pull request #11359:
URL: https://github.com/apache/arrow/pull/11359#discussion_r741356787



##########
File path: go/arrow/array/binary.go
##########
@@ -117,6 +118,21 @@ func (a *Binary) setData(data *Data) {
        }
 }
 
+func (a *Binary) getOneForMarshal(i int) interface{} {
+       if a.IsNull(i) {
+               return nil
+       }
+       return a.Value(i)
+}
+
+func (a *Binary) MarshalJSON() ([]byte, error) {
+       vals := make([]interface{}, a.Len())
+       for i := 0; i < a.Len(); i++ {
+               vals[i] = a.getOneForMarshal(i)
+       }
+       return json.Marshal(vals)

Review comment:
       How do non-printable characters get marshalled in json?  Escaped bytes?

##########
File path: go/arrow/array/binary.go
##########
@@ -117,6 +118,21 @@ func (a *Binary) setData(data *Data) {
        }
 }
 
+func (a *Binary) getOneForMarshal(i int) interface{} {
+       if a.IsNull(i) {
+               return nil
+       }
+       return a.Value(i)
+}
+
+func (a *Binary) MarshalJSON() ([]byte, error) {
+       vals := make([]interface{}, a.Len())
+       for i := 0; i < a.Len(); i++ {
+               vals[i] = a.getOneForMarshal(i)
+       }
+       return json.Marshal(vals)

Review comment:
       Okay, based on the below it looks like this is base64 encoded?  This is not 
something I could find as a reference in the docs?

##########
File path: go/arrow/array/decimal128.go
##########
@@ -229,6 +253,67 @@ func (b *Decimal128Builder) newData() (data *Data) {
        return
 }
 
+func (b *Decimal128Builder) unmarshalOne(dec *json.Decoder) error {
+       t, err := dec.Token()
+       if err != nil {
+               return err
+       }
+
+       var out *big.Float
+
+       switch v := t.(type) {
+       case float64:
+               out = big.NewFloat(v)
+       case string:
+               out, _, err = big.ParseFloat(v, 10, 0, big.ToNearestAway)

Review comment:
       any strong rationale for ToNearestAway?  maybe document?

##########
File path: go/arrow/array/decimal128.go
##########
@@ -229,6 +253,67 @@ func (b *Decimal128Builder) newData() (data *Data) {
        return
 }
 
+func (b *Decimal128Builder) unmarshalOne(dec *json.Decoder) error {
+       t, err := dec.Token()
+       if err != nil {
+               return err
+       }
+
+       var out *big.Float
+
+       switch v := t.(type) {
+       case float64:
+               out = big.NewFloat(v)
+       case string:
+               out, _, err = big.ParseFloat(v, 10, 0, big.ToNearestAway)

Review comment:
       any strong rationale for ToNearestAway?  maybe document?

##########
File path: go/arrow/array/decimal128.go
##########
@@ -229,6 +253,67 @@ func (b *Decimal128Builder) newData() (data *Data) {
        return
 }
 
+func (b *Decimal128Builder) unmarshalOne(dec *json.Decoder) error {
+       t, err := dec.Token()
+       if err != nil {
+               return err
+       }
+
+       var out *big.Float
+
+       switch v := t.(type) {
+       case float64:
+               out = big.NewFloat(v)
+       case string:
+               out, _, err = big.ParseFloat(v, 10, 0, big.ToNearestAway)

Review comment:
       Also, is 0 the right value for precision here shouldn't it be 128?
   
   Also if I read the docs correctly it seems that this will silently truncate 
decimal values that require larger precision, do you have thoughts on if it 
would be better to check for these types of values and return an error instead?

##########
File path: go/arrow/array/decimal128.go
##########
@@ -229,6 +253,67 @@ func (b *Decimal128Builder) newData() (data *Data) {
        return
 }
 
+func (b *Decimal128Builder) unmarshalOne(dec *json.Decoder) error {
+       t, err := dec.Token()
+       if err != nil {
+               return err
+       }
+
+       var out *big.Float
+
+       switch v := t.(type) {
+       case float64:
+               out = big.NewFloat(v)
+       case string:
+               out, _, err = big.ParseFloat(v, 10, 0, big.ToNearestAway)
+               if err != nil {
+                       return err
+               }
+       case json.Number:

Review comment:
       I might be misunderstanding but it seems like json.Number is an alias 
for string (https://pkg.go.dev/encoding/json#Number).  Should this go through 
the same logic above for string?

##########
File path: go/arrow/array/fixed_size_list.go
##########
@@ -110,6 +112,44 @@ func (a *FixedSizeList) Release() {
        a.values.Release()
 }
 
+func (a *FixedSizeList) getOneForMarshal(i int) interface{} {
+       if a.IsNull(i) {
+               return nil
+       }
+       slice := a.newListValue(i)
+       defer slice.Release()
+       v, err := json.Marshal(slice)
+       if err != nil {
+               panic(err)
+       }
+
+       return json.RawMessage(v)
+}
+
+func (a *FixedSizeList) MarshalJSON() ([]byte, error) {
+       var buf bytes.Buffer
+       enc := json.NewEncoder(&buf)
+
+       buf.WriteByte('[')

Review comment:
       Is this using a string to encode FixedSizeLists?  That might be 
counter-intuitive to people?  I guess this is needed to round-trip with 
full-fidelity? 

##########
File path: go/arrow/array/float16_builder.go
##########
@@ -163,3 +168,59 @@ func (b *Float16Builder) newData() (data *Data) {
 
        return
 }
+
+func (b *Float16Builder) unmarshalOne(dec *json.Decoder) error {
+       t, err := dec.Token()
+       if err != nil {
+               return err
+       }
+
+       switch v := t.(type) {
+       case float64:
+               b.Append(float16.New(float32(v)))
+       case string:
+               f, err := strconv.ParseFloat(v, 32)
+               if err != nil {
+                       return err
+               }
+               b.Append(float16.New(float32(f)))

Review comment:
       Does this silently truncate or does it throw an error?

##########
File path: go/arrow/array/float16_builder.go
##########
@@ -163,3 +168,59 @@ func (b *Float16Builder) newData() (data *Data) {
 
        return
 }
+
+func (b *Float16Builder) unmarshalOne(dec *json.Decoder) error {
+       t, err := dec.Token()
+       if err != nil {
+               return err
+       }
+
+       switch v := t.(type) {
+       case float64:
+               b.Append(float16.New(float32(v)))
+       case string:
+               f, err := strconv.ParseFloat(v, 32)
+               if err != nil {
+                       return err
+               }
+               b.Append(float16.New(float32(f)))
+       case json.Number:
+               f, err := v.Float64()
+               if err != nil {
+                       return err
+               }
+               b.Append(float16.New(float32(f)))
+       case nil:
+               b.AppendNull()
+       default:
+               return &json.UnmarshalTypeError{
+                       Value:  fmt.Sprint(t),
+                       Type:   reflect.TypeOf(float16.Num{}),
+                       Offset: dec.InputOffset(),
+               }
+       }
+       return nil
+}
+
+func (b *Float16Builder) unmarshal(dec *json.Decoder) error {
+       for dec.More() {
+               if err := b.unmarshalOne(dec); err != nil {
+                       return err
+               }
+       }
+       return nil
+}
+
+func (b *Float16Builder) UnmarshalJSON(data []byte) error {
+       dec := json.NewDecoder(bytes.NewReader(data))
+       t, err := dec.Token()
+       if err != nil {
+               return err
+       }
+
+       if delim, ok := t.(json.Delim); !ok || delim != '[' {
+               return fmt.Errorf("binary builder must unpack from json array, 
found %s", delim)

Review comment:
       check for copy-and-paste bugs in error messages?

##########
File path: go/arrow/array/interval.go
##########
@@ -86,6 +88,29 @@ func (a *MonthInterval) setData(data *Data) {
        }
 }
 
+func (a *MonthInterval) getOneForMarshal(i int) interface{} {
+       if a.IsValid(i) {
+               return a.values[i]
+       }
+       return nil
+}
+
+func (a *MonthInterval) MarshalJSON() ([]byte, error) {
+       if a.NullN() == 0 {
+               return json.Marshal(a.values)
+       }
+       vals := make([]interface{}, a.Len())
+       for i := 0; i < a.Len(); i++ {
+               if a.IsValid(i) {
+                       vals[i] = a.values[i]

Review comment:
       In this case, it seems like it might be better to have a formatted 
string (like "1m" or "1months")

##########
File path: go/arrow/array/interval.go
##########
@@ -279,6 +341,28 @@ func (a *DayTimeInterval) setData(data *Data) {
        }
 }
 
+func (a *DayTimeInterval) getOneForMarshal(i int) interface{} {
+       if a.IsValid(i) {
+               return a.values[i]
+       }
+       return nil
+}
+
+func (a *DayTimeInterval) MarshalJSON() ([]byte, error) {
+       if a.NullN() == 0 {
+               return json.Marshal(a.values)

Review comment:
       same comment, as months, consider making a formatted string that can be 
reparsed?  I guess whether we use a formatted string or the raw values comes 
down to who we expect to consume the JSON.  I think for machines the numbers 
might be more natural, but for human consumption formatted string makes it 
clearer what the types are.

##########
File path: go/arrow/array/map.go
##########
@@ -266,6 +270,28 @@ func (b *MapBuilder) ValueBuilder() *StructBuilder {
        return b.listBuilder.ValueBuilder().(*StructBuilder)
 }
 
+func (b *MapBuilder) unmarshalOne(dec *json.Decoder) error {
+       return b.listBuilder.unmarshalOne(dec)
+}
+
+func (b *MapBuilder) unmarshal(dec *json.Decoder) error {
+       return b.listBuilder.unmarshal(dec)
+}
+
+func (b *MapBuilder) UnmarshalJSON(data []byte) error {
+       dec := json.NewDecoder(bytes.NewReader(data))
+       t, err := dec.Token()
+       if err != nil {
+               return err
+       }
+
+       if delim, ok := t.(json.Delim); !ok || delim != '[' {
+               return fmt.Errorf("binary builder must unpack from json array, 
found %s", delim)

Review comment:
       copy-paste error message?

##########
File path: go/arrow/array/numeric.gen.go
##########
@@ -80,6 +82,27 @@ func (a *Int64) setData(data *Data) {
        }
 }
 
+func (a *Int64) getOneForMarshal(i int) interface{} {
+       if a.IsNull(i) {
+               return nil
+       }
+
+       return float64(a.values[i]) // prevent uint8 from being seen as binary 
data

Review comment:
       won't this truncate int64 values that require > 52 bits of storage?

##########
File path: go/arrow/array/numeric.gen.go
##########
@@ -750,6 +962,22 @@ func (a *Timestamp) setData(data *Data) {
        }
 }
 
+func (a *Timestamp) getOneForMarshal(i int) interface{} {
+       if a.IsNull(i) {
+               return nil
+       }
+       return 
a.values[i].ToTime(a.DataType().(*arrow.TimestampType).Unit).Format("2006-01-02 
15:04:05.999999999")

Review comment:
       should formatting be conditional on precision?

##########
File path: go/arrow/array/numeric.gen.go
##########
@@ -817,6 +1045,22 @@ func (a *Time32) setData(data *Data) {
        }
 }
 
+func (a *Time32) getOneForMarshal(i int) interface{} {
+       if a.IsNull(i) {
+               return nil
+       }
+       return 
a.values[i].ToTime(a.DataType().(*arrow.Time32Type).Unit).Format("15:04:05.999999999")

Review comment:
       since it is a different type, it is probably easy to format this to only 
the precision required?

##########
File path: go/arrow/array/numeric.gen.go
##########
@@ -1085,6 +1377,22 @@ func (a *Duration) setData(data *Data) {
        }
 }
 
+func (a *Duration) getOneForMarshal(i int) interface{} {
+       if a.IsNull(i) {
+               return nil
+       }
+       return fmt.Sprint(time.Duration(a.values[i]) * 
a.DataType().(*arrow.DurationType).Unit.Multiplier())

Review comment:
       this logic is a little opaque to me, would you mind adding a comment on 
intended output.

##########
File path: go/arrow/array/numeric.gen.go
##########
@@ -1085,6 +1377,22 @@ func (a *Duration) setData(data *Data) {
        }
 }
 
+func (a *Duration) getOneForMarshal(i int) interface{} {
+       if a.IsNull(i) {
+               return nil
+       }
+       return fmt.Sprint(time.Duration(a.values[i]) * 
a.DataType().(*arrow.DurationType).Unit.Multiplier())

Review comment:
       Are there overflow issues for coarser precisions?

##########
File path: go/arrow/array/struct.go
##########
@@ -105,6 +107,36 @@ func (a *Struct) setData(data *Data) {
        }
 }
 
+func (a *Struct) getOneForMarshal(i int) interface{} {
+       if a.IsNull(i) {
+               return nil
+       }
+
+       tmp := make(map[string]interface{})
+       fieldList := a.data.dtype.(*arrow.StructType).Fields()
+       for j, d := range a.fields {
+               tmp[fieldList[j].Name] = d.getOneForMarshal(i)

Review comment:
       I don't think the arrow spec says struct names need to be unique, it 
might be worth checking for that case here and throwing an error OR using a 
list of tuple representation in that case.

##########
File path: go/arrow/array/struct.go
##########
@@ -105,6 +107,36 @@ func (a *Struct) setData(data *Data) {
        }
 }
 
+func (a *Struct) getOneForMarshal(i int) interface{} {
+       if a.IsNull(i) {
+               return nil
+       }
+
+       tmp := make(map[string]interface{})
+       fieldList := a.data.dtype.(*arrow.StructType).Fields()
+       for j, d := range a.fields {
+               tmp[fieldList[j].Name] = d.getOneForMarshal(i)
+       }
+       return tmp
+}
+
+func (a *Struct) MarshalJSON() ([]byte, error) {
+       var buf bytes.Buffer
+       enc := json.NewEncoder(&buf)
+
+       buf.WriteByte('[')
+       for i := 0; i < a.Len(); i++ {

Review comment:
       I might be getting a little lost here but IIUC the other json functions 
return a list representing the column value.  Here it seems we transpose each 
column to a row first?  This seems to lack symmetry?  Maybe in this case the 
MarshalJSON function shouldn't use getOneForMarshal?

##########
File path: go/arrow/array/util.go
##########
@@ -16,9 +16,178 @@
 
 package array
 
+import (
+       "errors"
+       "fmt"
+       "io"
+
+       "github.com/apache/arrow/go/arrow"
+       "github.com/apache/arrow/go/arrow/memory"
+       "github.com/goccy/go-json"
+)
+
 func min(a, b int) int {
        if a < b {
                return a
        }
        return b
 }
+
+type fromJSONCfg struct {
+       multiDocument bool
+       startOffset   int64
+}
+
+type FromJSONOption func(*fromJSONCfg)
+
+func WithMultipleDocs() FromJSONOption {
+       return func(c *fromJSONCfg) {
+               c.multiDocument = true
+       }
+}
+
+// WithStartOffset attempts to start decoding from the reader at the offset
+// passed in. If using this option the reader must fulfill the io.ReadSeeker
+// interface, or else an error will be returned.
+//
+// It will call Seek(off, io.SeekStart) on the reader
+func WithStartOffset(off int64) FromJSONOption {
+       return func(c *fromJSONCfg) {
+               c.startOffset = off
+       }
+}
+
+// FromJSON creates an array.Interface from a corresponding JSON stream and 
defined data type. If the types in the
+// json do not match the type provided, it will return errors. This is *not* 
the integration test format
+// and should not be used as such. This intended to be used by consumers more 
similarly to the current exposing of
+// the csv reader/writer. It also returns the input offset in the reader where 
it finished decoding since buffering
+// by the decoder could leave the reader's cursor past where the parsing 
finished if attempting to parse multiple json
+// arrays from one stream.
+//
+// All the Array types implement json.Marshaller and thus can be written to 
json
+// using the json.Marshal function
+//
+// The JSON provided must be formatted in one of two ways:
+//             Default: the top level of the json must be a list which matches 
the type specified exactly
+//             Example: `[1, 2, 3, 4, 5]` for any integer type or `[[...], 
null, [], .....]` for a List type
+//                                     Struct arrays are represented a list of 
objects: `[{"foo": 1, "bar": "moo"}, {"foo": 5, "bar": "baz"}]`

Review comment:
       Maybe also document time/interval/duration types?

##########
File path: go/arrow/array/util_test.go
##########
@@ -0,0 +1,406 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array_test
+
+import (
+       "bytes"
+       "fmt"
+       "io"
+       "reflect"
+       "strings"
+       "testing"
+
+       "github.com/apache/arrow/go/arrow"
+       "github.com/apache/arrow/go/arrow/array"
+       "github.com/apache/arrow/go/arrow/decimal128"
+       "github.com/apache/arrow/go/arrow/internal/arrdata"
+       "github.com/apache/arrow/go/arrow/memory"
+       "github.com/goccy/go-json"
+       "github.com/stretchr/testify/assert"
+)
+
+var typemap = map[arrow.DataType]reflect.Type{
+       arrow.PrimitiveTypes.Int8:   reflect.TypeOf(int8(0)),
+       arrow.PrimitiveTypes.Uint8:  reflect.TypeOf(uint8(0)),
+       arrow.PrimitiveTypes.Int16:  reflect.TypeOf(int16(0)),
+       arrow.PrimitiveTypes.Uint16: reflect.TypeOf(uint16(0)),
+       arrow.PrimitiveTypes.Int32:  reflect.TypeOf(int32(0)),
+       arrow.PrimitiveTypes.Uint32: reflect.TypeOf(uint32(0)),
+       arrow.PrimitiveTypes.Int64:  reflect.TypeOf(int64(0)),
+       arrow.PrimitiveTypes.Uint64: reflect.TypeOf(uint64(0)),
+}
+
+func TestIntegerArrsJSON(t *testing.T) {
+       const N = 10
+       types := []arrow.DataType{
+               arrow.PrimitiveTypes.Int8,
+               arrow.PrimitiveTypes.Uint8,
+               arrow.PrimitiveTypes.Int16,
+               arrow.PrimitiveTypes.Uint16,
+               arrow.PrimitiveTypes.Int32,
+               arrow.PrimitiveTypes.Uint32,
+               arrow.PrimitiveTypes.Int64,
+               arrow.PrimitiveTypes.Uint64,
+       }
+
+       for _, tt := range types {
+               t.Run(fmt.Sprint(tt), func(t *testing.T) {
+                       mem := 
memory.NewCheckedAllocator(memory.NewGoAllocator())
+                       defer mem.AssertSize(t, 0)
+
+                       jsontest := make([]int, N)
+                       vals := reflect.MakeSlice(reflect.SliceOf(typemap[tt]), 
N, N)
+                       for i := 0; i < N; i++ {
+                               
vals.Index(i).Set(reflect.ValueOf(i).Convert(typemap[tt]))
+                               jsontest[i] = i
+                       }
+
+                       data, _ := json.Marshal(jsontest)
+                       arr, _, err := array.FromJSON(mem, tt, 
bytes.NewReader(data))
+                       assert.NoError(t, err)
+                       defer arr.Release()
+
+                       assert.EqualValues(t, N, arr.Len())
+                       assert.Zero(t, arr.NullN())
+
+                       output, err := json.Marshal(arr)
+                       assert.NoError(t, err)
+                       assert.JSONEq(t, string(data), string(output))
+               })
+               t.Run(fmt.Sprint(tt)+" errors", func(t *testing.T) {
+                       _, _, err := array.FromJSON(memory.DefaultAllocator, 
tt, strings.NewReader(""))
+                       assert.Error(t, err)
+
+                       _, _, err = array.FromJSON(memory.DefaultAllocator, tt, 
strings.NewReader("["))
+                       assert.ErrorIs(t, err, io.ErrUnexpectedEOF)
+
+                       _, _, err = array.FromJSON(memory.DefaultAllocator, tt, 
strings.NewReader("0"))
+                       assert.Error(t, err)
+
+                       _, _, err = array.FromJSON(memory.DefaultAllocator, tt, 
strings.NewReader("{}"))
+                       assert.Error(t, err)
+
+                       _, _, err = array.FromJSON(memory.DefaultAllocator, tt, 
strings.NewReader("[[0]]"))
+                       assert.EqualError(t, err, "json: cannot unmarshal [ 
into Go value of type "+tt.Name())
+               })
+       }
+}
+
+func TestStringsJSON(t *testing.T) {
+       tests := []struct {
+               jsonstring string
+               values     []string
+               valids     []bool
+       }{
+               {"[]", []string{}, []bool{}},
+               {`["", "foo"]`, []string{"", "foo"}, nil},
+               {`["", null]`, []string{"", ""}, []bool{true, false}},
+               // NUL character in string
+               {`["", "some\u0000char"]`, []string{"", "some\x00char"}, nil},
+               // utf8 sequence in string
+               {"[\"\xc3\xa9\"]", []string{"\xc3\xa9"}, nil},
+               // bytes < 0x20 can be represented as JSON unicode escapes
+               {`["\u0000\u001f"]`, []string{"\x00\x1f"}, nil},
+       }
+
+       for _, tt := range tests {
+               t.Run("json "+tt.jsonstring, func(t *testing.T) {
+                       bldr := array.NewStringBuilder(memory.DefaultAllocator)
+                       defer bldr.Release()
+
+                       bldr.AppendValues(tt.values, tt.valids)
+                       expected := bldr.NewStringArray()
+                       defer expected.Release()
+
+                       arr, _, err := array.FromJSON(memory.DefaultAllocator, 
arrow.BinaryTypes.String, strings.NewReader(tt.jsonstring))
+                       assert.NoError(t, err)
+                       defer arr.Release()
+
+                       assert.Truef(t, array.ArrayEqual(expected, arr), 
"expected: %s\ngot: %s\n", expected, arr)
+
+                       data, err := json.Marshal(arr)
+                       assert.NoError(t, err)
+                       assert.JSONEq(t, tt.jsonstring, string(data))
+               })
+       }
+
+       t.Run("errors", func(t *testing.T) {
+               _, _, err := array.FromJSON(memory.DefaultAllocator, 
arrow.BinaryTypes.String, strings.NewReader("[0]"))
+               assert.Error(t, err)
+
+               _, _, err = array.FromJSON(memory.DefaultAllocator, 
arrow.BinaryTypes.String, strings.NewReader("[[]]"))
+               assert.Error(t, err)
+       })
+}
+
+func TestStructArrayFromJSON(t *testing.T) {
+       mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+       defer mem.AssertSize(t, 0)
+
+       jsonStr := `[{"hello": 3.5, "world": true, "yo": "foo"},{"hello": 3.25, 
"world": false, "yo": "bar"}]`
+
+       arr, _, err := array.FromJSON(mem, arrow.StructOf(
+               arrow.Field{Name: "hello", Type: arrow.PrimitiveTypes.Float64},
+               arrow.Field{Name: "world", Type: arrow.FixedWidthTypes.Boolean},
+               arrow.Field{Name: "yo", Type: arrow.BinaryTypes.String},
+       ), strings.NewReader(jsonStr))
+       assert.NoError(t, err)
+       defer arr.Release()
+
+       output, err := json.Marshal(arr)
+       assert.NoError(t, err)
+       assert.JSONEq(t, jsonStr, string(output))
+}
+
+func TestArrayFromJSONMulti(t *testing.T) {
+       arr, _, err := array.FromJSON(memory.DefaultAllocator, arrow.StructOf(
+               arrow.Field{Name: "hello", Type: arrow.PrimitiveTypes.Float64},
+               arrow.Field{Name: "world", Type: arrow.FixedWidthTypes.Boolean},
+               arrow.Field{Name: "yo", Type: arrow.BinaryTypes.String},
+       ), strings.NewReader("{\"hello\": 3.5, \"world\": true, \"yo\": 
\"foo\"}\n{\"hello\": 3.25, \"world\": false, \"yo\": \"bar\"}\n"),
+               array.WithMultipleDocs())
+       assert.NoError(t, err)
+       defer arr.Release()
+
+       assert.EqualValues(t, 2, arr.Len())
+       assert.Zero(t, arr.NullN())
+}
+
+func TestNestedJSONArrs(t *testing.T) {
+       mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+       defer mem.AssertSize(t, 0)
+
+       jsonStr := `[{"hello": 1.5, "world": [1, 2, 3, 4], "yo": [{"foo": 
"2005-05-06", "bar": "15:02:04.123"},{"foo": "1956-01-02", "bar": 
"02:10:00"}]}]`
+
+       arr, _, err := array.FromJSON(mem, arrow.StructOf(
+               arrow.Field{Name: "hello", Type: arrow.PrimitiveTypes.Float64},
+               arrow.Field{Name: "world", Type: 
arrow.ListOf(arrow.PrimitiveTypes.Int32)},
+               arrow.Field{Name: "yo", Type: arrow.FixedSizeListOf(2, 
arrow.StructOf(
+                       arrow.Field{Name: "foo", Type: 
arrow.FixedWidthTypes.Date32},
+                       arrow.Field{Name: "bar", Type: 
arrow.FixedWidthTypes.Time32ms},
+               ))},
+       ), strings.NewReader(jsonStr))
+       defer arr.Release()
+       assert.NoError(t, err)
+
+       v, err := json.Marshal(arr)
+       assert.NoError(t, err)
+       assert.JSONEq(t, jsonStr, string(v))
+}
+
+func TestGetNullsFromJSON(t *testing.T) {
+       mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+       defer mem.AssertSize(t, 0)
+
+       jsonStr := `[
+               {"yo": "thing", "arr": null, "nuf": {"ps": "今日は"}},
+               {"yo": null, "nuf": {"ps": null}, "arr": []},
+               { "nuf": null, "yo": "今日は", "arr": [1,2,3]}
+       ]`
+
+       rec, _, err := array.RecordFromJSON(mem, arrow.NewSchema([]arrow.Field{
+               {Name: "yo", Type: arrow.BinaryTypes.String, Nullable: true},
+               {Name: "arr", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), 
Nullable: true},
+               {Name: "nuf", Type: arrow.StructOf(arrow.Field{Name: "ps", 
Type: arrow.BinaryTypes.String, Nullable: true}), Nullable: true},
+       }, nil), strings.NewReader(jsonStr))
+       assert.NoError(t, err)
+       defer rec.Release()
+
+       assert.EqualValues(t, 3, rec.NumCols())
+       assert.EqualValues(t, 3, rec.NumRows())
+
+       data, err := json.Marshal(rec)
+       assert.NoError(t, err)
+       assert.JSONEq(t, jsonStr, string(data))
+}
+
+func TestTimestampsJSON(t *testing.T) {
+       tests := []struct {
+               unit    arrow.TimeUnit
+               jsonstr string
+               values  []arrow.Timestamp
+       }{
+               {arrow.Second, `["1970-01-01", "2000-02-29", "3989-07-14", 
"1900-02-28"]`, []arrow.Timestamp{0, 951782400, 63730281600, -2203977600}},
+               {arrow.Nanosecond, `["1970-01-01", "2000-02-29", 
"1900-02-28"]`, []arrow.Timestamp{0, 951782400000000000, -2203977600000000000}},
+       }
+
+       for _, tt := range tests {
+               dtype := &arrow.TimestampType{Unit: tt.unit}
+               bldr := array.NewTimestampBuilder(memory.DefaultAllocator, 
dtype)
+               defer bldr.Release()
+
+               bldr.AppendValues(tt.values, nil)
+               expected := bldr.NewArray()
+               defer expected.Release()
+
+               arr, _, err := array.FromJSON(memory.DefaultAllocator, dtype, 
strings.NewReader(tt.jsonstr))
+               assert.NoError(t, err)
+               defer arr.Release()
+
+               assert.Truef(t, array.ArrayEqual(expected, arr), "expected: 
%s\ngot: %s\n", expected, arr)
+       }
+}
+
+func TestDateJSON(t *testing.T) {
+       t.Run("date32", func(t *testing.T) {
+               bldr := array.NewDate32Builder(memory.DefaultAllocator)
+               defer bldr.Release()
+
+               jsonstr := `["1970-01-06", null, "1970-02-12"]`
+
+               bldr.AppendValues([]arrow.Date32{5, 0, 42}, []bool{true, false, 
true})
+               expected := bldr.NewArray()
+               defer expected.Release()
+
+               arr, _, err := array.FromJSON(memory.DefaultAllocator, 
arrow.FixedWidthTypes.Date32, strings.NewReader(jsonstr))
+               assert.NoError(t, err)
+               defer arr.Release()
+
+               assert.Truef(t, array.ArrayEqual(expected, arr), "expected: 
%s\ngot: %s\n", expected, arr)
+
+               data, err := json.Marshal(arr)
+               assert.NoError(t, err)
+               assert.JSONEq(t, jsonstr, string(data))
+       })
+       t.Run("date64", func(t *testing.T) {
+               bldr := array.NewDate64Builder(memory.DefaultAllocator)
+               defer bldr.Release()
+
+               jsonstr := `["1970-01-02", null, "2286-11-20"]`
+
+               bldr.AppendValues([]arrow.Date64{86400000, 0, 9999936000000}, 
[]bool{true, false, true})
+               expected := bldr.NewArray()
+               defer expected.Release()
+
+               arr, _, err := array.FromJSON(memory.DefaultAllocator, 
arrow.FixedWidthTypes.Date64, strings.NewReader(jsonstr))
+               assert.NoError(t, err)
+               defer arr.Release()
+
+               assert.Truef(t, array.ArrayEqual(expected, arr), "expected: 
%s\ngot: %s\n", expected, arr)
+
+               data, err := json.Marshal(arr)
+               assert.NoError(t, err)
+               assert.JSONEq(t, jsonstr, string(data))
+       })
+}
+
+func TestTimeJSON(t *testing.T) {
+       tententen := 60*(60*(10)+10) + 10
+       tests := []struct {
+               dt       arrow.DataType
+               jsonstr  string
+               valueadd int
+       }{
+               {arrow.FixedWidthTypes.Time32s, `[null, "10:10:10"]`, 123},
+               {arrow.FixedWidthTypes.Time32ms, `[null, "10:10:10.123"]`, 456},

Review comment:
       oh, is the formatter smart enough to not extend 0 values on the decimal?

##########
File path: go/arrow/datatype_fixedwidth.go
##########
@@ -54,6 +56,140 @@ type (
        Duration  int64
 )
 
+// Date32FromTime returns a Date32 value from a time object
+func Date32FromTime(t time.Time) Date32 {
+       return Date32(t.Unix() / int64((time.Hour * 24).Seconds()))
+}
+
+func (d Date32) ToTime() time.Time {
+       return time.Unix(0, 0).UTC().AddDate(0, 0, int(d))
+}
+
+// Date64FromTime returns a Date64 value from a time object
+func Date64FromTime(t time.Time) Date64 {
+       return Date64(t.Unix()*1e3 + int64(t.Nanosecond())/1e6)
+}
+
+func (d Date64) ToTime() time.Time {
+       days := int(int64(d) / (time.Hour * 24).Milliseconds())
+       return time.Unix(0, 0).UTC().AddDate(0, 0, days)
+}
+
+// TimestampFromString parses a string and returns a timestamp for the given 
unit
+// level.
+//
+// The timestamp should be in one of the following forms, [T] can be either T
+// or a space, and [.zzzzzzzzz] can be either left out or up to 9 digits of
+// fractions of a second.
+//
+//      YYYY-MM-DD
+//      YYYY-MM-DD[T]HH
+//   YYYY-MM-DD[T]HH:MM
+//   YYYY-MM-DD[T]HH:MM:SS[.zzzzzzzz]
+func TimestampFromString(val string, unit TimeUnit) (Timestamp, error) {
+       format := "2006-01-02"
+       if val[len(val)-1] == 'Z' {
+               val = val[:len(val)-1]
+       }
+
+       switch {
+       case len(val) == 13:
+               format += string(val[10]) + "15"
+       case len(val) == 16:
+               format += string(val[10]) + "15:04"
+       case len(val) >= 19:
+               format += string(val[10]) + "15:04:05.999999999"
+       }
+
+       out, err := time.ParseInLocation(format, val, time.UTC)
+       if err != nil {
+               return 0, err
+       }
+
+       switch unit {
+       case Second:
+               return Timestamp(out.Unix()), nil
+       case Millisecond:
+               return Timestamp(out.Unix()*1e3 + int64(out.Nanosecond())/1e6), 
nil

Review comment:
       I know I asked this before, but does Go panic on overflow?

##########
File path: go/arrow/datatype_fixedwidth.go
##########
@@ -54,6 +56,140 @@ type (
        Duration  int64
 )
 
+// Date32FromTime returns a Date32 value from a time object
+func Date32FromTime(t time.Time) Date32 {
+       return Date32(t.Unix() / int64((time.Hour * 24).Seconds()))
+}
+
+func (d Date32) ToTime() time.Time {
+       return time.Unix(0, 0).UTC().AddDate(0, 0, int(d))
+}
+
+// Date64FromTime returns a Date64 value from a time object
+func Date64FromTime(t time.Time) Date64 {
+       return Date64(t.Unix()*1e3 + int64(t.Nanosecond())/1e6)
+}
+
+func (d Date64) ToTime() time.Time {
+       days := int(int64(d) / (time.Hour * 24).Milliseconds())
+       return time.Unix(0, 0).UTC().AddDate(0, 0, days)
+}
+
+// TimestampFromString parses a string and returns a timestamp for the given 
unit
+// level.
+//
+// The timestamp should be in one of the following forms, [T] can be either T
+// or a space, and [.zzzzzzzzz] can be either left out or up to 9 digits of
+// fractions of a second.
+//
+//      YYYY-MM-DD
+//      YYYY-MM-DD[T]HH
+//   YYYY-MM-DD[T]HH:MM
+//   YYYY-MM-DD[T]HH:MM:SS[.zzzzzzzz]
+func TimestampFromString(val string, unit TimeUnit) (Timestamp, error) {
+       format := "2006-01-02"
+       if val[len(val)-1] == 'Z' {
+               val = val[:len(val)-1]
+       }
+
+       switch {
+       case len(val) == 13:
+               format += string(val[10]) + "15"
+       case len(val) == 16:
+               format += string(val[10]) + "15:04"
+       case len(val) >= 19:
+               format += string(val[10]) + "15:04:05.999999999"
+       }
+
+       out, err := time.ParseInLocation(format, val, time.UTC)
+       if err != nil {
+               return 0, err
+       }
+
+       switch unit {
+       case Second:
+               return Timestamp(out.Unix()), nil
+       case Millisecond:
+               return Timestamp(out.Unix()*1e3 + int64(out.Nanosecond())/1e6), 
nil

Review comment:
       I think at least in C++ errors are raised when precision is truncated 
(i.e. we'd expect everything past the millisecond precision to be 0)

##########
File path: go/arrow/array/struct.go
##########
@@ -105,6 +107,36 @@ func (a *Struct) setData(data *Data) {
        }
 }
 
+func (a *Struct) getOneForMarshal(i int) interface{} {
+       if a.IsNull(i) {
+               return nil
+       }
+
+       tmp := make(map[string]interface{})
+       fieldList := a.data.dtype.(*arrow.StructType).Fields()
+       for j, d := range a.fields {
+               tmp[fieldList[j].Name] = d.getOneForMarshal(i)

Review comment:
       hmm, OK, I might be wrong about the struct then, I might have been 
thinking about schema.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to