This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new d9c2d59617 GH-35828: [Go] Add `array.WithUnorderedMapKeys` option for
`array.ApproxEqual` (#35823)
d9c2d59617 is described below
commit d9c2d5961766d16fa0cae3b0ba9b2335ffd6bdb6
Author: Alex Shcherbakov <[email protected]>
AuthorDate: Wed May 31 20:12:19 2023 +0300
GH-35828: [Go] Add `array.WithUnorderedMapKeys` option for
`array.ApproxEqual` (#35823)
### Rationale for this change
If we store the value of the `*array.Map` into Go map we can't expect to
put the values back in the same order, as the map traversal order in Go is
undefined.
This PR extends `array.ApproxEqual` by allowing map keys to be in different
order.
### What changes are included in this PR?
New helper functions:
* `array.arrayApproxEqualMap` that is now used instead of
`array.arrayApproxEqualList` for map comparison
* `array.arrayApproxEqualSingleMapEntry` that checks if the single map
entry we have matches to the other one without having keys sorted the same way
### Are these changes tested?
1. Newly added test `array.TestArrayApproxEqualMaps`
2. https://github.com/cloudquery/cloudquery/pull/11078
### Are there any user-facing changes?
New `array.WithUnorderedMapKeys` option for `array.ApproxEqual` that allows
users to control if the entries order is important or not.
* Closes: #35828
Authored-by: candiduslynx <[email protected]>
Signed-off-by: Matt Topol <[email protected]>
---
go/arrow/array/compare.go | 86 +++++++++++++++++++++++++++++-
go/arrow/array/compare_test.go | 116 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 200 insertions(+), 2 deletions(-)
diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go
index a55a9d0c48..48b0df9c84 100644
--- a/go/arrow/array/compare.go
+++ b/go/arrow/array/compare.go
@@ -385,8 +385,9 @@ func sliceApproxEqual(left arrow.Array, lbeg, lend int64,
right arrow.Array, rbe
const defaultAbsoluteTolerance = 1e-5
type equalOption struct {
- atol float64 // absolute tolerance
- nansEq bool // whether NaNs are considered equal.
+ atol float64 // absolute tolerance
+ nansEq bool // whether NaNs are considered equal.
+ unorderedMapKeys bool // whether maps are allowed to have different
entries order
}
func (eq equalOption) f16(f1, f2 float16.Num) bool {
@@ -450,6 +451,13 @@ func WithAbsTolerance(atol float64) EqualOption {
}
}
+// WithUnorderedMapKeys configures the comparison functions so that Map with
different entries order are considered equal.
+func WithUnorderedMapKeys(v bool) EqualOption {
+ return func(o *equalOption) {
+ o.unorderedMapKeys = v
+ }
+}
+
// ArrayApproxEqual reports whether the two provided arrays are approximately
equal.
// For non-floating point arrays, it is equivalent to ArrayEqual.
//
@@ -581,6 +589,9 @@ func arrayApproxEqual(left, right arrow.Array, opt
equalOption) bool {
return arrayEqualDuration(l, r)
case *Map:
r := right.(*Map)
+ if opt.unorderedMapKeys {
+ return arrayApproxEqualMap(l, r, opt)
+ }
return arrayApproxEqualList(l.List, r.List, opt)
case *Dictionary:
r := right.(*Dictionary)
@@ -732,3 +743,74 @@ func arrayApproxEqualStruct(left, right *Struct, opt
equalOption) bool {
}
return true
}
+
+// arrayApproxEqualMap doesn't care about the order of keys (in Go map
traversal order is undefined)
+func arrayApproxEqualMap(left, right *Map, opt equalOption) bool {
+ for i := 0; i < left.Len(); i++ {
+ if left.IsNull(i) {
+ continue
+ }
+ if
!arrayApproxEqualSingleMapEntry(left.newListValue(i).(*Struct),
right.newListValue(i).(*Struct), opt) {
+ return false
+ }
+ }
+ return true
+}
+
+// arrayApproxEqualSingleMapEntry is a helper function that checks if a single
entry pair is approx equal.
+// Basically, it doesn't care about key order.
+// structs passed will be released
+func arrayApproxEqualSingleMapEntry(left, right *Struct, opt equalOption) bool
{
+ defer left.Release()
+ defer right.Release()
+
+ // we don't compare the validity bitmap, but we want other checks from
baseArrayEqual
+ switch {
+ case left.Len() != right.Len():
+ return false
+ case left.NullN() != right.NullN():
+ return false
+ case !arrow.TypeEqual(left.DataType(), right.DataType()): // We do not
check for metadata as in the C++ implementation.
+ return false
+ case left.NullN() == left.Len():
+ return true
+ }
+
+ used := make(map[int]bool, right.Len())
+ for i := 0; i < left.Len(); i++ {
+ if left.IsNull(i) {
+ continue
+ }
+
+ found := false
+ lBeg, lEnd := int64(i), int64(i+1)
+ for j := 0; j < right.Len(); j++ {
+ if used[j] {
+ continue
+ }
+ if right.IsNull(j) {
+ used[j] = true
+ continue
+ }
+
+ rBeg, rEnd := int64(j), int64(j+1)
+
+ // check keys (field 0)
+ if !sliceApproxEqual(left.Field(0), lBeg, lEnd,
right.Field(0), rBeg, rEnd, opt) {
+ continue
+ }
+
+ // only now check the values
+ if sliceApproxEqual(left.Field(1), lBeg, lEnd,
right.Field(1), rBeg, rEnd, opt) {
+ found = true
+ used[j] = true
+ break
+ }
+ }
+ if !found {
+ return false
+ }
+ }
+
+ return len(used) == right.Len()
+}
diff --git a/go/arrow/array/compare_test.go b/go/arrow/array/compare_test.go
index 71109d0ac0..8bee75ffcc 100644
--- a/go/arrow/array/compare_test.go
+++ b/go/arrow/array/compare_test.go
@@ -19,6 +19,7 @@ package array_test
import (
"fmt"
"math"
+ "sort"
"testing"
"github.com/apache/arrow/go/v13/arrow"
@@ -302,6 +303,121 @@ func TestArrayApproxEqualFloats(t *testing.T) {
}
}
+func testStringMap(mem memory.Allocator, m map[string]string, keys []string)
*array.Map {
+ dt := arrow.MapOf(arrow.BinaryTypes.String, arrow.BinaryTypes.String)
+ builder := array.NewMapBuilderWithType(mem, dt)
+ defer builder.Release()
+ key, item := builder.KeyBuilder().(*array.StringBuilder),
builder.ItemBuilder().(*array.StringBuilder)
+
+ builder.AppendNull()
+ builder.Append(true)
+
+ for _, k := range keys {
+ key.Append(k)
+
+ v, ok := m[k]
+ if !ok {
+ item.AppendNull()
+ continue
+ }
+
+ item.Append(v)
+ }
+
+ return builder.NewMapArray()
+}
+
+func TestArrayApproxEqualMaps(t *testing.T) {
+ mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer mem.AssertSize(t, 0)
+
+ t.Run("different order", func(t *testing.T) {
+ m := map[string]string{"x": "x", "y": "y", "z": "z"}
+
+ keys := []string{"z", "y", "x", "null"}
+ a := testStringMap(mem, m, keys)
+ defer a.Release()
+
+ asc := make([]string, len(keys))
+ copy(asc, keys)
+ sort.Strings(asc)
+ assert.NotEqual(t, keys, asc)
+
+ b := testStringMap(mem, m, asc)
+ defer b.Release()
+
+ assert.False(t, array.ApproxEqual(a, b))
+ assert.True(t, array.ApproxEqual(a, b,
array.WithUnorderedMapKeys(true)))
+ })
+
+ t.Run("extra left value", func(t *testing.T) {
+ m := map[string]string{"x": "x", "y": "y", "z": "z", "extra":
"extra"}
+
+ aKeys := []string{"z", "y", "x", "extra"}
+ a := testStringMap(mem, m, aKeys)
+ defer a.Release()
+
+ bKeys := []string{"z", "y", "x"}
+ b := testStringMap(mem, m, bKeys)
+ defer b.Release()
+
+ assert.NotEqual(t, aKeys, bKeys)
+ assert.Equal(t, a.NullN(), b.NullN())
+ assert.False(t, array.ApproxEqual(a, b))
+ assert.False(t, array.ApproxEqual(a, b,
array.WithUnorderedMapKeys(true)))
+ })
+
+ t.Run("extra right value", func(t *testing.T) {
+ m := map[string]string{"x": "x", "y": "y", "z": "z", "extra":
"extra"}
+
+ aKeys := []string{"z", "y", "x"}
+ a := testStringMap(mem, m, aKeys)
+ defer a.Release()
+
+ bKeys := []string{"z", "y", "x", "extra"}
+ b := testStringMap(mem, m, bKeys)
+ defer b.Release()
+
+ assert.NotEqual(t, aKeys, bKeys)
+ assert.Equal(t, a.NullN(), b.NullN())
+ assert.False(t, array.ApproxEqual(a, b))
+ assert.False(t, array.ApproxEqual(a, b,
array.WithUnorderedMapKeys(true)))
+ })
+
+ t.Run("unmatched value", func(t *testing.T) {
+ m := map[string]string{"x": "x", "y": "y", "z": "z", "extra":
"extra", "extra2": "extra"}
+
+ aKeys := []string{"z", "y", "x", "extra"}
+ a := testStringMap(mem, m, aKeys)
+ defer a.Release()
+
+ bKeys := []string{"z", "y", "x", "extra2"}
+ b := testStringMap(mem, m, bKeys)
+ defer b.Release()
+
+ assert.NotEqual(t, aKeys, bKeys)
+ assert.Equal(t, a.NullN(), b.NullN())
+ assert.False(t, array.ApproxEqual(a, b))
+ assert.False(t, array.ApproxEqual(a, b,
array.WithUnorderedMapKeys(true)))
+ })
+
+ t.Run("different value", func(t *testing.T) {
+ m := map[string]string{"x": "x", "y": "y", "z": "z", "extra":
"extra"}
+
+ keys := []string{"z", "y", "x", "extra"}
+ a := testStringMap(mem, m, keys)
+ defer a.Release()
+
+ m["extra"] = "different"
+ b := testStringMap(mem, m, keys)
+ defer b.Release()
+
+ assert.Equal(t, a.NullN(), b.NullN())
+ assert.False(t, array.ApproxEqual(a, b))
+ assert.False(t, array.ApproxEqual(a, b,
array.WithUnorderedMapKeys(true)))
+ })
+}
+
func arrayOf(mem memory.Allocator, a interface{}, valids []bool) arrow.Array {
if mem == nil {
mem = memory.NewGoAllocator()