Re: [PR] GH-45167: [C++] Implement Compute Equals for List Types [arrow]

via GitHub Fri, 18 Jul 2025 06:58:59 -0700


WillAyd commented on code in PR #45272:
URL: https://github.com/apache/arrow/pull/45272#discussion_r2216113095



##########
cpp/src/arrow/compare_internal.h:
##########
@@ -0,0 +1,849 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cmath>
+
+#include "arrow/array/array_dict.h"
+#include "arrow/array/data.h"
+#include "arrow/array/diff.h"
+#include "arrow/compare.h"
+#include "arrow/scalar.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/binary_view_util.h"
+#include "arrow/util/bit_run_reader.h"
+#include "arrow/util/bitmap_ops.h"
+#include "arrow/util/bitmap_reader.h"
+#include "arrow/util/float16.h"
+#include "arrow/util/logging_internal.h"
+#include "arrow/util/memory_internal.h"
+#include "arrow/util/ree_util.h"
+#include "arrow/visit_scalar_inline.h"
+#include "arrow/visit_type_inline.h"
+
+namespace arrow {
+
+using internal::BitmapEquals;
+using internal::BitmapReader;
+using internal::BitmapUInt64Reader;
+using internal::checked_cast;
+using internal::OptionalBitmapEquals;
+using util::Float16;
+
+// TODO also handle HALF_FLOAT NaNs
+/* Compile-time bundle of the three floating-point comparison switches.
+ * Each template argument is re-exposed as a static constexpr member so that
+ * code templated on a Flags type can read it; FloatingEquality does so via
+ * Flags::approximate, Flags::nans_equal and Flags::signed_zeros_equal.
+ */
+template <bool Approximate, bool NansEqual, bool SignedZerosEqual>
+struct FloatingEqualityFlags {
+  static constexpr bool approximate = Approximate;  // enables the epsilon check
+  static constexpr bool nans_equal = NansEqual;  // two NaNs compare equal
+  static constexpr bool signed_zeros_equal = SignedZerosEqual;  // +0 equals -0
+};
+/* Equality functor over two values of type T (presumably float or double;
+ * a separate specialization exists for uint16_t). Behaviour is selected at
+ * compile time by the Flags type: sign-of-zero handling, NaN handling and an
+ * optional absolute-tolerance comparison. The tolerance is options.atol()
+ * converted to T.
+ */
+template <typename T, typename Flags>
+struct FloatingEquality {
+  explicit FloatingEquality(const EqualOptions& options)
+      : epsilon(static_cast<T>(options.atol())) {}
+
+  bool operator()(T x, T y) const {
+    if (x == y) {  // for IEEE floats this also holds for +0.0 vs -0.0
+      return Flags::signed_zeros_equal || (std::signbit(x) == std::signbit(y));
+    }
+    if (Flags::nans_equal && std::isnan(x) && std::isnan(y)) {  // NaN == NaN is false
+      return true;
+    }
+    if (Flags::approximate && (fabs(x - y) <= epsilon)) {  // absolute tolerance only
+      return true;
+    }
+    return false;
+  }
+
+  const T epsilon;  // absolute tolerance, from EqualOptions::atol()
+};
+
+// Half-float equality. Values arrive as raw bit patterns in a uint16_t and are
+// decoded with Float16::FromBits before being compared.
+template <typename Flags>
+struct FloatingEquality<uint16_t, Flags> {
+  // Stores options.atol(), narrowed to float, as the absolute tolerance.
+  explicit FloatingEquality(const EqualOptions& options)
+      : epsilon(static_cast<float>(options.atol())) {}
+
+  // Returns true if the half-floats encoded by `x` and `y` compare equal under
+  // the compile-time Flags.
+  bool operator()(uint16_t x, uint16_t y) const {
+    const Float16 f_x = Float16::FromBits(x);
+    const Float16 f_y = Float16::FromBits(y);
+    // Compare the decoded values rather than the raw bits. With a bitwise
+    // test, identical NaN patterns would count as equal even when
+    // Flags::nans_equal is false, and +0 / -0 (different bit patterns) would
+    // never reach the signed-zero check. Comparing values matches the generic
+    // FloatingEquality<T, Flags>.
+    const float v_x = f_x.ToFloat();
+    const float v_y = f_y.ToFloat();
+    if (v_x == v_y) {
+      return Flags::signed_zeros_equal || (f_x.signbit() == f_y.signbit());
+    }
+    if (Flags::nans_equal && f_x.is_nan() && f_y.is_nan()) {
+      return true;
+    }
+    // Absolute tolerance only; a NaN operand makes this comparison false.
+    if (Flags::approximate && (std::fabs(v_x - v_y) <= epsilon)) {
+      return true;
+    }
+    return false;
+  }
+
+  const float epsilon;  // absolute tolerance, from EqualOptions::atol()
+};
+/* Turns the runtime comparison settings (floating_approximate,
+ * options.nans_equal(), options.signed_zeros_equal()) into the compile-time
+ * FloatingEqualityFlags of a FloatingEquality<T, ...> functor. Each dispatch
+ * level fixes one boolean; the innermost level constructs the functor from
+ * `options` and passes it to `visit`.
+ */
+template <typename T, typename Visitor>
+struct FloatingEqualityDispatcher {
+  const EqualOptions& options;
+  bool floating_approximate;
+  Visitor&& visit;  // called exactly once per Dispatch() with the chosen functor
+
+  template <bool Approximate, bool NansEqual>
+  void DispatchL3() {  // innermost level: fixes SignedZerosEqual
+    if (options.signed_zeros_equal()) {
+      visit(FloatingEquality<T, FloatingEqualityFlags<Approximate, NansEqual, 
true>>{
+          options});
+    } else {
+      visit(FloatingEquality<T, FloatingEqualityFlags<Approximate, NansEqual, 
false>>{
+          options});
+    }
+  }
+
+  template <bool Approximate>
+  void DispatchL2() {  // middle level: fixes NansEqual
+    if (options.nans_equal()) {
+      DispatchL3<Approximate, true>();
+    } else {
+      DispatchL3<Approximate, false>();
+    }
+  }
+
+  void Dispatch() {  // entry point: fixes Approximate
+    if (floating_approximate) {
+      DispatchL2<true>();
+    } else {
+      DispatchL2<false>();
+    }
+  }
+};
+
+// Call `visit(equality_func)` where `equality_func` has the signature
+// `bool(T, T)` and returns true if the two values compare equal.
+template <typename T, typename Visitor>  // T: value type handed to FloatingEquality
+void VisitFloatingEquality(const EqualOptions& options, bool 
floating_approximate,
+                           Visitor&& visit) {
+  FloatingEqualityDispatcher<T, Visitor>{options, floating_approximate,
+                                         std::forward<Visitor>(visit)}
+      .Dispatch();  // runs the dispatch on the temporary; `visit` is called within
+}
+/* Returns false if `type` is FLOAT or DOUBLE, or if any type reachable
+ * through type.fields() (recursively) is; returns true otherwise. Used by
+ * IdentityImpliesEquality when options.nans_equal() is false.
+ * NOTE(review): HALF_FLOAT is not tested here (the file carries a TODO about
+ * HALF_FLOAT NaNs), although the uint16_t FloatingEquality specialization has
+ * NaN handling -- confirm whether half-float should also return false.
+ */
+inline bool IdentityImpliesEqualityNansNotEqual(const DataType& type) {
+  if (type.id() == Type::FLOAT || type.id() == Type::DOUBLE) {
+    return false;
+  }
+  for (const auto& child : type.fields()) {  // recurse into each child field's type
+    if (!IdentityImpliesEqualityNansNotEqual(*child->type())) {
+      return false;
+    }
+  }
+  return true;
+}
+/* Returns true when options.nans_equal() is set; otherwise returns the result
+ * of IdentityImpliesEqualityNansNotEqual(type). Judging by the name, a true
+ * result means identical data of this type can be treated as equal without
+ * comparing values -- confirm against callers, which are not visible here.
+ */
+inline bool IdentityImpliesEquality(const DataType& type, const EqualOptions& 
options) {
+  if (options.nans_equal()) {  // no type-specific check is made in this case
+    return true;
+  }
+  return IdentityImpliesEqualityNansNotEqual(type);
+}
+
+ARROW_EXPORT bool CompareArrayRanges(const ArrayData& left, const ArrayData& 
right,

Review Comment:
   We shouldn't need the ARROW_EXPORT here - it is only temporarily required 
because this CompareArrayRanges function is used in the definition of some 
RangeDataEqualsImpl methods. However, I just want to test that this structure 
helps the R tests pass CI. If so, the next PR should move the 
RangeDataEqualsImpl method definitions out of the header into the source file.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Re: [PR] GH-45167: [C++] Implement Compute Equals for List Types [arrow]

Reply via email to