Re: [PR] GH-46739: [C++] Fix Float16 signed zero/NaN equality comparisons [arrow]

via GitHub Tue, 26 Aug 2025 07:22:11 -0700


pitrou commented on code in PR #46973:
URL: https://github.com/apache/arrow/pull/46973#discussion_r2301118627



##########
cpp/src/arrow/testing/random.cc:
##########
@@ -101,8 +103,19 @@ struct GenerateOptions {
     pcg32_fast rng(seed_++);
     DistributionType dist(min_, max_);
 
-    // A static cast is required due to the int16 -> int8 handling.
-    std::generate(data, data + n, [&] { return 
static_cast<ValueType>(dist(rng)); });
+    if constexpr (is_half_float_type<ArrowType>::value) {
+      // Special handling is required to prevent generating Float16 NaNs

Review Comment:
   This doesn't fix `GenerateTypedData` when `nan_probability_` is non-zero (it 
will use `std::numeric_limits<uint16_t>::quiet_NaN()` which is 0 and translates 
to `Float16(0.0)`).



##########
cpp/src/arrow/testing/gtest_util.h:
##########
@@ -572,4 +574,21 @@ ARROW_TESTING_EXPORT std::shared_ptr<ArrayData> 
UnalignBuffers(const ArrayData&
 /// This method does not recurse into the dictionary or children
 ARROW_TESTING_EXPORT std::shared_ptr<Array> UnalignBuffers(const Array& array);
 
+/// \brief Convert a floating point value to it's corresponding C Type
+///
+/// Useful when testing HalfFloat (uint16_t) values alongside native floating 
point types
+template <typename ARROW_TYPE>
+auto RealToCType(double d) {
+  if constexpr (is_half_float_type<ARROW_TYPE>::value) {
+    const auto h = util::Float16::FromDouble(d);
+    // Double check that nan/inf/sign are preserved
+    EXPECT_EQ(h.is_nan(), std::isnan(d));
+    EXPECT_EQ(h.is_infinity(), std::isinf(d));
+    EXPECT_EQ(h.signbit(), std::signbit(d));

Review Comment:
   I wouldn't expect these checks in a helper function, especially as we 
supposedly have unit tests for this already (otherwise we should add them).



##########
cpp/src/arrow/testing/random.cc:
##########
@@ -101,8 +103,19 @@ struct GenerateOptions {
     pcg32_fast rng(seed_++);
     DistributionType dist(min_, max_);

Review Comment:
   `DistributionType` will be `std::uniform_int_distribution<uint16_t>` which 
will certainly not respect the min and max values once translated to Float16?



##########
cpp/src/arrow/testing/random.cc:
##########
@@ -101,8 +103,19 @@ struct GenerateOptions {
     pcg32_fast rng(seed_++);
     DistributionType dist(min_, max_);

Review Comment:
   Perhaps `ValueType` needs to be `Float16` here to make sure we don't misuse 
`uint16_t` like this, and `DistributionType` could be 
`::arrow::random::uniform_real_distribution<float>`.



##########
cpp/src/arrow/scalar.h:
##########
@@ -969,6 +976,18 @@ struct MakeScalarImpl {
     return Status::OK();
   }
 
+  // This isn't captured by the generic case above because `util::Float16` 
isn't implicity
+  // convertible to `uint16_t` (HalfFloat's ValueType)
+  template <typename T>
+  std::enable_if_t<std::is_same_v<std::remove_reference_t<ValueRef>, 
util::Float16> &&

Review Comment:
   Perhaps use `std::decay_t` instead of `std::remove_reference_t`?



##########
cpp/src/arrow/scalar_test.cc:
##########
@@ -201,22 +203,33 @@ TEST(TestScalar, IdentityCast) {
   */
 }
 
+template <typename ARROW_TYPE>
+struct NumericHelper {
+  using ArgType = typename ARROW_TYPE::c_type;
+};
+template <>
+struct NumericHelper<HalfFloatType> {
+  using ArgType = Float16;
+};
+
 template <typename T>
 class TestNumericScalar : public ::testing::Test {
  public:
   TestNumericScalar() = default;
 };
 
-TYPED_TEST_SUITE(TestNumericScalar, NumericArrowTypes);
+using NumericArrowTypesPlusHalfFloat =
+    testing::Types<UInt8Type, UInt16Type, UInt32Type, UInt64Type, Int8Type, 
Int16Type,
+                   Int32Type, Int64Type, FloatType, DoubleType, HalfFloatType>;
+TYPED_TEST_SUITE(TestNumericScalar, NumericArrowTypesPlusHalfFloat);
 
 TYPED_TEST(TestNumericScalar, Basics) {
-  using T = typename TypeParam::c_type;
+  using T = typename NumericHelper<TypeParam>::ArgType;
   using ScalarType = typename TypeTraits<TypeParam>::ScalarType;
 
   T value = static_cast<T>(1);
 
   auto scalar_val = std::make_shared<ScalarType>(value);
-  ASSERT_EQ(value, scalar_val->value);

Review Comment:
   We should probably keep the check here by using `if constexpr` as below?



##########
cpp/src/arrow/scalar_test.cc:
##########
@@ -255,46 +275,46 @@ TYPED_TEST(TestNumericScalar, Basics) {
   ASSERT_OK(two->ValidateFull());
 
   ASSERT_TRUE(null->Equals(*null_value));
-  ASSERT_TRUE(one->Equals(ScalarType(1)));
-  ASSERT_FALSE(one->Equals(ScalarType(2)));
-  ASSERT_TRUE(two->Equals(ScalarType(2)));
-  ASSERT_FALSE(two->Equals(ScalarType(3)));
+  ASSERT_TRUE(one->Equals(ScalarType(static_cast<T>(1))));
+  ASSERT_FALSE(one->Equals(ScalarType(static_cast<T>(2))));
+  ASSERT_TRUE(two->Equals(ScalarType(static_cast<T>(2))));
+  ASSERT_FALSE(two->Equals(ScalarType(static_cast<T>(3))));
 
   ASSERT_TRUE(null->ApproxEquals(*null_value));
-  ASSERT_TRUE(one->ApproxEquals(ScalarType(1)));
-  ASSERT_FALSE(one->ApproxEquals(ScalarType(2)));
-  ASSERT_TRUE(two->ApproxEquals(ScalarType(2)));
-  ASSERT_FALSE(two->ApproxEquals(ScalarType(3)));
+  ASSERT_TRUE(one->ApproxEquals(ScalarType(static_cast<T>(1))));
+  ASSERT_FALSE(one->ApproxEquals(ScalarType(static_cast<T>(2))));
+  ASSERT_TRUE(two->ApproxEquals(ScalarType(static_cast<T>(2))));
+  ASSERT_FALSE(two->ApproxEquals(ScalarType(static_cast<T>(3))));
 }
 
 TYPED_TEST(TestNumericScalar, Hashing) {
-  using T = typename TypeParam::c_type;
+  using T = typename NumericHelper<TypeParam>::ArgType;
   using ScalarType = typename TypeTraits<TypeParam>::ScalarType;
 
   std::unordered_set<std::shared_ptr<Scalar>, Scalar::Hash, Scalar::PtrsEqual> 
set;
   set.emplace(std::make_shared<ScalarType>());
-  for (T i = 0; i < 10; ++i) {
-    set.emplace(std::make_shared<ScalarType>(i));
+  for (int i = 0; i < 10; ++i) {
+    set.emplace(std::make_shared<ScalarType>(static_cast<T>(i)));

Review Comment:
   Since we are changing this, perhaps `ASSERT_TRUE(set.emplace(...).second)` 
would be better?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] GH-46739: [C++] Fix Float16 signed zero/NaN equality comparisons [arrow]

Reply via email to