(arrow) branch main updated: GH-41089: [C++] Clean up remaining tasks related to half float casts (#41084)

apitrou Wed, 10 Apr 2024 00:35:33 -0700

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/main by this push:
     new 71ec6d4b16 GH-41089: [C++] Clean up remaining tasks related to half 
float casts (#41084)
71ec6d4b16 is described below

commit 71ec6d4b164a0435a480946fc704d00fe3b25de5
Author: Clif Houck <[email protected]>
AuthorDate: Wed Apr 10 02:34:14 2024 -0500

    GH-41089: [C++] Clean up remaining tasks related to half float casts 
(#41084)
    
    
    
    ### Rationale for this change
    
    Address remaining tasks brought up post-merge in this PR: 
https://github.com/apache/arrow/pull/40067
    
    ### What changes are included in this PR?
    
    Adds a couple of tests and cleans up cruft on another test.
    
    ### Are these changes tested?
    
    Yes, as these are test-only related changes.
    
    ### Are there any user-facing changes?
    
    No.
    
    * GitHub Issue: #41089
    
    Authored-by: Clif Houck <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/ipc/json_simple_test.cc      | 30 ++++---------------------
 cpp/src/arrow/util/formatting_util_test.cc | 28 +++++++++++++++++++++++
 cpp/src/arrow/util/value_parsing_test.cc   | 36 ++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 26 deletions(-)

diff --git a/cpp/src/arrow/ipc/json_simple_test.cc 
b/cpp/src/arrow/ipc/json_simple_test.cc
index b3f7fc5b34..c6f14b1e1d 100644
--- a/cpp/src/arrow/ipc/json_simple_test.cc
+++ b/cpp/src/arrow/ipc/json_simple_test.cc
@@ -189,21 +189,6 @@ class TestIntegers : public ::testing::Test {
 
 TYPED_TEST_SUITE_P(TestIntegers);
 
-template <typename DataType>
-std::vector<typename DataType::c_type> TestIntegersMutateIfNeeded(
-    std::vector<typename DataType::c_type> data) {
-  return data;
-}
-
-// TODO: This works, but is it the right way to do this?
-template <>
-std::vector<HalfFloatType::c_type> TestIntegersMutateIfNeeded<HalfFloatType>(
-    std::vector<HalfFloatType::c_type> data) {
-  std::for_each(data.begin(), data.end(),
-                [](HalfFloatType::c_type& value) { value = 
Float16(value).bits(); });
-  return data;
-}
-
 TYPED_TEST_P(TestIntegers, Basics) {
   using T = TypeParam;
   using c_type = typename T::c_type;
@@ -212,17 +197,16 @@ TYPED_TEST_P(TestIntegers, Basics) {
   auto type = this->type();
 
   AssertJSONArray<T>(type, "[]", {});
-  AssertJSONArray<T>(type, "[4, 0, 5]", TestIntegersMutateIfNeeded<T>({4, 0, 
5}));
-  AssertJSONArray<T>(type, "[4, null, 5]", {true, false, true},
-                     TestIntegersMutateIfNeeded<T>({4, 0, 5}));
+  AssertJSONArray<T>(type, "[4, 0, 5]", {4, 0, 5});
+  AssertJSONArray<T>(type, "[4, null, 5]", {true, false, true}, {4, 0, 5});
 
   // Test limits
   const auto min_val = std::numeric_limits<c_type>::min();
   const auto max_val = std::numeric_limits<c_type>::max();
   std::string json_string = JSONArray(0, 1, min_val);
-  AssertJSONArray<T>(type, json_string, TestIntegersMutateIfNeeded<T>({0, 1, 
min_val}));
+  AssertJSONArray<T>(type, json_string, {0, 1, min_val});
   json_string = JSONArray(0, 1, max_val);
-  AssertJSONArray<T>(type, json_string, TestIntegersMutateIfNeeded<T>({0, 1, 
max_val}));
+  AssertJSONArray<T>(type, json_string, {0, 1, max_val});
 }
 
 TYPED_TEST_P(TestIntegers, Errors) {
@@ -289,12 +273,6 @@ INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt8, TestIntegers, 
UInt8Type);
 INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt16, TestIntegers, UInt16Type);
 INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt32, TestIntegers, UInt32Type);
 INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt64, TestIntegers, UInt64Type);
-// FIXME: I understand that HalfFloatType is backed by a uint16_t, but does it
-// make sense to run this test over it?
-// The way ConvertNumber for HalfFloatType is currently written, it allows the
-// conversion of floating point notation to a half float, which causes failures
-// in this test, one example is asserting 0.0 cannot be parsed as a half float.
-// INSTANTIATE_TYPED_TEST_SUITE_P(TestHalfFloat, TestIntegers, HalfFloatType);
 
 template <typename T>
 class TestStrings : public ::testing::Test {
diff --git a/cpp/src/arrow/util/formatting_util_test.cc 
b/cpp/src/arrow/util/formatting_util_test.cc
index fcbeec347d..f5ae789b23 100644
--- a/cpp/src/arrow/util/formatting_util_test.cc
+++ b/cpp/src/arrow/util/formatting_util_test.cc
@@ -26,11 +26,13 @@
 #include "arrow/testing/gtest_util.h"
 #include "arrow/type.h"
 #include "arrow/util/decimal.h"
+#include "arrow/util/float16.h"
 #include "arrow/util/formatting.h"
 
 namespace arrow {
 
 using internal::StringFormatter;
+using util::Float16;
 
 class StringAppender {
  public:
@@ -280,6 +282,32 @@ TEST(Formatting, Double) {
   AssertFormatting(formatter, -HUGE_VAL, "-inf");
 }
 
+TEST(Formatting, HalfFloat) {
+  StringFormatter<HalfFloatType> formatter;
+
+  AssertFormatting(formatter, Float16(0.0f).bits(), "0");
+  AssertFormatting(formatter, Float16(-0.0f).bits(), "-0");
+  AssertFormatting(formatter, Float16(1.5f).bits(), "1.5");
+
+  // Slightly adapted from values present here
+  // 
https://blogs.mathworks.com/cleve/2017/05/08/half-precision-16-bit-floating-point-arithmetic/
+  AssertFormatting(formatter, 0x3c00, "1");
+  AssertFormatting(formatter, 0x3c01, "1.0009765625");
+  AssertFormatting(formatter, 0x0400, "0.00006103515625");
+  AssertFormatting(formatter, 0x0001, "5.960464477539063e-8");
+
+  // Can't avoid loss of precision here.
+  AssertFormatting(formatter, Float16(1234.567f).bits(), "1235");
+  AssertFormatting(formatter, Float16(1e3f).bits(), "1000");
+  AssertFormatting(formatter, Float16(1e4f).bits(), "10000");
+  AssertFormatting(formatter, Float16(1e10f).bits(), "inf");
+  AssertFormatting(formatter, Float16(1e15f).bits(), "inf");
+
+  AssertFormatting(formatter, 0xffff, "nan");
+  AssertFormatting(formatter, 0x7c00, "inf");
+  AssertFormatting(formatter, 0xfc00, "-inf");
+}
+
 template <typename T>
 void TestDecimalFormatter() {
   struct TestParam {
diff --git a/cpp/src/arrow/util/value_parsing_test.cc 
b/cpp/src/arrow/util/value_parsing_test.cc
index 92d727019a..7cd1ab1e25 100644
--- a/cpp/src/arrow/util/value_parsing_test.cc
+++ b/cpp/src/arrow/util/value_parsing_test.cc
@@ -24,9 +24,13 @@
 
 #include "arrow/testing/gtest_util.h"
 #include "arrow/type.h"
+#include "arrow/util/float16.h"
 #include "arrow/util/value_parsing.h"
 
 namespace arrow {
+
+using util::Float16;
+
 namespace internal {
 
 template <typename T>
@@ -152,6 +156,25 @@ TEST(StringConversion, ToDouble) {
   AssertConversionFails(&converter, "1.5");
 }
 
+TEST(StringConversion, ToHalfFloat) {
+  AssertConversion<HalfFloatType>("1.5", Float16(1.5f).bits());
+  AssertConversion<HalfFloatType>("0", Float16(0.0f).bits());
+  AssertConversion<HalfFloatType>("-0.0", Float16(-0.0f).bits());
+  AssertConversion<HalfFloatType>("-1e15", Float16(-1e15).bits());
+  AssertConversion<HalfFloatType>("+Infinity", 0x7c00);
+  AssertConversion<HalfFloatType>("-Infinity", 0xfc00);
+  AssertConversion<HalfFloatType>("Infinity", 0x7c00);
+
+  AssertConversionFails<HalfFloatType>("");
+  AssertConversionFails<HalfFloatType>("e");
+  AssertConversionFails<HalfFloatType>("1,5");
+
+  StringConverter<HalfFloatType> converter(/*decimal_point=*/',');
+  AssertConversion(&converter, "1,5", Float16(1.5f).bits());
+  AssertConversion(&converter, "0", Float16(0.0f).bits());
+  AssertConversionFails(&converter, "1.5");
+}
+
 #if !defined(_WIN32) || defined(NDEBUG)
 
 TEST(StringConversion, ToFloatLocale) {
@@ -180,6 +203,19 @@ TEST(StringConversion, ToDoubleLocale) {
   AssertConversionFails(&converter, "1,5");
 }
 
+TEST(StringConversion, ToHalfFloatLocale) {
+  // French locale uses the comma as decimal point
+  LocaleGuard locale_guard("fr_FR.UTF-8");
+
+  AssertConversion<HalfFloatType>("1.5", Float16(1.5).bits());
+  AssertConversionFails<HalfFloatType>("1,5");
+
+  StringConverter<HalfFloatType> converter(/*decimal_point=*/'#');
+  AssertConversion(&converter, "1#5", Float16(1.5).bits());
+  AssertConversionFails(&converter, "1.5");
+  AssertConversionFails(&converter, "1,5");
+}
+
 #endif  // _WIN32
 
 TEST(StringConversion, ToInt8) {

(arrow) branch main updated: GH-41089: [C++] Clean up remaining tasks related to half float casts (#41084)

Reply via email to