This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 71ec6d4b16 GH-41089: [C++] Clean up remaining tasks related to half
float casts (#41084)
71ec6d4b16 is described below
commit 71ec6d4b164a0435a480946fc704d00fe3b25de5
Author: Clif Houck <[email protected]>
AuthorDate: Wed Apr 10 02:34:14 2024 -0500
GH-41089: [C++] Clean up remaining tasks related to half float casts
(#41084)
### Rationale for this change
Address remaining tasks brought up post-merge in this PR:
https://github.com/apache/arrow/pull/40067
### What changes are included in this PR?
Adds a couple of tests and cleans up cruft on another test.
### Are these changes tested?
Yes, as these are test-only related changes.
### Are there any user-facing changes?
No.
* GitHub Issue: #41089
Authored-by: Clif Houck <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/ipc/json_simple_test.cc | 30 ++++---------------------
cpp/src/arrow/util/formatting_util_test.cc | 28 +++++++++++++++++++++++
cpp/src/arrow/util/value_parsing_test.cc | 36 ++++++++++++++++++++++++++++++
3 files changed, 68 insertions(+), 26 deletions(-)
diff --git a/cpp/src/arrow/ipc/json_simple_test.cc
b/cpp/src/arrow/ipc/json_simple_test.cc
index b3f7fc5b34..c6f14b1e1d 100644
--- a/cpp/src/arrow/ipc/json_simple_test.cc
+++ b/cpp/src/arrow/ipc/json_simple_test.cc
@@ -189,21 +189,6 @@ class TestIntegers : public ::testing::Test {
TYPED_TEST_SUITE_P(TestIntegers);
-template <typename DataType>
-std::vector<typename DataType::c_type> TestIntegersMutateIfNeeded(
- std::vector<typename DataType::c_type> data) {
- return data;
-}
-
-// TODO: This works, but is it the right way to do this?
-template <>
-std::vector<HalfFloatType::c_type> TestIntegersMutateIfNeeded<HalfFloatType>(
- std::vector<HalfFloatType::c_type> data) {
- std::for_each(data.begin(), data.end(),
- [](HalfFloatType::c_type& value) { value =
Float16(value).bits(); });
- return data;
-}
-
TYPED_TEST_P(TestIntegers, Basics) {
using T = TypeParam;
using c_type = typename T::c_type;
@@ -212,17 +197,16 @@ TYPED_TEST_P(TestIntegers, Basics) {
auto type = this->type();
AssertJSONArray<T>(type, "[]", {});
- AssertJSONArray<T>(type, "[4, 0, 5]", TestIntegersMutateIfNeeded<T>({4, 0,
5}));
- AssertJSONArray<T>(type, "[4, null, 5]", {true, false, true},
- TestIntegersMutateIfNeeded<T>({4, 0, 5}));
+ AssertJSONArray<T>(type, "[4, 0, 5]", {4, 0, 5});
+ AssertJSONArray<T>(type, "[4, null, 5]", {true, false, true}, {4, 0, 5});
// Test limits
const auto min_val = std::numeric_limits<c_type>::min();
const auto max_val = std::numeric_limits<c_type>::max();
std::string json_string = JSONArray(0, 1, min_val);
- AssertJSONArray<T>(type, json_string, TestIntegersMutateIfNeeded<T>({0, 1,
min_val}));
+ AssertJSONArray<T>(type, json_string, {0, 1, min_val});
json_string = JSONArray(0, 1, max_val);
- AssertJSONArray<T>(type, json_string, TestIntegersMutateIfNeeded<T>({0, 1,
max_val}));
+ AssertJSONArray<T>(type, json_string, {0, 1, max_val});
}
TYPED_TEST_P(TestIntegers, Errors) {
@@ -289,12 +273,6 @@ INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt8, TestIntegers,
UInt8Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt16, TestIntegers, UInt16Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt32, TestIntegers, UInt32Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt64, TestIntegers, UInt64Type);
-// FIXME: I understand that HalfFloatType is backed by a uint16_t, but does it
-// make sense to run this test over it?
-// The way ConvertNumber for HalfFloatType is currently written, it allows the
-// conversion of floating point notation to a half float, which causes failures
-// in this test, one example is asserting 0.0 cannot be parsed as a half float.
-// INSTANTIATE_TYPED_TEST_SUITE_P(TestHalfFloat, TestIntegers, HalfFloatType);
template <typename T>
class TestStrings : public ::testing::Test {
diff --git a/cpp/src/arrow/util/formatting_util_test.cc
b/cpp/src/arrow/util/formatting_util_test.cc
index fcbeec347d..f5ae789b23 100644
--- a/cpp/src/arrow/util/formatting_util_test.cc
+++ b/cpp/src/arrow/util/formatting_util_test.cc
@@ -26,11 +26,13 @@
#include "arrow/testing/gtest_util.h"
#include "arrow/type.h"
#include "arrow/util/decimal.h"
+#include "arrow/util/float16.h"
#include "arrow/util/formatting.h"
namespace arrow {
using internal::StringFormatter;
+using util::Float16;
class StringAppender {
public:
@@ -280,6 +282,32 @@ TEST(Formatting, Double) {
AssertFormatting(formatter, -HUGE_VAL, "-inf");
}
+TEST(Formatting, HalfFloat) {
+ StringFormatter<HalfFloatType> formatter;
+
+ AssertFormatting(formatter, Float16(0.0f).bits(), "0");
+ AssertFormatting(formatter, Float16(-0.0f).bits(), "-0");
+ AssertFormatting(formatter, Float16(1.5f).bits(), "1.5");
+
+ // Slightly adapted from values present here
+ //
https://blogs.mathworks.com/cleve/2017/05/08/half-precision-16-bit-floating-point-arithmetic/
+ AssertFormatting(formatter, 0x3c00, "1");
+ AssertFormatting(formatter, 0x3c01, "1.0009765625");
+ AssertFormatting(formatter, 0x0400, "0.00006103515625");
+ AssertFormatting(formatter, 0x0001, "5.960464477539063e-8");
+
+ // Can't avoid loss of precision here.
+ AssertFormatting(formatter, Float16(1234.567f).bits(), "1235");
+ AssertFormatting(formatter, Float16(1e3f).bits(), "1000");
+ AssertFormatting(formatter, Float16(1e4f).bits(), "10000");
+ AssertFormatting(formatter, Float16(1e10f).bits(), "inf");
+ AssertFormatting(formatter, Float16(1e15f).bits(), "inf");
+
+ AssertFormatting(formatter, 0xffff, "nan");
+ AssertFormatting(formatter, 0x7c00, "inf");
+ AssertFormatting(formatter, 0xfc00, "-inf");
+}
+
template <typename T>
void TestDecimalFormatter() {
struct TestParam {
diff --git a/cpp/src/arrow/util/value_parsing_test.cc
b/cpp/src/arrow/util/value_parsing_test.cc
index 92d727019a..7cd1ab1e25 100644
--- a/cpp/src/arrow/util/value_parsing_test.cc
+++ b/cpp/src/arrow/util/value_parsing_test.cc
@@ -24,9 +24,13 @@
#include "arrow/testing/gtest_util.h"
#include "arrow/type.h"
+#include "arrow/util/float16.h"
#include "arrow/util/value_parsing.h"
namespace arrow {
+
+using util::Float16;
+
namespace internal {
template <typename T>
@@ -152,6 +156,25 @@ TEST(StringConversion, ToDouble) {
AssertConversionFails(&converter, "1.5");
}
+TEST(StringConversion, ToHalfFloat) {
+ AssertConversion<HalfFloatType>("1.5", Float16(1.5f).bits());
+ AssertConversion<HalfFloatType>("0", Float16(0.0f).bits());
+ AssertConversion<HalfFloatType>("-0.0", Float16(-0.0f).bits());
+ AssertConversion<HalfFloatType>("-1e15", Float16(-1e15).bits());
+ AssertConversion<HalfFloatType>("+Infinity", 0x7c00);
+ AssertConversion<HalfFloatType>("-Infinity", 0xfc00);
+ AssertConversion<HalfFloatType>("Infinity", 0x7c00);
+
+ AssertConversionFails<HalfFloatType>("");
+ AssertConversionFails<HalfFloatType>("e");
+ AssertConversionFails<HalfFloatType>("1,5");
+
+ StringConverter<HalfFloatType> converter(/*decimal_point=*/',');
+ AssertConversion(&converter, "1,5", Float16(1.5f).bits());
+ AssertConversion(&converter, "0", Float16(0.0f).bits());
+ AssertConversionFails(&converter, "1.5");
+}
+
#if !defined(_WIN32) || defined(NDEBUG)
TEST(StringConversion, ToFloatLocale) {
@@ -180,6 +203,19 @@ TEST(StringConversion, ToDoubleLocale) {
AssertConversionFails(&converter, "1,5");
}
+TEST(StringConversion, ToHalfFloatLocale) {
+ // French locale uses the comma as decimal point
+ LocaleGuard locale_guard("fr_FR.UTF-8");
+
+ AssertConversion<HalfFloatType>("1.5", Float16(1.5).bits());
+ AssertConversionFails<HalfFloatType>("1,5");
+
+ StringConverter<HalfFloatType> converter(/*decimal_point=*/'#');
+ AssertConversion(&converter, "1#5", Float16(1.5).bits());
+ AssertConversionFails(&converter, "1.5");
+ AssertConversionFails(&converter, "1,5");
+}
+
#endif // _WIN32
TEST(StringConversion, ToInt8) {