[arrow] branch main updated: GH-35942: [C++] Improve Decimal ToReal accuracy (#36667)

apitrou Tue, 18 Jul 2023 06:48:55 -0700

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/main by this push:
     new 245141e00a GH-35942: [C++] Improve Decimal ToReal accuracy (#36667)
245141e00a is described below

commit 245141e00a6fe2e72da701a72be6e72ad116154f
Author: Jin Shang <[email protected]>
AuthorDate: Tue Jul 18 21:46:42 2023 +0800

    GH-35942: [C++] Improve Decimal ToReal accuracy (#36667)
    
    
    
    ### Rationale for this change
    
    The current implementation of `Decimal::ToReal` can be naively represented 
as the following pseudocode:
    ```
    Real v = static_cast<Real>(decimal.as_int128/256())
    return v * (10.0**-scale)
    ```
    It stores the intermediate unscaled int128/256 value as a float/double. The 
unscaled int128/256 value can be very large when the decimal has a large scale, 
which causes precision issues such as in #36602.
    
    ### What changes are included in this PR?
    
    Avoid storing the unscaled large int as float if the representation is not 
precise, by spliting the decimal into integral and fractional parts and dealing 
with them separately. This algorithm guarantees that:
    1. If the decimal is an integer, the conversion is exact.
    2. If the number of fractional digits is <= 
RealTraits<Real>::kMantissaDigits (e.g. 8 for float and 16 for double), the 
conversion is within 1 ULP of the exact value. For example 
Decimal128::ToReal<float>(9999.999) falls into this category because the 
integer 9999999 is precisely representable by float, whereas 9999.9999 would be 
in the next category.
    3. Otherwise, the conversion is within 
2^(-RealTraits<Real>::kMantissaDigits+1) (e.g. 2^-23 for float and 2^-52 for 
double) of the exact value.
    
    Here "exact value" means the closest representable value by Real.
    
    I believe this algorithm is good enough, because an"exact" algorithm would 
require iterative multiplication and subtraction of decimals to determain the 
binary representation of its fractional part. Yet the result would still almost 
always be inaccurate because float/double can only accurately represent powers 
of two. IMHO It's not worth it to spend that many expensive operations just to 
improve the result by one ULP.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
     No.
    
    * Closes: #35942
    
    Lead-authored-by: Jin Shang <[email protected]>
    Co-authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/compute/kernels/scalar_cast_test.cc |   3 +-
 cpp/src/arrow/util/basic_decimal.cc               |  10 ++
 cpp/src/arrow/util/basic_decimal.h                |   4 +
 cpp/src/arrow/util/decimal.cc                     |  58 ++++++++-
 cpp/src/arrow/util/decimal_internal.h             |   4 +
 cpp/src/arrow/util/decimal_test.cc                | 148 ++++++++++++++++------
 6 files changed, 183 insertions(+), 44 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc 
b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index 083a85eb34..1db06a7625 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -1025,7 +1025,8 @@ TEST(Cast, DecimalToFloating) {
     }
   }
 
-  // Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal()
+  // Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal() in
+  // decimal_test.cc
 }
 
 TEST(Cast, DecimalToString) {
diff --git a/cpp/src/arrow/util/basic_decimal.cc 
b/cpp/src/arrow/util/basic_decimal.cc
index f2fd39d6f3..0835ab9074 100644
--- a/cpp/src/arrow/util/basic_decimal.cc
+++ b/cpp/src/arrow/util/basic_decimal.cc
@@ -969,6 +969,16 @@ bool BasicDecimal256::FitsInPrecision(int32_t precision) 
const {
   return BasicDecimal256::Abs(*this) < kDecimal256PowersOfTen[precision];
 }
 
+void BasicDecimal256::GetWholeAndFraction(int scale, BasicDecimal256* whole,
+                                          BasicDecimal256* fraction) const {
+  DCHECK_GE(scale, 0);
+  DCHECK_LE(scale, 76);
+
+  BasicDecimal256 multiplier(kDecimal256PowersOfTen[scale]);
+  auto s = Divide(multiplier, whole, fraction);
+  DCHECK_EQ(s, DecimalStatus::kSuccess);
+}
+
 const BasicDecimal256& BasicDecimal256::GetScaleMultiplier(int32_t scale) {
   DCHECK_GE(scale, 0);
   DCHECK_LE(scale, 76);
diff --git a/cpp/src/arrow/util/basic_decimal.h 
b/cpp/src/arrow/util/basic_decimal.h
index b263bb234a..d8a91ea76b 100644
--- a/cpp/src/arrow/util/basic_decimal.h
+++ b/cpp/src/arrow/util/basic_decimal.h
@@ -366,6 +366,10 @@ class ARROW_EXPORT BasicDecimal256 : public 
GenericBasicDecimal<BasicDecimal256,
   /// \brief Get the lowest bits of the two's complement representation of the 
number.
   uint64_t low_bits() const { return bit_util::little_endian::Make(array_)[0]; 
}
 
+  /// \brief separate the integer and fractional parts for the given scale.
+  void GetWholeAndFraction(int32_t scale, BasicDecimal256* whole,
+                           BasicDecimal256* fraction) const;
+
   /// \brief Scale multiplier for given scale value.
   static const BasicDecimal256& GetScaleMultiplier(int32_t scale);
   /// \brief Half-scale multiplier for given scale value.
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 1f8447059f..704b6bb9d4 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -305,12 +305,39 @@ struct Decimal128RealConversion
   }
 
   template <typename Real>
-  static Real ToRealPositive(const Decimal128& decimal, int32_t scale) {
+  static Real ToRealPositiveNoSplit(const Decimal128& decimal, int32_t scale) {
     Real x = 
RealTraits<Real>::two_to_64(static_cast<Real>(decimal.high_bits()));
     x += static_cast<Real>(decimal.low_bits());
     x *= LargePowerOfTen<Real>(-scale);
     return x;
   }
+
+  /// An appoximate conversion from Decimal128 to Real that guarantees:
+  /// 1. If the decimal is an integer, the conversion is exact.
+  /// 2. If the number of fractional digits is <= 
RealTraits<Real>::kMantissaDigits (e.g.
+  ///    8 for float and 16 for double), the conversion is within 1 ULP of the 
exact
+  ///    value.
+  /// 3. Otherwise, the conversion is within 
2^(-RealTraits<Real>::kMantissaDigits+1)
+  ///    (e.g. 2^-23 for float and 2^-52 for double) of the exact value.
+  /// Here "exact value" means the closest representable value by Real.
+  template <typename Real>
+  static Real ToRealPositive(const Decimal128& decimal, int32_t scale) {
+    if (scale <= 0 || (decimal.high_bits() == 0 &&
+                       decimal.low_bits() <= 
RealTraits<Real>::kMaxPreciseInteger)) {
+      // No need to split the decimal if it is already an integer (scale <= 0) 
or if it
+      // can be precisely represented by Real
+      return ToRealPositiveNoSplit<Real>(decimal, scale);
+    }
+
+    // Split decimal into whole and fractional parts to avoid precision loss
+    BasicDecimal128 whole_decimal, fraction_decimal;
+    decimal.GetWholeAndFraction(scale, &whole_decimal, &fraction_decimal);
+
+    Real whole = ToRealPositiveNoSplit<Real>(whole_decimal, 0);
+    Real fraction = ToRealPositiveNoSplit<Real>(fraction_decimal, scale);
+
+    return whole + fraction;
+  }
 };
 
 }  // namespace
@@ -967,7 +994,7 @@ struct Decimal256RealConversion
   }
 
   template <typename Real>
-  static Real ToRealPositive(const Decimal256& decimal, int32_t scale) {
+  static Real ToRealPositiveNoSplit(const Decimal256& decimal, int32_t scale) {
     DCHECK_GE(decimal, 0);
     Real x = 0;
     const auto parts_le = 
bit_util::little_endian::Make(decimal.native_endian_array());
@@ -978,6 +1005,33 @@ struct Decimal256RealConversion
     x *= LargePowerOfTen<Real>(-scale);
     return x;
   }
+
+  /// An appoximate conversion from Decimal256 to Real that guarantees:
+  /// 1. If the decimal is an integer, the conversion is exact.
+  /// 2. If the number of fractional digits is <= 
RealTraits<Real>::kMantissaDigits (e.g.
+  ///    8 for float and 16 for double), the conversion is within 1 ULP of the 
exact
+  ///    value.
+  /// 3. Otherwise, the conversion is within 
2^(-RealTraits<Real>::kMantissaDigits+1)
+  ///    (e.g. 2^-23 for float and 2^-52 for double) of the exact value.
+  /// Here "exact value" means the closest representable value by Real.
+  template <typename Real>
+  static Real ToRealPositive(const Decimal256& decimal, int32_t scale) {
+    const auto parts_le = decimal.little_endian_array();
+    if (scale <= 0 || (parts_le[3] == 0 && parts_le[2] == 0 && parts_le[1] == 
0 &&
+                       parts_le[0] < RealTraits<Real>::kMaxPreciseInteger)) {
+      // No need to split the decimal if it is already an integer (scale <= 0) 
or if it
+      // can be precisely represented by Real
+      return ToRealPositiveNoSplit<Real>(decimal, scale);
+    }
+
+    // Split the decimal into whole and fractional parts to avoid precision 
loss
+    BasicDecimal256 whole_decimal, fraction_decimal;
+    decimal.GetWholeAndFraction(scale, &whole_decimal, &fraction_decimal);
+
+    Real whole = ToRealPositiveNoSplit<Real>(whole_decimal, 0);
+    Real fraction = ToRealPositiveNoSplit<Real>(fraction_decimal, scale);
+    return whole + fraction;
+  }
 };
 
 }  // namespace
diff --git a/cpp/src/arrow/util/decimal_internal.h 
b/cpp/src/arrow/util/decimal_internal.h
index 041aac4ef8..51a7229ab6 100644
--- a/cpp/src/arrow/util/decimal_internal.h
+++ b/cpp/src/arrow/util/decimal_internal.h
@@ -451,6 +451,8 @@ struct RealTraits<float> {
   static constexpr int kMantissaBits = 24;
   // ceil(log10(2 ^ kMantissaBits))
   static constexpr int kMantissaDigits = 8;
+  // Integers between zero and kMaxPreciseInteger can be precisely represented
+  static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1;
 };
 
 template <>
@@ -464,6 +466,8 @@ struct RealTraits<double> {
   static constexpr int kMantissaBits = 53;
   // ceil(log10(2 ^ kMantissaBits))
   static constexpr int kMantissaDigits = 16;
+  // Integers between zero and kMaxPreciseInteger can be precisely represented
+  static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1;
 };
 
 template <typename DecimalType>
diff --git a/cpp/src/arrow/util/decimal_test.cc 
b/cpp/src/arrow/util/decimal_test.cc
index 1401750ce7..6376a9545a 100644
--- a/cpp/src/arrow/util/decimal_test.cc
+++ b/cpp/src/arrow/util/decimal_test.cc
@@ -1050,6 +1050,24 @@ void CheckDecimalToReal(const std::string& 
decimal_value, int32_t scale, Real ex
       << "Decimal value: " << decimal_value << " Scale: " << scale;
 }
 
+template <typename Decimal, typename Real>
+void CheckDecimalToRealWithinOneULP(const std::string& decimal_value, int32_t 
scale,
+                                    Real expected) {
+  Decimal dec(decimal_value);
+  auto result = dec.template ToReal<Real>(scale);
+  ASSERT_TRUE(result == expected || result == std::nextafter(expected, 
expected + 1) ||
+              result == std::nextafter(expected, expected - 1))
+      << "Decimal value: " << decimal_value << " Scale: " << scale;
+}
+
+template <typename Decimal, typename Real>
+void CheckDecimalToRealWithinEpsilon(const std::string& decimal_value, int32_t 
scale,
+                                     Real epsilon, Real expected) {
+  Decimal dec(decimal_value);
+  ASSERT_TRUE(std::abs(dec.template ToReal<Real>(scale) - expected) <= epsilon)
+      << "Decimal value: " << decimal_value << " Scale: " << scale;
+}
+
 template <typename Decimal>
 void CheckDecimalToRealApprox(const std::string& decimal_value, int32_t scale,
                               float expected) {
@@ -1110,59 +1128,79 @@ class TestDecimalToReal : public ::testing::Test {
       }
     }
   }
+};
 
-  // Test precision of conversions to float values
-  void TestPrecision() {
-    // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
-    CheckDecimalToReal<Decimal, Real>("9223373136366403584", 0, 9.223373e+18f);
-    CheckDecimalToReal<Decimal, Real>("-9223373136366403584", 0, 
-9.223373e+18f);
-    // 2**64 + 2**41 (exactly representable in a float)
-    CheckDecimalToReal<Decimal, Real>("18446746272732807168", 0, 
1.8446746e+19f);
-    CheckDecimalToReal<Decimal, Real>("-18446746272732807168", 0, 
-1.8446746e+19f);
-  }
+TYPED_TEST_SUITE(TestDecimalToReal, RealTypes);
+TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); }
+
+// Custom test for Decimal::ToReal<float>
+template <typename DecimalType>
+class TestDecimalToRealFloat : public TestDecimalToReal<std::pair<DecimalType, 
float>> {};
+TYPED_TEST_SUITE(TestDecimalToRealFloat, DecimalTypes);
 
-  // Test conversions with a range of scales
-  void TestLargeValues(int32_t max_scale) {
-    // Note that exact comparisons would succeed on some platforms (Linux, 
macOS).
-    // Nevertheless, power-of-ten factors are not all exactly representable
-    // in binary floating point.
-    for (int32_t scale = -max_scale; scale <= max_scale; scale++) {
+TYPED_TEST(TestDecimalToRealFloat, LargeValues) {
+  auto max_scale = TypeParam::kMaxScale;
+  // Note that exact comparisons would succeed on some platforms (Linux, 
macOS).
+  // Nevertheless, power-of-ten factors are not all exactly representable
+  // in binary floating point.
+  for (int32_t scale = -max_scale; scale <= max_scale; scale++) {
 #ifdef _WIN32
-      // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
-      if (scale == 45) continue;
+    // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
+    if (scale == 45) continue;
 #endif
-      CheckDecimalToRealApprox<Decimal>("1", scale, Pow10(-scale));
-    }
-    for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) {
+    CheckDecimalToRealApprox<TypeParam>("1", scale, this->Pow10(-scale));
+  }
+  for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) {
 #ifdef _WIN32
-      // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
-      if (scale == 45) continue;
+    // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
+    if (scale == 45) continue;
 #endif
-      const Real factor = static_cast<Real>(123);
-      CheckDecimalToRealApprox<Decimal>("123", scale, factor * Pow10(-scale));
-    }
+    const auto factor = static_cast<float>(123);
+    CheckDecimalToRealApprox<TypeParam>("123", scale, factor * 
this->Pow10(-scale));
   }
-};
-
-TYPED_TEST_SUITE(TestDecimalToReal, RealTypes);
-
-TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); }
+}
 
-// Custom test for Decimal128::ToReal<float>
-class TestDecimal128ToRealFloat : public 
TestDecimalToReal<std::pair<Decimal128, float>> {
-};
-TEST_F(TestDecimal128ToRealFloat, LargeValues) { 
TestLargeValues(/*max_scale=*/38); }
-TEST_F(TestDecimal128ToRealFloat, Precision) { this->TestPrecision(); }
-// Custom test for Decimal256::ToReal<float>
-class TestDecimal256ToRealFloat : public 
TestDecimalToReal<std::pair<Decimal256, float>> {
-};
-TEST_F(TestDecimal256ToRealFloat, LargeValues) { 
TestLargeValues(/*max_scale=*/76); }
-TEST_F(TestDecimal256ToRealFloat, Precision) { this->TestPrecision(); }
+TYPED_TEST(TestDecimalToRealFloat, Precision) {
+  // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
+  CheckDecimalToReal<TypeParam, float>("9223373136366403584", 0, 
9.223373e+18f);
+  CheckDecimalToReal<TypeParam, float>("-9223373136366403584", 0, 
-9.223373e+18f);
+  // 2**64 + 2**41 (exactly representable in a float)
+  CheckDecimalToReal<TypeParam, float>("18446746272732807168", 0, 
1.8446746e+19f);
+  CheckDecimalToReal<TypeParam, float>("-18446746272732807168", 0, 
-1.8446746e+19f);
+
+  // Integers are always exact
+  auto scale = TypeParam::kMaxScale - 1;
+  std::string seven = "7.";
+  seven.append(scale, '0');  // pad with trailing zeros
+  CheckDecimalToReal<TypeParam, float>(seven, scale, 7.0f);
+  CheckDecimalToReal<TypeParam, float>("-" + seven, scale, -7.0f);
+
+  CheckDecimalToReal<TypeParam, 
float>("99999999999999999999.0000000000000000", 16,
+                                       99999999999999999999.0f);
+  CheckDecimalToReal<TypeParam, 
float>("-99999999999999999999.0000000000000000", 16,
+                                       -99999999999999999999.0f);
+
+  // Small fractions are within one ULP
+  CheckDecimalToRealWithinOneULP<TypeParam, float>("9999999.9", 1, 9999999.9f);
+  CheckDecimalToRealWithinOneULP<TypeParam, float>("-9999999.9", 1, 
-9999999.9f);
+  CheckDecimalToRealWithinOneULP<TypeParam, float>("9999999.999999", 6, 
9999999.999999f);
+  CheckDecimalToRealWithinOneULP<TypeParam, float>("-9999999.999999", 6,
+                                                   -9999999.999999f);
+
+  // Large fractions are within 2^-23
+  constexpr float epsilon = 1.1920928955078125e-07f;  // 2^-23
+  CheckDecimalToRealWithinEpsilon<TypeParam, float>(
+      "112334829348925.99070703983306884765625", 23, epsilon,
+      112334829348925.99070703983306884765625f);
+  CheckDecimalToRealWithinEpsilon<TypeParam, float>(
+      "1.987748987892758765582589910934859345", 36, epsilon,
+      1.987748987892758765582589910934859345f);
+}
 
 // ToReal<double> tests are disabled on MinGW because of precision issues in 
results
 #ifndef __MINGW32__
 
-// Custom test for Decimal128::ToReal<double>
+// Custom test for Decimal::ToReal<double>
 template <typename DecimalType>
 class TestDecimalToRealDouble : public 
TestDecimalToReal<std::pair<DecimalType, double>> {
 };
@@ -1209,6 +1247,34 @@ TYPED_TEST(TestDecimalToRealDouble, Precision) {
                                         9.999999999999998e+47);
   CheckDecimalToReal<TypeParam, 
double>("-99999999999999978859343891977453174784", -10,
                                         -9.999999999999998e+47);
+  // Integers are always exact
+  auto scale = TypeParam::kMaxScale - 1;
+  std::string seven = "7.";
+  seven.append(scale, '0');
+  CheckDecimalToReal<TypeParam, double>(seven, scale, 7.0);
+  CheckDecimalToReal<TypeParam, double>("-" + seven, scale, -7.0);
+
+  CheckDecimalToReal<TypeParam, 
double>("99999999999999999999.0000000000000000", 16,
+                                        99999999999999999999.0);
+  CheckDecimalToReal<TypeParam, 
double>("-99999999999999999999.0000000000000000", 16,
+                                        -99999999999999999999.0);
+
+  // Small fractions are within one ULP
+  CheckDecimalToRealWithinOneULP<TypeParam, double>("9999999.9", 1, 9999999.9);
+  CheckDecimalToRealWithinOneULP<TypeParam, double>("-9999999.9", 1, 
-9999999.9);
+  CheckDecimalToRealWithinOneULP<TypeParam, double>("9999999.999999999999999", 
15,
+                                                    9999999.999999999999999);
+  CheckDecimalToRealWithinOneULP<TypeParam, 
double>("-9999999.999999999999999", 15,
+                                                    -9999999.999999999999999);
+
+  // Large fractions are within 2^-52
+  constexpr double epsilon = 2.220446049250313080847263336181640625e-16;  // 
2^-52
+  CheckDecimalToRealWithinEpsilon<TypeParam, double>(
+      "112334829348925.99070703983306884765625", 23, epsilon,
+      112334829348925.99070703983306884765625);
+  CheckDecimalToRealWithinEpsilon<TypeParam, double>(
+      "1.987748987892758765582589910934859345", 36, epsilon,
+      1.987748987892758765582589910934859345);
 }
 
 #endif  // __MINGW32__

[arrow] branch main updated: GH-35942: [C++] Improve Decimal ToReal accuracy (#36667)

Reply via email to