[jira] [Commented] (ARROW-1709) [C++] Decimal.ToString is incorrect for negative scale

ASF GitHub Bot (JIRA) Wed, 08 Nov 2017 17:52:47 -0800

    [ 
https://issues.apache.org/jira/browse/ARROW-1709?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16245094#comment-16245094
 ]


ASF GitHub Bot commented on ARROW-1709:
---------------------------------------

wesm closed pull request #1292: ARROW-1709: [C++] Decimal.ToString is incorrect 
for negative scale
URL: https://github.com/apache/arrow/pull/1292
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/cpp/src/arrow/util/decimal-test.cc 
b/cpp/src/arrow/util/decimal-test.cc
index b0271fff1..0d0c08cc4 100644
--- a/cpp/src/arrow/util/decimal-test.cc
+++ b/cpp/src/arrow/util/decimal-test.cc
@@ -18,6 +18,7 @@
 
 #include <cstdint>
 #include <string>
+#include <tuple>
 
 #include <gtest/gtest.h>
 
@@ -291,4 +292,78 @@ TEST(Decimal128Test, PrintMinValue) {
   ASSERT_EQ(string_value, printed_value);
 }
 
+class Decimal128PrintingTest
+    : public ::testing::TestWithParam<std::tuple<int32_t, int32_t, 
std::string>> {};
+
+TEST_P(Decimal128PrintingTest, Print) {
+  int32_t test_value;
+  int32_t scale;
+  std::string expected_string;
+  std::tie(test_value, scale, expected_string) = GetParam();
+  const Decimal128 value(test_value);
+  const std::string printed_value = value.ToString(scale);
+  ASSERT_EQ(expected_string, printed_value);
+}
+
+INSTANTIATE_TEST_CASE_P(Decimal128PrintingTest, Decimal128PrintingTest,
+                        ::testing::Values(std::make_tuple(123, 1, "12.3"),
+                                          std::make_tuple(123, 5, "0.00123"),
+                                          std::make_tuple(123, 10, "1.23E-8"),
+                                          std::make_tuple(123, -1, "1.23E+3"),
+                                          std::make_tuple(-123, -1, 
"-1.23E+3"),
+                                          std::make_tuple(123, -3, "1.23E+5"),
+                                          std::make_tuple(-123, -3, 
"-1.23E+5"),
+                                          std::make_tuple(12345, -3, 
"1.2345E+7")));
+
+class Decimal128ParsingTest
+    : public ::testing::TestWithParam<std::tuple<std::string, uint64_t, 
int32_t>> {};
+
+TEST_P(Decimal128ParsingTest, Parse) {
+  std::string test_string;
+  uint64_t expected_low_bits;
+  int32_t expected_scale;
+  std::tie(test_string, expected_low_bits, expected_scale) = GetParam();
+  Decimal128 value;
+  int32_t scale;
+  ASSERT_OK(Decimal128::FromString(test_string, &value, NULLPTR, &scale));
+  ASSERT_EQ(value.low_bits(), expected_low_bits);
+  ASSERT_EQ(expected_scale, scale);
+}
+
+INSTANTIATE_TEST_CASE_P(Decimal128ParsingTest, Decimal128ParsingTest,
+                        ::testing::Values(std::make_tuple("12.3", 123ULL, 1),
+                                          std::make_tuple("0.00123", 123ULL, 
5),
+                                          std::make_tuple("1.23E-8", 123ULL, 
10),
+                                          std::make_tuple("-1.23E-8", -123LL, 
10),
+                                          std::make_tuple("1.23E+3", 123ULL, 
-1),
+                                          std::make_tuple("-1.23E+3", -123LL, 
-1),
+                                          std::make_tuple("1.23E+5", 123ULL, 
-3),
+                                          std::make_tuple("1.2345E+7", 
12345ULL, -3),
+                                          std::make_tuple("1.23e-8", 123ULL, 
10),
+                                          std::make_tuple("-1.23e-8", -123LL, 
10),
+                                          std::make_tuple("1.23e+3", 123ULL, 
-1),
+                                          std::make_tuple("-1.23e+3", -123LL, 
-1),
+                                          std::make_tuple("1.23e+5", 123ULL, 
-3),
+                                          std::make_tuple("1.2345e+7", 
12345ULL, -3)));
+
+class Decimal128ParsingTestInvalid : public 
::testing::TestWithParam<std::string> {};
+
+TEST_P(Decimal128ParsingTestInvalid, Parse) {
+  std::string test_string = GetParam();
+  Decimal128 value;
+  ASSERT_RAISES(Invalid, Decimal128::FromString(test_string, &value));
+}
+
+INSTANTIATE_TEST_CASE_P(Decimal128ParsingTestInvalid, 
Decimal128ParsingTestInvalid,
+                        ::testing::Values("0.00123D/3", "1.23eA8", "1.23E+3A",
+                                          "-1.23E--5", "1.2345E+++07"));
+
+TEST(Decimal128ParseTest, WithExponentAndNullptrScale) {
+  Decimal128 value;
+  ASSERT_OK(Decimal128::FromString("1.23E-8", &value));
+
+  const Decimal128 expected_value(123);
+  ASSERT_EQ(expected_value, value);
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index cc180258a..447cae5c5 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -105,6 +105,22 @@ Decimal128::operator int64_t() const {
   return static_cast<int64_t>(low_bits_);
 }
 
+static std::string ToStringNegativeScale(const std::string& str,
+                                         int32_t adjusted_exponent, bool 
is_negative) {
+  std::stringstream buf;
+
+  size_t offset = 0;
+  buf << str[offset++];
+
+  if (is_negative) {
+    buf << str[offset++];
+  }
+
+  buf << '.' << str.substr(offset, std::string::npos) << 'E' << std::showpos
+      << adjusted_exponent;
+  return buf.str();
+}
+
 std::string Decimal128::ToString(int32_t scale) const {
   const std::string str(ToIntegerString());
 
@@ -112,9 +128,18 @@ std::string Decimal128::ToString(int32_t scale) const {
     return str;
   }
 
-  if (*this < 0) {
-    const auto len = static_cast<int32_t>(str.size());
+  const bool is_negative = *this < 0;
 
+  const auto len = static_cast<int32_t>(str.size());
+  const auto is_negative_offset = static_cast<int32_t>(is_negative);
+  const int32_t adjusted_exponent = -scale + (len - 1 - is_negative_offset);
+
+  /// Note that the -6 is taken from the Java BigDecimal documentation.
+  if (scale < 0 || adjusted_exponent < -6) {
+    return ToStringNegativeScale(str, adjusted_exponent, is_negative);
+  }
+
+  if (is_negative) {
     if (len - 1 > scale) {
       const auto n = static_cast<size_t>(len - scale);
       return str.substr(0, n) + "." + str.substr(n, 
static_cast<size_t>(scale));
@@ -128,8 +153,6 @@ std::string Decimal128::ToString(int32_t scale) const {
     return result + str.substr(1, std::string::npos);
   }
 
-  const auto len = static_cast<int32_t>(str.size());
-
   if (len > scale) {
     const auto n = static_cast<size_t>(len - scale);
     return str.substr(0, n) + "." + str.substr(n, static_cast<size_t>(scale));
@@ -164,10 +187,12 @@ static constexpr int64_t kPowersOfTen[kInt64DecimalDigits 
+ 1] = {1LL,
                                                                   
100000000000000000LL,
                                                                   
1000000000000000000LL};
 
+static inline bool isdigit(char value) { return std::isdigit(value) != 0; }
+
 static void StringToInteger(const std::string& str, Decimal128* out) {
   using std::size_t;
 
-  DCHECK_NE(out, nullptr) << "Decimal128 output variable cannot be nullptr";
+  DCHECK_NE(out, NULLPTR) << "Decimal128 output variable cannot be NULLPTR";
   DCHECK_EQ(*out, 0)
       << "When converting a string to Decimal128 the initial output must be 0";
 
@@ -189,7 +214,7 @@ static void StringToInteger(const std::string& str, 
Decimal128* out) {
 
 Status Decimal128::FromString(const std::string& s, Decimal128* out, int* 
precision,
                               int* scale) {
-  // Implements this regex: "(\\+?|-?)((0*)(\\d*))(\\.(\\d+))?";
+  // Implements this regex: 
"(\\+?|-?)((0*)(\\d*))(\\.(\\d+))?((E|e)(\\+|-)?\\d+)?";
   if (s.empty()) {
     return Status::Invalid("Empty string cannot be converted to decimal");
   }
@@ -215,21 +240,21 @@ Status Decimal128::FromString(const std::string& s, 
Decimal128* out, int* precis
   DCHECK_LT(charp, end);
 
   // skip leading zeros
-  charp = std::find_if_not(charp, end, [](char c) { return c == '0'; });
+  charp = std::find_if_not(charp, end, [](char value) { return value == '0'; 
});
 
   // all zeros and no decimal point
   if (charp == end) {
-    if (out != nullptr) {
+    if (out != NULLPTR) {
       *out = 0;
     }
 
     // Not sure what other libraries assign precision to for this case (this 
case of
     // a string consisting only of one or more zeros)
-    if (precision != nullptr) {
+    if (precision != NULLPTR) {
       *precision = static_cast<int>(charp - numeric_string_start);
     }
 
-    if (scale != nullptr) {
+    if (scale != NULLPTR) {
       *scale = 0;
     }
 
@@ -238,7 +263,7 @@ Status Decimal128::FromString(const std::string& s, 
Decimal128* out, int* precis
 
   std::string::const_iterator whole_part_start = charp;
 
-  charp = std::find_if_not(charp, end, [](char c) { return std::isdigit(c) != 
0; });
+  charp = std::find_if_not(charp, end, isdigit);
 
   std::string::const_iterator whole_part_end = charp;
   std::string whole_part(whole_part_start, whole_part_end);
@@ -269,14 +294,13 @@ Status Decimal128::FromString(const std::string& s, 
Decimal128* out, int* precis
 
   std::string::const_iterator fractional_part_start = charp;
 
-  // The rest must be digits, because if we have a decimal point it must be 
followed by
-  // digits
+  // The rest must be digits or an exponent
   if (charp != end) {
-    charp = std::find_if_not(charp, end, [](char c) { return std::isdigit(c) 
!= 0; });
+    charp = std::find_if_not(charp, end, isdigit);
 
     // The while loop has ended before the end of the string which means we've 
hit a
-    // character that isn't a base ten digit
-    if (charp != end) {
+    // character that isn't a base ten digit or "E" for exponent
+    if (charp != end && *charp != 'E' && *charp != 'e') {
       std::stringstream ss;
       ss << "Found non base ten digit character '" << *charp
          << "' before the end of the string";
@@ -287,15 +311,55 @@ Status Decimal128::FromString(const std::string& s, 
Decimal128* out, int* precis
   std::string::const_iterator fractional_part_end = charp;
   std::string fractional_part(fractional_part_start, fractional_part_end);
 
-  if (precision != nullptr) {
+  if (precision != NULLPTR) {
     *precision = static_cast<int>(whole_part.size() + fractional_part.size());
   }
 
-  if (scale != nullptr) {
-    *scale = static_cast<int>(fractional_part.size());
+  if (charp != end) {
+    // we must have an exponent, if this aborts then we have somehow not 
caught this and
+    // raised a proper error
+    DCHECK(*charp == 'E' || *charp == 'e');
+
+    ++charp;
+
+    const char value = *charp;
+    const bool starts_with_plus_or_minus = value == '+' || value == '-';
+
+    // we use this to construct the adjusted exponent integer later
+    std::string::const_iterator digit_start = charp;
+
+    // skip plus or minus
+    charp += starts_with_plus_or_minus;
+
+    // confirm that the rest of the characters are digits
+    charp = std::find_if_not(charp, end, isdigit);
+
+    if (charp != end) {
+      // we have something other than digits here
+      std::stringstream ss;
+      ss << "Found non decimal digit exponent value '" << *charp << "'";
+      return Status::Invalid(ss.str());
+    }
+
+    if (scale != NULLPTR) {
+      // compute the scale from the adjusted exponent
+      std::string adjusted_exponent_string(digit_start, end);
+      DCHECK(std::all_of(adjusted_exponent_string.cbegin() + 
starts_with_plus_or_minus,
+                         adjusted_exponent_string.cend(), isdigit))
+          << "Non decimal digit character found in " << 
adjusted_exponent_string;
+      const auto adjusted_exponent =
+          static_cast<int32_t>(std::stol(adjusted_exponent_string));
+      const auto len = static_cast<int32_t>(whole_part.size() + 
fractional_part.size());
+
+      *scale = -adjusted_exponent + len - 1;
+    }
+  } else {
+    if (scale != NULLPTR) {
+      *scale = static_cast<int>(fractional_part.size());
+    }
   }
 
-  if (out != nullptr) {
+  if (out != NULLPTR) {
     // zero out in case we've passed in a previously used value
     *out = 0;
     StringToInteger(whole_part + fractional_part, out);


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> [C++] Decimal.ToString is incorrect for negative scale
> ------------------------------------------------------
>
>                 Key: ARROW-1709
>                 URL: https://issues.apache.org/jira/browse/ARROW-1709
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: C++
>    Affects Versions: 0.7.1
>            Reporter: Phillip Cloud
>            Assignee: Phillip Cloud
>              Labels: pull-request-available
>             Fix For: 0.8.0
>
>
> {{Decimal128::ToString(int precision, int scale)}} doesn't handle {{scale < 
> 0}} correctly. It should tack on an extra {{e<abs(scale)>}} to the end.



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

[jira] [Commented] (ARROW-1709) [C++] Decimal.ToString is incorrect for negative scale

Reply via email to