[GitHub] [arrow] MingyuZhong commented on a change in pull request #8266: Refactor BasicDecimal128 Multiplication to use unsigned helper

GitBox Fri, 25 Sep 2020 06:21:43 -0700


MingyuZhong commented on a change in pull request #8266:
URL: https://github.com/apache/arrow/pull/8266#discussion_r494673844




##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -248,40 +247,50 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t 
bits) {
   return *this;
 }
 
-BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
-  // Break the left and right numbers into 32 bit chunks
-  // so that we can multiply them without overflow.
-  const uint64_t L0 = static_cast<uint64_t>(high_bits_) >> 32;
-  const uint64_t L1 = static_cast<uint64_t>(high_bits_) & kIntMask;
-  const uint64_t L2 = low_bits_ >> 32;
-  const uint64_t L3 = low_bits_ & kIntMask;
+namespace {
+
+void extendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* 
lo) {
+  const uint64_t x_lo = x & kIntMask;
+  const uint64_t y_lo = y & kIntMask;
+  const uint64_t x_hi = x >> 32;
+  const uint64_t y_hi = y >> 32;
 
-  const uint64_t R0 = static_cast<uint64_t>(right.high_bits_) >> 32;
-  const uint64_t R1 = static_cast<uint64_t>(right.high_bits_) & kIntMask;
-  const uint64_t R2 = right.low_bits_ >> 32;
-  const uint64_t R3 = right.low_bits_ & kIntMask;
+  const uint64_t t = x_lo * y_lo;
+  const uint64_t t_lo = t & kIntMask;
+  const uint64_t t_hi = t >> 32;
 
-  uint64_t product = L3 * R3;
-  low_bits_ = product & kIntMask;
+  const uint64_t u = x_hi * y_lo + t_hi;
+  const uint64_t u_lo = u & kIntMask;
+  const uint64_t u_hi = u >> 32;
 
-  uint64_t sum = product >> 32;
+  const uint64_t v = x_lo * y_hi + u_lo;
+  const uint64_t v_hi = v >> 32;
 
-  product = L2 * R3;
-  sum += product;
-  high_bits_ = static_cast<int64_t>(sum < product ? kCarryBit : 0);
+  *hi = x_hi * y_hi + u_hi + v_hi;
+  *lo = (v << 32) + t_lo;
+}
 
-  product = L3 * R2;
-  sum += product;
+void multiplyUint128(uint64_t x_hi, uint64_t x_lo, uint64_t y_hi, uint64_t 
y_lo,
+                     uint64_t* hi, uint64_t* lo) {
+  extendAndMultiplyUint64(x_lo, y_lo, hi, lo);
+  *hi += (x_hi * y_lo) + (x_lo * y_hi);
+}
 
-  low_bits_ += sum << 32;
+}  // namespace
 
-  if (sum < product) {
-    high_bits_ += kCarryBit;
+BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
+  // Since the max value of BasicDecimal128 is supposed to be 1e38 - 1 and the
+  // min the negation taking the absolute values here should always be safe.
+  const bool negate = Sign() != right.Sign();
+  BasicDecimal128 x = BasicDecimal128::Abs(*this);
+  BasicDecimal128 y = BasicDecimal128::Abs(right);
+  uint64_t hi;
+  multiplyUint128(x.high_bits(), x.low_bits(), y.high_bits(), y.low_bits(), 
&hi,

Review comment:
       Why not pass &high_bits_ directly to multiplyUint128?

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -248,40 +247,50 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t 
bits) {
   return *this;
 }
 
-BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
-  // Break the left and right numbers into 32 bit chunks
-  // so that we can multiply them without overflow.
-  const uint64_t L0 = static_cast<uint64_t>(high_bits_) >> 32;
-  const uint64_t L1 = static_cast<uint64_t>(high_bits_) & kIntMask;
-  const uint64_t L2 = low_bits_ >> 32;
-  const uint64_t L3 = low_bits_ & kIntMask;
+namespace {
+
+void extendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* 
lo) {

Review comment:
       Upper case initial "E", for consistency with the existing global 
functions?

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -248,40 +247,50 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t 
bits) {
   return *this;
 }
 
-BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
-  // Break the left and right numbers into 32 bit chunks
-  // so that we can multiply them without overflow.
-  const uint64_t L0 = static_cast<uint64_t>(high_bits_) >> 32;
-  const uint64_t L1 = static_cast<uint64_t>(high_bits_) & kIntMask;
-  const uint64_t L2 = low_bits_ >> 32;
-  const uint64_t L3 = low_bits_ & kIntMask;
+namespace {
+
+void extendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* 
lo) {
+  const uint64_t x_lo = x & kIntMask;

Review comment:
       I think we can add an optimization:
   
   ```
   #ifdef __SIZEOF_INT128__
     // use __uint128_t
   #else
     // fallback to current logic
   #endif
   ```
   Please also run the micro benchmark a few times (the results of the first 
few runs after compilation are usually noisy).

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -248,40 +247,50 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t 
bits) {
   return *this;
 }
 
-BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
-  // Break the left and right numbers into 32 bit chunks
-  // so that we can multiply them without overflow.
-  const uint64_t L0 = static_cast<uint64_t>(high_bits_) >> 32;
-  const uint64_t L1 = static_cast<uint64_t>(high_bits_) & kIntMask;
-  const uint64_t L2 = low_bits_ >> 32;
-  const uint64_t L3 = low_bits_ & kIntMask;
+namespace {
+
+void extendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* 
lo) {
+  const uint64_t x_lo = x & kIntMask;
+  const uint64_t y_lo = y & kIntMask;
+  const uint64_t x_hi = x >> 32;
+  const uint64_t y_hi = y >> 32;
 
-  const uint64_t R0 = static_cast<uint64_t>(right.high_bits_) >> 32;
-  const uint64_t R1 = static_cast<uint64_t>(right.high_bits_) & kIntMask;
-  const uint64_t R2 = right.low_bits_ >> 32;
-  const uint64_t R3 = right.low_bits_ & kIntMask;
+  const uint64_t t = x_lo * y_lo;
+  const uint64_t t_lo = t & kIntMask;
+  const uint64_t t_hi = t >> 32;
 
-  uint64_t product = L3 * R3;
-  low_bits_ = product & kIntMask;
+  const uint64_t u = x_hi * y_lo + t_hi;
+  const uint64_t u_lo = u & kIntMask;
+  const uint64_t u_hi = u >> 32;
 
-  uint64_t sum = product >> 32;
+  const uint64_t v = x_lo * y_hi + u_lo;
+  const uint64_t v_hi = v >> 32;
 
-  product = L2 * R3;
-  sum += product;
-  high_bits_ = static_cast<int64_t>(sum < product ? kCarryBit : 0);
+  *hi = x_hi * y_hi + u_hi + v_hi;
+  *lo = (v << 32) + t_lo;
+}
 
-  product = L3 * R2;
-  sum += product;
+void multiplyUint128(uint64_t x_hi, uint64_t x_lo, uint64_t y_hi, uint64_t 
y_lo,
+                     uint64_t* hi, uint64_t* lo) {
+  extendAndMultiplyUint64(x_lo, y_lo, hi, lo);
+  *hi += (x_hi * y_lo) + (x_lo * y_hi);

Review comment:
       We can also apply the same optimization here.

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -248,40 +247,50 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t 
bits) {
   return *this;
 }
 
-BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
-  // Break the left and right numbers into 32 bit chunks
-  // so that we can multiply them without overflow.
-  const uint64_t L0 = static_cast<uint64_t>(high_bits_) >> 32;
-  const uint64_t L1 = static_cast<uint64_t>(high_bits_) & kIntMask;
-  const uint64_t L2 = low_bits_ >> 32;
-  const uint64_t L3 = low_bits_ & kIntMask;
+namespace {
+
+void extendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* 
lo) {
+  const uint64_t x_lo = x & kIntMask;
+  const uint64_t y_lo = y & kIntMask;
+  const uint64_t x_hi = x >> 32;
+  const uint64_t y_hi = y >> 32;
 
-  const uint64_t R0 = static_cast<uint64_t>(right.high_bits_) >> 32;
-  const uint64_t R1 = static_cast<uint64_t>(right.high_bits_) & kIntMask;
-  const uint64_t R2 = right.low_bits_ >> 32;
-  const uint64_t R3 = right.low_bits_ & kIntMask;
+  const uint64_t t = x_lo * y_lo;
+  const uint64_t t_lo = t & kIntMask;
+  const uint64_t t_hi = t >> 32;
 
-  uint64_t product = L3 * R3;
-  low_bits_ = product & kIntMask;
+  const uint64_t u = x_hi * y_lo + t_hi;
+  const uint64_t u_lo = u & kIntMask;
+  const uint64_t u_hi = u >> 32;
 
-  uint64_t sum = product >> 32;
+  const uint64_t v = x_lo * y_hi + u_lo;
+  const uint64_t v_hi = v >> 32;
 
-  product = L2 * R3;
-  sum += product;
-  high_bits_ = static_cast<int64_t>(sum < product ? kCarryBit : 0);
+  *hi = x_hi * y_hi + u_hi + v_hi;
+  *lo = (v << 32) + t_lo;
+}
 
-  product = L3 * R2;
-  sum += product;
+void multiplyUint128(uint64_t x_hi, uint64_t x_lo, uint64_t y_hi, uint64_t 
y_lo,

Review comment:
       MultiplyUint128?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [arrow] MingyuZhong commented on a change in pull request #8266: Refactor BasicDecimal128 Multiplication to use unsigned helper

Reply via email to