[GitHub] [arrow] Luminarys commented on a change in pull request #8266: Refactor BasicDecimal128 Multiplication to use unsigned helper

GitBox Fri, 25 Sep 2020 17:42:34 -0700


Luminarys commented on a change in pull request #8266:
URL: https://github.com/apache/arrow/pull/8266#discussion_r495346728




##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -248,40 +247,50 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t 
bits) {
   return *this;
 }
 
-BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
-  // Break the left and right numbers into 32 bit chunks
-  // so that we can multiply them without overflow.
-  const uint64_t L0 = static_cast<uint64_t>(high_bits_) >> 32;
-  const uint64_t L1 = static_cast<uint64_t>(high_bits_) & kIntMask;
-  const uint64_t L2 = low_bits_ >> 32;
-  const uint64_t L3 = low_bits_ & kIntMask;
+namespace {
+
+void extendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* 
lo) {
+  const uint64_t x_lo = x & kIntMask;
+  const uint64_t y_lo = y & kIntMask;
+  const uint64_t x_hi = x >> 32;
+  const uint64_t y_hi = y >> 32;
 
-  const uint64_t R0 = static_cast<uint64_t>(right.high_bits_) >> 32;
-  const uint64_t R1 = static_cast<uint64_t>(right.high_bits_) & kIntMask;
-  const uint64_t R2 = right.low_bits_ >> 32;
-  const uint64_t R3 = right.low_bits_ & kIntMask;
+  const uint64_t t = x_lo * y_lo;
+  const uint64_t t_lo = t & kIntMask;
+  const uint64_t t_hi = t >> 32;
 
-  uint64_t product = L3 * R3;
-  low_bits_ = product & kIntMask;
+  const uint64_t u = x_hi * y_lo + t_hi;
+  const uint64_t u_lo = u & kIntMask;
+  const uint64_t u_hi = u >> 32;
 
-  uint64_t sum = product >> 32;
+  const uint64_t v = x_lo * y_hi + u_lo;
+  const uint64_t v_hi = v >> 32;
 
-  product = L2 * R3;
-  sum += product;
-  high_bits_ = static_cast<int64_t>(sum < product ? kCarryBit : 0);
+  *hi = x_hi * y_hi + u_hi + v_hi;
+  *lo = (v << 32) + t_lo;
+}
 
-  product = L3 * R2;
-  sum += product;
+void multiplyUint128(uint64_t x_hi, uint64_t x_lo, uint64_t y_hi, uint64_t 
y_lo,
+                     uint64_t* hi, uint64_t* lo) {
+  extendAndMultiplyUint64(x_lo, y_lo, hi, lo);
+  *hi += (x_hi * y_lo) + (x_lo * y_hi);
+}
 
-  low_bits_ += sum << 32;
+}  // namespace
 
-  if (sum < product) {
-    high_bits_ += kCarryBit;
+BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
+  // Since the max value of BasicDecimal128 is supposed to be 1e38 - 1 and the
+  // min the negation taking the absolute values here should always be safe.
+  const bool negate = Sign() != right.Sign();
+  BasicDecimal128 x = BasicDecimal128::Abs(*this);
+  BasicDecimal128 y = BasicDecimal128::Abs(right);
+  uint64_t hi;
+  multiplyUint128(x.high_bits(), x.low_bits(), y.high_bits(), y.low_bits(), 
&hi,

Review comment:
       Because high_bits_ is stored as int64_t we can't directly pass it or 
static cast it.

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -248,40 +247,50 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t 
bits) {
   return *this;
 }
 
-BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
-  // Break the left and right numbers into 32 bit chunks
-  // so that we can multiply them without overflow.
-  const uint64_t L0 = static_cast<uint64_t>(high_bits_) >> 32;
-  const uint64_t L1 = static_cast<uint64_t>(high_bits_) & kIntMask;
-  const uint64_t L2 = low_bits_ >> 32;
-  const uint64_t L3 = low_bits_ & kIntMask;
+namespace {
+
+void extendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* 
lo) {

Review comment:
       Done.

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -248,40 +247,50 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t 
bits) {
   return *this;
 }
 
-BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
-  // Break the left and right numbers into 32 bit chunks
-  // so that we can multiply them without overflow.
-  const uint64_t L0 = static_cast<uint64_t>(high_bits_) >> 32;
-  const uint64_t L1 = static_cast<uint64_t>(high_bits_) & kIntMask;
-  const uint64_t L2 = low_bits_ >> 32;
-  const uint64_t L3 = low_bits_ & kIntMask;
+namespace {
+
+void extendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* 
lo) {
+  const uint64_t x_lo = x & kIntMask;

Review comment:
       Done.
   
   Previous results:
   BinaryMathOp                          137 ns          137 ns 
   BinaryMathOpAggregate          143 ns          143 ns
   
   New results:
   BinaryMathOp                          135 ns          135 ns
   BinaryMathOpAggregate          143 ns          143 ns
   
   I also performed some micro benchmarks on my own which show the current 
multiplication method takes ~3.2 ns, __int128 multiplication takes ~0.7 ns and 
the new method can take between ~2.3 and ~3.2 ns depending on how many 
negations must occur.

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -248,40 +247,50 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t 
bits) {
   return *this;
 }
 
-BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
-  // Break the left and right numbers into 32 bit chunks
-  // so that we can multiply them without overflow.
-  const uint64_t L0 = static_cast<uint64_t>(high_bits_) >> 32;
-  const uint64_t L1 = static_cast<uint64_t>(high_bits_) & kIntMask;
-  const uint64_t L2 = low_bits_ >> 32;
-  const uint64_t L3 = low_bits_ & kIntMask;
+namespace {
+
+void extendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t* 
lo) {
+  const uint64_t x_lo = x & kIntMask;
+  const uint64_t y_lo = y & kIntMask;
+  const uint64_t x_hi = x >> 32;
+  const uint64_t y_hi = y >> 32;
 
-  const uint64_t R0 = static_cast<uint64_t>(right.high_bits_) >> 32;
-  const uint64_t R1 = static_cast<uint64_t>(right.high_bits_) & kIntMask;
-  const uint64_t R2 = right.low_bits_ >> 32;
-  const uint64_t R3 = right.low_bits_ & kIntMask;
+  const uint64_t t = x_lo * y_lo;
+  const uint64_t t_lo = t & kIntMask;
+  const uint64_t t_hi = t >> 32;
 
-  uint64_t product = L3 * R3;
-  low_bits_ = product & kIntMask;
+  const uint64_t u = x_hi * y_lo + t_hi;
+  const uint64_t u_lo = u & kIntMask;
+  const uint64_t u_hi = u >> 32;
 
-  uint64_t sum = product >> 32;
+  const uint64_t v = x_lo * y_hi + u_lo;
+  const uint64_t v_hi = v >> 32;
 
-  product = L2 * R3;
-  sum += product;
-  high_bits_ = static_cast<int64_t>(sum < product ? kCarryBit : 0);
+  *hi = x_hi * y_hi + u_hi + v_hi;
+  *lo = (v << 32) + t_lo;
+}
 
-  product = L3 * R2;
-  sum += product;
+void multiplyUint128(uint64_t x_hi, uint64_t x_lo, uint64_t y_hi, uint64_t 
y_lo,
+                     uint64_t* hi, uint64_t* lo) {
+  extendAndMultiplyUint64(x_lo, y_lo, hi, lo);
+  *hi += (x_hi * y_lo) + (x_lo * y_hi);

Review comment:
       Done.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [arrow] Luminarys commented on a change in pull request #8266: Refactor BasicDecimal128 Multiplication to use unsigned helper

Reply via email to