Luminarys commented on a change in pull request #8266:
URL: https://github.com/apache/arrow/pull/8266#discussion_r495346728
##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -248,40 +247,50 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t
bits) {
return *this;
}
-BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
- // Break the left and right numbers into 32 bit chunks
- // so that we can multiply them without overflow.
- const uint64_t L0 = static_cast<uint64_t>(high_bits_) >> 32;
- const uint64_t L1 = static_cast<uint64_t>(high_bits_) & kIntMask;
- const uint64_t L2 = low_bits_ >> 32;
- const uint64_t L3 = low_bits_ & kIntMask;
+namespace {
+
+void extendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t*
lo) {
+ const uint64_t x_lo = x & kIntMask;
+ const uint64_t y_lo = y & kIntMask;
+ const uint64_t x_hi = x >> 32;
+ const uint64_t y_hi = y >> 32;
- const uint64_t R0 = static_cast<uint64_t>(right.high_bits_) >> 32;
- const uint64_t R1 = static_cast<uint64_t>(right.high_bits_) & kIntMask;
- const uint64_t R2 = right.low_bits_ >> 32;
- const uint64_t R3 = right.low_bits_ & kIntMask;
+ const uint64_t t = x_lo * y_lo;
+ const uint64_t t_lo = t & kIntMask;
+ const uint64_t t_hi = t >> 32;
- uint64_t product = L3 * R3;
- low_bits_ = product & kIntMask;
+ const uint64_t u = x_hi * y_lo + t_hi;
+ const uint64_t u_lo = u & kIntMask;
+ const uint64_t u_hi = u >> 32;
- uint64_t sum = product >> 32;
+ const uint64_t v = x_lo * y_hi + u_lo;
+ const uint64_t v_hi = v >> 32;
- product = L2 * R3;
- sum += product;
- high_bits_ = static_cast<int64_t>(sum < product ? kCarryBit : 0);
+ *hi = x_hi * y_hi + u_hi + v_hi;
+ *lo = (v << 32) + t_lo;
+}
- product = L3 * R2;
- sum += product;
+void multiplyUint128(uint64_t x_hi, uint64_t x_lo, uint64_t y_hi, uint64_t
y_lo,
+ uint64_t* hi, uint64_t* lo) {
+ extendAndMultiplyUint64(x_lo, y_lo, hi, lo);
+ *hi += (x_hi * y_lo) + (x_lo * y_hi);
+}
- low_bits_ += sum << 32;
+} // namespace
- if (sum < product) {
- high_bits_ += kCarryBit;
+BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
+ // Since the max value of BasicDecimal128 is supposed to be 1e38 - 1 and the
+ // min the negation taking the absolute values here should always be safe.
+ const bool negate = Sign() != right.Sign();
+ BasicDecimal128 x = BasicDecimal128::Abs(*this);
+ BasicDecimal128 y = BasicDecimal128::Abs(right);
+ uint64_t hi;
+ multiplyUint128(x.high_bits(), x.low_bits(), y.high_bits(), y.low_bits(),
&hi,
Review comment:
Because high_bits_ is stored as int64_t we can't directly pass it or
static cast it.
##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -248,40 +247,50 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t
bits) {
return *this;
}
-BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
- // Break the left and right numbers into 32 bit chunks
- // so that we can multiply them without overflow.
- const uint64_t L0 = static_cast<uint64_t>(high_bits_) >> 32;
- const uint64_t L1 = static_cast<uint64_t>(high_bits_) & kIntMask;
- const uint64_t L2 = low_bits_ >> 32;
- const uint64_t L3 = low_bits_ & kIntMask;
+namespace {
+
+void extendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t*
lo) {
Review comment:
Done.
##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -248,40 +247,50 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t
bits) {
return *this;
}
-BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
- // Break the left and right numbers into 32 bit chunks
- // so that we can multiply them without overflow.
- const uint64_t L0 = static_cast<uint64_t>(high_bits_) >> 32;
- const uint64_t L1 = static_cast<uint64_t>(high_bits_) & kIntMask;
- const uint64_t L2 = low_bits_ >> 32;
- const uint64_t L3 = low_bits_ & kIntMask;
+namespace {
+
+void extendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t*
lo) {
+ const uint64_t x_lo = x & kIntMask;
Review comment:
Done.
Previous results:
BinaryMathOp 137 ns 137 ns
BinaryMathOpAggregate 143 ns 143 ns
New results:
BinaryMathOp 135 ns 135 ns
BinaryMathOpAggregate 143 ns 143 ns
I also performed some micro benchmarks on my own which show the current
multiplication method takes ~3.2 ns, __int128 multiplication takes ~0.7 ns and
the new method can take between ~2.3 and ~3.2 ns depending on how many
negations must occur.
##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -248,40 +247,50 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t
bits) {
return *this;
}
-BasicDecimal128& BasicDecimal128::operator*=(const BasicDecimal128& right) {
- // Break the left and right numbers into 32 bit chunks
- // so that we can multiply them without overflow.
- const uint64_t L0 = static_cast<uint64_t>(high_bits_) >> 32;
- const uint64_t L1 = static_cast<uint64_t>(high_bits_) & kIntMask;
- const uint64_t L2 = low_bits_ >> 32;
- const uint64_t L3 = low_bits_ & kIntMask;
+namespace {
+
+void extendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t*
lo) {
+ const uint64_t x_lo = x & kIntMask;
+ const uint64_t y_lo = y & kIntMask;
+ const uint64_t x_hi = x >> 32;
+ const uint64_t y_hi = y >> 32;
- const uint64_t R0 = static_cast<uint64_t>(right.high_bits_) >> 32;
- const uint64_t R1 = static_cast<uint64_t>(right.high_bits_) & kIntMask;
- const uint64_t R2 = right.low_bits_ >> 32;
- const uint64_t R3 = right.low_bits_ & kIntMask;
+ const uint64_t t = x_lo * y_lo;
+ const uint64_t t_lo = t & kIntMask;
+ const uint64_t t_hi = t >> 32;
- uint64_t product = L3 * R3;
- low_bits_ = product & kIntMask;
+ const uint64_t u = x_hi * y_lo + t_hi;
+ const uint64_t u_lo = u & kIntMask;
+ const uint64_t u_hi = u >> 32;
- uint64_t sum = product >> 32;
+ const uint64_t v = x_lo * y_hi + u_lo;
+ const uint64_t v_hi = v >> 32;
- product = L2 * R3;
- sum += product;
- high_bits_ = static_cast<int64_t>(sum < product ? kCarryBit : 0);
+ *hi = x_hi * y_hi + u_hi + v_hi;
+ *lo = (v << 32) + t_lo;
+}
- product = L3 * R2;
- sum += product;
+void multiplyUint128(uint64_t x_hi, uint64_t x_lo, uint64_t y_hi, uint64_t
y_lo,
+ uint64_t* hi, uint64_t* lo) {
+ extendAndMultiplyUint64(x_lo, y_lo, hi, lo);
+ *hi += (x_hi * y_lo) + (x_lo * y_hi);
Review comment:
Done.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]