This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.6 by this push:
new 08ee75b ORC-1008: Fix overflow detection code for C++ int64_t / java
long (#950)
08ee75b is described below
commit 08ee75b5e9a844c711ea7f18699a9f4e7573bfc8
Author: Guiyanakaung <[email protected]>
AuthorDate: Wed Oct 27 16:04:48 2021 +0800
ORC-1008: Fix overflow detection code for C++ int64_t / java long (#950)
### What changes were proposed in this pull request?
This pr aims to fix overflow detection code for C++ int64_t / java long.
### Why are the changes needed?
Fix bug.
### How was this patch tested?
Pass the CIs.
---
c++/src/Adaptor.hh.in | 35 ++++++++++++++++++++++
c++/src/CMakeLists.txt | 8 +++++
c++/src/Statistics.cc | 12 --------
c++/src/Statistics.hh | 26 ++++++++++++----
c++/test/TestColumnStatistics.cc | 8 +++++
.../org/apache/orc/impl/ColumnStatisticsImpl.java | 19 +++++++-----
.../test/org/apache/orc/TestColumnStatistics.java | 11 +++++++
7 files changed, 94 insertions(+), 25 deletions(-)
diff --git a/c++/src/Adaptor.hh.in b/c++/src/Adaptor.hh.in
index a7795be..cd32b16 100644
--- a/c++/src/Adaptor.hh.in
+++ b/c++/src/Adaptor.hh.in
@@ -31,6 +31,7 @@
#cmakedefine HAS_POST_2038
#cmakedefine HAS_STD_ISNAN
#cmakedefine HAS_STD_MUTEX
+#cmakedefine HAS_BUILTIN_OVERFLOW_CHECK
#cmakedefine NEEDS_REDUNDANT_MOVE
#cmakedefine NEEDS_Z_PREFIX
@@ -173,6 +174,40 @@ namespace orc {
std::string to_string(int64_t val);
}
+#ifdef HAS_BUILTIN_OVERFLOW_CHECK
+ #define multiplyExact !__builtin_mul_overflow
+ #define addExact !__builtin_add_overflow
+#else
+namespace orc {
+ /**
+ * Compute value * repetitions, return false if overflow, return true
otherwise
+ * and save the result at the address pointed to by result
+ * imitates the jdk Math.multiplyExact implementation
+ * but this method makes the assumption that repetitions > 1
+ */
+ static bool multiplyExact(int64_t value, int64_t repetitions, int64_t*
result) {
+ int64_t r = value * repetitions;
+ if (((value < 0 ? -value : value) | repetitions) >> 31 != 0 && r /
repetitions != value) {
+ return false;
+ }
+ *result = r;
+ return true;
+ }
+
+ /**
+ * imitates the jdk Math.addExact implementation
+ */
+ static bool addExact(int64_t sum, int64_t increment, int64_t* result) {
+ int64_t r = sum + increment;
+ if (((sum ^ r) & (increment ^ r)) < 0) {
+ return false;
+ }
+ *result = r;
+ return true;
+ }
+}
+#endif
+
#ifndef HAS_CONSTEXPR
#define constexpr const
#endif
diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt
index 3d4a162..0372565 100644
--- a/c++/src/CMakeLists.txt
+++ b/c++/src/CMakeLists.txt
@@ -43,6 +43,14 @@ CHECK_CXX_SOURCE_COMPILES("
)
CHECK_CXX_SOURCE_COMPILES("
+ int main(){
+ int a;
+ return __builtin_add_overflow(1, 2, &a);
+ }"
+ HAS_BUILTIN_OVERFLOW_CHECK
+)
+
+CHECK_CXX_SOURCE_COMPILES("
#include<stdint.h>
#include<stdio.h>
int main(int,char*[]){
diff --git a/c++/src/Statistics.cc b/c++/src/Statistics.cc
index 645ae31..2401f5e 100644
--- a/c++/src/Statistics.cc
+++ b/c++/src/Statistics.cc
@@ -167,18 +167,6 @@ namespace orc {
// PASS
}
- void IntegerColumnStatisticsImpl::update(int64_t value, int repetitions) {
- _stats.updateMinMax(value);
-
- if (_stats.hasSum()) {
- bool wasPositive = _stats.getSum() >= 0;
- _stats.setSum(value * repetitions + _stats.getSum());
- if ((value >= 0) == wasPositive) {
- _stats.setHasSum((_stats.getSum() >= 0) == wasPositive);
- }
- }
- }
-
StringColumnStatisticsImpl::~StringColumnStatisticsImpl() {
// PASS
}
diff --git a/c++/src/Statistics.hh b/c++/src/Statistics.hh
index 633450f..be9e932 100644
--- a/c++/src/Statistics.hh
+++ b/c++/src/Statistics.hh
@@ -963,7 +963,23 @@ namespace orc {
_stats.setSum(sum);
}
- void update(int64_t value, int repetitions);
+ void update(int64_t value, int repetitions) {
+ _stats.updateMinMax(value);
+
+ if (_stats.hasSum()) {
+ if (repetitions > 1) {
+ _stats.setHasSum(multiplyExact(value, repetitions, &value));
+ }
+
+ if (_stats.hasSum()) {
+ _stats.setHasSum(addExact(_stats.getSum(), value, &value));
+
+ if (_stats.hasSum()) {
+ _stats.setSum(value);
+ }
+ }
+ }
+ }
void merge(const MutableColumnStatistics& other) override {
const IntegerColumnStatisticsImpl& intStats =
@@ -974,10 +990,10 @@ namespace orc {
// update sum and check overflow
_stats.setHasSum(_stats.hasSum() && intStats.hasSum());
if (_stats.hasSum()) {
- bool wasPositive = _stats.getSum() >= 0;
- _stats.setSum(_stats.getSum() + intStats.getSum());
- if ((intStats.getSum() >= 0) == wasPositive) {
- _stats.setHasSum((_stats.getSum() >= 0) == wasPositive);
+ int64_t value;
+ _stats.setHasSum(addExact(_stats.getSum(), intStats.getSum(), &value));
+ if (_stats.hasSum()) {
+ _stats.setSum(value);
}
}
}
diff --git a/c++/test/TestColumnStatistics.cc b/c++/test/TestColumnStatistics.cc
index 315ac47..432d5cb 100644
--- a/c++/test/TestColumnStatistics.cc
+++ b/c++/test/TestColumnStatistics.cc
@@ -106,6 +106,14 @@ namespace orc {
EXPECT_EQ(std::numeric_limits<int64_t>::min() + 500, other->getSum());
intStats->merge(*other);
EXPECT_FALSE(intStats->hasSum());
+
+ std::unique_ptr<IntegerColumnStatisticsImpl> intStats2(
+ new IntegerColumnStatisticsImpl());
+
+ intStats2->update(1, 1);
+ EXPECT_TRUE(intStats2->hasSum());
+ intStats2->update(std::numeric_limits<int64_t>::max(), 3);
+ EXPECT_FALSE(intStats2->hasSum());
}
TEST(ColumnStatistics, doubleColumnStatistics) {
diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
index 7f8180e..48685a7 100644
--- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
@@ -381,10 +381,13 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
maximum = value;
}
if (!overflow) {
- boolean wasPositive = sum >= 0;
- sum += value * repetitions;
- if ((value >= 0) == wasPositive) {
- overflow = (sum >= 0) != wasPositive;
+ try {
+ long increment = repetitions > 1
+ ? Math.multiplyExact(value, repetitions)
+ : value;
+ sum = Math.addExact(sum, increment);
+ } catch (ArithmeticException e) {
+ overflow = true;
}
}
}
@@ -408,10 +411,10 @@ public class ColumnStatisticsImpl implements
ColumnStatistics {
overflow |= otherInt.overflow;
if (!overflow) {
- boolean wasPositive = sum >= 0;
- sum += otherInt.sum;
- if ((otherInt.sum >= 0) == wasPositive) {
- overflow = (sum >= 0) != wasPositive;
+ try {
+ sum = Math.addExact(sum, otherInt.sum);
+ } catch (ArithmeticException e) {
+ overflow = true;
}
}
} else {
diff --git a/java/core/src/test/org/apache/orc/TestColumnStatistics.java
b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
index 5e87eaa..14ed933 100644
--- a/java/core/src/test/org/apache/orc/TestColumnStatistics.java
+++ b/java/core/src/test/org/apache/orc/TestColumnStatistics.java
@@ -43,6 +43,7 @@ import java.text.SimpleDateFormat;
import java.util.TimeZone;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
/**
@@ -51,6 +52,16 @@ import static org.junit.Assert.assertTrue;
public class TestColumnStatistics {
@Test
+ public void testLongSumOverflow() {
+ TypeDescription schema = TypeDescription.createInt();
+ ColumnStatisticsImpl stats = ColumnStatisticsImpl.create(schema);
+ stats.updateInteger(1, 1);
+ assertTrue(((IntegerColumnStatistics) stats).isSumDefined());
+ stats.updateInteger(Long.MAX_VALUE, 3);
+ assertFalse(((IntegerColumnStatistics) stats).isSumDefined());
+ }
+
+ @Test
public void testLongMerge() throws Exception {
TypeDescription schema = TypeDescription.createInt();