spark git commit: [SPARK-24452][SQL][CORE] Avoid possible overflow in int add or multiply

2018-06-15 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/branch-2.3 a7d378e78 -> d42610440


[SPARK-24452][SQL][CORE] Avoid possible overflow in int add or multiply

This PR fixes possible overflow in int addition or multiplication. In
particular, the overflows in multiplication were detected by
[Spotbugs](https://spotbugs.github.io/).

The following assignments may overflow on the right-hand side: the
arithmetic is evaluated in 32-bit int and only afterwards widened to
long, so the stored result may be negative.
```
long = int * int
long = int + int
```

To avoid this problem, this PR casts one int operand to long on the
right-hand side, so the whole expression is evaluated in 64-bit
arithmetic.
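As a concrete illustration (a minimal sketch with hypothetical variable
names, not code from this PR), the following Java snippet shows the
wrap-around and the fix. For reference, Spotbugs flags the multiplication
form of this pattern as `ICAST_INTEGER_MULTIPLY_CAST_TO_LONG`.

```
// Illustrative only: hypothetical names, not code from this PR.
public class IntOverflowDemo {
  public static void main(String[] args) {
    int klen = 1_500_000_000;  // e.g. a large key length in bytes
    int vlen = 1_500_000_000;  // e.g. a large value length in bytes

    // Bug pattern: `int + int` is evaluated in 32-bit arithmetic and
    // wraps around before the implicit widening to long.
    long wrong = klen + vlen;         // -1294967296, not 3000000000

    // Fix: widen one operand first, so the whole right-hand side is
    // evaluated in 64-bit arithmetic (the same idea as the
    // `2L * uaoSize` change in the BytesToBytesMap hunk below).
    long right = (long) klen + vlen;  // 3000000000

    System.out.println("wrong = " + wrong + ", right = " + right);
  }
}
```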

Existing UTs.

Author: Kazuaki Ishizaki 

Closes #21481 from kiszk/SPARK-24452.

(cherry picked from commit 90da7dc241f8eec2348c0434312c97c116330bc4)
Signed-off-by: Wenchen Fan 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d4261044
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d4261044
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d4261044

Branch: refs/heads/branch-2.3
Commit: d42610440ac2e58ef77fcf42ad81ee4fdf5691ba
Parents: a7d378e
Author: Kazuaki Ishizaki 
Authored: Fri Jun 15 13:47:48 2018 -0700
Committer: Wenchen Fan 
Committed: Fri Jun 15 13:49:04 2018 -0700

--
 .../spark/unsafe/map/BytesToBytesMap.java        |   2 +-
 .../spark/deploy/worker/DriverRunner.scala       |   2 +-
 .../org/apache/spark/rdd/AsyncRDDActions.scala   |   2 +-
 .../org/apache/spark/storage/BlockManager.scala  |   2 +-
 .../catalyst/expressions/UnsafeArrayData.java    |  14 +--
 .../VariableLengthRowBasedKeyValueBatch.java     |   2 +-
 .../vectorized/OffHeapColumnVector.java          | 106 +--
 .../vectorized/OnHeapColumnVector.java           |  10 +-
 .../apache/spark/sql/hive/client/HiveShim.scala  |   2 +-
 .../streaming/util/FileBasedWriteAheadLog.scala  |   2 +-
 10 files changed, 72 insertions(+), 72 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d4261044/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
--
diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
index 5f00455..9a767dd 100644
--- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
+++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
@@ -703,7 +703,7 @@ public final class BytesToBytesMap extends MemoryConsumer {
   // must be stored in the same memory page.
   // (8 byte key length) (key) (value) (8 byte pointer to next value)
   int uaoSize = UnsafeAlignedOffset.getUaoSize();
-  final long recordLength = (2 * uaoSize) + klen + vlen + 8;
+  final long recordLength = (2L * uaoSize) + klen + vlen + 8;
  if (currentPage == null || currentPage.size() - pageCursor < recordLength) {
 if (!acquireNewPage(recordLength + uaoSize)) {
   return false;

http://git-wip-us.apache.org/repos/asf/spark/blob/d4261044/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
index 58a1811..a6d13d1 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
@@ -225,7 +225,7 @@ private[deploy] class DriverRunner(
   // check if attempting another run
   keepTrying = supervise && exitCode != 0 && !killed
   if (keepTrying) {
-if (clock.getTimeMillis() - processStart > successfulRunDuration * 1000) {
+if (clock.getTimeMillis() - processStart > successfulRunDuration * 1000L) {
   waitSeconds = 1
 }
 logInfo(s"Command exited with status $exitCode, re-launching after 
$waitSeconds s.")

http://git-wip-us.apache.org/repos/asf/spark/blob/d4261044/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala
--
diff --git a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala
index c9ed12f..47669a0 100644
--- a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala
@@ -95,7 +95,7 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Logging
 // the left side of max is >=1 whenever partsScanned >= 2
 numPartsToTry = Math.max(1,
   (1.5 * num * partsScanned / results.size).toInt - partsScanned)
-numPartsToTry = Math.min(numPartsToTry, partsScanned * 4)
+numPartsToTry = Math.min(numPartsToTry, partsScanned * 4L)

spark git commit: [SPARK-24452][SQL][CORE] Avoid possible overflow in int add or multiply

2018-06-15 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master b5ccf0d39 -> 90da7dc24


[SPARK-24452][SQL][CORE] Avoid possible overflow in int add or multiply

## What changes were proposed in this pull request?

This PR fixes possible overflow in int addition or multiplication. In
particular, the overflows in multiplication were detected by
[Spotbugs](https://spotbugs.github.io/).

The following assignments may overflow on the right-hand side: the
arithmetic is evaluated in 32-bit int and only afterwards widened to
long, so the stored result may be negative.
```
long = int * int
long = int + int
```

To avoid this problem, this PR casts one int operand to long on the
right-hand side, so the whole expression is evaluated in 64-bit
arithmetic.
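One subtlety worth noting: the cast has to widen an operand, not the
already-computed result, since `(long)(int * int)` converts a value that
has already wrapped around. A small Java sketch (illustrative only; the
variable name is borrowed from the AsyncRDDActions hunk below):

```
// Illustrative only: shows why the cast must widen an operand.
public class CastPlacementDemo {
  public static void main(String[] args) {
    int partsScanned = 600_000_000;

    // Wrong: `partsScanned * 4` still overflows in 32-bit int;
    // casting the wrapped result to long keeps the bad value.
    long bad = (long) (partsScanned * 4);  // -1894967296

    // Right: widening one operand (`4L`) makes the multiplication
    // itself 64-bit, as in the `partsScanned * 4L` change below.
    long good = partsScanned * 4L;         // 2400000000

    System.out.println("bad = " + bad + ", good = " + good);
  }
}
```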

## How was this patch tested?

Existing UTs.

Author: Kazuaki Ishizaki 

Closes #21481 from kiszk/SPARK-24452.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/90da7dc2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/90da7dc2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/90da7dc2

Branch: refs/heads/master
Commit: 90da7dc241f8eec2348c0434312c97c116330bc4
Parents: b5ccf0d
Author: Kazuaki Ishizaki 
Authored: Fri Jun 15 13:47:48 2018 -0700
Committer: Wenchen Fan 
Committed: Fri Jun 15 13:47:48 2018 -0700

--
 .../spark/unsafe/map/BytesToBytesMap.java        |   2 +-
 .../spark/deploy/worker/DriverRunner.scala       |   2 +-
 .../org/apache/spark/rdd/AsyncRDDActions.scala   |   2 +-
 .../org/apache/spark/storage/BlockManager.scala  |   2 +-
 .../catalyst/expressions/UnsafeArrayData.java    |  14 +--
 .../VariableLengthRowBasedKeyValueBatch.java     |   2 +-
 .../vectorized/OffHeapColumnVector.java          | 106 +--
 .../vectorized/OnHeapColumnVector.java           |  10 +-
 .../sources/RateStreamMicroBatchReader.scala     |   2 +-
 .../apache/spark/sql/hive/client/HiveShim.scala  |   2 +-
 .../streaming/util/FileBasedWriteAheadLog.scala  |   2 +-
 11 files changed, 73 insertions(+), 73 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/90da7dc2/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
--
diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
index 5f00455..9a767dd 100644
--- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
+++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java
@@ -703,7 +703,7 @@ public final class BytesToBytesMap extends MemoryConsumer {
   // must be stored in the same memory page.
   // (8 byte key length) (key) (value) (8 byte pointer to next value)
   int uaoSize = UnsafeAlignedOffset.getUaoSize();
-  final long recordLength = (2 * uaoSize) + klen + vlen + 8;
+  final long recordLength = (2L * uaoSize) + klen + vlen + 8;
  if (currentPage == null || currentPage.size() - pageCursor < recordLength) {
 if (!acquireNewPage(recordLength + uaoSize)) {
   return false;

http://git-wip-us.apache.org/repos/asf/spark/blob/90da7dc2/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
index 58a1811..a6d13d1 100644
--- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
@@ -225,7 +225,7 @@ private[deploy] class DriverRunner(
   // check if attempting another run
   keepTrying = supervise && exitCode != 0 && !killed
   if (keepTrying) {
-if (clock.getTimeMillis() - processStart > successfulRunDuration * 1000) {
+if (clock.getTimeMillis() - processStart > successfulRunDuration * 1000L) {
   waitSeconds = 1
 }
 logInfo(s"Command exited with status $exitCode, re-launching after 
$waitSeconds s.")

http://git-wip-us.apache.org/repos/asf/spark/blob/90da7dc2/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala
--
diff --git a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala
index 13db498..ba9dae4 100644
--- a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala
@@ -95,7 +95,7 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Logging
 // the left side of max is >=1 whenever partsScanned >= 2
 numPartsToTry = Math.max(1,
   (1.5 * num * partsScanned / results.size).toInt - partsScanned)
-numPartsToTry = Math.min(numPartsToTry, partsScanned * 4)
+numPartsToTry = Math.min(numPartsToTry, partsScanned * 4L)