spark git commit: [SPARK-12030] Fix Platform.copyMemory to handle overlapping regions.

2015-12-01 Thread yhuai
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 fc3fb8463 -> 7460e4309


[SPARK-12030] Fix Platform.copyMemory to handle overlapping regions.

This bug was exposed as memory corruption in Timsort which uses copyMemory to 
copy
large regions that can overlap. The prior implementation did not handle this 
case
half the time and always copied forward, resulting in the data being corrupt.

Author: Nong Li 

Closes #10068 from nongli/spark-12030.

(cherry picked from commit 2cef1cdfbb5393270ae83179b6a4e50c3cbf9e93)
Signed-off-by: Yin Huai 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7460e430
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7460e430
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7460e430

Branch: refs/heads/branch-1.5
Commit: 7460e430929473152230d70964a04a7ff834066c
Parents: fc3fb84
Author: Nong Li 
Authored: Tue Dec 1 12:59:53 2015 -0800
Committer: Yin Huai 
Committed: Tue Dec 1 16:11:25 2015 -0800

--
 .../java/org/apache/spark/unsafe/Platform.java  | 27 +++--
 .../apache/spark/unsafe/PlatformUtilSuite.java  | 61 
 2 files changed, 82 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7460e430/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
--
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java 
b/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
index 1c16da9..0d6b215 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
@@ -107,12 +107,27 @@ public final class Platform {
 
   public static void copyMemory(
 Object src, long srcOffset, Object dst, long dstOffset, long length) {
-while (length > 0) {
-  long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
-  _UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
-  length -= size;
-  srcOffset += size;
-  dstOffset += size;
+// Check if dstOffset is before or after srcOffset to determine if we 
should copy
+// forward or backwards. This is necessary in case src and dst overlap.
+if (dstOffset < srcOffset) {
+  while (length > 0) {
+long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
+_UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
+length -= size;
+srcOffset += size;
+dstOffset += size;
+  }
+} else {
+  srcOffset += length;
+  dstOffset += length;
+  while (length > 0) {
+long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
+srcOffset -= size;
+dstOffset -= size;
+_UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
+length -= size;
+  }
+
 }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/7460e430/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
--
diff --git 
a/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java 
b/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
new file mode 100644
index 000..693ec6e
--- /dev/null
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class PlatformUtilSuite {
+
+  @Test
+  public void overlappingCopyMemory() {
+byte[] data = new byte[3 * 1024 * 1024];
+int size = 2 * 1024 * 1024;
+for (int i = 0; i < data.length; ++i) {
+  data[i] = (byte)i;
+}
+
+Platform.copyMemory(data, Platform.BYTE_ARRAY_OFFSET, data, 
Platform.BYTE_ARRAY_OFFSET, size);
+for (int i = 0; i < data.length; ++i) {
+  Assert.assertEquals((byte)i, data[i]);
+}
+
+Platform.copyMemory(
+data,
+Platform.BYTE_ARRAY_OFFSET + 1,
+data,
+Platform.BYTE_ARRAY_OFF

spark git commit: [SPARK-12030] Fix Platform.copyMemory to handle overlapping regions.

2015-12-01 Thread yhuai
Repository: spark
Updated Branches:
  refs/heads/master 34e7093c1 -> 2cef1cdfb


[SPARK-12030] Fix Platform.copyMemory to handle overlapping regions.

This bug was exposed as memory corruption in Timsort which uses copyMemory to 
copy
large regions that can overlap. The prior implementation did not handle this 
case
half the time and always copied forward, resulting in the data being corrupt.

Author: Nong Li 

Closes #10068 from nongli/spark-12030.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2cef1cdf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2cef1cdf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2cef1cdf

Branch: refs/heads/master
Commit: 2cef1cdfbb5393270ae83179b6a4e50c3cbf9e93
Parents: 34e7093
Author: Nong Li 
Authored: Tue Dec 1 12:59:53 2015 -0800
Committer: Yin Huai 
Committed: Tue Dec 1 12:59:53 2015 -0800

--
 .../java/org/apache/spark/unsafe/Platform.java  | 27 +++--
 .../apache/spark/unsafe/PlatformUtilSuite.java  | 61 
 2 files changed, 82 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2cef1cdf/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
--
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java 
b/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
index 1c16da9..0d6b215 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
@@ -107,12 +107,27 @@ public final class Platform {
 
   public static void copyMemory(
 Object src, long srcOffset, Object dst, long dstOffset, long length) {
-while (length > 0) {
-  long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
-  _UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
-  length -= size;
-  srcOffset += size;
-  dstOffset += size;
+// Check if dstOffset is before or after srcOffset to determine if we 
should copy
+// forward or backwards. This is necessary in case src and dst overlap.
+if (dstOffset < srcOffset) {
+  while (length > 0) {
+long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
+_UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
+length -= size;
+srcOffset += size;
+dstOffset += size;
+  }
+} else {
+  srcOffset += length;
+  dstOffset += length;
+  while (length > 0) {
+long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
+srcOffset -= size;
+dstOffset -= size;
+_UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
+length -= size;
+  }
+
 }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2cef1cdf/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
--
diff --git 
a/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java 
b/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
new file mode 100644
index 000..693ec6e
--- /dev/null
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class PlatformUtilSuite {
+
+  @Test
+  public void overlappingCopyMemory() {
+byte[] data = new byte[3 * 1024 * 1024];
+int size = 2 * 1024 * 1024;
+for (int i = 0; i < data.length; ++i) {
+  data[i] = (byte)i;
+}
+
+Platform.copyMemory(data, Platform.BYTE_ARRAY_OFFSET, data, 
Platform.BYTE_ARRAY_OFFSET, size);
+for (int i = 0; i < data.length; ++i) {
+  Assert.assertEquals((byte)i, data[i]);
+}
+
+Platform.copyMemory(
+data,
+Platform.BYTE_ARRAY_OFFSET + 1,
+data,
+Platform.BYTE_ARRAY_OFFSET,
+size);
+for (int i = 0; i < size; ++i) {
+  Assert.assertEquals((byte)(i + 1), da

spark git commit: [SPARK-12030] Fix Platform.copyMemory to handle overlapping regions.

2015-12-01 Thread yhuai
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 ab2a124c8 -> 1cf9d3858


[SPARK-12030] Fix Platform.copyMemory to handle overlapping regions.

This bug was exposed as memory corruption in Timsort which uses copyMemory to 
copy
large regions that can overlap. The prior implementation did not handle this 
case
half the time and always copied forward, resulting in the data being corrupt.

Author: Nong Li 

Closes #10068 from nongli/spark-12030.

(cherry picked from commit 2cef1cdfbb5393270ae83179b6a4e50c3cbf9e93)
Signed-off-by: Yin Huai 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1cf9d385
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1cf9d385
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1cf9d385

Branch: refs/heads/branch-1.6
Commit: 1cf9d3858c8a3a5796b64a9fbea22509f02d778a
Parents: ab2a124
Author: Nong Li 
Authored: Tue Dec 1 12:59:53 2015 -0800
Committer: Yin Huai 
Committed: Tue Dec 1 13:00:05 2015 -0800

--
 .../java/org/apache/spark/unsafe/Platform.java  | 27 +++--
 .../apache/spark/unsafe/PlatformUtilSuite.java  | 61 
 2 files changed, 82 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1cf9d385/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
--
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java 
b/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
index 1c16da9..0d6b215 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
@@ -107,12 +107,27 @@ public final class Platform {
 
   public static void copyMemory(
 Object src, long srcOffset, Object dst, long dstOffset, long length) {
-while (length > 0) {
-  long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
-  _UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
-  length -= size;
-  srcOffset += size;
-  dstOffset += size;
+// Check if dstOffset is before or after srcOffset to determine if we 
should copy
+// forward or backwards. This is necessary in case src and dst overlap.
+if (dstOffset < srcOffset) {
+  while (length > 0) {
+long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
+_UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
+length -= size;
+srcOffset += size;
+dstOffset += size;
+  }
+} else {
+  srcOffset += length;
+  dstOffset += length;
+  while (length > 0) {
+long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
+srcOffset -= size;
+dstOffset -= size;
+_UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
+length -= size;
+  }
+
 }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/1cf9d385/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
--
diff --git 
a/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java 
b/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
new file mode 100644
index 000..693ec6e
--- /dev/null
+++ b/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class PlatformUtilSuite {
+
+  @Test
+  public void overlappingCopyMemory() {
+byte[] data = new byte[3 * 1024 * 1024];
+int size = 2 * 1024 * 1024;
+for (int i = 0; i < data.length; ++i) {
+  data[i] = (byte)i;
+}
+
+Platform.copyMemory(data, Platform.BYTE_ARRAY_OFFSET, data, 
Platform.BYTE_ARRAY_OFFSET, size);
+for (int i = 0; i < data.length; ++i) {
+  Assert.assertEquals((byte)i, data[i]);
+}
+
+Platform.copyMemory(
+data,
+Platform.BYTE_ARRAY_OFFSET + 1,
+data,
+Platform.BYTE_ARRAY_OFF