Repository: spark
Updated Branches:
  refs/heads/master 458468ad5 -> 3e033035a


[SPARK-25258][SPARK-23131][SPARK-25176][BUILD] Upgrade Kryo to 4.0.2

## What changes were proposed in this pull request?

Upgrade chill to 0.9.3 and Kryo to 4.0.2 to get bug fixes and improvements.

The resolved tickets include:
- SPARK-25258 Upgrade kryo package to version 4.0.2
- SPARK-23131 Kryo raises StackOverflow during serializing GLR model
- SPARK-25176 Kryo fails to serialize a parametrised type hierarchy

More details:
https://github.com/twitter/chill/releases/tag/v0.9.3
https://github.com/twitter/chill/commit/cc3910d501a844f3c882249fef8fc2560b95b6dd

## How was this patch tested?

Existing tests.

Closes #22179 from wangyum/SPARK-23131.

Lead-authored-by: Yuming Wang <yumw...@ebay.com>
Co-authored-by: Dongjoon Hyun <dongj...@apache.org>
Signed-off-by: Sean Owen <sean.o...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3e033035
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3e033035
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3e033035

Branch: refs/heads/master
Commit: 3e033035a3c0b7d46c2ae18d0d322d4af3808711
Parents: 458468a
Author: Yuming Wang <yumw...@ebay.com>
Authored: Wed Sep 5 15:48:41 2018 -0700
Committer: Sean Owen <sean.o...@databricks.com>
Committed: Wed Sep 5 15:48:41 2018 -0700

----------------------------------------------------------------------
 .../spark/serializer/KryoSerializerSuite.scala  | 20 ++++++++++++++++++++
 dev/deps/spark-deps-hadoop-2.6                  |  8 ++++----
 dev/deps/spark-deps-hadoop-2.7                  |  8 ++++----
 dev/deps/spark-deps-hadoop-3.1                  |  8 ++++----
 docs/tuning.md                                  |  2 +-
 .../GeneralizedLinearRegressionSuite.scala      | 11 ++++++++++-
 pom.xml                                         |  6 +++++-
 7 files changed, 48 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3e033035/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
----------------------------------------------------------------------
diff --git 
a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala 
b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
index 240f8cf..3691244 100644
--- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala
@@ -412,6 +412,26 @@ class KryoSerializerSuite extends SparkFunSuite with 
SharedSparkContext {
     assert(!ser2.getAutoReset)
   }
 
+  test("SPARK-25176 ClassCastException when writing a Map after previously " +
+    "reading a Map with different generic type") {
+    // This test uses the example in 
https://github.com/EsotericSoftware/kryo/issues/384
+    import java.util._
+    val ser = new KryoSerializer(new 
SparkConf).newInstance().asInstanceOf[KryoSerializerInstance]
+
+    class MapHolder {
+      private val mapOne = new HashMap[Int, String]
+      private val mapTwo = this.mapOne
+    }
+
+    val serializedMapHolder = ser.serialize(new MapHolder)
+    ser.deserialize[MapHolder](serializedMapHolder)
+
+    val stringMap = new HashMap[Int, List[String]]
+    stringMap.put(1, new ArrayList[String])
+    val serializedMap = ser.serialize[Map[Int, List[String]]](stringMap)
+    ser.deserialize[HashMap[Int, List[String]]](serializedMap)
+  }
+
   private def testSerializerInstanceReuse(autoReset: Boolean, 
referenceTracking: Boolean): Unit = {
     val conf = new SparkConf(loadDefaults = false)
       .set("spark.kryo.referenceTracking", referenceTracking.toString)

http://git-wip-us.apache.org/repos/asf/spark/blob/3e033035/dev/deps/spark-deps-hadoop-2.6
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index fc42af9..62ae04d 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -27,8 +27,8 @@ breeze_2.11-0.13.2.jar
 calcite-avatica-1.2.0-incubating.jar
 calcite-core-1.2.0-incubating.jar
 calcite-linq4j-1.2.0-incubating.jar
-chill-java-0.8.4.jar
-chill_2.11-0.8.4.jar
+chill-java-0.9.3.jar
+chill_2.11-0.9.3.jar
 commons-beanutils-1.7.0.jar
 commons-beanutils-core-1.8.0.jar
 commons-cli-1.2.jar
@@ -130,7 +130,7 @@ jsr305-1.3.9.jar
 jta-1.1.jar
 jtransforms-2.4.0.jar
 jul-to-slf4j-1.7.16.jar
-kryo-shaded-3.0.3.jar
+kryo-shaded-4.0.2.jar
 kubernetes-client-3.0.0.jar
 kubernetes-model-2.0.0.jar
 leveldbjni-all-1.8.jar
@@ -149,7 +149,7 @@ metrics-jvm-3.1.5.jar
 minlog-1.3.0.jar
 netty-3.9.9.Final.jar
 netty-all-4.1.17.Final.jar
-objenesis-2.1.jar
+objenesis-2.5.1.jar
 okhttp-3.8.1.jar
 okio-1.13.0.jar
 opencsv-2.3.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/3e033035/dev/deps/spark-deps-hadoop-2.7
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7
index 54e5055..5e12ca0 100644
--- a/dev/deps/spark-deps-hadoop-2.7
+++ b/dev/deps/spark-deps-hadoop-2.7
@@ -27,8 +27,8 @@ breeze_2.11-0.13.2.jar
 calcite-avatica-1.2.0-incubating.jar
 calcite-core-1.2.0-incubating.jar
 calcite-linq4j-1.2.0-incubating.jar
-chill-java-0.8.4.jar
-chill_2.11-0.8.4.jar
+chill-java-0.9.3.jar
+chill_2.11-0.9.3.jar
 commons-beanutils-1.7.0.jar
 commons-beanutils-core-1.8.0.jar
 commons-cli-1.2.jar
@@ -132,7 +132,7 @@ jsr305-1.3.9.jar
 jta-1.1.jar
 jtransforms-2.4.0.jar
 jul-to-slf4j-1.7.16.jar
-kryo-shaded-3.0.3.jar
+kryo-shaded-4.0.2.jar
 kubernetes-client-3.0.0.jar
 kubernetes-model-2.0.0.jar
 leveldbjni-all-1.8.jar
@@ -151,7 +151,7 @@ metrics-jvm-3.1.5.jar
 minlog-1.3.0.jar
 netty-3.9.9.Final.jar
 netty-all-4.1.17.Final.jar
-objenesis-2.1.jar
+objenesis-2.5.1.jar
 okhttp-3.8.1.jar
 okio-1.13.0.jar
 opencsv-2.3.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/3e033035/dev/deps/spark-deps-hadoop-3.1
----------------------------------------------------------------------
diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.1
index ff5713b..641b4a1 100644
--- a/dev/deps/spark-deps-hadoop-3.1
+++ b/dev/deps/spark-deps-hadoop-3.1
@@ -25,8 +25,8 @@ breeze_2.11-0.13.2.jar
 calcite-avatica-1.2.0-incubating.jar
 calcite-core-1.2.0-incubating.jar
 calcite-linq4j-1.2.0-incubating.jar
-chill-java-0.8.4.jar
-chill_2.11-0.8.4.jar
+chill-java-0.9.3.jar
+chill_2.11-0.9.3.jar
 commons-beanutils-1.9.3.jar
 commons-cli-1.2.jar
 commons-codec-1.10.jar
@@ -146,7 +146,7 @@ kerby-config-1.0.1.jar
 kerby-pkix-1.0.1.jar
 kerby-util-1.0.1.jar
 kerby-xdr-1.0.1.jar
-kryo-shaded-3.0.3.jar
+kryo-shaded-4.0.2.jar
 kubernetes-client-3.0.0.jar
 kubernetes-model-2.0.0.jar
 leveldbjni-all-1.8.jar
@@ -167,7 +167,7 @@ mssql-jdbc-6.2.1.jre7.jar
 netty-3.9.9.Final.jar
 netty-all-4.1.17.Final.jar
 nimbus-jose-jwt-4.41.1.jar
-objenesis-2.1.jar
+objenesis-2.5.1.jar
 okhttp-2.7.5.jar
 okhttp-3.8.1.jar
 okio-1.13.0.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/3e033035/docs/tuning.md
----------------------------------------------------------------------
diff --git a/docs/tuning.md b/docs/tuning.md
index 1c3bd0e..f60971a 100644
--- a/docs/tuning.md
+++ b/docs/tuning.md
@@ -35,7 +35,7 @@ in your operations) and performance. It provides two 
serialization libraries:
   Java serialization is flexible but often quite slow, and leads to large
   serialized formats for many classes.
 * [Kryo serialization](https://github.com/EsotericSoftware/kryo): Spark can 
also use
-  the Kryo library (version 2) to serialize objects more quickly. Kryo is 
significantly
+  the Kryo library (version 4) to serialize objects more quickly. Kryo is 
significantly
   faster and more compact than Java serialization (often as much as 10x), but 
does not support all
   `Serializable` types and requires you to *register* the classes you'll use 
in the program in advance
   for best performance.

http://git-wip-us.apache.org/repos/asf/spark/blob/3e033035/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
index 997c501..600a432 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.regression
 
 import scala.util.Random
 
-import org.apache.spark.SparkFunSuite
+import org.apache.spark.{SparkConf, SparkFunSuite}
 import org.apache.spark.ml.classification.LogisticRegressionSuite._
 import org.apache.spark.ml.feature.{Instance, OffsetInstance}
 import org.apache.spark.ml.feature.{LabeledPoint, RFormula}
@@ -29,6 +29,7 @@ import org.apache.spark.ml.util.{DefaultReadWriteTest, 
MLTest, MLTestingUtils}
 import org.apache.spark.ml.util.TestingUtils._
 import org.apache.spark.mllib.random._
 import org.apache.spark.mllib.util.MLlibTestSparkContext
+import org.apache.spark.serializer.KryoSerializer
 import org.apache.spark.sql.{DataFrame, Row}
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.types.FloatType
@@ -1687,6 +1688,14 @@ class GeneralizedLinearRegressionSuite extends MLTest 
with DefaultReadWriteTest
     assert(evalSummary.deviance === summary.deviance)
     assert(evalSummary.aic === summary.aic)
   }
+
+  test("SPARK-23131 Kryo raises StackOverflow during serializing GLR model") {
+    val conf = new SparkConf(false)
+    val ser = new KryoSerializer(conf).newInstance()
+    val trainer = new GeneralizedLinearRegression()
+    val model = trainer.fit(Seq(Instance(1.0, 1.0, Vectors.dense(1.0, 
7.0))).toDF)
+    ser.serialize[GeneralizedLinearRegressionModel](model)
+  }
 }
 
 object GeneralizedLinearRegressionSuite {

http://git-wip-us.apache.org/repos/asf/spark/blob/3e033035/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 6988c65..da526a1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -136,7 +136,7 @@
     <hive.parquet.version>1.6.0</hive.parquet.version>
     <jetty.version>9.3.24.v20180605</jetty.version>
     <javaxservlet.version>3.1.0</javaxservlet.version>
-    <chill.version>0.8.4</chill.version>
+    <chill.version>0.9.3</chill.version>
     <ivy.version>2.4.0</ivy.version>
     <oro.version>2.0.8</oro.version>
     <codahale.metrics.version>3.1.5</codahale.metrics.version>
@@ -1770,6 +1770,10 @@
             <groupId>org.apache.hive</groupId>
             <artifactId>hive-storage-api</artifactId>
           </exclusion>
+          <exclusion>
+            <groupId> com.esotericsoftware</groupId>
+            <artifactId>kryo-shaded</artifactId>
+          </exclusion>
         </exclusions>
       </dependency>
       <dependency>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to