Repository: spark Updated Branches: refs/heads/master 458468ad5 -> 3e033035a
[SPARK-25258][SPARK-23131][SPARK-25176][BUILD] Upgrade Kryo to 4.0.2 ## What changes were proposed in this pull request? Upgrade chill to 0.9.3, Kryo to 4.0.2, to get bug fixes and improvements. The resolved tickets include: - SPARK-25258 Upgrade kryo package to version 4.0.2 - SPARK-23131 Kryo raises StackOverflow during serializing GLR model - SPARK-25176 Kryo fails to serialize a parametrised type hierarchy More details: https://github.com/twitter/chill/releases/tag/v0.9.3 https://github.com/twitter/chill/commit/cc3910d501a844f3c882249fef8fc2560b95b6dd ## How was this patch tested? Existing tests. Closes #22179 from wangyum/SPARK-23131. Lead-authored-by: Yuming Wang <yumw...@ebay.com> Co-authored-by: Dongjoon Hyun <dongj...@apache.org> Signed-off-by: Sean Owen <sean.o...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3e033035 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3e033035 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3e033035 Branch: refs/heads/master Commit: 3e033035a3c0b7d46c2ae18d0d322d4af3808711 Parents: 458468a Author: Yuming Wang <yumw...@ebay.com> Authored: Wed Sep 5 15:48:41 2018 -0700 Committer: Sean Owen <sean.o...@databricks.com> Committed: Wed Sep 5 15:48:41 2018 -0700 ---------------------------------------------------------------------- .../spark/serializer/KryoSerializerSuite.scala | 20 ++++++++++++++++++++ dev/deps/spark-deps-hadoop-2.6 | 8 ++++---- dev/deps/spark-deps-hadoop-2.7 | 8 ++++---- dev/deps/spark-deps-hadoop-3.1 | 8 ++++---- docs/tuning.md | 2 +- .../GeneralizedLinearRegressionSuite.scala | 11 ++++++++++- pom.xml | 6 +++++- 7 files changed, 48 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/3e033035/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala 
---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala index 240f8cf..3691244 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala @@ -412,6 +412,26 @@ class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext { assert(!ser2.getAutoReset) } + test("SPARK-25176 ClassCastException when writing a Map after previously " + + "reading a Map with different generic type") { + // This test uses the example in https://github.com/EsotericSoftware/kryo/issues/384 + import java.util._ + val ser = new KryoSerializer(new SparkConf).newInstance().asInstanceOf[KryoSerializerInstance] + + class MapHolder { + private val mapOne = new HashMap[Int, String] + private val mapTwo = this.mapOne + } + + val serializedMapHolder = ser.serialize(new MapHolder) + ser.deserialize[MapHolder](serializedMapHolder) + + val stringMap = new HashMap[Int, List[String]] + stringMap.put(1, new ArrayList[String]) + val serializedMap = ser.serialize[Map[Int, List[String]]](stringMap) + ser.deserialize[HashMap[Int, List[String]]](serializedMap) + } + private def testSerializerInstanceReuse(autoReset: Boolean, referenceTracking: Boolean): Unit = { val conf = new SparkConf(loadDefaults = false) .set("spark.kryo.referenceTracking", referenceTracking.toString) http://git-wip-us.apache.org/repos/asf/spark/blob/3e033035/dev/deps/spark-deps-hadoop-2.6 ---------------------------------------------------------------------- diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6 index fc42af9..62ae04d 100644 --- a/dev/deps/spark-deps-hadoop-2.6 +++ b/dev/deps/spark-deps-hadoop-2.6 @@ -27,8 +27,8 @@ breeze_2.11-0.13.2.jar calcite-avatica-1.2.0-incubating.jar calcite-core-1.2.0-incubating.jar 
calcite-linq4j-1.2.0-incubating.jar -chill-java-0.8.4.jar -chill_2.11-0.8.4.jar +chill-java-0.9.3.jar +chill_2.11-0.9.3.jar commons-beanutils-1.7.0.jar commons-beanutils-core-1.8.0.jar commons-cli-1.2.jar @@ -130,7 +130,7 @@ jsr305-1.3.9.jar jta-1.1.jar jtransforms-2.4.0.jar jul-to-slf4j-1.7.16.jar -kryo-shaded-3.0.3.jar +kryo-shaded-4.0.2.jar kubernetes-client-3.0.0.jar kubernetes-model-2.0.0.jar leveldbjni-all-1.8.jar @@ -149,7 +149,7 @@ metrics-jvm-3.1.5.jar minlog-1.3.0.jar netty-3.9.9.Final.jar netty-all-4.1.17.Final.jar -objenesis-2.1.jar +objenesis-2.5.1.jar okhttp-3.8.1.jar okio-1.13.0.jar opencsv-2.3.jar http://git-wip-us.apache.org/repos/asf/spark/blob/3e033035/dev/deps/spark-deps-hadoop-2.7 ---------------------------------------------------------------------- diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index 54e5055..5e12ca0 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -27,8 +27,8 @@ breeze_2.11-0.13.2.jar calcite-avatica-1.2.0-incubating.jar calcite-core-1.2.0-incubating.jar calcite-linq4j-1.2.0-incubating.jar -chill-java-0.8.4.jar -chill_2.11-0.8.4.jar +chill-java-0.9.3.jar +chill_2.11-0.9.3.jar commons-beanutils-1.7.0.jar commons-beanutils-core-1.8.0.jar commons-cli-1.2.jar @@ -132,7 +132,7 @@ jsr305-1.3.9.jar jta-1.1.jar jtransforms-2.4.0.jar jul-to-slf4j-1.7.16.jar -kryo-shaded-3.0.3.jar +kryo-shaded-4.0.2.jar kubernetes-client-3.0.0.jar kubernetes-model-2.0.0.jar leveldbjni-all-1.8.jar @@ -151,7 +151,7 @@ metrics-jvm-3.1.5.jar minlog-1.3.0.jar netty-3.9.9.Final.jar netty-all-4.1.17.Final.jar -objenesis-2.1.jar +objenesis-2.5.1.jar okhttp-3.8.1.jar okio-1.13.0.jar opencsv-2.3.jar http://git-wip-us.apache.org/repos/asf/spark/blob/3e033035/dev/deps/spark-deps-hadoop-3.1 ---------------------------------------------------------------------- diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.1 index ff5713b..641b4a1 100644 --- 
a/dev/deps/spark-deps-hadoop-3.1 +++ b/dev/deps/spark-deps-hadoop-3.1 @@ -25,8 +25,8 @@ breeze_2.11-0.13.2.jar calcite-avatica-1.2.0-incubating.jar calcite-core-1.2.0-incubating.jar calcite-linq4j-1.2.0-incubating.jar -chill-java-0.8.4.jar -chill_2.11-0.8.4.jar +chill-java-0.9.3.jar +chill_2.11-0.9.3.jar commons-beanutils-1.9.3.jar commons-cli-1.2.jar commons-codec-1.10.jar @@ -146,7 +146,7 @@ kerby-config-1.0.1.jar kerby-pkix-1.0.1.jar kerby-util-1.0.1.jar kerby-xdr-1.0.1.jar -kryo-shaded-3.0.3.jar +kryo-shaded-4.0.2.jar kubernetes-client-3.0.0.jar kubernetes-model-2.0.0.jar leveldbjni-all-1.8.jar @@ -167,7 +167,7 @@ mssql-jdbc-6.2.1.jre7.jar netty-3.9.9.Final.jar netty-all-4.1.17.Final.jar nimbus-jose-jwt-4.41.1.jar -objenesis-2.1.jar +objenesis-2.5.1.jar okhttp-2.7.5.jar okhttp-3.8.1.jar okio-1.13.0.jar http://git-wip-us.apache.org/repos/asf/spark/blob/3e033035/docs/tuning.md ---------------------------------------------------------------------- diff --git a/docs/tuning.md b/docs/tuning.md index 1c3bd0e..f60971a 100644 --- a/docs/tuning.md +++ b/docs/tuning.md @@ -35,7 +35,7 @@ in your operations) and performance. It provides two serialization libraries: Java serialization is flexible but often quite slow, and leads to large serialized formats for many classes. * [Kryo serialization](https://github.com/EsotericSoftware/kryo): Spark can also use - the Kryo library (version 2) to serialize objects more quickly. Kryo is significantly + the Kryo library (version 4) to serialize objects more quickly. Kryo is significantly faster and more compact than Java serialization (often as much as 10x), but does not support all `Serializable` types and requires you to *register* the classes you'll use in the program in advance for best performance. 
http://git-wip-us.apache.org/repos/asf/spark/blob/3e033035/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala index 997c501..600a432 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.regression import scala.util.Random -import org.apache.spark.SparkFunSuite +import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.ml.classification.LogisticRegressionSuite._ import org.apache.spark.ml.feature.{Instance, OffsetInstance} import org.apache.spark.ml.feature.{LabeledPoint, RFormula} @@ -29,6 +29,7 @@ import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils} import org.apache.spark.ml.util.TestingUtils._ import org.apache.spark.mllib.random._ import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.serializer.KryoSerializer import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.FloatType @@ -1687,6 +1688,14 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest assert(evalSummary.deviance === summary.deviance) assert(evalSummary.aic === summary.aic) } + + test("SPARK-23131 Kryo raises StackOverflow during serializing GLR model") { + val conf = new SparkConf(false) + val ser = new KryoSerializer(conf).newInstance() + val trainer = new GeneralizedLinearRegression() + val model = trainer.fit(Seq(Instance(1.0, 1.0, Vectors.dense(1.0, 7.0))).toDF) + ser.serialize[GeneralizedLinearRegressionModel](model) + } } object 
GeneralizedLinearRegressionSuite { http://git-wip-us.apache.org/repos/asf/spark/blob/3e033035/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 6988c65..da526a1 100644 --- a/pom.xml +++ b/pom.xml @@ -136,7 +136,7 @@ <hive.parquet.version>1.6.0</hive.parquet.version> <jetty.version>9.3.24.v20180605</jetty.version> <javaxservlet.version>3.1.0</javaxservlet.version> - <chill.version>0.8.4</chill.version> + <chill.version>0.9.3</chill.version> <ivy.version>2.4.0</ivy.version> <oro.version>2.0.8</oro.version> <codahale.metrics.version>3.1.5</codahale.metrics.version> @@ -1770,6 +1770,10 @@ <groupId>org.apache.hive</groupId> <artifactId>hive-storage-api</artifactId> </exclusion> + <exclusion> + <groupId> com.esotericsoftware</groupId> + <artifactId>kryo-shaded</artifactId> + </exclusion> </exclusions> </dependency> <dependency> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org