ConeyLiu commented on issue #25470: [SPARK-28751][Core][WIP] Improve java serializer deserialization performance URL: https://github.com/apache/spark/pull/25470#issuecomment-547238238 I have tested it locally; the performance is almost equal: Test case: ```scala object JavaSerializerBenchmark2 extends BenchmarkBase { var sc: SparkContext = null val N = 500 override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { val name = "Benchmark Cache resolved classes for JavaSerializer" runBenchmark(name) { val benchmark = new Benchmark(name, N, 10, output = output) Seq(true, false).foreach(useCache => run(useCache, benchmark)) benchmark.run() } } private def run(useCache: Boolean, benchmark: Benchmark): Unit = { lazy val sc = createSparkContext(useCache) benchmark.addCase(s"cache resolvedClasses:$useCache") { _ => val futures = for (_ <- 0 until N) yield { Future { val rdd = sc.parallelize(0 until 100).map(i => i + 1).persist(StorageLevel.OFF_HEAP) rdd.count() rdd.unpersist(true) } } val future = Future.sequence(futures) ThreadUtils.awaitResult(future, 10.minutes) } } def createSparkContext(useCache: Boolean): SparkContext = { val conf = new SparkConf() // SPARK-29282 This is for consistency between JDK8 and JDK11. 
conf.set(EXECUTOR_EXTRA_JAVA_OPTIONS, "-XX:+UseParallelGC -XX:-UseDynamicNumberOfGCThreads") conf.set(SERIALIZER, "org.apache.spark.serializer.JavaSerializer") conf.set(JAVA_SERIALIZER_CACHE_RESOLVED_CLASSES, useCache) conf.set(MEMORY_OFFHEAP_ENABLED, true) conf.set(MEMORY_OFFHEAP_SIZE, 100L * 1024) if (sc != null) { sc.stop() } sc = new SparkContext("local-cluster[4,1,1024]", "test", conf) sc } override def afterAll(): Unit = { if (sc != null) { sc.stop() } } } ``` results: ``` Java HotSpot(TM) 64-Bit Server VM 1.8.0_111-b14 on Linux 4.15.0-66-generic Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz Benchmark Cache resolved classes for JavaSerializer: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ cache resolvedClasses:true 2077 2373 289 0.0 4153346.1 1.0X cache resolvedClasses:false 2116 2508 453 0.0 4231394.3 1.0X ```
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org