ConeyLiu commented on issue #25470: [SPARK-28751][Core][WIP] Improve java 
serializer deserialization performance
URL: https://github.com/apache/spark/pull/25470#issuecomment-547238238
 
 
   I have tested it locally; the performance is almost equal:
   Test case:
   ```scala
   /**
    * Benchmark that runs the same workload with `JAVA_SERIALIZER_CACHE_RESOLVED_CLASSES`
    * set to `true` and to `false`, using `JavaSerializer` in both cases.
    *
    * Each benchmark case submits `N` concurrent jobs against a `local-cluster` SparkContext.
    * Every job persists a small RDD with `StorageLevel.OFF_HEAP`, counts it and unpersists it.
    */
   object JavaSerializerBenchmark2 extends BenchmarkBase {

     /** The most recently created context, or `null` when none is active. */
     var sc: SparkContext = null

     /** Number of concurrent jobs submitted per benchmark iteration. */
     val N = 500

     /** Registers one case per cache setting and then runs them all. */
     override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
       val name = "Benchmark Cache resolved classes for JavaSerializer"
       runBenchmark(name) {
         val benchmark = new Benchmark(name, N, 10, output = output)
         Seq(true, false).foreach(useCache => run(useCache, benchmark))
         benchmark.run()
       }
     }

     /**
      * Adds a benchmark case for the given cache setting.
      *
      * @param useCache  value used for `JAVA_SERIALIZER_CACHE_RESOLVED_CLASSES`
      * @param benchmark the benchmark the case is added to
      */
     private def run(useCache: Boolean, benchmark: Benchmark): Unit = {
       // This must stay lazy. Every case is registered before benchmark.run() executes any
       // of them, and createSparkContext stops the previously created context. An eager val
       // would therefore shut down the first case's context while the second is registered.
       // Named `context` so it does not shadow the object-level `sc`.
       lazy val context = createSparkContext(useCache)

       benchmark.addCase(s"cache resolvedClasses:$useCache") { _ =>
         val jobs = (0 until N).map { _ =>
           Future {
             val rdd = context
               .parallelize(0 until 100)
               .map(i => i + 1)
               .persist(StorageLevel.OFF_HEAP)
             rdd.count()
             rdd.unpersist(blocking = true)
           }
         }

         ThreadUtils.awaitResult(Future.sequence(jobs), 10.minutes)
       }
     }

     /**
      * Stops the currently active context, if any, and creates a new one.
      *
      * @param useCache value used for `JAVA_SERIALIZER_CACHE_RESOLVED_CLASSES`
      * @return the newly created context, which is also stored in `sc`
      */
     def createSparkContext(useCache: Boolean): SparkContext = {
       val conf = new SparkConf()
       // SPARK-29282 This is for consistency between JDK8 and JDK11.
       conf.set(EXECUTOR_EXTRA_JAVA_OPTIONS,
         "-XX:+UseParallelGC -XX:-UseDynamicNumberOfGCThreads")
       conf.set(SERIALIZER, "org.apache.spark.serializer.JavaSerializer")
       conf.set(JAVA_SERIALIZER_CACHE_RESOLVED_CLASSES, useCache)
       conf.set(MEMORY_OFFHEAP_ENABLED, true)
       conf.set(MEMORY_OFFHEAP_SIZE, 100L * 1024)

       stopSparkContext()

       sc = new SparkContext("local-cluster[4,1,1024]", "test", conf)
       sc
     }

     /** Stops the last context once the suite has finished. */
     override def afterAll(): Unit = stopSparkContext()

     /** Stops the active context, if any, and clears `sc` so no stale reference remains. */
     private def stopSparkContext(): Unit = {
       if (sc != null) {
         sc.stop()
         sc = null
       }
     }

   }
   
   ```
   
   results:
   ```
   Java HotSpot(TM) 64-Bit Server VM 1.8.0_111-b14 on Linux 4.15.0-66-generic
   Intel(R) Core(TM) i7-6950X CPU @ 3.00GHz
   Benchmark Cache resolved classes for JavaSerializer:  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
   ------------------------------------------------------------------------------------------------------------------------
   cache resolvedClasses:true                         2077           2373         289          0.0     4153346.1       1.0X
   cache resolvedClasses:false                        2116           2508         453          0.0     4231394.3       1.0X
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to