yanghua commented on a change in pull request #1418: [HUDI-678] Make config 
package spark free
URL: https://github.com/apache/incubator-hudi/pull/1418#discussion_r398345709
 
 

 ##########
 File path: 
hudi-client/src/main/java/org/apache/hudi/config/HoodieMemoryConfig.java
 ##########
 @@ -113,52 +112,20 @@ public Builder withWriteStatusFailureFraction(double 
failureFraction) {
       return this;
     }
 
 -    /**
 -     * Dynamic calculation of max memory to use for spillable map. 
user.available.memory = spark.executor.memory *
-     * (1 - spark.memory.fraction) spillable.available.memory = 
user.available.memory * hoodie.memory.fraction. Anytime
-     * the spark.executor.memory or the spark.memory.fraction is changed, the 
memory used for spillable map changes
-     * accordingly
-     */
-    private long getMaxMemoryAllowedForMerge(String maxMemoryFraction) {
-      final String SPARK_EXECUTOR_MEMORY_PROP = "spark.executor.memory";
-      final String SPARK_EXECUTOR_MEMORY_FRACTION_PROP = 
"spark.memory.fraction";
-      // This is hard-coded in spark code {@link
-      // 
https://github.com/apache/spark/blob/576c43fb4226e4efa12189b41c3bc862019862c6/core/src/main/scala/org/apache/
-      // spark/memory/UnifiedMemoryManager.scala#L231} so have to re-define 
this here
-      final String DEFAULT_SPARK_EXECUTOR_MEMORY_FRACTION = "0.6";
-      // This is hard-coded in spark code {@link
-      // 
https://github.com/apache/spark/blob/576c43fb4226e4efa12189b41c3bc862019862c6/core/src/main/scala/org/apache/
-      // spark/SparkContext.scala#L471} so have to re-define this here
-      final String DEFAULT_SPARK_EXECUTOR_MEMORY_MB = "1024"; // in MB
-
-      if (SparkEnv.get() != null) {
-        // 1 GB is the default conf used by Spark, look at SparkContext.scala
-        long executorMemoryInBytes = Utils.memoryStringToMb(
-            SparkEnv.get().conf().get(SPARK_EXECUTOR_MEMORY_PROP, 
DEFAULT_SPARK_EXECUTOR_MEMORY_MB)) * 1024 * 1024L;
-        // 0.6 is the default value used by Spark,
-        // look at {@link
-        // 
https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/SparkConf.scala#L507}
-        double memoryFraction = Double.parseDouble(
-            SparkEnv.get().conf().get(SPARK_EXECUTOR_MEMORY_FRACTION_PROP, 
DEFAULT_SPARK_EXECUTOR_MEMORY_FRACTION));
-        double maxMemoryFractionForMerge = 
Double.parseDouble(maxMemoryFraction);
-        double userAvailableMemory = executorMemoryInBytes * (1 - 
memoryFraction);
-        long maxMemoryForMerge = (long) Math.floor(userAvailableMemory * 
maxMemoryFractionForMerge);
-        return Math.max(DEFAULT_MIN_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES, 
maxMemoryForMerge);
-      } else {
-        return DEFAULT_MAX_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES;
-      }
-    }
-
     public HoodieMemoryConfig build() {
       HoodieMemoryConfig config = new HoodieMemoryConfig(props);
       setDefaultOnCondition(props, 
!props.containsKey(MAX_MEMORY_FRACTION_FOR_COMPACTION_PROP),
           MAX_MEMORY_FRACTION_FOR_COMPACTION_PROP, 
DEFAULT_MAX_MEMORY_FRACTION_FOR_COMPACTION);
       setDefaultOnCondition(props, 
!props.containsKey(MAX_MEMORY_FRACTION_FOR_MERGE_PROP),
           MAX_MEMORY_FRACTION_FOR_MERGE_PROP, 
DEFAULT_MAX_MEMORY_FRACTION_FOR_MERGE);
+      long maxMemoryAllowedForMerge =
+          
SparkConfigUtils.getMaxMemoryAllowedForMerge(props.getProperty(MAX_MEMORY_FRACTION_FOR_MERGE_PROP));
       setDefaultOnCondition(props, 
!props.containsKey(MAX_MEMORY_FOR_MERGE_PROP), MAX_MEMORY_FOR_MERGE_PROP,
-          
String.valueOf(getMaxMemoryAllowedForMerge(props.getProperty(MAX_MEMORY_FRACTION_FOR_MERGE_PROP))));
+          String.valueOf(maxMemoryAllowedForMerge));
+      long maxMemoryAllowedForCompaction =
+          
SparkConfigUtils.getMaxMemoryAllowedForMerge(props.getProperty(MAX_MEMORY_FRACTION_FOR_COMPACTION_PROP));
 
 Review comment:
   Yes, you are right. We should probably extract these two lines into a 
separate method.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to