zhli1142015 commented on code in PR #9585:
URL: https://github.com/apache/incubator-gluten/pull/9585#discussion_r2111678259
##########
gluten-core/src/main/java/org/apache/gluten/memory/memtarget/DynamicOffHeapSizingMemoryTarget.java:
##########
@@ -125,4 +225,159 @@ public String name() {
public MemoryUsageStats stats() {
  // Delegate to the recorder, which produces the usage statistics.
  final MemoryUsageStats recorded = recorder.toStats();
  return recorded;
}
+
+ /**
+  * Reports whether the running JVM implements the Java 9 specification or a
+  * newer one.
+  *
+  * <p>Reads the {@code java.specification.version} system property (falling
+  * back to {@code "1.8"} when it is absent), drops a leading {@code "1."} so
+  * that legacy values such as {@code "1.8"} become {@code "8"}, and compares
+  * the remaining integer against 9.
+  *
+  * @return {@code true} if the parsed version is at least 9; {@code false}
+  *     otherwise, including when the value is not a plain integer
+  */
+ public static boolean isJava9OrLater() {
+   final String legacyPrefix = "1.";
+   final String raw =
+       System.getProperty("java.specification.version", "1.8");
+   // "1.8" -> "8", "9" -> "9", "11" -> "11", etc.
+   final String major =
+       raw.startsWith(legacyPrefix)
+           ? raw.substring(legacyPrefix.length())
+           : raw;
+   try {
+     return Integer.parseInt(major) >= 9;
+   } catch (NumberFormatException ignored) {
+     // An unparseable version string is treated as "not Java 9+".
+     return false;
+   }
+ }
+
+ /**
+  * Checks whether the JVM memory can be shrunk by a full GC.
+  *
+  * @param totalMemory total on-heap memory
+  * @param freeMemory free on-heap memory
+  * @return {@code true} when {@code freeMemory} is strictly greater than
+  *     {@code totalMemory * GC_MAX_HEAP_FREE_RATIO}
+  */
+ public static boolean canShrinkJVMMemory(long totalMemory, long freeMemory) {
+   // Same comparison as "free > total * ratio", written threshold-first.
+   return totalMemory * GC_MAX_HEAP_FREE_RATIO < freeMemory;
+ }
+
+ /**
+  * Returns the current value of {@code TOTAL_EXPLICIT_GC_COUNT}.
+  *
+  * @return the counter value at the time of the call
+  */
+ public static long getTotalExplicitGCCount() {
+   final long explicitGcCount = TOTAL_EXPLICIT_GC_COUNT.get();
+   return explicitGcCount;
+ }
+
+ private static boolean exceedsMaxMemoryUsage(
+ long totalOnHeapMemory, long totalOffHeapMemory, long requestedSize,
double ratio) {
+ return requestedSize + totalOffHeapMemory + totalOnHeapMemory >=
TOTAL_MEMORY_SHARED * ratio;
+ }
+
+ /**
+  * Decides whether an asynchronous on-heap memory shrink should be started.
+  *
+  * <p>If most of the memory has already been used, there is a high chance
+  * that memory will be fully consumed. This situation is detected
+  * proactively; all of the checks below must pass, in this order.
+  *
+  * @param totalOnHeapMemory total on-heap memory
+  * @param freeOnHeapMemory free on-heap memory
+  * @param totalOffHeapMemory total off-heap memory
+  * @param requestedSize size of the pending request
+  * @return {@code true} when every condition holds
+  */
+ private static boolean shouldTriggerAsyncOnHeapMemoryShrink(
+     long totalOnHeapMemory,
+     long freeOnHeapMemory,
+     long totalOffHeapMemory,
+     long requestedSize) {
+   // 1. Overall usage must reach the async-GC usage ratio.
+   if (!exceedsMaxMemoryUsage(
+       totalOnHeapMemory,
+       totalOffHeapMemory,
+       requestedSize,
+       ASYNC_GC_MAX_TOTAL_MEMORY_USAGE_RATIO)) {
+     return false;
+   }
+   // 2. A full GC must be able to shrink the heap.
+   if (!canShrinkJVMMemory(totalOnHeapMemory, freeOnHeapMemory)) {
+     return false;
+   }
+   // 3. Limit GC frequency to prevent performance impact from excessive
+   //    garbage collection: the heap must be above the on-heap ratio.
+   if (!(totalOnHeapMemory
+       > TOTAL_MEMORY_SHARED * ASYNC_GC_MAX_ON_HEAP_MEMORY_RATIO)) {
+     return false;
+   }
+   // 4. When async GC is not suspended, trigger right away.
+   if (!ASYNC_GC_SUSPEND.get()) {
+     return true;
+   }
+   // 5. While suspended, trigger only if free memory is above the original
+   //    minimum-heap-free percentage of the total.
+   return freeOnHeapMemory
+       > totalOnHeapMemory * (ORIGINAL_MIN_HEAP_FREE_RATIO / 100.0);
+ }
+
+ private static long shrinkOnHeapMemoryInternal(
+ long totalMemory, long freeMemory, boolean isAsyncGc) {
+ long totalOffHeapMemory = USED_OFF_HEAP_BYTES.get();
+ LOG.warn(
+ String.format(
+ "Starting %sfull gc to shrink JVM memory: "
+ + "Total On-heap: %d, Free On-heap: %d, "
+ + "Total Off-heap: %d, Used On-Heap: %d, Executor memory: %d.",
+ isAsyncGc ? "async " : "",
+ totalMemory,
+ freeMemory,
+ totalOffHeapMemory,
+ (totalMemory - freeMemory),
+ TOTAL_MEMORY_SHARED));
+ // Explicitly calling System.gc() to trigger a full garbage collection.
+ // This is necessary in this context to attempt to shrink JVM memory usage
+ // when off-heap memory allocation is constrained. Use of System.gc() is
+ // generally discouraged due to its unpredictable performance impact, but
+ // here it is used as a last resort to prevent memory allocation failures.
+ System.gc();
+ long newTotalMemory = Runtime.getRuntime().totalMemory();
+ long newFreeMemory = Runtime.getRuntime().freeMemory();
+ int gcRetryTimes = 0;
+ while (!isAsyncGc
+ && gcRetryTimes < MAX_GC_RETRY_TIMES
+ && newTotalMemory >= totalMemory
+ && canShrinkJVMMemory(newTotalMemory, newFreeMemory)) {
+ // System.gc() is just a suggestion; the JVM may ignore it or perform
only a partial GC.
+ // Here, the total memory is not reduced but the free memory ratio is
bigger than the
+ // GC_MAX_HEAP_FREE_RATIO. So we need to call System.gc() again to try
to reduce the total
+ // memory.
+ // This is a workaround for the JVM's behavior of not reducing the total
memory after GC.
+ System.gc();
+ newTotalMemory = Runtime.getRuntime().totalMemory();
+ newFreeMemory = Runtime.getRuntime().freeMemory();
+ gcRetryTimes++;
+ }
+ // If the memory usage is still high after GC, we need to suspend the
async GC for a while.
+ if (isAsyncGc) {
+ ASYNC_GC_SUSPEND.set(
+ totalMemory - newTotalMemory < totalMemory *
(ORIGINAL_MIN_HEAP_FREE_RATIO / 100.0));
+ }
+
+ TOTAL_EXPLICIT_GC_COUNT.getAndAdd(1);
+ LOG.warn(
+ String.format(
+ "Finished %sfull gc to shrink JVM memory: "
+ + "Total On-heap: %d, Free On-heap: %d, "
+ + "Total Off-heap: %d, Used On-Heap: %d, Executor memory: %d, "
+ + "[GC Retry times: %d].",
+ isAsyncGc ? "async " : "",
+ newTotalMemory,
+ newFreeMemory,
+ totalOffHeapMemory,
+ (newTotalMemory - newFreeMemory),
+ TOTAL_MEMORY_SHARED,
+ gcRetryTimes));
+ return newTotalMemory;
+ }
+
+ public static long shrinkOnHeapMemory(long totalMemory, long freeMemory,
boolean isAsyncGc) {
+ boolean updateMaxHeapFreeRatio = false;
+ Object hotSpotBean = null;
+ String maxHeapFreeRatioName = "MaxHeapFreeRatio";
Review Comment:
The goal is for both non-native and native operators to be able to use the
full executor memory quota. That requires both kinds of operators to return
memory they no longer need back to the system. As it stands, only the JVM
holds on to free memory instead of releasing it. Therefore, the main work is
to make the JVM give that memory back to the system.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]