XComp commented on code in PR #27921:
URL: https://github.com/apache/flink/pull/27921#discussion_r3356270379


##########
flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adaptive/Executing.java:
##########
@@ -182,6 +190,106 @@ public ScheduledFuture<?> scheduleOperation(Runnable 
callback, Duration delay) {
         return context.runIfState(this, callback, delay);
     }
 
+    @Override
+    public void requestActiveCheckpointTrigger() {
+        if (!activeCheckpointTriggerEnabled) {
+            return;
+        }
+        final CheckpointCoordinator checkpointCoordinator =
+                getExecutionGraph().getCheckpointCoordinator();
+        if (checkpointCoordinator == null
+                || !checkpointCoordinator.isPeriodicCheckpointingConfigured()) 
{
+            getLogger()
+                    .debug(
+                            "Skipping active checkpoint trigger for rescale: 
checkpointing not configured.");
+            return;
+        }
+
+        final Optional<Duration> triggerDelay =
+                checkpointCoordinator.getActiveCheckpointTriggerDelay();
+        if (triggerDelay.isEmpty()) {
+            getLogger()
+                    .debug(
+                            "Skipping active checkpoint trigger for rescale: 
checkpoint already in progress.");
+            return;
+        }
+        scheduleActiveCheckpointTriggerRetry(triggerDelay.get());
+    }
+
+    private void scheduleActiveCheckpointTriggerRetry(Duration delay) {
+        if (activeCheckpointTriggerScheduled) {
+            return;
+        }
+        activeCheckpointTriggerScheduled = true;
+        if (!delay.isZero()) {
+            getLogger()
+                    .debug(
+                            "Min pause not satisfied, scheduling active 
checkpoint trigger retry in {} ms.",
+                            delay.toMillis());
+        }
+        context.runIfState(this, this::tryFireActiveCheckpointAfterRetry, 
delay);
+    }
+
+    private void tryFireActiveCheckpointAfterRetry() {
+        activeCheckpointTriggerScheduled = false;
+
+        // Parallelism is the only guard re-evaluated here: it can change 
between the request
+        // and the scheduled fire (e.g. resources changed again, or the 
parallelism was reverted
+        // back to the current value while we waited for min-pause). The null 
check and
+        // periodic-checkpoint config are invariants validated at request time.
+        if (!parallelismChanged()) {
+            getLogger()
+                    .debug("Active checkpoint trigger for rescale dropped: 
parallelism unchanged.");
+            return;
+        }
+        final CheckpointCoordinator checkpointCoordinator =
+                Preconditions.checkNotNull(
+                        getExecutionGraph().getCheckpointCoordinator(),
+                        "Checkpoint coordinator was non-null when the trigger 
was scheduled; "
+                                + "an Executing state never drops its 
coordinator.");
+        final Optional<Duration> triggerDelay =
+                checkpointCoordinator.getActiveCheckpointTriggerDelay();
+        if (triggerDelay.isEmpty()) {
+            getLogger()
+                    .debug(
+                            "Active checkpoint trigger for rescale dropped: 
checkpoint already in progress after retry.");
+        } else if (triggerDelay.get().isZero()) {
+            fireActiveCheckpointTrigger(checkpointCoordinator);
+        } else {
+            getLogger()
+                    .debug(
+                            "Active checkpoint trigger for rescale silently 
dropped: a periodic checkpoint completed while the trigger was scheduled.");
+        }
+    }
+
+    private void fireActiveCheckpointTrigger(CheckpointCoordinator 
checkpointCoordinator) {
+        Preconditions.checkState(
+                activeCheckpointTriggerEnabled,
+                "Active checkpoint trigger fired while the feature is 
disabled.");
+        activeCheckpointTriggerScheduled = false;

Review Comment:
   Isn't this assignment redundant? We're already setting it in the calling method. 🤔



##########
flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/CheckpointCoordinator.java:
##########
@@ -1999,6 +2000,29 @@ public long getCheckpointTimeout() {
         return checkpointTimeout;
     }
 
+    /**
+     * Returns the remaining {@link Duration} until {@code 
minPauseBetweenCheckpoints} is satisfied
+     * for a new active-trigger checkpoint, computed from the time elapsed 
since the last completed
+     * checkpoint (or from the coordinator clock's epoch when no checkpoint 
has completed yet —
+     * which is normally far in the past in production). {@link Duration#ZERO} 
means the trigger can
+     * fire immediately.
+     *
+     * <p>Returns {@link Optional#empty()} as a fallback if a checkpoint is 
already in flight
+     * (triggering or pending), in which case no active trigger should be 
scheduled.
+     *
+     * <p>All checks are made under the coordinator lock.

Review Comment:
   That's an implementation detail that doesn't need to be exposed via JavaDoc.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to