This is an automated email from the ASF dual-hosted git repository.

gaojun2048 pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/incubator-seatunnel.git


The following commit(s) were added to refs/heads/dev by this push:
     new 88d4ff5da [Engine][Bug] Fix bug can't get error in checkpoint (#3692)
88d4ff5da is described below

commit 88d4ff5da6889c6373609337ee6e5e496c121ef1
Author: Hisoka <[email protected]>
AuthorDate: Sat Dec 10 10:56:43 2022 +0800

    [Engine][Bug] Fix bug can't get error in checkpoint (#3692)
    
    * [Engine][Bug] Fix bug can't get error in checkpoint
---
 .../seatunnel/engine/server/TaskExecutionService.java  |  3 +--
 .../server/checkpoint/CheckpointCoordinator.java       | 18 ++++++++++++++----
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java
index 37b3e93e9..af96ea7ba 100644
--- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java
+++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java
@@ -513,8 +513,7 @@ public class TaskExecutionService implements DynamicMetricsProvider {
             logger.info("taskDone: " + taskGroup.getTaskGroupLocation());
             if (completionLatch.decrementAndGet() == 0) {
                 TaskGroupLocation taskGroupLocation = 
taskGroup.getTaskGroupLocation();
-                finishedExecutionContexts.put(taskGroupLocation, 
executionContexts.get(taskGroupLocation));
-                executionContexts.remove(taskGroupLocation);
+                finishedExecutionContexts.put(taskGroupLocation, 
executionContexts.remove(taskGroupLocation));
                 cancellationFutures.remove(taskGroupLocation);
                 Throwable ex = executionException.get();
                 if (ex == null) {
diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCoordinator.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCoordinator.java
index f7855bc59..debb6d777 100644
--- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCoordinator.java
+++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCoordinator.java
@@ -28,7 +28,6 @@ import org.apache.seatunnel.engine.checkpoint.storage.PipelineState;
 import org.apache.seatunnel.engine.checkpoint.storage.api.CheckpointStorage;
 import 
org.apache.seatunnel.engine.checkpoint.storage.common.ProtoStuffSerializer;
 import org.apache.seatunnel.engine.checkpoint.storage.common.Serializer;
-import 
org.apache.seatunnel.engine.checkpoint.storage.exception.CheckpointStorageException;
 import org.apache.seatunnel.engine.common.config.server.CheckpointConfig;
 import org.apache.seatunnel.engine.common.exception.SeaTunnelEngineException;
 import org.apache.seatunnel.engine.common.utils.PassiveCompletableFuture;
@@ -52,7 +51,6 @@ import lombok.SneakyThrows;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
 import java.time.Instant;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
@@ -273,6 +271,7 @@ public class CheckpointCoordinator {
     private void waitingPendingCheckpointDone() {
         while (pendingCounter.get() != 0) {
             try {
+                LOG.info("waiting pending checkpoint completed, pending 
counter: " + pendingCounter.get());
                 Thread.sleep(500);
             } catch (InterruptedException e) {
                 throw new SeaTunnelEngineException(e);
@@ -305,7 +304,17 @@ public class CheckpointCoordinator {
         pendingCompletableFuture.thenAcceptAsync(pendingCheckpoint -> {
             LOG.info("wait checkpoint completed: " + 
pendingCheckpoint.getCheckpointId());
             PassiveCompletableFuture<CompletedCheckpoint> completableFuture = 
pendingCheckpoint.getCompletableFuture();
-            completableFuture.thenAcceptAsync(this::completePendingCheckpoint);
+            completableFuture.whenCompleteAsync((completedCheckpoint, error) 
-> {
+                if (error != null) {
+                    LOG.error("trigger checkpoint failed", error);
+                } else {
+                    try {
+                        completePendingCheckpoint(completedCheckpoint);
+                    } catch (Throwable e) {
+                        LOG.error("complete checkpoint failed", e);
+                    }
+                }
+            });
 
             // Trigger the barrier and wait for all tasks to ACK
             LOG.debug("trigger checkpoint barrier {}/{}/{}, {}", 
pendingCheckpoint.getJobId(), pendingCheckpoint.getPipelineId(), 
pendingCheckpoint.getCheckpointId(), pendingCheckpoint.getCheckpointType());
@@ -490,7 +499,8 @@ public class CheckpointCoordinator {
                     String.valueOf(superfluous.getPipelineId()),
                     String.valueOf(superfluous.getCheckpointId()));
             }
-        } catch (IOException | CheckpointStorageException e) {
+        } catch (Throwable e) {
+            LOG.error("store checkpoint states failed.", e);
             sneakyThrow(e);
         }
         LOG.info("pending checkpoint({}/{}@{}) notify finished!", 
completedCheckpoint.getCheckpointId(), completedCheckpoint.getPipelineId(), 
completedCheckpoint.getJobId());

Reply via email to