This is an automated email from the ASF dual-hosted git repository.
gaojun2048 pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/incubator-seatunnel.git
The following commit(s) were added to refs/heads/dev by this push:
new 88d4ff5da [Engine][Bug] Fix bug can't get error in checkpoint (#3692)
88d4ff5da is described below
commit 88d4ff5da6889c6373609337ee6e5e496c121ef1
Author: Hisoka <[email protected]>
AuthorDate: Sat Dec 10 10:56:43 2022 +0800
[Engine][Bug] Fix bug can't get error in checkpoint (#3692)
* [Engine][Bug] Fix bug can't get error in checkpoint
---
.../seatunnel/engine/server/TaskExecutionService.java | 3 +--
.../server/checkpoint/CheckpointCoordinator.java | 18 ++++++++++++++----
2 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java
index 37b3e93e9..af96ea7ba 100644
--- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java
+++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java
@@ -513,8 +513,7 @@ public class TaskExecutionService implements DynamicMetricsProvider {
logger.info("taskDone: " + taskGroup.getTaskGroupLocation());
if (completionLatch.decrementAndGet() == 0) {
TaskGroupLocation taskGroupLocation =
taskGroup.getTaskGroupLocation();
- finishedExecutionContexts.put(taskGroupLocation,
executionContexts.get(taskGroupLocation));
- executionContexts.remove(taskGroupLocation);
+ finishedExecutionContexts.put(taskGroupLocation,
executionContexts.remove(taskGroupLocation));
cancellationFutures.remove(taskGroupLocation);
Throwable ex = executionException.get();
if (ex == null) {
diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCoordinator.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCoordinator.java
index f7855bc59..debb6d777 100644
--- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCoordinator.java
+++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointCoordinator.java
@@ -28,7 +28,6 @@ import org.apache.seatunnel.engine.checkpoint.storage.PipelineState;
import org.apache.seatunnel.engine.checkpoint.storage.api.CheckpointStorage;
import
org.apache.seatunnel.engine.checkpoint.storage.common.ProtoStuffSerializer;
import org.apache.seatunnel.engine.checkpoint.storage.common.Serializer;
-import
org.apache.seatunnel.engine.checkpoint.storage.exception.CheckpointStorageException;
import org.apache.seatunnel.engine.common.config.server.CheckpointConfig;
import org.apache.seatunnel.engine.common.exception.SeaTunnelEngineException;
import org.apache.seatunnel.engine.common.utils.PassiveCompletableFuture;
@@ -52,7 +51,6 @@ import lombok.SneakyThrows;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
import java.time.Instant;
import java.util.ArrayDeque;
import java.util.ArrayList;
@@ -273,6 +271,7 @@ public class CheckpointCoordinator {
private void waitingPendingCheckpointDone() {
while (pendingCounter.get() != 0) {
try {
+ LOG.info("waiting pending checkpoint completed, pending
counter: " + pendingCounter.get());
Thread.sleep(500);
} catch (InterruptedException e) {
throw new SeaTunnelEngineException(e);
@@ -305,7 +304,17 @@ public class CheckpointCoordinator {
pendingCompletableFuture.thenAcceptAsync(pendingCheckpoint -> {
LOG.info("wait checkpoint completed: " +
pendingCheckpoint.getCheckpointId());
PassiveCompletableFuture<CompletedCheckpoint> completableFuture =
pendingCheckpoint.getCompletableFuture();
- completableFuture.thenAcceptAsync(this::completePendingCheckpoint);
+ completableFuture.whenCompleteAsync((completedCheckpoint, error)
-> {
+ if (error != null) {
+ LOG.error("trigger checkpoint failed", error);
+ } else {
+ try {
+ completePendingCheckpoint(completedCheckpoint);
+ } catch (Throwable e) {
+ LOG.error("complete checkpoint failed", e);
+ }
+ }
+ });
// Trigger the barrier and wait for all tasks to ACK
LOG.debug("trigger checkpoint barrier {}/{}/{}, {}",
pendingCheckpoint.getJobId(), pendingCheckpoint.getPipelineId(),
pendingCheckpoint.getCheckpointId(), pendingCheckpoint.getCheckpointType());
@@ -490,7 +499,8 @@ public class CheckpointCoordinator {
String.valueOf(superfluous.getPipelineId()),
String.valueOf(superfluous.getCheckpointId()));
}
- } catch (IOException | CheckpointStorageException e) {
+ } catch (Throwable e) {
+ LOG.error("store checkpoint states failed.", e);
sneakyThrow(e);
}
LOG.info("pending checkpoint({}/{}@{}) notify finished!",
completedCheckpoint.getCheckpointId(), completedCheckpoint.getPipelineId(),
completedCheckpoint.getJobId());