This is an automated email from the ASF dual-hosted git repository.
mpochatkin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/ignite-3.git
The following commit(s) were added to refs/heads/main by this push:
new 9565e696257 IGNITE-26274 Fix on demand deploy race condition (#6470)
9565e696257 is described below
commit 9565e696257f92a666ea905c9024204f55fae4c8
Author: Vadim Pakhnushev <[email protected]>
AuthorDate: Mon Sep 1 17:25:19 2025 +0300
IGNITE-26274 Fix on demand deploy race condition (#6470)
---
.../ignite/internal/deployunit/UnitDownloader.java | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
diff --git
a/modules/code-deployment/src/main/java/org/apache/ignite/internal/deployunit/UnitDownloader.java
b/modules/code-deployment/src/main/java/org/apache/ignite/internal/deployunit/UnitDownloader.java
index a1c472710e2..012728c89f9 100644
---
a/modules/code-deployment/src/main/java/org/apache/ignite/internal/deployunit/UnitDownloader.java
+++
b/modules/code-deployment/src/main/java/org/apache/ignite/internal/deployunit/UnitDownloader.java
@@ -20,6 +20,7 @@ package org.apache.ignite.internal.deployunit;
import static org.apache.ignite.internal.deployunit.DeploymentStatus.DEPLOYED;
import static
org.apache.ignite.internal.deployunit.UnitContent.toDeploymentUnit;
import static
org.apache.ignite.internal.util.CompletableFutures.falseCompletedFuture;
+import static
org.apache.ignite.internal.util.CompletableFutures.trueCompletedFuture;
import java.util.Collection;
import java.util.List;
@@ -87,7 +88,22 @@ class UnitDownloader {
* @param nodes Nodes where the unit is deployed.
*/
CompletableFuture<Boolean> downloadUnit(String id, Version version,
Collection<String> nodes) {
- return tracker.track(id, version, () ->
messaging.downloadUnitContent(id, version, nodes)
+ // Get the status again inside the tracker. There could be a race when
another thread has just completed the download, updated the
+ // status to DEPLOYED and stopped tracking. In this case the tracked
job will be started, but we now have an outdated status and
+ // call downloadUnitContent again. When it completes and the unit is
deployed again, updating the node status returns false since it's
+ // already set to DEPLOYED by another thread. Subsequently,
IgniteDeployment.onDemandDeploy returns false and the job fails.
+ return tracker.track(id, version, () ->
deploymentUnitStore.getNodeStatus(nodeName, id, version)
+ .thenCompose(status -> {
+ if (status.status() == DEPLOYED) {
+ return trueCompletedFuture();
+ }
+ return downloadUnitContent(id, version, nodes);
+ })
+ );
+ }
+
+ private CompletableFuture<Boolean> downloadUnitContent(String id, Version
version, Collection<String> nodes) {
+ return messaging.downloadUnitContent(id, version, nodes)
.thenCompose(content -> {
DeploymentUnit unit = toDeploymentUnit(content);
return deployer.deploy(id, version, unit)
@@ -104,7 +120,6 @@ class UnitDownloader {
return deploymentUnitStore.updateNodeStatus(nodeName,
id, version, DEPLOYED);
}
return falseCompletedFuture();
- })
- );
+ });
}
}