Alexey Kukushkin created IGNITE-28592:
-----------------------------------------
Summary: Broken Ignite Service Node Filter in Ignite 2.17
Key: IGNITE-28592
URL: https://issues.apache.org/jira/browse/IGNITE-28592
Project: Ignite
Issue Type: Improvement
Affects Versions: 2.17
Reporter: Alexey Kukushkin
Ignite 2.17 introduced several severe issues with service node filters that did
not exist in Ignite 2.16:
# *Services deployed on non-matching nodes*: A service whose node filter
does not match a node is nevertheless deployed on that node.
# *Services not deployed on matching nodes*: A service whose node filter
matches a node is not deployed on that node.
# *Cluster instability on node departure*: When a node that does not host a
service leaves the cluster, other nodes hosting that service crash with a
{{NullPointerException}}.
h2. Analysis
All these issues were likely caused by IGNITE-23226.
h2. Reproducer
The test {{ServiceNodeFilterTest#doesNotDeployServiceNotMatchingFilter}}
demonstrates issue #1.
The test {{ServiceNodeFilterTest#servicelessNodeDoesNotFailServiceNode}} is
flaky and can demonstrate:
- *Issue #2*: When it fails to wait for the expected service deployment.
- *Issue #3*: When it fails with an {{AssertionError}} on [line 481 of
ServiceDeploymentTask.java|https://github.com/apache/ignite/blob/2.17.0/modules/core/src/main/java/org/apache/ignite/internal/processors/service/ServiceDeploymentTask.java#L481]
(if assertions are enabled) or with a {{NullPointerException}} on [line
483|https://github.com/apache/ignite/blob/2.17.0/modules/core/src/main/java/org/apache/ignite/internal/processors/service/ServiceDeploymentTask.java#L483]
otherwise.
{code:java}
package sandbox.ignite;
import org.apache.ignite.Ignite;
import org.apache.ignite.Ignition;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.lang.IgnitePredicate;
import org.apache.ignite.services.Service;
import org.apache.ignite.services.ServiceConfiguration;
import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi;
import org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder;
import org.junit.jupiter.api.Test;
import java.time.Duration;
import java.util.Collections;
import java.util.Objects;
import java.util.function.Supplier;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
* Reproduces Ignite Service node filter issues in Ignite 2.17 that did not
exist in Ignite 2.16.
*/
public class ServiceNodeFilterTest {
private static final String SERVICE_NAME =
IgniteEchoService.class.getName();
/**
*** Ignite should not deploy a service on a node that does not match the
service's node filter.
*/
@Test
public void doesNotDeployServiceNotMatchingFilter() throws
InterruptedException {
// GIVEN configuration for an Ignite cluster with two server nodes and
a service whose node filter
// does not match any of the servers
final var serviceConfig = new ServiceConfiguration()
.setName(SERVICE_NAME)
.setService(new IgniteEchoService())
.setTotalCount(0)
.setMaxPerNodeCount(1)
.setNodeFilter(new NodeConsistentIdFilter("noSuchId"));
final var ignite1Config =
getIgniteConfiguration("ignite1").setServiceConfiguration(serviceConfig);
final var ignite2Config =
getIgniteConfiguration("ignite2").setServiceConfiguration(serviceConfig);
// WHEN the cluster is formed
try (final var ignite1 = Ignition.start(ignite1Config);
final var ignored = Ignition.start(ignite2Config)) {
// THEN the service is not deployed on any node
final var isServiceDeployed = waitFor(() ->
IsServiceDeployedLocally(ignite1), Duration.ofSeconds(10));
assertFalse(isServiceDeployed);
}
}
/**
*** When a node without a service leaves the cluster, it should not cause
another node to crash.
*/
@Test
public void servicelessNodeDoesNotFailServiceNode() throws
InterruptedException {
// GIVEN configuration for an Ignite cluster with three server nodes
and a service whose node filter
// matches only one node
final var SERVICE_NODE_ID = "ignite2";
final var serviceConfig = new ServiceConfiguration()
.setName(SERVICE_NAME)
.setService(new IgniteEchoService())
.setTotalCount(1)
.setMaxPerNodeCount(1)
.setNodeFilter(new NodeConsistentIdFilter(SERVICE_NODE_ID));
final var ignite1Config =
getIgniteConfiguration("ignite1").setServiceConfiguration(serviceConfig);
final var serviceNodeConfig =
getIgniteConfiguration(SERVICE_NODE_ID).setServiceConfiguration(serviceConfig);
final var ignite3Config =
getIgniteConfiguration("ignite3").setServiceConfiguration(serviceConfig);
// AND the cluster is formed
// AND the service is deployed on the second node to join the cluster
try (final var ignored1 = Ignition.start(ignite1Config);
final var ignite2 = Ignition.start(serviceNodeConfig)) {
var isServiceDeployed = waitFor(() ->
IsServiceDeployedLocally(ignite2), Duration.ofSeconds(10));
assertTrue(isServiceDeployed);
try (final var ignored3 = Ignition.start(ignite3Config)) {
Thread.sleep(10);
// WHEN the last node to join the cluster leaves
}
// THEN the service remains deployed on the second node
isServiceDeployed = waitFor(() ->
IsServiceDeployedLocally(ignite2), Duration.ofSeconds(10));
assertTrue(isServiceDeployed);
}
}
private static Boolean IsServiceDeployedLocally(final Ignite ignite) {
final var services = ignite.services();
final var serviceDescriptors = services.serviceDescriptors();
if (serviceDescriptors.size() == 1) {
final var descriptor = serviceDescriptors.iterator().next();
assertEquals(SERVICE_NAME, descriptor.name());
final var localNodeId = ignite.cluster().localNode().id();
return descriptor.topologySnapshot().getOrDefault(localNodeId, -1)
> 0;
}
return false;
}
private static Boolean waitFor(final Supplier[Boolean] condition, final
Duration duration)
throws InterruptedException {
final var sleepMs = 100;
final var durationMs = duration.toMillis();
final var count = durationMs / sleepMs + (durationMs % sleepMs > 0 ? 1
: 0);
var result = false;
for (var i = 0; i [ count; ++i) {
result = condition.get();
if (result) {
break;
}
Thread.sleep(sleepMs);
}
return result;
}
private static IgniteConfiguration getIgniteConfiguration(final String
name) {
return new IgniteConfiguration()
.setIgniteInstanceName(name)
.setConsistentId(name)
.setMetricsLogFrequency(0)
.setFailureDetectionTimeout(600_000)
.setClientFailureDetectionTimeout(600_000)
.setDiscoverySpi(
new TcpDiscoverySpi()
.setIpFinder(new
TcpDiscoveryVmIpFinder().setAddresses(Collections.singleton("127.0.0.1:48500")))
.setLocalPort(48500));
}
public static class IgniteEchoService implements Service {
}
public static class NodeConsistentIdFilter implements
IgnitePredicate<ClusterNode] {
private final String expectedId;
public NodeConsistentIdFilter(final String expectedId) {
this.expectedId = Objects.requireNonNull(expectedId);
}
@Override
public boolean apply(final ClusterNode clusterNode) {
final var actualId = clusterNode.consistentId();
return expectedId.equals(actualId);
}
}
}
{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)