[
https://issues.apache.org/jira/browse/IGNITE-28592?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Nikolay Izhikov reassigned IGNITE-28592:
----------------------------------------
Assignee: Nikolay Izhikov
> Broken Ignite Service Node Filter in Ignite 2.17
> ------------------------------------------------
>
> Key: IGNITE-28592
> URL: https://issues.apache.org/jira/browse/IGNITE-28592
> Project: Ignite
> Issue Type: Improvement
> Affects Versions: 2.17
> Reporter: Alexey Kukushkin
> Assignee: Nikolay Izhikov
> Priority: Major
>
> Ignite 2.17 introduced several severe issues with service node filters that
> did not exist in Ignite 2.16:
> # {*}Services deployed on non-matching nodes{*}: A service with a node
> filter that does not match a node is actually deployed on that node.
> # {*}Services not deployed on matching nodes{*}: A service with a node
> filter that matches a node is actually not deployed on that node.
> # {*}Cluster instability on node departure{*}: When a node without a service
> leaves the cluster, other nodes hosting that service crash due to a
> {{{}NullPointerException{}}}.
> h2. Analysis
> All these issues were likely caused by IGNITE-23226.
> h2. Reproducer
> The test {{ServiceNodeFilterTest#doesNotDeployServiceNotMatchingFilter}}
> demonstrates issue #1.
> The test {{ServiceNodeFilterTest#servicelessNodeDoesNotFailServiceNode}} is
> flaky and can demonstrate:
> - {*}Issue #2{*}: When it fails to wait for the expected service deployment.
> - {*}Issue #3{*}: When it fails with an {{AssertionError}} on [line 481 of
> ServiceDeploymentTask.java|https://github.com/apache/ignite/blob/2.17.0/modules/core/src/main/java/org/apache/ignite/internal/processors/service/ServiceDeploymentTask.java#L481]
> (if assertions are enabled) or a {{NullPointerException}} on [line
> 483|https://github.com/apache/ignite/blob/2.17.0/modules/core/src/main/java/org/apache/ignite/internal/processors/service/ServiceDeploymentTask.java#L483]
> otherwise.
> {code:java}
> package sandbox.ignite;
> import org.apache.ignite.Ignite;
> import org.apache.ignite.Ignition;
> import org.apache.ignite.cluster.ClusterNode;
> import org.apache.ignite.configuration.IgniteConfiguration;
> import org.apache.ignite.lang.IgnitePredicate;
> import org.apache.ignite.services.Service;
> import org.apache.ignite.services.ServiceConfiguration;
> import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi;
> import org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder;
> import org.junit.jupiter.api.Test;
> import java.time.Duration;
> import java.util.Collections;
> import java.util.Objects;
> import java.util.function.Supplier;
> import static org.junit.jupiter.api.Assertions.assertEquals;
> import static org.junit.jupiter.api.Assertions.assertFalse;
> import static org.junit.jupiter.api.Assertions.assertTrue;
> /**
> * Reproduces Ignite Service node filter issues in Ignite 2.17 that did not
> exist in Ignite 2.16.
> */
> public class ServiceNodeFilterTest {
> private static final String SERVICE_NAME =
> IgniteEchoService.class.getName();
> /**
> *** Ignite should not deploy a service on a node that does not match the
> service's node filter.
> */
> @Test
> public void doesNotDeployServiceNotMatchingFilter() throws
> InterruptedException {
> // GIVEN configuration for an Ignite cluster with two server nodes
> // and a service whose node filter
> // does not match any of the servers
> final var serviceConfig = new ServiceConfiguration()
> .setName(SERVICE_NAME)
> .setService(new IgniteEchoService())
> .setTotalCount(0)
> .setMaxPerNodeCount(1)
> .setNodeFilter(new NodeConsistentIdFilter("noSuchId"));
> final var ignite1Config =
> getIgniteConfiguration("ignite1").setServiceConfiguration(serviceConfig);
> final var ignite2Config =
> getIgniteConfiguration("ignite2").setServiceConfiguration(serviceConfig);
> // WHEN the cluster is formed
> try (final var ignite1 = Ignition.start(ignite1Config);
> final var ignored = Ignition.start(ignite2Config)) {
> // THEN the service is not deployed on any node
> final var isServiceDeployed = waitFor(() ->
> IsServiceDeployedLocally(ignite1), Duration.ofSeconds(10));
> assertFalse(isServiceDeployed);
> }
> }
> /**
> *** When a node without a service leaves the cluster, it should not cause
> another node to crash.
> */
> @Test
> public void servicelessNodeDoesNotFailServiceNode() throws
> InterruptedException {
> // GIVEN configuration for an Ignite cluster with three server nodes
> // and a service whose node filter
> // matches only one node
> final var SERVICE_NODE_ID = "ignite2";
> final var serviceConfig = new ServiceConfiguration()
> .setName(SERVICE_NAME)
> .setService(new IgniteEchoService())
> .setTotalCount(1)
> .setMaxPerNodeCount(1)
> .setNodeFilter(new NodeConsistentIdFilter(SERVICE_NODE_ID));
> final var ignite1Config =
> getIgniteConfiguration("ignite1").setServiceConfiguration(serviceConfig);
> final var serviceNodeConfig =
> getIgniteConfiguration(SERVICE_NODE_ID).setServiceConfiguration(serviceConfig);
> final var ignite3Config =
> getIgniteConfiguration("ignite3").setServiceConfiguration(serviceConfig);
> // AND the cluster is formed
> // AND the service is deployed on the second node to join the cluster
> try (final var ignored1 = Ignition.start(ignite1Config);
> final var ignite2 = Ignition.start(serviceNodeConfig)) {
> var isServiceDeployed = waitFor(() ->
> IsServiceDeployedLocally(ignite2), Duration.ofSeconds(10));
> assertTrue(isServiceDeployed);
> try (final var ignored3 = Ignition.start(ignite3Config)) {
> Thread.sleep(10);
> // WHEN the last node to join the cluster leaves
> }
> // THEN the service remains deployed on the second node
> isServiceDeployed = waitFor(() ->
> IsServiceDeployedLocally(ignite2), Duration.ofSeconds(10));
> assertTrue(isServiceDeployed);
> }
> }
> private static Boolean IsServiceDeployedLocally(final Ignite ignite) {
> final var services = ignite.services();
> final var serviceDescriptors = services.serviceDescriptors();
> if (serviceDescriptors.size() == 1) {
> final var descriptor = serviceDescriptors.iterator().next();
> assertEquals(SERVICE_NAME, descriptor.name());
> final var localNodeId = ignite.cluster().localNode().id();
> return descriptor.topologySnapshot().getOrDefault(localNodeId,
> -1) > 0;
> }
> return false;
> }
> private static Boolean waitFor(final Supplier<Boolean> condition, final
> Duration duration)
> throws InterruptedException {
> final var sleepMs = 100;
> final var durationMs = duration.toMillis();
> final var count = durationMs / sleepMs + (durationMs % sleepMs > 0 ?
> 1 : 0);
> var result = false;
> for (var i = 0; i < count; ++i) {
> result = condition.get();
> if (result) {
> break;
> }
> Thread.sleep(sleepMs);
> }
> return result;
> }
> private static IgniteConfiguration getIgniteConfiguration(final String
> name) {
> return new IgniteConfiguration()
> .setIgniteInstanceName(name)
> .setConsistentId(name)
> .setMetricsLogFrequency(0)
> .setFailureDetectionTimeout(600_000)
> .setClientFailureDetectionTimeout(600_000)
> .setDiscoverySpi(
> new TcpDiscoverySpi()
> .setIpFinder(new
> TcpDiscoveryVmIpFinder().setAddresses(Collections.singleton("127.0.0.1:48500")))
> .setLocalPort(48500));
> }
> public static class IgniteEchoService implements Service {
> }
> public static class NodeConsistentIdFilter implements
> IgnitePredicate<ClusterNode> {
> private final String expectedId;
> public NodeConsistentIdFilter(final String expectedId) {
> this.expectedId = Objects.requireNonNull(expectedId);
> }
> @Override
> public boolean apply(final ClusterNode clusterNode) {
> final var actualId = clusterNode.consistentId();
> return expectedId.equals(actualId);
> }
> }
> }
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)