[ 
https://issues.apache.org/jira/browse/IGNITE-28592?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Nikolay Izhikov reassigned IGNITE-28592:
----------------------------------------

    Assignee: Nikolay Izhikov

> Broken Ignite Service Node Filter in Ignite 2.17
> ------------------------------------------------
>
>                 Key: IGNITE-28592
>                 URL: https://issues.apache.org/jira/browse/IGNITE-28592
>             Project: Ignite
>          Issue Type: Improvement
>    Affects Versions: 2.17
>            Reporter: Alexey Kukushkin
>            Assignee: Nikolay Izhikov
>            Priority: Major
>
> Ignite 2.17 introduced several severe issues with service node filters that 
> did not exist in Ignite 2.16:
>  # {*}Services deployed on non-matching nodes{*}: A service with a node 
> filter that does not match a node is actually deployed on that node.
>  # {*}Services not deployed on matching nodes{*}: A service with a node 
> filter that matches a node is actually not deployed on that node.
>  # {*}Cluster instability on node departure{*}: When a node without a service 
> leaves the cluster, other nodes hosting that service crash due to a 
> {{{}NullPointerException{}}}.
> h2. Analysis
> All these issues were likely caused by IGNITE-23226.
> h2. Reproducer
> The test {{ServiceNodeFilterTest#doesNotDeployServiceNotMatchingFilter}} 
> demonstrates issue #1.
> The test {{ServiceNodeFilterTest#servicelessNodeDoesNotFailServiceNode}} is 
> flaky and can demonstrate:
>  - {*}Issue #2{*}: When it fails to wait for the expected service deployment.
>  - {*}Issue #3{*}: When it fails with an {{AssertionError}} on [line 481 of 
> ServiceDeploymentTask.java|https://github.com/apache/ignite/blob/2.17.0/modules/core/src/main/java/org/apache/ignite/internal/processors/service/ServiceDeploymentTask.java#L481]
>  (if assertions are enabled) or a {{NullPointerException}} on [line 
> 483|https://github.com/apache/ignite/blob/2.17.0/modules/core/src/main/java/org/apache/ignite/internal/processors/service/ServiceDeploymentTask.java#L483]
>  otherwise.
> {code:java}
> package sandbox.ignite;
> import org.apache.ignite.Ignite;
> import org.apache.ignite.Ignition;
> import org.apache.ignite.cluster.ClusterNode;
> import org.apache.ignite.configuration.IgniteConfiguration;
> import org.apache.ignite.lang.IgnitePredicate;
> import org.apache.ignite.services.Service;
> import org.apache.ignite.services.ServiceConfiguration;
> import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi;
> import org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder;
> import org.junit.jupiter.api.Test;
> import java.time.Duration;
> import java.util.Collections;
> import java.util.Objects;
> import java.util.function.Supplier;
> import static org.junit.jupiter.api.Assertions.assertEquals;
> import static org.junit.jupiter.api.Assertions.assertFalse;
> import static org.junit.jupiter.api.Assertions.assertTrue;
> /**
>  * Reproduces Ignite Service node filter issues in Ignite 2.17 that did not
>  * exist in Ignite 2.16.
>  */
> public class ServiceNodeFilterTest {
>     private static final String SERVICE_NAME = 
> IgniteEchoService.class.getName();
>     /**
>      * Ignite should not deploy a service on a node that does not match the
>      * service's node filter.
>      */
>     @Test
>     public void doesNotDeployServiceNotMatchingFilter() throws 
> InterruptedException {
>         // GIVEN configuration for an Ignite cluster with two server nodes 
> and a service whose node filter
>         // does not match any of the servers
>         final var serviceConfig = new ServiceConfiguration()
>             .setName(SERVICE_NAME)
>             .setService(new IgniteEchoService())
>             .setTotalCount(0)
>             .setMaxPerNodeCount(1)
>             .setNodeFilter(new NodeConsistentIdFilter("noSuchId"));
>         final var ignite1Config = 
> getIgniteConfiguration("ignite1").setServiceConfiguration(serviceConfig);
>         final var ignite2Config = 
> getIgniteConfiguration("ignite2").setServiceConfiguration(serviceConfig);
>         // WHEN the cluster is formed
>         try (final var ignite1 = Ignition.start(ignite1Config);
>              final var ignored = Ignition.start(ignite2Config)) {
>             // THEN the service is not deployed on any node
>             final var isServiceDeployed = waitFor(() -> 
> IsServiceDeployedLocally(ignite1), Duration.ofSeconds(10));
>             assertFalse(isServiceDeployed);
>         }
>     }
>     /**
>      * When a node without a service leaves the cluster, it should not cause
>      * another node to crash.
>      */
>     @Test
>     public void servicelessNodeDoesNotFailServiceNode() throws 
> InterruptedException {
>         // GIVEN configuration for an Ignite cluster with three server nodes 
> and a service whose node filter
>         // matches only one node
>         final var SERVICE_NODE_ID = "ignite2";
>         final var serviceConfig = new ServiceConfiguration()
>             .setName(SERVICE_NAME)
>             .setService(new IgniteEchoService())
>             .setTotalCount(1)
>             .setMaxPerNodeCount(1)
>             .setNodeFilter(new NodeConsistentIdFilter(SERVICE_NODE_ID));
>         final var ignite1Config = 
> getIgniteConfiguration("ignite1").setServiceConfiguration(serviceConfig);
>         final var serviceNodeConfig = 
> getIgniteConfiguration(SERVICE_NODE_ID).setServiceConfiguration(serviceConfig);
>         final var ignite3Config = 
> getIgniteConfiguration("ignite3").setServiceConfiguration(serviceConfig);
>         // AND the cluster is formed
>         // AND the service is deployed on the second node to join the cluster
>         try (final var ignored1 = Ignition.start(ignite1Config);
>              final var ignite2 = Ignition.start(serviceNodeConfig)) {
>             var isServiceDeployed = waitFor(() -> 
> IsServiceDeployedLocally(ignite2), Duration.ofSeconds(10));
>             assertTrue(isServiceDeployed);
>             try (final var ignored3 = Ignition.start(ignite3Config)) {
>                 Thread.sleep(10);
>                 // WHEN the last node to join the cluster leaves
>             }
>             // THEN the service remains deployed on the second node
>             isServiceDeployed = waitFor(() -> 
> IsServiceDeployedLocally(ignite2), Duration.ofSeconds(10));
>             assertTrue(isServiceDeployed);
>         }
>     }
>     private static Boolean IsServiceDeployedLocally(final Ignite ignite) {
>         final var services = ignite.services();
>         final var serviceDescriptors = services.serviceDescriptors();
>         if (serviceDescriptors.size() == 1) {
>             final var descriptor = serviceDescriptors.iterator().next();
>             assertEquals(SERVICE_NAME, descriptor.name());
>             final var localNodeId = ignite.cluster().localNode().id();
>             return descriptor.topologySnapshot().getOrDefault(localNodeId, 
> -1) > 0;
>         }
>         return false;
>     }
>     private static Boolean waitFor(final Supplier<Boolean> condition, final 
> Duration duration)
>         throws InterruptedException {
>         final var sleepMs = 100;
>         final var durationMs = duration.toMillis();
>         final var count = durationMs / sleepMs + (durationMs % sleepMs > 0 ? 
> 1 : 0);
>         var result = false;
>         for (var i = 0; i < count; ++i) {
>             result = condition.get();
>             if (result) {
>                 break;
>             }
>             Thread.sleep(sleepMs);
>         }
>         return result;
>     }
>     private static IgniteConfiguration getIgniteConfiguration(final String 
> name) {
>         return new IgniteConfiguration()
>             .setIgniteInstanceName(name)
>             .setConsistentId(name)
>             .setMetricsLogFrequency(0)
>             .setFailureDetectionTimeout(600_000)
>             .setClientFailureDetectionTimeout(600_000)
>             .setDiscoverySpi(
>                 new TcpDiscoverySpi()
>                     .setIpFinder(new 
> TcpDiscoveryVmIpFinder().setAddresses(Collections.singleton("127.0.0.1:48500")))
>                     .setLocalPort(48500));
>     }
>     public static class IgniteEchoService implements Service {
>     }
>     public static class NodeConsistentIdFilter implements 
> IgnitePredicate<ClusterNode> {
>         private final String expectedId;
>         public NodeConsistentIdFilter(final String expectedId) {
>             this.expectedId = Objects.requireNonNull(expectedId);
>         }
>         @Override
>         public boolean apply(final ClusterNode clusterNode) {
>             final var actualId = clusterNode.consistentId();
>             return expectedId.equals(actualId);
>         }
>     }
> }
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to