Alexey Kukushkin created IGNITE-28592:
-----------------------------------------

             Summary: Broken Ignite Service Node Filter in Ignite 2.17
                 Key: IGNITE-28592
                 URL: https://issues.apache.org/jira/browse/IGNITE-28592
             Project: Ignite
          Issue Type: Improvement
    Affects Versions: 2.17
            Reporter: Alexey Kukushkin


Ignite 2.17 introduced several severe issues with service node filters that did 
not exist in Ignite 2.16:
 # *Services deployed on non-matching nodes*: A service with a node filter 
that does not match a node is actually deployed on that node.
 # *Services not deployed on matching nodes*: A service with a node filter 
that matches a node is actually not deployed on that node.
 # *Cluster instability on node departure*: When a node without a service 
leaves the cluster, other nodes hosting that service crash due to a 
{{NullPointerException}}.

h2. Analysis

All these issues were likely caused by IGNITE-23226.

h2. Reproducer

The test {{ServiceNodeFilterTest#doesNotDeployServiceNotMatchingFilter}} 
demonstrates issue #1.

The test {{ServiceNodeFilterTest#servicelessNodeDoesNotFailServiceNode}} is 
flaky and can demonstrate:
 - *Issue #2*: When it fails to wait for the expected service deployment.
 - *Issue #3*: When it fails with an {{AssertionError}} on 
[line 481 of ServiceDeploymentTask.java|https://github.com/apache/ignite/blob/2.17.0/modules/core/src/main/java/org/apache/ignite/internal/processors/service/ServiceDeploymentTask.java#L481] 
(if assertions are enabled) or a {{NullPointerException}} on 
[line 483|https://github.com/apache/ignite/blob/2.17.0/modules/core/src/main/java/org/apache/ignite/internal/processors/service/ServiceDeploymentTask.java#L483] 
otherwise.

{code:java}
package sandbox.ignite;

import org.apache.ignite.Ignite;
import org.apache.ignite.Ignition;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.lang.IgnitePredicate;
import org.apache.ignite.services.Service;
import org.apache.ignite.services.ServiceConfiguration;
import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi;
import org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder;
import org.junit.jupiter.api.Test;

import java.time.Duration;
import java.util.Collections;
import java.util.Objects;
import java.util.function.Supplier;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
* Reproduces Ignite Service node filter issues in Ignite 2.17 that did not 
exist in Ignite 2.16.
 */
public class ServiceNodeFilterTest {
    private static final String SERVICE_NAME = 
IgniteEchoService.class.getName();

    /**
*** Ignite should not deploy a service on a node that does not match the 
service's node filter.
     */
    @Test
    public void doesNotDeployServiceNotMatchingFilter() throws 
InterruptedException {
        // GIVEN configuration for an Ignite cluster with two server nodes and 
a service whose node filter
        // does not match any of the servers
        final var serviceConfig = new ServiceConfiguration()
            .setName(SERVICE_NAME)
            .setService(new IgniteEchoService())
            .setTotalCount(0)
            .setMaxPerNodeCount(1)
            .setNodeFilter(new NodeConsistentIdFilter("noSuchId"));
        final var ignite1Config = 
getIgniteConfiguration("ignite1").setServiceConfiguration(serviceConfig);
        final var ignite2Config = 
getIgniteConfiguration("ignite2").setServiceConfiguration(serviceConfig);

        // WHEN the cluster is formed
        try (final var ignite1 = Ignition.start(ignite1Config);
             final var ignored = Ignition.start(ignite2Config)) {
            // THEN the service is not deployed on any node
            final var isServiceDeployed = waitFor(() -> 
IsServiceDeployedLocally(ignite1), Duration.ofSeconds(10));
            assertFalse(isServiceDeployed);
        }
    }

    /**
*** When a node without a service leaves the cluster, it should not cause 
another node to crash.
     */
    @Test
    public void servicelessNodeDoesNotFailServiceNode() throws 
InterruptedException {
        // GIVEN configuration for an Ignite cluster with three server nodes 
and a service whose node filter
        // matches only one node
        final var SERVICE_NODE_ID = "ignite2";
        final var serviceConfig = new ServiceConfiguration()
            .setName(SERVICE_NAME)
            .setService(new IgniteEchoService())
            .setTotalCount(1)
            .setMaxPerNodeCount(1)
            .setNodeFilter(new NodeConsistentIdFilter(SERVICE_NODE_ID));
        final var ignite1Config = 
getIgniteConfiguration("ignite1").setServiceConfiguration(serviceConfig);
        final var serviceNodeConfig = 
getIgniteConfiguration(SERVICE_NODE_ID).setServiceConfiguration(serviceConfig);
        final var ignite3Config = 
getIgniteConfiguration("ignite3").setServiceConfiguration(serviceConfig);

        // AND the cluster is formed
        // AND the service is deployed on the second node to join the cluster
        try (final var ignored1 = Ignition.start(ignite1Config);
             final var ignite2 = Ignition.start(serviceNodeConfig)) {
            var isServiceDeployed = waitFor(() -> 
IsServiceDeployedLocally(ignite2), Duration.ofSeconds(10));
            assertTrue(isServiceDeployed);
            try (final var ignored3 = Ignition.start(ignite3Config)) {
                Thread.sleep(10);
                // WHEN the last node to join the cluster leaves
            }

            // THEN the service remains deployed on the second node
            isServiceDeployed = waitFor(() -> 
IsServiceDeployedLocally(ignite2), Duration.ofSeconds(10));
            assertTrue(isServiceDeployed);
        }
    }

    private static Boolean IsServiceDeployedLocally(final Ignite ignite) {
        final var services = ignite.services();
        final var serviceDescriptors = services.serviceDescriptors();
        if (serviceDescriptors.size() == 1) {
            final var descriptor = serviceDescriptors.iterator().next();
            assertEquals(SERVICE_NAME, descriptor.name());
            final var localNodeId = ignite.cluster().localNode().id();
            return descriptor.topologySnapshot().getOrDefault(localNodeId, -1) 
> 0;
        }
        return false;
    }

    private static Boolean waitFor(final Supplier[Boolean] condition, final 
Duration duration)
        throws InterruptedException {
        final var sleepMs = 100;
        final var durationMs = duration.toMillis();
        final var count = durationMs / sleepMs + (durationMs % sleepMs > 0 ? 1 
: 0);
        var result = false;
        for (var i = 0; i [ count; ++i) {
            result = condition.get();
            if (result) {
                break;
            }
            Thread.sleep(sleepMs);
        }
        return result;
    }

    private static IgniteConfiguration getIgniteConfiguration(final String 
name) {
        return new IgniteConfiguration()
            .setIgniteInstanceName(name)
            .setConsistentId(name)
            .setMetricsLogFrequency(0)
            .setFailureDetectionTimeout(600_000)
            .setClientFailureDetectionTimeout(600_000)
            .setDiscoverySpi(
                new TcpDiscoverySpi()
                    .setIpFinder(new 
TcpDiscoveryVmIpFinder().setAddresses(Collections.singleton("127.0.0.1:48500")))
                    .setLocalPort(48500));
    }

    public static class IgniteEchoService implements Service {
    }

    public static class NodeConsistentIdFilter implements 
IgnitePredicate<ClusterNode] {
        private final String expectedId;

        public NodeConsistentIdFilter(final String expectedId) {
            this.expectedId = Objects.requireNonNull(expectedId);
        }

        @Override
        public boolean apply(final ClusterNode clusterNode) {
            final var actualId = clusterNode.consistentId();
            return expectedId.equals(actualId);
        }
    }
}
{code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to