sashapolo commented on code in PR #7569:
URL: https://github.com/apache/ignite-3/pull/7569#discussion_r2791892500


##########
modules/network/src/integrationTest/java/org/apache/ignite/internal/network/ItStaticNodeFinderTest.java:
##########
@@ -17,47 +17,112 @@
 
 package org.apache.ignite.internal.network;
 
+import static 
org.apache.ignite.internal.ClusterConfiguration.DEFAULT_BASE_PORT;
 import static 
org.apache.ignite.internal.testframework.IgniteTestUtils.assertThrowsWithCause;
+import static 
org.apache.ignite.internal.testframework.IgniteTestUtils.runAsync;
+import static 
org.apache.ignite.internal.testframework.matchers.CompletableFutureExceptionMatcher.willThrowWithCauseOrSuppressed;
 import static org.apache.ignite.internal.util.ExceptionUtils.unwrapRootCause;
 import static 
org.apache.ignite.lang.ErrorGroups.Network.ADDRESS_UNRESOLVED_ERR;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.is;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
-import org.apache.ignite.internal.ClusterPerClassIntegrationTest;
+import com.typesafe.config.parser.ConfigDocumentFactory;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.ignite.internal.ClusterPerTestIntegrationTest;
+import org.apache.ignite.internal.NodeBootstrapConfigUpdater;
+import org.apache.ignite.internal.failure.FailureManager;
 import org.apache.ignite.internal.lang.IgniteInternalException;
+import org.apache.ignite.internal.testframework.log4j2.LogInspector;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.TestInfo;
 
-/**
- * Tests that node finder failure causes node shutdown.
- */
-class ItStaticNodeFinderTest extends ClusterPerClassIntegrationTest {
+class ItStaticNodeFinderTest extends ClusterPerTestIntegrationTest {
     @Override
     protected int initialNodes() {
-        return 1;
-    }
-
-    @Override
-    protected String getNodeBootstrapConfigTemplate() {
-        return "ignite {\n"
-                + "  network: {\n"
-                + "    nodeFinder.netClusterNodes: [ \"bad.host:1234\" ]\n"
-                + "  },\n"
-                + "}";
-    }
-
-    @Override
-    protected boolean needInitializeCluster() {
-        return false;
+        return 0;
     }
 
+    /** Tests that node finder failure causes node shutdown. */
     @Test
     void testNodeShutdownOnNodeFinderFailure(TestInfo testInfo) {
         Throwable throwable = assertThrowsWithCause(
-                () -> CLUSTER.startAndInit(testInfo, initialNodes(), 
cmgMetastoreNodes(), this::configureInitParameters),
-                IgniteInternalException.class);
+                () -> startEmbeddedNode(
+                        testInfo,
+                        0,
+                        config -> ConfigDocumentFactory.parseString(config)
+                                
.withValueText("ignite.network.nodeFinder.netClusterNodes", "[ 
\"bad.host:1234\" ]")
+                                
.withValueText("ignite.network.nodeFinder.nameResolutionAttempts", "1")
+                                .render()
+                ),
+                IgniteInternalException.class
+        );
 
         IgniteInternalException actual = (IgniteInternalException) 
unwrapRootCause(throwable);
         assertEquals(ADDRESS_UNRESOLVED_ERR, actual.code());
         assertEquals("No network addresses resolved through any provided 
names", actual.getMessage());
     }
+
+    /**
+     * Verifies a situation when two nodes are started simultaneously, but one 
of them is stuck trying to resolve host names. We then
+     * check that no network threads are blocked while name resolution is in 
progress.
+     */
+    @Test
+    void testNameResolutionDoesNotBlockNetworkThreads(TestInfo testInfo) {
+        LogInspector watchdogLogInspector = 
LogInspector.create(FailureManager.class, true);
+
+        var blockedThreadsCounter = new AtomicInteger();
+
+        watchdogLogInspector.addHandler(
+                event -> {
+                    Throwable thrown = event.getThrown();
+
+                    return thrown != null && thrown.getMessage().contains("A 
critical thread is blocked");
+                },
+                blockedThreadsCounter::incrementAndGet
+        );
+
+        try {
+            // First, start the node that will get stuck trying to resolve 
host names.
+            CompletableFuture<Void> startBrokenNodeFuture = runAsync(() -> 
startEmbeddedNode(
+                    testInfo,
+                    0,
+                    config -> ConfigDocumentFactory.parseString(config)
+                            .withValueText("ignite.network.port", 
String.valueOf(DEFAULT_BASE_PORT))
+                            
.withValueText("ignite.network.nodeFinder.netClusterNodes", "[ 
\"bad.host:1234\" ]")
+                            
.withValueText("ignite.network.nodeFinder.nameResolutionAttempts", "3")
+                            .render()
+            ));
+
+            // Start a second node that will try to open a connection to the 
first node. It should start successfully.

Review Comment:
   Because the network thread gets blocked when it tries to open an incoming 
connection. Do you think it's possible to reproduce without a second node?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to