This is an automated email from the ASF dual-hosted git repository.

av pushed a commit to branch ignite-ducktape
in repository https://gitbox.apache.org/repos/asf/ignite.git


The following commit(s) were added to refs/heads/ignite-ducktape by this push:
     new a44d5a3  IGNITE-13645 : Discovery ducktape test should detect failed nodes by asking the cluster. (#8409)
a44d5a3 is described below

commit a44d5a3695663475563b661b305421efcd6f3422
Author: Vladsz83 <vlads...@gmail.com>
AuthorDate: Thu Nov 5 18:23:55 2020 +0300

    IGNITE-13645 : Discovery ducktape test should detect failed nodes by asking the cluster. (#8409)
---
 .../services/utils/templates/log4j.xml.j2          |  4 +++
 .../tests/ignitetest/tests/discovery_test.py       | 31 +++++++++-------------
 2 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/modules/ducktests/tests/ignitetest/services/utils/templates/log4j.xml.j2 b/modules/ducktests/tests/ignitetest/services/utils/templates/log4j.xml.j2
index b2a9a7b..a0eeff0 100644
--- a/modules/ducktests/tests/ignitetest/services/utils/templates/log4j.xml.j2
+++ b/modules/ducktests/tests/ignitetest/services/utils/templates/log4j.xml.j2
@@ -48,6 +48,10 @@
         <level value="WARN"/>
     </category>
 
+    <category name="org.apache.ignite.spi.discovery.tcp">
+        <level value="INFO"/>
+    </category>
+
     <root>
         <level value="INFO"/>
         <appender-ref ref="CONSOLE_ERR"/>
diff --git a/modules/ducktests/tests/ignitetest/tests/discovery_test.py b/modules/ducktests/tests/ignitetest/tests/discovery_test.py
index f2be35b..fbd392c 100644
--- a/modules/ducktests/tests/ignitetest/tests/discovery_test.py
+++ b/modules/ducktests/tests/ignitetest/tests/discovery_test.py
@@ -163,8 +163,7 @@ class DiscoveryTest(IgniteTest):
 
         results['Ignite cluster start time (s)'] = start_servers_sec
 
-        failed_nodes, survived_node = choose_node_to_kill(servers, 
test_config.nodes_to_kill,
-                                                          
test_config.sequential_failure)
+        failed_nodes = choose_node_to_kill(servers, test_config.nodes_to_kill, 
test_config.sequential_failure)
 
         if test_config.load_type is not ClusterLoad.NONE:
             load_config = ignite_config._replace(client_mode=True) if 
test_config.with_zk else \
@@ -181,12 +180,11 @@ class DiscoveryTest(IgniteTest):
 
             start_load_app(self.test_context, ignite_config=load_config, 
params=params, modules=modules)
 
-        results.update(self._simulate_nodes_failure(servers, 
node_fail_task(ignite_config, test_config), failed_nodes,
-                                                    survived_node))
+        results.update(self._simulate_nodes_failure(servers, 
node_fail_task(ignite_config, test_config), failed_nodes))
 
         return results
 
-    def _simulate_nodes_failure(self, servers, kill_node_task, failed_nodes, 
survived_node):
+    def _simulate_nodes_failure(self, servers, kill_node_task, failed_nodes):
         """
         Perform node failure scenario
         """
@@ -206,20 +204,19 @@ class DiscoveryTest(IgniteTest):
         logged_timestamps = []
         data = {}
 
-        for failed_id in ids_to_wait:
-            logged_timestamps.append(
-                get_event_time(servers, survived_node, 
node_failed_event_pattern(failed_id)))
+        for survivor in [n for n in servers.nodes if n not in failed_nodes]:
+            for failed_id in ids_to_wait:
+                logged_timestamps.append(get_event_time(servers, survivor, 
node_failed_event_pattern(failed_id)))
+
+                self._check_failed_number(failed_nodes, survivor)
 
-        self._check_failed_number(failed_nodes, survived_node)
         self._check_not_segmented(failed_nodes)
 
         logged_timestamps.sort(reverse=True)
 
-        first_kill_time = epoch_mills(first_terminated)
-        detection_delay = epoch_mills(logged_timestamps[0]) - first_kill_time
-
-        data['Detection of node(s) failure (ms)'] = detection_delay
-        data['All detection delays (ms):'] = str([epoch_mills(ts) - 
first_kill_time for ts in logged_timestamps])
+        data['Detection of node(s) failure (ms)'] = 
epoch_mills(logged_timestamps[0]) - epoch_mills(first_terminated)
+        data['All detection delays (ms):'] = str(
+            [epoch_mills(ts) - epoch_mills(first_terminated) for ts in 
logged_timestamps])
         data['Nodes failed'] = len(failed_nodes)
 
         return data
@@ -348,13 +345,9 @@ def choose_node_to_kill(servers, nodes_to_kill, sequential):
     idx = random.randint(0, len(to_kill) - nodes_to_kill)
     to_kill = to_kill[idx:idx + nodes_to_kill]
 
-    survive = random.choice([node for node in servers.nodes if node not in 
to_kill])
-
     assert len(to_kill) == nodes_to_kill, "Unable to pick up required number 
of nodes to kill."
 
-    assert survive, "Unable to select survived node to monitor the cluster on 
it."
-
-    return to_kill, survive
+    return to_kill
 
 
 def order(node):

Reply via email to