This is an automated email from the ASF dual-hosted git repository.

av pushed a commit to branch ignite-ducktape in repository https://gitbox.apache.org/repos/asf/ignite.git
The following commit(s) were added to refs/heads/ignite-ducktape by this push: new a44d5a3 IGNITE-13645 : Discovery ducktape test should detect failed nodes by asking the cluster. (#8409) a44d5a3 is described below commit a44d5a3695663475563b661b305421efcd6f3422 Author: Vladsz83 <vlads...@gmail.com> AuthorDate: Thu Nov 5 18:23:55 2020 +0300 IGNITE-13645 : Discovery ducktape test should detect failed nodes by asking the cluster. (#8409) --- .../services/utils/templates/log4j.xml.j2 | 4 +++ .../tests/ignitetest/tests/discovery_test.py | 31 +++++++++------------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/modules/ducktests/tests/ignitetest/services/utils/templates/log4j.xml.j2 b/modules/ducktests/tests/ignitetest/services/utils/templates/log4j.xml.j2 index b2a9a7b..a0eeff0 100644 --- a/modules/ducktests/tests/ignitetest/services/utils/templates/log4j.xml.j2 +++ b/modules/ducktests/tests/ignitetest/services/utils/templates/log4j.xml.j2 @@ -48,6 +48,10 @@ <level value="WARN"/> </category> + <category name="org.apache.ignite.spi.discovery.tcp"> + <level value="INFO"/> + </category> + <root> <level value="INFO"/> <appender-ref ref="CONSOLE_ERR"/> diff --git a/modules/ducktests/tests/ignitetest/tests/discovery_test.py b/modules/ducktests/tests/ignitetest/tests/discovery_test.py index f2be35b..fbd392c 100644 --- a/modules/ducktests/tests/ignitetest/tests/discovery_test.py +++ b/modules/ducktests/tests/ignitetest/tests/discovery_test.py @@ -163,8 +163,7 @@ class DiscoveryTest(IgniteTest): results['Ignite cluster start time (s)'] = start_servers_sec - failed_nodes, survived_node = choose_node_to_kill(servers, test_config.nodes_to_kill, - test_config.sequential_failure) + failed_nodes = choose_node_to_kill(servers, test_config.nodes_to_kill, test_config.sequential_failure) if test_config.load_type is not ClusterLoad.NONE: load_config = ignite_config._replace(client_mode=True) if test_config.with_zk else \ @@ -181,12 +180,11 @@ class 
DiscoveryTest(IgniteTest): start_load_app(self.test_context, ignite_config=load_config, params=params, modules=modules) - results.update(self._simulate_nodes_failure(servers, node_fail_task(ignite_config, test_config), failed_nodes, - survived_node)) + results.update(self._simulate_nodes_failure(servers, node_fail_task(ignite_config, test_config), failed_nodes)) return results - def _simulate_nodes_failure(self, servers, kill_node_task, failed_nodes, survived_node): + def _simulate_nodes_failure(self, servers, kill_node_task, failed_nodes): """ Perform node failure scenario """ @@ -206,20 +204,19 @@ class DiscoveryTest(IgniteTest): logged_timestamps = [] data = {} - for failed_id in ids_to_wait: - logged_timestamps.append( - get_event_time(servers, survived_node, node_failed_event_pattern(failed_id))) + for survivor in [n for n in servers.nodes if n not in failed_nodes]: + for failed_id in ids_to_wait: + logged_timestamps.append(get_event_time(servers, survivor, node_failed_event_pattern(failed_id))) + + self._check_failed_number(failed_nodes, survivor) - self._check_failed_number(failed_nodes, survived_node) self._check_not_segmented(failed_nodes) logged_timestamps.sort(reverse=True) - first_kill_time = epoch_mills(first_terminated) - detection_delay = epoch_mills(logged_timestamps[0]) - first_kill_time - - data['Detection of node(s) failure (ms)'] = detection_delay - data['All detection delays (ms):'] = str([epoch_mills(ts) - first_kill_time for ts in logged_timestamps]) + data['Detection of node(s) failure (ms)'] = epoch_mills(logged_timestamps[0]) - epoch_mills(first_terminated) + data['All detection delays (ms):'] = str( + [epoch_mills(ts) - epoch_mills(first_terminated) for ts in logged_timestamps]) data['Nodes failed'] = len(failed_nodes) return data @@ -348,13 +345,9 @@ def choose_node_to_kill(servers, nodes_to_kill, sequential): idx = random.randint(0, len(to_kill) - nodes_to_kill) to_kill = to_kill[idx:idx + nodes_to_kill] - survive = random.choice([node 
for node in servers.nodes if node not in to_kill]) - assert len(to_kill) == nodes_to_kill, "Unable to pick up required number of nodes to kill." - assert survive, "Unable to select survived node to monitor the cluster on it." - - return to_kill, survive + return to_kill def order(node):