[ https://issues.apache.org/jira/browse/CASSANDRA-17891?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Brandon Williams updated CASSANDRA-17891: ----------------------------------------- Reviewers: Brandon Williams Status: Review In Progress (was: Needs Committer) > Fix hintedhandoff_test.py::TestHintedHandoff::test_hintedhandoff_window > ----------------------------------------------------------------------- > > Key: CASSANDRA-17891 > URL: https://issues.apache.org/jira/browse/CASSANDRA-17891 > Project: Cassandra > Issue Type: Bug > Components: CI > Reporter: Ekaterina Dimitrova > Assignee: Stefan Miklosovic > Priority: Normal > Fix For: 4.1.x, 4.x > > > hintedhandoff_test.py::TestHintedHandoff::test_hintedhandoff_window is flaky > > {code:java} > self = <hintedhandoff_test.TestHintedHandoff object at 0x7f2d903b8630> > @since('4.1') > def test_hintedhandoff_window(self): > """ > Test that we only store at a maximum the hint window worth of > hints. > Prior to CASSANDRA-14309 we would store another window worth of > hints > if the down node was brought up and then taken back down > immediately. > We would also store another window of hints on a live node if the > live > node was restarted. 
> @jira_ticket CASSANDRA-14309 > """ > > # hint_window_persistent_enabled is set to true by default > self.cluster.set_configuration_options({'max_hint_window_in_ms': > 10000, > 'hinted_handoff_enabled': > True, > 'max_hints_delivery_threads': > 1, > 'hints_flush_period_in_ms': > 100, }) > self.cluster.populate(2).start() > node1, node2 = self.cluster.nodelist() > session = self.patient_cql_connection(node1) > create_ks(session, 'ks', 2) > create_c1c2_table(self, session) > > # Stop handoff until very end and take node2 down for first round of > hints > node1.nodetool('pausehandoff') > > node2.nodetool('disablebinary') > node2.watch_log_for(["Stop listening for CQL clients"], timeout=120) > > node2.nodetool('disablegossip') > node2.watch_log_for(["Announcing shutdown", "state jump to > shutdown"], timeout=120) > node1.watch_log_for(["state jump to shutdown"], timeout=120) > > log_mark_node_1 = node1.mark_log() > log_mark_node_2 = node2.mark_log() > > # First round of hints. We expect these to be replayed and the only > # hints within the window > insert_c1c2(session, n=(0, 100), consistency=ConsistencyLevel.ONE) > > # Let hint window pass > time.sleep(15) > > # Re-enable and disable the node. Prior to CASSANDRA-14215 this > should make the hint window on node1 reset. 
> node2.nodetool('enablegossip') > node2.watch_log_for(["state jump to NORMAL"], timeout=120, > from_mark=log_mark_node_2) > node1.watch_log_for(["state jump to NORMAL"], timeout=120, > from_mark=log_mark_node_1) > > log_mark_node_1 = node1.mark_log() > log_mark_node_2 = node2.mark_log() > > node2.nodetool('disablegossip') > > node2.watch_log_for(["Announcing shutdown", "state jump to > shutdown"], timeout=120, from_mark=log_mark_node_2) > node1.watch_log_for(["state jump to shutdown"], timeout=120, > from_mark=log_mark_node_1) > > log_mark_node_1 = node1.mark_log() > log_mark_node_2 = node2.mark_log() > > def endpoint_downtime(node_to_query, node): > mbean = make_mbean('net', type='Gossiper') > with JolokiaAgent(node_to_query) as jmx: > return jmx.execute_method(mbean, > 'getEndpointDowntime(java.lang.String)', [node]) > > while endpoint_downtime(node1, "127.0.0.2") <= 5000: > time.sleep(1) > > # Second round of inserts. We do not expect hints to be stored. > insert_c1c2(session, n=(100, 200), consistency=ConsistencyLevel.ONE) > > # Restart node1. Prior to CASSANDRA-14215 this would reset node1's > hint window. > node1.stop() > node1.start(wait_for_binary_proto=True, wait_other_notice=False) > session = self.patient_exclusive_cql_connection(node1) > session.execute('USE ks') > # Third round of inserts. We do not expect hints to be stored. 
> insert_c1c2(session, n=(200, 300), consistency=ConsistencyLevel.ONE) > > # Enable node2 and wait for hints to be replayed > node2.nodetool('enablegossip') > node2.watch_log_for(["state jump to NORMAL"], timeout=120, > from_mark=log_mark_node_2) > > node2.nodetool('enablebinary') > node2.watch_log_for(["Starting listening for CQL clients"], > timeout=120, from_mark=log_mark_node_2) > node1.nodetool('resumehandoff') > node1.watch_log_for('Finished hinted handoff') > # Stop node1 so that we only query node2 > node1.stop() > > session = self.patient_exclusive_cql_connection(node2) > session.execute('USE ks') > # Ensure first dataset is present (through replayed hints) > for x in range(0, 100): > query_c1c2(session, x, ConsistencyLevel.ONE) > > # Ensure second and third datasets are not present > for x in range(100, 300): > > query_c1c2(session, x, ConsistencyLevel.ONE, > > tolerate_missing=True, must_be_missing=True) > hintedhandoff_test.py:303: > _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ > _ > tools/data.py:47: in query_c1c2 > assertions.assert_length_equal(rows, 0) > _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ > _ > object_with_length = [Row(c1='value1', c2='value2')], expected_length = 0 > def assert_length_equal(object_with_length, expected_length): > """ > Assert an object has a specific length. 
> @param object_with_length The object whose length will be checked > @param expected_length The expected length of the object > > Examples: > assert_length_equal(res, nb_counter) > """ > assert len(object_with_length) == expected_length, \ > "Expected {} to have length {}, but instead is of length {}"\ > > .format(object_with_length, expected_length, > > len(object_with_length)) > E AssertionError: Expected [Row(c1='value1', c2='value2')] to have > length 0, but instead is of length 1 > tools/assertions.py:269: AssertionError > {code} > -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org For additional commands, e-mail: commits-help@cassandra.apache.org