This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push: new 8b2bd18 [client-test] fix TestFailedDnsResolution 8b2bd18 is described below commit 8b2bd18c6624301ad45ce361c90bfd29691afea7 Author: Alexey Serbin <ale...@apache.org> AuthorDate: Sun Feb 28 23:18:45 2021 -0800 [client-test] fix TestFailedDnsResolution I noticed that ClientTest.TestFailedDnsResolution fails unexpectedly with the following error when running on macOS: src/kudu/client/client-test.cc:3205: Failure Value of: s.IsIOError() Actual: false Expected: true unexpected status: OK It turned out that the scenario didn't expect that (a) the results of DNS resolution are cached (b) a tablet server's address can be the same as master's (a) turned true with changelist 48467ccf4, and (b) is true in case of running test mini-cluster with other than UNIQUE_LOOPBACK bind mode: e.g., on macOS it's run in LOOPBACK mode. I updated the scenario to use a non-caching DNS resolver. I also increased the timeout for write operations because the scenario was failing from time to time in case TSAN builds. In addition, since timeouts in GetTableLocations RPC are reported two-fold due to the client's metacache activity, the test was failing rarely due to receiving other non-expected error message. I updated the list of expected error messages to add the missing case. With this patch, the scenario succeeds on macOS and runs more stable for Linux TSAN builds. Change-Id: I0493d992c43adb14ef02efae0a15dddc53301d7d Reviewed-on: http://gerrit.cloudera.org:8080/17142 Tested-by: Kudu Jenkins Reviewed-by: Bankim Bhavsar <ban...@cloudera.com> Reviewed-by: Andrew Wong <aw...@cloudera.com> --- src/kudu/client/client-test.cc | 75 +++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 30 deletions(-) diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc index b844ec9..b9cde36 100644 --- a/src/kudu/client/client-test.cc +++ b/src/kudu/client/client-test.cc @@ -174,6 +174,7 @@ DECLARE_int64(on_disk_size_for_testing); DECLARE_string(location_mapping_cmd); DECLARE_string(superuser_acl); DECLARE_string(user_acl); +DECLARE_uint32(dns_resolver_cache_capacity_mb); DECLARE_uint32(txn_keepalive_interval_ms); DECLARE_uint32(txn_staleness_tracker_interval_ms); DECLARE_uint32(txn_manager_status_table_num_replicas); @@ -3188,42 +3189,56 @@ TEST_F(ClientTest, TestWriteWhileRestarting) { } TEST_F(ClientTest, TestFailedDnsResolution) { - shared_ptr<KuduSession> session = client_->NewSession(); + // Create a dedicated instance of a client which doesn't cache DNS resolution + // results. This is to avoid using the DNS resolver cache if the hostname/IP + // address of tablet server is the same as master's address. The latter is + // the case when using other than UNIQUE_LOOPBACK binding mode for the + // server components of the test mini-cluster. + FLAGS_dns_resolver_cache_capacity_mb = 0; + shared_ptr<KuduClient> c; + ASSERT_OK(KuduClientBuilder() + .add_master_server_addr(cluster_->mini_master()->bound_rpc_addr().ToString()) + .Build(&c)); + shared_ptr<KuduTable> t; + ASSERT_OK(c->OpenTable(kTableName, &t)); + + shared_ptr<KuduSession> session = c->NewSession(); ASSERT_OK(session->SetFlushMode(KuduSession::MANUAL_FLUSH)); - const string kMasterError = "timed out after deadline expired: GetTableLocations RPC"; - - // First time disable dns resolution. - // Set the timeout to be short since we know it can't succeed, but not to the point where we - // can timeout before getting the dns error. - { - for (int i = 0;;i++) { - google::FlagSaver saver; - FLAGS_fail_dns_resolution = true; - session->SetTimeoutMillis(500); - ASSERT_OK(ApplyInsertToSession(session.get(), client_table_, 1, 1, "row")); - Status s = session->Flush(); - ASSERT_TRUE(s.IsIOError()) << "unexpected status: " << s.ToString(); - unique_ptr<KuduError> error = GetSingleErrorFromSession(session.get()); - ASSERT_TRUE(error->status().IsTimedOut()) << error->status().ToString(); - - // Due to KUDU-1466 there is a narrow window in which the error reported might be that the - // GetTableLocations RPC to the master timed out instead of the expected dns resolution error. - // In that case just loop again. - - if (error->status().ToString().find(kMasterError) != std::string::npos) { - ASSERT_LE(i, 10) << "Didn't get a dns resolution error after 10 tries."; - continue; - } - ASSERT_STR_CONTAINS(error->status().ToString(), - "Network error: Failed to resolve address for TS"); - break; + // First, make DNS resolution time out. + // Set the timeout to be short since we know it can't succeed, but not to the + // point where we can timeout before getting the DNS error. + FLAGS_fail_dns_resolution = true; + session->SetTimeoutMillis(1000); + + // Due to KUDU-1466, there is a narrow window in which the error reported + // might be that the GetTableLocations RPC to the master timed out instead of + // the expected DNS resolution error while trying to send Write RPC to + // tablet server. + for (auto i = 0;; ++i) { + constexpr const char* const kMasterErrors[] = { + "timed out after deadline expired: GetTableLocations RPC", + "LookupRpc timed out after deadline expired", + }; + ASSERT_OK(ApplyInsertToSession(session.get(), t, 1, 1, "row")); + auto s = session->Flush(); + ASSERT_TRUE(s.IsIOError()) << s.ToString(); + unique_ptr<KuduError> error = GetSingleErrorFromSession(session.get()); + ASSERT_TRUE(error->status().IsTimedOut()) << error->status().ToString(); + const auto row_status_str = error->status().ToString(); + if (row_status_str.find(kMasterErrors[0]) != std::string::npos || + row_status_str.find(kMasterErrors[1]) != std::string::npos) { + ASSERT_LE(i, 10) << "could not get DNS resolution error after 10 tries"; + continue; } + ASSERT_STR_CONTAINS(row_status_str, + "Network error: Failed to resolve address for TS"); + break; } - // Now re-enable dns resolution, the write should succeed. + // Now, re-enable the DNS resolution: the write should succeed. FLAGS_fail_dns_resolution = false; - ASSERT_OK(ApplyInsertToSession(session.get(), client_table_, 1, 1, "row")); + ASSERT_OK(ApplyInsertToSession(session.get(), t, 1, 1, "row")); ASSERT_OK(session->Flush()); }