This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 8b2bd18  [client-test] fix TestFailedDnsResolution
8b2bd18 is described below

commit 8b2bd18c6624301ad45ce361c90bfd29691afea7
Author: Alexey Serbin <ale...@apache.org>
AuthorDate: Sun Feb 28 23:18:45 2021 -0800

    [client-test] fix TestFailedDnsResolution
    
    I noticed that ClientTest.TestFailedDnsResolution fails unexpectedly
    with the following error when running on macOS:
    
      src/kudu/client/client-test.cc:3205: Failure
      Value of: s.IsIOError()
        Actual: false
      Expected: true
      unexpected status: OK
    
    It turned out that the scenario didn't expect that
      (a) the results of DNS resolution are cached
      (b) a tablet server's address can be the same as master's
    
    (a) turned true with changelist 48467ccf4, and (b) is true in case of
    running test mini-cluster with other than UNIQUE_LOOPBACK bind mode:
    e.g., on macOS it's run in LOOPBACK mode.
    
    I updated the scenario to use a non-caching DNS resolver.  I also
    increased the timeout for write operations because the scenario
    was failing from time to time in case TSAN builds.  In addition, since
    timeouts in GetTableLocations RPC are reported two-fold due to the
    client's metacache activity, the test was failing rarely due to
    receiving other non-expected error message.  I updated the list of
    expected error messages to add the missing case.
    
    With this patch, the scenario succeeds on macOS and runs more stable
    for Linux TSAN builds.
    
    Change-Id: I0493d992c43adb14ef02efae0a15dddc53301d7d
    Reviewed-on: http://gerrit.cloudera.org:8080/17142
    Tested-by: Kudu Jenkins
    Reviewed-by: Bankim Bhavsar <ban...@cloudera.com>
    Reviewed-by: Andrew Wong <aw...@cloudera.com>
---
 src/kudu/client/client-test.cc | 75 +++++++++++++++++++++++++-----------------
 1 file changed, 45 insertions(+), 30 deletions(-)

diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index b844ec9..b9cde36 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -174,6 +174,7 @@ DECLARE_int64(on_disk_size_for_testing);
 DECLARE_string(location_mapping_cmd);
 DECLARE_string(superuser_acl);
 DECLARE_string(user_acl);
+DECLARE_uint32(dns_resolver_cache_capacity_mb);
 DECLARE_uint32(txn_keepalive_interval_ms);
 DECLARE_uint32(txn_staleness_tracker_interval_ms);
 DECLARE_uint32(txn_manager_status_table_num_replicas);
@@ -3188,42 +3189,56 @@ TEST_F(ClientTest, TestWriteWhileRestarting) {
 }
 
 TEST_F(ClientTest, TestFailedDnsResolution) {
-  shared_ptr<KuduSession> session = client_->NewSession();
+  // Create a dedicated instance of a client which doesn't cache DNS resolution
+  // results. This is to avoid using the DNS resolver cache if the hostname/IP
+  // address of tablet server is the same as master's address. The latter is
+  // the case when using other than UNIQUE_LOOPBACK binding mode for the
+  // server components of the test mini-cluster.
+  FLAGS_dns_resolver_cache_capacity_mb = 0;
+  shared_ptr<KuduClient> c;
+  ASSERT_OK(KuduClientBuilder()
+      
.add_master_server_addr(cluster_->mini_master()->bound_rpc_addr().ToString())
+      .Build(&c));
+  shared_ptr<KuduTable> t;
+  ASSERT_OK(c->OpenTable(kTableName, &t));
+
+  shared_ptr<KuduSession> session = c->NewSession();
   ASSERT_OK(session->SetFlushMode(KuduSession::MANUAL_FLUSH));
-  const string kMasterError = "timed out after deadline expired: 
GetTableLocations RPC";
-
-  // First time disable dns resolution.
-  // Set the timeout to be short since we know it can't succeed, but not to 
the point where we
-  // can timeout before getting the dns error.
-  {
-    for (int i = 0;;i++) {
-      google::FlagSaver saver;
-      FLAGS_fail_dns_resolution = true;
-      session->SetTimeoutMillis(500);
-      ASSERT_OK(ApplyInsertToSession(session.get(), client_table_, 1, 1, 
"row"));
-      Status s = session->Flush();
-      ASSERT_TRUE(s.IsIOError()) << "unexpected status: " << s.ToString();
-      unique_ptr<KuduError> error = GetSingleErrorFromSession(session.get());
-      ASSERT_TRUE(error->status().IsTimedOut()) << error->status().ToString();
-
-      // Due to KUDU-1466 there is a narrow window in which the error reported 
might be that the
-      // GetTableLocations RPC to the master timed out instead of the expected 
dns resolution error.
-      // In that case just loop again.
-
-      if (error->status().ToString().find(kMasterError) != std::string::npos) {
-        ASSERT_LE(i, 10) << "Didn't get a dns resolution error after 10 
tries.";
-        continue;
-      }
 
-      ASSERT_STR_CONTAINS(error->status().ToString(),
-                          "Network error: Failed to resolve address for TS");
-      break;
+  // First, make DNS resolution time out.
+  // Set the timeout to be short since we know it can't succeed, but not to the
+  // point where we can timeout before getting the DNS error.
+  FLAGS_fail_dns_resolution = true;
+  session->SetTimeoutMillis(1000);
+
+  // Due to KUDU-1466, there is a narrow window in which the error reported
+  // might be that the GetTableLocations RPC to the master timed out instead of
+  // the expected DNS resolution error while trying to send Write RPC to
+  // tablet server.
+  for (auto i = 0;; ++i) {
+    constexpr const char* const kMasterErrors[] = {
+      "timed out after deadline expired: GetTableLocations RPC",
+      "LookupRpc timed out after deadline expired",
+    };
+    ASSERT_OK(ApplyInsertToSession(session.get(), t, 1, 1, "row"));
+    auto s = session->Flush();
+    ASSERT_TRUE(s.IsIOError()) << s.ToString();
+    unique_ptr<KuduError> error = GetSingleErrorFromSession(session.get());
+    ASSERT_TRUE(error->status().IsTimedOut()) << error->status().ToString();
+    const auto row_status_str = error->status().ToString();
+    if (row_status_str.find(kMasterErrors[0]) != std::string::npos ||
+        row_status_str.find(kMasterErrors[1]) != std::string::npos) {
+      ASSERT_LE(i, 10) << "could not get DNS resolution error after 10 tries";
+      continue;
     }
+    ASSERT_STR_CONTAINS(row_status_str,
+                        "Network error: Failed to resolve address for TS");
+    break;
   }
 
-  // Now re-enable dns resolution, the write should succeed.
+  // Now, re-enable the DNS resolution: the write should succeed.
   FLAGS_fail_dns_resolution = false;
-  ASSERT_OK(ApplyInsertToSession(session.get(), client_table_, 1, 1, "row"));
+  ASSERT_OK(ApplyInsertToSession(session.get(), t, 1, 1, "row"));
   ASSERT_OK(session->Flush());
 }
 

Reply via email to