Bakai Ádám created KUDU-3414:
--------------------------------

             Summary: Client tries with the same address over and over in case 
of negotiation timeout
                 Key: KUDU-3414
                 URL: https://issues.apache.org/jira/browse/KUDU-3414
             Project: Kudu
          Issue Type: Bug
          Components: client
            Reporter: Bakai Ádám


Reproduction:
1. Start new master and tablet servers.
2. Create a client, create a table, and do a successful write operation with 
it.

3. Pause the leader tablet server.
4. Now if you attempt to write with the same client, it will time out, and it 
will keep trying negotiation with the same server until the session times out. 
(If a new client is created, then the write operation is successful.)

KUDU-1580 works on a similar issue; the error is retriable for now, but 
[https://gerrit.cloudera.org/#/c/6926/] states that "the Kudu C++ client 
retries an RPC with other tablet replica if the connection negotiation with 
current replica timed out." which doesn't happen right now. (I didn't check 
whether the current behaviour is a regression or not.)

Here is reproduction test code to add to client-test.cc:
{code:java}
// Reproduction for KUDU-3414: after the leader replica's tablet server is
// paused, a write issued through the *same* client is expected to be retried
// against another replica once connection negotiation times out. Per the
// report, the client instead keeps negotiating with the paused server until
// the session timeout expires, so the final Apply() fails.
//
// Every setup step is checked with ASSERT_OK so that a failure while bringing
// up the cluster, creating the table, or pausing the server is reported where
// it happens rather than being mistaken for the bug under test.
TEST_F(ClientTest, TestDefaultRPCTimeoutSessionTimeoutDifferent) {
  // Shut down the fixture's cluster; this test runs against its own
  // ExternalMiniCluster created below.
  cluster_->Shutdown();
  // NOTE(review): these flags are set in the test process; presumably they do
  // not reach the external cluster's daemons, which are configured through
  // ExternalMiniClusterOptions -- confirm whether they are needed at all.
  FLAGS_raft_heartbeat_interval_ms = 50;
  FLAGS_leader_failure_exp_backoff_max_delta_ms = 1000;

  const string table_name = "TestTimeout";
  // One tablet server per replica of the test table.
  constexpr int kNumTabletServers = 5;

  unique_ptr<ExternalMiniCluster> ext_cluster_;
  ext_cluster_.reset(new ExternalMiniCluster(ExternalMiniClusterOptions()));
  ASSERT_OK(ext_cluster_->Start());

  // NOTE(review): kept exactly as in the original report; the signature and
  // return type of this overload could not be confirmed, so its result is not
  // checked here -- verify against external_mini_cluster.h.
  ext_cluster_->AddMaster();

  for (int i = 0; i < kNumTabletServers; ++i) {
    ASSERT_OK(ext_cluster_->AddTabletServer());
  }
  ASSERT_OK(ext_cluster_->WaitForTabletServerCount(kNumTabletServers,
                                                   MonoDelta::FromSeconds(1)));

  client::sp::shared_ptr<client::KuduClient> client;

  // Reset the fixture's client member; only the client built below is used.
  client_ = nullptr;
  KuduClientBuilder builder;
  builder.num_reactors(1);

  // The connection negotiation timeout (1500 ms) is deliberately longer than
  // the default RPC timeout (500 ms); both are far shorter than the 60 s
  // session timeout set on the sessions below.
  builder.connection_negotiation_timeout(MonoDelta::FromMilliseconds(1500));
  builder.default_rpc_timeout(MonoDelta::FromMilliseconds(500));

  ASSERT_OK(ext_cluster_->CreateClient(&builder, &client));
  unique_ptr<KuduTableCreator> table_creator(client->NewTableCreator());

  ASSERT_OK(table_creator->table_name(table_name)
                .schema(&schema_)
                .num_replicas(kNumTabletServers)
                .set_range_partition_columns({ "key" })
                .timeout(MonoDelta::FromSeconds(60))
                .Create());

  shared_ptr<KuduTable> table;
  ASSERT_OK(client->OpenTable(table_name, &table));

  // First write: performed while every tablet server is running.
  unique_ptr<KuduInsert> insert;
  shared_ptr<KuduSession> session = client->NewSession();
  session->SetTimeoutMillis(60000);

  insert = BuildTestInsert(table.get(), 1);
  ASSERT_OK(session->Apply(insert.release()));
  ASSERT_OK(session->Flush());

  // Use a scan token to discover the tablet's replicas and find the leader.
  // NOTE(review): the KuduScanToken pointers in 'tokens' are never freed, as
  // in the original report -- confirm ownership and release them if needed.
  KuduScanTokenBuilder builder2(table.get());
  vector<KuduScanToken*> tokens;
  ASSERT_OK(builder2.Build(&tokens));
  ASSERT_FALSE(tokens.empty());

  // Pause (rather than shut down) the leader's tablet server.
  bool leader_paused = false;
  const KuduTablet& tablet = tokens[0]->tablet();
  for (const auto& replica : tablet.replicas()) {
    if (replica->is_leader()) {
      ASSERT_OK(ext_cluster_->tablet_server_by_uuid(replica->ts().uuid())->Pause());
      //ext_cluster_->tablet_server_by_uuid(replica->ts().uuid())->Shutdown();
      leader_paused = true;
      break;
    }
  }
  // Without a paused leader the scenario below would not be exercised.
  ASSERT_TRUE(leader_paused);
  LOG(INFO)<<"paused";
  // Per the report, creating a new client here makes the write below succeed:
  //ext_cluster_->CreateClient(&builder,&client);

  // Second write: same client, new session, leader's server paused. This is
  // the operation the report says times out.
  session = client->NewSession();
  session->SetTimeoutMillis(60000);
  insert = BuildTestInsert(table.get(), 2);

  ASSERT_OK(session->Apply(insert.release()));
}
 {code}

(Don't forget to add the declarations and includes to the start of the file:)
{code:java}
#include "kudu/mini-cluster/external_mini_cluster.h"
using kudu::cluster::ExternalMiniCluster;
using kudu::cluster::ExternalMiniClusterOptions;
 {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to