This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push: new 50b1cc45f [tools] Add '--create_table_replication_factor' flag for 'kudu table copy' 50b1cc45f is described below commit 50b1cc45f9fde3deac5aa0fef216f4950246b2c9 Author: Yingchun Lai <acelyc1112...@gmail.com> AuthorDate: Mon Apr 25 16:43:59 2022 +0800 [tools] Add '--create_table_replication_factor' flag for 'kudu table copy' Now it's possible to specify the replication factor for the destination table when copying a table. For example, when copying a table with RF=3 from a cluster with multiple tservers to a cluster with only one tserver, we can set --create_table_replication_factor=1. Change-Id: I9a4eebdcf85b5ec3666e023194b8c06d66b0a683 Reviewed-on: http://gerrit.cloudera.org:8080/18446 Tested-by: Kudu Jenkins Reviewed-by: Alexey Serbin <ale...@apache.org> --- src/kudu/tools/kudu-tool-test.cc | 50 +++++++++++++++++++++++++++++-------- src/kudu/tools/table_scanner.cc | 9 +++++-- src/kudu/tools/tool_action_table.cc | 15 +++++++++++ 3 files changed, 62 insertions(+), 12 deletions(-) diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc index c87d39c55..94ed6dbfe 100644 --- a/src/kudu/tools/kudu-tool-test.cc +++ b/src/kudu/tools/kudu-tool-test.cc @@ -517,6 +517,7 @@ class ToolTest : public KuduTest { int64_t max_value; string columns; TableCopyMode mode; + int32_t create_table_replication_factor; }; void RunCopyTableCheck(const RunCopyTableCheckArgs& args) { @@ -566,14 +567,15 @@ class ToolTest : public KuduTest { string stdout; NO_FATALS(RunActionStdoutString( Substitute("table copy $0 $1 $2 -dst_table=$3 -predicates=$4 -write_type=$5 " - "-create_table=$6", + "-create_table=$6 -create_table_replication_factor=$7", cluster_->master()->bound_rpc_addr().ToString(), args.src_table_name, cluster_->master()->bound_rpc_addr().ToString(), kDstTableName, args.predicates_json, write_type, - create_table), + create_table, + args.create_table_replication_factor), &stdout)); // 
Check total count. @@ -599,10 +601,15 @@ class ToolTest : public KuduTest { cluster_->master()->bound_rpc_addr().ToString(), kDstTableName), &dst_schema)); - // Remove the first lines, which are the different table names. - src_schema.erase(src_schema.begin()); - dst_schema.erase(dst_schema.begin()); - ASSERT_EQ(src_schema, dst_schema); + ASSERT_EQ(src_schema.size(), dst_schema.size()); + for (int i = 0; i < src_schema.size(); ++i) { + // Table name is different. + if (HasPrefixString(src_schema[i], "TABLE ")) continue; + // Replication factor is different when explicitly set it to 3 (default 1). + if (args.create_table_replication_factor == 3 && + HasPrefixString(src_schema[i], "REPLICAS ")) continue; + ASSERT_EQ(src_schema[i], dst_schema[i]); + } } // Check all values. @@ -719,7 +726,13 @@ class ToolTestCopyTableParameterized : public: void SetUp() override { test_case_ = GetParam(); - NO_FATALS(StartExternalMiniCluster()); + ExternalMiniClusterOptions opts; + if (test_case_ == kTestCopyTableSchemaOnly) { + // In kTestCopyTableSchemaOnly case, we may create table with RF=3, + // means 3 tservers needed at least. + opts.num_tablet_servers = 3; + } + NO_FATALS(StartExternalMiniCluster(opts)); // Create the src table and write some data to it. 
TestWorkload ww(cluster_.get()); @@ -757,7 +770,8 @@ class ToolTestCopyTableParameterized : 1, total_rows_, kSimpleSchemaColumns, - TableCopyMode::INSERT_TO_EXIST_TABLE }; + TableCopyMode::INSERT_TO_EXIST_TABLE, + -1 }; switch (test_case_) { case kTestCopyTableDstTableExist: return { args }; @@ -767,9 +781,25 @@ class ToolTestCopyTableParameterized : case kTestCopyTableUpsert: args.mode = TableCopyMode::UPSERT_TO_EXIST_TABLE; return { args }; - case kTestCopyTableSchemaOnly: + case kTestCopyTableSchemaOnly: { args.mode = TableCopyMode::COPY_SCHEMA_ONLY; - return { args }; + vector<RunCopyTableCheckArgs> multi_args; + { + auto args_temp = args; + multi_args.emplace_back(std::move(args_temp)); + } + { + auto args_temp = args; + args_temp.create_table_replication_factor = 1; + multi_args.emplace_back(std::move(args_temp)); + } + { + auto args_temp = args; + args_temp.create_table_replication_factor = 3; + multi_args.emplace_back(std::move(args_temp)); + } + return multi_args; + } case kTestCopyTableComplexSchema: args.columns = kComplexSchemaColumns; args.mode = TableCopyMode::INSERT_TO_NOT_EXIST_TABLE; diff --git a/src/kudu/tools/table_scanner.cc b/src/kudu/tools/table_scanner.cc index 8be3b4c15..f3e8beb73 100644 --- a/src/kudu/tools/table_scanner.cc +++ b/src/kudu/tools/table_scanner.cc @@ -88,6 +88,9 @@ using strings::Substitute; DEFINE_bool(create_table, true, "Whether to create the destination table if it doesn't exist."); +DEFINE_int32(create_table_replication_factor, -1, + "The replication factor of the destination table if the table will be created. " + "By default, the replication factor of source table will be used."); DEFINE_bool(fill_cache, true, "Whether to fill block cache when scanning."); DEFINE_string(predicates, "", @@ -116,7 +119,7 @@ DEFINE_bool(show_values, false, DEFINE_string(write_type, "insert", "How data should be copied to the destination table. Valid values are 'insert', " "'upsert' or the empty string. 
If the empty string, data will not be copied " - "(useful when create_table is 'true')."); + "(useful when --create_table=true)."); DEFINE_string(replica_selection, "CLOSEST", "Replica selection for scan operations. Acceptable values are: " "CLOSEST, LEADER (maps into KuduClient::CLOSEST_REPLICA and " @@ -415,10 +418,12 @@ Status CreateDstTableIfNeeded(const client::sp::shared_ptr<KuduTable>& src_table }; // Table schema and replica number. + int num_replicas = FLAGS_create_table_replication_factor == -1 ? + src_table->num_replicas() : FLAGS_create_table_replication_factor; unique_ptr<KuduTableCreator> table_creator(dst_client->NewTableCreator()); table_creator->table_name(dst_table_name) .schema(&dst_table_schema) - .num_replicas(src_table->num_replicas()); + .num_replicas(num_replicas); // Add hash partition schema. for (const auto& hash_dimension : partition_schema.hash_schema()) { diff --git a/src/kudu/tools/tool_action_table.cc b/src/kudu/tools/tool_action_table.cc index b9e6627dd..87ee2a5f1 100644 --- a/src/kudu/tools/tool_action_table.cc +++ b/src/kudu/tools/tool_action_table.cc @@ -58,6 +58,7 @@ #include "kudu/tools/tool.pb.h" #include "kudu/tools/tool_action.h" #include "kudu/tools/tool_action_common.h" +#include "kudu/util/flag_validators.h" #include "kudu/util/jsonreader.h" #include "kudu/util/jsonwriter.h" #include "kudu/util/status.h" @@ -130,6 +131,8 @@ DEFINE_bool(show_avro_format_schema, false, "table schema in Avro format without any other information like " "partition/owner/comments. 
It cannot be used in conjunction with other flags"); +DECLARE_bool(create_table); +DECLARE_int32(create_table_replication_factor); DECLARE_bool(row_count_only); DECLARE_bool(show_scanner_stats); @@ -146,6 +149,17 @@ DECLARE_bool(show_values); DECLARE_string(replica_selection); DECLARE_string(tables); +bool ValidateCreateTable() { + if (!FLAGS_create_table && FLAGS_create_table_replication_factor != -1) { + LOG(ERROR) << Substitute("--create_table_replication_factor is meaningless " + "when --create_table=false"); + return false; + } + return true; +} + +GROUP_FLAG_VALIDATOR(create_table, ValidateCreateTable); + namespace kudu { namespace tools { @@ -1553,6 +1567,7 @@ unique_ptr<Mode> BuildTableMode() { .AddRequiredParameter({ kTableNameArg, "Name of the source table" }) .AddRequiredParameter({ kDestMasterAddressesArg, kDestMasterAddressesArgDesc }) .AddOptionalParameter("create_table") + .AddOptionalParameter("create_table_replication_factor") .AddOptionalParameter("dst_table") .AddOptionalParameter("num_threads") .AddOptionalParameter("predicates")