This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 50b1cc45f [tools] Add '--create_table_replication_factor' flag for 'kudu table copy'
50b1cc45f is described below

commit 50b1cc45f9fde3deac5aa0fef216f4950246b2c9
Author: Yingchun Lai <acelyc1112...@gmail.com>
AuthorDate: Mon Apr 25 16:43:59 2022 +0800

    [tools] Add '--create_table_replication_factor' flag for 'kudu table copy'
    
    Now it's possible to specify the replication factor for the
    destination table when copying a table.
    
    An example usage scenario: when copying a table with RF=3 from a
    cluster with multiple tservers to a cluster with only one tserver,
    set --create_table_replication_factor=1.
    
    Change-Id: I9a4eebdcf85b5ec3666e023194b8c06d66b0a683
    Reviewed-on: http://gerrit.cloudera.org:8080/18446
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <ale...@apache.org>
---
 src/kudu/tools/kudu-tool-test.cc    | 50 +++++++++++++++++++++++++++++--------
 src/kudu/tools/table_scanner.cc     |  9 +++++--
 src/kudu/tools/tool_action_table.cc | 15 +++++++++++
 3 files changed, 62 insertions(+), 12 deletions(-)

diff --git a/src/kudu/tools/kudu-tool-test.cc b/src/kudu/tools/kudu-tool-test.cc
index c87d39c55..94ed6dbfe 100644
--- a/src/kudu/tools/kudu-tool-test.cc
+++ b/src/kudu/tools/kudu-tool-test.cc
@@ -517,6 +517,7 @@ class ToolTest : public KuduTest {
     int64_t max_value;
     string columns;
     TableCopyMode mode;
+    int32_t create_table_replication_factor;
   };
 
   void RunCopyTableCheck(const RunCopyTableCheckArgs& args) {
@@ -566,14 +567,15 @@ class ToolTest : public KuduTest {
     string stdout;
     NO_FATALS(RunActionStdoutString(
                 Substitute("table copy $0 $1 $2 -dst_table=$3 -predicates=$4 
-write_type=$5 "
-                           "-create_table=$6",
+                           "-create_table=$6 
-create_table_replication_factor=$7",
                            cluster_->master()->bound_rpc_addr().ToString(),
                            args.src_table_name,
                            cluster_->master()->bound_rpc_addr().ToString(),
                            kDstTableName,
                            args.predicates_json,
                            write_type,
-                           create_table),
+                           create_table,
+                           args.create_table_replication_factor),
                 &stdout));
 
     // Check total count.
@@ -599,10 +601,15 @@ class ToolTest : public KuduTest {
                    cluster_->master()->bound_rpc_addr().ToString(),
                    kDstTableName), &dst_schema));
 
-      // Remove the first lines, which are the different table names.
-      src_schema.erase(src_schema.begin());
-      dst_schema.erase(dst_schema.begin());
-      ASSERT_EQ(src_schema, dst_schema);
+      ASSERT_EQ(src_schema.size(), dst_schema.size());
+      for (int i = 0; i < src_schema.size(); ++i) {
+        // Table name is different.
+        if (HasPrefixString(src_schema[i], "TABLE ")) continue;
+        // Replication factor is different when explicitly set it to 3 
(default 1).
+        if (args.create_table_replication_factor == 3 &&
+            HasPrefixString(src_schema[i], "REPLICAS ")) continue;
+        ASSERT_EQ(src_schema[i], dst_schema[i]);
+      }
     }
 
     // Check all values.
@@ -719,7 +726,13 @@ class ToolTestCopyTableParameterized :
  public:
   void SetUp() override {
     test_case_ = GetParam();
-    NO_FATALS(StartExternalMiniCluster());
+    ExternalMiniClusterOptions opts;
+    if (test_case_ == kTestCopyTableSchemaOnly) {
+      // In kTestCopyTableSchemaOnly case, we may create table with RF=3,
+      // means 3 tservers needed at least.
+      opts.num_tablet_servers = 3;
+    }
+    NO_FATALS(StartExternalMiniCluster(opts));
 
     // Create the src table and write some data to it.
     TestWorkload ww(cluster_.get());
@@ -757,7 +770,8 @@ class ToolTestCopyTableParameterized :
                                    1,
                                    total_rows_,
                                    kSimpleSchemaColumns,
-                                   TableCopyMode::INSERT_TO_EXIST_TABLE };
+                                   TableCopyMode::INSERT_TO_EXIST_TABLE,
+                                   -1 };
     switch (test_case_) {
       case kTestCopyTableDstTableExist:
         return { args };
@@ -767,9 +781,25 @@ class ToolTestCopyTableParameterized :
       case kTestCopyTableUpsert:
         args.mode = TableCopyMode::UPSERT_TO_EXIST_TABLE;
         return { args };
-      case kTestCopyTableSchemaOnly:
+      case kTestCopyTableSchemaOnly: {
         args.mode = TableCopyMode::COPY_SCHEMA_ONLY;
-        return { args };
+        vector<RunCopyTableCheckArgs> multi_args;
+        {
+          auto args_temp = args;
+          multi_args.emplace_back(std::move(args_temp));
+        }
+        {
+          auto args_temp = args;
+          args_temp.create_table_replication_factor = 1;
+          multi_args.emplace_back(std::move(args_temp));
+        }
+        {
+          auto args_temp = args;
+          args_temp.create_table_replication_factor = 3;
+          multi_args.emplace_back(std::move(args_temp));
+        }
+        return multi_args;
+      }
       case kTestCopyTableComplexSchema:
         args.columns = kComplexSchemaColumns;
         args.mode = TableCopyMode::INSERT_TO_NOT_EXIST_TABLE;
diff --git a/src/kudu/tools/table_scanner.cc b/src/kudu/tools/table_scanner.cc
index 8be3b4c15..f3e8beb73 100644
--- a/src/kudu/tools/table_scanner.cc
+++ b/src/kudu/tools/table_scanner.cc
@@ -88,6 +88,9 @@ using strings::Substitute;
 
 DEFINE_bool(create_table, true,
             "Whether to create the destination table if it doesn't exist.");
+DEFINE_int32(create_table_replication_factor, -1,
+             "The replication factor of the destination table if the table 
will be created. "
+             "By default, the replication factor of source table will be 
used.");
 DEFINE_bool(fill_cache, true,
             "Whether to fill block cache when scanning.");
 DEFINE_string(predicates, "",
@@ -116,7 +119,7 @@ DEFINE_bool(show_values, false,
 DEFINE_string(write_type, "insert",
               "How data should be copied to the destination table. Valid 
values are 'insert', "
               "'upsert' or the empty string. If the empty string, data will 
not be copied "
-              "(useful when create_table is 'true').");
+              "(useful when --create_table=true).");
 DEFINE_string(replica_selection, "CLOSEST",
               "Replica selection for scan operations. Acceptable values are: "
               "CLOSEST, LEADER (maps into KuduClient::CLOSEST_REPLICA and "
@@ -415,10 +418,12 @@ Status CreateDstTableIfNeeded(const 
client::sp::shared_ptr<KuduTable>& src_table
   };
 
   // Table schema and replica number.
+  int num_replicas = FLAGS_create_table_replication_factor == -1 ?
+      src_table->num_replicas() : FLAGS_create_table_replication_factor;
   unique_ptr<KuduTableCreator> table_creator(dst_client->NewTableCreator());
   table_creator->table_name(dst_table_name)
       .schema(&dst_table_schema)
-      .num_replicas(src_table->num_replicas());
+      .num_replicas(num_replicas);
 
   // Add hash partition schema.
   for (const auto& hash_dimension : partition_schema.hash_schema()) {
diff --git a/src/kudu/tools/tool_action_table.cc 
b/src/kudu/tools/tool_action_table.cc
index b9e6627dd..87ee2a5f1 100644
--- a/src/kudu/tools/tool_action_table.cc
+++ b/src/kudu/tools/tool_action_table.cc
@@ -58,6 +58,7 @@
 #include "kudu/tools/tool.pb.h"
 #include "kudu/tools/tool_action.h"
 #include "kudu/tools/tool_action_common.h"
+#include "kudu/util/flag_validators.h"
 #include "kudu/util/jsonreader.h"
 #include "kudu/util/jsonwriter.h"
 #include "kudu/util/status.h"
@@ -130,6 +131,8 @@ DEFINE_bool(show_avro_format_schema, false,
             "table schema in Avro format without any other information like "
             "partition/owner/comments. It cannot be used in conjunction with 
other flags");
 
+DECLARE_bool(create_table);
+DECLARE_int32(create_table_replication_factor);
 DECLARE_bool(row_count_only);
 DECLARE_bool(show_scanner_stats);
 
@@ -146,6 +149,17 @@ DECLARE_bool(show_values);
 DECLARE_string(replica_selection);
 DECLARE_string(tables);
 
+bool ValidateCreateTable() {
+  if (!FLAGS_create_table && FLAGS_create_table_replication_factor != -1) {
+    LOG(ERROR) << Substitute("--create_table_replication_factor is meaningless 
"
+                             "when --create_table=false");
+    return false;
+  }
+  return true;
+}
+
+GROUP_FLAG_VALIDATOR(create_table, ValidateCreateTable);
+
 namespace kudu {
 namespace tools {
 
@@ -1553,6 +1567,7 @@ unique_ptr<Mode> BuildTableMode() {
       .AddRequiredParameter({ kTableNameArg, "Name of the source table" })
       .AddRequiredParameter({ kDestMasterAddressesArg, 
kDestMasterAddressesArgDesc })
       .AddOptionalParameter("create_table")
+      .AddOptionalParameter("create_table_replication_factor")
       .AddOptionalParameter("dst_table")
       .AddOptionalParameter("num_threads")
       .AddOptionalParameter("predicates")

Reply via email to