KUDU-2191 (9/n): HMS Catalog should short-circuit no-op alter and create table 
calls

This tweaks the HMS catalog class introduced in part 7 to be more
flexible with how tables are created and altered.  In particular, when
altering, renaming, or creating a table entry in the HMS, it returns an
OK status if the HMS already has the proper table entry, effectively
short-circuiting the operation. Some short-circuiting existed before,
but this makes it apply more uniformly in more situations.  This is
necessary for the notification listener to function correctly, since
it's not always possible to determine which notification log events have
aready been applied.

I also took the opportunity to refactor HMS catalog a bit, since
otherwise it would have been very unweildy to implement this
functionality. I've added some specific test cases to hms_catalog-test,
and some error message strings have changed slightly.

Change-Id: I9c3594db0db253bc88e032b8c5aa2c8667c49853
Reviewed-on: http://gerrit.cloudera.org:8080/9965
Reviewed-by: Adar Dembo <a...@cloudera.com>
Reviewed-by: Hao Hao <hao....@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/00c2754c
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/00c2754c
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/00c2754c

Branch: refs/heads/master
Commit: 00c2754cae8bd58bf3fa61ea95da74eed4133149
Parents: 21f651d
Author: Dan Burkert <danburk...@apache.org>
Authored: Mon Apr 9 15:46:47 2018 -0700
Committer: Dan Burkert <danburk...@apache.org>
Committed: Tue Apr 10 18:55:45 2018 +0000

----------------------------------------------------------------------
 src/kudu/hms/hms_catalog-test.cc               |  68 ++++++-----
 src/kudu/hms/hms_catalog.cc                    | 123 ++++++++++++--------
 src/kudu/hms/hms_catalog.h                     |  22 +++-
 src/kudu/hms/hms_client.cc                     |   1 +
 src/kudu/hms/hms_client.h                      |   1 +
 src/kudu/integration-tests/master_hms-itest.cc |   4 +-
 6 files changed, 134 insertions(+), 85 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/00c2754c/src/kudu/hms/hms_catalog-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_catalog-test.cc b/src/kudu/hms/hms_catalog-test.cc
index d060320..af0244b 100644
--- a/src/kudu/hms/hms_catalog-test.cc
+++ b/src/kudu/hms/hms_catalog-test.cc
@@ -258,6 +258,17 @@ TEST_P(HmsCatalogTestParameterized, TestTableLifecycle) {
   ASSERT_OK(hms_catalog_->CreateTable(kTableId, kTableName, schema));
   NO_FATALS(CheckTable(kHmsDatabase, kHmsTableName, kTableId, schema));
 
+  // Create the table again. This should succeed since the table ID matches. 
The
+  // HMS catalog will automatically short-circuit creating the table.
+  // TODO(dan): once we have HMS catalog stats, assert that the op short 
circuits.
+  ASSERT_OK(hms_catalog_->CreateTable(kTableId, kTableName, schema));
+  NO_FATALS(CheckTable(kHmsDatabase, kHmsTableName, kTableId, schema));
+
+  // Create the table again, but with a different table ID.
+  Status s = hms_catalog_->CreateTable("new-table-id", kTableName, schema);
+  ASSERT_TRUE(s.IsAlreadyPresent()) << s.ToString();
+  NO_FATALS(CheckTable(kHmsDatabase, kHmsTableName, kTableId, schema));
+
   // Alter the table.
   SchemaBuilder b(schema);
   b.AddColumn("new_column", DataType::INT32);
@@ -305,70 +316,73 @@ TEST_F(HmsCatalogTest, TestLegacyTables) {
 // belong to external tables from other systems.
 TEST_F(HmsCatalogTest, TestExternalTable) {
   const string kTableId = "table-id";
-  const string kHmsDatabase = "default";
-
-  const string kHmsExternalTable = "external_table";
-  const string kExternalTableName = Substitute("$0.$1", kHmsDatabase, 
kHmsExternalTable);
-
-  const string kHmsKuduTable = "kudu_table";
-  const string kKuduTableName = Substitute("$0.$1", kHmsDatabase, 
kHmsKuduTable);
 
-  // Create the external table.
+  // Create the external table (default.ext).
   hive::Table external_table;
-  external_table.dbName = kHmsDatabase;
-  external_table.tableName = kHmsExternalTable;
+  external_table.dbName = "default";
+  external_table.tableName = "ext";
   external_table.tableType = HmsClient::kManagedTable;
   ASSERT_OK(hms_client_->CreateTable(external_table));
   // Retrieve the full HMS table object so it can be compared later (the HMS
   // fills in some fields during creation).
-  ASSERT_OK(hms_client_->GetTable(kHmsDatabase, kHmsExternalTable, 
&external_table));
+  ASSERT_OK(hms_client_->GetTable("default", "ext", &external_table));
 
   auto CheckExternalTable = [&] {
     hive::Table current_table;
-    ASSERT_OK(hms_client_->GetTable(kHmsDatabase, kHmsExternalTable, 
&current_table));
+    ASSERT_OK(hms_client_->GetTable("default", "ext", &current_table));
     ASSERT_EQ(current_table, external_table);
   };
 
-  // Create the Kudu table.
+  // Create the Kudu table (default.a).
   Schema schema = AllTypesSchema();
-  ASSERT_OK(hms_catalog_->CreateTable(kTableId, kKuduTableName, schema));
-  NO_FATALS(CheckTable(kHmsDatabase, kHmsKuduTable, kTableId, schema));
+  ASSERT_OK(hms_catalog_->CreateTable(kTableId, "default.a", schema));
+  NO_FATALS(CheckTable("default", "a", kTableId, schema));
 
   // Try and create a Kudu table with the same name as the external table.
-  Status s = hms_catalog_->CreateTable(kTableId, kKuduTableName, schema);
+  Status s = hms_catalog_->CreateTable(kTableId, "default.ext", schema);
   EXPECT_TRUE(s.IsAlreadyPresent()) << s.ToString();
   NO_FATALS(CheckExternalTable());
 
   // Try and rename the Kudu table to the external table name.
-  s = hms_catalog_->AlterTable(kTableId, kKuduTableName, kExternalTableName, 
schema);
+  s = hms_catalog_->AlterTable(kTableId, "default.a", "default.ext", schema);
   EXPECT_TRUE(s.IsIllegalState()) << s.ToString();
   NO_FATALS(CheckExternalTable());
-  NO_FATALS(CheckTable(kHmsDatabase, kHmsKuduTable, kTableId, schema));
+  NO_FATALS(CheckTable("default", "a", kTableId, schema));
 
   // Try and rename a Kudu table from the external table name to a new name.
   // This depends on the Kudu table not actually existing in the HMS catalog.
-  const string kHmsRenamedTable = "renamed_table";
-  const string kRenamedTableName = Substitute("$0.$1", kHmsDatabase, 
kHmsRenamedTable);
-  ASSERT_OK(hms_catalog_->AlterTable(kTableId, kExternalTableName, 
kRenamedTableName, schema));
+  ASSERT_OK(hms_catalog_->AlterTable(kTableId, "default.ext", "default.b", 
schema));
+  NO_FATALS(CheckExternalTable());
+  // The 'renamed' table is really just created with the new name.
+  NO_FATALS(CheckTable("default", "b", kTableId, schema));
+
+  // Try the previous alter operation again. This should succeed, since the
+  // destination table ID matches, so the HMS catalog knows its the same table.
+  // TODO(dan): once we have HMS catalog stats, assert that the op short 
circuits.
+  ASSERT_OK(hms_catalog_->AlterTable(kTableId, "default.ext", "default.b", 
schema));
   NO_FATALS(CheckExternalTable());
   // The 'renamed' table is really just created with the new name.
-  NO_FATALS(CheckTable(kHmsDatabase, kHmsRenamedTable, kTableId, schema));
+  NO_FATALS(CheckTable("default", "b", kTableId, schema));
+
+  // Try the previous alter operation again, but with a different table ID.
+  s = hms_catalog_->AlterTable("new-table-id", "default.ext", "default.b", 
schema);
+  ASSERT_TRUE(s.IsAlreadyPresent()) << s.ToString();
 
   // Try and alter a Kudu table with the same name as the external table.
   // This depends on the Kudu table not actually existing in the HMS catalog.
-  s = hms_catalog_->AlterTable(kTableId, kExternalTableName, 
kExternalTableName, schema);
-  EXPECT_TRUE(s.IsIllegalState()) << s.ToString();
+  s = hms_catalog_->AlterTable(kTableId, "default.ext", "default.ext", schema);
+  EXPECT_TRUE(s.IsAlreadyPresent()) << s.ToString();
   NO_FATALS(CheckExternalTable());
 
   // Try and drop the external table as if it were a Kudu table.  This should
   // return an OK status, but not actually modify the external table.
-  ASSERT_OK(hms_catalog_->DropTable(kTableId, kExternalTableName));
+  ASSERT_OK(hms_catalog_->DropTable(kTableId, "default.ext"));
   NO_FATALS(CheckExternalTable());
 
   // Drop a Kudu table with no corresponding HMS entry.
-  NO_FATALS(CheckTableDoesNotExist(kHmsDatabase, "bogus_table_name"));
+  NO_FATALS(CheckTableDoesNotExist("default", "bogus_table_name"));
   ASSERT_OK(hms_catalog_->DropTable(kTableId, "default.bogus_table_name"));
-  NO_FATALS(CheckTableDoesNotExist(kHmsDatabase, "bogus_table_name"));
+  NO_FATALS(CheckTableDoesNotExist("default", "bogus_table_name"));
 }
 
 // Checks that the HmsCatalog handles reconnecting to the metastore after a 
connection failure.

http://git-wip-us.apache.org/repos/asf/kudu/blob/00c2754c/src/kudu/hms/hms_catalog.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_catalog.cc b/src/kudu/hms/hms_catalog.cc
index a6433b3..a8b70a2 100644
--- a/src/kudu/hms/hms_catalog.cc
+++ b/src/kudu/hms/hms_catalog.cc
@@ -135,11 +135,8 @@ void HmsCatalog::Stop() {
 Status HmsCatalog::CreateTable(const string& id,
                                const string& name,
                                const Schema& schema) {
-  hive::Table table;
-  RETURN_NOT_OK(PopulateTable(id, name, schema, &table));
-
   return Execute([&] (HmsClient* client) {
-    return client->CreateTable(table);
+      return CreateOrUpdateTable(client, id, name, schema, master_addresses_);
   });
 }
 
@@ -200,19 +197,17 @@ Status HmsCatalog::AlterTable(const string& id,
       // - The new table name isn't a valid Hive database/table pair
       // - The original table entry does not exist in the HMS
       // - The original table entry doesn't match the Kudu table being altered
+      // - The alteration has already been applied to the HMS
       //
       // Where possible, we try to repair the HMS state to whatever it should 
be
       // in these situations, unless we detect that such a repair would alter 
an
       // unrelated table entry.
 
-      // A small function which creates the table, instead of altering it. We
-      // can't call HmsCatalog::CreateTable in this context because it would
-      // deadlock the single-thread executor.
-      auto create_table = [&] {
-        hive::Table table;
-        RETURN_NOT_OK(PopulateTable(id, new_name, schema, &table));
-        return client->CreateTable(table);
-      };
+      // If this is not a rename, then attempt to update the existing entry, or
+      // create it if it's missing.
+      if (name == new_name) {
+        return CreateOrUpdateTable(client, id, name, schema, 
master_addresses_);
+      }
 
       string hms_database;
       string hms_table;
@@ -220,53 +215,34 @@ Status HmsCatalog::AlterTable(const string& id,
       if (!s.ok()) {
         // Parsing the original table name has failed, so it can not be 
present in
         // the HMS. Instead of altering the table, create it in the HMS as a 
new table.
-        VLOG(1) << "Failed to parse the name of the table being altered as an "
-                   "HMS database/table pair, will attempt to create a new HMS 
table entry: "
+        VLOG(1) << "Failed to parse the name of the table being renamed as an "
+                   "HMS database/table pair, will attempt to create an HMS 
table entry "
+                   "with the new name: "
                 << s.ToString();
-        return create_table();
+        return CreateOrUpdateTable(client, id, new_name, schema, 
master_addresses_);
       }
 
-      hive::Table orig_table;
-      s = client->GetTable(hms_database, hms_table, &orig_table);
+      hive::Table table;
+      s = client->GetTable(hms_database, hms_table, &table);
+
       if (s.IsNotFound()) {
         // The table doesn't already exist in the HMS, so create it.
-        VLOG(1) << "The table being altered does not have an existing HMS 
entry, "
-                   "will attempt to create a new HMS table entry.";
-        return create_table();
+        VLOG(1) << "The table being renamed does not have an existing HMS 
entry, "
+                   "will attempt to create an HMS table entry with the new 
name.";
+        return CreateOrUpdateTable(client, id, new_name, schema, 
master_addresses_);
       }
 
-      // All other errors are fatal.
-      RETURN_NOT_OK(s);
-
       // Check that the HMS entry belongs to the table being altered.
-      if 
(orig_table.parameters[hive::g_hive_metastore_constants.META_TABLE_STORAGE] !=
-          HmsClient::kKuduStorageHandler ||
-          orig_table.parameters[HmsClient::kKuduTableIdKey] != id) {
+      if (table.parameters[HmsClient::kStorageHandlerKey] != 
HmsClient::kKuduStorageHandler ||
+          table.parameters[HmsClient::kKuduTableIdKey] != id) {
         // The original table isn't a Kudu table, or isn't the same Kudu table.
-        if (name != new_name) {
-          // If this is a rename table operation, then just attempt to create
-          // the table under the new name.
-          VLOG(1) << "The HMS entry for the table being altered belongs to 
another table, "
-                     "will attempt to create a new HMS table entry.";
-          return create_table();
-        }
-        // Otherwise we fail, since we should not alter an entry belonging to
-        // a different table.
-        return Status::IllegalState("the HMS entry does not belong to the Kudu 
table");
-      }
-
-      // Create a copy of the table object, and set the Kudu fields in it. If
-      // the original table object and the new table object match exactly then
-      // we don't need to alter the table in the HMS.
-      hive::Table table(orig_table);
-      RETURN_NOT_OK(PopulateTable(id, new_name, schema, &table));
-
-      if (orig_table == table) {
-        CHECK_EQ(name, new_name);
-        VLOG(1) << "Short-circuiting alter table for " << name << " (" << id 
<< ")";
-        return Status::OK();
+        VLOG(1) << "The HMS entry for the table being renamed belongs to 
another table, "
+                    "will attempt to create an HMS entry with the new name.";
+        return CreateOrUpdateTable(client, id, new_name, schema, 
master_addresses_);
       }
 
+      // Overwrite fields in the table that have changed, including the new 
name.
+      RETURN_NOT_OK(PopulateTable(id, new_name, schema, master_addresses_, 
&table));
       return client->AlterTable(hms_database, hms_table, table);
   });
 }
@@ -452,6 +428,7 @@ hive::FieldSchema column_to_field(const ColumnSchema& 
column) {
 Status HmsCatalog::PopulateTable(const string& id,
                                  const string& name,
                                  const Schema& schema,
+                                 const string& master_addresses,
                                  hive::Table* table) {
   RETURN_NOT_OK(ParseTableName(name, &table->dbName, &table->tableName));
   table->tableType = HmsClient::kManagedTable;
@@ -459,9 +436,8 @@ Status HmsCatalog::PopulateTable(const string& id,
   // Add the Kudu-specific parameters. This intentionally avoids overwriting
   // other parameters.
   table->parameters[HmsClient::kKuduTableIdKey] = id;
-  table->parameters[HmsClient::kKuduMasterAddrsKey] = master_addresses_;
-  table->parameters[hive::g_hive_metastore_constants.META_TABLE_STORAGE] =
-      HmsClient::kKuduStorageHandler;
+  table->parameters[HmsClient::kKuduMasterAddrsKey] = master_addresses;
+  table->parameters[HmsClient::kStorageHandlerKey] = 
HmsClient::kKuduStorageHandler;
 
   // Overwrite the entire set of columns.
   vector<hive::FieldSchema> fields;
@@ -473,6 +449,51 @@ Status HmsCatalog::PopulateTable(const string& id,
   return Status::OK();
 }
 
+Status HmsCatalog::CreateOrUpdateTable(hms::HmsClient* client,
+                                       const string& id,
+                                       const string& name,
+                                       const Schema& schema,
+                                       const string& master_addresses) {
+  string hms_database;
+  string hms_table;
+  RETURN_NOT_OK(ParseTableName(name, &hms_database, &hms_table));
+
+  hive::Table existing_table;
+  Status s = client->GetTable(hms_database, hms_table, &existing_table);
+
+  if (s.IsNotFound()) {
+      VLOG(1) << "Table " << name << " (" << id << ") does not have an 
existing HMS entry, "
+                  "will attempt to create a new HMS table entry.";
+      hive::Table table;
+      RETURN_NOT_OK(PopulateTable(id, name, schema, master_addresses, &table));
+      return client->CreateTable(table);
+  }
+
+  // All other errors are fatal.
+  RETURN_NOT_OK(s);
+
+  // The table already exists, so we update it.
+
+  // Check if the existing HMS entry belongs to the table being altered.
+  if (existing_table.parameters[HmsClient::kStorageHandlerKey] != 
HmsClient::kKuduStorageHandler ||
+      existing_table.parameters[HmsClient::kKuduTableIdKey] != id) {
+
+    // Otherwise we fail, since we must not update an entry belonging to a 
different table.
+    return Status::AlreadyPresent(Substitute("table $0 already exists in the 
HMS", name));
+  }
+
+  // Create a copy of the table object, and set the Kudu fields in it. If
+  // the original table object and the new table object match exactly then
+  // we don't need to alter the table in the HMS.
+  hive::Table table(existing_table);
+  RETURN_NOT_OK(PopulateTable(id, name, schema, master_addresses, &table));
+  if (existing_table == table) {
+    VLOG(1) << "Short-circuiting alter or update table for " << name << " (" 
<< id << ")";
+    return Status::OK();
+  }
+  return client->AlterTable(hms_database, hms_table, table);
+}
+
 Status HmsCatalog::ParseTableName(const string& table,
                                   string* hms_database,
                                   string* hms_table) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/00c2754c/src/kudu/hms/hms_catalog.h
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_catalog.h b/src/kudu/hms/hms_catalog.h
index f8768f4..d970a1b 100644
--- a/src/kudu/hms/hms_catalog.h
+++ b/src/kudu/hms/hms_catalog.h
@@ -105,13 +105,25 @@ class HmsCatalog {
 
   // Returns true if the RPC status is 'fatal', e.g. the Thrift connection on
   // which it occurred should be shut down.
-  bool IsFatalError(const Status& status);
+  static bool IsFatalError(const Status& status);
 
   // Sets the Kudu-specific fields in the table without overwriting unrelated 
fields.
-  Status PopulateTable(const std::string& id,
-                       const std::string& name,
-                       const Schema& schema,
-                       hive::Table* table) WARN_UNUSED_RESULT;
+  static Status PopulateTable(const std::string& id,
+                              const std::string& name,
+                              const Schema& schema,
+                              const std::string& master_addresses,
+                              hive::Table* table) WARN_UNUSED_RESULT;
+
+  // Creates a table entry in the HMS, or updates that existing entry if the
+  // table already has an entry.
+  //
+  // Instead of only attempting to create or alter a table entry, this method 
should be
+  // used to ensure the HMS is kept synchronized in as many edge cases as 
possible.
+  static Status CreateOrUpdateTable(hms::HmsClient* client,
+                                    const std::string& id,
+                                    const std::string& name,
+                                    const Schema& schema,
+                                    const std::string& master_addresses) 
WARN_UNUSED_RESULT;
 
   // Parses a Kudu table name into a Hive database and table name.
   // Returns an error if the Kudu table name is not correctly formatted.

http://git-wip-us.apache.org/repos/asf/kudu/blob/00c2754c/src/kudu/hms/hms_client.cc
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_client.cc b/src/kudu/hms/hms_client.cc
index 1e889ad..aeb7c23 100644
--- a/src/kudu/hms/hms_client.cc
+++ b/src/kudu/hms/hms_client.cc
@@ -116,6 +116,7 @@ const char* const 
HmsClient::kDisallowIncompatibleColTypeChanges =
   "hive.metastore.disallow.incompatible.col.type.changes";
 const char* const HmsClient::kDbNotificationListener =
   "org.apache.hive.hcatalog.listener.DbNotificationListener";
+const char* const HmsClient::kStorageHandlerKey = "storage_handler";
 const char* const HmsClient::kKuduMetastorePlugin =
   "org.apache.kudu.hive.metastore.KuduMetastorePlugin";
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/00c2754c/src/kudu/hms/hms_client.h
----------------------------------------------------------------------
diff --git a/src/kudu/hms/hms_client.h b/src/kudu/hms/hms_client.h
index 38e9ab5..1b4d28d 100644
--- a/src/kudu/hms/hms_client.h
+++ b/src/kudu/hms/hms_client.h
@@ -89,6 +89,7 @@ class HmsClient {
 
   static const char* const kKuduTableIdKey;
   static const char* const kKuduMasterAddrsKey;
+  static const char* const kStorageHandlerKey;
   static const char* const kKuduStorageHandler;
 
   static const char* const kTransactionalEventListeners;

http://git-wip-us.apache.org/repos/asf/kudu/blob/00c2754c/src/kudu/integration-tests/master_hms-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/master_hms-itest.cc 
b/src/kudu/integration-tests/master_hms-itest.cc
index 06fabec..979d855 100644
--- a/src/kudu/integration-tests/master_hms-itest.cc
+++ b/src/kudu/integration-tests/master_hms-itest.cc
@@ -348,8 +348,8 @@ TEST_F(MasterHmsTest, TestAlterTable) {
 
   table_alterer.reset(client_->NewTableAlterer(table_name));
   s = table_alterer->DropColumn("int32_val")->Alter();
-  ASSERT_TRUE(s.IsIllegalState()) << s.ToString();
-  ASSERT_STR_CONTAINS(s.ToString(), "the HMS entry does not belong to the Kudu 
table");
+  EXPECT_TRUE(s.IsAlreadyPresent()) << s.ToString();
+  ASSERT_STR_CONTAINS(s.ToString(), "already exists in the HMS");
 }
 
 TEST_F(MasterHmsTest, TestDeleteTable) {

Reply via email to