This is an automated email from the ASF dual-hosted git repository.

laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new e89dfe9a7 [tablet] GC ancient, fully deleted rowsets without live row 
count stats
e89dfe9a7 is described below

commit e89dfe9a7d615b9cdb45e7222ac4dacb0e0916d3
Author: kedeng <kdeng...@gmail.com>
AuthorDate: Fri Mar 31 11:24:16 2023 +0800

    [tablet] GC ancient, fully deleted rowsets without live row count stats
    
    We added a background op to GC ancient, fully deleted rowsets for
    KUDU-1625 base on live row count. That patch is very useful, but
    does not work for older versions(earlier than 1.10) that do not
    support live row count stats. And during the upgrade process from
    a lower version to a higher version, live row count feature cannot
    be enabled for already existing data.
    
    To resolve this issue on a lower version of the kudu cluster, I submitted
    this patch. The main reason is to replace the use of live row count.
    However, due to the lack of a more accurate counting method, this patch
    may only release part of the storage space for ancient, fully deleted rows.
    Therefore, this feature can alleviate the storage space tension of older
    versions to a certain extent.
    
    If you need to enable this feature, enable the flag
    --enable_gc_deleted_rowsets_without_live_row_count and restart tservers.
    
    There's still room for improvement in this implementation in that, 
currently,
    we ignored the delete operation in DMS. I will resolve this issue in a 
follow-up
    patch.
    
    I ran this on a real cluster, the storage space of deleted rowsets that was 
not
    previously freed can be GCed as expected. And I also add unit test case to 
ensure
    it make sense.
    
    Change-Id: Iacdff107b8b07cbd56f47f296a93f4bcfbf56b41
    Reviewed-on: http://gerrit.cloudera.org:8080/19670
    Tested-by: Kudu Jenkins
    Reviewed-by: Yingchun Lai <laiyingc...@apache.org>
    Reviewed-by: Yuqi Du <shenxingwuy...@gmail.com>
---
 src/kudu/tablet/delta_tracker.cc          | 14 +++++++
 src/kudu/tablet/delta_tracker.h           |  5 +++
 src/kudu/tablet/diskrowset-test-base.h    |  2 +-
 src/kudu/tablet/diskrowset-test.cc        | 64 +++++++++++++++++++++++++++++++
 src/kudu/tablet/diskrowset.cc             | 19 ++++++++-
 src/kudu/tablet/diskrowset.h              | 12 ++++--
 src/kudu/tablet/tablet.cc                 | 12 +++++-
 src/kudu/tablet/tablet_history_gc-test.cc | 51 +++++++++++++++++++++---
 8 files changed, 167 insertions(+), 12 deletions(-)

diff --git a/src/kudu/tablet/delta_tracker.cc b/src/kudu/tablet/delta_tracker.cc
index 166eebe4c..d336867fd 100644
--- a/src/kudu/tablet/delta_tracker.cc
+++ b/src/kudu/tablet/delta_tracker.cc
@@ -1010,6 +1010,20 @@ int64_t DeltaTracker::CountDeletedRows() const {
   return deleted_row_count_ + (dms_exists_ ? dms_->deleted_row_count() : 0);
 }
 
+int64_t DeltaTracker::CountDeletedRowsInRedos() const {
+  shared_lock<rw_spinlock> lock(component_lock_);
+
+  int64_t delete_count = 0;
+  for (const shared_ptr<DeltaStore>& ds : redo_delta_stores_) {
+    // We won't force open files just to read their stats.
+    if (!ds->has_delta_stats()) {
+      continue;
+    }
+    delete_count += ds->delta_stats().delete_count();
+  }
+  return delete_count;
+}
+
 string DeltaTracker::LogPrefix() const {
   return Substitute("T $0 P $1: ",
                     rowset_metadata_->tablet_metadata()->tablet_id(),
diff --git a/src/kudu/tablet/delta_tracker.h b/src/kudu/tablet/delta_tracker.h
index 17717d1f4..d0b454aa7 100644
--- a/src/kudu/tablet/delta_tracker.h
+++ b/src/kudu/tablet/delta_tracker.h
@@ -287,6 +287,11 @@ class DeltaTracker {
   // in a flushing DMS (if one exists)
   int64_t CountDeletedRows() const;
 
+  // Count the number of deleted rows in REDO delta stores per their
+  // delta stats.
+  // Note : we won't force open files just to read their stats.
+  int64_t CountDeletedRowsInRedos() const;
+
  private:
   FRIEND_TEST(TestRowSet, TestRowSetUpdate);
   FRIEND_TEST(TestRowSet, TestDMSFlush);
diff --git a/src/kudu/tablet/diskrowset-test-base.h 
b/src/kudu/tablet/diskrowset-test-base.h
index 23e424bbb..c5ce471df 100644
--- a/src/kudu/tablet/diskrowset-test-base.h
+++ b/src/kudu/tablet/diskrowset-test-base.h
@@ -98,7 +98,7 @@ class TestRowSet : public KuduRowSetTest {
   // The string values are padded out to 15 digits
   void WriteTestRowSet(int n_rows = 0, bool zero_vals = false) {
     DiskRowSetWriter drsw(rowset_meta_.get(), &schema_,
-                          BloomFilterSizing::BySizeAndFPRate(32*1024, 0.01f));
+                          BloomFilterSizing::BySizeAndFPRate(32*1024, 0.01F));
     DoWriteTestRowSet(n_rows, &drsw, zero_vals);
   }
 
diff --git a/src/kudu/tablet/diskrowset-test.cc 
b/src/kudu/tablet/diskrowset-test.cc
index d645bdaa3..6081dc281 100644
--- a/src/kudu/tablet/diskrowset-test.cc
+++ b/src/kudu/tablet/diskrowset-test.cc
@@ -768,6 +768,70 @@ TEST_F(TestRowSet, TestDiskSizeEstimation) {
   ASSERT_GT(drss.redo_deltas_size, 0);
 }
 
+TEST_F(TestRowSet, TestGCDRSWithoutLiveRowCount) {
+  // Write a rowset.
+  WriteTestRowSet();
+  shared_ptr<DiskRowSet> rs;
+
+  ASSERT_OK(OpenTestRowSet(&rs));
+  uint64_t rs_live_rows;
+  // The rowset's live row count should be equal when there are no live row 
count
+  // stats available if no DMS exists.
+  ASSERT_OK(rs->CountLiveRows(&rs_live_rows));
+  ASSERT_EQ(rs_live_rows, FLAGS_roundtrip_num_rows);
+  uint64_t rs_live_rows_without_lrc;
+  
CHECK_OK(rs->CountLiveRowsWithoutLiveRowCountStats(&rs_live_rows_without_lrc));
+  ASSERT_EQ(rs_live_rows_without_lrc, rs_live_rows);
+
+  // Write a delta file.
+  UpdateExistingRows(rs.get(), static_cast<float>(FLAGS_update_fraction), 
nullptr);
+  ASSERT_OK(rs->FlushDeltas(nullptr));
+
+  // The live row count is 0 if the DRS has been fully deleted if no DMS 
exists.
+  NO_FATALS(DeleteExistingRows(rs.get(), 0, FLAGS_roundtrip_num_rows, 
nullptr));
+  ASSERT_OK(rs->FlushDeltas(nullptr));
+  CHECK_OK(rs->CountLiveRows(&rs_live_rows));
+  ASSERT_EQ(rs_live_rows, 0);
+  
CHECK_OK(rs->CountLiveRowsWithoutLiveRowCountStats(&rs_live_rows_without_lrc));
+  ASSERT_EQ(rs_live_rows_without_lrc, 0);
+}
+
+TEST_F(TestRowSet, TestCountLiveRowsWithoutLiveRowCountStats) {
+  // Write a rowset.
+  WriteTestRowSet();
+  shared_ptr<DiskRowSet> rs;
+
+  ASSERT_OK(OpenTestRowSet(&rs));
+  uint64_t rs_live_rows;
+  // The rowset's live row count from different way should be equal if if no
+  // DMS exists.
+  CHECK_OK(rs->CountLiveRows(&rs_live_rows));
+  ASSERT_EQ(rs_live_rows, FLAGS_roundtrip_num_rows);
+  uint64_t rs_live_rows_without_lrc;
+  
CHECK_OK(rs->CountLiveRowsWithoutLiveRowCountStats(&rs_live_rows_without_lrc));
+  ASSERT_EQ(rs_live_rows_without_lrc, rs_live_rows);
+
+  // Write a delta file.
+  UpdateExistingRows(rs.get(), static_cast<float>(FLAGS_update_fraction), 
nullptr);
+  ASSERT_OK(rs->FlushDeltas(nullptr));
+
+  // The delta file is OK to deal with without a DMS.
+  CHECK_OK(rs->CountLiveRows(&rs_live_rows));
+  ASSERT_EQ(rs_live_rows, FLAGS_roundtrip_num_rows);
+  
CHECK_OK(rs->CountLiveRowsWithoutLiveRowCountStats(&rs_live_rows_without_lrc));
+  ASSERT_EQ(rs_live_rows_without_lrc, rs_live_rows);
+
+  // Get a new DMS.
+  UpdateExistingRows(rs.get(), static_cast<float>(FLAGS_update_fraction), 
nullptr);
+
+  // The 'CountLiveRowsWithoutLiveRowCountStats' may get a smaller value than
+  // 'CountLiveRows' if a DMS exists.
+  CHECK_OK(rs->CountLiveRows(&rs_live_rows));
+  
CHECK_OK(rs->CountLiveRowsWithoutLiveRowCountStats(&rs_live_rows_without_lrc));
+  ASSERT_LE(rs_live_rows_without_lrc, rs_live_rows);
+  ASSERT_GE(rs_live_rows_without_lrc, FLAGS_roundtrip_num_rows);
+}
+
 class DiffScanRowSetTest : public KuduRowSetTest,
                            public ::testing::WithParamInterface<tuple<bool, 
bool>> {
  public:
diff --git a/src/kudu/tablet/diskrowset.cc b/src/kudu/tablet/diskrowset.cc
index cfbaf27f1..d0f985f87 100644
--- a/src/kudu/tablet/diskrowset.cc
+++ b/src/kudu/tablet/diskrowset.cc
@@ -39,6 +39,7 @@
 #include "kudu/common/types.h"
 #include "kudu/fs/block_manager.h"
 #include "kudu/fs/fs_manager.h"
+#include "kudu/fs/io_context.h"
 #include "kudu/gutil/port.h"
 #include "kudu/tablet/cfile_set.h"
 #include "kudu/tablet/compaction.h"
@@ -749,6 +750,17 @@ Status DiskRowSet::CountRows(const IOContext* io_context, 
rowid_t *count) const
   return Status::OK();
 }
 
+Status DiskRowSet::CountLiveRowsWithoutLiveRowCountStats(uint64_t* count) 
const {
+  IOContext io_context({ rowset_metadata_->tablet_metadata()->tablet_id() });
+  rowid_t rows_count;
+  // TODO(kedeng) : get operation count in DMS.
+  RETURN_NOT_OK(CountRows(&io_context, &rows_count));
+  int64_t deleted_count = delta_tracker_->CountDeletedRowsInRedos();
+  DCHECK_GE(rows_count, deleted_count);
+  *count = rows_count - deleted_count;
+  return Status::OK();
+}
+
 Status DiskRowSet::CountLiveRows(uint64_t* count) const {
   DCHECK_GE(rowset_metadata_->live_row_count(), 
delta_tracker_->CountDeletedRows());
   *count = rowset_metadata_->live_row_count() - 
delta_tracker_->CountDeletedRows();
@@ -877,7 +889,12 @@ Status 
DiskRowSet::EstimateBytesInPotentiallyAncientUndoDeltas(
 Status DiskRowSet::IsDeletedAndFullyAncient(Timestamp ancient_history_mark,
                                             bool* deleted_and_ancient) {
   uint64_t live_row_count = 0;
-  RETURN_NOT_OK(CountLiveRows(&live_row_count));
+  if (rowset_metadata_->tablet_metadata()->supports_live_row_count()) {
+    RETURN_NOT_OK(CountLiveRows(&live_row_count));
+  } else {
+    RETURN_NOT_OK(CountLiveRowsWithoutLiveRowCountStats(&live_row_count));
+  }
+
   if (live_row_count > 0) {
     *deleted_and_ancient = false;
     return Status::OK();
diff --git a/src/kudu/tablet/diskrowset.h b/src/kudu/tablet/diskrowset.h
index da5236379..742b5e248 100644
--- a/src/kudu/tablet/diskrowset.h
+++ b/src/kudu/tablet/diskrowset.h
@@ -384,14 +384,18 @@ class DiskRowSet :
 
   // Gets the number of rows in this rowset, checking 'num_rows_' first. If not
   // yet set, consults the base data and stores the result in 'num_rows_'.
-  Status CountRows(const fs::IOContext* io_context, rowid_t *count) const 
final override;
+  Status CountRows(const fs::IOContext* io_context, rowid_t *count) const 
final;
 
   // Count the number of live rows in this DRS.
-  virtual Status CountLiveRows(uint64_t* count) const override;
+  Status CountLiveRows(uint64_t* count) const override;
+  // Because possible operations in DMS and will be ignored, mainly because 
there is no API
+  // available in the old version(earlier than 1.10) of kudu to obtain this 
data, this API
+  // can only obtain an approximate live row count.
+  Status CountLiveRowsWithoutLiveRowCountStats(uint64_t* count) const;
 
   // See RowSet::GetBounds(...)
-  virtual Status GetBounds(std::string* min_encoded_key,
-                           std::string* max_encoded_key) const override;
+  Status GetBounds(std::string* min_encoded_key,
+                   std::string* max_encoded_key) const override;
 
   void GetDiskRowSetSpaceUsage(DiskRowSetSpace* drss) const;
 
diff --git a/src/kudu/tablet/tablet.cc b/src/kudu/tablet/tablet.cc
index f8fc7a7c0..29019173e 100644
--- a/src/kudu/tablet/tablet.cc
+++ b/src/kudu/tablet/tablet.cc
@@ -223,6 +223,15 @@ 
DEFINE_bool(rowset_compaction_ancient_delta_threshold_enabled, true,
 TAG_FLAG(rowset_compaction_ancient_delta_threshold_enabled, advanced);
 TAG_FLAG(rowset_compaction_ancient_delta_threshold_enabled, runtime);
 
+DEFINE_bool(enable_gc_deleted_rowsets_without_live_row_count, false,
+            "Whether to enable 'DeletedRowsetGCOp' for ancient, fully deleted "
+            "rowsets without live row count stats. This is used to release "
+            "the storage space of ancient, fully deleted rowsets generated "
+            "by Kudu clusters that do not have live row count stats. "
+            "If live row count feature is already supported in your kudu "
+            "cluster, just ignore this flag.");
+TAG_FLAG(enable_gc_deleted_rowsets_without_live_row_count, advanced);
+
 DECLARE_bool(enable_undo_delta_block_gc);
 DECLARE_uint32(rowset_compaction_estimate_min_deltas_size_mb);
 
@@ -1926,7 +1935,8 @@ void Tablet::RegisterMaintenanceOps(MaintenanceManager* 
maint_mgr) {
 
   // The deleted rowset GC operation relies on live rowset counting. If this
   // tablet doesn't support such counting, do not register the op.
-  if (metadata_->supports_live_row_count()) {
+  if (metadata_->supports_live_row_count()
+      || FLAGS_enable_gc_deleted_rowsets_without_live_row_count) {
     maintenance_ops.emplace_back(new DeletedRowsetGCOp(this));
     maint_mgr->RegisterOp(maintenance_ops.back().get());
   }
diff --git a/src/kudu/tablet/tablet_history_gc-test.cc 
b/src/kudu/tablet/tablet_history_gc-test.cc
index fe3ff54d0..ec67d3df4 100644
--- a/src/kudu/tablet/tablet_history_gc-test.cc
+++ b/src/kudu/tablet/tablet_history_gc-test.cc
@@ -61,6 +61,7 @@
 DECLARE_bool(enable_maintenance_manager);
 DECLARE_int32(tablet_history_max_age_sec);
 DECLARE_string(time_source);
+DECLARE_bool(enable_gc_deleted_rowsets_without_live_row_count);
 
 using kudu::clock::HybridClock;
 using std::nullopt;
@@ -112,6 +113,20 @@ class TabletHistoryGcTest : public 
TabletTestBase<IntKeyTestSetup<INT64>> {
     ASSERT_GT(metrics->deleted_rowset_gc_duration->TotalCount(), orig_count);
   }
 
+  // Attempt to run the deleted rowset GC, it is our expectation that GC will 
make no sense
+  // and nothing will change.
+  void TryRunningDeletedRowsetGCWithoutEffect() {
+    const auto& metrics = tablet()->metrics();
+    const int orig_bytes = metrics->deleted_rowset_gc_bytes_deleted->value();
+    const int orig_count = metrics->deleted_rowset_gc_duration->TotalCount();
+    int64_t bytes = 0;
+    ASSERT_OK(tablet()->GetBytesInAncientDeletedRowsets(&bytes));
+    ASSERT_EQ(0, bytes);
+    ASSERT_OK(tablet()->DeleteAncientDeletedRowsets());
+    ASSERT_EQ(orig_bytes, metrics->deleted_rowset_gc_bytes_deleted->value());
+    ASSERT_EQ(orig_count, metrics->deleted_rowset_gc_duration->TotalCount());
+  }
+
   // Helper functions that mutate rows in batches of keys:
   //   [0, rows_per_rowset)
   //   [rows_per_rowset, 2*rows_per_rowset)
@@ -675,7 +690,25 @@ TEST_F(TabletHistoryGcNoMaintMgrTest, 
TestUndoDeltaBlockGc) {
   ASSERT_EQ(1, 
tablet()->metrics()->undo_delta_block_gc_delete_duration->TotalCount());
 }
 
-TEST_F(TabletHistoryGcNoMaintMgrTest, TestGCDeletedRowsetsWithRedoFiles) {
+class TabletDeletedRowsetGcTest : public TabletHistoryGcNoMaintMgrTest,
+                                  public ::testing::WithParamInterface<bool> {
+public:
+  void SetSupportLiveRowCount() {
+    NO_FATALS(tablet()->metadata()->
+        set_supports_live_row_count_for_tests(/* supports_live_row_count */ 
GetParam()));
+  }
+
+  void SetUp() override {
+    TabletHistoryGcNoMaintMgrTest::SetUp();
+    NO_FATALS(SetSupportLiveRowCount());
+    FLAGS_enable_gc_deleted_rowsets_without_live_row_count = GetParam();
+  }
+};
+
+INSTANTIATE_TEST_SUITE_P(, TabletDeletedRowsetGcTest,
+                         ::testing::Values(true, false));
+
+TEST_P(TabletDeletedRowsetGcTest, TestGCDeletedRowsetsWithRedoFiles) {
   FLAGS_tablet_history_max_age_sec = 1000;
   NO_FATALS(InsertOriginalRows(kNumRowsets, rows_per_rowset_));
   ASSERT_EQ(kNumRowsets, tablet()->CountUndoDeltasForTests());
@@ -706,15 +739,23 @@ TEST_F(TabletHistoryGcNoMaintMgrTest, 
TestGCDeletedRowsetsWithRedoFiles) {
   ASSERT_EQ(0, tablet()->CountRedoDeltasForTests());
 }
 
-TEST_F(TabletHistoryGcNoMaintMgrTest, TestGCDeletedRowsetsWithDMS) {
+TEST_P(TabletDeletedRowsetGcTest, TestGCDeletedRowsetsWithDMS) {
   FLAGS_tablet_history_max_age_sec = 1000;
   NO_FATALS(InsertOriginalRows(kNumRowsets, rows_per_rowset_));
   NO_FATALS(DeleteOriginalRows(kNumRowsets, rows_per_rowset_, 
/*flush_dms*/false));
   
NO_FATALS(AddTimeToHybridClock(MonoDelta::FromSeconds(FLAGS_tablet_history_max_age_sec
 + 1)));
-  NO_FATALS(TryRunningDeletedRowsetGC());
+  if (GetParam()) {
+    NO_FATALS(TryRunningDeletedRowsetGC());
+  } else {
+    // We can't get the exact count of live row without live_row_count feature.
+    // If we disable the live_row_count, the imprecise counting will not 
trigger the GC.
+    // However, this is expected and will not cause any errors, so we can 
directly ignore this
+    // unit testcase at this time.
+    NO_FATALS(TryRunningDeletedRowsetGCWithoutEffect());
+  }
 }
 
-TEST_F(TabletHistoryGcNoMaintMgrTest, 
TestGCDeletedRowsetsAfterMinorCompaction) {
+TEST_P(TabletDeletedRowsetGcTest, TestGCDeletedRowsetsAfterMinorCompaction) {
   FLAGS_tablet_history_max_age_sec = 1000;
   NO_FATALS(InsertOriginalRows(kNumRowsets, rows_per_rowset_));
   // Flush twice as frequently when deleting so we end up with multiple delta
@@ -729,7 +770,7 @@ TEST_F(TabletHistoryGcNoMaintMgrTest, 
TestGCDeletedRowsetsAfterMinorCompaction)
   NO_FATALS(TryRunningDeletedRowsetGC());
 }
 
-TEST_F(TabletHistoryGcNoMaintMgrTest, 
TestGCDeletedRowsetsAfterMajorCompaction) {
+TEST_P(TabletDeletedRowsetGcTest, TestGCDeletedRowsetsAfterMajorCompaction) {
   FLAGS_tablet_history_max_age_sec = 1000;
   NO_FATALS(InsertOriginalRows(kNumRowsets, rows_per_rowset_));
   // Major delta compaction is a no-op if we only have deletes, so trick the

Reply via email to