This is an automated email from the ASF dual-hosted git repository. alexey pushed a commit to branch branch-1.14.x in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/branch-1.14.x by this push: new 220e051 KUDU-3268: fix race between MM scheduling and unregistration 220e051 is described below commit 220e05166fbb8f25fb72d28886116a60a1973828 Author: Andrew Wong <aw...@cloudera.com> AuthorDate: Thu Apr 8 14:39:54 2021 -0700 KUDU-3268: fix race between MM scheduling and unregistration Following commit 9e4664d the following race was possible (S: scheduler thread, T: another thread): S: finds best op is op1 T: unregisters op1 because the tablet is being shut down T: takes running_instances_lock_ - waits for running count to go to 0 - destructs op1 S: takes running_instances_lock_ - increments running count to 1 S: attempts to run op1 but segfaults This resulted in a crash like the following, when shutting down the tserver (though it seems possible this could manifest itself when deleting a TabletReplica): PC: @ 0x7ff97596de0d kudu::MaintenanceManager::LaunchOp() *** SIGSEGV (@0x30) received by PID 21067 (TID 0x7ff96343b700) from PID 48; stack trace: *** @ 0x7ff976846980 (unknown) at ??:0 @ 0x7ff97596de0d kudu::MaintenanceManager::LaunchOp() at ??:0 @ 0x7ff97596b538 _ZZN4kudu18MaintenanceManager18RunSchedulerThreadEvENKUlvE_clEv at ??:0 @ 0x7ff97596f124 _ZNSt17_Function_handlerIFvvEZN4kudu18MaintenanceManager18RunSchedulerThreadEvEUlvE_E9_M_invokeERKSt9_Any_data at ??:0 @ 0x7ff977e2bcf4 std::function<>::operator()() at ??:0 @ 0x7ff975a05e6e kudu::ThreadPool::DispatchThread() at ??:0 @ 0x7ff975a06757 _ZZN4kudu10ThreadPool12CreateThreadEvENKUlvE_clEv at ??:0 @ 0x7ff975a07e7b _ZNSt17_Function_handlerIFvvEZN4kudu10ThreadPool12CreateThreadEvEUlvE_E9_M_invokeERKSt9_Any_data at ??:0 @ 0x7ff977e2bcf4 std::function<>::operator()() at ??:0 @ 0x7ff9759f8913 kudu::Thread::SuperviseThread() at ??:0 @ 0x7ff97683b6db start_thread at ??:0 @ 0x7ff97388e71f clone at ??:0 This patch fixes this by checking whether the op has been cancelled, while holding running_instanes_lock_, before proceeding with the scheduling. I added a new test that failed consistently within 100 runs, that passed 5000 times locally. Change-Id: I29ad4d545de8166832b7c4148880a7e8719353bf Reviewed-on: http://gerrit.cloudera.org:8080/17291 Reviewed-by: Alexey Serbin <aser...@cloudera.com> Tested-by: Andrew Wong <aw...@cloudera.com> (cherry picked from commit 6aeb466c538d8cc566031c4a82200f18f088e18a) Reviewed-on: http://gerrit.cloudera.org:8080/17293 --- src/kudu/util/maintenance_manager-test.cc | 18 ++++++++++++++++-- src/kudu/util/maintenance_manager.cc | 9 +++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/kudu/util/maintenance_manager-test.cc b/src/kudu/util/maintenance_manager-test.cc index 99a8b39..4870e08 100644 --- a/src/kudu/util/maintenance_manager-test.cc +++ b/src/kudu/util/maintenance_manager-test.cc @@ -326,8 +326,8 @@ TEST_F(MaintenanceManagerTest, TestNewOpsDontGetScheduledDuringUnregister) { // Wait until two instances of the ops start running, since we have two MM threads. ASSERT_EVENTUALLY([&]() { - ASSERT_EQ(op1.RunningGauge()->value(), 2); - }); + ASSERT_EQ(op1.RunningGauge()->value(), 2); + }); // Trigger Unregister while they are running. This should wait for the currently- // running operations to complete, but no new operations should be scheduled. @@ -683,4 +683,18 @@ TEST_F(MaintenanceManagerTest, TestPriorityOpLaunch) { } } +// Regression test for KUDU-3268, where the unregistering and destruction of an +// op may race with the scheduling of that op, resulting in a segfault. +TEST_F(MaintenanceManagerTest, TestUnregisterWhileScheduling) { + TestMaintenanceOp op1("1", MaintenanceOp::HIGH_IO_USAGE); + op1.set_perf_improvement(10); + // Set a bunch of runs so we continually schedule the op. + op1.set_remaining_runs(1000000); + manager_->RegisterOp(&op1); + ASSERT_EVENTUALLY([&]() { + ASSERT_GE(op1.DurationHistogram()->TotalCount(), 1); + }); + op1.Unregister(); +} + } // namespace kudu diff --git a/src/kudu/util/maintenance_manager.cc b/src/kudu/util/maintenance_manager.cc index 64a361a..deeaf2f 100644 --- a/src/kudu/util/maintenance_manager.cc +++ b/src/kudu/util/maintenance_manager.cc @@ -348,7 +348,16 @@ void MaintenanceManager::RunSchedulerThread() { op_note = std::move(best_op_and_why.second); } if (op) { + // While 'running_instances_lock_' is held, check one more time for + // whether the op is cancelled. This ensures that we don't attempt to + // launch an op that has been destructed in UnregisterOp(). See + // KUDU-3268 for more details. std::lock_guard<Mutex> guard(running_instances_lock_); + if (op->cancelled()) { + VLOG_AND_TRACE_WITH_PREFIX("maintenance", 2) + << "picked maintenance operation that has been cancelled"; + continue; + } IncreaseOpCount(op); prev_iter_found_no_work = false; } else {