This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new fd6757a274 [fix](function) fix create tablet timeout (#21682)
fd6757a274 is described below
commit fd6757a27425ec14020251618cdfbb6645617beb
Author: yujun <[email protected]>
AuthorDate: Tue Jul 11 09:46:43 2023 +0800
[fix](function) fix create tablet timeout (#21682)
To get tablet's local size and remote size both need lock the tablet's meta.
Sometimes the meta lock was locked by other process for a long time (eg,
publish version).
It will cause function 'get root path info' hold the tablet manager's shard
lock for a long time too.
Then it will cause creating tablet could not get the shard lock and finally
timeout.
Let getting tablet's local size/size go out of the scope of tablet manager
shard lock.
---
be/src/http/action/pad_rowset_action.cpp | 3 ++
be/src/olap/compaction.cpp | 1 +
be/src/olap/schema_change.cpp | 5 +++
be/src/olap/tablet.cpp | 6 ++++
be/src/olap/tablet.h | 2 ++
be/src/olap/tablet_manager.cpp | 38 +++++++++-------------
be/src/olap/tablet_manager.h | 3 +-
be/src/olap/task/engine_clone_task.cpp | 2 ++
be/src/util/trace.h | 29 +++++++++++++++++
.../apache/doris/datasource/InternalCatalog.java | 2 +-
10 files changed, 67 insertions(+), 24 deletions(-)
diff --git a/be/src/http/action/pad_rowset_action.cpp
b/be/src/http/action/pad_rowset_action.cpp
index df2721f50b..1b16e3f98d 100644
--- a/be/src/http/action/pad_rowset_action.cpp
+++ b/be/src/http/action/pad_rowset_action.cpp
@@ -25,6 +25,8 @@
#include "olap/rowset/beta_rowset_writer.h"
#include "olap/rowset/rowset.h"
#include "olap/storage_engine.h"
+#include "util/time.h"
+#include "util/trace.h"
namespace doris {
@@ -95,6 +97,7 @@ Status PadRowsetAction::_pad_rowset(TabletSharedPtr tablet,
const Version& versi
std::vector<RowsetSharedPtr> to_delete;
{
std::unique_lock wlock(tablet->get_header_lock());
+ SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
tablet->modify_rowsets(to_add, to_delete);
tablet->save_meta();
}
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index b9e9754030..8fd87a0e5f 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -467,6 +467,7 @@ Status Compaction::modify_rowsets(const Merger::Statistics*
stats) {
{
std::lock_guard<std::mutex>
wrlock_(_tablet->get_rowset_update_lock());
std::lock_guard<std::shared_mutex>
wrlock(_tablet->get_header_lock());
+ SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
// Convert the delete bitmap of the input rowsets to output rowset
for
// incremental data.
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 2ceff6f7c6..dd81df84d3 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -32,6 +32,7 @@
#include "olap/wrapper_field.h"
#include "runtime/memory/mem_tracker.h"
#include "util/defer_op.h"
+#include "util/trace.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/aggregate_functions/aggregate_function_reader.h"
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
@@ -1846,6 +1847,7 @@ Status
SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletReqV2&
std::lock_guard<std::mutex>
base_tablet_lock(base_tablet->get_push_lock());
std::lock_guard<std::mutex>
new_tablet_lock(new_tablet->get_push_lock());
std::lock_guard<std::shared_mutex>
base_tablet_wlock(base_tablet->get_header_lock());
+ SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
std::lock_guard<std::shared_mutex>
new_tablet_wlock(new_tablet->get_header_lock());
do {
@@ -2064,6 +2066,7 @@ Status
SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletReqV2&
// step 3
std::lock_guard<std::mutex>
rwlock(new_tablet->get_rowset_update_lock());
std::lock_guard<std::shared_mutex>
new_wlock(new_tablet->get_header_lock());
+ SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
int64_t new_max_version = new_tablet->max_version().second;
rowsets.clear();
if (max_version < new_max_version) {
@@ -2094,6 +2097,7 @@ Status
SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletReqV2&
} else {
// set state to ready
std::lock_guard<std::shared_mutex>
new_wlock(new_tablet->get_header_lock());
+ SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
res = new_tablet->set_tablet_state(TabletState::TABLET_RUNNING);
if (!res) {
break;
@@ -2171,6 +2175,7 @@ Status
SchemaChangeHandler::_convert_historical_rowsets(const SchemaChangeParams
{
// save tablet meta here because rowset meta is not saved during
add rowset
std::lock_guard<std::shared_mutex>
new_wlock(sc_params.new_tablet->get_header_lock());
+ SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
sc_params.new_tablet->save_meta();
}
if (res) {
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index ce1b14f8a1..968b6ad9d4 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -68,6 +68,7 @@
namespace doris {
using namespace ErrorCode;
+using namespace std::chrono_literals;
using std::pair;
using std::nothrow;
@@ -75,6 +76,8 @@ using std::sort;
using std::string;
using std::vector;
+const std::chrono::seconds TRACE_TABLET_LOCK_THRESHOLD = 10s;
+
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(flush_bytes, MetricUnit::BYTES);
DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(flush_finish_count,
MetricUnit::OPERATIONS);
@@ -300,6 +303,7 @@ Status Tablet::revise_tablet_meta(const
std::vector<RowsetMetaSharedPtr>& rowset
Status Tablet::add_rowset(RowsetSharedPtr rowset) {
DCHECK(rowset != nullptr);
std::lock_guard<std::shared_mutex> wrlock(_meta_lock);
+ SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
// If the rowset already exist, just return directly. The rowset_id is an
unique-id,
// we can use it to check this situation.
if (_contains_rowset(rowset->rowset_id())) {
@@ -535,6 +539,7 @@ void Tablet::_delete_stale_rowset_by_version(const Version&
version) {
void Tablet::delete_expired_stale_rowset() {
int64_t now = UnixSeconds();
std::lock_guard<std::shared_mutex> wrlock(_meta_lock);
+ SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
// Compute the end time to delete rowsets, when a expired rowset
createtime less then this time, it will be deleted.
double expired_stale_sweep_endtime =
::difftime(now, config::tablet_rowset_stale_sweep_time_sec);
@@ -972,6 +977,7 @@ void
Tablet::_max_continuous_version_from_beginning_unlocked(Version* version, V
void Tablet::calculate_cumulative_point() {
std::lock_guard<std::shared_mutex> wrlock(_meta_lock);
+ SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
int64_t ret_cumulative_point;
_cumulative_compaction_policy->calculate_cumulative_point(
this, _tablet_meta->all_rs_metas(), _cumulative_point,
&ret_cumulative_point);
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index d34597323a..15e17223c3 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -62,6 +62,8 @@ using TabletSharedPtr = std::shared_ptr<Tablet>;
enum TabletStorageType { STORAGE_TYPE_LOCAL, STORAGE_TYPE_REMOTE,
STORAGE_TYPE_REMOTE_AND_LOCAL };
+extern const std::chrono::seconds TRACE_TABLET_LOCK_THRESHOLD;
+
class Tablet : public BaseTablet {
public:
static TabletSharedPtr create_tablet_from_meta(TabletMetaSharedPtr
tablet_meta,
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index fa7c25e9c4..5f2bd1b5c0 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -473,6 +473,7 @@ Status TabletManager::_drop_tablet_unlocked(TTabletId
tablet_id, TReplicaId repl
if (!keep_files) {
// drop tablet will update tablet meta, should lock
std::lock_guard<std::shared_mutex>
wrlock(to_drop_tablet->get_header_lock());
+ SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
LOG(INFO) << "set tablet to shutdown state and remove it from memory. "
<< "tablet_id=" << tablet_id << ", tablet_path=" <<
to_drop_tablet->tablet_path();
// NOTE: has to update tablet here, but must not update tablet meta
directly.
@@ -583,18 +584,14 @@ TabletSharedPtr TabletManager::get_tablet(TTabletId
tablet_id, TabletUid tablet_
return nullptr;
}
-std::vector<TabletSharedPtr> TabletManager::get_all_tablet() {
+std::vector<TabletSharedPtr>
TabletManager::get_all_tablet(std::function<bool(Tablet*)>&& filter) {
std::vector<TabletSharedPtr> res;
for (const auto& tablets_shard : _tablets_shards) {
std::shared_lock rdlock(tablets_shard.lock);
- for (const auto& tablet_map : tablets_shard.tablet_map) {
- // these are tablets which is not deleted
- TabletSharedPtr tablet = tablet_map.second;
- if (!tablet->is_used()) {
- LOG(WARNING) << "tablet cannot be used. tablet=" <<
tablet->tablet_id();
- continue;
+ for (auto& [id, tablet] : tablets_shard.tablet_map) {
+ if (filter(tablet.get())) {
+ res.emplace_back(tablet);
}
- res.emplace_back(tablet);
}
}
return res;
@@ -1096,20 +1093,17 @@ void
TabletManager::update_root_path_info(std::map<string, DataDirInfo>* path_ma
size_t* tablet_count) {
DCHECK(tablet_count);
*tablet_count = 0;
- for (const auto& tablets_shard : _tablets_shards) {
- std::shared_lock rdlock(tablets_shard.lock);
- for (const auto& item : tablets_shard.tablet_map) {
- TabletSharedPtr tablet = item.second;
- ++(*tablet_count);
- auto iter = path_map->find(tablet->data_dir()->path());
- if (iter == path_map->end()) {
- continue;
- }
- if (iter->second.is_used) {
- iter->second.local_used_capacity +=
tablet->tablet_local_size();
- iter->second.remote_used_capacity +=
tablet->tablet_remote_size();
- }
- }
+ auto filter = [path_map, tablet_count](Tablet* t) -> bool {
+ ++(*tablet_count);
+ auto iter = path_map->find(t->data_dir()->path());
+ return iter != path_map->end() && iter->second.is_used;
+ };
+
+ auto tablets = get_all_tablet(filter);
+ for (const auto& tablet : tablets) {
+ auto& data_dir_info = (*path_map)[tablet->data_dir()->path()];
+ data_dir_info.local_used_capacity += tablet->tablet_local_size();
+ data_dir_info.remote_used_capacity += tablet->tablet_remote_size();
}
}
diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h
index 667013d268..378ff499ac 100644
--- a/be/src/olap/tablet_manager.h
+++ b/be/src/olap/tablet_manager.h
@@ -78,7 +78,8 @@ public:
TabletSharedPtr get_tablet(TTabletId tablet_id, TabletUid tablet_uid,
bool include_deleted = false, std::string* err
= nullptr);
- std::vector<TabletSharedPtr> get_all_tablet();
+ std::vector<TabletSharedPtr> get_all_tablet(std::function<bool(Tablet*)>&&
filter =
+ [](Tablet* t) { return
t->is_used(); });
uint64_t get_rowset_nums();
uint64_t get_segment_nums();
diff --git a/be/src/olap/task/engine_clone_task.cpp
b/be/src/olap/task/engine_clone_task.cpp
index 5c8c2f8070..9352c7a0ce 100644
--- a/be/src/olap/task/engine_clone_task.cpp
+++ b/be/src/olap/task/engine_clone_task.cpp
@@ -34,6 +34,7 @@
#include "runtime/thread_context.h"
#include "util/defer_op.h"
#include "util/thrift_rpc_helper.h"
+#include "util/trace.h"
using std::set;
using std::stringstream;
@@ -466,6 +467,7 @@ Status EngineCloneTask::_finish_clone(Tablet* tablet, const
std::string& clone_d
std::lock_guard<std::mutex> push_lock(tablet->get_push_lock());
std::lock_guard<std::mutex> rwlock(tablet->get_rowset_update_lock());
std::lock_guard<std::shared_mutex> wrlock(tablet->get_header_lock());
+ SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
// check clone dir existed
if (!FileUtils::check_exist(clone_dir)) {
return Status::InternalError("clone dir not existed. clone_dir={}",
clone_dir);
diff --git a/be/src/util/trace.h b/be/src/util/trace.h
index 40c6ba265f..7fe0c9e0c2 100644
--- a/be/src/util/trace.h
+++ b/be/src/util/trace.h
@@ -18,18 +18,21 @@
#include <rapidjson/writer.h>
+#include <chrono>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
+#include "common/logging.h"
#include "gutil/macros.h"
#include "gutil/ref_counted.h"
#include "gutil/strings/stringpiece.h"
#include "gutil/strings/substitute.h"
#include "gutil/threading/thread_collision_warner.h"
#include "gutil/walltime.h"
+#include "util/scoped_cleanup.h"
#include "util/spinlock.h"
#include "util/trace_metrics.h"
@@ -112,6 +115,32 @@ class Trace;
} \
}()
+// If this scope times out, make a simple trace.
+// It will log the cost time only.
+// Timeout is chrono duration struct, eg: 5ms, 100 * 1s.
+#define SCOPED_SIMPLE_TRACE_IF_TIMEOUT(timeout) \
+ SCOPED_SIMPLE_TRACE_TO_STREAM_IF_TIMEOUT(timeout, LOG(WARNING))
+
+// If this scope times out, then put simple trace to the stream.
+// Timeout is chrono duration struct, eg: 5ms, 100 * 1s.
+// For example:
+//
+// std::string tag = "[foo]";
+// SCOPED_SIMPLE_TRACE_TO_STREAM_IF_TIMEOUT(5s, LOG(INFO) << tag);
+//
+#define SCOPED_SIMPLE_TRACE_TO_STREAM_IF_TIMEOUT(timeout, stream)
\
+ using namespace std::chrono_literals;
\
+ auto VARNAME_LINENUM(scoped_simple_trace) = doris::MonotonicMicros();
\
+ SCOPED_CLEANUP({
\
+ auto VARNAME_LINENUM(timeout_us) =
\
+
std::chrono::duration_cast<std::chrono::microseconds>(timeout).count(); \
+ auto VARNAME_LINENUM(cost_us) =
\
+ doris::MonotonicMicros() -
VARNAME_LINENUM(scoped_simple_trace); \
+ if (VARNAME_LINENUM(cost_us) >= VARNAME_LINENUM(timeout_us)) {
\
+ stream << "Simple trace cost(us): " << VARNAME_LINENUM(cost_us);
\
+ }
\
+ })
+
namespace doris {
struct TraceEntry;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
index 8a93277bf5..ca24af08eb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
@@ -1763,7 +1763,7 @@ public class InternalCatalog implements
CatalogIf<Database> {
}
if (!ok || !countDownLatch.getStatus().ok()) {
- errMsg = "Failed to create partition[" + partitionName + "].
Timeout.";
+ errMsg = "Failed to create partition[" + partitionName + "].
Timeout:" + (timeout / 1000) + " seconds.";
// clear tasks
AgentTaskQueue.removeBatchTask(batchTask, TTaskType.CREATE);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]