github-actions[bot] commented on code in PR #61585:
URL: https://github.com/apache/doris/pull/61585#discussion_r3295963667
##########
be/src/cloud/cloud_internal_service.cpp:
##########
@@ -49,12 +50,70 @@ bvar::LatencyRecorder
g_file_cache_get_by_peer_read_cache_file_latency(
"file_cache_get_by_peer_read_cache_file_latency");
bvar::LatencyRecorder
g_cloud_internal_service_get_file_cache_meta_by_tablet_id_latency(
"cloud_internal_service_get_file_cache_meta_by_tablet_id_latency");
+bvar::Adder<int64_t> g_cloud_sync_tablet_meta_requests_total(
+ "cloud_sync_tablet_meta_requests_total");
+bvar::Adder<int64_t>
g_cloud_sync_tablet_meta_synced_total("cloud_sync_tablet_meta_synced_total");
+bvar::Adder<int64_t>
g_cloud_sync_tablet_meta_skipped_total("cloud_sync_tablet_meta_skipped_total");
+bvar::Adder<int64_t>
g_cloud_sync_tablet_meta_failed_total("cloud_sync_tablet_meta_failed_total");
CloudInternalServiceImpl::CloudInternalServiceImpl(CloudStorageEngine& engine,
ExecEnv* exec_env)
: PInternalService(exec_env), _engine(engine) {}
CloudInternalServiceImpl::~CloudInternalServiceImpl() = default;
+void
CloudInternalServiceImpl::sync_tablet_meta(google::protobuf::RpcController*
controller,
+ const PSyncTabletMetaRequest*
request,
+ PSyncTabletMetaResponse*
response,
+ google::protobuf::Closure*
done) {
+ auto start_time = std::chrono::steady_clock::now();
+ bool ret = _light_work_pool.try_offer([this, request, response, done,
start_time]() {
+ brpc::ClosureGuard closure_guard(done);
+ LOG(INFO) << "begin to sync tablet meta, request=" <<
request->ShortDebugString();
+ int64_t synced = 0;
+ int64_t skipped = 0;
+ int64_t failed = 0;
+ g_cloud_sync_tablet_meta_requests_total << 1;
+ for (const auto tablet_id : request->tablet_ids()) {
+ auto tablet = _engine.tablet_mgr().get_tablet_if_cached(tablet_id);
+ if (!tablet) {
+ ++skipped;
+ continue;
+ }
+ auto st = tablet->sync_meta();
Review Comment:
This new RPC path can call `CloudTablet::sync_meta()` from the light work
pool concurrently with the scheduled `CloudTabletMgr::sync_tablets()` path and
with read/compaction paths that read tablet metadata under `_meta_lock`.
However `CloudTablet::sync_meta()` currently fetches remote meta and then
mutates `_tablet_meta` and `tablet_schema` fields without taking
`_sync_meta_lock` or `_meta_lock` (for example `set_ttl_seconds`, compaction
policy fields, and `set_disable_auto_compaction`). That makes the proactive
notification race with readers that assume `_meta_lock` protects tablet
metadata, and it also violates the documented cloud lock order `_sync_meta_lock
-> _meta_lock`. Please serialize `sync_meta()` consistently with the other
cloud sync paths, e.g. take `_sync_meta_lock` for the sync operation and
`_meta_lock` in write mode before applying the refreshed fields.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]