github-actions[bot] commented on code in PR #61882:
URL: https://github.com/apache/doris/pull/61882#discussion_r3013127250
##########
cloud/src/snapshot/snapshot_manager.cpp:
##########
@@ -262,4 +264,88 @@ int
SnapshotManager::compact_snapshot_chains(InstanceChainCompactor* compactor)
return -1;
}
+static std::pair<MetaServiceCode, std::string> get_instance(Transaction* txn,
+ std::string_view
instance_id,
+ InstanceInfoPB*
instance_info) {
+ InstanceKeyInfo instance_key_info {instance_id};
+ std::string key = instance_key(instance_key_info);
+ std::string val;
+ TxnErrorCode err = txn->get(key, &val);
+ if (err != TxnErrorCode::TXN_OK) {
+ if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+ return {MetaServiceCode::INVALID_ARGUMENT,
+ fmt::format("instance not found, instance_id={}",
instance_id)};
+ } else {
+ return {cast_as<ErrCategory::READ>(err),
+ fmt::format("failed to get instance, instance_id={},
err={}", instance_id,
+ err)};
+ }
+ }
+
+ if (!instance_info->ParseFromString(val)) {
+ return {MetaServiceCode::INVALID_ARGUMENT, "failed to parse instance
info"};
+ }
+ return {MetaServiceCode::OK, ""};
+}
+
+std::pair<MetaServiceCode, std::string> SnapshotManager::get_all_snapshots(
+ Transaction* txn, std::string_view instance_id, std::string_view
required_snapshot_id,
+ std::vector<std::pair<SnapshotPB, Versionstamp>>* snapshots) {
+ Versionstamp required_snapshot_versionstamp;
+ if (!required_snapshot_id.empty()) {
+ if (!parse_snapshot_versionstamp(required_snapshot_id,
&required_snapshot_versionstamp)) {
+ return {MetaServiceCode::INVALID_ARGUMENT, "invalid snapshot_id
format"};
+ }
+ }
+
+ InstanceInfoPB instance_info;
+ auto [code, error_msg] = get_instance(txn, instance_id, &instance_info);
+ if (code != MetaServiceCode::OK) {
+ return {code, error_msg};
+ }
+ std::string_view current_instance_id = instance_id;
+ if (instance_info.has_original_instance_id() &&
!instance_info.original_instance_id().empty()) {
+ // the earliest instance_id for rollback
+ current_instance_id = instance_info.original_instance_id();
+ }
+
+ do {
+ MetaReader meta_reader(current_instance_id);
+ if (required_snapshot_id.empty()) {
+ TxnErrorCode err = meta_reader.get_snapshots(txn, snapshots);
+ if (err != TxnErrorCode::TXN_OK) {
+ return {cast_as<ErrCategory::READ>(err), "failed to get
snapshots"};
+ }
+ } else {
+ SnapshotPB snapshot_pb;
+ TxnErrorCode err =
+ meta_reader.get_snapshot(txn,
required_snapshot_versionstamp, &snapshot_pb);
+ if (err == TxnErrorCode::TXN_OK) {
+ snapshots->emplace_back(snapshot_pb,
required_snapshot_versionstamp);
+ return {MetaServiceCode::OK, ""};
+ } else if (err != TxnErrorCode::TXN_KEY_NOT_FOUND) {
+ return {cast_as<ErrCategory::READ>(err), "failed to get
snapshot"};
+ }
+ }
+ if (current_instance_id == instance_id) {
+ break;
Review Comment:
**Bug (High): Use-after-free on error path.** `current_instance_id` is a
`std::string_view`. After line 309/346, it may point into `instance_info`'s
internal protobuf string. When `get_instance(txn, current_instance_id,
&instance_info)` is called at line 405 (diff), `ParseFromString` inside
`get_instance` destroys the old protobuf data. After `get_instance` returns,
`current_instance_id` is dangling.
The error path at lines 412-414 (diff) then reads `current_instance_id` via
`fmt::format` — this is a **use-after-free**.
The `get_instance` call itself happens to be safe because `InstanceKeyInfo`
copies the `string_view` into a `std::string` before `ParseFromString` runs.
But any subsequent use of `current_instance_id` before line 418 reassigns it is
UB.
**Fix:** Change `current_instance_id` from `std::string_view` to
`std::string`:
```cpp
std::string current_instance_id {instance_id};
```
##########
cloud/src/meta-service/meta_service_resource.cpp:
##########
@@ -2148,6 +2149,178 @@ std::pair<MetaServiceCode, std::string>
handle_snapshot_intervals(const std::str
return std::make_pair(MetaServiceCode::OK, "");
}
+static std::pair<MetaServiceCode, std::string> get_instance_info(Transaction*
txn,
+ const
std::string& instance_id,
+
InstanceInfoPB* instance) {
+ std::string instance_val;
+ TxnErrorCode err = txn->get(instance_key({instance_id}), &instance_val);
+ if (err != TxnErrorCode::TXN_OK) {
+ return {cast_as<ErrCategory::READ>(err),
+ fmt::format("failed to get instance, instance_id={}, err={}",
instance_id, err)};
+ }
+ if (!instance->ParseFromString(instance_val)) {
+ return {MetaServiceCode::PROTOBUF_PARSE_ERR,
+ fmt::format("failed to parse instance proto, instance_id={}",
instance_id)};
+ }
+ return {MetaServiceCode::OK, ""};
+}
+
+// Traverses the predecessor chain of the given instance, populating
`predecessors` with the
+// instance info of each predecessor.
+//
+// The order of instances in `predecessors` is from closest predecessor to
furthest predecessor (i.e. reverse
+// chronological order).
+static std::pair<MetaServiceCode, std::string> get_predecessor_instances(
+ Transaction* txn, const InstanceInfoPB& instance,
+ std::vector<InstanceInfoPB>* predecessors) {
+ std::string instance_id = instance.original_instance_id();
Review Comment:
**Misleading comment:** The traversal starts from `original_instance_id`
(the oldest/furthest predecessor) and walks forward via `successor_instance_id`
links. The actual order in `predecessors` is chronological: **furthest
predecessor to closest**, not "closest to furthest" as stated.
Suggested fix:
```
// The order of instances in `predecessors` is from furthest predecessor to
closest
// predecessor (i.e. chronological order).
```
##########
cloud/src/meta-service/meta_service_resource.cpp:
##########
@@ -2148,6 +2149,178 @@ std::pair<MetaServiceCode, std::string>
handle_snapshot_intervals(const std::str
return std::make_pair(MetaServiceCode::OK, "");
}
+static std::pair<MetaServiceCode, std::string> get_instance_info(Transaction*
txn,
+ const
std::string& instance_id,
+
InstanceInfoPB* instance) {
+ std::string instance_val;
+ TxnErrorCode err = txn->get(instance_key({instance_id}), &instance_val);
+ if (err != TxnErrorCode::TXN_OK) {
+ return {cast_as<ErrCategory::READ>(err),
+ fmt::format("failed to get instance, instance_id={}, err={}",
instance_id, err)};
+ }
+ if (!instance->ParseFromString(instance_val)) {
+ return {MetaServiceCode::PROTOBUF_PARSE_ERR,
+ fmt::format("failed to parse instance proto, instance_id={}",
instance_id)};
+ }
+ return {MetaServiceCode::OK, ""};
+}
+
+// Traverses the predecessor chain of the given instance, populating
`predecessors` with the
+// instance info of each predecessor.
+//
+// The order of instances in `predecessors` is from closest predecessor to
furthest predecessor (i.e. reverse
+// chronological order).
+static std::pair<MetaServiceCode, std::string> get_predecessor_instances(
+ Transaction* txn, const InstanceInfoPB& instance,
+ std::vector<InstanceInfoPB>* predecessors) {
+ std::string instance_id = instance.original_instance_id();
+ while (!instance_id.empty() && instance_id != instance.instance_id()) {
+ InstanceInfoPB current_instance;
+ auto [code, msg] = get_instance_info(txn, instance_id,
¤t_instance);
+ if (code != MetaServiceCode::OK) {
+ return {code, std::move(msg)};
+ }
+ instance_id = current_instance.successor_instance_id();
+ predecessors->emplace_back(std::move(current_instance));
+ }
+ return {MetaServiceCode::OK, ""};
+}
+
+static std::pair<MetaServiceCode, std::string> ensure_all_snapshot_recycled(
+ Transaction* txn, const std::string& instance_id,
+
std::optional<std::reference_wrapper<std::unordered_set<std::string_view>>>
+ exclude_snapshot_set) {
+ MetaReader meta_reader(instance_id);
+ std::vector<std::pair<SnapshotPB, Versionstamp>> snapshots;
+ TxnErrorCode err = meta_reader.get_snapshots(txn, &snapshots);
+ if (err != TxnErrorCode::TXN_OK) {
+ std::string msg = fmt::format("failed to get snapshots, err={}", err);
+ LOG(WARNING) << msg << " instance_id=" << instance_id;
+ return {cast_as<ErrCategory::READ>(err), std::move(msg)};
+ }
+ for (auto& [snapshot, versionstamp] : snapshots) {
+ std::string snapshot_id =
SnapshotManager::serialize_snapshot_id(versionstamp);
+ if (exclude_snapshot_set.has_value() &&
+ exclude_snapshot_set.value().get().count(snapshot_id) > 0) {
+ continue;
+ }
+ if (snapshot.status() != SnapshotStatus::SNAPSHOT_RECYCLED) {
+ std::string msg = fmt::format(
+ "failed to drop instance, instance has snapshots,
snapshot_id={}", snapshot_id);
+ LOG(WARNING) << msg << " instance_id=" << instance_id
+ << " snapshot=" << proto_to_json(snapshot);
+ return {MetaServiceCode::INVALID_ARGUMENT, std::move(msg)};
+ }
+ }
+ return {MetaServiceCode::OK, ""};
+}
+
+static std::pair<MetaServiceCode, std::string> drop_single_instance(const
std::string& instance_id,
+
Transaction* txn,
+
InstanceInfoPB* instance) {
+ auto [code, msg] = ensure_all_snapshot_recycled(txn, instance_id,
std::nullopt);
+ if (code != MetaServiceCode::OK) {
+ return {code, std::move(msg)};
+ }
+
+ instance->set_status(InstanceInfoPB::DELETED);
+
instance->set_mtime(duration_cast<seconds>(system_clock::now().time_since_epoch()).count());
+
+ std::string serialized = instance->SerializeAsString();
+ if (serialized.empty()) {
+ std::string msg = "failed to serialize";
+ LOG(ERROR) << msg;
+ return {MetaServiceCode::PROTOBUF_SERIALIZE_ERR, std::move(msg)};
+ }
+ LOG(INFO) << "drop instance_id=" << instance_id << " json=" <<
proto_to_json(*instance);
+
+ if (instance->has_source_instance_id() &&
instance->has_source_snapshot_id() &&
+ !instance->source_instance_id().empty() &&
!instance->source_snapshot_id().empty()) {
+ Versionstamp snapshot_versionstamp;
+ if
(!SnapshotManager::parse_snapshot_versionstamp(instance->source_snapshot_id(),
+
&snapshot_versionstamp)) {
+ std::string msg = "failed to parse snapshot_id to versionstamp,
snapshot_id=" +
+ instance->source_snapshot_id();
+ LOG(WARNING) << msg;
+ return {MetaServiceCode::INVALID_ARGUMENT, std::move(msg)};
+ }
+ std::string snapshot_ref_key = versioned::snapshot_reference_key(
+ {instance->source_instance_id(), snapshot_versionstamp,
instance_id});
+ txn->remove(snapshot_ref_key);
+ }
+ return {MetaServiceCode::OK, std::move(serialized)};
+}
+
+// Handles the instance-chain drop: all instances from the original up to and
including the tail
+// are marked DELETED atomically within the same transaction.
+static std::pair<MetaServiceCode, std::string> drop_instance_chain(
+ const std::string& tail_instance_id, Transaction* txn, InstanceInfoPB*
tail_instance) {
+ std::vector<InstanceInfoPB> predecessors;
+ if (auto [code, err] = get_predecessor_instances(txn, *tail_instance,
&predecessors);
+ code != MetaServiceCode::OK) {
+ LOG(WARNING) << "drop instance chain: " << err;
+ return {code, std::move(err)};
+ }
+
+ std::unordered_set<std::string_view> exclude_snapshot_set;
+ if (auto [code, err] = ensure_all_snapshot_recycled(txn, tail_instance_id,
std::nullopt);
+ code != MetaServiceCode::OK) {
+ return {code, std::move(err)};
+ }
+ exclude_snapshot_set.insert(tail_instance->source_snapshot_id());
+
+ auto remove_snapshot_reference_key = [&](const InstanceInfoPB& instance) {
+ Versionstamp vs;
+ if
(SnapshotManager::parse_snapshot_versionstamp(instance.source_snapshot_id(),
&vs)) {
+ txn->remove(versioned::snapshot_reference_key(
+ {tail_instance->source_instance_id(), vs,
tail_instance_id}));
+ }
+ };
+ remove_snapshot_reference_key(*tail_instance);
+ for (auto& instance : predecessors) {
Review Comment:
**Bug (Critical):** The `remove_snapshot_reference_key` lambda hardcodes
`tail_instance->source_instance_id()` and `tail_instance_id` for all instances,
but the snapshot reference key should use each predecessor's own
`source_instance_id()` and `instance_id()`.
In a chain `A → B → C (tail)`, when removing B's snapshot reference key:
- Current code produces key `{C.source_instance_id, vs, C.instance_id}` =
`{B, vs, C}` — **wrong**
- Correct key should be `{B.source_instance_id, vs, B.instance_id}` = `{A,
vs, B}`
This means predecessor reference keys are not actually cleaned up (wrong
keys are removed as no-ops), leaving orphan references.
Suggested fix:
```cpp
auto remove_snapshot_reference_key = [&](const InstanceInfoPB& instance) {
Versionstamp vs;
if
(SnapshotManager::parse_snapshot_versionstamp(instance.source_snapshot_id(),
&vs)) {
txn->remove(versioned::snapshot_reference_key(
{instance.source_instance_id(), vs,
instance.instance_id()}));
}
};
```
##########
cloud/src/meta-service/meta_service_resource.cpp:
##########
@@ -2148,6 +2149,178 @@ std::pair<MetaServiceCode, std::string>
handle_snapshot_intervals(const std::str
return std::make_pair(MetaServiceCode::OK, "");
Review Comment:
**Unused include:** `<ranges>` is included but not used anywhere in the diff
or surrounding code. Please remove.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]