github-actions[bot] commented on code in PR #61882:
URL: https://github.com/apache/doris/pull/61882#discussion_r3013245179


##########
cloud/src/meta-service/meta_service_resource.cpp:
##########
@@ -2148,6 +2148,175 @@ std::pair<MetaServiceCode, std::string> 
handle_snapshot_intervals(const std::str
     return std::make_pair(MetaServiceCode::OK, "");
 }
 
+static std::pair<MetaServiceCode, std::string> get_instance_info(Transaction* 
txn,
+                                                                 const 
std::string& instance_id,
+                                                                 
InstanceInfoPB* instance) {
+    std::string instance_val;
+    TxnErrorCode err = txn->get(instance_key({instance_id}), &instance_val);
+    if (err != TxnErrorCode::TXN_OK) {
+        return {cast_as<ErrCategory::READ>(err),
+                fmt::format("failed to get instance, instance_id={}, err={}", 
instance_id, err)};
+    }
+    if (!instance->ParseFromString(instance_val)) {
+        return {MetaServiceCode::PROTOBUF_PARSE_ERR,
+                fmt::format("failed to parse instance proto, instance_id={}", 
instance_id)};
+    }
+    return {MetaServiceCode::OK, ""};
+}
+
+// Traverses the predecessor chain of the given instance, populating 
`predecessors` with the
+// instance info of each predecessor.
+static std::pair<MetaServiceCode, std::string> get_predecessor_instances(
+        Transaction* txn, const InstanceInfoPB& instance,
+        std::vector<InstanceInfoPB>* predecessors) {
+    std::string instance_id = instance.original_instance_id();
+    while (!instance_id.empty() && instance_id != instance.instance_id()) {
+        InstanceInfoPB current_instance;
+        auto [code, msg] = get_instance_info(txn, instance_id, 
&current_instance);
+        if (code != MetaServiceCode::OK) {
+            return {code, std::move(msg)};
+        }
+        instance_id = current_instance.successor_instance_id();
+        predecessors->emplace_back(std::move(current_instance));
+    }
+    return {MetaServiceCode::OK, ""};
+}
+
+static std::pair<MetaServiceCode, std::string> ensure_all_snapshot_recycled(
+        Transaction* txn, const std::string& instance_id,
+        
std::optional<std::reference_wrapper<std::unordered_set<std::string_view>>>
+                exclude_snapshot_set) {
+    MetaReader meta_reader(instance_id);
+    std::vector<std::pair<SnapshotPB, Versionstamp>> snapshots;
+    TxnErrorCode err = meta_reader.get_snapshots(txn, &snapshots);
+    if (err != TxnErrorCode::TXN_OK) {
+        std::string msg = fmt::format("failed to get snapshots, err={}", err);
+        LOG(WARNING) << msg << " instance_id=" << instance_id;
+        return {cast_as<ErrCategory::READ>(err), std::move(msg)};
+    }
+    for (auto& [snapshot, versionstamp] : snapshots) {
+        std::string snapshot_id = 
SnapshotManager::serialize_snapshot_id(versionstamp);
+        if (exclude_snapshot_set.has_value() &&
+            exclude_snapshot_set.value().get().count(snapshot_id) > 0) {
+            continue;
+        }
+        if (snapshot.status() != SnapshotStatus::SNAPSHOT_RECYCLED) {
+            std::string msg = fmt::format(
+                    "failed to drop instance, instance has snapshots, 
snapshot_id={}", snapshot_id);
+            LOG(WARNING) << msg << " instance_id=" << instance_id
+                         << " snapshot=" << proto_to_json(snapshot);
+            return {MetaServiceCode::INVALID_ARGUMENT, std::move(msg)};
+        }
+    }
+    return {MetaServiceCode::OK, ""};
+}
+
+static std::pair<MetaServiceCode, std::string> drop_single_instance(const 
std::string& instance_id,
+                                                                    
Transaction* txn,
+                                                                    
InstanceInfoPB* instance) {
+    auto [code, msg] = ensure_all_snapshot_recycled(txn, instance_id, 
std::nullopt);
+    if (code != MetaServiceCode::OK) {
+        return {code, std::move(msg)};
+    }
+
+    instance->set_status(InstanceInfoPB::DELETED);
+    
instance->set_mtime(duration_cast<seconds>(system_clock::now().time_since_epoch()).count());
+
+    std::string serialized = instance->SerializeAsString();
+    if (serialized.empty()) {
+        std::string msg = "failed to serialize";
+        LOG(ERROR) << msg;
+        return {MetaServiceCode::PROTOBUF_SERIALIZE_ERR, std::move(msg)};
+    }
+    LOG(INFO) << "drop instance_id=" << instance_id << " json=" << 
proto_to_json(*instance);
+
+    if (instance->has_source_instance_id() && 
instance->has_source_snapshot_id() &&
+        !instance->source_instance_id().empty() && 
!instance->source_snapshot_id().empty()) {
+        Versionstamp snapshot_versionstamp;
+        if 
(!SnapshotManager::parse_snapshot_versionstamp(instance->source_snapshot_id(),
+                                                          
&snapshot_versionstamp)) {
+            std::string msg = "failed to parse snapshot_id to versionstamp, 
snapshot_id=" +
+                              instance->source_snapshot_id();
+            LOG(WARNING) << msg;
+            return {MetaServiceCode::INVALID_ARGUMENT, std::move(msg)};
+        }
+        std::string snapshot_ref_key = versioned::snapshot_reference_key(
+                {instance->source_instance_id(), snapshot_versionstamp, 
instance_id});
+        txn->remove(snapshot_ref_key);
+    }
+    return {MetaServiceCode::OK, std::move(serialized)};
+}
+
+// Handles the instance-chain drop: all instances from the original up to and 
including the tail
+// are marked DELETED atomically within the same transaction.
+static std::pair<MetaServiceCode, std::string> drop_instance_chain(
+        const std::string& tail_instance_id, Transaction* txn, InstanceInfoPB* 
tail_instance) {
+    std::vector<InstanceInfoPB> predecessors;
+    if (auto [code, err] = get_predecessor_instances(txn, *tail_instance, 
&predecessors);
+        code != MetaServiceCode::OK) {
+        LOG(WARNING) << "drop instance chain: " << err;
+        return {code, std::move(err)};
+    }
+
+    std::unordered_set<std::string_view> exclude_snapshot_set;
+    if (auto [code, err] = ensure_all_snapshot_recycled(txn, tail_instance_id, 
std::nullopt);
+        code != MetaServiceCode::OK) {
+        return {code, std::move(err)};
+    }
+    exclude_snapshot_set.insert(tail_instance->source_snapshot_id());

Review Comment:
   **[Low - Fragility] `std::unordered_set<std::string_view>` is 
lifetime-fragile.** The `string_view`s in `exclude_snapshot_set` point into the 
protobuf `source_snapshot_id()` fields of `tail_instance` and `predecessors` 
vector elements. This is currently safe because `set_status()` and 
`set_mtime()` (lines 152-153, 165-166) do not invalidate unrelated string 
fields. However, this is a fragile pattern — any future modification that 
clears or modifies `source_snapshot_id` on these objects before the set is 
consumed would cause use-after-free.
   
   Consider using `std::unordered_set<std::string>` instead for robustness, or 
add a comment documenting why the `string_view` approach is safe here.



##########
cloud/src/snapshot/snapshot_manager.cpp:
##########
@@ -262,4 +264,88 @@ int 
SnapshotManager::compact_snapshot_chains(InstanceChainCompactor* compactor)
     return -1;
 }
 
+static std::pair<MetaServiceCode, std::string> get_instance(Transaction* txn,
+                                                            std::string_view 
instance_id,
+                                                            InstanceInfoPB* 
instance_info) {
+    InstanceKeyInfo instance_key_info {instance_id};
+    std::string key = instance_key(instance_key_info);
+    std::string val;
+    TxnErrorCode err = txn->get(key, &val);
+    if (err != TxnErrorCode::TXN_OK) {
+        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+            return {MetaServiceCode::INVALID_ARGUMENT,
+                    fmt::format("instance not found, instance_id={}", 
instance_id)};
+        } else {
+            return {cast_as<ErrCategory::READ>(err),
+                    fmt::format("failed to get instance, instance_id={}, 
err={}", instance_id,
+                                err)};

Review Comment:
   **[Medium] Code duplication.** `get_instance()` (defined here as a 
file-local static function) and `get_instance_info()` in 
`meta_service_resource.cpp` serve the same purpose but with slightly different 
error handling (this one distinguishes `TXN_KEY_NOT_FOUND`, the other does 
not). Consider extracting a shared utility to avoid the duplication and ensure 
consistent error handling.



##########
cloud/src/snapshot/snapshot_manager.cpp:
##########
@@ -262,4 +264,88 @@ int 
SnapshotManager::compact_snapshot_chains(InstanceChainCompactor* compactor)
     return -1;
 }
 
+static std::pair<MetaServiceCode, std::string> get_instance(Transaction* txn,
+                                                            std::string_view 
instance_id,
+                                                            InstanceInfoPB* 
instance_info) {
+    InstanceKeyInfo instance_key_info {instance_id};
+    std::string key = instance_key(instance_key_info);
+    std::string val;
+    TxnErrorCode err = txn->get(key, &val);
+    if (err != TxnErrorCode::TXN_OK) {
+        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+            return {MetaServiceCode::INVALID_ARGUMENT,
+                    fmt::format("instance not found, instance_id={}", 
instance_id)};
+        } else {
+            return {cast_as<ErrCategory::READ>(err),
+                    fmt::format("failed to get instance, instance_id={}, 
err={}", instance_id,
+                                err)};
+        }
+    }
+
+    if (!instance_info->ParseFromString(val)) {
+        return {MetaServiceCode::INVALID_ARGUMENT, "failed to parse instance 
info"};
+    }
+    return {MetaServiceCode::OK, ""};
+}
+
+std::pair<MetaServiceCode, std::string> SnapshotManager::get_all_snapshots(
+        Transaction* txn, std::string_view instance_id, std::string_view 
required_snapshot_id,
+        std::vector<std::pair<SnapshotPB, Versionstamp>>* snapshots) {
+    Versionstamp required_snapshot_versionstamp;
+    if (!required_snapshot_id.empty()) {
+        if (!parse_snapshot_versionstamp(required_snapshot_id, 
&required_snapshot_versionstamp)) {
+            return {MetaServiceCode::INVALID_ARGUMENT, "invalid snapshot_id 
format"};
+        }
+    }
+
+    InstanceInfoPB instance_info;
+    auto [code, error_msg] = get_instance(txn, instance_id, &instance_info);
+    if (code != MetaServiceCode::OK) {
+        return {code, error_msg};
+    }
+    std::string current_instance_id = instance_id;
+    if (instance_info.has_original_instance_id() && 
!instance_info.original_instance_id().empty()) {
+        // the earliest instance_id for rollback
+        current_instance_id = instance_info.original_instance_id();
+    }
+
+    do {
+        MetaReader meta_reader(current_instance_id);
+        if (required_snapshot_id.empty()) {
+            TxnErrorCode err = meta_reader.get_snapshots(txn, snapshots);
+            if (err != TxnErrorCode::TXN_OK) {
+                return {cast_as<ErrCategory::READ>(err), "failed to get 
snapshots"};
+            }
+        } else {
+            SnapshotPB snapshot_pb;
+            TxnErrorCode err =
+                    meta_reader.get_snapshot(txn, 
required_snapshot_versionstamp, &snapshot_pb);
+            if (err == TxnErrorCode::TXN_OK) {
+                snapshots->emplace_back(snapshot_pb, 
required_snapshot_versionstamp);
+                return {MetaServiceCode::OK, ""};
+            } else if (err != TxnErrorCode::TXN_KEY_NOT_FOUND) {
+                return {cast_as<ErrCategory::READ>(err), "failed to get 
snapshot"};
+            }
+        }
+        if (current_instance_id == instance_id) {
+            break;
+        }
+        auto [code, error_msg] = get_instance(txn, current_instance_id, 
&instance_info);

Review Comment:
   **[Medium] Missing cycle protection in chain traversal.** The 
`do-while(true)` loop at line 312 follows `successor_instance_id` links without 
any cycle detection or iteration bound. If the instance chain metadata is 
corrupted and forms a cycle that doesn't include `instance_id`, this loop will 
run indefinitely.
   
   Per the project's coding standards (AGENTS.md: "upon discovering errors or 
unexpected situations, report errors or crash—never allow the process to 
continue"), consider adding a `DORIS_CHECK` with a visited-set:
   
   ```cpp
   std::unordered_set<std::string> visited;
   // inside loop, after updating current_instance_id:
   DORIS_CHECK(!visited.count(current_instance_id))
           << "cycle detected in instance chain at " << current_instance_id;
   visited.insert(current_instance_id);
   ```
   
   The same concern applies to `get_predecessor_instances` in 
`meta_service_resource.cpp`.



##########
cloud/src/meta-service/meta_service_resource.cpp:
##########
@@ -2148,6 +2148,175 @@ std::pair<MetaServiceCode, std::string> 
handle_snapshot_intervals(const std::str
     return std::make_pair(MetaServiceCode::OK, "");
 }
 
+static std::pair<MetaServiceCode, std::string> get_instance_info(Transaction* 
txn,
+                                                                 const 
std::string& instance_id,
+                                                                 
InstanceInfoPB* instance) {
+    std::string instance_val;
+    TxnErrorCode err = txn->get(instance_key({instance_id}), &instance_val);
+    if (err != TxnErrorCode::TXN_OK) {
+        return {cast_as<ErrCategory::READ>(err),
+                fmt::format("failed to get instance, instance_id={}, err={}", 
instance_id, err)};
+    }
+    if (!instance->ParseFromString(instance_val)) {
+        return {MetaServiceCode::PROTOBUF_PARSE_ERR,
+                fmt::format("failed to parse instance proto, instance_id={}", 
instance_id)};
+    }
+    return {MetaServiceCode::OK, ""};
+}
+
+// Traverses the predecessor chain of the given instance, populating 
`predecessors` with the
+// instance info of each predecessor.
+static std::pair<MetaServiceCode, std::string> get_predecessor_instances(
+        Transaction* txn, const InstanceInfoPB& instance,
+        std::vector<InstanceInfoPB>* predecessors) {
+    std::string instance_id = instance.original_instance_id();
+    while (!instance_id.empty() && instance_id != instance.instance_id()) {
+        InstanceInfoPB current_instance;
+        auto [code, msg] = get_instance_info(txn, instance_id, 
&current_instance);
+        if (code != MetaServiceCode::OK) {
+            return {code, std::move(msg)};
+        }
+        instance_id = current_instance.successor_instance_id();
+        predecessors->emplace_back(std::move(current_instance));
+    }
+    return {MetaServiceCode::OK, ""};
+}
+
+static std::pair<MetaServiceCode, std::string> ensure_all_snapshot_recycled(
+        Transaction* txn, const std::string& instance_id,
+        
std::optional<std::reference_wrapper<std::unordered_set<std::string_view>>>
+                exclude_snapshot_set) {
+    MetaReader meta_reader(instance_id);
+    std::vector<std::pair<SnapshotPB, Versionstamp>> snapshots;
+    TxnErrorCode err = meta_reader.get_snapshots(txn, &snapshots);
+    if (err != TxnErrorCode::TXN_OK) {
+        std::string msg = fmt::format("failed to get snapshots, err={}", err);
+        LOG(WARNING) << msg << " instance_id=" << instance_id;
+        return {cast_as<ErrCategory::READ>(err), std::move(msg)};
+    }
+    for (auto& [snapshot, versionstamp] : snapshots) {
+        std::string snapshot_id = 
SnapshotManager::serialize_snapshot_id(versionstamp);
+        if (exclude_snapshot_set.has_value() &&
+            exclude_snapshot_set.value().get().count(snapshot_id) > 0) {
+            continue;
+        }
+        if (snapshot.status() != SnapshotStatus::SNAPSHOT_RECYCLED) {
+            std::string msg = fmt::format(
+                    "failed to drop instance, instance has snapshots, 
snapshot_id={}", snapshot_id);
+            LOG(WARNING) << msg << " instance_id=" << instance_id
+                         << " snapshot=" << proto_to_json(snapshot);
+            return {MetaServiceCode::INVALID_ARGUMENT, std::move(msg)};
+        }
+    }
+    return {MetaServiceCode::OK, ""};
+}
+
+static std::pair<MetaServiceCode, std::string> drop_single_instance(const 
std::string& instance_id,
+                                                                    
Transaction* txn,
+                                                                    
InstanceInfoPB* instance) {
+    auto [code, msg] = ensure_all_snapshot_recycled(txn, instance_id, 
std::nullopt);
+    if (code != MetaServiceCode::OK) {
+        return {code, std::move(msg)};
+    }
+
+    instance->set_status(InstanceInfoPB::DELETED);
+    
instance->set_mtime(duration_cast<seconds>(system_clock::now().time_since_epoch()).count());
+
+    std::string serialized = instance->SerializeAsString();
+    if (serialized.empty()) {
+        std::string msg = "failed to serialize";
+        LOG(ERROR) << msg;
+        return {MetaServiceCode::PROTOBUF_SERIALIZE_ERR, std::move(msg)};
+    }
+    LOG(INFO) << "drop instance_id=" << instance_id << " json=" << 
proto_to_json(*instance);
+
+    if (instance->has_source_instance_id() && 
instance->has_source_snapshot_id() &&
+        !instance->source_instance_id().empty() && 
!instance->source_snapshot_id().empty()) {
+        Versionstamp snapshot_versionstamp;
+        if 
(!SnapshotManager::parse_snapshot_versionstamp(instance->source_snapshot_id(),
+                                                          
&snapshot_versionstamp)) {
+            std::string msg = "failed to parse snapshot_id to versionstamp, 
snapshot_id=" +
+                              instance->source_snapshot_id();
+            LOG(WARNING) << msg;
+            return {MetaServiceCode::INVALID_ARGUMENT, std::move(msg)};
+        }
+        std::string snapshot_ref_key = versioned::snapshot_reference_key(
+                {instance->source_instance_id(), snapshot_versionstamp, 
instance_id});
+        txn->remove(snapshot_ref_key);
+    }
+    return {MetaServiceCode::OK, std::move(serialized)};
+}
+
+// Handles the instance-chain drop: all instances from the original up to and 
including the tail
+// are marked DELETED atomically within the same transaction.
+static std::pair<MetaServiceCode, std::string> drop_instance_chain(
+        const std::string& tail_instance_id, Transaction* txn, InstanceInfoPB* 
tail_instance) {
+    std::vector<InstanceInfoPB> predecessors;
+    if (auto [code, err] = get_predecessor_instances(txn, *tail_instance, 
&predecessors);
+        code != MetaServiceCode::OK) {
+        LOG(WARNING) << "drop instance chain: " << err;
+        return {code, std::move(err)};
+    }
+
+    std::unordered_set<std::string_view> exclude_snapshot_set;
+    if (auto [code, err] = ensure_all_snapshot_recycled(txn, tail_instance_id, 
std::nullopt);
+        code != MetaServiceCode::OK) {
+        return {code, std::move(err)};
+    }
+    exclude_snapshot_set.insert(tail_instance->source_snapshot_id());
+
+    auto remove_snapshot_reference_key = [&](const InstanceInfoPB& instance) {
+        Versionstamp vs;
+        if 
(SnapshotManager::parse_snapshot_versionstamp(instance.source_snapshot_id(), 
&vs)) {
+            txn->remove(versioned::snapshot_reference_key(

Review Comment:
   **[Medium] Missing validation before insert.** 
`tail_instance->source_snapshot_id()` is inserted into `exclude_snapshot_set` 
without checking if the field is set and non-empty. Although in practice a 
chain-tail instance should always have this set, per project coding standards, 
this assumption should be enforced with a `DORIS_CHECK`:
   
   ```cpp
   DORIS_CHECK(tail_instance->has_source_snapshot_id() && 
!tail_instance->source_snapshot_id().empty())
           << "chain tail instance must have source_snapshot_id, instance_id=" 
<< tail_instance_id;
   exclude_snapshot_set.insert(tail_instance->source_snapshot_id());
   ```
   
   If this assumption doesn't hold, the empty string would be inserted 
silently, which is benign but sloppy.



##########
cloud/src/meta-service/meta_service_resource.cpp:
##########
@@ -2148,6 +2148,175 @@ std::pair<MetaServiceCode, std::string> 
handle_snapshot_intervals(const std::str
     return std::make_pair(MetaServiceCode::OK, "");
 }
 
+static std::pair<MetaServiceCode, std::string> get_instance_info(Transaction* 
txn,
+                                                                 const 
std::string& instance_id,
+                                                                 
InstanceInfoPB* instance) {
+    std::string instance_val;
+    TxnErrorCode err = txn->get(instance_key({instance_id}), &instance_val);
+    if (err != TxnErrorCode::TXN_OK) {
+        return {cast_as<ErrCategory::READ>(err),
+                fmt::format("failed to get instance, instance_id={}, err={}", 
instance_id, err)};
+    }
+    if (!instance->ParseFromString(instance_val)) {
+        return {MetaServiceCode::PROTOBUF_PARSE_ERR,

Review Comment:
   **[Low] `get_instance_info` does not distinguish `TXN_KEY_NOT_FOUND`.** 
Unlike the `get_instance` function in `snapshot_manager.cpp`, this function 
uses `cast_as<ErrCategory::READ>(err)` for all non-OK errors. If a predecessor 
instance key is missing (e.g., due to data corruption), the error message will 
include the error code but the `MetaServiceCode` won't distinguish 'not found' 
from other read errors. Consider handling `TXN_KEY_NOT_FOUND` explicitly for 
better diagnostics.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to