BiteTheDDDDt commented on code in PR #64165:
URL: https://github.com/apache/doris/pull/64165#discussion_r3402402474


##########
be/src/runtime/scan_filter_profile.cpp:
##########
@@ -0,0 +1,499 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "runtime/scan_filter_profile.h"
+
+#include <fmt/format.h>
+#include <fmt/ranges.h>
+#include <glog/logging.h>
+
+#include <algorithm>
+#include <unordered_set>
+
+#include "runtime/runtime_profile.h"
+
+namespace doris {
+
+namespace {
+
+// File-local string constants (presumably profile node / info-string names;
+// their use sites are outside this hunk -- TODO confirm).
+constexpr const char* SCAN_FILTER_INFO = "ScanFilterInfo";
+constexpr const char* KEY_RANGE_INFO = "KeyRangeInfo";
+constexpr const char* RUNTIME_FILTER_PARTITION_PRUNING = "RuntimeFilterPartitionPruning";
+// Numeric value of ScanFilterStage::NUM_STAGES, i.e. one past the largest
+// ordinal a stage in [0, NUM_STAGES) can have.
+constexpr auto NOT_APPLIED_PROFILE_ORDER = static_cast<int>(ScanFilterStage::NUM_STAGES);
+
+// Returns true when `stage` is one of the INDEX_* stages listed below.
+bool is_index_stage(ScanFilterStage stage) {
+    switch (stage) {
+    case ScanFilterStage::INDEX_INVERTED:
+    case ScanFilterStage::INDEX_ANN:
+    case ScanFilterStage::INDEX_DICT:
+    case ScanFilterStage::INDEX_BLOOM_FILTER:
+    case ScanFilterStage::INDEX_ZONE_MAP:
+        return true;
+    default:
+        return false;
+    }
+}
+
+// Returns true when `stage` is one of the EXEC_* stages listed below.
+bool is_exec_stage(ScanFilterStage stage) {
+    switch (stage) {
+    case ScanFilterStage::EXEC_VECTOR:
+    case ScanFilterStage::EXEC_SHORT_CIRCUIT:
+    case ScanFilterStage::EXEC_COMMON_EXPR:
+    case ScanFilterStage::EXEC_RESIDUAL:
+        return true;
+    default:
+        return false;
+    }
+}
+
+// Calls profile->add_counter(name, type, parent, level) and sets the returned
+// counter to `value`.
+void set_counter(RuntimeProfile* profile, const std::string& name, TUnit::type type,
+                 const std::string& parent, int64_t level, int64_t value) {
+    profile->add_counter(name, type, parent, level)->set(value);
+}
+
+// Convenience wrapper around set_counter() that uses
+// RuntimeProfile::ROOT_COUNTER as the parent counter name.
+void set_root_counter(RuntimeProfile* profile, const std::string& name, TUnit::type type,
+                      int64_t level, int64_t value) {
+    const std::string& root_parent = RuntimeProfile::ROOT_COUNTER;
+    set_counter(profile, name, type, root_parent, level, value);
+}
+
+// Returns the child of `profile` named `name`; when get_child() yields
+// nullptr, a new child is created via create_child(name, true, false).
+RuntimeProfile* get_or_create_child(RuntimeProfile* profile, const std::string& name) {
+    if (auto* existing = profile->get_child(name); existing != nullptr) {
+        return existing;
+    }
+    return profile->create_child(name, true, false);
+}
+
+// Adds the info string `key` = `value` to `profile`; does nothing when
+// `value` is empty.
+void add_info_string_if_not_empty(RuntimeProfile* profile, const std::string& key,
+                                  const std::string& value) {
+    if (value.empty()) {
+        return;
+    }
+    profile->add_info_string(key, value);
+}
+
+// Maps a ScanFilterKind to its display label. UNKNOWN and any value not
+// covered by a case yield "Unknown".
+const char* scan_filter_source_name(ScanFilterKind kind) {
+    const char* label = "Unknown";
+    switch (kind) {
+    case ScanFilterKind::NORMAL:
+        label = "Conjunct";
+        break;
+    case ScanFilterKind::RUNTIME_FILTER:
+        label = "RuntimeFilter";
+        break;
+    case ScanFilterKind::TOPN_FILTER:
+        label = "TopNFilter";
+        break;
+    case ScanFilterKind::UNKNOWN:
+        break;
+    }
+    return label;
+}
+
+// Running aggregate over the per-stage stats of one filter; filled in by
+// update_summary().
+struct SummaryStats {
+    // True once at least one participating stage has been folded in.
+    bool participated {false};
+    // True once a folded stage reported filtered_rows > 0.
+    bool has_filtering_stage {false};
+    // True once a folded stage carried timing information.
+    bool has_time {false};
+    // Ordinals of the stages that supplied input_rows / output_rows.
+    int first_stage {NOT_APPLIED_PROFILE_ORDER};
+    int last_stage {-1};
+    // input_rows of the stage at first_stage, output_rows of the stage at last_stage.
+    int64_t input_rows {0};
+    int64_t output_rows {0};
+    // Sum of filtered_rows over all folded stages.
+    int64_t filtered_rows {0};
+    // Sum of time_ns over the folded stages that had timing.
+    int64_t time_ns {0};
+};
+
+// Folds the stats of one stage into `summary`. Stages that did not
+// participate are ignored.
+//
+// Row bounds: once any stage with filtered_rows > 0 has been seen, input_rows
+// comes from the lowest-ordinal such stage and output_rows from the
+// highest-ordinal one. Until then, both come from the participating stage
+// with the highest ordinal seen so far.
+void update_summary(SummaryStats* summary, ScanFilterStage stage,
+                    const ScanFilterStageStatsSnapshot& stats) {
+    if (!stats.participated()) {
+        return;
+    }
+    SummaryStats& acc = *summary;
+    const int stage_order = static_cast<int>(stage);
+    if (stats.filtered_rows > 0) {
+        // Captured up front: both bound checks below must see the flag as it
+        // was before this stage was folded in.
+        const bool is_first_filtering_stage = !acc.has_filtering_stage;
+        if (is_first_filtering_stage || stage_order < acc.first_stage) {
+            acc.first_stage = stage_order;
+            acc.input_rows = stats.input_rows;
+        }
+        if (is_first_filtering_stage || stage_order > acc.last_stage) {
+            acc.last_stage = stage_order;
+            acc.output_rows = stats.output_rows;
+        }
+        acc.has_filtering_stage = true;
+    } else if (!acc.has_filtering_stage) {
+        // No filtering stage yet: track the latest non-filtering stage.
+        if (!acc.participated || stage_order > acc.last_stage) {
+            acc.first_stage = stage_order;
+            acc.last_stage = stage_order;
+            acc.input_rows = stats.input_rows;
+            acc.output_rows = stats.output_rows;
+        }
+    }
+    acc.participated = true;
+    acc.filtered_rows += stats.filtered_rows;
+    if (stats.has_time) {
+        acc.has_time = true;
+        acc.time_ns += stats.time_ns;
+    }
+}
+
+// Per-filter data bundle consumed by the materialize_* / *_string helpers.
+struct MaterializedFilterSnapshot {
+    // One entry per ScanFilterStage, indexed by static_cast<size_t>(stage).
+    using StageSnapshots =
+            std::array<ScanFilterStageStatsSnapshot,
+                       static_cast<size_t>(ScanFilterStage::NUM_STAGES)>;
+
+    ScanFilterDesc desc;
+    std::optional<ScanRuntimeFilterProfileStats> runtime_filter_stats;
+    StageSnapshots stage_snapshots;
+    // NOTE(review): presumably summaries over all / index / exec stages; they
+    // are populated outside this hunk -- confirm.
+    SummaryStats total;
+    SummaryStats index;
+    SummaryStats exec;
+};
+
+// Writes the stats of one stage into the child of `filter_profile` named
+// after the stage (created on demand): InputRows, FilteredRows and, when
+// timing is available, Time. All counters are added at level 2.
+void materialize_filter_stage(RuntimeProfile* filter_profile, ScanFilterStage stage,
+                              const ScanFilterStageStatsSnapshot& stats) {
+    constexpr int64_t counter_level = 2;
+    RuntimeProfile* node = get_or_create_child(filter_profile, scan_filter_stage_name(stage));
+    set_root_counter(node, "InputRows", TUnit::UNIT, counter_level, stats.input_rows);
+    set_root_counter(node, "FilteredRows", TUnit::UNIT, counter_level, stats.filtered_rows);
+    if (!stats.has_time) {
+        return;
+    }
+    set_root_counter(node, "Time", TUnit::TIME_NS, counter_level, stats.time_ns);
+}
+
+// Builds the "Stages" text for a filter: the names of all participating
+// stages in ordinal order, joined by " -> ". When no stage participated the
+// result is "KeyRangeInfo" for key-range sources and "NotApplied" otherwise.
+std::string scan_filter_stages_string(const MaterializedFilterSnapshot& snapshot,
+                                      bool is_key_range_source) {
+    std::vector<std::string> names;
+    // stage_snapshots holds exactly NUM_STAGES entries, indexed by stage ordinal.
+    for (size_t idx = 0; idx < snapshot.stage_snapshots.size(); ++idx) {
+        if (!snapshot.stage_snapshots[idx].participated()) {
+            continue;
+        }
+        names.emplace_back(scan_filter_stage_name(static_cast<ScanFilterStage>(idx)));
+    }
+    if (!names.empty()) {
+        return fmt::format("{}", fmt::join(names, " -> "));
+    }
+    if (is_key_range_source) {
+        return "KeyRangeInfo";
+    }
+    return "NotApplied";
+}
+
+// Describes the filter target as "column_id=<id>, column=<name>". The
+// column_id part is omitted when the id is negative, the column part when the
+// name is empty; the result is empty when both are omitted.
+std::string target_string(const ScanFilterDesc& desc) {
+    std::vector<std::string> fields;
+    const bool has_column_id = desc.column_id >= 0;
+    const bool has_column_name = !desc.column_name.empty();
+    if (has_column_id) {
+        fields.push_back(fmt::format("column_id={}", desc.column_id));
+    }
+    if (has_column_name) {
+        fields.push_back(fmt::format("column={}", desc.column_name));
+    }
+    return fmt::format("{}", fmt::join(fields, ", "));
+}
+
+// Describes where a filter came from: the source label of its kind, followed
+// by the runtime filter id for RUNTIME_FILTER or the source node id for
+// TOPN_FILTER. Every other kind yields just the label.
+std::string source_string(const ScanFilterDesc& desc) {
+    const char* label = scan_filter_source_name(desc.kind);
+    switch (desc.kind) {
+    case ScanFilterKind::RUNTIME_FILTER:
+        return fmt::format("{} rf_id={}", label, desc.runtime_filter_id);
+    case ScanFilterKind::TOPN_FILTER:
+        return fmt::format("{} source_node_id={}", label, desc.topn_filter_source_node_id);
+    default:
+        return label;
+    }
+}
+
+void materialize_filter_counters(RuntimeProfile* filter_profile,
+                                 const MaterializedFilterSnapshot& snapshot, 
int profile_level,
+                                 bool is_key_range_source) {
+    const auto* runtime_filter_stats =
+            snapshot.runtime_filter_stats.has_value() ? 
&*snapshot.runtime_filter_stats : nullptr;
+    filter_profile->add_info_string("Source", source_string(snapshot.desc));
+    add_info_string_if_not_empty(filter_profile, "Target", 
target_string(snapshot.desc));
+    filter_profile->add_info_string("Stages",
+                                    scan_filter_stages_string(snapshot, 
is_key_range_source));
+    add_info_string_if_not_empty(filter_profile, "Expr", 
snapshot.desc.expr_debug_string);
+    if (profile_level >= 2 && runtime_filter_stats != nullptr &&

Review Comment:
   legacy RF debug description 是 level 2 counter description,level 1 本来也会被 
prune;当前新路径的 gate 保持了同样可见性。



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to