This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch branch-4.4.0
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 045bdb0c8aec5a4a77d83e099268c4200d6ccee4
Author: Michael Smith <michael.sm...@cloudera.com>
AuthorDate: Thu May 2 15:55:34 2024 -0700

    IMPALA-13054: Avoid revisiting children in QueryStateExpanded
    
    Avoids revisiting children in QueryStateExpanded due to duplicate
    recursion. Since process_exec_profile visits children recursively, we
    only want immediate children from GetChildren, not all children from
    GetAllChildren.
    
    Adjusts HDFS_SCAN_NODE test to look back at top of stack, rather than
    depend on a specific depth that was caused by calling GetAllChildren
    from the root.
    
    This was identified by running the Impala E2E test suite with workload
    management enabled, where
    test_nested_types.py::TestMaxNestingDepth::test_max_nesting_depth took
    multiple hours to run and blocked the FinishUnregisterQuery for other
    tests.
    
    Testing:
    - Manual test with enable_workload_mgmt=true running
      test_max_nesting_depth.
    
    Change-Id: Ia36d53e52095d487ffb3a162fc723f1e815bec11
    Reviewed-on: http://gerrit.cloudera.org:8080/21392
    Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    (cherry picked from commit 086cf6b921f99359b9d490555163c51285499fa2)
---
 be/src/service/query-state-record.cc | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/be/src/service/query-state-record.cc 
b/be/src/service/query-state-record.cc
index 8c7458a03..d054cf644 100644
--- a/be/src/service/query-state-record.cc
+++ b/be/src/service/query-state-record.cc
@@ -184,6 +184,14 @@ int64_t EstimateSize(const QueryStateRecord* record) {
   return size;
 }
 
+static bool find_instance(RuntimeProfileBase* prof) {
+  return boost::algorithm::istarts_with(prof->name(), "Instance");
+}
+
+static bool find_averaged(RuntimeProfileBase* prof) {
+  return boost::algorithm::istarts_with(prof->name(), "Averaged Fragment");
+}
+
 QueryStateExpanded::QueryStateExpanded(const ClientRequestState& exec_state,
     const std::shared_ptr<QueryStateRecord> base) :
     base_state(base ? move(base) : make_shared<QueryStateRecord>(exec_state)) {
@@ -253,9 +261,7 @@ QueryStateExpanded::QueryStateExpanded(const 
ClientRequestState& exec_state,
     map<string, int64_t> bytes_read_cache;
     vector<RuntimeProfileBase*> prof_stack;
 
-    using boost::algorithm::iequals;
-
-   // Lambda function to recursively walk through a profile.
+    // Lambda function to recursively walk through a profile.
     std::function<void(RuntimeProfileBase*)> process_exec_profile = [
         &process_exec_profile, &host_scratch_bytes, &scanner_io_wait, 
&prof_stack,
         &bytes_read_cache, this](RuntimeProfileBase* profile) {
@@ -266,20 +272,26 @@ QueryStateExpanded::QueryStateExpanded(const 
ClientRequestState& exec_state,
       }
 
       // Metrics from HDFS_SCAN_NODE entries.
-      if (prof_stack.size() >= 3) {
-        if (iequals(prof_stack[1]->name().substr(0, 8), "instance") &&
-            iequals(prof_stack[2]->name().substr(0, 14), "hdfs_scan_node")) {
+      if (const string& scan = prof_stack.back()->name();
+          boost::algorithm::istarts_with(scan, "HDFS_SCAN_NODE")) {
+        // Find a parent instance. If none found, assume in Averaged Fragment.
+        if (auto it = find_if(prof_stack.begin()+1, prof_stack.end()-1, 
find_instance);
+            it != prof_stack.end()-1) {
+          DCHECK(find_if(prof_stack.begin(), prof_stack.end(), find_averaged)
+              == prof_stack.end());
+          const string& inst = (*it)->name();
           if (const auto& cntr = profile->GetCounter("ScannerIoWaitTime");
               cntr != nullptr) {
-            scanner_io_wait.emplace(StrCat(
-                prof_stack[1]->name(), "::", prof_stack[2]->name()), 
cntr->value());
+            scanner_io_wait.emplace(StrCat(inst, "::", scan), cntr->value());
           }
 
           if (const auto& cntr = profile->GetCounter("DataCacheHitBytes");
               cntr != nullptr) {
-            bytes_read_cache.emplace(StrCat(
-                prof_stack[1]->name(), "::", prof_stack[2]->name()), 
cntr->value());
+            bytes_read_cache.emplace(StrCat(inst, "::", scan), cntr->value());
           }
+        } else {
+          DCHECK(find_if(prof_stack.begin(), prof_stack.end(), find_averaged)
+              != prof_stack.end());
         }
       }
 
@@ -290,7 +302,7 @@ QueryStateExpanded::QueryStateExpanded(const 
ClientRequestState& exec_state,
 
       // Recursively walk down through all child nodes.
       vector<RuntimeProfileBase*> children;
-      profile->GetAllChildren(&children);
+      profile->GetChildren(&children);
       for (const auto& child : children) {
         process_exec_profile(child);
       }

Reply via email to