This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new c04de99  IMPALA-10343: increase control_service_queue_mem_limit
c04de99 is described below

commit c04de9933d174ebf7fc3779faa1fa54de953be93
Author: Tim Armstrong <tarmstr...@cloudera.com>
AuthorDate: Wed Dec 9 14:08:33 2020 -0800

    IMPALA-10343: increase control_service_queue_mem_limit
    
    The default for --control_service_queue_mem_limit is changed from
    50MB to 1% of the process memory limit, which increases the maximum
    size of the queue in typical production deployments. E.g. an Impala
    daemon with a 50GB memory limit will have a 512MB limit on the
    control service queue.
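
    As a minimal sketch (not Impala's actual code), the arithmetic above
    works out as follows, assuming binary (1024-based) units:

        // Sketch only: how a 1% setting resolves against a 50GB process
        // memory limit.
        #include <cstdint>
        #include <iostream>

        int main() {
          const int64_t process_mem_limit = 50LL * 1024 * 1024 * 1024;   // 50GB
          const int64_t queue_limit = process_mem_limit / 100;           // "1%"
          std::cout << queue_limit / (1024 * 1024) << "MB" << std::endl; // prints 512MB
          return 0;
        }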
    
    Add --control_service_queue_mem_limit_floor_bytes (default 50MB) so
    that the new percentage-based default does not have the unintended
    effect of reducing the memory given to the control service queue on
    daemons with smaller memory limits. I.e. the default behaviour does
    not change for Impala daemons with a process memory limit of
    <= 5000MB, but the control service queue memory limit does increase
    for Impala daemons with memory limits > 5000MB.
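
    As a simplified sketch (not the actual ControlService code; the
    helper name EffectiveQueueLimit is hypothetical, and the real flag
    is a mem-spec string parsed at startup), the two settings combine
    roughly as follows:

        // Sketch only: effective limit = max(1% of process limit, 50MB floor).
        #include <algorithm>
        #include <cstdint>

        constexpr int64_t kMB = 1024LL * 1024;

        int64_t EffectiveQueueLimit(int64_t process_mem_limit_bytes) {
          const int64_t pct_limit = process_mem_limit_bytes / 100;  // the 1% default
          const int64_t floor_bytes = 50 * kMB;                     // default floor
          return std::max(pct_limit, floor_bytes);
        }

        // EffectiveQueueLimit(5000 * kMB)   -> 50MB   (same as the old default)
        // EffectiveQueueLimit(100000 * kMB) -> 1000MB (larger queue on big daemons)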
    
    The default process memory limit in the mocked backend test ExecEnv
    is changed to 8GB. Previously it was unlimited, so 1% of it could
    not be calculated. It cannot be unlimited in an actual impalad since
    IMPALA-5653 was fixed.
    
    Testing:
    The previous 50MB default had been problematic on a 64-node TPC-DS
    workload with mt_dop=12 where impalads had ~100GB of memory: status
    report RPCs would fail and have to be retried. We tested the new
    default on the same workload and the retries were avoided.
    
    Change-Id: Ic7fe93b5ce7eb6b63e48293ac287d98cc1d9e3fa
    Reviewed-on: http://gerrit.cloudera.org:8080/16848
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Reviewed-by: Thomas Tauber-Marshall <tmarsh...@cloudera.com>
---
 be/src/runtime/test-env.h                | 4 +++-
 be/src/service/control-service.cc        | 7 ++++++-
 tests/custom_cluster/test_rpc_timeout.py | 4 +++-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/be/src/runtime/test-env.h b/be/src/runtime/test-env.h
index 108b62f..6e50fd0 100644
--- a/be/src/runtime/test-env.h
+++ b/be/src/runtime/test-env.h
@@ -96,7 +96,9 @@ class TestEnv {
   int64_t buffer_pool_capacity_;
 
   /// Arguments for process memory tracker, used in Init().
-  int64_t process_mem_limit_ = -1;
+  /// Default to 8GB, which should be enough for any tests that are not deliberately
+  /// allocating large amounts of memory.
+  int64_t process_mem_limit_ = 8L * 1024L * 1024L * 1024L;
   bool process_mem_tracker_use_metrics_ = false;
 
   /// Global state for test environment.
diff --git a/be/src/service/control-service.cc b/be/src/service/control-service.cc
index 3db1055..8c6cea0 100644
--- a/be/src/service/control-service.cc
+++ b/be/src/service/control-service.cc
@@ -49,7 +49,11 @@ using kudu::rpc::RpcContext;
 
 static const string QUEUE_LIMIT_MSG = "(Advanced) Limit on RPC payloads consumption for "
     "ControlService. " + Substitute(MEM_UNITS_HELP_MSG, "the process memory limit");
-DEFINE_string(control_service_queue_mem_limit, "50MB", QUEUE_LIMIT_MSG.c_str());
+DEFINE_string(control_service_queue_mem_limit, "1%", QUEUE_LIMIT_MSG.c_str());
+DEFINE_int64(control_service_queue_mem_limit_floor_bytes, 50L * 1024L * 1024L,
+    "Lower bound on --control_service_queue_mem_limit in bytes. If "
+    "--control_service_queue_mem_limit works out to be less than this amount, "
+    "this value is used instead");
 DEFINE_int32(control_service_num_svc_threads, 0, "Number of threads for processing "
     "control service's RPCs. if left at default value 0, it will be set to number of "
     "CPU cores. Set it to a positive value to change from the default.");
@@ -68,6 +72,7 @@ ControlService::ControlService(MetricGroup* metric_group)
    CLEAN_EXIT_WITH_ERROR(Substitute("Invalid mem limit for control service queue: "
         "'$0'.", FLAGS_control_service_queue_mem_limit));
   }
+  bytes_limit = max(bytes_limit, FLAGS_control_service_queue_mem_limit_floor_bytes);
   mem_tracker_.reset(new MemTracker(
       bytes_limit, "Control Service Queue", process_mem_tracker));
   MemTrackerMetric::CreateMetrics(metric_group, mem_tracker_.get(), "ControlService");
diff --git a/tests/custom_cluster/test_rpc_timeout.py b/tests/custom_cluster/test_rpc_timeout.py
index 59a809a..797efb2 100644
--- a/tests/custom_cluster/test_rpc_timeout.py
+++ b/tests/custom_cluster/test_rpc_timeout.py
@@ -154,7 +154,9 @@ class TestRPCTimeout(CustomClusterTestSuite):
   # the retry paths in the ReportExecStatus() RPC
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args("--status_report_interval_ms=100"
-      " --control_service_queue_mem_limit=1 
--control_service_num_svc_threads=1")
+      " --control_service_queue_mem_limit=1"
+      " --control_service_queue_mem_limit_floor_bytes=1"
+      " --control_service_num_svc_threads=1")
   def test_reportexecstatus_retry(self, vector):
     self.execute_query_verify_metrics(self.TEST_QUERY, None, 10)
 
