DRILL-5741: Automatically manage memory allocations during startup

**Note:** This commit is rebased on (and hence requires) the DRILL-6068 commit.

This commit provides a way for distributions and users to automatically define 
the Drillbit JVM's Heap, Direct and CodeCache allocations. To do this, it 
leverages the DRILL-6068 commit, which provides for configuration setup.

The only new variable introduced is an environment variable,
`DRILLBIT_MAX_PROC_MEM`, that defines the maximum memory for the Drillbit
process. The logic defined by `auto-setup.sh` checks that, if any (or all) of
the JVM memory parameters have been declared, they cumulatively do not exceed
the value specified by `DRILLBIT_MAX_PROC_MEM`.

NOTE: To disable this, simply unset (or don't define) the environment variable
`DRILLBIT_MAX_PROC_MEM`.

The variable can be defined in KB, MB, or GB, with a syntax similar to how the
JVM MaxHeap is specified; for example, `DRILLBIT_MAX_PROC_MEM=13G`.
In addition, you can specify it as a percentage of the total system memory
prior to the Drillbit starting up; for example, `DRILLBIT_MAX_PROC_MEM=25%`.

For a system with 48GB free memory, when set to (say) 25% (with settings
defined in drill-env.sh), and heap (8GB) and direct (10GB) are defined, the
Drillbit fails to start, with the following message:
2018-01-03 14:27:57  [WARN] 25% of System Memory (47 GB) translates to 12 GB
2018-01-03 14:27:57  [ERROR]    Unable to start Drillbit due to memory 
constraint violations
  Total Memory Requested : 19 GB
  Check the following settings to possibly modify (or increase the Max Memory Permitted):
        *NOTE: It is recommended not to specify DRILLBIT_CODE_CACHE_SIZE as 
this will be auto-computed based on the HeapSize and would not exceed 1GB

For all other combinations, the undefined parameters are adjusted to ensure
that the total memory allocated is within the value specified by
`DRILLBIT_MAX_PROC_MEM`.
For a system with 48GB free memory, when set to (say) 50% (with settings
defined in drill-env.sh), and heap (8GB) and direct (10GB) are defined, the
Drillbit starts up with the following warning:
2018-01-03 14:31:06  [WARN] 50% of System Memory (47 GB) translates to 24 GB
2018-01-03 14:31:06  [WARN] You have an allocation of 4 GB that is currently 
unused from a total of 24 GB. You can increase your existing memory 
configuration to use this extra memory
        *NOTE: It is recommended not to specify DRILLBIT_CODE_CACHE_SIZE as 
this will be auto-computed based on the HeapSize and would not exceed 1GB

In addition, if the available free memory is less than the allocation, an 
additional warning is provided under the assumption that the OS will reclaim 
more free memory when required:
2018-01-03 14:31:06  [WARN] Total Memory Allocation for Drillbit (19GB) exceeds 
available free memory (11GB)
2018-01-03 14:31:06  [WARN] Drillbit will start up, but can potentially crash 
due to oversubscribing of system memory.

For more details, refer to the attachments in DRILL-5741.

Changes to auto configure messaging

Publishing final values prior to startup

Minor update for printing to console's err stream

close apache/drill#1082

Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/50efb806
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/50efb806
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/50efb806

Branch: refs/heads/master
Commit: 50efb806bb03494e1da4d6b48f90fbf58d699c18
Parents: 266250c
Author: Kunal Khatua <kkha...@maprtech.com>
Authored: Thu Jan 11 17:35:13 2018 -0800
Committer: Aman Sinha <asi...@maprtech.com>
Committed: Fri Feb 23 17:55:36 2018 -0800

 distribution/src/resources/auto-setup.sh    | 202 ++++++++++++++++++++++-
 distribution/src/resources/distrib-setup.sh |   2 +-
 distribution/src/resources/drill-env.sh     |  10 ++
 3 files changed, 212 insertions(+), 2 deletions(-)

diff --git a/distribution/src/resources/auto-setup.sh 
index 75bdda0..141648c 100644
--- a/distribution/src/resources/auto-setup.sh
+++ b/distribution/src/resources/auto-setup.sh
@@ -25,5 +25,205 @@
 # if [ $status == "FAILED" ]; return 1; fi
-# FEATURES (Added here to describe supported additions)
+# 1. Provides checks and auto-configuration for memory settings
+# Convert a Java-style memory value to whole megabytes.
+# Arguments: $1 - a number followed by a one-character unit suffix: k, m or g
+#                 (any case), or % for a percentage of total system RAM.
+# Globals:   totalRAM_inMB (read, only for % values)
+# Outputs:   the value in MB on stdout (integer arithmetic, fractions are
+#            dropped); an empty line if $1 is empty; "error" for any other
+#            suffix.
+# Returns:   1 for an unrecognised suffix, 0 otherwise.
+function valueInMB() {
+  if [ -z "$1" ]; then echo ""; return; fi
+  local inputTxt
+  inputTxt=$(echo "$1" | tr '[A-Z]' '[a-z]')
+  # Everything except the trailing unit character
+  local inputValue=${inputTxt:0:${#inputTxt}-1}
+  local resultInMB
+  # Dispatch on the suffix of the argument itself, never on a caller's variable
+  case "$inputTxt" in
+    *g) resultInMB=$(( inputValue * 1024 )) ;;
+    *k) resultInMB=$(( inputValue / 1024 )) ;;
+    *m) resultInMB=$(( inputValue )) ;;
+    #TotalRAM_inMB*percentage
+    *%) resultInMB=$(( inputValue * totalRAM_inMB / 100 )) ;;
+    *)
+      echo error
+      return 1
+      ;;
+  esac
+  echo "$resultInMB"
+  return
+}
+# Convert a Java-style memory value to whole gigabytes.
+# Arguments: $1 - a number followed by a one-character unit suffix: k, m or g
+#                 (any case), or % for a percentage of total system RAM.
+# Outputs:   the value in GB on stdout (integer arithmetic, fractions are
+#            dropped); an empty line if $1 is empty; "error" for any other
+#            suffix.
+# Returns:   1 for an unrecognised suffix, 0 otherwise.
+function valueInGB() {
+  if [ -z "$1" ]; then echo ""; return; fi
+  local inputTxt
+  inputTxt=$(echo "$1" | tr '[A-Z]' '[a-z]')
+  # Everything except the trailing unit character
+  local inputValue=${inputTxt:0:${#inputTxt}-1}
+  local resultInGB
+  # Dispatch on the suffix of the argument itself, never on a caller's variable
+  case "$inputTxt" in
+    *g) resultInGB=$(( inputValue )) ;;
+    *k) resultInGB=$(( inputValue / 1024 / 1024 )) ;;
+    *m) resultInGB=$(( inputValue / 1024 )) ;;
+    *%)
+      #Total system RAM (kB, read from /proc/meminfo) * percentage [Works on Linux]
+      local memTotalKB
+      memTotalKB=$(grep MemTotal /proc/meminfo | tr ' ' '\n' | grep '[0-9]')
+      resultInGB=$(( inputValue * memTotalKB / 1024 / 1024 / 100 ))
+      ;;
+    *)
+      echo error
+      return 1
+      ;;
+  esac
+  echo "$resultInGB"
+  return
+}
+# Estimate the JVM code cache size from the combined heap and direct memory.
+# Arguments: $1 - heap + direct memory, in MB
+# Outputs:   code cache size in MB on stdout: 512 when $1 is at most 4096,
+#            768 when it is at most 10240, otherwise 1024.
+function estCodeCacheInMB() {
+  local totalHeapAndDirect=$1
+  # Operands are quoted so an empty or multi-word argument cannot change how
+  # the test expression is parsed; such input still falls through to 1024.
+  if [ "$totalHeapAndDirect" -le 4096 ]; then
+    echo 512
+  elif [ "$totalHeapAndDirect" -le 10240 ]; then
+    echo 768
+  else
+    echo 1024
+  fi
+}
+# Print the current memory allocation settings to stderr.
+# DRILL_HEAP is echoed when it is non-empty, and a note advising against
+# setting DRILLBIT_CODE_CACHE_SIZE is printed when that variable is non-empty.
+# NOTE(review): the echo statements for DRILLBIT_MAX_PROC_MEM and
+# DRILL_MAX_DIRECT_MEMORY, and the function's braces, are cut off in this copy
+# of the patch; presumably they mirror the DRILL_HEAP line - confirm against
+# the committed file.
+function printCurrAllocation()
+  if [ -n "$DRILLBIT_MAX_PROC_MEM" ]; then echo -e "    
+  if [ -n "$DRILL_HEAP" ]; then echo -e "    DRILL_HEAP=$DRILL_HEAP" 1>&2; fi
+  if [ -n "$DRILL_MAX_DIRECT_MEMORY" ]; then echo -e "    
+  if [ -n "$DRILLBIT_CODE_CACHE_SIZE" ]; then
+    echo -e "    *NOTE: It is recommended not to specify 
DRILLBIT_CODE_CACHE_SIZE as this will be auto-computed based on the HeapSize 
and would not exceed 1GB" 1>&2
+  fi
+# Check and auto-configuration for memory settings
+#Default (Track status of this check: "" => Continue checking ; "PASSED" => no 
more check required)
+#Computing existing system information
+# Tested on Linux (CentOS/RHEL/Ubuntu); Cygwin (Win10Pro-64bit)
+# Determine total and free system memory, in MB, for the current platform.
+# On Linux, Cygwin and Msys the figures are read from /proc/meminfo; on macOS
+# they are derived from vm_stat page counts multiplied by the page size. For an
+# unrecognised OSTYPE a warning is printed and AutoMemConfigStatus is set to
+# "PASSED".
+# NOTE(review): several lines in this section are truncated in this copy of the
+# patch (the tails of the /proc/meminfo pipelines and sed expressions, the
+# totalRAM_inPages sum, and the else/fi keywords); consult the committed file
+# for the exact text.
+if [[ "$OSTYPE" == *linux* ]] || [[ "$OSTYPE" == cygwin* ]]; then
+  let totalRAM_inMB=`cat /proc/meminfo | grep MemTotal | tr ' ' '\n'| grep 
+  let freeRAM_inMB=`cat /proc/meminfo | grep MemFree | tr ' ' '\n'| grep 
+elif [[ "$OSTYPE" == darwin* ]]; then
+  # Mac OSX
+  #Refer for math: https://apple.stackexchange.com/a/196925
+  #Page Size
+  let macOSPageSize=`vm_stat | grep 'page size' | grep -o -E '[0-9]+'`
+  #MemoryUsage on MacOS
+  let freePg=`vm_stat | grep free | awk '{ print $NF }' | sed 's/\.//'`
+  let activePg=`vm_stat | grep -w 'active:' | awk '{ print $NF }' | sed 
+  let speculativePg=`vm_stat | grep speculative | awk '{ print $NF }' | sed 
+  let fileCachePg=`vm_stat | grep File-backed | awk '{ print $NF }' | sed 
+  let wiredMemPg=`vm_stat | grep 'wired down' | awk '{ print $NF }' | sed 
+  let compressedPg=`vm_stat | grep 'occupied by compressor' | awk '{ print $NF 
}' | sed 's/\.//'`
+  #Total
+  let 
+  # page count * page size / 1048576 converts pages to MB
+  let totalRAM_inMB=$totalRAM_inPages*$macOSPageSize/1048576
+  let freeRAM_inMB=$freePg*$macOSPageSize/1048576
+elif [[ "$OSTYPE" == "msys" ]]; then
+  # Msys env on MinGW (TODO: Pending verification)
+  let totalRAM_inMB=`cat /proc/meminfo | grep MemTotal | tr ' ' '\n'| grep 
+  let freeRAM_inMB=`cat /proc/meminfo | grep MemFree | tr ' ' '\n'| grep 
+  # Unknown OS
+  echo `date +%Y-%m-%d" "%H:%M:%S`"  [WARN] Unknown OS ("$OSTYPE"). Will not 
attempt to auto-configure memory" 1>&2
+  AutoMemConfigStatus="PASSED"
+#Read current values
+# Each setting is normalised to MB through valueInMB; a variable that is unset
+# or empty yields an empty string here.
+DbitMaxProcMem=$(valueInMB $DRILLBIT_MAX_PROC_MEM)
+DbitMaxDirectMem=$(valueInMB $DRILL_MAX_DIRECT_MEMORY)
+DbitMaxHeapMem=$(valueInMB $DRILL_HEAP)
+DbitMaxCodeCacheMem=$(valueInMB $DRILLBIT_CODE_CACHE_SIZE)
+# Alert for %age usage
+# When the limit was given as a percentage, report on stderr what it resolves
+# to in GB (skipped once AutoMemConfigStatus is non-empty).
+if [[ "$DRILLBIT_MAX_PROC_MEM" == *% ]] && [ -z "$AutoMemConfigStatus" ]; then
+  echo `date +%Y-%m-%d" "%H:%M:%S`"  [WARN] "$DRILLBIT_MAX_PROC_MEM" of System 
Memory ("$(valueInGB $totalRAM_inMB'm')" GB) translates to "$(valueInGB 
$DbitMaxProcMem'm')" GB" 1>&2
+### Performing Auto-Configuration
+# Two cases, both skipped once AutoMemConfigStatus is non-empty:
+#  - DRILLBIT_MAX_PROC_MEM not set: only the code cache size is estimated (when
+#    heap and direct are both set and no code cache size was given); the check
+#    is then marked "PASSED".
+#  - DRILLBIT_MAX_PROC_MEM set: the code cache size is estimated from the limit
+#    if not given, then heap/direct are validated (both set) or derived (one or
+#    both missing) so that heap + direct + code cache fits the limit. Exits
+#    with status 127 when explicitly set heap + direct + code cache exceed it.
+if [ -z "$DbitMaxProcMem" ] && [ -z "$AutoMemConfigStatus" ]; then
+  if [ -n "$DbitMaxDirectMem" ] && [ -n "$DbitMaxHeapMem" ]; then
+    ## [SCENARIO 1]: TotalCap is NOT Defined, but Heap&Direct ARE Defined 
(i.e. no limit)
+    let currTotal=$DbitMaxDirectMem+$DbitMaxHeapMem
+    #Estimating CodeCache size of current total
+    if [ -z "$DbitMaxCodeCacheMem" ]; then export 
DRILLBIT_CODE_CACHE_SIZE=$(estCodeCacheInMB $currTotal)'m'; fi
+  fi
+  # Default values will be loaded for unspecified memory parameters
+  AutoMemConfigStatus="PASSED"
+elif [ -z "$AutoMemConfigStatus" ]; then
+  ## Scenario: Total IS Defined
+  if [ -z "$DbitMaxCodeCacheMem" ]; then
+    let DbitMaxCodeCacheMem=$(estCodeCacheInMB $DbitMaxProcMem)
+    export DRILLBIT_CODE_CACHE_SIZE=$DbitMaxCodeCacheMem'm'
+  fi
+  if [ -n "$DbitMaxHeapMem" ] && [ -n "$DbitMaxDirectMem" ]; then
+    ## [SCENARIO 2]: Heap & Direct ARE Defined
+    let calcTotalInMB=$DbitMaxDirectMem+$DbitMaxHeapMem+$DbitMaxCodeCacheMem
+    # Fail if exceeding process limit
+    if [ $calcTotalInMB -gt $DbitMaxProcMem ]; then
+      echo "[ERROR]    Unable to start Drillbit due to memory constraint 
violations" 1>&2
+      echo "  Total Memory Requested : "$(valueInGB $calcTotalInMB'm')" GB" 
+      echo "  Check the following settings to possibly modify (or increase the 
Max Memory Permitted):" 1>&2
+      printCurrAllocation
+      exit 127
+    else
+      #All numbers align
+      # Only report headroom when more than 1 GB of the limit is left unused
+      let deltaInGB=($DbitMaxProcMem-$calcTotalInMB)/1024
+      if [ $deltaInGB -gt 1 ]; then
+        echo "[WARN] You have an allocation of "$deltaInGB" GB that is 
currently unused from a total of "$(valueInGB $DbitMaxProcMem'm')" GB. You can 
increase your existing memory configuration to use this extra memory" 1>&2
+        printCurrAllocation
+      fi
+    fi
+  elif [ -n "$DbitMaxHeapMem" ] && [ -z "$DbitMaxDirectMem" ]; then
+    ## [SCENARIO 3]: Total and only Heap is defined
+    echo "[WARN] Only DRILL_HEAP is defined. Auto-configuring for Direct 
memory" 1>&2
+    let DbitMaxDirectMem=$DbitMaxProcMem-$DbitMaxHeapMem-$DbitMaxCodeCacheMem
+  elif [ -z "$DbitMaxHeapMem" ] && [ -n "$DbitMaxDirectMem" ]; then
+    ## [SCENARIO 4]: Total and only Direct is defined
+    echo "[WARN] Only DRILL_MAX_DIRECT_MEMORY is defined. Auto-configuring for 
Heap" 1>&2
+    let DbitMaxHeapMem=$DbitMaxProcMem-$DbitMaxDirectMem-$DbitMaxCodeCacheMem
+  elif [ -z "$DbitMaxDirectMem" ] && [ -z "$DbitMaxHeapMem" ]; then
+    ## [SCENARIO 5]: Only Total is defined
+    echo "[WARN] Only DRILLBIT_MAX_PROC_MEM is defined. Auto-configuring for 
Heap & Direct memory" 1>&2
+    ## Compute Direct & Heap
+    # Heap (GB) = -13.2 + 6.12*ln(limit in GB), rounded, with a floor of 1 GB;
+    # direct memory receives whatever remains after heap and code cache.
+    let DbitMaxProcMemInGB=$(valueInGB $DbitMaxProcMem'm')
+    let DbitMaxHeapMemInGB=`echo $DbitMaxProcMemInGB | awk 
'{heap=-13.2+6.12*log($1); if (heap<1) {heap=1}; printf "%0.0f\n", heap }'`
+    let DbitMaxHeapMem=$(valueInMB $DbitMaxHeapMemInGB'g')
+    let DbitMaxDirectMem=$DbitMaxProcMem-$DbitMaxHeapMem-$DbitMaxCodeCacheMem
+  fi
+  ## Export computed values
+  # NOTE(review): valueInGB uses integer division, so a heap or direct value
+  # below 1024 MB is exported as "0G" and fractional GB are dropped - confirm
+  # this is intended.
+  export DRILL_HEAP=$(valueInGB $DbitMaxHeapMem'm')"G"
+  export DRILL_MAX_DIRECT_MEMORY=$(valueInGB $DbitMaxDirectMem'm')"G"
+  export DRILLBIT_CODE_CACHE_SIZE=$DbitMaxCodeCacheMem'm'
+### Broad check for System Level capacity
+# Compare the total Drillbit allocation (heap + direct + code cache, in MB)
+# with the machine's memory: exit with status 127 if it exceeds total RAM, and
+# only warn if it exceeds the currently free RAM. Skipped once
+# AutoMemConfigStatus is non-empty.
+if [ -z "$AutoMemConfigStatus" ]; then
+  # Rereading for recently exported env var
+  DbitMaxDirectMem=$(valueInMB $DRILL_MAX_DIRECT_MEMORY)
+  DbitMaxHeapMem=$(valueInMB $DRILL_HEAP)
+  DbitMaxCodeCacheMem=$(valueInMB $DRILLBIT_CODE_CACHE_SIZE)
+  echo "[INFO] Attempting to start up Drill with the following settings" 1>&2
+  echo "  DRILL_HEAP="$DRILL_HEAP 1>&2
+  let totalDBitMem_inMB=$DbitMaxDirectMem+$DbitMaxHeapMem+$DbitMaxCodeCacheMem 
+  if [ $totalDBitMem_inMB -gt $totalRAM_inMB ]; then
+    echo "[ERROR] Total Memory Allocation for Drillbit ("$(valueInGB 
$totalDBitMem_inMB'm')"GB) exceeds total system memory ("$(valueInGB 
$totalRAM_inMB'm')"GB)" 1>&2
+    echo "[ERROR] Drillbit not will start up. Please check your allocations" 
+    exit 127
+  elif [ $totalDBitMem_inMB -gt $freeRAM_inMB ]; then
+    echo "[WARN] Total Memory Allocation for Drillbit ("$(valueInGB 
$totalDBitMem_inMB'm')"GB) exceeds available free memory ("$(valueInGB 
$freeRAM_inMB'm')"GB)" 1>&2
+    echo "[WARN] Drillbit will start up, but can potentially crash due to 
oversubscribing of system memory." 1>&2
+  fi
+#Implicit that checks have passed
+# AT THIS POINT: Check and auto-configuration for memory settings [PASSED]

diff --git a/distribution/src/resources/distrib-setup.sh 
index dbd2b90..e641b76 100644
--- a/distribution/src/resources/distrib-setup.sh
+++ b/distribution/src/resources/distrib-setup.sh
@@ -25,5 +25,5 @@
 # if [ $status == "FAILED" ]; return 1; fi
-# FEATURES (Added here to describe supported additions)

diff --git a/distribution/src/resources/drill-env.sh 
index 8211c3c..02f1b2d 100644
--- a/distribution/src/resources/drill-env.sh
+++ b/distribution/src/resources/drill-env.sh
@@ -37,6 +37,16 @@
 # or a more specialized form.
+# Amount of total memory for the Drillbit process. This value is defined as the limit
+# that the startup script will try to enforce on the Drill JVM. The value can be
+# defined in terms of percentage of the available system memory, or in terms of actual
+# values, similar to how we define the actual JVM memory parameters like Heap.
+# There is no default; it depends on how much can be allotted on a machine.
+# This enables Drill's memory auto-configuration logic to kick in, and should be unset
+# if the intent is to not use the auto-configuration.
 # Amount of heap memory for the Drillbit process. Values are those supported by
 # the Java -Xms option. The default is 4G.

Reply via email to