LGTM, but see the additional comment on the commit below.

On 10/19/2015 03:58 PM, 'Klaus Aehlig' via ganeti-devel wrote:
...if memory over-commitment is in place. We currently use
constant weight for the memory utilization. This will likely be
changed to something more aggressive, depending on our experience
with memory-based balancing.

Signed-off-by: Klaus Aehlig <[email protected]>
---
  src/Ganeti/MaintD/Balance.hs | 41 ++++++++++++++++++++++++++++++++++++++---
  1 file changed, 38 insertions(+), 3 deletions(-)

diff --git a/src/Ganeti/MaintD/Balance.hs b/src/Ganeti/MaintD/Balance.hs
index e7df512..de970a6 100644
--- a/src/Ganeti/MaintD/Balance.hs
+++ b/src/Ganeti/MaintD/Balance.hs
@@ -39,10 +39,12 @@ module Ganeti.MaintD.Balance
    ( balanceTask
    ) where
+import Control.Arrow ((***), (&&&))
  import Control.Exception.Lifted (bracket)
  import Control.Monad (liftM, unless, when)
  import Control.Monad.IO.Class (liftIO)
  import Data.IORef (IORef)
+import qualified Data.IntMap as IntMap
  import qualified Data.Set as Set
  import qualified Data.Map as Map
  import Data.Maybe (mapMaybe, isJust)
@@ -65,7 +67,7 @@ import Ganeti.JQueue (currentTimestamp)
  import Ganeti.JQueue.Objects (Timestamp)
  import Ganeti.Jobs (submitJobs)
  import Ganeti.HTools.Types ( zeroUtil, DynUtil(cpuWeight), addUtil, subUtil
-                           , MoveJob)
+                           , MoveJob, iPolicyMemoryRatio)
  import Ganeti.Logging.Lifted (logDebug)
  import Ganeti.MaintD.MemoryState ( MemoryState, getEvacuated
                                   , addEvacuated, rmEvacuated)
@@ -81,6 +83,7 @@ import Ganeti.Utils (logAndBad)
data AllReports = AllReports { rTotal :: MonD.Report
                               , rIndividual :: MonD.Report
+                             , rMem :: MonD.Report
                               }
-- | Empty report. It describes an idle node and can be used as
@@ -88,6 +91,7 @@ data AllReports = AllReports { rTotal :: MonD.Report
  emptyReports :: AllReports
  emptyReports = AllReports (MonD.CPUavgloadReport emptyCPUavgload)
                            (MonD.InstanceCpuReport Map.empty)
+                          (MonD.InstanceRSSReport Map.empty)
-- | Query a node unless it is offline and return all
  -- CPU reports. For offline nodes return the empty report.
@@ -104,7 +108,8 @@ queryNode node = do
      else do
        total <- getReport MonD.totalCPUCollector
        xeninstances <- getReport MonD.xenCPUCollector
-      return $ AllReports total xeninstances
+      rssinstances <- getReport MonD.kvmRSSCollector
+      return $ AllReports total xeninstances rssinstances
-- | Get a map with the CPU live data for all nodes; for offline nodes
  -- the empty report is guessed.
@@ -286,6 +291,27 @@ balanceGroup memstate xens client allowedNodes threshold (gidx, (nl, il)) = do
                     $ "Failure submitting balancing jobs: " ++ e
          Ok jids' -> return jids'
+-- * Memory balancing
+
+-- | Decide the weight that dynamic memory utilization should have
+-- based on the memory-over-commitment ratio. This function is likely
+-- to change once more experience with memory over-committed clusters
+-- is gained.
+weightFromMemRatio :: Double -> Double
+weightFromMemRatio f = if f > 1.0 then 1.0 else 0.0
+
+-- | Apply the memory data to the cluster data.
+useMemData :: Double
+           -> Container.Container AllReports
+           -> (Node.List, Instance.List)
+           -> ResultT String IO (Node.List, Instance.List)
+useMemData ratio allreports (nl, il) = do
+  logDebug "Taking dynamic memory data into account"
+  let memoryReports =
+        map (flip Container.find nl *** rMem) $ IntMap.toList allreports
+  mkResultT . return . liftM (MonD.scaleMemoryWeight (weightFromMemRatio ratio))
+    $ MonD.useInstanceRSSData memoryReports (nl, il)
+
  -- * Interface function
-- | Carry out all the needed balancing, based on live CPU data, only touching
@@ -305,7 +331,16 @@ balanceTask memstate (nl, il) okNodes threshold = do
    (nl', il') <- mkResultT . return
                    $ updateCPULoad (nl, il) reports xenInstances evacuated
    liftIO $ mapM_ (cleanUpEvacuation memstate il reports) evacuated
-  let ngroups = ClusterUtils.splitCluster nl' il'
+  let memoryOvercommitment =
+        maximum . (0.0:) . map (iPolicyMemoryRatio . Node.iPolicy)
+        $ IntMap.elems nl
+  logDebug $ "Memory over-commitment ratio is " ++ show memoryOvercommitment
+  (nl'', il'') <- if memoryOvercommitment > 1.0
+                    then useMemData memoryOvercommitment reports (nl', il')
+                    else return (nl', il')
+  logDebug . (++) "Dynamic node load: " . show
+    . map (Node.name &&& Node.utilLoad) $ Container.elems nl''
+  let ngroups = ClusterUtils.splitCluster nl'' il''
    luxiSocket <- liftIO Path.defaultQuerySocket
  bracket (liftIO $ L.getLuxiClient luxiSocket) (liftIO . L.closeClient) $ \c ->
    liftM concat $ mapM (balanceGroup memstate xenInstances c okNodes threshold)

Reply via email to