This commit adds the enabled_predictive_queue cluster parameter that
allows the cluster to specify if the predictive scheduler should be used
or not when ordering ganeti jobs in the queue. It also adds the correct
command line flags for the cluster init and cluster modify ganeti jobs.

As a default, if no option is specified upon cluster creation, the
predictive scheduler is automatically enabled.

Signed-off-by: Federico Morg Pareschi <[email protected]>
---
 lib/bootstrap.py               |  3 ++-
 lib/cli_opts.py                |  8 ++++++++
 lib/client/gnt_cluster.py      | 17 ++++++++++++++---
 lib/cmdlib/cluster/__init__.py |  3 +++
 lib/objects.py                 |  1 +
 man/gnt-cluster.rst            | 10 ++++++++++
 src/Ganeti/JQScheduler.hs      | 20 ++++++++++++++++----
 src/Ganeti/Objects.hs          |  1 +
 src/Ganeti/OpCodes.hs          |  1 +
 src/Ganeti/OpParams.hs         |  7 +++++++
 src/Ganeti/Query/Server.hs     |  2 ++
 test/hs/Test/Ganeti/OpCodes.hs |  1 +
 12 files changed, 66 insertions(+), 8 deletions(-)

diff --git a/lib/bootstrap.py b/lib/bootstrap.py
index 2f98fdd66..c4d987151 100644
--- a/lib/bootstrap.py
+++ b/lib/bootstrap.py
@@ -501,7 +501,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint: 
disable=R0913, R0914
                 use_external_mip_script=False, hv_state=None, disk_state=None,
                 enabled_disk_templates=None, install_image=None,
                 zeroing_image=None, compression_tools=None,
-                enabled_user_shutdown=False):
+                enabled_user_shutdown=False, enabled_predictive_queue=True):
   """Initialise the cluster.
 
   @type candidate_pool_size: int
@@ -805,6 +805,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint: 
disable=R0913, R0914
     enabled_user_shutdown=enabled_user_shutdown,
     ssh_key_type=ssh_key_type,
     ssh_key_bits=ssh_key_bits,
+    enabled_predictive_queue=enabled_predictive_queue,
     )
   master_node_config = objects.Node(name=hostname.name,
                                     primary_ip=hostname.ip,
diff --git a/lib/cli_opts.py b/lib/cli_opts.py
index f90594a4d..984536ad2 100644
--- a/lib/cli_opts.py
+++ b/lib/cli_opts.py
@@ -88,6 +88,7 @@ __all__ = [
   "DIAGNOSE_DATA_COLLECTOR_FILENAME_OPT",
   "ENABLED_DISK_TEMPLATES_OPT",
   "ENABLED_HV_OPT",
+  "ENABLED_PREDICTIVE_QUEUE_OPT",
   "ENABLED_USER_SHUTDOWN_OPT",
   "ERROR_CODES_OPT",
   "EXT_PARAMS_OPT",
@@ -1118,6 +1119,13 @@ ENABLED_DISK_TEMPLATES_OPT = 
cli_option("--enabled-disk-templates",
                                              "disk templates",
                                         type="string", default=None)
 
+ENABLED_PREDICTIVE_QUEUE_OPT = cli_option("--predictive-queue",
+                                          default=None,
+                                          dest="enabled_predictive_queue",
+                                          help="Whether the predictive queue 
is"
+                                               "enabled",
+                                          type="bool")
+
 ENABLED_USER_SHUTDOWN_OPT = cli_option("--user-shutdown",
                                        default=None,
                                        dest="enabled_user_shutdown",
diff --git a/lib/client/gnt_cluster.py b/lib/client/gnt_cluster.py
index cbb3af14f..15208cad7 100644
--- a/lib/client/gnt_cluster.py
+++ b/lib/client/gnt_cluster.py
@@ -309,6 +309,11 @@ def InitCluster(opts, args):
 
   enabled_user_shutdown = bool(opts.enabled_user_shutdown)
 
+  if opts.enabled_predictive_queue  is not None:
+    enabled_predictive_queue = bool(opts.enabled_predictive_queue)
+  else:
+    enabled_predictive_queue = True # Predictive queue is enabled by default.
+
   if opts.ssh_key_type:
     ssh_key_type = opts.ssh_key_type
   else:
@@ -353,6 +358,7 @@ def InitCluster(opts, args):
                         enabled_user_shutdown=enabled_user_shutdown,
                         ssh_key_type=ssh_key_type,
                         ssh_key_bits=ssh_key_bits,
+                        enabled_predictive_queue=enabled_predictive_queue,
                         )
   op = opcodes.OpClusterPostInit()
   SubmitOpCode(op, opts=opts)
@@ -635,6 +641,7 @@ def ShowClusterConfig(opts, args):
       ("modify ssh setup", result["modify_ssh_setup"]),
       ("ssh_key_type", result["ssh_key_type"]),
       ("ssh_key_bits", result["ssh_key_bits"]),
+      ("enabled predictive queue", result["enabled_predictive_queue"])
       ]),
 
     ("Default node parameters",
@@ -1416,7 +1423,8 @@ def SetClusterParams(opts, args):
           opts.maint_balance_threshold is not None or
           opts.data_collector_interval or
           opts.diagnose_data_collector_filename is not None or
-          opts.enabled_data_collectors):
+          opts.enabled_data_collectors or
+          opts.enabled_predictive_queue is not None):
     ToStderr("Please give at least one of the parameters.")
     return 1
 
@@ -1567,7 +1575,8 @@ def SetClusterParams(opts, args):
     maint_balance_threshold=opts.maint_balance_threshold,
     enabled_data_collectors=enabled_data_collectors,
     data_collector_interval=data_collector_interval,
-    diagnose_data_collector_filename=opts.diagnose_data_collector_filename
+    diagnose_data_collector_filename=opts.diagnose_data_collector_filename,
+    enabled_predictive_queue=opts.enabled_predictive_queue
     )
   return base.GetResult(None, opts, SubmitOrSend(op, opts))
 
@@ -2506,6 +2515,7 @@ commands = {
      IPOLICY_STD_SPECS_OPT, GLOBAL_GLUSTER_FILEDIR_OPT, INSTALL_IMAGE_OPT,
      ZEROING_IMAGE_OPT, COMPRESSION_TOOLS_OPT,
      ENABLED_USER_SHUTDOWN_OPT, SSH_KEY_BITS_OPT, SSH_KEY_TYPE_OPT,
+     ENABLED_PREDICTIVE_QUEUE_OPT,
      ]
      + INSTANCE_POLICY_OPTS + SPLIT_ISPECS_OPTS,
     "[opts...] <cluster_name>", "Initialises a new cluster configuration"),
@@ -2591,7 +2601,8 @@ commands = {
      PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT,
      DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT] + SUBMIT_OPTS +
      [ENABLED_DISK_TEMPLATES_OPT, IPOLICY_STD_SPECS_OPT, MODIFY_ETCHOSTS_OPT,
-      MODIFY_SSH_SETUP_OPT, ENABLED_USER_SHUTDOWN_OPT] +
+      MODIFY_SSH_SETUP_OPT, ENABLED_USER_SHUTDOWN_OPT,
+      ENABLED_PREDICTIVE_QUEUE_OPT] +
      INSTANCE_POLICY_OPTS +
      [GLOBAL_FILEDIR_OPT, GLOBAL_SHARED_FILEDIR_OPT, ZEROING_IMAGE_OPT,
       COMPRESSION_TOOLS_OPT] +
diff --git a/lib/cmdlib/cluster/__init__.py b/lib/cmdlib/cluster/__init__.py
index 28370d90a..455c1de15 100644
--- a/lib/cmdlib/cluster/__init__.py
+++ b/lib/cmdlib/cluster/__init__.py
@@ -1770,6 +1770,9 @@ class LUClusterSetParams(LogicalUnit):
       self.cluster.enabled_user_shutdown = self.op.enabled_user_shutdown
       ensure_kvmd = True
 
+    if self.op.enabled_predictive_queue is not None:
+      self.cluster.enabled_predictive_queue = self.op.enabled_predictive_queue
+
     def helper_os(aname, mods, desc):
       desc += " OS list"
       lst = getattr(self.cluster, aname)
diff --git a/lib/objects.py b/lib/objects.py
index 7e20fc2cb..df0494c27 100644
--- a/lib/objects.py
+++ b/lib/objects.py
@@ -1710,6 +1710,7 @@ class Cluster(TaggableObject):
     "diagnose_data_collector_filename",
     "ssh_key_type",
     "ssh_key_bits",
+    "enabled_predictive_queue",
     ] + _TIMESTAMPS + _UUID
 
   def UpgradeConfig(self):
diff --git a/man/gnt-cluster.rst b/man/gnt-cluster.rst
index 0469f7598..261019db8 100644
--- a/man/gnt-cluster.rst
+++ b/man/gnt-cluster.rst
@@ -209,6 +209,7 @@ INIT
 | [\--user-shutdown {yes \| no}]
 | [\--ssh-key-type *type*]
 | [\--ssh-key-bits *bits*]
+| [\--predictive-queue {yes \| no}]
 | {*clustername*}
 
 This commands is only run once initially on the first node of the
@@ -651,6 +652,10 @@ options **ssh-keygen**\(1) exposes. These are currently:
 
 Ganeti defaults to using 2048-bit RSA keys.
 
+The ``--predictive-queue`` option enables or disables the predictive
+queue algorithm for the job scheduler. If this option is not specified,
+Ganeti defaults to enabling the predictive scheduler.
+
 MASTER-FAILOVER
 ~~~~~~~~~~~~~~~
 
@@ -751,6 +756,8 @@ MODIFY
 | [\--auto-balance-cluster {yes \| no }]
 | [\--auto-balance-threshold *score* ]
 | [\--diagnose-data-collector-filename *filename*]
+| [\--predictive-queue {yes \| no}]
+
 
 
 Modify the options for the cluster.
@@ -840,6 +847,9 @@ in absolute terms, unless the cluster score it at least 10 
times that
 value, in which case all beneficial steps will be done if auto-balancing
 is enabled.
 
+The ``--predictive-queue`` option enables or disables the predictive
+queue algorithm for the job scheduler.
+
 See **gnt-cluster init** for a description of ``--install-image`` and
 ``--zeroing-image``.
 
diff --git a/src/Ganeti/JQScheduler.hs b/src/Ganeti/JQScheduler.hs
index 5c79843a8..bfa78745f 100644
--- a/src/Ganeti/JQScheduler.hs
+++ b/src/Ganeti/JQScheduler.hs
@@ -167,6 +167,12 @@ getMaxRunningJobs = getConfigValue clusterMaxRunningJobs 1
 getMaxTrackedJobs :: JQStatus -> IO Int
 getMaxTrackedJobs = getConfigValue clusterMaxTrackedJobs 1
 
+-- | Get the boolean that specifies whether or not the predictive queue
+-- scheduler is enabled in the cluster. If the configuration is not available,
+-- the predictive queue is enabled by default.
+getEnabledPredictiveQueue :: JQStatus -> IO Bool
+getEnabledPredictiveQueue = getConfigValue clusterEnabledPredictiveQueue True
+
 -- | Get the number of jobs currently running.
 getRQL :: JQStatus -> IO Int
 getRQL = liftM (length . qRunning) . readIORef . jqJobs
@@ -348,18 +354,22 @@ sortByStaticLocks cfg queue currTime = sortBy (compare 
`on` opWeight)
 -- pure function doing the scheduling.
 selectJobsToRun :: ConfigData
                 -> Int -- How many jobs are allowed to run at the same time.
+                -> Bool -- If the predictive scheduler is enabled
                 -> Timestamp -- Current time
                 -> Set FilterRule -- Filter rules to respect for scheduling
                 -> Queue
                 -> (Queue, [JobWithStat])
-selectJobsToRun cfg count currTime filters queue =
+selectJobsToRun cfg count isPredictive currTime filters queue =
   let n = count - length (qRunning queue) - length (qManipulated queue)
+      pickScheduler = if isPredictive
+                         then sortByStaticLocks cfg queue currTime
+                         else id
       chosen = take n
                . jobFiltering queue filters
                . reasonRateLimit queue
                . sortBy (comparing (calcJobPriority . jJob))
                . filter (jobEligible queue)
-               . sortByStaticLocks cfg queue currTime
+               . pickScheduler
                $ qEnqueued queue
       remain = deleteFirstsBy ((==) `on` (qjId . jJob)) (qEnqueued queue) 
chosen
   in (queue {qEnqueued=remain, qRunning=qRunning queue ++ chosen}, chosen)
@@ -456,8 +466,10 @@ scheduleSomeJobs qstate = do
 
       -- Select the jobs to run.
       count <- getMaxRunningJobs qstate
-      chosen <- atomicModifyIORef (jqJobs qstate)
-                                  (selectJobsToRun cfg count ts filters)
+      isPredictive <- getEnabledPredictiveQueue qstate
+      let jobsToRun = selectJobsToRun cfg count isPredictive ts filters
+      chosen <- atomicModifyIORef (jqJobs qstate) jobsToRun
+
       let jobs = map jJob chosen
       unless (null chosen) . logInfo . (++) "Starting jobs: " . commaJoin
         $ map (show . fromJobId . qjId) jobs
diff --git a/src/Ganeti/Objects.hs b/src/Ganeti/Objects.hs
index 5be8adfe5..572dc662f 100644
--- a/src/Ganeti/Objects.hs
+++ b/src/Ganeti/Objects.hs
@@ -698,6 +698,7 @@ $(buildObject "Cluster" "cluster" $
       "diagnose_data_collector_filename"         [t| String                  |]
   , simpleField "ssh_key_type"                   [t| SshKeyType              |]
   , simpleField "ssh_key_bits"                   [t| Int                     |]
+  , simpleField "enabled_predictive_queue"       [t| Bool                    |]
  ]
  ++ timeStampFields
  ++ uuidFields
diff --git a/src/Ganeti/OpCodes.hs b/src/Ganeti/OpCodes.hs
index c4da480a8..811d59d66 100644
--- a/src/Ganeti/OpCodes.hs
+++ b/src/Ganeti/OpCodes.hs
@@ -271,6 +271,7 @@ $(genOpCode "OpCode"
      , pMaintdRoundDelay
      , pMaintdEnableBalancing
      , pMaintdBalancingThreshold
+     , pEnabledPredictiveQueue
      ],
      [])
   , ("OpClusterRedistConf",
diff --git a/src/Ganeti/OpParams.hs b/src/Ganeti/OpParams.hs
index b5a561953..f35ae4513 100644
--- a/src/Ganeti/OpParams.hs
+++ b/src/Ganeti/OpParams.hs
@@ -321,6 +321,7 @@ module Ganeti.OpParams
   , pVerifyClutter
   , pLongSleep
   , pIsStrict
+  , pEnabledPredictiveQueue
   ) where
 
 import Control.Monad (liftM, mplus)
@@ -2030,3 +2031,9 @@ pIsStrict =
   withDoc "Whether the operation is in strict mode or not." .
   defaultField [| True |] $
   simpleField "is_strict" [t| Bool |]
+
+pEnabledPredictiveQueue :: Field
+pEnabledPredictiveQueue =
+  withDoc "Whether the predictive queue is enabled in the cluster." .
+  optionalField $
+  simpleField "enabled_predictive_queue" [t| Bool |]
diff --git a/src/Ganeti/Query/Server.hs b/src/Ganeti/Query/Server.hs
index aefe129c5..8cef6cc2b 100644
--- a/src/Ganeti/Query/Server.hs
+++ b/src/Ganeti/Query/Server.hs
@@ -289,6 +289,8 @@ handleCall _ _ cdata QueryClusterInfo =
                showJSON $ clusterModifySshSetup cluster)
             , ("ssh_key_type", showJSON $ clusterSshKeyType cluster)
             , ("ssh_key_bits", showJSON $ clusterSshKeyBits cluster)
+            , ("enabled_predictive_queue",
+               showJSON $ clusterEnabledPredictiveQueue cluster)
             ]
 
   in case master of
diff --git a/test/hs/Test/Ganeti/OpCodes.hs b/test/hs/Test/Ganeti/OpCodes.hs
index 48a468345..7d39b6619 100644
--- a/test/hs/Test/Ganeti/OpCodes.hs
+++ b/test/hs/Test/Ganeti/OpCodes.hs
@@ -265,6 +265,7 @@ genOpCodeFromId op_id cfg =
         <*> genMaybe (fromPositive <$> arbitrary) -- maintd round interval
         <*> genMaybe arbitrary           -- enable maintd balancing
         <*> genMaybe arbitrary           -- maintd balancing threshold
+        <*> arbitrary                    -- enabled_predictive_queue
     "OP_CLUSTER_REDIST_CONF" -> pure OpCodes.OpClusterRedistConf
     "OP_CLUSTER_ACTIVATE_MASTER_IP" ->
       pure OpCodes.OpClusterActivateMasterIp
-- 
2.11.0.390.gc69c2f50cf-goog

Reply via email to