This commit adds the enabled_predictive_queue cluster parameter, which specifies whether the predictive scheduler should be used when ordering Ganeti jobs in the queue. It also adds the corresponding command-line flags to the gnt-cluster init and gnt-cluster modify commands.
As a default, if no option is specified upon cluster creation, the predictive scheduler is automatically enabled. Signed-off-by: Federico Morg Pareschi <[email protected]> --- lib/bootstrap.py | 3 ++- lib/cli_opts.py | 8 ++++++++ lib/client/gnt_cluster.py | 17 ++++++++++++++--- lib/cmdlib/cluster/__init__.py | 3 +++ lib/objects.py | 1 + man/gnt-cluster.rst | 10 ++++++++++ src/Ganeti/JQScheduler.hs | 20 ++++++++++++++++---- src/Ganeti/Objects.hs | 1 + src/Ganeti/OpCodes.hs | 1 + src/Ganeti/OpParams.hs | 7 +++++++ src/Ganeti/Query/Server.hs | 2 ++ test/hs/Test/Ganeti/OpCodes.hs | 1 + 12 files changed, 66 insertions(+), 8 deletions(-) diff --git a/lib/bootstrap.py b/lib/bootstrap.py index 2f98fdd66..c4d987151 100644 --- a/lib/bootstrap.py +++ b/lib/bootstrap.py @@ -501,7 +501,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint: disable=R0913, R0914 use_external_mip_script=False, hv_state=None, disk_state=None, enabled_disk_templates=None, install_image=None, zeroing_image=None, compression_tools=None, - enabled_user_shutdown=False): + enabled_user_shutdown=False, enabled_predictive_queue=True): """Initialise the cluster. 
@type candidate_pool_size: int @@ -805,6 +805,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint: disable=R0913, R0914 enabled_user_shutdown=enabled_user_shutdown, ssh_key_type=ssh_key_type, ssh_key_bits=ssh_key_bits, + enabled_predictive_queue=enabled_predictive_queue, ) master_node_config = objects.Node(name=hostname.name, primary_ip=hostname.ip, diff --git a/lib/cli_opts.py b/lib/cli_opts.py index f90594a4d..984536ad2 100644 --- a/lib/cli_opts.py +++ b/lib/cli_opts.py @@ -88,6 +88,7 @@ __all__ = [ "DIAGNOSE_DATA_COLLECTOR_FILENAME_OPT", "ENABLED_DISK_TEMPLATES_OPT", "ENABLED_HV_OPT", + "ENABLED_PREDICTIVE_QUEUE_OPT", "ENABLED_USER_SHUTDOWN_OPT", "ERROR_CODES_OPT", "EXT_PARAMS_OPT", @@ -1118,6 +1119,13 @@ ENABLED_DISK_TEMPLATES_OPT = cli_option("--enabled-disk-templates", "disk templates", type="string", default=None) +ENABLED_PREDICTIVE_QUEUE_OPT = cli_option("--predictive-queue", + default=None, + dest="enabled_predictive_queue", + help="Whether the predictive queue is " + "enabled", + type="bool") + ENABLED_USER_SHUTDOWN_OPT = cli_option("--user-shutdown", default=None, dest="enabled_user_shutdown", diff --git a/lib/client/gnt_cluster.py b/lib/client/gnt_cluster.py index cbb3af14f..15208cad7 100644 --- a/lib/client/gnt_cluster.py +++ b/lib/client/gnt_cluster.py @@ -309,6 +309,11 @@ def InitCluster(opts, args): enabled_user_shutdown = bool(opts.enabled_user_shutdown) + if opts.enabled_predictive_queue is not None: + enabled_predictive_queue = bool(opts.enabled_predictive_queue) + else: + enabled_predictive_queue = True # Predictive queue is enabled by default. 
+ if opts.ssh_key_type: ssh_key_type = opts.ssh_key_type else: @@ -353,6 +358,7 @@ def InitCluster(opts, args): enabled_user_shutdown=enabled_user_shutdown, ssh_key_type=ssh_key_type, ssh_key_bits=ssh_key_bits, + enabled_predictive_queue=enabled_predictive_queue, ) op = opcodes.OpClusterPostInit() SubmitOpCode(op, opts=opts) @@ -635,6 +641,7 @@ def ShowClusterConfig(opts, args): ("modify ssh setup", result["modify_ssh_setup"]), ("ssh_key_type", result["ssh_key_type"]), ("ssh_key_bits", result["ssh_key_bits"]), + ("enabled predictive queue", result["enabled_predictive_queue"]) ]), ("Default node parameters", @@ -1416,7 +1423,8 @@ def SetClusterParams(opts, args): opts.maint_balance_threshold is not None or opts.data_collector_interval or opts.diagnose_data_collector_filename is not None or - opts.enabled_data_collectors): + opts.enabled_data_collectors or + opts.enabled_predictive_queue is not None): ToStderr("Please give at least one of the parameters.") return 1 @@ -1567,7 +1575,8 @@ def SetClusterParams(opts, args): maint_balance_threshold=opts.maint_balance_threshold, enabled_data_collectors=enabled_data_collectors, data_collector_interval=data_collector_interval, - diagnose_data_collector_filename=opts.diagnose_data_collector_filename + diagnose_data_collector_filename=opts.diagnose_data_collector_filename, + enabled_predictive_queue=opts.enabled_predictive_queue ) return base.GetResult(None, opts, SubmitOrSend(op, opts)) @@ -2506,6 +2515,7 @@ commands = { IPOLICY_STD_SPECS_OPT, GLOBAL_GLUSTER_FILEDIR_OPT, INSTALL_IMAGE_OPT, ZEROING_IMAGE_OPT, COMPRESSION_TOOLS_OPT, ENABLED_USER_SHUTDOWN_OPT, SSH_KEY_BITS_OPT, SSH_KEY_TYPE_OPT, + ENABLED_PREDICTIVE_QUEUE_OPT, ] + INSTANCE_POLICY_OPTS + SPLIT_ISPECS_OPTS, "[opts...] 
<cluster_name>", "Initialises a new cluster configuration"), @@ -2591,7 +2601,8 @@ commands = { PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT, DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT] + SUBMIT_OPTS + [ENABLED_DISK_TEMPLATES_OPT, IPOLICY_STD_SPECS_OPT, MODIFY_ETCHOSTS_OPT, - MODIFY_SSH_SETUP_OPT, ENABLED_USER_SHUTDOWN_OPT] + + MODIFY_SSH_SETUP_OPT, ENABLED_USER_SHUTDOWN_OPT, + ENABLED_PREDICTIVE_QUEUE_OPT] + INSTANCE_POLICY_OPTS + [GLOBAL_FILEDIR_OPT, GLOBAL_SHARED_FILEDIR_OPT, ZEROING_IMAGE_OPT, COMPRESSION_TOOLS_OPT] + diff --git a/lib/cmdlib/cluster/__init__.py b/lib/cmdlib/cluster/__init__.py index 28370d90a..455c1de15 100644 --- a/lib/cmdlib/cluster/__init__.py +++ b/lib/cmdlib/cluster/__init__.py @@ -1770,6 +1770,9 @@ class LUClusterSetParams(LogicalUnit): self.cluster.enabled_user_shutdown = self.op.enabled_user_shutdown ensure_kvmd = True + if self.op.enabled_predictive_queue is not None: + self.cluster.enabled_predictive_queue = self.op.enabled_predictive_queue + def helper_os(aname, mods, desc): desc += " OS list" lst = getattr(self.cluster, aname) diff --git a/lib/objects.py b/lib/objects.py index 7e20fc2cb..df0494c27 100644 --- a/lib/objects.py +++ b/lib/objects.py @@ -1710,6 +1710,7 @@ class Cluster(TaggableObject): "diagnose_data_collector_filename", "ssh_key_type", "ssh_key_bits", + "enabled_predictive_queue", ] + _TIMESTAMPS + _UUID def UpgradeConfig(self): diff --git a/man/gnt-cluster.rst b/man/gnt-cluster.rst index 0469f7598..261019db8 100644 --- a/man/gnt-cluster.rst +++ b/man/gnt-cluster.rst @@ -209,6 +209,7 @@ INIT | [\--user-shutdown {yes \| no}] | [\--ssh-key-type *type*] | [\--ssh-key-bits *bits*] +| [\--predictive-queue {yes \| no}] | {*clustername*} This commands is only run once initially on the first node of the @@ -651,6 +652,10 @@ options **ssh-keygen**\(1) exposes. These are currently: Ganeti defaults to using 2048-bit RSA keys. 
+The ``--predictive-queue`` option enables or disables the predictive +queue algorithm for the job scheduler. If this option is not specified, +Ganeti defaults to enabling the predictive scheduler. + MASTER-FAILOVER ~~~~~~~~~~~~~~~ @@ -751,6 +756,8 @@ MODIFY | [\--auto-balance-cluster {yes \| no }] | [\--auto-balance-threshold *score* ] | [\--diagnose-data-collector-filename *filename*] +| [\--predictive-queue {yes \| no}] + Modify the options for the cluster. @@ -840,6 +847,9 @@ in absolute terms, unless the cluster score it at least 10 times that value, in which case all beneficial steps will be done if auto-balancing is enabled. +The ``--predictive-queue`` option enables or disables the predictive +queue algorithm for the job scheduler. + See **gnt-cluster init** for a description of ``--install-image`` and ``--zeroing-image``. diff --git a/src/Ganeti/JQScheduler.hs b/src/Ganeti/JQScheduler.hs index 5c79843a8..bfa78745f 100644 --- a/src/Ganeti/JQScheduler.hs +++ b/src/Ganeti/JQScheduler.hs @@ -167,6 +167,12 @@ getMaxRunningJobs = getConfigValue clusterMaxRunningJobs 1 getMaxTrackedJobs :: JQStatus -> IO Int getMaxTrackedJobs = getConfigValue clusterMaxTrackedJobs 1 +-- | Get the boolean that specifies whether or not the predictive queue +-- scheduler is enabled in the cluster. If the configuration is not available, +-- the predictive queue is enabled by default. +getEnabledPredictiveQueue :: JQStatus -> IO Bool +getEnabledPredictiveQueue = getConfigValue clusterEnabledPredictiveQueue True + -- | Get the number of jobs currently running. getRQL :: JQStatus -> IO Int getRQL = liftM (length . qRunning) . readIORef . jqJobs @@ -348,18 +354,22 @@ sortByStaticLocks cfg queue currTime = sortBy (compare `on` opWeight) -- pure function doing the scheduling. selectJobsToRun :: ConfigData -> Int -- How many jobs are allowed to run at the same time. 
+ -> Bool -- If the predictive scheduler is enabled -> Timestamp -- Current time -> Set FilterRule -- Filter rules to respect for scheduling -> Queue -> (Queue, [JobWithStat]) -selectJobsToRun cfg count currTime filters queue = +selectJobsToRun cfg count isPredictive currTime filters queue = let n = count - length (qRunning queue) - length (qManipulated queue) + pickScheduler = if isPredictive + then sortByStaticLocks cfg queue currTime + else id chosen = take n . jobFiltering queue filters . reasonRateLimit queue . sortBy (comparing (calcJobPriority . jJob)) . filter (jobEligible queue) - . sortByStaticLocks cfg queue currTime + . pickScheduler $ qEnqueued queue remain = deleteFirstsBy ((==) `on` (qjId . jJob)) (qEnqueued queue) chosen in (queue {qEnqueued=remain, qRunning=qRunning queue ++ chosen}, chosen) @@ -456,8 +466,10 @@ scheduleSomeJobs qstate = do -- Select the jobs to run. count <- getMaxRunningJobs qstate - chosen <- atomicModifyIORef (jqJobs qstate) - (selectJobsToRun cfg count ts filters) + isPredictive <- getEnabledPredictiveQueue qstate + let jobsToRun = selectJobsToRun cfg count isPredictive ts filters + chosen <- atomicModifyIORef (jqJobs qstate) jobsToRun + let jobs = map jJob chosen unless (null chosen) . logInfo . (++) "Starting jobs: " . commaJoin $ map (show . fromJobId . 
qjId) jobs diff --git a/src/Ganeti/Objects.hs b/src/Ganeti/Objects.hs index 5be8adfe5..572dc662f 100644 --- a/src/Ganeti/Objects.hs +++ b/src/Ganeti/Objects.hs @@ -698,6 +698,7 @@ $(buildObject "Cluster" "cluster" $ "diagnose_data_collector_filename" [t| String |] , simpleField "ssh_key_type" [t| SshKeyType |] , simpleField "ssh_key_bits" [t| Int |] + , simpleField "enabled_predictive_queue" [t| Bool |] ] ++ timeStampFields ++ uuidFields diff --git a/src/Ganeti/OpCodes.hs b/src/Ganeti/OpCodes.hs index c4da480a8..811d59d66 100644 --- a/src/Ganeti/OpCodes.hs +++ b/src/Ganeti/OpCodes.hs @@ -271,6 +271,7 @@ $(genOpCode "OpCode" , pMaintdRoundDelay , pMaintdEnableBalancing , pMaintdBalancingThreshold + , pEnabledPredictiveQueue ], []) , ("OpClusterRedistConf", diff --git a/src/Ganeti/OpParams.hs b/src/Ganeti/OpParams.hs index b5a561953..f35ae4513 100644 --- a/src/Ganeti/OpParams.hs +++ b/src/Ganeti/OpParams.hs @@ -321,6 +321,7 @@ module Ganeti.OpParams , pVerifyClutter , pLongSleep , pIsStrict + , pEnabledPredictiveQueue ) where import Control.Monad (liftM, mplus) @@ -2030,3 +2031,9 @@ pIsStrict = withDoc "Whether the operation is in strict mode or not." . defaultField [| True |] $ simpleField "is_strict" [t| Bool |] + +pEnabledPredictiveQueue :: Field +pEnabledPredictiveQueue = + withDoc "Whether the predictive queue is enabled in the cluster." . 
+ optionalField $ + simpleField "enabled_predictive_queue" [t| Bool |] diff --git a/src/Ganeti/Query/Server.hs b/src/Ganeti/Query/Server.hs index aefe129c5..8cef6cc2b 100644 --- a/src/Ganeti/Query/Server.hs +++ b/src/Ganeti/Query/Server.hs @@ -289,6 +289,8 @@ handleCall _ _ cdata QueryClusterInfo = showJSON $ clusterModifySshSetup cluster) , ("ssh_key_type", showJSON $ clusterSshKeyType cluster) , ("ssh_key_bits", showJSON $ clusterSshKeyBits cluster) + , ("enabled_predictive_queue", + showJSON $ clusterEnabledPredictiveQueue cluster) ] in case master of diff --git a/test/hs/Test/Ganeti/OpCodes.hs b/test/hs/Test/Ganeti/OpCodes.hs index 48a468345..7d39b6619 100644 --- a/test/hs/Test/Ganeti/OpCodes.hs +++ b/test/hs/Test/Ganeti/OpCodes.hs @@ -265,6 +265,7 @@ genOpCodeFromId op_id cfg = <*> genMaybe (fromPositive <$> arbitrary) -- maintd round interval <*> genMaybe arbitrary -- enable maintd balancing <*> genMaybe arbitrary -- maintd balancing threshold + <*> arbitrary -- enabled_predictive_queue "OP_CLUSTER_REDIST_CONF" -> pure OpCodes.OpClusterRedistConf "OP_CLUSTER_ACTIVATE_MASTER_IP" -> pure OpCodes.OpClusterActivateMasterIp -- 2.11.0.390.gc69c2f50cf-goog
