I agree completely, and the --user-shutdown flag is the reason why I kept it consistent with it. I'll be pushing this onto master then, thanks for the review.
On 9 January 2017 at 11:40, 'Viktor Bachraty' via ganeti-devel <[email protected]> wrote: > Thanks for the patch! LGTM, just one minor comment. Personally I'd prefer > '--enable-predictive-queue' rather than '--predictive-queue' as it makes it > clear that it's a boolean flag (and it's consistent with the variable name). > However probably it's better to keep the convention of existing flags (e.g. > --user-shutdown). > > On Monday, January 9, 2017 at 11:18:01 AM UTC, Federico Pareschi wrote: >> >> This commit adds the enabled_predictive_queue cluster parameter that >> allows the cluster to specify if the predictive scheduler should be used >> or not when ordering ganeti jobs in the queue. It also adds the correct >> command line flags for the cluster init and cluster modify ganeti jobs. >> >> As a default, if no option is specified upon cluster creation, the >> predictive scheduler is automatically enabled. >> >> Signed-off-by: Federico Morg Pareschi <[email protected]> >> --- >> lib/bootstrap.py | 3 ++- >> lib/cli_opts.py | 8 ++++++++ >> lib/client/gnt_cluster.py | 17 ++++++++++++++--- >> lib/cmdlib/cluster/__init__.py | 3 +++ >> lib/objects.py | 1 + >> man/gnt-cluster.rst | 10 ++++++++++ >> src/Ganeti/JQScheduler.hs | 20 ++++++++++++++++---- >> src/Ganeti/Objects.hs | 1 + >> src/Ganeti/OpCodes.hs | 1 + >> src/Ganeti/OpParams.hs | 7 +++++++ >> src/Ganeti/Query/Server.hs | 2 ++ >> test/hs/Test/Ganeti/OpCodes.hs | 1 + >> 12 files changed, 66 insertions(+), 8 deletions(-) >> >> diff --git a/lib/bootstrap.py b/lib/bootstrap.py >> index 2f98fdd66..c4d987151 100644 >> --- a/lib/bootstrap.py >> +++ b/lib/bootstrap.py >> @@ -501,7 +501,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint: >> disable=R0913, R0914 >> use_external_mip_script=False, hv_state=None, >> disk_state=None, >> enabled_disk_templates=None, install_image=None, >> zeroing_image=None, compression_tools=None, >> - enabled_user_shutdown=False): >> + enabled_user_shutdown=False, >> enabled_predictive_queue=True): >> """Initialise the cluster. >> >> @type candidate_pool_size: int >> @@ -805,6 +805,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint: >> disable=R0913, R0914 >> enabled_user_shutdown=enabled_user_shutdown, >> ssh_key_type=ssh_key_type, >> ssh_key_bits=ssh_key_bits, >> + enabled_predictive_queue=enabled_predictive_queue, >> ) >> master_node_config = objects.Node(name=hostname.name, >> primary_ip=hostname.ip, >> diff --git a/lib/cli_opts.py b/lib/cli_opts.py >> index f90594a4d..984536ad2 100644 >> --- a/lib/cli_opts.py >> +++ b/lib/cli_opts.py >> @@ -88,6 +88,7 @@ __all__ = [ >> "DIAGNOSE_DATA_COLLECTOR_FILENAME_OPT", >> "ENABLED_DISK_TEMPLATES_OPT", >> "ENABLED_HV_OPT", >> + "ENABLED_PREDICTIVE_QUEUE_OPT", >> "ENABLED_USER_SHUTDOWN_OPT", >> "ERROR_CODES_OPT", >> "EXT_PARAMS_OPT", >> @@ -1118,6 +1119,13 @@ ENABLED_DISK_TEMPLATES_OPT = >> cli_option("--enabled-disk-templates", >> "disk templates", >> type="string", default=None) >> >> +ENABLED_PREDICTIVE_QUEUE_OPT = cli_option("--predictive-queue", >> + default=None, >> + >> dest="enabled_predictive_queue", >> + help="Whether the predictive >> queue is" >> + "enabled", >> + type="bool") >> + >> ENABLED_USER_SHUTDOWN_OPT = cli_option("--user-shutdown", >> default=None, >> dest="enabled_user_shutdown", >> diff --git a/lib/client/gnt_cluster.py b/lib/client/gnt_cluster.py >> index cbb3af14f..15208cad7 100644 >> --- a/lib/client/gnt_cluster.py >> +++ b/lib/client/gnt_cluster.py >> @@ -309,6 +309,11 @@ def InitCluster(opts, args): >> >> enabled_user_shutdown = bool(opts.enabled_user_shutdown) >> >> + if opts.enabled_predictive_queue is not None: >> + enabled_predictive_queue = bool(opts.enabled_predictive_queue) >> + else: >> + enabled_predictive_queue = True # Predictive queue is enabled by >> default. >> + >> if opts.ssh_key_type: >> ssh_key_type = opts.ssh_key_type >> else: >> @@ -353,6 +358,7 @@ def InitCluster(opts, args): >> enabled_user_shutdown=enabled_user_shutdown, >> ssh_key_type=ssh_key_type, >> ssh_key_bits=ssh_key_bits, >> + >> enabled_predictive_queue=enabled_predictive_queue, >> ) >> op = opcodes.OpClusterPostInit() >> SubmitOpCode(op, opts=opts) >> @@ -635,6 +641,7 @@ def ShowClusterConfig(opts, args): >> ("modify ssh setup", result["modify_ssh_setup"]), >> ("ssh_key_type", result["ssh_key_type"]), >> ("ssh_key_bits", result["ssh_key_bits"]), >> + ("enabled predictive queue", result["enabled_predictive_queue"]) >> ]), >> >> ("Default node parameters", >> @@ -1416,7 +1423,8 @@ def SetClusterParams(opts, args): >> opts.maint_balance_threshold is not None or >> opts.data_collector_interval or >> opts.diagnose_data_collector_filename is not None or >> - opts.enabled_data_collectors): >> + opts.enabled_data_collectors or >> + opts.enabled_predictive_queue is not None): >> ToStderr("Please give at least one of the parameters.") >> return 1 >> >> @@ -1567,7 +1575,8 @@ def SetClusterParams(opts, args): >> maint_balance_threshold=opts.maint_balance_threshold, >> enabled_data_collectors=enabled_data_collectors, >> data_collector_interval=data_collector_interval, >> - >> diagnose_data_collector_filename=opts.diagnose_data_collector_filename >> + >> diagnose_data_collector_filename=opts.diagnose_data_collector_filename, >> + enabled_predictive_queue=opts.enabled_predictive_queue >> ) >> return base.GetResult(None, opts, SubmitOrSend(op, opts)) >> >> @@ -2506,6 +2515,7 @@ commands = { >> IPOLICY_STD_SPECS_OPT, GLOBAL_GLUSTER_FILEDIR_OPT, >> INSTALL_IMAGE_OPT, >> ZEROING_IMAGE_OPT, COMPRESSION_TOOLS_OPT, >> ENABLED_USER_SHUTDOWN_OPT, SSH_KEY_BITS_OPT, SSH_KEY_TYPE_OPT, >> + ENABLED_PREDICTIVE_QUEUE_OPT, >> ] >> + INSTANCE_POLICY_OPTS + SPLIT_ISPECS_OPTS, >> "[opts...] <cluster_name>", "Initialises a new cluster >> configuration"), >> @@ -2591,7 +2601,8 @@ commands = { >> PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT, >> DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT] + SUBMIT_OPTS + >> [ENABLED_DISK_TEMPLATES_OPT, IPOLICY_STD_SPECS_OPT, >> MODIFY_ETCHOSTS_OPT, >> - MODIFY_SSH_SETUP_OPT, ENABLED_USER_SHUTDOWN_OPT] + >> + MODIFY_SSH_SETUP_OPT, ENABLED_USER_SHUTDOWN_OPT, >> + ENABLED_PREDICTIVE_QUEUE_OPT] + >> INSTANCE_POLICY_OPTS + >> [GLOBAL_FILEDIR_OPT, GLOBAL_SHARED_FILEDIR_OPT, ZEROING_IMAGE_OPT, >> COMPRESSION_TOOLS_OPT] + >> diff --git a/lib/cmdlib/cluster/__init__.py >> b/lib/cmdlib/cluster/__init__.py >> index 28370d90a..455c1de15 100644 >> --- a/lib/cmdlib/cluster/__init__.py >> +++ b/lib/cmdlib/cluster/__init__.py >> @@ -1770,6 +1770,9 @@ class LUClusterSetParams(LogicalUnit): >> self.cluster.enabled_user_shutdown = self.op.enabled_user_shutdown >> ensure_kvmd = True >> >> + if self.op.enabled_predictive_queue is not None: >> + self.cluster.enabled_predictive_queue = >> self.op.enabled_predictive_queue >> + >> def helper_os(aname, mods, desc): >> desc += " OS list" >> lst = getattr(self.cluster, aname) >> diff --git a/lib/objects.py b/lib/objects.py >> index 7e20fc2cb..df0494c27 100644 >> --- a/lib/objects.py >> +++ b/lib/objects.py >> @@ -1710,6 +1710,7 @@ class Cluster(TaggableObject): >> "diagnose_data_collector_filename", >> "ssh_key_type", >> "ssh_key_bits", >> + "enabled_predictive_queue", >> ] + _TIMESTAMPS + _UUID >> >> def UpgradeConfig(self): >> diff --git a/man/gnt-cluster.rst b/man/gnt-cluster.rst >> index 0469f7598..261019db8 100644 >> --- a/man/gnt-cluster.rst >> +++ b/man/gnt-cluster.rst >> @@ -209,6 +209,7 @@ INIT >> | [\--user-shutdown {yes \| no}] >> | [\--ssh-key-type *type*] >> | [\--ssh-key-bits *bits*] >> +| [\--predictive-queue {yes \| no}] >> | {*clustername*} >> >> This commands is only run once initially on the first node of the >> @@ -651,6 +652,10 @@ options **ssh-keygen**\(1) exposes. These are >> currently: >> >> Ganeti defaults to using 2048-bit RSA keys. >> >> +The ``--predictive-queue`` option enables or disables the predictive >> +queue algorithm for the job scheduler. If this option is not specified, >> +Ganeti defaults to enabling the predictive scheduler. >> + >> MASTER-FAILOVER >> ~~~~~~~~~~~~~~~ >> >> @@ -751,6 +756,8 @@ MODIFY >> | [\--auto-balance-cluster {yes \| no }] >> | [\--auto-balance-threshold *score* ] >> | [\--diagnose-data-collector-filename *filename*] >> +| [\--predictive-queue {yes \| no}] >> + >> >> >> Modify the options for the cluster. >> @@ -840,6 +847,9 @@ in absolute terms, unless the cluster score it at >> least 10 times that >> value, in which case all beneficial steps will be done if auto-balancing >> is enabled. >> >> +The ``--predictive-queue`` option enables or disables the predictive >> +queue algorithm for the job scheduler. >> + >> See **gnt-cluster init** for a description of ``--install-image`` and >> ``--zeroing-image``. >> >> diff --git a/src/Ganeti/JQScheduler.hs b/src/Ganeti/JQScheduler.hs >> index 5c79843a8..bfa78745f 100644 >> --- a/src/Ganeti/JQScheduler.hs >> +++ b/src/Ganeti/JQScheduler.hs >> @@ -167,6 +167,12 @@ getMaxRunningJobs = getConfigValue >> clusterMaxRunningJobs 1 >> getMaxTrackedJobs :: JQStatus -> IO Int >> getMaxTrackedJobs = getConfigValue clusterMaxTrackedJobs 1 >> >> +-- | Get the boolean that specifies whether or not the predictive queue >> +-- scheduler is enabled in the cluster. If the configuration is not >> available, >> +-- the predictive queue is enabled by default. >> +getEnabledPredictiveQueue :: JQStatus -> IO Bool >> +getEnabledPredictiveQueue = getConfigValue clusterEnabledPredictiveQueue >> True >> + >> -- | Get the number of jobs currently running. >> getRQL :: JQStatus -> IO Int >> getRQL = liftM (length . qRunning) . readIORef . jqJobs >> @@ -348,18 +354,22 @@ sortByStaticLocks cfg queue currTime = sortBy >> (compare `on` opWeight) >> -- pure function doing the scheduling. >> selectJobsToRun :: ConfigData >> -> Int -- How many jobs are allowed to run at the same >> time. >> + -> Bool -- If the predictive scheduler is enabled >> -> Timestamp -- Current time >> -> Set FilterRule -- Filter rules to respect for >> scheduling >> -> Queue >> -> (Queue, [JobWithStat]) >> -selectJobsToRun cfg count currTime filters queue = >> +selectJobsToRun cfg count isPredictive currTime filters queue = >> let n = count - length (qRunning queue) - length (qManipulated queue) >> + pickScheduler = if isPredictive >> + then sortByStaticLocks cfg queue currTime >> + else id >> chosen = take n >> . jobFiltering queue filters >> . reasonRateLimit queue >> . sortBy (comparing (calcJobPriority . jJob)) >> . filter (jobEligible queue) >> - . sortByStaticLocks cfg queue currTime >> + . pickScheduler >> $ qEnqueued queue >> remain = deleteFirstsBy ((==) `on` (qjId . jJob)) (qEnqueued queue) >> chosen >> in (queue {qEnqueued=remain, qRunning=qRunning queue ++ chosen}, >> chosen) >> @@ -456,8 +466,10 @@ scheduleSomeJobs qstate = do >> >> -- Select the jobs to run. >> count <- getMaxRunningJobs qstate >> - chosen <- atomicModifyIORef (jqJobs qstate) >> - (selectJobsToRun cfg count ts filters) >> + isPredictive <- getEnabledPredictiveQueue qstate >> + let jobsToRun = selectJobsToRun cfg count isPredictive ts filters >> + chosen <- atomicModifyIORef (jqJobs qstate) jobsToRun >> + >> let jobs = map jJob chosen >> unless (null chosen) . logInfo . (++) "Starting jobs: " . commaJoin >> $ map (show . fromJobId . qjId) jobs >> diff --git a/src/Ganeti/Objects.hs b/src/Ganeti/Objects.hs >> index 5be8adfe5..572dc662f 100644 >> --- a/src/Ganeti/Objects.hs >> +++ b/src/Ganeti/Objects.hs >> @@ -698,6 +698,7 @@ $(buildObject "Cluster" "cluster" $ >> "diagnose_data_collector_filename" [t| String >> |] >> , simpleField "ssh_key_type" [t| SshKeyType >> |] >> , simpleField "ssh_key_bits" [t| Int >> |] >> + , simpleField "enabled_predictive_queue" [t| Bool >> |] >> ] >> ++ timeStampFields >> ++ uuidFields >> diff --git a/src/Ganeti/OpCodes.hs b/src/Ganeti/OpCodes.hs >> index c4da480a8..811d59d66 100644 >> --- a/src/Ganeti/OpCodes.hs >> +++ b/src/Ganeti/OpCodes.hs >> @@ -271,6 +271,7 @@ $(genOpCode "OpCode" >> , pMaintdRoundDelay >> , pMaintdEnableBalancing >> , pMaintdBalancingThreshold >> + , pEnabledPredictiveQueue >> ], >> []) >> , ("OpClusterRedistConf", >> diff --git a/src/Ganeti/OpParams.hs b/src/Ganeti/OpParams.hs >> index b5a561953..f35ae4513 100644 >> --- a/src/Ganeti/OpParams.hs >> +++ b/src/Ganeti/OpParams.hs >> @@ -321,6 +321,7 @@ module Ganeti.OpParams >> , pVerifyClutter >> , pLongSleep >> , pIsStrict >> + , pEnabledPredictiveQueue >> ) where >> >> import Control.Monad (liftM, mplus) >> @@ -2030,3 +2031,9 @@ pIsStrict = >> withDoc "Whether the operation is in strict mode or not." . >> defaultField [| True |] $ >> simpleField "is_strict" [t| Bool |] >> + >> +pEnabledPredictiveQueue :: Field >> +pEnabledPredictiveQueue = >> + withDoc "Whether the predictive queue is enabled in the cluster." . >> + optionalField $ >> + simpleField "enabled_predictive_queue" [t| Bool |] >> diff --git a/src/Ganeti/Query/Server.hs b/src/Ganeti/Query/Server.hs >> index aefe129c5..8cef6cc2b 100644 >> --- a/src/Ganeti/Query/Server.hs >> +++ b/src/Ganeti/Query/Server.hs >> @@ -289,6 +289,8 @@ handleCall _ _ cdata QueryClusterInfo = >> showJSON $ clusterModifySshSetup cluster) >> , ("ssh_key_type", showJSON $ clusterSshKeyType cluster) >> , ("ssh_key_bits", showJSON $ clusterSshKeyBits cluster) >> + , ("enabled_predictive_queue", >> + showJSON $ clusterEnabledPredictiveQueue cluster) >> ] >> >> in case master of >> diff --git a/test/hs/Test/Ganeti/OpCodes.hs >> b/test/hs/Test/Ganeti/OpCodes.hs >> index 48a468345..7d39b6619 100644 >> --- a/test/hs/Test/Ganeti/OpCodes.hs >> +++ b/test/hs/Test/Ganeti/OpCodes.hs >> @@ -265,6 +265,7 @@ genOpCodeFromId op_id cfg = >> <*> genMaybe (fromPositive <$> arbitrary) -- maintd round >> interval >> <*> genMaybe arbitrary -- enable maintd balancing >> <*> genMaybe arbitrary -- maintd balancing threshold >> + <*> arbitrary -- enabled_predictive_queue >> "OP_CLUSTER_REDIST_CONF" -> pure OpCodes.OpClusterRedistConf >> "OP_CLUSTER_ACTIVATE_MASTER_IP" -> >> pure OpCodes.OpClusterActivateMasterIp >> -- >> 2.11.0.390.gc69c2f50cf-goog >> >
