Thanks for the patch! LGTM, just one minor comment. Personally I'd prefer '--enable-predictive-queue' rather than '--predictive-queue' as it makes it clear that it's a boolean flag (and it's consistent with the variable name). However probably it's better to keep the convention of existing flags (e.g. --user-shutdown).
On Monday, January 9, 2017 at 11:18:01 AM UTC, Federico Pareschi wrote: > > This commit adds the enabled_predictive_queue cluster parameter that > allows the cluster to specify if the predictive scheduler should be used > or not when ordering ganeti jobs in the queue. It also adds the correct > command line flags for the cluster init and cluster modify ganeti jobs. > > As a default, if no option is specified upon cluster creation, the > predictive scheduler is automatically enabled. > > Signed-off-by: Federico Morg Pareschi <[email protected]> > --- > lib/bootstrap.py | 3 ++- > lib/cli_opts.py | 8 ++++++++ > lib/client/gnt_cluster.py | 17 ++++++++++++++--- > lib/cmdlib/cluster/__init__.py | 3 +++ > lib/objects.py | 1 + > man/gnt-cluster.rst | 10 ++++++++++ > src/Ganeti/JQScheduler.hs | 20 ++++++++++++++++---- > src/Ganeti/Objects.hs | 1 + > src/Ganeti/OpCodes.hs | 1 + > src/Ganeti/OpParams.hs | 7 +++++++ > src/Ganeti/Query/Server.hs | 2 ++ > test/hs/Test/Ganeti/OpCodes.hs | 1 + > 12 files changed, 66 insertions(+), 8 deletions(-) > > diff --git a/lib/bootstrap.py b/lib/bootstrap.py > index 2f98fdd66..c4d987151 100644 > --- a/lib/bootstrap.py > +++ b/lib/bootstrap.py > @@ -501,7 +501,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint: > disable=R0913, R0914 > use_external_mip_script=False, hv_state=None, > disk_state=None, > enabled_disk_templates=None, install_image=None, > zeroing_image=None, compression_tools=None, > - enabled_user_shutdown=False): > + enabled_user_shutdown=False, > enabled_predictive_queue=True): > """Initialise the cluster. > > @type candidate_pool_size: int > @@ -805,6 +805,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint: > disable=R0913, R0914 > enabled_user_shutdown=enabled_user_shutdown, > ssh_key_type=ssh_key_type, > ssh_key_bits=ssh_key_bits, > + enabled_predictive_queue=enabled_predictive_queue, > ) > master_node_config = objects.Node(name=hostname.name, > primary_ip=hostname.ip, > diff --git a/lib/cli_opts.py b/lib/cli_opts.py > index f90594a4d..984536ad2 100644 > --- a/lib/cli_opts.py > +++ b/lib/cli_opts.py > @@ -88,6 +88,7 @@ __all__ = [ > "DIAGNOSE_DATA_COLLECTOR_FILENAME_OPT", > "ENABLED_DISK_TEMPLATES_OPT", > "ENABLED_HV_OPT", > + "ENABLED_PREDICTIVE_QUEUE_OPT", > "ENABLED_USER_SHUTDOWN_OPT", > "ERROR_CODES_OPT", > "EXT_PARAMS_OPT", > @@ -1118,6 +1119,13 @@ ENABLED_DISK_TEMPLATES_OPT = > cli_option("--enabled-disk-templates", > "disk templates", > type="string", default=None) > > +ENABLED_PREDICTIVE_QUEUE_OPT = cli_option("--predictive-queue", > + default=None, > + > dest="enabled_predictive_queue", > + help="Whether the predictive > queue is" > + "enabled", > + type="bool") > + > ENABLED_USER_SHUTDOWN_OPT = cli_option("--user-shutdown", > default=None, > dest="enabled_user_shutdown", > diff --git a/lib/client/gnt_cluster.py b/lib/client/gnt_cluster.py > index cbb3af14f..15208cad7 100644 > --- a/lib/client/gnt_cluster.py > +++ b/lib/client/gnt_cluster.py > @@ -309,6 +309,11 @@ def InitCluster(opts, args): > > enabled_user_shutdown = bool(opts.enabled_user_shutdown) > > + if opts.enabled_predictive_queue is not None: > + enabled_predictive_queue = bool(opts.enabled_predictive_queue) > + else: > + enabled_predictive_queue = True # Predictive queue is enabled by > default. > + > if opts.ssh_key_type: > ssh_key_type = opts.ssh_key_type > else: > @@ -353,6 +358,7 @@ def InitCluster(opts, args): > enabled_user_shutdown=enabled_user_shutdown, > ssh_key_type=ssh_key_type, > ssh_key_bits=ssh_key_bits, > + > enabled_predictive_queue=enabled_predictive_queue, > ) > op = opcodes.OpClusterPostInit() > SubmitOpCode(op, opts=opts) > @@ -635,6 +641,7 @@ def ShowClusterConfig(opts, args): > ("modify ssh setup", result["modify_ssh_setup"]), > ("ssh_key_type", result["ssh_key_type"]), > ("ssh_key_bits", result["ssh_key_bits"]), > + ("enabled predictive queue", result["enabled_predictive_queue"]) > ]), > > ("Default node parameters", > @@ -1416,7 +1423,8 @@ def SetClusterParams(opts, args): > opts.maint_balance_threshold is not None or > opts.data_collector_interval or > opts.diagnose_data_collector_filename is not None or > - opts.enabled_data_collectors): > + opts.enabled_data_collectors or > + opts.enabled_predictive_queue is not None): > ToStderr("Please give at least one of the parameters.") > return 1 > > @@ -1567,7 +1575,8 @@ def SetClusterParams(opts, args): > maint_balance_threshold=opts.maint_balance_threshold, > enabled_data_collectors=enabled_data_collectors, > data_collector_interval=data_collector_interval, > - > diagnose_data_collector_filename=opts.diagnose_data_collector_filename > + > diagnose_data_collector_filename=opts.diagnose_data_collector_filename, > + enabled_predictive_queue=opts.enabled_predictive_queue > ) > return base.GetResult(None, opts, SubmitOrSend(op, opts)) > > @@ -2506,6 +2515,7 @@ commands = { > IPOLICY_STD_SPECS_OPT, GLOBAL_GLUSTER_FILEDIR_OPT, > INSTALL_IMAGE_OPT, > ZEROING_IMAGE_OPT, COMPRESSION_TOOLS_OPT, > ENABLED_USER_SHUTDOWN_OPT, SSH_KEY_BITS_OPT, SSH_KEY_TYPE_OPT, > + ENABLED_PREDICTIVE_QUEUE_OPT, > ] > + INSTANCE_POLICY_OPTS + SPLIT_ISPECS_OPTS, > "[opts...] <cluster_name>", "Initialises a new cluster > configuration"), > @@ -2591,7 +2601,8 @@ commands = { > PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT, > DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT] + SUBMIT_OPTS + > [ENABLED_DISK_TEMPLATES_OPT, IPOLICY_STD_SPECS_OPT, > MODIFY_ETCHOSTS_OPT, > - MODIFY_SSH_SETUP_OPT, ENABLED_USER_SHUTDOWN_OPT] + > + MODIFY_SSH_SETUP_OPT, ENABLED_USER_SHUTDOWN_OPT, > + ENABLED_PREDICTIVE_QUEUE_OPT] + > INSTANCE_POLICY_OPTS + > [GLOBAL_FILEDIR_OPT, GLOBAL_SHARED_FILEDIR_OPT, ZEROING_IMAGE_OPT, > COMPRESSION_TOOLS_OPT] + > diff --git a/lib/cmdlib/cluster/__init__.py > b/lib/cmdlib/cluster/__init__.py > index 28370d90a..455c1de15 100644 > --- a/lib/cmdlib/cluster/__init__.py > +++ b/lib/cmdlib/cluster/__init__.py > @@ -1770,6 +1770,9 @@ class LUClusterSetParams(LogicalUnit): > self.cluster.enabled_user_shutdown = self.op.enabled_user_shutdown > ensure_kvmd = True > > + if self.op.enabled_predictive_queue is not None: > + self.cluster.enabled_predictive_queue = > self.op.enabled_predictive_queue > + > def helper_os(aname, mods, desc): > desc += " OS list" > lst = getattr(self.cluster, aname) > diff --git a/lib/objects.py b/lib/objects.py > index 7e20fc2cb..df0494c27 100644 > --- a/lib/objects.py > +++ b/lib/objects.py > @@ -1710,6 +1710,7 @@ class Cluster(TaggableObject): > "diagnose_data_collector_filename", > "ssh_key_type", > "ssh_key_bits", > + "enabled_predictive_queue", > ] + _TIMESTAMPS + _UUID > > def UpgradeConfig(self): > diff --git a/man/gnt-cluster.rst b/man/gnt-cluster.rst > index 0469f7598..261019db8 100644 > --- a/man/gnt-cluster.rst > +++ b/man/gnt-cluster.rst > @@ -209,6 +209,7 @@ INIT > | [\--user-shutdown {yes \| no}] > | [\--ssh-key-type *type*] > | [\--ssh-key-bits *bits*] > +| [\--predictive-queue {yes \| no}] > | {*clustername*} > > This commands is only run once initially on the first node of the > @@ -651,6 +652,10 @@ options **ssh-keygen**\(1) exposes. These are > currently: > > Ganeti defaults to using 2048-bit RSA keys. > > +The ``--predictive-queue`` option enables or disables the predictive > +queue algorithm for the job scheduler. If this option is not specified, > +Ganeti defaults to enabling the predictive scheduler. > + > MASTER-FAILOVER > ~~~~~~~~~~~~~~~ > > @@ -751,6 +756,8 @@ MODIFY > | [\--auto-balance-cluster {yes \| no }] > | [\--auto-balance-threshold *score* ] > | [\--diagnose-data-collector-filename *filename*] > +| [\--predictive-queue {yes \| no}] > + > > > Modify the options for the cluster. > @@ -840,6 +847,9 @@ in absolute terms, unless the cluster score it at > least 10 times that > value, in which case all beneficial steps will be done if auto-balancing > is enabled. > > +The ``--predictive-queue`` option enables or disables the predictive > +queue algorithm for the job scheduler. > + > See **gnt-cluster init** for a description of ``--install-image`` and > ``--zeroing-image``. > > diff --git a/src/Ganeti/JQScheduler.hs b/src/Ganeti/JQScheduler.hs > index 5c79843a8..bfa78745f 100644 > --- a/src/Ganeti/JQScheduler.hs > +++ b/src/Ganeti/JQScheduler.hs > @@ -167,6 +167,12 @@ getMaxRunningJobs = getConfigValue > clusterMaxRunningJobs 1 > getMaxTrackedJobs :: JQStatus -> IO Int > getMaxTrackedJobs = getConfigValue clusterMaxTrackedJobs 1 > > +-- | Get the boolean that specifies whether or not the predictive queue > +-- scheduler is enabled in the cluster. If the configuration is not > available, > +-- the predictive queue is enabled by default. > +getEnabledPredictiveQueue :: JQStatus -> IO Bool > +getEnabledPredictiveQueue = getConfigValue clusterEnabledPredictiveQueue > True > + > -- | Get the number of jobs currently running. > getRQL :: JQStatus -> IO Int > getRQL = liftM (length . qRunning) . readIORef . jqJobs > @@ -348,18 +354,22 @@ sortByStaticLocks cfg queue currTime = sortBy > (compare `on` opWeight) > -- pure function doing the scheduling. > selectJobsToRun :: ConfigData > -> Int -- How many jobs are allowed to run at the same > time. > + -> Bool -- If the predictive scheduler is enabled > -> Timestamp -- Current time > -> Set FilterRule -- Filter rules to respect for > scheduling > -> Queue > -> (Queue, [JobWithStat]) > -selectJobsToRun cfg count currTime filters queue = > +selectJobsToRun cfg count isPredictive currTime filters queue = > let n = count - length (qRunning queue) - length (qManipulated queue) > + pickScheduler = if isPredictive > + then sortByStaticLocks cfg queue currTime > + else id > chosen = take n > . jobFiltering queue filters > . reasonRateLimit queue > . sortBy (comparing (calcJobPriority . jJob)) > . filter (jobEligible queue) > - . sortByStaticLocks cfg queue currTime > + . pickScheduler > $ qEnqueued queue > remain = deleteFirstsBy ((==) `on` (qjId . jJob)) (qEnqueued queue) > chosen > in (queue {qEnqueued=remain, qRunning=qRunning queue ++ chosen}, > chosen) > @@ -456,8 +466,10 @@ scheduleSomeJobs qstate = do > > -- Select the jobs to run. > count <- getMaxRunningJobs qstate > - chosen <- atomicModifyIORef (jqJobs qstate) > - (selectJobsToRun cfg count ts filters) > + isPredictive <- getEnabledPredictiveQueue qstate > + let jobsToRun = selectJobsToRun cfg count isPredictive ts filters > + chosen <- atomicModifyIORef (jqJobs qstate) jobsToRun > + > let jobs = map jJob chosen > unless (null chosen) . logInfo . (++) "Starting jobs: " . commaJoin > $ map (show . fromJobId . qjId) jobs > diff --git a/src/Ganeti/Objects.hs b/src/Ganeti/Objects.hs > index 5be8adfe5..572dc662f 100644 > --- a/src/Ganeti/Objects.hs > +++ b/src/Ganeti/Objects.hs > @@ -698,6 +698,7 @@ $(buildObject "Cluster" "cluster" $ > "diagnose_data_collector_filename" [t| String > |] > , simpleField "ssh_key_type" [t| SshKeyType > |] > , simpleField "ssh_key_bits" [t| Int > |] > + , simpleField "enabled_predictive_queue" [t| Bool > |] > ] > ++ timeStampFields > ++ uuidFields > diff --git a/src/Ganeti/OpCodes.hs b/src/Ganeti/OpCodes.hs > index c4da480a8..811d59d66 100644 > --- a/src/Ganeti/OpCodes.hs > +++ b/src/Ganeti/OpCodes.hs > @@ -271,6 +271,7 @@ $(genOpCode "OpCode" > , pMaintdRoundDelay > , pMaintdEnableBalancing > , pMaintdBalancingThreshold > + , pEnabledPredictiveQueue > ], > []) > , ("OpClusterRedistConf", > diff --git a/src/Ganeti/OpParams.hs b/src/Ganeti/OpParams.hs > index b5a561953..f35ae4513 100644 > --- a/src/Ganeti/OpParams.hs > +++ b/src/Ganeti/OpParams.hs > @@ -321,6 +321,7 @@ module Ganeti.OpParams > , pVerifyClutter > , pLongSleep > , pIsStrict > + , pEnabledPredictiveQueue > ) where > > import Control.Monad (liftM, mplus) > @@ -2030,3 +2031,9 @@ pIsStrict = > withDoc "Whether the operation is in strict mode or not." . > defaultField [| True |] $ > simpleField "is_strict" [t| Bool |] > + > +pEnabledPredictiveQueue :: Field > +pEnabledPredictiveQueue = > + withDoc "Whether the predictive queue is enabled in the cluster." . > + optionalField $ > + simpleField "enabled_predictive_queue" [t| Bool |] > diff --git a/src/Ganeti/Query/Server.hs b/src/Ganeti/Query/Server.hs > index aefe129c5..8cef6cc2b 100644 > --- a/src/Ganeti/Query/Server.hs > +++ b/src/Ganeti/Query/Server.hs > @@ -289,6 +289,8 @@ handleCall _ _ cdata QueryClusterInfo = > showJSON $ clusterModifySshSetup cluster) > , ("ssh_key_type", showJSON $ clusterSshKeyType cluster) > , ("ssh_key_bits", showJSON $ clusterSshKeyBits cluster) > + , ("enabled_predictive_queue", > + showJSON $ clusterEnabledPredictiveQueue cluster) > ] > > in case master of > diff --git a/test/hs/Test/Ganeti/OpCodes.hs > b/test/hs/Test/Ganeti/OpCodes.hs > index 48a468345..7d39b6619 100644 > --- a/test/hs/Test/Ganeti/OpCodes.hs > +++ b/test/hs/Test/Ganeti/OpCodes.hs > @@ -265,6 +265,7 @@ genOpCodeFromId op_id cfg = > <*> genMaybe (fromPositive <$> arbitrary) -- maintd round > interval > <*> genMaybe arbitrary -- enable maintd balancing > <*> genMaybe arbitrary -- maintd balancing threshold > + <*> arbitrary -- enabled_predictive_queue > "OP_CLUSTER_REDIST_CONF" -> pure OpCodes.OpClusterRedistConf > "OP_CLUSTER_ACTIVATE_MASTER_IP" -> > pure OpCodes.OpClusterActivateMasterIp > -- > 2.11.0.390.gc69c2f50cf-goog > >
