This commit was successfully pushed into master @
2cb216028a063024a3ff4246ec417b23ab9507a1

On 9 January 2017 at 11:43, Federico Pareschi <[email protected]> wrote:
> I agree completely; the existing --user-shutdown flag is the reason why
> I kept the new flag's name consistent with it. I'll be pushing this onto
> master then, thanks for the review.
>
> On 9 January 2017 at 11:40, 'Viktor Bachraty' via ganeti-devel
> <[email protected]> wrote:
>> Thanks for the patch! LGTM, just one minor comment. Personally, I'd prefer
>> '--enable-predictive-queue' rather than '--predictive-queue', as it makes it
>> clear that it's a boolean flag (and it's consistent with the variable name).
>> However, it's probably better to keep the convention of existing flags (e.g.
>> --user-shutdown).
>>
>> On Monday, January 9, 2017 at 11:18:01 AM UTC, Federico Pareschi wrote:
>>>
>>> This commit adds the enabled_predictive_queue cluster parameter that
>>> allows the cluster to specify if the predictive scheduler should be used
>>> or not when ordering ganeti jobs in the queue. It also adds the
>>> corresponding --predictive-queue command line flag to the gnt-cluster
>>> init and gnt-cluster modify commands.
>>>
>>> As a default, if no option is specified upon cluster creation, the
>>> predictive scheduler is automatically enabled.
>>>
>>> Signed-off-by: Federico Morg Pareschi <[email protected]>
>>> ---
>>>  lib/bootstrap.py               |  3 ++-
>>>  lib/cli_opts.py                |  8 ++++++++
>>>  lib/client/gnt_cluster.py      | 17 ++++++++++++++---
>>>  lib/cmdlib/cluster/__init__.py |  3 +++
>>>  lib/objects.py                 |  1 +
>>>  man/gnt-cluster.rst            | 10 ++++++++++
>>>  src/Ganeti/JQScheduler.hs      | 20 ++++++++++++++++----
>>>  src/Ganeti/Objects.hs          |  1 +
>>>  src/Ganeti/OpCodes.hs          |  1 +
>>>  src/Ganeti/OpParams.hs         |  7 +++++++
>>>  src/Ganeti/Query/Server.hs     |  2 ++
>>>  test/hs/Test/Ganeti/OpCodes.hs |  1 +
>>>  12 files changed, 66 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/lib/bootstrap.py b/lib/bootstrap.py
>>> index 2f98fdd66..c4d987151 100644
>>> --- a/lib/bootstrap.py
>>> +++ b/lib/bootstrap.py
>>> @@ -501,7 +501,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint:
>>> disable=R0913, R0914
>>>                  use_external_mip_script=False, hv_state=None,
>>> disk_state=None,
>>>                  enabled_disk_templates=None, install_image=None,
>>>                  zeroing_image=None, compression_tools=None,
>>> -                enabled_user_shutdown=False):
>>> +                enabled_user_shutdown=False,
>>> enabled_predictive_queue=True):
>>>    """Initialise the cluster.
>>>
>>>    @type candidate_pool_size: int
>>> @@ -805,6 +805,7 @@ def InitCluster(cluster_name, mac_prefix, # pylint:
>>> disable=R0913, R0914
>>>      enabled_user_shutdown=enabled_user_shutdown,
>>>      ssh_key_type=ssh_key_type,
>>>      ssh_key_bits=ssh_key_bits,
>>> +    enabled_predictive_queue=enabled_predictive_queue,
>>>      )
>>>    master_node_config = objects.Node(name=hostname.name,
>>>                                      primary_ip=hostname.ip,
>>> diff --git a/lib/cli_opts.py b/lib/cli_opts.py
>>> index f90594a4d..984536ad2 100644
>>> --- a/lib/cli_opts.py
>>> +++ b/lib/cli_opts.py
>>> @@ -88,6 +88,7 @@ __all__ = [
>>>    "DIAGNOSE_DATA_COLLECTOR_FILENAME_OPT",
>>>    "ENABLED_DISK_TEMPLATES_OPT",
>>>    "ENABLED_HV_OPT",
>>> +  "ENABLED_PREDICTIVE_QUEUE_OPT",
>>>    "ENABLED_USER_SHUTDOWN_OPT",
>>>    "ERROR_CODES_OPT",
>>>    "EXT_PARAMS_OPT",
>>> @@ -1118,6 +1119,13 @@ ENABLED_DISK_TEMPLATES_OPT =
>>> cli_option("--enabled-disk-templates",
>>>                                               "disk templates",
>>>                                          type="string", default=None)
>>>
>>> +ENABLED_PREDICTIVE_QUEUE_OPT = cli_option("--predictive-queue",
>>> +                                          default=None,
>>> +
>>> dest="enabled_predictive_queue",
>>> +                                          help="Whether the predictive
>>> queue is"
>>> +                                               "enabled",
>>> +                                          type="bool")
>>> +
>>>  ENABLED_USER_SHUTDOWN_OPT = cli_option("--user-shutdown",
>>>                                         default=None,
>>>                                         dest="enabled_user_shutdown",
>>> diff --git a/lib/client/gnt_cluster.py b/lib/client/gnt_cluster.py
>>> index cbb3af14f..15208cad7 100644
>>> --- a/lib/client/gnt_cluster.py
>>> +++ b/lib/client/gnt_cluster.py
>>> @@ -309,6 +309,11 @@ def InitCluster(opts, args):
>>>
>>>    enabled_user_shutdown = bool(opts.enabled_user_shutdown)
>>>
>>> +  if opts.enabled_predictive_queue  is not None:
>>> +    enabled_predictive_queue = bool(opts.enabled_predictive_queue)
>>> +  else:
>>> +    enabled_predictive_queue = True # Predictive queue is enabled by
>>> default.
>>> +
>>>    if opts.ssh_key_type:
>>>      ssh_key_type = opts.ssh_key_type
>>>    else:
>>> @@ -353,6 +358,7 @@ def InitCluster(opts, args):
>>>                          enabled_user_shutdown=enabled_user_shutdown,
>>>                          ssh_key_type=ssh_key_type,
>>>                          ssh_key_bits=ssh_key_bits,
>>> +
>>> enabled_predictive_queue=enabled_predictive_queue,
>>>                          )
>>>    op = opcodes.OpClusterPostInit()
>>>    SubmitOpCode(op, opts=opts)
>>> @@ -635,6 +641,7 @@ def ShowClusterConfig(opts, args):
>>>        ("modify ssh setup", result["modify_ssh_setup"]),
>>>        ("ssh_key_type", result["ssh_key_type"]),
>>>        ("ssh_key_bits", result["ssh_key_bits"]),
>>> +      ("enabled predictive queue", result["enabled_predictive_queue"])
>>>        ]),
>>>
>>>      ("Default node parameters",
>>> @@ -1416,7 +1423,8 @@ def SetClusterParams(opts, args):
>>>            opts.maint_balance_threshold is not None or
>>>            opts.data_collector_interval or
>>>            opts.diagnose_data_collector_filename is not None or
>>> -          opts.enabled_data_collectors):
>>> +          opts.enabled_data_collectors or
>>> +          opts.enabled_predictive_queue is not None):
>>>      ToStderr("Please give at least one of the parameters.")
>>>      return 1
>>>
>>> @@ -1567,7 +1575,8 @@ def SetClusterParams(opts, args):
>>>      maint_balance_threshold=opts.maint_balance_threshold,
>>>      enabled_data_collectors=enabled_data_collectors,
>>>      data_collector_interval=data_collector_interval,
>>> -
>>> diagnose_data_collector_filename=opts.diagnose_data_collector_filename
>>> +
>>> diagnose_data_collector_filename=opts.diagnose_data_collector_filename,
>>> +    enabled_predictive_queue=opts.enabled_predictive_queue
>>>      )
>>>    return base.GetResult(None, opts, SubmitOrSend(op, opts))
>>>
>>> @@ -2506,6 +2515,7 @@ commands = {
>>>       IPOLICY_STD_SPECS_OPT, GLOBAL_GLUSTER_FILEDIR_OPT,
>>> INSTALL_IMAGE_OPT,
>>>       ZEROING_IMAGE_OPT, COMPRESSION_TOOLS_OPT,
>>>       ENABLED_USER_SHUTDOWN_OPT, SSH_KEY_BITS_OPT, SSH_KEY_TYPE_OPT,
>>> +     ENABLED_PREDICTIVE_QUEUE_OPT,
>>>       ]
>>>       + INSTANCE_POLICY_OPTS + SPLIT_ISPECS_OPTS,
>>>      "[opts...] <cluster_name>", "Initialises a new cluster
>>> configuration"),
>>> @@ -2591,7 +2601,8 @@ commands = {
>>>       PREALLOC_WIPE_DISKS_OPT, NODE_PARAMS_OPT, USE_EXTERNAL_MIP_SCRIPT,
>>>       DISK_PARAMS_OPT, HV_STATE_OPT, DISK_STATE_OPT] + SUBMIT_OPTS +
>>>       [ENABLED_DISK_TEMPLATES_OPT, IPOLICY_STD_SPECS_OPT,
>>> MODIFY_ETCHOSTS_OPT,
>>> -      MODIFY_SSH_SETUP_OPT, ENABLED_USER_SHUTDOWN_OPT] +
>>> +      MODIFY_SSH_SETUP_OPT, ENABLED_USER_SHUTDOWN_OPT,
>>> +      ENABLED_PREDICTIVE_QUEUE_OPT] +
>>>       INSTANCE_POLICY_OPTS +
>>>       [GLOBAL_FILEDIR_OPT, GLOBAL_SHARED_FILEDIR_OPT, ZEROING_IMAGE_OPT,
>>>        COMPRESSION_TOOLS_OPT] +
>>> diff --git a/lib/cmdlib/cluster/__init__.py
>>> b/lib/cmdlib/cluster/__init__.py
>>> index 28370d90a..455c1de15 100644
>>> --- a/lib/cmdlib/cluster/__init__.py
>>> +++ b/lib/cmdlib/cluster/__init__.py
>>> @@ -1770,6 +1770,9 @@ class LUClusterSetParams(LogicalUnit):
>>>        self.cluster.enabled_user_shutdown = self.op.enabled_user_shutdown
>>>        ensure_kvmd = True
>>>
>>> +    if self.op.enabled_predictive_queue is not None:
>>> +      self.cluster.enabled_predictive_queue =
>>> self.op.enabled_predictive_queue
>>> +
>>>      def helper_os(aname, mods, desc):
>>>        desc += " OS list"
>>>        lst = getattr(self.cluster, aname)
>>> diff --git a/lib/objects.py b/lib/objects.py
>>> index 7e20fc2cb..df0494c27 100644
>>> --- a/lib/objects.py
>>> +++ b/lib/objects.py
>>> @@ -1710,6 +1710,7 @@ class Cluster(TaggableObject):
>>>      "diagnose_data_collector_filename",
>>>      "ssh_key_type",
>>>      "ssh_key_bits",
>>> +    "enabled_predictive_queue",
>>>      ] + _TIMESTAMPS + _UUID
>>>
>>>    def UpgradeConfig(self):
>>> diff --git a/man/gnt-cluster.rst b/man/gnt-cluster.rst
>>> index 0469f7598..261019db8 100644
>>> --- a/man/gnt-cluster.rst
>>> +++ b/man/gnt-cluster.rst
>>> @@ -209,6 +209,7 @@ INIT
>>>  | [\--user-shutdown {yes \| no}]
>>>  | [\--ssh-key-type *type*]
>>>  | [\--ssh-key-bits *bits*]
>>> +| [\--predictive-queue {yes \| no}]
>>>  | {*clustername*}
>>>
>>>  This commands is only run once initially on the first node of the
>>> @@ -651,6 +652,10 @@ options **ssh-keygen**\(1) exposes. These are
>>> currently:
>>>
>>>  Ganeti defaults to using 2048-bit RSA keys.
>>>
>>> +The ``--predictive-queue`` option enables or disables the predictive
>>> +queue algorithm for the job scheduler. If this option is not specified,
>>> +Ganeti defaults to enabling the predictive scheduler.
>>> +
>>>  MASTER-FAILOVER
>>>  ~~~~~~~~~~~~~~~
>>>
>>> @@ -751,6 +756,8 @@ MODIFY
>>>  | [\--auto-balance-cluster {yes \| no }]
>>>  | [\--auto-balance-threshold *score* ]
>>>  | [\--diagnose-data-collector-filename *filename*]
>>> +| [\--predictive-queue {yes \| no}]
>>> +
>>>
>>>
>>>  Modify the options for the cluster.
>>> @@ -840,6 +847,9 @@ in absolute terms, unless the cluster score it at
>>> least 10 times that
>>>  value, in which case all beneficial steps will be done if auto-balancing
>>>  is enabled.
>>>
>>> +The ``--predictive-queue`` option enables or disables the predictive
>>> +queue algorithm for the job scheduler.
>>> +
>>>  See **gnt-cluster init** for a description of ``--install-image`` and
>>>  ``--zeroing-image``.
>>>
>>> diff --git a/src/Ganeti/JQScheduler.hs b/src/Ganeti/JQScheduler.hs
>>> index 5c79843a8..bfa78745f 100644
>>> --- a/src/Ganeti/JQScheduler.hs
>>> +++ b/src/Ganeti/JQScheduler.hs
>>> @@ -167,6 +167,12 @@ getMaxRunningJobs = getConfigValue
>>> clusterMaxRunningJobs 1
>>>  getMaxTrackedJobs :: JQStatus -> IO Int
>>>  getMaxTrackedJobs = getConfigValue clusterMaxTrackedJobs 1
>>>
>>> +-- | Get the boolean that specifies whether or not the predictive queue
>>> +-- scheduler is enabled in the cluster. If the configuration is not
>>> available,
>>> +-- the predictive queue is enabled by default.
>>> +getEnabledPredictiveQueue :: JQStatus -> IO Bool
>>> +getEnabledPredictiveQueue = getConfigValue clusterEnabledPredictiveQueue
>>> True
>>> +
>>>  -- | Get the number of jobs currently running.
>>>  getRQL :: JQStatus -> IO Int
>>>  getRQL = liftM (length . qRunning) . readIORef . jqJobs
>>> @@ -348,18 +354,22 @@ sortByStaticLocks cfg queue currTime = sortBy
>>> (compare `on` opWeight)
>>>  -- pure function doing the scheduling.
>>>  selectJobsToRun :: ConfigData
>>>                  -> Int -- How many jobs are allowed to run at the same
>>> time.
>>> +                -> Bool -- If the predictive scheduler is enabled
>>>                  -> Timestamp -- Current time
>>>                  -> Set FilterRule -- Filter rules to respect for
>>> scheduling
>>>                  -> Queue
>>>                  -> (Queue, [JobWithStat])
>>> -selectJobsToRun cfg count currTime filters queue =
>>> +selectJobsToRun cfg count isPredictive currTime filters queue =
>>>    let n = count - length (qRunning queue) - length (qManipulated queue)
>>> +      pickScheduler = if isPredictive
>>> +                         then sortByStaticLocks cfg queue currTime
>>> +                         else id
>>>        chosen = take n
>>>                 . jobFiltering queue filters
>>>                 . reasonRateLimit queue
>>>                 . sortBy (comparing (calcJobPriority . jJob))
>>>                 . filter (jobEligible queue)
>>> -               . sortByStaticLocks cfg queue currTime
>>> +               . pickScheduler
>>>                 $ qEnqueued queue
>>>        remain = deleteFirstsBy ((==) `on` (qjId . jJob)) (qEnqueued queue)
>>> chosen
>>>    in (queue {qEnqueued=remain, qRunning=qRunning queue ++ chosen},
>>> chosen)
>>> @@ -456,8 +466,10 @@ scheduleSomeJobs qstate = do
>>>
>>>        -- Select the jobs to run.
>>>        count <- getMaxRunningJobs qstate
>>> -      chosen <- atomicModifyIORef (jqJobs qstate)
>>> -                                  (selectJobsToRun cfg count ts filters)
>>> +      isPredictive <- getEnabledPredictiveQueue qstate
>>> +      let jobsToRun = selectJobsToRun cfg count isPredictive ts filters
>>> +      chosen <- atomicModifyIORef (jqJobs qstate) jobsToRun
>>> +
>>>        let jobs = map jJob chosen
>>>        unless (null chosen) . logInfo . (++) "Starting jobs: " . commaJoin
>>>          $ map (show . fromJobId . qjId) jobs
>>> diff --git a/src/Ganeti/Objects.hs b/src/Ganeti/Objects.hs
>>> index 5be8adfe5..572dc662f 100644
>>> --- a/src/Ganeti/Objects.hs
>>> +++ b/src/Ganeti/Objects.hs
>>> @@ -698,6 +698,7 @@ $(buildObject "Cluster" "cluster" $
>>>        "diagnose_data_collector_filename"         [t| String
>>> |]
>>>    , simpleField "ssh_key_type"                   [t| SshKeyType
>>> |]
>>>    , simpleField "ssh_key_bits"                   [t| Int
>>> |]
>>> +  , simpleField "enabled_predictive_queue"       [t| Bool
>>> |]
>>>   ]
>>>   ++ timeStampFields
>>>   ++ uuidFields
>>> diff --git a/src/Ganeti/OpCodes.hs b/src/Ganeti/OpCodes.hs
>>> index c4da480a8..811d59d66 100644
>>> --- a/src/Ganeti/OpCodes.hs
>>> +++ b/src/Ganeti/OpCodes.hs
>>> @@ -271,6 +271,7 @@ $(genOpCode "OpCode"
>>>       , pMaintdRoundDelay
>>>       , pMaintdEnableBalancing
>>>       , pMaintdBalancingThreshold
>>> +     , pEnabledPredictiveQueue
>>>       ],
>>>       [])
>>>    , ("OpClusterRedistConf",
>>> diff --git a/src/Ganeti/OpParams.hs b/src/Ganeti/OpParams.hs
>>> index b5a561953..f35ae4513 100644
>>> --- a/src/Ganeti/OpParams.hs
>>> +++ b/src/Ganeti/OpParams.hs
>>> @@ -321,6 +321,7 @@ module Ganeti.OpParams
>>>    , pVerifyClutter
>>>    , pLongSleep
>>>    , pIsStrict
>>> +  , pEnabledPredictiveQueue
>>>    ) where
>>>
>>>  import Control.Monad (liftM, mplus)
>>> @@ -2030,3 +2031,9 @@ pIsStrict =
>>>    withDoc "Whether the operation is in strict mode or not." .
>>>    defaultField [| True |] $
>>>    simpleField "is_strict" [t| Bool |]
>>> +
>>> +pEnabledPredictiveQueue :: Field
>>> +pEnabledPredictiveQueue =
>>> +  withDoc "Whether the predictive queue is enabled in the cluster." .
>>> +  optionalField $
>>> +  simpleField "enabled_predictive_queue" [t| Bool |]
>>> diff --git a/src/Ganeti/Query/Server.hs b/src/Ganeti/Query/Server.hs
>>> index aefe129c5..8cef6cc2b 100644
>>> --- a/src/Ganeti/Query/Server.hs
>>> +++ b/src/Ganeti/Query/Server.hs
>>> @@ -289,6 +289,8 @@ handleCall _ _ cdata QueryClusterInfo =
>>>                 showJSON $ clusterModifySshSetup cluster)
>>>              , ("ssh_key_type", showJSON $ clusterSshKeyType cluster)
>>>              , ("ssh_key_bits", showJSON $ clusterSshKeyBits cluster)
>>> +            , ("enabled_predictive_queue",
>>> +               showJSON $ clusterEnabledPredictiveQueue cluster)
>>>              ]
>>>
>>>    in case master of
>>> diff --git a/test/hs/Test/Ganeti/OpCodes.hs
>>> b/test/hs/Test/Ganeti/OpCodes.hs
>>> index 48a468345..7d39b6619 100644
>>> --- a/test/hs/Test/Ganeti/OpCodes.hs
>>> +++ b/test/hs/Test/Ganeti/OpCodes.hs
>>> @@ -265,6 +265,7 @@ genOpCodeFromId op_id cfg =
>>>          <*> genMaybe (fromPositive <$> arbitrary) -- maintd round
>>> interval
>>>          <*> genMaybe arbitrary           -- enable maintd balancing
>>>          <*> genMaybe arbitrary           -- maintd balancing threshold
>>> +        <*> arbitrary                    -- enabled_predictive_queue
>>>      "OP_CLUSTER_REDIST_CONF" -> pure OpCodes.OpClusterRedistConf
>>>      "OP_CLUSTER_ACTIVATE_MASTER_IP" ->
>>>        pure OpCodes.OpClusterActivateMasterIp
>>> --
>>> 2.11.0.390.gc69c2f50cf-goog
>>>
>>

Reply via email to