On Wed, Dec 1, 2010 at 14:47, Iustin Pop <[email protected]> wrote:
> This improves on the previous change. Currently, the node and instance
> lists shipped around during data loading are (again) association lists.
> For instances it's not a big issue, but the node list is rewritten
> continuously while we assign instances to nodes, and that is very slow.
> The code was originally written for small (10-20 node) clusters, and
> today, with much larger multi-node clusters, that no longer scales… :)
>
> Rewriting to use Node.List/Instance.List makes a bit of a messy patch,
> but it allows us to remove some custom functions for assoc. list
> processing, and also some custom unittests.
>
> At the end, the load time is almost halved, and we spend time now just
> in the string parsing code (which is, as we know, slow…).
> ---
> Ganeti/HTools/Loader.hs | 37 ++++++++++++++-----------------------
> Ganeti/HTools/Luxi.hs | 4 ++--
> Ganeti/HTools/QC.hs | 45
> +++++++++++++++++++--------------------------
> Ganeti/HTools/Rapi.hs | 4 ++--
> Ganeti/HTools/Simu.hs | 9 +++++----
> Ganeti/HTools/Text.hs | 6 +++---
> hscan.hs | 2 +-
> 7 files changed, 46 insertions(+), 61 deletions(-)
>
> diff --git a/Ganeti/HTools/Loader.hs b/Ganeti/HTools/Loader.hs
> index e1f35a0..dcaef80 100644
> --- a/Ganeti/HTools/Loader.hs
> +++ b/Ganeti/HTools/Loader.hs
> @@ -37,9 +37,7 @@ module Ganeti.HTools.Loader
> , Request(..)
> ) where
>
> -import Data.Function (on)
> import Data.List
> -import Data.Maybe (fromJust)
> import qualified Data.Map as M
> import Text.Printf (printf)
>
> @@ -93,35 +91,29 @@ lookupInstance kti inst =
> -- | Given a list of elements (and their names), assign indices to them.
> assignIndices :: (Element a) =>
> [(String, a)]
> - -> (NameAssoc, [(Int, a)])
> + -> (NameAssoc, Container.Container a)
> assignIndices nodes =
> let (na, idx_node) =
> unzip . map (\ (idx, (k, v)) -> ((k, idx), (idx, setIdx v idx)))
> . zip [0..] $ nodes
> - in (M.fromList na, idx_node)
> -
> --- | Assoc element comparator
> -assocEqual :: (Eq a) => (a, b) -> (a, b) -> Bool
> -assocEqual = (==) `on` fst
> + in (M.fromList na, Container.fromAssocList idx_node)
>
> -- | For each instance, add its index to its primary and secondary nodes.
> -fixNodes :: [(Ndx, Node.Node)]
> +fixNodes :: Node.List
> -> Instance.Instance
> - -> [(Ndx, Node.Node)]
> + -> Node.List
> fixNodes accu inst =
> let
> pdx = Instance.pNode inst
> sdx = Instance.sNode inst
> - pold = fromJust $ lookup pdx accu
> + pold = Container.find pdx accu
> pnew = Node.setPri pold inst
> - ac1 = deleteBy assocEqual (pdx, pold) accu
> - ac2 = (pdx, pnew):ac1
> + ac2 = Container.add pdx pnew accu
> in
> if sdx /= Node.noSecondary
> - then let sold = fromJust $ lookup sdx accu
> + then let sold = Container.find sdx accu
> snew = Node.setSec sold inst
> - ac3 = deleteBy assocEqual (sdx, sold) ac2
> - in (sdx, snew):ac3
> + in Container.add sdx snew ac2
> else ac2
>
> -- | Remove non-selected tags from the exclusion list
> @@ -168,11 +160,11 @@ commonSuffix nl il =
> mergeData :: [(String, DynUtil)] -- ^ Instance utilisation data
> -> [String] -- ^ Exclusion tags
> -> [String] -- ^ Untouchable instances
> - -> (Node.AssocList, Instance.AssocList, [String])
> + -> (Node.List, Instance.List, [String])
> -- ^ Data from backends
> -> Result (Node.List, Instance.List, [String])
> -mergeData um extags exinsts (nl, il, tags) =
> - let il2 = Container.fromAssocList il
> +mergeData um extags exinsts (nl, il2, tags) =
> + let il = Container.elems il2
> il3 = foldl' (\im (name, n_util) ->
> case Container.findByName im name of
> Nothing -> im -- skipping unknown instance
> @@ -184,10 +176,9 @@ mergeData um extags exinsts (nl, il, tags) =
> il4 = Container.map (filterExTags allextags .
> updateMovable exinsts) il3
> nl2 = foldl' fixNodes nl (Container.elems il4)
> - nl3 = Container.fromAssocList
> - (map (\ (k, v) -> (k, Node.buildPeers v il4)) nl2)
> - node_names = map (Node.name . snd) nl
> - inst_names = map (Instance.name . snd) il
> + nl3 = Container.map (\node -> Node.buildPeers node il4) nl2
> + node_names = map Node.name (Container.elems nl)
> + inst_names = map Instance.name il
> common_suffix = longestDomain (node_names ++ inst_names)
> snl = Container.map (computeAlias common_suffix) nl3
> sil = Container.map (computeAlias common_suffix) il4
> diff --git a/Ganeti/HTools/Luxi.hs b/Ganeti/HTools/Luxi.hs
> index 24441b3..cc80908 100644
> --- a/Ganeti/HTools/Luxi.hs
> +++ b/Ganeti/HTools/Luxi.hs
> @@ -162,7 +162,7 @@ readData master =
> )
>
> parseData :: (Result JSValue, Result JSValue, Result JSValue)
> - -> Result (Node.AssocList, Instance.AssocList, [String])
> + -> Result (Node.List, Instance.List, [String])
> parseData (nodes, instances, cinfo) = do
> node_data <- nodes >>= getNodes
> let (node_names, node_idx) = assignIndices node_data
> @@ -173,5 +173,5 @@ parseData (nodes, instances, cinfo) = do
>
> -- | Top level function for data loading
> loadData :: String -- ^ Unix socket to use as source
> - -> IO (Result (Node.AssocList, Instance.AssocList, [String]))
> + -> IO (Result (Node.List, Instance.List, [String]))
> loadData master = readData master >>= return . parseData
> diff --git a/Ganeti/HTools/QC.hs b/Ganeti/HTools/QC.hs
> index a94b4a1..b4ab4b0 100644
> --- a/Ganeti/HTools/QC.hs
> +++ b/Ganeti/HTools/QC.hs
> @@ -110,7 +110,7 @@ makeSmallCluster node count =
> let fn = Node.buildPeers node Container.empty
> namelst = map (\n -> (Node.name n, n)) (replicate count fn)
> (_, nlst) = Loader.assignIndices namelst
> - in Container.fromAssocList nlst
> + in nlst
>
> -- | Checks if a node is "big" enough
> isNodeBig :: Node.Node -> Int -> Bool
> @@ -448,11 +448,12 @@ prop_Text_Load_Instance name mem dsk vcpus status
> pnode snode pdx sdx =
> ndx = if null snode
> then [(pnode, pdx)]
> else [(pnode, pdx), (snode, rsdx)]
> + nl = Data.Map.fromList ndx
> tags = ""
> - inst = Text.loadInst ndx
> + inst = Text.loadInst nl
> [name, mem_s, dsk_s, vcpus_s, status, pnode, snode, tags]::
> Maybe (String, Instance.Instance)
> - fail1 = Text.loadInst ndx
> + fail1 = Text.loadInst nl
> [name, mem_s, dsk_s, vcpus_s, status, pnode, pnode, tags]::
> Maybe (String, Instance.Instance)
> _types = ( name::String, mem::Int, dsk::Int
> @@ -473,7 +474,8 @@ prop_Text_Load_Instance name mem dsk vcpus status pnode
> snode pdx sdx =
> isNothing fail1)
>
> prop_Text_Load_InstanceFail ktn fields =
> - length fields /= 8 ==> isNothing $ Text.loadInst ktn fields
> + length fields /= 8 ==> isNothing $ Text.loadInst nl fields
> + where nl = Data.Map.fromList ktn
>
> prop_Text_Load_Node name tm nm fm td fd tc fo =
> let conv v = if v < 0
> @@ -822,35 +824,27 @@ testJobs =
> -- | Loader tests
>
> prop_Loader_lookupNode ktn inst node =
> - isJust (Loader.lookupNode ktn inst node) == (node `elem` names)
> - where names = map fst ktn
> + Loader.lookupNode nl inst node == Data.Map.lookup node nl
> + where nl = Data.Map.fromList ktn
>
> prop_Loader_lookupInstance kti inst =
> - isJust (Loader.lookupInstance kti inst) == (inst `elem` names)
> - where names = map fst kti
> -
> -prop_Loader_lookupInstanceIdx kti inst =
> - case (Loader.lookupInstance kti inst,
> - findIndex (\p -> fst p == inst) kti) of
> - (Nothing, Nothing) -> True
> - (Just idx, Just ex) -> idx == snd (kti !! ex)
> - _ -> False
> -
> -prop_Loader_assignIndices enames =
> - length nassoc == length enames &&
> - length kt == length enames &&
> - (if not (null enames)
> - then maximum (map fst kt) == length enames - 1
> + Loader.lookupInstance il inst == Data.Map.lookup inst il
> + where il = Data.Map.fromList kti
> +
> +prop_Loader_assignIndices nodes =
> + Data.Map.size nassoc == length nodes &&
> + Container.size kt == length nodes &&
> + (if not (null nodes)
> + then maximum (IntMap.keys kt) == length nodes - 1
> else True)
> - where (nassoc, kt) = Loader.assignIndices enames
> - _types = enames::[(String, Node.Node)]
> + where (nassoc, kt) = Loader.assignIndices (map (\n -> (Node.name n, n))
> nodes)
>
>
> -- | Checks that the number of primary instances recorded on the nodes
> -- is zero
> prop_Loader_mergeData ns =
> - let na = map (\n -> (Node.idx n, n)) ns
> - in case Loader.mergeData [] [] [] (na, [], []) of
> + let na = Container.fromAssocList $ map (\n -> (Node.idx n, n)) ns
> + in case Loader.mergeData [] [] [] (na, Container.empty, []) of
> Types.Bad _ -> False
> Types.Ok (nl, il, _) ->
> let nodes = Container.elems nl
> @@ -861,7 +855,6 @@ prop_Loader_mergeData ns =
> testLoader =
> [ run prop_Loader_lookupNode
> , run prop_Loader_lookupInstance
> - , run prop_Loader_lookupInstanceIdx
> , run prop_Loader_assignIndices
> , run prop_Loader_mergeData
> ]
> diff --git a/Ganeti/HTools/Rapi.hs b/Ganeti/HTools/Rapi.hs
> index 37ac578..58df9bd 100644
> --- a/Ganeti/HTools/Rapi.hs
> +++ b/Ganeti/HTools/Rapi.hs
> @@ -131,7 +131,7 @@ readData master = do
>
> -- | Builds the cluster data from the raw Rapi content
> parseData :: (Result String, Result String, Result String)
> - -> Result (Node.AssocList, Instance.AssocList, [String])
> + -> Result (Node.List, Instance.List, [String])
> parseData (node_body, inst_body, tags_body) = do
> node_data <- node_body >>= getNodes
> let (node_names, node_idx) = assignIndices node_data
> @@ -142,5 +142,5 @@ parseData (node_body, inst_body, tags_body) = do
>
> -- | Top level function for data loading
> loadData :: String -- ^ Cluster or URL to use as source
> - -> IO (Result (Node.AssocList, Instance.AssocList, [String]))
> + -> IO (Result (Node.List, Instance.List, [String]))
> loadData master = readData master >>= return . parseData
> diff --git a/Ganeti/HTools/Simu.hs b/Ganeti/HTools/Simu.hs
> index c29f3f5..47618b9 100644
> --- a/Ganeti/HTools/Simu.hs
> +++ b/Ganeti/HTools/Simu.hs
> @@ -6,7 +6,7 @@ This module holds the code for parsing a cluster
> description.
>
> {-
>
> -Copyright (C) 2009 Google Inc.
> +Copyright (C) 2009, 2010 Google Inc.
>
> This program is free software; you can redistribute it and/or modify
> it under the terms of the GNU General Public License as published by
> @@ -35,6 +35,7 @@ import Text.Printf (printf)
>
> import Ganeti.HTools.Utils
> import Ganeti.HTools.Types
> +import qualified Ganeti.HTools.Container as Container
> import qualified Ganeti.HTools.Node as Node
> import qualified Ganeti.HTools.Instance as Instance
>
> @@ -52,7 +53,7 @@ parseDesc desc =
>
> -- | Builds the cluster data from node\/instance files.
> parseData :: String -- ^ Cluster description in text format
> - -> Result (Node.AssocList, Instance.AssocList, [String])
> + -> Result (Node.List, Instance.List, [String])
> parseData ndata = do
> (cnt, disk, mem, cpu) <- parseDesc ndata
> let nodes = map (\idx ->
> @@ -62,10 +63,10 @@ parseData ndata = do
> (fromIntegral cpu) False defaultGroupID
> in (idx, Node.setIdx n idx)
> ) [1..cnt]
> - return (nodes, [], [])
> + return (Container.fromAssocList nodes, Container.empty, [])
>
> -- | Builds the cluster data from node\/instance files.
> loadData :: String -- ^ Cluster description in text format
> - -> IO (Result (Node.AssocList, Instance.AssocList, [String]))
> + -> IO (Result (Node.List, Instance.List, [String]))
> loadData = -- IO monad, just for consistency with the other loaders
> return . parseData
> diff --git a/Ganeti/HTools/Text.hs b/Ganeti/HTools/Text.hs
> index 242620e..b4f9b72 100644
> --- a/Ganeti/HTools/Text.hs
> +++ b/Ganeti/HTools/Text.hs
> @@ -135,7 +135,7 @@ loadInst _ s = fail $ "Invalid/incomplete instance
> data: '" ++ show s ++ "'"
> -- a supplied conversion function.
> loadTabular :: (Monad m, Element a) =>
> [String] -> ([String] -> m (String, a))
> - -> m (NameAssoc, [(Int, a)])
> + -> m (NameAssoc, Container.Container a)
> loadTabular lines_data convert_fn = do
> let rows = map (sepSplit '|') lines_data
> kerows <- mapM convert_fn rows
> @@ -148,7 +148,7 @@ readData = readFile
>
> -- | Builds the cluster data from text input.
> parseData :: String -- ^ Text data
> - -> Result (Node.AssocList, Instance.AssocList, [String])
> + -> Result (Node.List, Instance.List, [String])
> parseData fdata = do
> let flines = lines fdata
> (nlines, ilines) = break null flines
> @@ -163,5 +163,5 @@ parseData fdata = do
>
> -- | Top level function for data loading
> loadData :: String -- ^ Path to the text file
> - -> IO (Result (Node.AssocList, Instance.AssocList, [String]))
> + -> IO (Result (Node.List, Instance.List, [String]))
> loadData afile = readData afile >>= return . parseData
> diff --git a/hscan.hs b/hscan.hs
> index 06def66..ad503f6 100644
> --- a/hscan.hs
> +++ b/hscan.hs
> @@ -89,7 +89,7 @@ fixSlash = map (\x -> if x == '/' then '_' else x)
>
>
> -- | Generates serialized data from loader input
> -processData :: Result (Node.AssocList, Instance.AssocList, [String])
> +processData :: Result (Node.List, Instance.List, [String])
> -> Result (Node.List, Instance.List, String)
> processData input_data = do
> (nl, il, _) <- input_data >>= Loader.mergeData [] [] []
> --
> 1.7.2.3
>
>
LGTM