This improves on the previous change. Currently, the node and instance
lists shipped around during data loading are (again) association lists.
For instances it's not a big issue, but the node list is rewritten
continuously while we assign instances to nodes, and that is very slow.
The code was originally written for small (10-20 node) clusters, but
today, with much larger multi-node clusters, it no longer scales. :)

Rewriting to use Node.List/Instance.List makes a bit of a messy patch,
but it allows us to remove some custom functions for association-list
processing, and also some custom unittests.

At the end, the load time is almost halved, and we spend time now just
in the string parsing code (which is, as we know, slow…).
---
 Ganeti/HTools/Loader.hs |   37 ++++++++++++++-----------------------
 Ganeti/HTools/Luxi.hs   |    4 ++--
 Ganeti/HTools/QC.hs     |   45 +++++++++++++++++++--------------------------
 Ganeti/HTools/Rapi.hs   |    4 ++--
 Ganeti/HTools/Simu.hs   |    9 +++++----
 Ganeti/HTools/Text.hs   |    6 +++---
 hscan.hs                |    2 +-
 7 files changed, 46 insertions(+), 61 deletions(-)

diff --git a/Ganeti/HTools/Loader.hs b/Ganeti/HTools/Loader.hs
index e1f35a0..dcaef80 100644
--- a/Ganeti/HTools/Loader.hs
+++ b/Ganeti/HTools/Loader.hs
@@ -37,9 +37,7 @@ module Ganeti.HTools.Loader
     , Request(..)
     ) where
 
-import Data.Function (on)
 import Data.List
-import Data.Maybe (fromJust)
 import qualified Data.Map as M
 import Text.Printf (printf)
 
@@ -93,35 +91,29 @@ lookupInstance kti inst =
 -- | Given a list of elements (and their names), assign indices to them.
 assignIndices :: (Element a) =>
                  [(String, a)]
-              -> (NameAssoc, [(Int, a)])
+              -> (NameAssoc, Container.Container a)
 assignIndices nodes =
   let (na, idx_node) =
           unzip . map (\ (idx, (k, v)) -> ((k, idx), (idx, setIdx v idx)))
           . zip [0..] $ nodes
-  in (M.fromList na, idx_node)
-
--- | Assoc element comparator
-assocEqual :: (Eq a) => (a, b) -> (a, b) -> Bool
-assocEqual = (==) `on` fst
+  in (M.fromList na, Container.fromAssocList idx_node)
 
 -- | For each instance, add its index to its primary and secondary nodes.
-fixNodes :: [(Ndx, Node.Node)]
+fixNodes :: Node.List
          -> Instance.Instance
-         -> [(Ndx, Node.Node)]
+         -> Node.List
 fixNodes accu inst =
     let
         pdx = Instance.pNode inst
         sdx = Instance.sNode inst
-        pold = fromJust $ lookup pdx accu
+        pold = Container.find pdx accu
         pnew = Node.setPri pold inst
-        ac1 = deleteBy assocEqual (pdx, pold) accu
-        ac2 = (pdx, pnew):ac1
+        ac2 = Container.add pdx pnew accu
     in
       if sdx /= Node.noSecondary
-      then let sold = fromJust $ lookup sdx accu
+      then let sold = Container.find sdx accu
                snew = Node.setSec sold inst
-               ac3 = deleteBy assocEqual (sdx, sold) ac2
-           in (sdx, snew):ac3
+           in Container.add sdx snew ac2
       else ac2
 
 -- | Remove non-selected tags from the exclusion list
@@ -168,11 +160,11 @@ commonSuffix nl il =
 mergeData :: [(String, DynUtil)]  -- ^ Instance utilisation data
           -> [String]             -- ^ Exclusion tags
           -> [String]             -- ^ Untouchable instances
-          -> (Node.AssocList, Instance.AssocList, [String])
+          -> (Node.List, Instance.List, [String])
           -- ^ Data from backends
           -> Result (Node.List, Instance.List, [String])
-mergeData um extags exinsts (nl, il, tags) =
-  let il2 = Container.fromAssocList il
+mergeData um extags exinsts (nl, il2, tags) =
+  let il = Container.elems il2
       il3 = foldl' (\im (name, n_util) ->
                         case Container.findByName im name of
                           Nothing -> im -- skipping unknown instance
@@ -184,10 +176,9 @@ mergeData um extags exinsts (nl, il, tags) =
       il4 = Container.map (filterExTags allextags .
                            updateMovable exinsts) il3
       nl2 = foldl' fixNodes nl (Container.elems il4)
-      nl3 = Container.fromAssocList
-            (map (\ (k, v) -> (k, Node.buildPeers v il4)) nl2)
-      node_names = map (Node.name . snd) nl
-      inst_names = map (Instance.name . snd) il
+      nl3 = Container.map (\node -> Node.buildPeers node il4) nl2
+      node_names = map Node.name (Container.elems nl)
+      inst_names = map Instance.name il
       common_suffix = longestDomain (node_names ++ inst_names)
       snl = Container.map (computeAlias common_suffix) nl3
       sil = Container.map (computeAlias common_suffix) il4
diff --git a/Ganeti/HTools/Luxi.hs b/Ganeti/HTools/Luxi.hs
index 24441b3..cc80908 100644
--- a/Ganeti/HTools/Luxi.hs
+++ b/Ganeti/HTools/Luxi.hs
@@ -162,7 +162,7 @@ readData master =
        )
 
 parseData :: (Result JSValue, Result JSValue, Result JSValue)
-          -> Result (Node.AssocList, Instance.AssocList, [String])
+          -> Result (Node.List, Instance.List, [String])
 parseData (nodes, instances, cinfo) = do
   node_data <- nodes >>= getNodes
   let (node_names, node_idx) = assignIndices node_data
@@ -173,5 +173,5 @@ parseData (nodes, instances, cinfo) = do
 
 -- | Top level function for data loading
 loadData :: String -- ^ Unix socket to use as source
-            -> IO (Result (Node.AssocList, Instance.AssocList, [String]))
+            -> IO (Result (Node.List, Instance.List, [String]))
 loadData master = readData master >>= return . parseData
diff --git a/Ganeti/HTools/QC.hs b/Ganeti/HTools/QC.hs
index a94b4a1..b4ab4b0 100644
--- a/Ganeti/HTools/QC.hs
+++ b/Ganeti/HTools/QC.hs
@@ -110,7 +110,7 @@ makeSmallCluster node count =
     let fn = Node.buildPeers node Container.empty
         namelst = map (\n -> (Node.name n, n)) (replicate count fn)
         (_, nlst) = Loader.assignIndices namelst
-    in Container.fromAssocList nlst
+    in nlst
 
 -- | Checks if a node is "big" enough
 isNodeBig :: Node.Node -> Int -> Bool
@@ -448,11 +448,12 @@ prop_Text_Load_Instance name mem dsk vcpus status pnode snode pdx sdx =
         ndx = if null snode
               then [(pnode, pdx)]
               else [(pnode, pdx), (snode, rsdx)]
+        nl = Data.Map.fromList ndx
         tags = ""
-        inst = Text.loadInst ndx
+        inst = Text.loadInst nl
                [name, mem_s, dsk_s, vcpus_s, status, pnode, snode, tags]::
                Maybe (String, Instance.Instance)
-        fail1 = Text.loadInst ndx
+        fail1 = Text.loadInst nl
                [name, mem_s, dsk_s, vcpus_s, status, pnode, pnode, tags]::
                Maybe (String, Instance.Instance)
         _types = ( name::String, mem::Int, dsk::Int
@@ -473,7 +474,8 @@ prop_Text_Load_Instance name mem dsk vcpus status pnode snode pdx sdx =
              isNothing fail1)
 
 prop_Text_Load_InstanceFail ktn fields =
-    length fields /= 8 ==> isNothing $ Text.loadInst ktn fields
+    length fields /= 8 ==> isNothing $ Text.loadInst nl fields
+    where nl = Data.Map.fromList ktn
 
 prop_Text_Load_Node name tm nm fm td fd tc fo =
     let conv v = if v < 0
@@ -822,35 +824,27 @@ testJobs =
 -- | Loader tests
 
 prop_Loader_lookupNode ktn inst node =
-  isJust (Loader.lookupNode ktn inst node) == (node `elem` names)
-    where names = map fst ktn
+  Loader.lookupNode nl inst node == Data.Map.lookup node nl
+  where nl = Data.Map.fromList ktn
 
 prop_Loader_lookupInstance kti inst =
-  isJust (Loader.lookupInstance kti inst) == (inst `elem` names)
-    where names = map fst kti
-
-prop_Loader_lookupInstanceIdx kti inst =
-  case (Loader.lookupInstance kti inst,
-        findIndex (\p -> fst p == inst) kti) of
-    (Nothing, Nothing) -> True
-    (Just idx, Just ex) -> idx == snd (kti !! ex)
-    _ -> False
-
-prop_Loader_assignIndices enames =
-  length nassoc == length enames &&
-  length kt == length enames &&
-  (if not (null enames)
-   then maximum (map fst kt) == length enames - 1
+  Loader.lookupInstance il inst == Data.Map.lookup inst il
+  where il = Data.Map.fromList kti
+
+prop_Loader_assignIndices nodes =
+  Data.Map.size nassoc == length nodes &&
+  Container.size kt == length nodes &&
+  (if not (null nodes)
+   then maximum (IntMap.keys kt) == length nodes - 1
    else True)
-  where (nassoc, kt) = Loader.assignIndices enames
-        _types = enames::[(String, Node.Node)]
+  where (nassoc, kt) = Loader.assignIndices (map (\n -> (Node.name n, n)) nodes)
 
 
 -- | Checks that the number of primary instances recorded on the nodes
 -- is zero
 prop_Loader_mergeData ns =
-  let na = map (\n -> (Node.idx n, n)) ns
-  in case Loader.mergeData [] [] [] (na, [], []) of
+  let na = Container.fromAssocList $ map (\n -> (Node.idx n, n)) ns
+  in case Loader.mergeData [] [] [] (na, Container.empty, []) of
     Types.Bad _ -> False
     Types.Ok (nl, il, _) ->
       let nodes = Container.elems nl
@@ -861,7 +855,6 @@ prop_Loader_mergeData ns =
 testLoader =
   [ run prop_Loader_lookupNode
   , run prop_Loader_lookupInstance
-  , run prop_Loader_lookupInstanceIdx
   , run prop_Loader_assignIndices
   , run prop_Loader_mergeData
   ]
diff --git a/Ganeti/HTools/Rapi.hs b/Ganeti/HTools/Rapi.hs
index 37ac578..58df9bd 100644
--- a/Ganeti/HTools/Rapi.hs
+++ b/Ganeti/HTools/Rapi.hs
@@ -131,7 +131,7 @@ readData master = do
 
 -- | Builds the cluster data from the raw Rapi content
 parseData :: (Result String, Result String, Result String)
-          -> Result (Node.AssocList, Instance.AssocList, [String])
+          -> Result (Node.List, Instance.List, [String])
 parseData (node_body, inst_body, tags_body) = do
   node_data <- node_body >>= getNodes
   let (node_names, node_idx) = assignIndices node_data
@@ -142,5 +142,5 @@ parseData (node_body, inst_body, tags_body) = do
 
 -- | Top level function for data loading
 loadData :: String -- ^ Cluster or URL to use as source
-            -> IO (Result (Node.AssocList, Instance.AssocList, [String]))
+            -> IO (Result (Node.List, Instance.List, [String]))
 loadData master = readData master >>= return . parseData
diff --git a/Ganeti/HTools/Simu.hs b/Ganeti/HTools/Simu.hs
index c29f3f5..47618b9 100644
--- a/Ganeti/HTools/Simu.hs
+++ b/Ganeti/HTools/Simu.hs
@@ -6,7 +6,7 @@ This module holds the code for parsing a cluster description.
 
 {-
 
-Copyright (C) 2009 Google Inc.
+Copyright (C) 2009, 2010 Google Inc.
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -35,6 +35,7 @@ import Text.Printf (printf)
 
 import Ganeti.HTools.Utils
 import Ganeti.HTools.Types
+import qualified Ganeti.HTools.Container as Container
 import qualified Ganeti.HTools.Node as Node
 import qualified Ganeti.HTools.Instance as Instance
 
@@ -52,7 +53,7 @@ parseDesc desc =
 
 -- | Builds the cluster data from node\/instance files.
 parseData :: String -- ^ Cluster description in text format
-         -> Result (Node.AssocList, Instance.AssocList, [String])
+         -> Result (Node.List, Instance.List, [String])
 parseData ndata = do
   (cnt, disk, mem, cpu) <- parseDesc ndata
   let nodes = map (\idx ->
@@ -62,10 +63,10 @@ parseData ndata = do
                             (fromIntegral cpu) False defaultGroupID
                     in (idx, Node.setIdx n idx)
                   ) [1..cnt]
-  return (nodes, [], [])
+  return (Container.fromAssocList nodes, Container.empty, [])
 
 -- | Builds the cluster data from node\/instance files.
 loadData :: String -- ^ Cluster description in text format
-         -> IO (Result (Node.AssocList, Instance.AssocList, [String]))
+         -> IO (Result (Node.List, Instance.List, [String]))
 loadData = -- IO monad, just for consistency with the other loaders
   return . parseData
diff --git a/Ganeti/HTools/Text.hs b/Ganeti/HTools/Text.hs
index 242620e..b4f9b72 100644
--- a/Ganeti/HTools/Text.hs
+++ b/Ganeti/HTools/Text.hs
@@ -135,7 +135,7 @@ loadInst _ s = fail $ "Invalid/incomplete instance data: '" ++ show s ++ "'"
 -- a supplied conversion function.
 loadTabular :: (Monad m, Element a) =>
                [String] -> ([String] -> m (String, a))
-            -> m (NameAssoc, [(Int, a)])
+            -> m (NameAssoc, Container.Container a)
 loadTabular lines_data convert_fn = do
   let rows = map (sepSplit '|') lines_data
   kerows <- mapM convert_fn rows
@@ -148,7 +148,7 @@ readData = readFile
 
 -- | Builds the cluster data from text input.
 parseData :: String -- ^ Text data
-          -> Result (Node.AssocList, Instance.AssocList, [String])
+          -> Result (Node.List, Instance.List, [String])
 parseData fdata = do
   let flines = lines fdata
       (nlines, ilines) = break null flines
@@ -163,5 +163,5 @@ parseData fdata = do
 
 -- | Top level function for data loading
 loadData :: String -- ^ Path to the text file
-         -> IO (Result (Node.AssocList, Instance.AssocList, [String]))
+         -> IO (Result (Node.List, Instance.List, [String]))
 loadData afile = readData afile >>= return . parseData
diff --git a/hscan.hs b/hscan.hs
index 06def66..ad503f6 100644
--- a/hscan.hs
+++ b/hscan.hs
@@ -89,7 +89,7 @@ fixSlash = map (\x -> if x == '/' then '_' else x)
 
 
 -- | Generates serialized data from loader input
-processData :: Result (Node.AssocList, Instance.AssocList, [String])
+processData :: Result (Node.List, Instance.List, [String])
             -> Result (Node.List, Instance.List, String)
 processData input_data = do
   (nl, il, _) <- input_data >>= Loader.mergeData [] [] []
-- 
1.7.2.3

Reply via email to