On Thursday, March 10, 2016 at 4:36:21 PM UTC, Brian Foley wrote:
>
> On Thu, Mar 10, 2016 at 11:22:02AM +0000, 'Viktor Bachraty' via
> ganeti-devel wrote:
> > Don't list all running instances for each instance, generate the list
> > only once at start. Unfortunately, listing (as most other operations)
> > is done by executing a cmdline tool in bash, which is too slow for
> > frequent operations. This caused RPC to time out after 60s in case there
> > are >200 instances, with this patch it is <1s.
>
> Great!
>
> First comment: Could we make this patch against stable-2.15 since we're
> doing
> other performance work there too.
>
As discussed IRL, I'll send out patches against 2.15.
>
> > diff --git a/lib/hypervisor/hv_lxc.py b/lib/hypervisor/hv_lxc.py
> > index 514896f..ff25eac 100644
> > --- a/lib/hypervisor/hv_lxc.py
> > +++ b/lib/hypervisor/hv_lxc.py
> > @@ -476,19 +476,26 @@ class LXCHypervisor(hv_base.BaseHypervisor):
> > """Get the list of running instances.
> >
> > """
> > - return [iinfo[0] for iinfo in self.GetAllInstancesInfo()]
> > + return self._ListAliveInstances()
> D'oh! Well spotted. Why do a bunch of extra work only to throw it away?
>
> > @classmethod
> > def _IsInstanceAlive(cls, instance_name):
> > """Return True if instance is alive.
> >
> > """
> > + return instance_name in cls._ListAliveInstances()
> This probably isn't a performance bottleneck for the cases you've looked
> at,
> but we could just query the instance we're interested in rather than all
> of
> them:
> ... RunCmd(["lxc-ls", "--running", re.escape(instance_name)])
>
Good point, I think it won't make a huge difference, but it is definitely
nicer.
> > + @classmethod
> > + def _ListAliveInstances(cls):
> > + """Return list of alive instances.
> > +
> > + """
> > result = utils.RunCmd(["lxc-ls", "--running"])
> > if result.failed:
> > raise HypervisorError("Failed to get running LXC containers list:
> %s" %
> > result.output)
> >
> > - return instance_name in result.stdout.split()
> > + return result.stdout.split()
> >
> > def GetInstanceInfo(self, instance_name, hvparams=None):
> > """Get instance properties.
> > @@ -501,7 +508,20 @@ class LXCHypervisor(hv_base.BaseHypervisor):
> > @return: (name, id, memory, vcpus, stat, times)
> >
> > """
> > - if not self._IsInstanceAlive(instance_name):
> > + return self._GetInstanceInfoInner(instance_name, True)
> > +
> > + def _GetInstanceInfoInner(self, instance_name, verify_alive):
> > + """Get instance properties.
> > +
> > + @type instance_name: string
> > + @param instance_name: the instance name
> > + @type verify_alive: bool
> > + @param verify_alive: True if instance state should be verified
> > + @rtype: tuple of strings
> > + @return: (name, id, memory, vcpus, stat, times)
> > +
> > + """
> > + if verify_alive and not self._IsInstanceAlive(instance_name):
> > return None
> >
> > cpu_list = self._GetCgroupCpuList(instance_name)
> > @@ -519,10 +539,13 @@ class LXCHypervisor(hv_base.BaseHypervisor):
> >
> > """
> > data = []
> > + running_instances = self._ListAliveInstances()
> If we get to this line, we've run lxc-ls twice in succession (the first
> time in
> _IsInstanceAlive()). Could we run it once and inline the _IsInstanceAlive
> test
> so we reuse the results?
>
> > filter_fn = lambda x:
> os.path.isdir(utils.PathJoin(self._INSTANCE_DIR, x))
> > for dirname in filter(filter_fn, os.listdir(self._INSTANCE_DIR)):
> > + if dirname not in running_instances:
> > + continue
> > try:
> > - info = self.GetInstanceInfo(dirname)
> > + info = self._GetInstanceInfoInner(dirname, False)
> > except errors.HypervisorError:
> > continue
> > if info:
> > diff --git a/test/py/ganeti.hypervisor.hv_lxc_unittest.py b/test/py/
> ganeti.hypervisor.hv_lxc_unittest.py
> > index dd004f6..430f681 100755
> > --- a/test/py/ganeti.hypervisor.hv_lxc_unittest.py
> > +++ b/test/py/ganeti.hypervisor.hv_lxc_unittest.py
> > @@ -114,6 +114,17 @@ class TestLXCIsInstanceAlive(unittest.TestCase):
> > runcmd_mock.return_value = RunResultOk("inst1 inst2foo")
> > self.assertFalse(LXCHypervisor._IsInstanceAlive("inst2"))
> >
> > +class TestLXCListInstances(LXCHypervisorTestCase):
> > + @patch_object(utils, "RunCmd")
> > + def testRunningInstnaces(self, runcmd_mock):
> > + instance_list = ["inst1", "inst2", "inst3", "inst4", "inst5"]
> > + runcmd_mock.return_value = RunResultOk("inst1 inst2 inst3\ninst4
> inst5")
> > + self.assertEqual(self.hv.ListInstances(), instance_list)
> > +
> > + @patch_object(utils, "RunCmd")
> > + def testEmpty(self, runcmd_mock):
> > + runcmd_mock.return_value = RunResultOk(" ")
> > + self.assertEqual(self.hv.ListInstances(), [])
> >
> > class TestLXCHypervisorGetInstanceInfo(LXCHypervisorTestCase):
> > def setUp(self):
> > --
> > 2.7.0.rc3.207.g0ac5344
> >
>