Re: [PATCH stable-2.16] Optimize LXC hypervisor GetAllInstancesInfo

'Viktor Bachraty' via ganeti-devel Thu, 10 Mar 2016 10:36:44 -0800


On Thursday, March 10, 2016 at 4:36:21 PM UTC, Brian Foley wrote:
>
> On Thu, Mar 10, 2016 at 11:22:02AM +0000, 'Viktor Bachraty' via 
> ganeti-devel wrote: 
> > Don't list all running instances for each instance, generate the list 
> > only once at start. Unfortunately, listing (as most other operations) 
> > is done by executing a cmdline tool in bash, which is too slow for 
> > frequent operations. This caused RPC to time out after 60s in case there 
> > are >200 instances, with this patch it is <1s. 
>
> Great! 
>
> First comment: Could we make this patch against stable-2.15 since we're 
> doing 
> other performance work there too. 
>


As discussed IRL, I'll send out patches against 2.15.
 

>
> > diff --git a/lib/hypervisor/hv_lxc.py b/lib/hypervisor/hv_lxc.py 
> > index 514896f..ff25eac 100644 
> > --- a/lib/hypervisor/hv_lxc.py 
> > +++ b/lib/hypervisor/hv_lxc.py 
> > @@ -476,19 +476,26 @@ class LXCHypervisor(hv_base.BaseHypervisor): 
> >      """Get the list of running instances. 
> >   
> >      """ 
> > -    return [iinfo[0] for iinfo in self.GetAllInstancesInfo()] 
> > +    return self._ListAliveInstances() 
> D'oh! Well spotted. Why do a bunch of extra work only to throw it away? 
>
> >    @classmethod 
> >    def _IsInstanceAlive(cls, instance_name): 
> >      """Return True if instance is alive. 
> >   
> >      """ 
> > +    return instance_name in cls._ListAliveInstances() 
> This probably isn't a performance bottleneck for the cases you've looked 
> at, 
> but we could just query the instance we're interested in rather than all 
> of 
> them: 
>    ... RunCmd(["lxc-ls", "--running", re.escape(instance_name)]) 
>
 
Good point, I think it won't make a huge difference, but it is definitely 
nicer.
 

> > +  @classmethod 
> > +  def _ListAliveInstances(cls): 
> > +    """Return list of alive instances. 
> > + 
> > +    """ 
> >      result = utils.RunCmd(["lxc-ls", "--running"]) 
> >      if result.failed: 
> >        raise HypervisorError("Failed to get running LXC containers list: 
> %s" % 
> >                              result.output) 
> >   
> > -    return instance_name in result.stdout.split() 
> > +    return result.stdout.split() 
> >   
> >    def GetInstanceInfo(self, instance_name, hvparams=None): 
> >      """Get instance properties. 
> > @@ -501,7 +508,20 @@ class LXCHypervisor(hv_base.BaseHypervisor): 
> >      @return: (name, id, memory, vcpus, stat, times) 
> >   
> >      """ 
> > -    if not self._IsInstanceAlive(instance_name): 
> > +    return self._GetInstanceInfoInner(instance_name, True) 
> > + 
> > +  def _GetInstanceInfoInner(self, instance_name, verify_alive): 
> > +    """Get instance properties. 
> > + 
> > +    @type instance_name: string 
> > +    @param instance_name: the instance name 
> > +    @type verify_alive: bool 
> > +    @param verify_alive: True if instance state should be verified 
> > +    @rtype: tuple of strings 
> > +    @return: (name, id, memory, vcpus, stat, times) 
> > + 
> > +    """ 
> > +    if verify_alive and not self._IsInstanceAlive(instance_name): 
> >        return None 
> >   
> >      cpu_list = self._GetCgroupCpuList(instance_name) 
> > @@ -519,10 +539,13 @@ class LXCHypervisor(hv_base.BaseHypervisor): 
> >   
> >      """ 
> >      data = [] 
> > +    running_instances = self._ListAliveInstances() 
> If we get to this line, we've run lxc-ls twice in succession (the first 
> time in 
> _IsInstanceAlive()). Could we run it once and inline the _IsInstanceAlive 
> test 
> so we reuse the results? 
>
> >      filter_fn = lambda x: 
> os.path.isdir(utils.PathJoin(self._INSTANCE_DIR, x)) 
> >      for dirname in filter(filter_fn, os.listdir(self._INSTANCE_DIR)): 
> > +      if dirname not in running_instances: 
> > +        continue 
> >        try: 
> > -        info = self.GetInstanceInfo(dirname) 
> > +        info = self._GetInstanceInfoInner(dirname, False) 
> >        except errors.HypervisorError: 
> >          continue 
> >        if info: 
> > diff --git a/test/py/ganeti.hypervisor.hv_lxc_unittest.py b/test/py/
> ganeti.hypervisor.hv_lxc_unittest.py 
> > index dd004f6..430f681 100755 
> > --- a/test/py/ganeti.hypervisor.hv_lxc_unittest.py 
> > +++ b/test/py/ganeti.hypervisor.hv_lxc_unittest.py 
> > @@ -114,6 +114,17 @@ class TestLXCIsInstanceAlive(unittest.TestCase): 
> >      runcmd_mock.return_value = RunResultOk("inst1 inst2foo") 
> >      self.assertFalse(LXCHypervisor._IsInstanceAlive("inst2")) 
> >   
> > +class TestLXCListInstances(LXCHypervisorTestCase): 
> > +  @patch_object(utils, "RunCmd") 
> > +  def testRunningInstnaces(self, runcmd_mock): 
> > +    instance_list = ["inst1", "inst2", "inst3", "inst4", "inst5"] 
> > +    runcmd_mock.return_value = RunResultOk("inst1 inst2 inst3\ninst4 
> inst5") 
> > +    self.assertEqual(self.hv.ListInstances(), instance_list) 
> > + 
> > +  @patch_object(utils, "RunCmd") 
> > +  def testEmpty(self, runcmd_mock): 
> > +    runcmd_mock.return_value = RunResultOk(" ") 
> > +    self.assertEqual(self.hv.ListInstances(), []) 
> >   
> >  class TestLXCHypervisorGetInstanceInfo(LXCHypervisorTestCase): 
> >    def setUp(self): 
> > -- 
> > 2.7.0.rc3.207.g0ac5344 
> > 
>

Re: [PATCH stable-2.16] Optimize LXC hypervisor GetAllInstancesInfo

Reply via email to