[libvirt] [PATCH 1/3] qemu: Move memory limit computation to a reusable function

2013-06-28 Thread Jiri Denemark
---
 src/qemu/qemu_cgroup.c | 21 ++---
 src/qemu/qemu_domain.c | 29 +
 src/qemu/qemu_domain.h |  2 ++
 3 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index 5f54ca6..22bf78e 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -461,9 +461,7 @@ static int
 qemuSetupMemoryCgroup(virDomainObjPtr vm)
 {
 qemuDomainObjPrivatePtr priv = vm->privateData;
-unsigned long long hard_limit;
 int rc;
-int i;
 
 if (!virCgroupHasController(priv->cgroup,VIR_CGROUP_CONTROLLER_MEMORY)) {
 if (vm->def->mem.hard_limit != 0 ||
@@ -477,23 +475,8 @@ qemuSetupMemoryCgroup(virDomainObjPtr vm)
 }
 }
 
-hard_limit = vm->def->mem.hard_limit;
-if (!hard_limit) {
-/* If there is no hard_limit set, set a reasonable one to avoid
- * system thrashing caused by exploited qemu.  A 'reasonable
- * limit' has been chosen:
- * (1 + k) * (domain memory + total video memory) + (32MB for
- * cache per each disk) + F
- * where k = 0.5 and F = 200MB.  The cache for disks is important as
- * kernel cache on the host side counts into the RSS limit. */
-hard_limit = vm->def->mem.max_balloon;
-for (i = 0; i < vm->def->nvideos; i++)
-hard_limit += vm->def->videos[i]->vram;
-hard_limit = hard_limit * 1.5 + 204800;
-hard_limit += vm->def->ndisks * 32768;
-}
-
-rc = virCgroupSetMemoryHardLimit(priv->cgroup, hard_limit);
+rc = virCgroupSetMemoryHardLimit(priv->cgroup,
+ qemuDomainMemoryLimit(vm->def));
 if (rc != 0) {
 virReportSystemError(-rc,
  _("Unable to set memory hard limit for domain 
%s"),
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 8d79066..77b94ec 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -2181,3 +2181,32 @@ cleanup:
 virObjectUnref(cfg);
 return ret;
 }
+
+
+unsigned long long
+qemuDomainMemoryLimit(virDomainDefPtr def)
+{
+unsigned long long mem;
+int i;
+
+if (def->mem.hard_limit) {
+mem = def->mem.hard_limit;
+} else {
+/* If there is no hard_limit set, compute a reasonable one to avoid
+ * system thrashing caused by exploited qemu.  A 'reasonable
+ * limit' has been chosen:
+ * (1 + k) * (domain memory + total video memory) + (32MB for
+ * cache per each disk) + F
+ * where k = 0.5 and F = 200MB.  The cache for disks is important as
+ * kernel cache on the host side counts into the RSS limit.
+ */
+mem = def->mem.max_balloon;
+for (i = 0; i < def->nvideos; i++)
+mem += def->videos[i]->vram;
+mem *= 1.5;
+mem += def->ndisks * 32768;
+mem += 204800;
+}
+
+return mem;
+}
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 068a4c3..ade2095 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -351,4 +351,6 @@ extern virDomainXMLPrivateDataCallbacks 
virQEMUDriverPrivateDataCallbacks;
 extern virDomainXMLNamespace virQEMUDriverDomainXMLNamespace;
 extern virDomainDefParserConfig virQEMUDriverDomainDefParserConfig;
 
+unsigned long long qemuDomainMemoryLimit(virDomainDefPtr def);
+
 #endif /* __QEMU_DOMAIN_H__ */
-- 
1.8.2.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 1/3] qemu: Move memory limit computation to a reusable function

2013-06-28 Thread Laine Stump
On 06/28/2013 11:04 AM, Jiri Denemark wrote:
> ---
>  src/qemu/qemu_cgroup.c | 21 ++---
>  src/qemu/qemu_domain.c | 29 +
>  src/qemu/qemu_domain.h |  2 ++
>  3 files changed, 33 insertions(+), 19 deletions(-)
>
> diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
> index 5f54ca6..22bf78e 100644
> --- a/src/qemu/qemu_cgroup.c
> +++ b/src/qemu/qemu_cgroup.c
> @@ -461,9 +461,7 @@ static int
>  qemuSetupMemoryCgroup(virDomainObjPtr vm)
>  {
>  qemuDomainObjPrivatePtr priv = vm->privateData;
> -unsigned long long hard_limit;
>  int rc;
> -int i;
>  
>  if (!virCgroupHasController(priv->cgroup,VIR_CGROUP_CONTROLLER_MEMORY)) {
>  if (vm->def->mem.hard_limit != 0 ||
> @@ -477,23 +475,8 @@ qemuSetupMemoryCgroup(virDomainObjPtr vm)
>  }
>  }
>  
> -hard_limit = vm->def->mem.hard_limit;
> -if (!hard_limit) {
> -/* If there is no hard_limit set, set a reasonable one to avoid
> - * system thrashing caused by exploited qemu.  A 'reasonable
> - * limit' has been chosen:
> - * (1 + k) * (domain memory + total video memory) + (32MB for
> - * cache per each disk) + F
> - * where k = 0.5 and F = 200MB.  The cache for disks is important as
> - * kernel cache on the host side counts into the RSS limit. */
> -hard_limit = vm->def->mem.max_balloon;
> -for (i = 0; i < vm->def->nvideos; i++)
> -hard_limit += vm->def->videos[i]->vram;
> -hard_limit = hard_limit * 1.5 + 204800;
> -hard_limit += vm->def->ndisks * 32768;
> -}
> -
> -rc = virCgroupSetMemoryHardLimit(priv->cgroup, hard_limit);
> +rc = virCgroupSetMemoryHardLimit(priv->cgroup,
> + qemuDomainMemoryLimit(vm->def));
>  if (rc != 0) {
>  virReportSystemError(-rc,
>   _("Unable to set memory hard limit for domain 
> %s"),
> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> index 8d79066..77b94ec 100644
> --- a/src/qemu/qemu_domain.c
> +++ b/src/qemu/qemu_domain.c
> @@ -2181,3 +2181,32 @@ cleanup:
>  virObjectUnref(cfg);
>  return ret;
>  }
> +
> +
> +unsigned long long
> +qemuDomainMemoryLimit(virDomainDefPtr def)
> +{
> +unsigned long long mem;
> +int i;
> +
> +if (def->mem.hard_limit) {
> +mem = def->mem.hard_limit;
> +} else {
> +/* If there is no hard_limit set, compute a reasonable one to avoid
> + * system thrashing caused by exploited qemu.  A 'reasonable
> + * limit' has been chosen:
> + * (1 + k) * (domain memory + total video memory) + (32MB for
> + * cache per each disk) + F
> + * where k = 0.5 and F = 200MB.  The cache for disks is important as
> + * kernel cache on the host side counts into the RSS limit.
> + */
> +mem = def->mem.max_balloon;
> +for (i = 0; i < def->nvideos; i++)
> +mem += def->videos[i]->vram;
> +mem *= 1.5;
> +mem += def->ndisks * 32768;
> +mem += 204800;
> +}


I know you're just doing a cut-paste here, but I'm curious how we
arrived at this formula. On systems using vfio, it ends up locking *way*
more memory than previously. For my test guest with 4GB of ram assigned,
the old computation would lock 5GB of ram for the qemu process. With the
new method in this patch it ends up locking just about 7GB. I don't know
if that's a case of the original simple VFIO amount being inadequate, or
the new method going overboard; just thought there should be at least
minimal discussion before it went in (note that I tried it on a guest
with VFIO passthrough and it does work without problems.)

Aside from that, ACK to the idea.

> +
> +return mem;
> +}
> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
> index 068a4c3..ade2095 100644
> --- a/src/qemu/qemu_domain.h
> +++ b/src/qemu/qemu_domain.h
> @@ -351,4 +351,6 @@ extern virDomainXMLPrivateDataCallbacks 
> virQEMUDriverPrivateDataCallbacks;
>  extern virDomainXMLNamespace virQEMUDriverDomainXMLNamespace;
>  extern virDomainDefParserConfig virQEMUDriverDomainDefParserConfig;
>  
> +unsigned long long qemuDomainMemoryLimit(virDomainDefPtr def);
> +
>  #endif /* __QEMU_DOMAIN_H__ */

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 1/3] qemu: Move memory limit computation to a reusable function

2013-07-01 Thread Jiri Denemark
On Fri, Jun 28, 2013 at 17:24:42 -0400, Laine Stump wrote:
> On 06/28/2013 11:04 AM, Jiri Denemark wrote:
> > ---
> >  src/qemu/qemu_cgroup.c | 21 ++---
> >  src/qemu/qemu_domain.c | 29 +
> >  src/qemu/qemu_domain.h |  2 ++
> >  3 files changed, 33 insertions(+), 19 deletions(-)
> >
> > diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
> > index 5f54ca6..22bf78e 100644
> > --- a/src/qemu/qemu_cgroup.c
> > +++ b/src/qemu/qemu_cgroup.c
> > @@ -461,9 +461,7 @@ static int
> >  qemuSetupMemoryCgroup(virDomainObjPtr vm)
> >  {
> >  qemuDomainObjPrivatePtr priv = vm->privateData;
> > -unsigned long long hard_limit;
> >  int rc;
> > -int i;
> >  
> >  if 
> > (!virCgroupHasController(priv->cgroup,VIR_CGROUP_CONTROLLER_MEMORY)) {
> >  if (vm->def->mem.hard_limit != 0 ||
> > @@ -477,23 +475,8 @@ qemuSetupMemoryCgroup(virDomainObjPtr vm)
> >  }
> >  }
> >  
> > -hard_limit = vm->def->mem.hard_limit;
> > -if (!hard_limit) {
> > -/* If there is no hard_limit set, set a reasonable one to avoid
> > - * system thrashing caused by exploited qemu.  A 'reasonable
> > - * limit' has been chosen:
> > - * (1 + k) * (domain memory + total video memory) + (32MB for
> > - * cache per each disk) + F
> > - * where k = 0.5 and F = 200MB.  The cache for disks is important 
> > as
> > - * kernel cache on the host side counts into the RSS limit. */
> > -hard_limit = vm->def->mem.max_balloon;
> > -for (i = 0; i < vm->def->nvideos; i++)
> > -hard_limit += vm->def->videos[i]->vram;
> > -hard_limit = hard_limit * 1.5 + 204800;
> > -hard_limit += vm->def->ndisks * 32768;
> > -}
> > -
> > -rc = virCgroupSetMemoryHardLimit(priv->cgroup, hard_limit);
> > +rc = virCgroupSetMemoryHardLimit(priv->cgroup,
> > + qemuDomainMemoryLimit(vm->def));
> >  if (rc != 0) {
> >  virReportSystemError(-rc,
> >   _("Unable to set memory hard limit for domain 
> > %s"),
> > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
> > index 8d79066..77b94ec 100644
> > --- a/src/qemu/qemu_domain.c
> > +++ b/src/qemu/qemu_domain.c
> > @@ -2181,3 +2181,32 @@ cleanup:
> >  virObjectUnref(cfg);
> >  return ret;
> >  }
> > +
> > +
> > +unsigned long long
> > +qemuDomainMemoryLimit(virDomainDefPtr def)
> > +{
> > +unsigned long long mem;
> > +int i;
> > +
> > +if (def->mem.hard_limit) {
> > +mem = def->mem.hard_limit;
> > +} else {
> > +/* If there is no hard_limit set, compute a reasonable one to avoid
> > + * system thrashing caused by exploited qemu.  A 'reasonable
> > + * limit' has been chosen:
> > + * (1 + k) * (domain memory + total video memory) + (32MB for
> > + * cache per each disk) + F
> > + * where k = 0.5 and F = 200MB.  The cache for disks is important 
> > as
> > + * kernel cache on the host side counts into the RSS limit.
> > + */
> > +mem = def->mem.max_balloon;
> > +for (i = 0; i < def->nvideos; i++)
> > +mem += def->videos[i]->vram;
> > +mem *= 1.5;
> > +mem += def->ndisks * 32768;
> > +mem += 204800;
> > +}
> 
> 
> I know you're just doing a cut-paste here, but I'm curious how we
> arrived at this formula. On systems using vfio, it ends up locking *way*
> more memory than previously. For my test guest with 4GB of ram assigned,
> the old computation would lock 5GB of ram for the qemu process. With the
> new method in this patch it ends up locking just about 7GB.

That's because previously this computation was made only for hard_limit
and the limit for locking was just completely ignoring it and used
max_balloon + 1GB. However, this computation does not take VFIO into account in
any way so I just combined the two computations by adding the extra 1GB
for VFIO into this shared code (in patch 2/3). I'm not sure if that's
the right thing to do or not but it's certainly better than before when
the memory locking limit completely ignored the need for VRAM and per-disk
cache. Unfortunately, the original formula was suggested by Avi, who
moved to new challenges. Perhaps others could jump in and share their
opinions (Paolo? :-P). Be careful, though, that we're actually
discussing qemuDomainMemoryLimit code after patch 2/3 is applied, that
is with the VFIO stuff counted in. The complete code of this function
is:

unsigned long long
qemuDomainMemoryLimit(virDomainDefPtr def)
{
unsigned long long mem;
int i;

if (def->mem.hard_limit) {
mem = def->mem.hard_limit;
} else {
/* If there is no hard_limit set, compute a reasonable one to avoid
 * system thrashing caused by exploited qemu.  A 'reasonable
 * limit' has been chosen:
 * (1 + k) * (domain memory + total vid

Re: [libvirt] [PATCH 1/3] qemu: Move memory limit computation to a reusable function

2013-07-02 Thread Paolo Bonzini
Il 02/07/2013 08:34, Jiri Denemark ha scritto:
> I'm not sure if that's
> the right think to do or not but it's certainly better than before when
> memory locking limit completely ignore the need for VRAM and per-disk
> cache. Unfortunately, the original formula was suggested by Avi, who
> moved to new challenges. Perhaps others could jump in and share their
> opinions (Paolo? :-P).

I think the disk cache should not be counted in the memory locking
limit.  Apart from that, the code you posted below makes sense.

Paolo

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 1/3] qemu: Move memory limit computation to a reusable function

2013-07-02 Thread Jiri Denemark
On Tue, Jul 02, 2013 at 12:34:44 +0200, Paolo Bonzini wrote:
> Il 02/07/2013 08:34, Jiri Denemark ha scritto:
> > I'm not sure if that's
> > the right think to do or not but it's certainly better than before when
> > memory locking limit completely ignore the need for VRAM and per-disk
> > cache. Unfortunately, the original formula was suggested by Avi, who
> > moved to new challenges. Perhaps others could jump in and share their
> > opinions (Paolo? :-P).
> 
> I think the disk cache should not be counted in the memory locking
> limit.

Hmm, I guess you're right. However, we're computing a limit and I feel
like allowing QEMU to lock a bit more shouldn't have any bad effects or
am I wrong? On the other hand, it would be pretty easy to let the
function know what kind of limit it's going to compute each time it's
called.

> Apart from that, the code you posted below makes sense.

Even with the 1GB addition for VFIO? I have to admit I'm a bit ignorant
of VFIO but shouldn't that limit be derived from the number of attached
devices (similarly to what we do with disk caches)?

Thanks,

Jirka

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 1/3] qemu: Move memory limit computation to a reusable function

2013-07-02 Thread Paolo Bonzini
Il 02/07/2013 13:40, Jiri Denemark ha scritto:
> On Tue, Jul 02, 2013 at 12:34:44 +0200, Paolo Bonzini wrote:
>> Il 02/07/2013 08:34, Jiri Denemark ha scritto:
>>> I'm not sure if that's
>>> the right think to do or not but it's certainly better than before when
>>> memory locking limit completely ignore the need for VRAM and per-disk
>>> cache. Unfortunately, the original formula was suggested by Avi, who
>>> moved to new challenges. Perhaps others could jump in and share their
>>> opinions (Paolo? :-P).
>>
>> I think the disk cache should not be counted in the memory locking
>> limit.
> 
> Hmm, I guess you're right. However, we're computing a limit and I feel
> like allowing QEMU to lock a bit more shouldn't make any bad effects or
> am I wrong? On the other hand, it would be pretty easy to let the
> function know what kind of limit it's going to compute each time it's
> called.

Yes, both ways are fine.  But you should at least have a comment.

>> Apart from that, the code you posted below makes sense.
> 
> Even with the 1GB addition for VFIO? I have to admit I'm a bit ignorant
> of VFIO but shouldn't that limit be derived from the number of attached
> devices

If that would be the amount of memory reserved for BARs (PCI memory
mapped regions), 1 GB should be enough.  Let's just ask Alex Williamson.

Paolo

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 1/3] qemu: Move memory limit computation to a reusable function

2013-07-02 Thread Alex Williamson
On Tue, 2013-07-02 at 13:51 +0200, Paolo Bonzini wrote:
> Il 02/07/2013 13:40, Jiri Denemark ha scritto:
> > On Tue, Jul 02, 2013 at 12:34:44 +0200, Paolo Bonzini wrote:
> >> Il 02/07/2013 08:34, Jiri Denemark ha scritto:
> >>> I'm not sure if that's
> >>> the right think to do or not but it's certainly better than before when
> >>> memory locking limit completely ignore the need for VRAM and per-disk
> >>> cache. Unfortunately, the original formula was suggested by Avi, who
> >>> moved to new challenges. Perhaps others could jump in and share their
> >>> opinions (Paolo? :-P).
> >>
> >> I think the disk cache should not be counted in the memory locking
> >> limit.
> > 
> > Hmm, I guess you're right. However, we're computing a limit and I feel
> > like allowing QEMU to lock a bit more shouldn't make any bad effects or
> > am I wrong? On the other hand, it would be pretty easy to let the
> > function know what kind of limit it's going to compute each time it's
> > called.
> 
> Yes, both ways are fine.  But you should at least have a comment.
> 
> >> Apart from that, the code you posted below makes sense.
> > 
> > Even with the 1GB addition for VFIO? I have to admit I'm a bit ignorant
> > of VFIO but shouldn't that limit be derived from the number of attached
> > devices
> 
> If that would be the amount of memory reserved for BARs (PCI memory
> mapped regions), 1 GB should be enough.  Let's just ask Alex Williamson.

Yes, the extra is for the MMIO space of devices that gets mapped into
the IOMMU and pinned in the host.  1G is sufficient unless we start
using 64bit MMIO space and have lots and lots of devices.  It's possible
we could get rid of this, at the cost of losing peer-to-peer, but I'm
not sure how useful that is anyway.  In the past I haven't been able to
tell the difference between guest RAM and device MMIO, but I expect
that's getting better.  Thanks,

Alex

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list