Re: [libvirt] [Xen-devel] [PATCH] libxl: fix dom0 balloon logic
Stefano Stabellini wrote: > On Mon, 23 Mar 2015, Ian Campbell wrote: > >> (just ccing the other tools maintainers, in particular Stefano who knows >> what this stuff is supposed to do...) >> >> On Fri, 2015-03-20 at 17:10 -0600, Jim Fehlig wrote: >> >>> Recent testing on large memory systems revealed a bug in the Xen xl >>> tool's freemem() function. When autoballooning is enabled, freemem() >>> is used to ensure enough memory is available to start a domain, >>> ballooning dom0 if necessary. When ballooning large amounts of memory >>> from dom0, freemem() would exceed its self-imposed wait time and >>> return an error. Meanwhile, dom0 continued to balloon. Starting the >>> domain later, after sufficient memory was ballooned from dom0, would >>> succeed. The libvirt implementation in libxlDomainFreeMem() suffers >>> the same bug since it is modeled after freemem(). >>> >>> In the end, the best place to fix the bug on the Xen side was to >>> slightly change the behavior of libxl_wait_for_memory_target(). >>> Instead of failing after caller-provided wait_sec, the function now >>> blocks as long as dom0 memory ballooning is progressing. It will return >>> failure only when more memory is needed to reach the target and wait_sec >>> have expired with no progress being made. See xen.git commit fd3aa246. >>> There was a dicussion on how this would affect other libxl apps like >>> libvirt >>> >>> http://lists.xen.org/archives/html/xen-devel/2015-03/msg00739.html >>> >>> If libvirt containing this patch was build against a Xen containing >>> the old libxl_wait_for_memory_target() behavior, libxlDomainFreeMem() >>> will fail after 30 sec and domain creation will be terminated. >>> Without this patch and with old libxl_wait_for_memory_target() behavior, >>> libxlDomainFreeMem() does not succeed after 30 sec, but returns success >>> anyway. Domain creation continues resulting in all sorts of fun stuff >>> like cpu soft lockups in the guest OS. It was decided to properly fix >>> libxl_wait_for_memory_target(), and if anything improve the default >>> behavior of apps using the freemem reference impl in xl. >>> >>> xl was patched to accommodate the change in libxl_wait_for_memory_target() >>> with xen.git commit 883b30a0. This patch does the same in the libxl >>> driver. While at it, I changed the logic to essentially match >>> freemem() in $xensrc/tools/libxl/xl_cmdimpl.c. It was a bit cleaner >>> IMO and will make it easier to spot future, potentially interesting >>> divergences. >>> >>> Signed-off-by: Jim Fehlig >>> --- >>> src/libxl/libxl_domain.c | 57 >>> >>> 1 file changed, 28 insertions(+), 29 deletions(-) >>> >>> diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c >>> index 407a9bd..ff78133 100644 >>> --- a/src/libxl/libxl_domain.c >>> +++ b/src/libxl/libxl_domain.c >>> @@ -1121,38 +1121,41 @@ libxlDomainFreeMem(libxlDomainObjPrivatePtr priv, >>> libxl_domain_config *d_config) >>> { >>> uint32_t needed_mem; >>> uint32_t free_mem; >>> -size_t i; >>> -int ret = -1; >>> +int ret; >>> int tries = 3; >>> int wait_secs = 10; >>> >>> -if ((ret = libxl_domain_need_memory(priv->ctx, &d_config->b_info, >>> -&needed_mem)) >= 0) { >>> -for (i = 0; i < tries; ++i) { >>> -if ((ret = libxl_get_free_memory(priv->ctx, &free_mem)) < 0) >>> -break; >>> +ret = libxl_domain_need_memory(priv->ctx, &d_config->b_info, >>> + &needed_mem); >>> +if (ret < 0) >>> +goto error; >>> >>> -if (free_mem >= needed_mem) { >>> -ret = 0; >>> -break; >>> -} >>> +do { >>> +ret = libxl_get_free_memory(priv->ctx, &free_mem); >>> +if (ret < 0) >>> +goto error; >>> >>> -if ((ret = libxl_set_memory_target(priv->ctx, 0, >>> - free_mem - needed_mem, >>> - /* relative */ 1, 0)) < 0) >>> -break; >>> +if (free_mem >= needed_mem) >>> +return 0; >>> >>> -ret = libxl_wait_for_free_memory(priv->ctx, 0, needed_mem, >>> - wait_secs); >>> -if (ret == 0 || ret != ERROR_NOMEM) >>> -break; >>> +ret = libxl_set_memory_target(priv->ctx, 0, >>> + free_mem - needed_mem, >>> + /* relative */ 1, 0); >>> +if (ret < 0) >>> +goto error; >>> >>> -if ((ret = libxl_wait_for_memory_target(priv->ctx, 0, 1)) < 0) >>> -break; >>> -} >>> -} >>> +ret = libxl_wait_for_free_memory(priv->ctx, 0, needed_mem, >>> + wait_secs); >>> +if (ret
Re: [libvirt] [Xen-devel] [PATCH] libxl: fix dom0 balloon logic
On Mon, 23 Mar 2015, Ian Campbell wrote: > (just ccing the other tools maintainers, in particular Stefano who knows > what this stuff is supposed to do...) > > On Fri, 2015-03-20 at 17:10 -0600, Jim Fehlig wrote: > > Recent testing on large memory systems revealed a bug in the Xen xl > > tool's freemem() function. When autoballooning is enabled, freemem() > > is used to ensure enough memory is available to start a domain, > > ballooning dom0 if necessary. When ballooning large amounts of memory > > from dom0, freemem() would exceed its self-imposed wait time and > > return an error. Meanwhile, dom0 continued to balloon. Starting the > > domain later, after sufficient memory was ballooned from dom0, would > > succeed. The libvirt implementation in libxlDomainFreeMem() suffers > > the same bug since it is modeled after freemem(). > > > > In the end, the best place to fix the bug on the Xen side was to > > slightly change the behavior of libxl_wait_for_memory_target(). > > Instead of failing after caller-provided wait_sec, the function now > > blocks as long as dom0 memory ballooning is progressing. It will return > > failure only when more memory is needed to reach the target and wait_sec > > have expired with no progress being made. See xen.git commit fd3aa246. > > There was a dicussion on how this would affect other libxl apps like > > libvirt > > > > http://lists.xen.org/archives/html/xen-devel/2015-03/msg00739.html > > > > If libvirt containing this patch was build against a Xen containing > > the old libxl_wait_for_memory_target() behavior, libxlDomainFreeMem() > > will fail after 30 sec and domain creation will be terminated. > > Without this patch and with old libxl_wait_for_memory_target() behavior, > > libxlDomainFreeMem() does not succeed after 30 sec, but returns success > > anyway. Domain creation continues resulting in all sorts of fun stuff > > like cpu soft lockups in the guest OS. It was decided to properly fix > > libxl_wait_for_memory_target(), and if anything improve the default > > behavior of apps using the freemem reference impl in xl. > > > > xl was patched to accommodate the change in libxl_wait_for_memory_target() > > with xen.git commit 883b30a0. This patch does the same in the libxl > > driver. While at it, I changed the logic to essentially match > > freemem() in $xensrc/tools/libxl/xl_cmdimpl.c. It was a bit cleaner > > IMO and will make it easier to spot future, potentially interesting > > divergences. > > > > Signed-off-by: Jim Fehlig > > --- > > src/libxl/libxl_domain.c | 57 > > > > 1 file changed, 28 insertions(+), 29 deletions(-) > > > > diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c > > index 407a9bd..ff78133 100644 > > --- a/src/libxl/libxl_domain.c > > +++ b/src/libxl/libxl_domain.c > > @@ -1121,38 +1121,41 @@ libxlDomainFreeMem(libxlDomainObjPrivatePtr priv, > > libxl_domain_config *d_config) > > { > > uint32_t needed_mem; > > uint32_t free_mem; > > -size_t i; > > -int ret = -1; > > +int ret; > > int tries = 3; > > int wait_secs = 10; > > > > -if ((ret = libxl_domain_need_memory(priv->ctx, &d_config->b_info, > > -&needed_mem)) >= 0) { > > -for (i = 0; i < tries; ++i) { > > -if ((ret = libxl_get_free_memory(priv->ctx, &free_mem)) < 0) > > -break; > > +ret = libxl_domain_need_memory(priv->ctx, &d_config->b_info, > > + &needed_mem); > > +if (ret < 0) > > +goto error; > > > > -if (free_mem >= needed_mem) { > > -ret = 0; > > -break; > > -} > > +do { > > +ret = libxl_get_free_memory(priv->ctx, &free_mem); > > +if (ret < 0) > > +goto error; > > > > -if ((ret = libxl_set_memory_target(priv->ctx, 0, > > - free_mem - needed_mem, > > - /* relative */ 1, 0)) < 0) > > -break; > > +if (free_mem >= needed_mem) > > +return 0; > > > > -ret = libxl_wait_for_free_memory(priv->ctx, 0, needed_mem, > > - wait_secs); > > -if (ret == 0 || ret != ERROR_NOMEM) > > -break; > > +ret = libxl_set_memory_target(priv->ctx, 0, > > + free_mem - needed_mem, > > + /* relative */ 1, 0); > > +if (ret < 0) > > +goto error; > > > > -if ((ret = libxl_wait_for_memory_target(priv->ctx, 0, 1)) < 0) > > -break; > > -} > > -} > > +ret = libxl_wait_for_free_memory(priv->ctx, 0, needed_mem, > > + wait_secs); > > +if (ret < 0) > > +goto error; Shou
Re: [libvirt] [Xen-devel] [PATCH] libxl: fix dom0 balloon logic
(just ccing the other tools maintainers, in particular Stefano who knows what this stuff is supposed to do...) On Fri, 2015-03-20 at 17:10 -0600, Jim Fehlig wrote: > Recent testing on large memory systems revealed a bug in the Xen xl > tool's freemem() function. When autoballooning is enabled, freemem() > is used to ensure enough memory is available to start a domain, > ballooning dom0 if necessary. When ballooning large amounts of memory > from dom0, freemem() would exceed its self-imposed wait time and > return an error. Meanwhile, dom0 continued to balloon. Starting the > domain later, after sufficient memory was ballooned from dom0, would > succeed. The libvirt implementation in libxlDomainFreeMem() suffers > the same bug since it is modeled after freemem(). > > In the end, the best place to fix the bug on the Xen side was to > slightly change the behavior of libxl_wait_for_memory_target(). > Instead of failing after caller-provided wait_sec, the function now > blocks as long as dom0 memory ballooning is progressing. It will return > failure only when more memory is needed to reach the target and wait_sec > have expired with no progress being made. See xen.git commit fd3aa246. > There was a dicussion on how this would affect other libxl apps like > libvirt > > http://lists.xen.org/archives/html/xen-devel/2015-03/msg00739.html > > If libvirt containing this patch was build against a Xen containing > the old libxl_wait_for_memory_target() behavior, libxlDomainFreeMem() > will fail after 30 sec and domain creation will be terminated. > Without this patch and with old libxl_wait_for_memory_target() behavior, > libxlDomainFreeMem() does not succeed after 30 sec, but returns success > anyway. Domain creation continues resulting in all sorts of fun stuff > like cpu soft lockups in the guest OS. It was decided to properly fix > libxl_wait_for_memory_target(), and if anything improve the default > behavior of apps using the freemem reference impl in xl. > > xl was patched to accommodate the change in libxl_wait_for_memory_target() > with xen.git commit 883b30a0. This patch does the same in the libxl > driver. While at it, I changed the logic to essentially match > freemem() in $xensrc/tools/libxl/xl_cmdimpl.c. It was a bit cleaner > IMO and will make it easier to spot future, potentially interesting > divergences. > > Signed-off-by: Jim Fehlig > --- > src/libxl/libxl_domain.c | 57 > > 1 file changed, 28 insertions(+), 29 deletions(-) > > diff --git a/src/libxl/libxl_domain.c b/src/libxl/libxl_domain.c > index 407a9bd..ff78133 100644 > --- a/src/libxl/libxl_domain.c > +++ b/src/libxl/libxl_domain.c > @@ -1121,38 +1121,41 @@ libxlDomainFreeMem(libxlDomainObjPrivatePtr priv, > libxl_domain_config *d_config) > { > uint32_t needed_mem; > uint32_t free_mem; > -size_t i; > -int ret = -1; > +int ret; > int tries = 3; > int wait_secs = 10; > > -if ((ret = libxl_domain_need_memory(priv->ctx, &d_config->b_info, > -&needed_mem)) >= 0) { > -for (i = 0; i < tries; ++i) { > -if ((ret = libxl_get_free_memory(priv->ctx, &free_mem)) < 0) > -break; > +ret = libxl_domain_need_memory(priv->ctx, &d_config->b_info, > + &needed_mem); > +if (ret < 0) > +goto error; > > -if (free_mem >= needed_mem) { > -ret = 0; > -break; > -} > +do { > +ret = libxl_get_free_memory(priv->ctx, &free_mem); > +if (ret < 0) > +goto error; > > -if ((ret = libxl_set_memory_target(priv->ctx, 0, > - free_mem - needed_mem, > - /* relative */ 1, 0)) < 0) > -break; > +if (free_mem >= needed_mem) > +return 0; > > -ret = libxl_wait_for_free_memory(priv->ctx, 0, needed_mem, > - wait_secs); > -if (ret == 0 || ret != ERROR_NOMEM) > -break; > +ret = libxl_set_memory_target(priv->ctx, 0, > + free_mem - needed_mem, > + /* relative */ 1, 0); > +if (ret < 0) > +goto error; > > -if ((ret = libxl_wait_for_memory_target(priv->ctx, 0, 1)) < 0) > -break; > -} > -} > +ret = libxl_wait_for_free_memory(priv->ctx, 0, needed_mem, > + wait_secs); > +if (ret < 0) > +goto error; > > -return ret; > +tries--; > +} while (tries > 0); > + > + error: > +virReportSystemError(ret, "%s", > + _("Failed to balloon domain0 memory")); > +return -1; > } > > static void > @@ -1271,12 +1274,8 @@