Re: [Qemu-devel] [PATCH 05/15] postcopy: enhance ram_discard_range for hugepages

2017-01-30 Thread Juan Quintela
"Dr. David Alan Gilbert"  wrote:
> * Juan Quintela (quint...@redhat.com) wrote:
>> "Dr. David Alan Gilbert (git)"  wrote:
>> > From: "Dr. David Alan Gilbert" 
>> >
>> > Unfortunately madvise DONTNEED doesn't work on hugepagetlb
>> > so use fallocate(FALLOC_FL_PUNCH_HOLE)
>> > qemu_fd_getpagesize only sets the page based off a file
>> > if the file is from hugetlbfs.
>> >
>> > Signed-off-by: Dr. David Alan Gilbert 

>> > +#if defined(CONFIG_FALLOCATE_PUNCH_HOLE)
>> > +#include <linux/falloc.h>
>> > +#endif

I hate this in generic code :-()





I think that the function will have to be called:

qemu_ram_punch_hole(RAMBlock *, length);

Put it all together at the beginning of the file?
What I don't want is that, if someone arrives with a way to do this on
another OS, we need to add yet more ifdefs.  I would very much prefer that
they just have to define a function with those semantics.  Agreed that this
is a mess, but I can't think of an easier way of doing it either :-()



>> > @@ -1874,15 +1878,27 @@ int ram_discard_range(MigrationIncomingState *mis,
>> >  
>> >  if ((start + length) <= rb->used_length) {
>> >  uint8_t *host_endaddr = host_startaddr + length;
>> > -if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
>> > +if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
>> >  error_report("ram_discard_range: Unaligned end address: %p",
>> >   host_endaddr);
>> >  goto err;
>> >  }
>> > -errno = ENOTSUP;
>> > +errno = ENOTSUP; /* If we are missing MADVISE etc */
>> > +
>> > +if (rb->page_size == qemu_host_page_size) {
>> >  #if defined(CONFIG_MADVISE)
>> > -ret = qemu_madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
>> > +ret = qemu_madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
>> >  #endif
>> > +} else {
>> > +/* Huge page case  - unfortunately it can't do DONTNEED, but
>> > + * it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the
>> > + * huge page file.
>> > + */
>> > +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
>> > +ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
>> > +start, length);
>> > +#endif
>> > +}
>> >  if (ret) {
>> >  error_report("ram_discard_range: Failed to discard  range "
>> >   "%s:%" PRIx64 " +%zx (%d)",
>> 
>> Can we move this to qemu-posix or similar?
>> qemu_punch_hole() or similar and just put all the magic there?
>
> I'm trying but it's tricky.
> The problem is that:
>a) To be able to tell which you need you need the pagesize of the
>  area
>b) Then you need the fd if it is a hugepage
>  (You can get (a) from (b) by a syscall we already do once)
>c) If it's normal RAM you need the memory address but...
>d) If it's a hugepage you need the offset in the file
>
>   which is a mess; so you either have to pass all those parameters,
> or end up passing a RAMBlock* which doesn't feel like it should
> make its way into any of the os-* files.
>
>   I could move it to exec.c that already has some ifdef on OSs;
> what do you think?
>
> Dave
>
>> For the rest, I am ok with it.
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



Re: [Qemu-devel] [PATCH 05/15] postcopy: enhance ram_discard_range for hugepages

2017-01-30 Thread Dr. David Alan Gilbert
* Juan Quintela (quint...@redhat.com) wrote:
> "Dr. David Alan Gilbert (git)"  wrote:
> > From: "Dr. David Alan Gilbert" 
> >
> > Unfortunately madvise DONTNEED doesn't work on hugepagetlb
> > so use fallocate(FALLOC_FL_PUNCH_HOLE)
> > qemu_fd_getpagesize only sets the page based off a file
> > if the file is from hugetlbfs.
> >
> > Signed-off-by: Dr. David Alan Gilbert 
> > ---
> >  migration/ram.c | 24 
> >  1 file changed, 20 insertions(+), 4 deletions(-)
> >
> > diff --git a/migration/ram.c b/migration/ram.c
> > index fe32836..7afabcd 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -45,6 +45,10 @@
> >  #include "qemu/rcu_queue.h"
> >  #include "migration/colo.h"
> >  
> > +#if defined(CONFIG_FALLOCATE_PUNCH_HOLE)
> > +#include <linux/falloc.h>
> > +#endif
> > +
> >  #ifdef DEBUG_MIGRATION_RAM
> >  #define DPRINTF(fmt, ...) \
> >  do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
> > @@ -1866,7 +1870,7 @@ int ram_discard_range(MigrationIncomingState *mis,
> >  
> >  uint8_t *host_startaddr = rb->host + start;
> >  
> > -if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
> > +if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
> >  error_report("ram_discard_range: Unaligned start address: %p",
> >   host_startaddr);
> >  goto err;
> > @@ -1874,15 +1878,27 @@ int ram_discard_range(MigrationIncomingState *mis,
> >  
> >  if ((start + length) <= rb->used_length) {
> >  uint8_t *host_endaddr = host_startaddr + length;
> > -if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
> > +if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
> >  error_report("ram_discard_range: Unaligned end address: %p",
> >   host_endaddr);
> >  goto err;
> >  }
> > -errno = ENOTSUP;
> > +errno = ENOTSUP; /* If we are missing MADVISE etc */
> > +
> > +if (rb->page_size == qemu_host_page_size) {
> >  #if defined(CONFIG_MADVISE)
> > -ret = qemu_madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
> > +ret = qemu_madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
> >  #endif
> > +} else {
> > +/* Huge page case  - unfortunately it can't do DONTNEED, but
> > + * it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the
> > + * huge page file.
> > + */
> > +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
> > +ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
> > +start, length);
> > +#endif
> > +}
> >  if (ret) {
> >  error_report("ram_discard_range: Failed to discard  range "
> >   "%s:%" PRIx64 " +%zx (%d)",
> 
> Can we move this to qemu-posix or similar?
> qemu_punch_hole() or similar and just put all the magic there?

I'm trying but it's tricky.
The problem is that:
   a) To be able to tell which one you need, you need the page size of
 the area
   b) Then you need the fd if it is a hugepage
 (You can get (a) from (b) by a syscall we already do once)
   c) If it's normal RAM you need the memory address but...
   d) If it's a hugepage you need the offset in the file

  which is a mess; so you either have to pass all those parameters,
or end up passing a RAMBlock* which doesn't feel like it should
make its way into any of the os-* files.

  I could move it to exec.c that already has some ifdef on OSs;
what do you think?

Dave

> For the rest, I am ok with it.
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



Re: [Qemu-devel] [PATCH 05/15] postcopy: enhance ram_discard_range for hugepages

2017-01-25 Thread Juan Quintela
"Dr. David Alan Gilbert (git)"  wrote:
> From: "Dr. David Alan Gilbert" 
>
> Unfortunately madvise DONTNEED doesn't work on hugepagetlb
> so use fallocate(FALLOC_FL_PUNCH_HOLE)
> qemu_fd_getpagesize only sets the page based off a file
> if the file is from hugetlbfs.
>
> Signed-off-by: Dr. David Alan Gilbert 
> ---
>  migration/ram.c | 24 
>  1 file changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/migration/ram.c b/migration/ram.c
> index fe32836..7afabcd 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -45,6 +45,10 @@
>  #include "qemu/rcu_queue.h"
>  #include "migration/colo.h"
>  
> +#if defined(CONFIG_FALLOCATE_PUNCH_HOLE)
> +#include <linux/falloc.h>
> +#endif
> +
>  #ifdef DEBUG_MIGRATION_RAM
>  #define DPRINTF(fmt, ...) \
>  do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
> @@ -1866,7 +1870,7 @@ int ram_discard_range(MigrationIncomingState *mis,
>  
>  uint8_t *host_startaddr = rb->host + start;
>  
> -if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
> +if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
>  error_report("ram_discard_range: Unaligned start address: %p",
>   host_startaddr);
>  goto err;
> @@ -1874,15 +1878,27 @@ int ram_discard_range(MigrationIncomingState *mis,
>  
>  if ((start + length) <= rb->used_length) {
>  uint8_t *host_endaddr = host_startaddr + length;
> -if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
> +if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
>  error_report("ram_discard_range: Unaligned end address: %p",
>   host_endaddr);
>  goto err;
>  }
> -errno = ENOTSUP;
> +errno = ENOTSUP; /* If we are missing MADVISE etc */
> +
> +if (rb->page_size == qemu_host_page_size) {
>  #if defined(CONFIG_MADVISE)
> -ret = qemu_madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
> +ret = qemu_madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
>  #endif
> +} else {
> +/* Huge page case  - unfortunately it can't do DONTNEED, but
> + * it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the
> + * huge page file.
> + */
> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
> +ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
> +start, length);
> +#endif
> +}
>  if (ret) {
>  error_report("ram_discard_range: Failed to discard  range "
>   "%s:%" PRIx64 " +%zx (%d)",

Can we move this to qemu-posix or similar?
qemu_punch_hole() or similar and just put all the magic there?

For the rest, I am ok with it.



[Qemu-devel] [PATCH 05/15] postcopy: enhance ram_discard_range for hugepages

2017-01-06 Thread Dr. David Alan Gilbert (git)
From: "Dr. David Alan Gilbert" 

Unfortunately madvise(MADV_DONTNEED) doesn't work on hugetlbfs-backed
memory, so use fallocate(FALLOC_FL_PUNCH_HOLE) instead.
qemu_fd_getpagesize only sets the page size based off a file
if the file is from hugetlbfs.

Signed-off-by: Dr. David Alan Gilbert 
---
 migration/ram.c | 24 
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index fe32836..7afabcd 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -45,6 +45,10 @@
 #include "qemu/rcu_queue.h"
 #include "migration/colo.h"
 
+#if defined(CONFIG_FALLOCATE_PUNCH_HOLE)
+#include <linux/falloc.h>
+#endif
+
 #ifdef DEBUG_MIGRATION_RAM
 #define DPRINTF(fmt, ...) \
 do { fprintf(stdout, "migration_ram: " fmt, ## __VA_ARGS__); } while (0)
@@ -1866,7 +1870,7 @@ int ram_discard_range(MigrationIncomingState *mis,
 
 uint8_t *host_startaddr = rb->host + start;
 
-if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
+if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
 error_report("ram_discard_range: Unaligned start address: %p",
  host_startaddr);
 goto err;
@@ -1874,15 +1878,27 @@ int ram_discard_range(MigrationIncomingState *mis,
 
 if ((start + length) <= rb->used_length) {
 uint8_t *host_endaddr = host_startaddr + length;
-if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
+if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
 error_report("ram_discard_range: Unaligned end address: %p",
  host_endaddr);
 goto err;
 }
-errno = ENOTSUP;
+errno = ENOTSUP; /* If we are missing MADVISE etc */
+
+if (rb->page_size == qemu_host_page_size) {
 #if defined(CONFIG_MADVISE)
-ret = qemu_madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
+ret = qemu_madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
 #endif
+} else {
+/* Huge page case  - unfortunately it can't do DONTNEED, but
+ * it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the
+ * huge page file.
+ */
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+start, length);
+#endif
+}
 if (ret) {
 error_report("ram_discard_range: Failed to discard  range "
  "%s:%" PRIx64 " +%zx (%d)",
-- 
2.9.3