Re: [PATCH v3 08/32] exec: allow memory to be allocated from any kind of path

2015-10-12 Thread Michael S. Tsirkin
On Sun, Oct 11, 2015 at 11:52:40AM +0800, Xiao Guangrong wrote:
> Currently file_ram_alloc() is designed for hugetlbfs; however, the memory
> of an nvdimm can come from either a raw pmem device, e.g. /dev/pmem, or a
> file located on a DAX-enabled filesystem.
> 
> So this patch lets it work on any kind of path.
> 
> Signed-off-by: Xiao Guangrong 

This conflicts with map alloc rework.
Please rebase this on top of my tree.


> ---
>  exec.c | 55 ++-
>  1 file changed, 14 insertions(+), 41 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 7d90a52..70cb0ef 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1154,32 +1154,6 @@ void qemu_mutex_unlock_ramlist(void)
>  }
>  
>  #ifdef __linux__
> -
> -#include <sys/vfs.h>
> -
> -#define HUGETLBFS_MAGIC   0x958458f6
> -
> -static long gethugepagesize(const char *path, Error **errp)
> -{
> -struct statfs fs;
> -int ret;
> -
> -do {
> -ret = statfs(path, &fs);
> -} while (ret != 0 && errno == EINTR);
> -
> -if (ret != 0) {
> -error_setg_errno(errp, errno, "failed to get page size of file %s",
> - path);
> -return 0;
> -}
> -
> -if (fs.f_type != HUGETLBFS_MAGIC)
> -fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
> -
> -return fs.f_bsize;

What this is *actually* trying to warn against is that
mapping a regular file (as opposed to hugetlbfs)
means transparent huge pages don't work.

So I don't think we should drop this warning completely.
Let's either add the nvdimm magic, or simply check the
page size.
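
Something along these lines would keep the warning without rejecting
non-hugetlbfs paths. A rough, untested sketch only (the helper name and
message text are placeholders); and since DAX is a mount option rather
than a separate filesystem magic, simply comparing page sizes may be
the easier route:

    #include <errno.h>
    #include <stdio.h>
    #include <sys/vfs.h>

    #define HUGETLBFS_MAGIC 0x958458f6

    /*
     * Untested sketch: keep a statfs()-based check, but make it a
     * warning only, so regular files, /dev/pmem* and DAX mounts are
     * still accepted.
     */
    static void warn_if_not_hugetlbfs(const char *path)
    {
        struct statfs fs;
        int ret;

        do {
            ret = statfs(path, &fs);
        } while (ret != 0 && errno == EINTR);

        if (ret == 0 && fs.f_type != HUGETLBFS_MAGIC) {
            fprintf(stderr, "path %s is not on hugetlbfs, transparent "
                    "huge pages do not apply to this mapping\n", path);
        }
    }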


> -}
> -
>  static void *file_ram_alloc(RAMBlock *block,
>  ram_addr_t memory,
>  const char *path,
> @@ -1191,22 +1165,21 @@ static void *file_ram_alloc(RAMBlock *block,
>  void *ptr;
>  void *area = NULL;
>  int fd;
> -uint64_t hpagesize;
> +uint64_t pagesize;
>  uint64_t total;
> -Error *local_err = NULL;
>  size_t offset;
>  
> -hpagesize = gethugepagesize(path, _err);
> -if (local_err) {
> -error_propagate(errp, local_err);
> +pagesize = qemu_file_get_page_size(path);
> +if (!pagesize) {
> +error_setg(errp, "can't get page size for %s", path);
>  goto error;
>  }
> -block->mr->align = hpagesize;
> +block->mr->align = pagesize;
>  
> -if (memory < hpagesize) {
> +if (memory < pagesize) {
>  error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
> -   "or larger than huge page size 0x%" PRIx64,
> -   memory, hpagesize);
> +   "or larger than page size 0x%" PRIx64,
> +   memory, pagesize);
>  goto error;
>  }
>  
> @@ -1230,15 +1203,15 @@ static void *file_ram_alloc(RAMBlock *block,
>  fd = mkstemp(filename);
>  if (fd < 0) {
>  error_setg_errno(errp, errno,
> - "unable to create backing store for hugepages");
> + "unable to create backing store for path %s", path);
>  g_free(filename);
>  goto error;
>  }
>  unlink(filename);
>  g_free(filename);
>  
> -memory = ROUND_UP(memory, hpagesize);
> -total = memory + hpagesize;
> +memory = ROUND_UP(memory, pagesize);
> +total = memory + pagesize;
>  
>  /*
>   * ftruncate is not supported by hugetlbfs in older
> @@ -1254,12 +1227,12 @@ static void *file_ram_alloc(RAMBlock *block,
>  -1, 0);
>  if (ptr == MAP_FAILED) {
>  error_setg_errno(errp, errno,
> - "unable to allocate memory range for hugepages");
> + "unable to allocate memory range for path %s", 
> path);
>  close(fd);
>  goto error;
>  }
>  
> -offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
> +offset = QEMU_ALIGN_UP((uintptr_t)ptr, pagesize) - (uintptr_t)ptr;
>  
>  area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
>  (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
> @@ -1267,7 +1240,7 @@ static void *file_ram_alloc(RAMBlock *block,
>  fd, 0);
>  if (area == MAP_FAILED) {
>  error_setg_errno(errp, errno,
> - "unable to map backing store for hugepages");
> + "unable to map backing store for path %s", path);
>  munmap(ptr, total);
>  close(fd);
>  goto error;
> -- 
> 1.8.3.1


Re: [PATCH v3 08/32] exec: allow memory to be allocated from any kind of path

2015-10-12 Thread Xiao Guangrong



On 10/12/2015 06:08 PM, Michael S. Tsirkin wrote:
> On Sun, Oct 11, 2015 at 11:52:40AM +0800, Xiao Guangrong wrote:
>> Currently file_ram_alloc() is designed for hugetlbfs; however, the memory
>> of an nvdimm can come from either a raw pmem device, e.g. /dev/pmem, or a
>> file located on a DAX-enabled filesystem.
>>
>> So this patch lets it work on any kind of path.
>>
>> Signed-off-by: Xiao Guangrong 
>
> This conflicts with map alloc rework.
> Please rebase this on top of my tree.

Okay, thanks for the reminder. I did it based on the upstream QEMU tree;
I will redo it on top of the pci branch of your tree instead.

>> [...]
>
> What this is *actually* trying to warn against is that
> mapping a regular file (as opposed to hugetlbfs)
> means transparent huge pages don't work.
>
> So I don't think we should drop this warning completely.
> Let's either add the nvdimm magic, or simply check the
> page size.


Checking the page size sounds good. I will check:

    if (pagesize == getpagesize()) {
        /* ... print something ... */
    }

I agree with you that showing the information is needed; however,
"Warning" might scare some users. How about dropping that word, or
simply printing "Memory is not allocated from HugeTLBFS"?
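
Concretely, a small helper like the one below could be called from
file_ram_alloc() right after qemu_file_get_page_size() succeeds. This is
an untested sketch; the helper name and the exact wording are just
placeholders:

    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    /*
     * Untested sketch: purely informational, no "Warning:" prefix.
     * Prints only when the backing page size is just the normal host
     * page size, i.e. when the mapping is not huge-page backed.
     */
    static void note_if_not_hugepage_backed(const char *path,
                                            uint64_t pagesize)
    {
        if (pagesize == (uint64_t)getpagesize()) {
            fprintf(stderr,
                    "Memory is not allocated from HugeTLBFS (path %s)\n",
                    path);
        }
    }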
