On Thu, Dec 22, 2016 at 04:22:11PM +1100, Alexey Kardashevskiy wrote:
> getrampagesize() returns the largest supported page size and is mainly
> used to know if huge pages are enabled.
> 
> However, it is implemented in target-ppc/kvm.c and is not available
> in TCG or for other architectures.
> 
> This renames gethugepagesize() to qemu_mempath_getpagesize() and moves
> it to mmap-alloc.c, where the fd-based analog of it is already
> implemented. This also renames getrampagesize() to qemu_getrampagesize()
> and moves it to exec.c, as it seems to be the common place for
> helpers like this.
> 
> The first user of it is going to be the spapr-pci-host-bridge, which
> needs to know the largest RAM page size so that the guest can try
> using bigger IOMMU pages to save memory.
> 
> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru>

Reviewed-by: David Gibson <da...@gibson.dropbear.id.au>

Seems sensible to me, but I'm not comfortable merging this via my tree
since it touches such core code.  Probably should go via Paolo.

> ---
>  include/exec/ram_addr.h   |   1 +
>  include/qemu/mmap-alloc.h |   2 +
>  exec.c                    |  82 ++++++++++++++++++++++++++++++++++++
>  target-ppc/kvm.c          | 105 ++--------------------------------------------
>  util/mmap-alloc.c         |  25 +++++++++++
>  5 files changed, 113 insertions(+), 102 deletions(-)
> 
> diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
> index 54d7108a9e..3935cbcfcd 100644
> --- a/include/exec/ram_addr.h
> +++ b/include/exec/ram_addr.h
> @@ -91,6 +91,7 @@ typedef struct RAMList {
>  } RAMList;
>  extern RAMList ram_list;
>  
> +long qemu_getrampagesize(void);
>  ram_addr_t last_ram_offset(void);
>  void qemu_mutex_lock_ramlist(void);
>  void qemu_mutex_unlock_ramlist(void);
> diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
> index 933c024ac5..50385e3f81 100644
> --- a/include/qemu/mmap-alloc.h
> +++ b/include/qemu/mmap-alloc.h
> @@ -5,6 +5,8 @@
>  
>  size_t qemu_fd_getpagesize(int fd);
>  
> +size_t qemu_mempath_getpagesize(const char *mem_path);
> +
>  void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
>  
>  void qemu_ram_munmap(void *ptr, size_t size);
> diff --git a/exec.c b/exec.c
> index 08c558eecf..d73b477a70 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -32,6 +32,7 @@
>  #endif
>  #include "sysemu/kvm.h"
>  #include "sysemu/sysemu.h"
> +#include "sysemu/numa.h"
>  #include "qemu/timer.h"
>  #include "qemu/config-file.h"
>  #include "qemu/error-report.h"
> @@ -1218,6 +1219,87 @@ void qemu_mutex_unlock_ramlist(void)
>  }
>  
>  #ifdef __linux__
> +/*
> + * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
> + * may or may not name the same files / on the same filesystem now as
> + * when we actually open and map them.  Iterate over the file
> + * descriptors instead, and use qemu_fd_getpagesize().
> + */
> +static int find_max_supported_pagesize(Object *obj, void *opaque)
> +{
> +    char *mem_path;
> +    long *hpsize_min = opaque;
> +
> +    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
> +        mem_path = object_property_get_str(obj, "mem-path", NULL);
> +        if (mem_path) {
> +            long hpsize = qemu_mempath_getpagesize(mem_path);
> +            if (hpsize < *hpsize_min) {
> +                *hpsize_min = hpsize;
> +            }
> +        } else {
> +            *hpsize_min = getpagesize();
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +long qemu_getrampagesize(void)
> +{
> +    long hpsize = LONG_MAX;
> +    long mainrampagesize;
> +    Object *memdev_root;
> +
> +    if (mem_path) {
> +        mainrampagesize = qemu_mempath_getpagesize(mem_path);
> +    } else {
> +        mainrampagesize = getpagesize();
> +    }
> +
> +    /* it's possible we have memory-backend objects with
> +     * hugepage-backed RAM. these may get mapped into system
> +     * address space via -numa parameters or memory hotplug
> +     * hooks. we want to take these into account, but we
> +     * also want to make sure these supported hugepage
> +     * sizes are applicable across the entire range of memory
> +     * we may boot from, so we take the min across all
> +     * backends, and assume normal pages in cases where a
> +     * backend isn't backed by hugepages.
> +     */
> +    memdev_root = object_resolve_path("/objects", NULL);
> +    if (memdev_root) {
> +        object_child_foreach(memdev_root, find_max_supported_pagesize, 
> &hpsize);
> +    }
> +    if (hpsize == LONG_MAX) {
> +        /* No additional memory regions found ==> Report main RAM page size 
> */
> +        return mainrampagesize;
> +    }
> +
> +    /* If NUMA is disabled or the NUMA nodes are not backed with a
> +     * memory-backend, then there is at least one node using "normal" RAM,
> +     * so if its page size is smaller we have got to report that size 
> instead.
> +     */
> +    if (hpsize > mainrampagesize &&
> +        (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
> +        static bool warned;
> +        if (!warned) {
> +            error_report("Huge page support disabled (n/a for main 
> memory).");
> +            warned = true;
> +        }
> +        return mainrampagesize;
> +    }
> +
> +    return hpsize;
> +}
> +#else
> +long qemu_getrampagesize(void)
> +{
> +    return getpagesize();
> +}
> +#endif
> +
> +#ifdef __linux__
>  static int64_t get_file_size(int fd)
>  {
>      int64_t size = lseek(fd, 0, SEEK_END);
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index 6e91a4d8bb..e0abffa8ad 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -42,6 +42,7 @@
>  #include "trace.h"
>  #include "exec/gdbstub.h"
>  #include "exec/memattrs.h"
> +#include "exec/ram_addr.h"
>  #include "sysemu/hostmem.h"
>  #include "qemu/cutils.h"
>  #if defined(TARGET_PPC64)
> @@ -325,106 +326,6 @@ static void kvm_get_smmu_info(PowerPCCPU *cpu, struct 
> kvm_ppc_smmu_info *info)
>      kvm_get_fallback_smmu_info(cpu, info);
>  }
>  
> -static long gethugepagesize(const char *mem_path)
> -{
> -    struct statfs fs;
> -    int ret;
> -
> -    do {
> -        ret = statfs(mem_path, &fs);
> -    } while (ret != 0 && errno == EINTR);
> -
> -    if (ret != 0) {
> -        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
> -                strerror(errno));
> -        exit(1);
> -    }
> -
> -#define HUGETLBFS_MAGIC       0x958458f6
> -
> -    if (fs.f_type != HUGETLBFS_MAGIC) {
> -        /* Explicit mempath, but it's ordinary pages */
> -        return getpagesize();
> -    }
> -
> -    /* It's hugepage, return the huge page size */
> -    return fs.f_bsize;
> -}
> -
> -/*
> - * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
> - * may or may not name the same files / on the same filesystem now as
> - * when we actually open and map them.  Iterate over the file
> - * descriptors instead, and use qemu_fd_getpagesize().
> - */
> -static int find_max_supported_pagesize(Object *obj, void *opaque)
> -{
> -    char *mem_path;
> -    long *hpsize_min = opaque;
> -
> -    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
> -        mem_path = object_property_get_str(obj, "mem-path", NULL);
> -        if (mem_path) {
> -            long hpsize = gethugepagesize(mem_path);
> -            if (hpsize < *hpsize_min) {
> -                *hpsize_min = hpsize;
> -            }
> -        } else {
> -            *hpsize_min = getpagesize();
> -        }
> -    }
> -
> -    return 0;
> -}
> -
> -static long getrampagesize(void)
> -{
> -    long hpsize = LONG_MAX;
> -    long mainrampagesize;
> -    Object *memdev_root;
> -
> -    if (mem_path) {
> -        mainrampagesize = gethugepagesize(mem_path);
> -    } else {
> -        mainrampagesize = getpagesize();
> -    }
> -
> -    /* it's possible we have memory-backend objects with
> -     * hugepage-backed RAM. these may get mapped into system
> -     * address space via -numa parameters or memory hotplug
> -     * hooks. we want to take these into account, but we
> -     * also want to make sure these supported hugepage
> -     * sizes are applicable across the entire range of memory
> -     * we may boot from, so we take the min across all
> -     * backends, and assume normal pages in cases where a
> -     * backend isn't backed by hugepages.
> -     */
> -    memdev_root = object_resolve_path("/objects", NULL);
> -    if (memdev_root) {
> -        object_child_foreach(memdev_root, find_max_supported_pagesize, 
> &hpsize);
> -    }
> -    if (hpsize == LONG_MAX) {
> -        /* No additional memory regions found ==> Report main RAM page size 
> */
> -        return mainrampagesize;
> -    }
> -
> -    /* If NUMA is disabled or the NUMA nodes are not backed with a
> -     * memory-backend, then there is at least one node using "normal" RAM,
> -     * so if its page size is smaller we have got to report that size 
> instead.
> -     */
> -    if (hpsize > mainrampagesize &&
> -        (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
> -        static bool warned;
> -        if (!warned) {
> -            error_report("Huge page support disabled (n/a for main 
> memory).");
> -            warned = true;
> -        }
> -        return mainrampagesize;
> -    }
> -
> -    return hpsize;
> -}
> -
>  static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t 
> shift)
>  {
>      if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
> @@ -454,7 +355,7 @@ static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
>          has_smmu_info = true;
>      }
>  
> -    rampagesize = getrampagesize();
> +    rampagesize = qemu_getrampagesize();
>  
>      /* Convert to QEMU form */
>      memset(&env->sps, 0, sizeof(env->sps));
> @@ -2177,7 +2078,7 @@ uint64_t kvmppc_rma_size(uint64_t current_size, 
> unsigned int hash_shift)
>      /* Find the largest hardware supported page size that's less than
>       * or equal to the (logical) backing page size of guest RAM */
>      kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
> -    rampagesize = getrampagesize();
> +    rampagesize = qemu_getrampagesize();
>      best_page_shift = 0;
>  
>      for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> index 5a85aa3c89..564c79109c 100644
> --- a/util/mmap-alloc.c
> +++ b/util/mmap-alloc.c
> @@ -39,6 +39,31 @@ size_t qemu_fd_getpagesize(int fd)
>      return getpagesize();
>  }
>  
> +size_t qemu_mempath_getpagesize(const char *mem_path)
> +{
> +#ifdef CONFIG_LINUX
> +    struct statfs fs;
> +    int ret;
> +
> +    do {
> +        ret = statfs(mem_path, &fs);
> +    } while (ret != 0 && errno == EINTR);
> +
> +    if (ret != 0) {
> +        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
> +                strerror(errno));
> +        exit(1);
> +    }
> +
> +    if (fs.f_type == HUGETLBFS_MAGIC) {
> +        /* It's hugepage, return the huge page size */
> +        return fs.f_bsize;
> +    }
> +#endif
> +
> +    return getpagesize();
> +}
> +
>  void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
>  {
>      /*

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature

Reply via email to