On 01.03.2011, at 19:35, anthony.per...@citrix.com wrote:

> From: Jun Nakajima <jun.nakaj...@intel.com>
> 
> On an IA32 or IA32 PAE host, we currently cannot create an HVM guest
> with more than 2G of memory, because it is almost impossible for QEMU
> to find a large enough contiguous virtual address space to map an HVM
> guest's whole physical address space.
> The attached patch fixes this issue by dynamically mapping small
> blocks of guest memory on demand.
> 
> Each call to qemu_get_ram_ptr calls qemu_map_cache with the lock
> option set, so the mapcache will not unmap the returned RAM pointers.
> 
> Signed-off-by: Jun Nakajima <jun.nakaj...@intel.com>
> Signed-off-by: Anthony PERARD <anthony.per...@citrix.com>
> Signed-off-by: Stefano Stabellini <stefano.stabell...@eu.citrix.com>
> ---
> Makefile.target     |    3 +
> configure           |    3 +
> exec.c              |   40 ++++++-
> hw/xen.h            |   13 ++
> hw/xen_common.h     |    9 ++
> xen-all.c           |   64 +++++++++++
> xen-mapcache-stub.c |   40 +++++++
> xen-mapcache.c      |  310 +++++++++++++++++++++++++++++++++++++++++++++++++++
> xen-mapcache.h      |   22 ++++
> xen-stub.c          |    4 +
> 10 files changed, 504 insertions(+), 4 deletions(-)
> create mode 100644 xen-mapcache-stub.c
> create mode 100644 xen-mapcache.c
> create mode 100644 xen-mapcache.h
> 
> diff --git a/Makefile.target b/Makefile.target
> index c539b1e..dcdd51d 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -214,8 +214,11 @@ else
> CONFIG_NO_XEN = y
> endif
> # xen support
> +CONFIG_NO_XEN_MAPCACHE = $(if $(subst n,,$(CONFIG_XEN_MAPCACHE)),n,y)
> obj-i386-$(CONFIG_XEN) += xen-all.o
> obj-$(CONFIG_NO_XEN) += xen-stub.o
> +obj-i386-$(CONFIG_XEN_MAPCACHE) += xen-mapcache.o
> +obj-$(CONFIG_NO_XEN_MAPCACHE) += xen-mapcache-stub.o
> 
> obj-i386-$(CONFIG_XEN) += xen_platform.o
> 
> diff --git a/configure b/configure
> index a84d974..4fd8696 100755
> --- a/configure
> +++ b/configure
> @@ -3172,6 +3172,9 @@ case "$target_arch2" in
>   i386|x86_64)
>     if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then
>       echo "CONFIG_XEN=y" >> $config_target_mak
> +      if test "$cpu" = "i386" -o "$cpu" = "x86_64"; then
> +          echo "CONFIG_XEN_MAPCACHE=y" >> $config_target_mak
> +      fi
>     fi
> esac
> case "$target_arch2" in
> diff --git a/exec.c b/exec.c
> index d611100..558595a 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -32,6 +32,7 @@
> #include "hw/qdev.h"
> #include "osdep.h"
> #include "kvm.h"
> +#include "hw/xen.h"
> #include "qemu-timer.h"
> #if defined(CONFIG_USER_ONLY)
> #include <qemu.h>
> @@ -51,6 +52,8 @@
> #include <libutil.h>
> #endif
> #endif
> +#else /* !CONFIG_USER_ONLY */
> +#include "xen-mapcache.h"
> #endif
> 
> //#define DEBUG_TB_INVALIDATE
> @@ -2865,6 +2868,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, 
> const char *name,
>         }
>     }
> 
> +    new_block->offset = find_ram_offset(size);
>     if (host) {
>         new_block->host = host;
>     } else {
> @@ -2886,13 +2890,15 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, 
> const char *name,
>                                    PROT_EXEC|PROT_READ|PROT_WRITE,
>                                    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
> #else
> -            new_block->host = qemu_vmalloc(size);
> +            if (xen_mapcache_enabled()) {
> +                xen_ram_alloc(new_block->offset, size);
> +            } else {
> +                new_block->host = qemu_vmalloc(size);
> +            }
> #endif
>             qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
>         }
>     }
> -
> -    new_block->offset = find_ram_offset(size);
>     new_block->length = size;
> 
>     QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
> @@ -2933,7 +2939,11 @@ void qemu_ram_free(ram_addr_t addr)
> #if defined(TARGET_S390X) && defined(CONFIG_KVM)
>                 munmap(block->host, block->length);
> #else
> -                qemu_vfree(block->host);
> +                if (xen_mapcache_enabled()) {
> +                    qemu_invalidate_entry(block->host);
> +                } else {
> +                    qemu_vfree(block->host);
> +                }
> #endif
>             }
>             qemu_free(block);
> @@ -2959,6 +2969,15 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
>         if (addr - block->offset < block->length) {
>             QLIST_REMOVE(block, next);
>             QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
> +            if (xen_mapcache_enabled()) {
> +                /* We need to check if the requested address is in the RAM
> +                 * because we don't want to map the entire memory in QEMU.
> +                 */
> +                if (block->offset == 0) {
> +                    return qemu_map_cache(addr, 0, 1);
> +                }
> +                block->host = qemu_map_cache(block->offset, block->length, 
> 1);
> +            }
>             return block->host + (addr - block->offset);
>         }
>     }
> @@ -2994,11 +3013,21 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t 
> *ram_addr)
>     uint8_t *host = ptr;
> 
>     QLIST_FOREACH(block, &ram_list.blocks, next) {
> +        /* This case happens when the block is not mapped. */
> +        if (block->host == NULL) {
> +            continue;
> +        }
>         if (host - block->host < block->length) {
>             *ram_addr = block->offset + (host - block->host);
>             return 0;
>         }
>     }
> +
> +    if (xen_mapcache_enabled()) {
> +        *ram_addr = qemu_ram_addr_from_mapcache(ptr);
> +        return 0;
> +    }
> +
>     return -1;
> }
> 
> @@ -3909,6 +3938,9 @@ void cpu_physical_memory_unmap(void *buffer, 
> target_phys_addr_t len,
>     if (is_write) {
>         cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
>     }
> +    if (xen_enabled()) {
> +        qemu_invalidate_entry(buffer);
> +    }
>     qemu_vfree(bounce.buffer);
>     bounce.buffer = NULL;
>     cpu_notify_map_clients();
> diff --git a/hw/xen.h b/hw/xen.h
> index 12d4e5f..e26d061 100644
> --- a/hw/xen.h
> +++ b/hw/xen.h
> @@ -31,6 +31,15 @@ static inline int xen_enabled(void)
> #endif
> }
> 
> +static inline int xen_mapcache_enabled(void)
> +{
> +#ifdef CONFIG_XEN_MAPCACHE
> +    return xen_enabled();
> +#else
> +    return 0;
> +#endif
> +}
> +
> int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num);
> void xen_piix3_set_irq(void *opaque, int irq_num, int level);
> void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int 
> len);
> @@ -41,6 +50,10 @@ void pci_xen_platform_init(PCIBus *bus);
> 
> int xen_init(void);
> 
> +#if defined(NEED_CPU_H) && !defined(CONFIG_USER_ONLY)
> +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size);
> +#endif
> +
> #if defined(CONFIG_XEN) && CONFIG_XEN_CTRL_INTERFACE_VERSION < 400
> #  define HVM_MAX_VCPUS 32
> #endif
> diff --git a/hw/xen_common.h b/hw/xen_common.h
> index 7e123ec..5a36642 100644
> --- a/hw/xen_common.h
> +++ b/hw/xen_common.h
> @@ -50,6 +50,15 @@ static inline int xc_fd(int xen_xc)
> }
> 
> 
> +static inline int xc_domain_populate_physmap_exact
> +    (XenXC xc_handle, uint32_t domid, unsigned long nr_extents,
> +     unsigned int extent_order, unsigned int mem_flags, xen_pfn_t 
> *extent_start)
> +{
> +    return xc_domain_memory_populate_physmap
> +        (xc_handle, domid, nr_extents, extent_order, mem_flags, 
> extent_start);
> +}
> +
> +
> /* Xen 4.1 */
> #else
> 
> diff --git a/xen-all.c b/xen-all.c
> index 761f2a0..03d1e90 100644
> --- a/xen-all.c
> +++ b/xen-all.c
> @@ -10,6 +10,8 @@
> #include "hw/xen_common.h"
> #include "hw/xen_backend.h"
> 
> +#include "xen-mapcache.h"
> +
> /* Xen specific function for piix pci */
> 
> int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
> @@ -52,6 +54,64 @@ qemu_irq *xen_interrupt_controller_init(void)
>     return qemu_allocate_irqs(xen_set_irq, NULL, 16);
> }
> 
> +
> +/* Memory Ops */
> +
> +static void xen_ram_init(ram_addr_t ram_size)
> +{
> +    RAMBlock *new_block;
> +    ram_addr_t below_4g_mem_size, above_4g_mem_size = 0;
> +
> +    new_block = qemu_mallocz(sizeof (*new_block));
> +    pstrcpy(new_block->idstr, sizeof (new_block->idstr), "xen.ram");
> +    new_block->host = NULL;
> +    new_block->offset = 0;
> +    new_block->length = ram_size;
> +
> +    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
> +
> +    ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
> +                                       new_block->length >> 
> TARGET_PAGE_BITS);
> +    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
> +           0xff, new_block->length >> TARGET_PAGE_BITS);
> +
> +    if (ram_size >= 0xe0000000 ) {
> +        above_4g_mem_size = ram_size - 0xe0000000;
> +        below_4g_mem_size = 0xe0000000;
> +    } else {
> +        below_4g_mem_size = ram_size;
> +    }
> +
> +    cpu_register_physical_memory(0, below_4g_mem_size, new_block->offset);
> +#if TARGET_PHYS_ADDR_BITS > 32
> +    if (above_4g_mem_size > 0) {
> +        cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size,
> +                                     new_block->offset + below_4g_mem_size);
> +    }
> +#endif
> +}
> +
> +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size)
> +{
> +    unsigned long nr_pfn;
> +    xen_pfn_t *pfn_list;
> +    int i;
> +
> +    nr_pfn = size >> TARGET_PAGE_BITS;
> +    pfn_list = qemu_malloc(sizeof (*pfn_list) * nr_pfn);
> +
> +    for (i = 0; i < nr_pfn; i++) {
> +        pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i;
> +    }
> +
> +    if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, 
> pfn_list)) {
> +        hw_error("xen: failed to populate ram at %lx", ram_addr);
> +    }
> +
> +    qemu_free(pfn_list);
> +}
> +
> +
> /* Initialise Xen */
> 
> int xen_init(void)
> @@ -62,5 +122,9 @@ int xen_init(void)
>         return -1;
>     }
> 
> +    /* Init RAM management */
> +    qemu_map_cache_init();
> +    xen_ram_init(ram_size);
> +
>     return 0;
> }
> diff --git a/xen-mapcache-stub.c b/xen-mapcache-stub.c
> new file mode 100644
> index 0000000..541bee6
> --- /dev/null
> +++ b/xen-mapcache-stub.c
> @@ -0,0 +1,40 @@
> +/*
> + * Copyright (C) 2011       Citrix Ltd.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "config.h"
> +
> +#include "exec-all.h"
> +#include "qemu-common.h"
> +#include "cpu-common.h"
> +#include "xen-mapcache.h"
> +
> +void qemu_map_cache_init(void)
> +{
> +}
> +
> +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t 
> size, uint8_t lock)
> +{
> +    return qemu_get_ram_ptr(phys_addr);
> +}
> +
> +void qemu_map_cache_unlock(void *buffer)
> +{
> +}
> +
> +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr)
> +{
> +    return -1;
> +}
> +
> +void qemu_invalidate_map_cache(void)
> +{
> +}
> +
> +void qemu_invalidate_entry(uint8_t *buffer)
> +{
> +}
> diff --git a/xen-mapcache.c b/xen-mapcache.c
> new file mode 100644
> index 0000000..d7f44a7
> --- /dev/null
> +++ b/xen-mapcache.c
> @@ -0,0 +1,310 @@
> +/*
> + * Copyright (C) 2011       Citrix Ltd.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "config.h"
> +
> +#include <sys/resource.h>
> +
> +#include "hw/xen_backend.h"
> +#include "blockdev.h"
> +
> +#include <xen/hvm/params.h>
> +#include <sys/mman.h>
> +
> +#include "xen-mapcache.h"
> +
> +
> +//#define MAPCACHE_DEBUG
> +
> +#ifdef MAPCACHE_DEBUG
> +#  define DPRINTF(fmt, ...) do { \
> +    fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \
> +} while (0)
> +#else
> +#  define DPRINTF(fmt, ...) do { } while (0)
> +#endif
> +
> +#if defined(__i386__)
> +#  define MCACHE_BUCKET_SHIFT 16
> +#elif defined(__x86_64__)
> +#  define MCACHE_BUCKET_SHIFT 20
> +#endif
> +#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
> +
> +#define BITS_PER_LONG (sizeof(long) * 8)
> +#define BITS_TO_LONGS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
> +#define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)]
> +
> +typedef struct MapCacheEntry {
> +    target_phys_addr_t paddr_index;
> +    uint8_t *vaddr_base;
> +    DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT);
> +    uint8_t lock;
> +    struct MapCacheEntry *next;
> +} MapCacheEntry;
> +
> +typedef struct MapCacheRev {
> +    uint8_t *vaddr_req;
> +    target_phys_addr_t paddr_index;
> +    QTAILQ_ENTRY(MapCacheRev) next;
> +} MapCacheRev;
> +
> +typedef struct MapCache {
> +    MapCacheEntry *entry;
> +    unsigned long nr_buckets;
> +    QTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries;
> +
> +    /* For most cases (>99.9%), the page address is the same. */
> +    target_phys_addr_t last_address_index;
> +    uint8_t *last_address_vaddr;
> +    unsigned long max_mcache_size;
> +    unsigned int mcache_bucket_shift;
> +} MapCache;
> +
> +static MapCache *mapcache;
> +
> +static inline int test_bit(unsigned int bit, const unsigned long *map)
> +{
> +    return !!((map)[(bit) / BITS_PER_LONG] & (1UL << ((bit) % 
> BITS_PER_LONG)));
> +}

We have a bitmap framework in qemu now. Please use that :). See bitmap.h / 
bitops.h / bitops.c.


Alex


Reply via email to