Re: [Qemu-devel] i386: pc: align gpa<->hpa on 1GB boundary (v6)

2013-11-13 Thread Paolo Bonzini
> > > +if (guest_info->gb_align && above_4g_mem_size > holesize) {
> > > +/* Round the allocation up to 2 MB to use more hugepages.
> 
> To align to 2MB boundary, the number of hugepages is the same.

Right.

> > > + * Remove the slack from the [yyy] piece so that pieceonesize
> > > + * (and thus the start of piecetwo) remains aligned.
> > > + */
> > > +align_offset = ROUND_UP(memsize, 1UL << 21) - memsize;
> > > +piecetwosize = holesize - align_offset;
> > > +} else {
> > > +/* There's no "piece one", all memory above 4G starts
> 
> Piece two.

I'm calling "piece one" the part that is aligned at 0x100000000 (4GB) in the
RAM block, and "piece two" the part that starts at below_4g_mem_size.
I'll change to "there's no [z] region".

> > > + * at below_4g_mem_size in the RAM block.  Also no need
> > > + * to align anything.
> > > + */
> > > +align_offset = 0;
> > > +piecetwosize = above_4g_mem_size;
> > > +}
> > > +
> > >  ram = g_malloc(sizeof(*ram));
> > > -memory_region_init_ram(ram, NULL, "pc.ram",
> > > -   below_4g_mem_size + above_4g_mem_size);
> > > +memory_region_init_ram(ram, NULL, "pc.ram", memsize + align_offset);
> > >  vmstate_register_ram_global(ram);
> > >  *ram_memory = ram;
> > > +
> > >  ram_below_4g = g_malloc(sizeof(*ram_below_4g));
> > >  memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram,
> > >   0, below_4g_mem_size);
> > >  memory_region_add_subregion(system_memory, 0, ram_below_4g);
> > > +
> > > +pieceonesize = above_4g_mem_size - piecetwosize;
> > > +if (pieceonesize) {
> > > +ram_above_4g_pieceone =
> > > g_malloc(sizeof(*ram_above_4g_pieceone));
> > > +memory_region_init_alias(ram_above_4g_pieceone, NULL,
> > > + "ram-above-4g-pieceone", ram,
> > > + 0x1ULL, pieceonesize);
> > > +memory_region_add_subregion(system_memory, 0x1ULL,
> > > +ram_above_4g_pieceone);
> > > +}
> > 
> > Can you change the name of aliases and subregions without breaking
> > migration?

Yes, memory regions are invisible except for RAM regions.

> Test with Q35?

Will do.  Thanks for the review!

Paolo



Re: [Qemu-devel] i386: pc: align gpa<->hpa on 1GB boundary (v6)

2013-11-13 Thread Marcelo Tosatti
On Wed, Nov 13, 2013 at 05:58:32PM -0200, Marcelo Tosatti wrote:
> On Wed, Nov 13, 2013 at 06:13:15PM +0100, Paolo Bonzini wrote:
> > > assert(piecetwosize <= holesize);
> > > 
> > > piecetwosize = MIN(above_4g_mem_size, piecetwosize);
> > > if ((above_4g_mem_size - piecetwosize) > 0) {
> > > memory_region_init_alias(ram_above_4g, NULL, 
> > > "ram-above-4g",
> > >  ram, 0x1ULL,
> > >  above_4g_mem_size - 
> > > piecetwosize);
> > > memory_region_add_subregion(system_memory, 0x1ULL,
> > >  ram_above_4g);
> > > } else {
> > > g_free(ram_above_4g);
> > > }
> > > memory_region_init_alias(ram_above_4g_piecetwo, NULL,
> > >  "ram-above-4g-piecetwo", ram,
> > >  0x1ULL - holesize, 
> > > piecetwosize);
> > > memory_region_add_subregion(system_memory,
> > > 0x1ULL +
> > > above_4g_mem_size - piecetwosize,
> > > ram_above_4g_piecetwo);
> > 
> > There is still a small problem in that the 2MB rounding must not be
> > done for old machine types.
> >
> > I did a really careful review of the code and everything else looks okay
> > to me.  However, it grew by accretion from v1 and now it took me really a
> > long time to figure it out...  I adjusted it a bit and the result seems
> > easier to understand to me.
> > 
> > Here's the hw/i386/pc.c part of the patch (the patch from v6 is unreadable):
> > 
> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > index 12c436e..f2fd138 100644
> > --- a/hw/i386/pc.c
> > +++ b/hw/i386/pc.c
> > @@ -1156,8 +1156,10 @@ FWCfgState *pc_memory_init(MemoryRegion 
> > *system_memory,
> >  {
> >  int linux_boot, i;
> >  MemoryRegion *ram, *option_rom_mr;
> > -MemoryRegion *ram_below_4g, *ram_above_4g;
> > +MemoryRegion *ram_below_4g, *ram_above_4g_pieceone, 
> > *ram_above_4g_piecetwo;
> >  FWCfgState *fw_cfg;
> > +uint64_t holesize, pieceonesize, piecetwosize;
> > +uint64_t memsize, align_offset;
> >  
> >  linux_boot = (kernel_filename != NULL);
> >  
> > @@ -1165,26 +1167,74 @@ FWCfgState *pc_memory_init(MemoryRegion 
> > *system_memory,
> >   * aliases to address portions of it, mostly for backwards 
> > compatibility
> >   * with older qemus that used qemu_ram_alloc().
> >   */
> > +memsize = below_4g_mem_size + above_4g_mem_size;
> > +holesize = 0x1ULL - below_4g_mem_size;
> > +
> > +/* If 1GB hugepages are used to back guest RAM, we want the
> > + * physical address 4GB to map to 4GB in the RAM, so that
> > + * memory beyond 4GB is aligned on a 1GB boundary, at the
> > + * host physical address space.  Thus, the ram block range
> > + * [holestart, 4GB] is mapped to the last holesize bytes of RAM:
> > + *
> > + *  0  h 4G memsize-holesize
> > + *
> > + * contiguous-ram-block [xx][yyy][z]
> > + *'---.
> > + * guest-addr-space [xx] [z][yyy]
> > + *
> > + * This is only done in new-enough machine types, and of course
> > + * it is only necessary if the [z] block exists at all.
> > + */
> > +if (guest_info->gb_align && above_4g_mem_size > holesize) {
> > +/* Round the allocation up to 2 MB to use more hugepages.

To align to 2MB boundary, the number of hugepages is the same.

> > + * Remove the slack from the [yyy] piece so that pieceonesize
> > + * (and thus the start of piecetwo) remains aligned.
> > + */
> > +align_offset = ROUND_UP(memsize, 1UL << 21) - memsize;
> > +piecetwosize = holesize - align_offset;
> > +} else {
> > +/* There's no "piece one", all memory above 4G starts

Piece two.

> > + * at below_4g_mem_size in the RAM block.  Also no need
> > + * to align anything.
> > + */
> > +align_offset = 0;
> > +piecetwosize = above_4g_mem_size;
> > +}
> > +
> >  ram = g_malloc(sizeof(*ram));
> > -memory_region_init_ram(ram, NULL, "pc.ram",
> > -   below_4g_mem_size + above_4g_mem_size);
> > +memory_region_init_ram(ram, NULL, "pc.ram", memsize + align_offset);
> >  vmstate_register_ram_global(ram);
> >  *ram_memory = ram;
> > +
> >  ram_below_4g = g_malloc(sizeof(*ram_below_4g));
> >  memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram,
> >   0, below_4g_mem_size);
> >  memory_region_add_subregion(system_memory, 0, ram_below_4g);
> > +
> > +pieceonesize = above_4g_mem_size - piecetwosize;
> > +if (

Re: [Qemu-devel] i386: pc: align gpa<->hpa on 1GB boundary (v6)

2013-11-13 Thread Michael S. Tsirkin
On Tue, Nov 12, 2013 at 07:16:37PM -0200, Marcelo Tosatti wrote:
> 
> v2: condition enablement of new mapping to new machine types (Paolo)
> v3: fix changelog
> v4: rebase
> v5: ensure alignment of piecetwo on 2MB GPA (Igor)
> do not register zero-sized piece-one(Igor)
> v6: fix memory leak (Igor)
> fix integer overflow(Igor)

BTW if you respin anyway, and if you want me to queue this for you,
could you make a patch against pci branch please?
We have the 1.8 place-holders there already.

Thanks!

> 
> 
> Align guest physical address and host physical address
> beyond guest 4GB on a 1GB boundary.
> 
> Otherwise 1GB TLBs cannot be cached for the range.
> 
> Signed-off-by: Marcelo Tosatti 
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 12c436e..9cf5109 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1156,8 +1156,9 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
>  {
>  int linux_boot, i;
>  MemoryRegion *ram, *option_rom_mr;
> -MemoryRegion *ram_below_4g, *ram_above_4g;
> +MemoryRegion *ram_below_4g, *ram_above_4g, *ram_above_4g_piecetwo;
>  FWCfgState *fw_cfg;
> +uint64_t memsize, align_offset;
>  
>  linux_boot = (kernel_filename != NULL);
>  
> @@ -1166,8 +1167,12 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
>   * with older qemus that used qemu_ram_alloc().
>   */
>  ram = g_malloc(sizeof(*ram));
> -memory_region_init_ram(ram, NULL, "pc.ram",
> -   below_4g_mem_size + above_4g_mem_size);
> +
> +memsize = ROUND_UP(below_4g_mem_size + above_4g_mem_size, 1UL << 21);
> +align_offset = memsize - (below_4g_mem_size + above_4g_mem_size);
> +
> +memory_region_init_ram(ram, NULL, "pc.ram", memsize);
> +
>  vmstate_register_ram_global(ram);
>  *ram_memory = ram;
>  ram_below_4g = g_malloc(sizeof(*ram_below_4g));
> @@ -1177,10 +1182,53 @@ FWCfgState *pc_memory_init(MemoryRegion 
> *system_memory,
>  e820_add_entry(0, below_4g_mem_size, E820_RAM);
>  if (above_4g_mem_size > 0) {
>  ram_above_4g = g_malloc(sizeof(*ram_above_4g));
> -memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
> - below_4g_mem_size, above_4g_mem_size);
> -memory_region_add_subregion(system_memory, 0x1ULL,
> +/*
> + *
> + * If 1GB hugepages are used to back guest RAM, map guest address
> + * space in the range [ramsize,ramsize+holesize] to the ram block
> + * range [holestart, 4GB]
> + *
> + *  0  h 4G 
> [ramsize,ramsize+holesize]
> + *
> + * guest-addr-space [  ] [  ][xxx]
> + */--/
> + * contiguous-ram-block [  ][xxx][ ]
> + *
> + * So that memory beyond 4GB is aligned on a 1GB boundary,
> + * at the host physical address space.
> + *
> + */
> +if (guest_info->gb_align) {
> +uint64_t holesize = 0x1ULL - below_4g_mem_size;
> +uint64_t piecetwosize = holesize - align_offset;
> +
> +assert(piecetwosize <= holesize);
> +
> +piecetwosize = MIN(above_4g_mem_size, piecetwosize);
> +if ((above_4g_mem_size - piecetwosize) > 0) {
> +memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g",
> + ram, 0x1ULL,
> + above_4g_mem_size - piecetwosize);
> +memory_region_add_subregion(system_memory, 0x1ULL,
> + ram_above_4g);
> +} else {
> +g_free(ram_above_4g);
> +}
> +
> +ram_above_4g_piecetwo = g_malloc(sizeof(*ram_above_4g_piecetwo));
> +memory_region_init_alias(ram_above_4g_piecetwo, NULL,
> + "ram-above-4g-piecetwo", ram,
> + 0x1ULL - holesize, 
> piecetwosize);
> +memory_region_add_subregion(system_memory,
> +0x1ULL +
> +above_4g_mem_size - piecetwosize,
> +ram_above_4g_piecetwo);
> +} else {
> +memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
> +below_4g_mem_size, above_4g_mem_size);
> +memory_region_add_subregion(system_memory, 0x1ULL,
>  ram_above_4g);
> +}
>  e820_add_entry(0x1ULL, above_4g_mem_size, E820_RAM);
>  }
>  
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 4fdb7b6..686736e 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -60,6 +60,7 @@ static cons

Re: [Qemu-devel] i386: pc: align gpa<->hpa on 1GB boundary (v6)

2013-11-13 Thread Paolo Bonzini
> assert(piecetwosize <= holesize);
> 
> piecetwosize = MIN(above_4g_mem_size, piecetwosize);
> if ((above_4g_mem_size - piecetwosize) > 0) {
> memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g",
>  ram, 0x1ULL,
>  above_4g_mem_size - piecetwosize);
> memory_region_add_subregion(system_memory, 0x1ULL,
>  ram_above_4g);
> } else {
> g_free(ram_above_4g);
> }
> memory_region_init_alias(ram_above_4g_piecetwo, NULL,
>  "ram-above-4g-piecetwo", ram,
>  0x1ULL - holesize, piecetwosize);
> memory_region_add_subregion(system_memory,
> 0x1ULL +
> above_4g_mem_size - piecetwosize,
> ram_above_4g_piecetwo);

There is still a small problem in that the 2MB rounding must not be
done for old machine types.

I did a really careful review of the code and everything else looks okay
to me.  However, it grew by accretion from v1, and it took me a really
long time to figure it out...  I adjusted it a bit and the result seems
easier for me to understand.

Here's the hw/i386/pc.c part of the patch (the patch from v6 is unreadable):

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 12c436e..f2fd138 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1156,8 +1156,10 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
 {
 int linux_boot, i;
 MemoryRegion *ram, *option_rom_mr;
-MemoryRegion *ram_below_4g, *ram_above_4g;
+MemoryRegion *ram_below_4g, *ram_above_4g_pieceone, *ram_above_4g_piecetwo;
 FWCfgState *fw_cfg;
+uint64_t holesize, pieceonesize, piecetwosize;
+uint64_t memsize, align_offset;
 
 linux_boot = (kernel_filename != NULL);
 
@@ -1165,26 +1167,74 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
  * aliases to address portions of it, mostly for backwards compatibility
  * with older qemus that used qemu_ram_alloc().
  */
+memsize = below_4g_mem_size + above_4g_mem_size;
+holesize = 0x1ULL - below_4g_mem_size;
+
+/* If 1GB hugepages are used to back guest RAM, we want the
+ * physical address 4GB to map to 4GB in the RAM, so that
+ * memory beyond 4GB is aligned on a 1GB boundary, at the
+ * host physical address space.  Thus, the ram block range
+ * [holestart, 4GB] is mapped to the last holesize bytes of RAM:
+ *
+ *  0  h 4G memsize-holesize
+ *
+ * contiguous-ram-block [xx][yyy][z]
+ *'---.
+ * guest-addr-space [xx] [z][yyy]
+ *
+ * This is only done in new-enough machine types, and of course
+ * it is only necessary if the [z] block exists at all.
+ */
+if (guest_info->gb_align && above_4g_mem_size > holesize) {
+/* Round the allocation up to 2 MB to use more hugepages.
+ * Remove the slack from the [yyy] piece so that pieceonesize
+ * (and thus the start of piecetwo) remains aligned.
+ */
+align_offset = ROUND_UP(memsize, 1UL << 21) - memsize;
+piecetwosize = holesize - align_offset;
+} else {
+/* There's no "piece one", all memory above 4G starts
+ * at below_4g_mem_size in the RAM block.  Also no need
+ * to align anything.
+ */
+align_offset = 0;
+piecetwosize = above_4g_mem_size;
+}
+
 ram = g_malloc(sizeof(*ram));
-memory_region_init_ram(ram, NULL, "pc.ram",
-   below_4g_mem_size + above_4g_mem_size);
+memory_region_init_ram(ram, NULL, "pc.ram", memsize + align_offset);
 vmstate_register_ram_global(ram);
 *ram_memory = ram;
+
 ram_below_4g = g_malloc(sizeof(*ram_below_4g));
 memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", ram,
  0, below_4g_mem_size);
 memory_region_add_subregion(system_memory, 0, ram_below_4g);
+
+pieceonesize = above_4g_mem_size - piecetwosize;
+if (pieceonesize) {
+ram_above_4g_pieceone = g_malloc(sizeof(*ram_above_4g_pieceone));
+memory_region_init_alias(ram_above_4g_pieceone, NULL,
+ "ram-above-4g-pieceone", ram,
+ 0x1ULL, pieceonesize);
+memory_region_add_subregion(system_memory, 0x1ULL,
+ram_above_4g_pieceone);
+}
+if (piecetwosize) {
+ram_above_4g_piecetwo = g_malloc(sizeof(*ram_above_4g_piecetwo));
+memory_region_init_alias(ram_above_4g_piecetwo, NULL,
+ "ram-above-4g-piecetwo",

Re: [Qemu-devel] i386: pc: align gpa<->hpa on 1GB boundary (v6)

2013-11-13 Thread Igor Mammedov
On Tue, 12 Nov 2013 19:16:37 -0200
Marcelo Tosatti  wrote:

> 
> v2: condition enablement of new mapping to new machine types (Paolo)
> v3: fix changelog
> v4: rebase
> v5: ensure alignment of piecetwo on 2MB GPA (Igor)
> do not register zero-sized piece-one(Igor)
> v6: fix memory leak (Igor)
> fix integer overflow(Igor)
> 
> 
> 
> Align guest physical address and host physical address
> beyond guest 4GB on a 1GB boundary.
> 
> Otherwise 1GB TLBs cannot be cached for the range.
> 
> Signed-off-by: Marcelo Tosatti 
Reviewed-By: Igor Mammedov 

PS:
All these alignment calculations look very fragile, and if this code is
touched it's easy to regress.

It would be nice for "make check" to catch a regression here when it happens.

> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 12c436e..9cf5109 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1156,8 +1156,9 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
>  {
>  int linux_boot, i;
>  MemoryRegion *ram, *option_rom_mr;
> -MemoryRegion *ram_below_4g, *ram_above_4g;
> +MemoryRegion *ram_below_4g, *ram_above_4g, *ram_above_4g_piecetwo;
>  FWCfgState *fw_cfg;
> +uint64_t memsize, align_offset;
>  
>  linux_boot = (kernel_filename != NULL);
>  
> @@ -1166,8 +1167,12 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
>   * with older qemus that used qemu_ram_alloc().
>   */
>  ram = g_malloc(sizeof(*ram));
> -memory_region_init_ram(ram, NULL, "pc.ram",
> -   below_4g_mem_size + above_4g_mem_size);
> +
> +memsize = ROUND_UP(below_4g_mem_size + above_4g_mem_size, 1UL << 21);
> +align_offset = memsize - (below_4g_mem_size + above_4g_mem_size);
> +
> +memory_region_init_ram(ram, NULL, "pc.ram", memsize);
> +
>  vmstate_register_ram_global(ram);
>  *ram_memory = ram;
>  ram_below_4g = g_malloc(sizeof(*ram_below_4g));
> @@ -1177,10 +1182,53 @@ FWCfgState *pc_memory_init(MemoryRegion 
> *system_memory,
>  e820_add_entry(0, below_4g_mem_size, E820_RAM);
>  if (above_4g_mem_size > 0) {
>  ram_above_4g = g_malloc(sizeof(*ram_above_4g));
> -memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
> - below_4g_mem_size, above_4g_mem_size);
> -memory_region_add_subregion(system_memory, 0x1ULL,
> +/*
> + *
> + * If 1GB hugepages are used to back guest RAM, map guest address
> + * space in the range [ramsize,ramsize+holesize] to the ram block
> + * range [holestart, 4GB]
> + *
> + *  0  h 4G 
> [ramsize,ramsize+holesize]
> + *
> + * guest-addr-space [  ] [  ][xxx]
> + */--/
> + * contiguous-ram-block [  ][xxx][ ]
> + *
> + * So that memory beyond 4GB is aligned on a 1GB boundary,
> + * at the host physical address space.
> + *
> + */
> +if (guest_info->gb_align) {
> +uint64_t holesize = 0x1ULL - below_4g_mem_size;
> +uint64_t piecetwosize = holesize - align_offset;
> +
> +assert(piecetwosize <= holesize);
> +
> +piecetwosize = MIN(above_4g_mem_size, piecetwosize);
> +if ((above_4g_mem_size - piecetwosize) > 0) {
> +memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g",
> + ram, 0x1ULL,
> + above_4g_mem_size - piecetwosize);
> +memory_region_add_subregion(system_memory, 0x1ULL,
> + ram_above_4g);
> +} else {
> +g_free(ram_above_4g);
> +}
> +
> +ram_above_4g_piecetwo = g_malloc(sizeof(*ram_above_4g_piecetwo));
> +memory_region_init_alias(ram_above_4g_piecetwo, NULL,
> + "ram-above-4g-piecetwo", ram,
> + 0x1ULL - holesize, 
> piecetwosize);
> +memory_region_add_subregion(system_memory,
> +0x1ULL +
> +above_4g_mem_size - piecetwosize,
> +ram_above_4g_piecetwo);
> +} else {
> +memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
> +below_4g_mem_size, above_4g_mem_size);
> +memory_region_add_subregion(system_memory, 0x1ULL,
>  ram_above_4g);
> +}
>  e820_add_entry(0x1ULL, above_4g_mem_size, E820_RAM);
>  }
>  
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 4fdb7b6..686736e 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.

[Qemu-devel] i386: pc: align gpa<->hpa on 1GB boundary (v6)

2013-11-12 Thread Marcelo Tosatti

v2: condition enablement of new mapping to new machine types (Paolo)
v3: fix changelog
v4: rebase
v5: ensure alignment of piecetwo on 2MB GPA (Igor)
do not register zero-sized piece-one(Igor)
v6: fix memory leak (Igor)
fix integer overflow(Igor)



Align guest physical address and host physical address
beyond guest 4GB on a 1GB boundary.

Otherwise 1GB TLBs cannot be cached for the range.

Signed-off-by: Marcelo Tosatti 

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 12c436e..9cf5109 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1156,8 +1156,9 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
 {
 int linux_boot, i;
 MemoryRegion *ram, *option_rom_mr;
-MemoryRegion *ram_below_4g, *ram_above_4g;
+MemoryRegion *ram_below_4g, *ram_above_4g, *ram_above_4g_piecetwo;
 FWCfgState *fw_cfg;
+uint64_t memsize, align_offset;
 
 linux_boot = (kernel_filename != NULL);
 
@@ -1166,8 +1167,12 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
  * with older qemus that used qemu_ram_alloc().
  */
 ram = g_malloc(sizeof(*ram));
-memory_region_init_ram(ram, NULL, "pc.ram",
-   below_4g_mem_size + above_4g_mem_size);
+
+memsize = ROUND_UP(below_4g_mem_size + above_4g_mem_size, 1UL << 21);
+align_offset = memsize - (below_4g_mem_size + above_4g_mem_size);
+
+memory_region_init_ram(ram, NULL, "pc.ram", memsize);
+
 vmstate_register_ram_global(ram);
 *ram_memory = ram;
 ram_below_4g = g_malloc(sizeof(*ram_below_4g));
@@ -1177,10 +1182,53 @@ FWCfgState *pc_memory_init(MemoryRegion *system_memory,
 e820_add_entry(0, below_4g_mem_size, E820_RAM);
 if (above_4g_mem_size > 0) {
 ram_above_4g = g_malloc(sizeof(*ram_above_4g));
-memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
- below_4g_mem_size, above_4g_mem_size);
-memory_region_add_subregion(system_memory, 0x1ULL,
+/*
+ *
+ * If 1GB hugepages are used to back guest RAM, map guest address
+ * space in the range [ramsize,ramsize+holesize] to the ram block
+ * range [holestart, 4GB]
+ *
+ *  0  h 4G [ramsize,ramsize+holesize]
+ *
+ * guest-addr-space [  ] [  ][xxx]
+ */--/
+ * contiguous-ram-block [  ][xxx][ ]
+ *
+ * So that memory beyond 4GB is aligned on a 1GB boundary,
+ * at the host physical address space.
+ *
+ */
+if (guest_info->gb_align) {
+uint64_t holesize = 0x1ULL - below_4g_mem_size;
+uint64_t piecetwosize = holesize - align_offset;
+
+assert(piecetwosize <= holesize);
+
+piecetwosize = MIN(above_4g_mem_size, piecetwosize);
+if ((above_4g_mem_size - piecetwosize) > 0) {
+memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g",
+ ram, 0x1ULL,
+ above_4g_mem_size - piecetwosize);
+memory_region_add_subregion(system_memory, 0x1ULL,
+ ram_above_4g);
+} else {
+g_free(ram_above_4g);
+}
+
+ram_above_4g_piecetwo = g_malloc(sizeof(*ram_above_4g_piecetwo));
+memory_region_init_alias(ram_above_4g_piecetwo, NULL,
+ "ram-above-4g-piecetwo", ram,
+ 0x1ULL - holesize, piecetwosize);
+memory_region_add_subregion(system_memory,
+0x1ULL +
+above_4g_mem_size - piecetwosize,
+ram_above_4g_piecetwo);
+} else {
+memory_region_init_alias(ram_above_4g, NULL, "ram-above-4g", ram,
+below_4g_mem_size, above_4g_mem_size);
+memory_region_add_subregion(system_memory, 0x1ULL,
 ram_above_4g);
+}
 e820_add_entry(0x1ULL, above_4g_mem_size, E820_RAM);
 }
 
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 4fdb7b6..686736e 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -60,6 +60,7 @@ static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
 static bool has_pvpanic;
 static bool has_pci_info = true;
 static bool has_acpi_build = true;
+static bool gb_align = true;
 
 /* PC hardware initialisation */
 static void pc_init1(QEMUMachineInitArgs *args,
@@ -128,6 +129,7 @@ static void pc_init1(QEMUMachineInitArgs *args,
 
 guest_info->has_pci_info = has_pci_info;
 guest_info->isapc_ram_fw = !pci_enabled;
+guest_info->gb_align = gb_align;
 
 /* all