Re: [Xen-devel] [PATCH V3 7/8] xen: switch to linear virtual mapped sparse p2m list

2014-11-19 Thread Konrad Rzeszutek Wilk
On Thu, Nov 13, 2014 at 10:21:01AM +0100, Juergen Gross wrote:
> On 11/11/2014 06:47 PM, David Vrabel wrote:
> >On 11/11/14 05:43, Juergen Gross wrote:
> >>At start of the day the Xen hypervisor presents a contiguous mfn list
> >>to a pv-domain. In order to support sparse memory this mfn list is
> >>accessed via a three level p2m tree built early in the boot process.
> >>Whenever the system needs the mfn associated with a pfn this tree is
> >>used to find the mfn.
> >>
> >>Instead of using a software walked tree for accessing a specific mfn
> >>list entry this patch is creating a virtual address area for the
> >>entire possible mfn list including memory holes. The holes are
> >>covered by mapping a pre-defined page consisting only of "invalid
> >>mfn" entries. Access to a mfn entry is possible by just using the
> >>virtual base address of the mfn list and the pfn as index into that
> >>list. This speeds up the (hot) path of determining the mfn of a
> >>pfn.
> >>
> >>Kernel build on a Dell Latitude E6440 (2 cores, HT) in 64 bit Dom0
> >>showed following improvements:
> >>
> >>Elapsed time: 32:50 ->  32:35
> >>System:   18:07 ->  17:47
> >>User:    104:00 -> 103:30
> >>
> >>Tested on 64 bit dom0 and 32 bit domU.
> >
> >Reviewed-by: David Vrabel 
> >
> >Can you please test this with the following guests/scenarios.
> >
> >* 64 bit dom0 with PCI devices with high MMIO BARs.
> 
> I'm not sure I have a machine available with this configuration.
> 
> >* 32 bit domU with PCI devices assigned.
> >* 32 bit domU with 64 GiB of memory.
> >* domU that starts pre-ballooned and is subsequently ballooned up.
> >* 64 bit domU that is saved and restored (or local host migration)
> >* 32 bit domU that is saved and restored (or local host migration)

I would also add: try 64-bit domU with really bizarre memory sizes that
are not round numbers. Like 9765431 or such. And naturally do the migration to
make sure that the re-hook doesn't miss a page or such.

> 
> I'll try.
> 
> 
> Juergen

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH V3 7/8] xen: switch to linear virtual mapped sparse p2m list

2014-11-19 Thread Konrad Rzeszutek Wilk
On Tue, Nov 11, 2014 at 06:43:45AM +0100, Juergen Gross wrote:
> At start of the day the Xen hypervisor presents a contiguous mfn list
> to a pv-domain. In order to support sparse memory this mfn list is
> accessed via a three level p2m tree built early in the boot process.
> Whenever the system needs the mfn associated with a pfn this tree is
> used to find the mfn.
> 
> Instead of using a software walked tree for accessing a specific mfn
> list entry this patch is creating a virtual address area for the
> entire possible mfn list including memory holes. The holes are
> covered by mapping a pre-defined page consisting only of "invalid
> mfn" entries. Access to a mfn entry is possible by just using the
> virtual base address of the mfn list and the pfn as index into that
> list. This speeds up the (hot) path of determining the mfn of a
> pfn.
> 
> Kernel build on a Dell Latitude E6440 (2 cores, HT) in 64 bit Dom0
> showed following improvements:
> 
> Elapsed time: 32:50 ->  32:35
> System:   18:07 ->  17:47
> User:    104:00 -> 103:30
> 
> Tested on 64 bit dom0 and 32 bit domU.
> 
> Signed-off-by: Juergen Gross 
> ---
>  arch/x86/include/asm/xen/page.h |  14 +-
>  arch/x86/xen/mmu.c  |  32 +-
>  arch/x86/xen/p2m.c  | 732 
> +---
>  arch/x86/xen/xen-ops.h  |   2 +-
>  4 files changed, 342 insertions(+), 438 deletions(-)
> 
> diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
> index 07d8a7b..4a227ec 100644
> --- a/arch/x86/include/asm/xen/page.h
> +++ b/arch/x86/include/asm/xen/page.h
> @@ -72,7 +72,19 @@ extern unsigned long m2p_find_override_pfn(unsigned long 
> mfn, unsigned long pfn)
>   */
>  static inline unsigned long __pfn_to_mfn(unsigned long pfn)
>  {
> - return get_phys_to_machine(pfn);
> + unsigned long mfn;
> +
> + if (pfn < xen_p2m_size)
> + mfn = xen_p2m_addr[pfn];
> + else if (unlikely(pfn < xen_max_p2m_pfn))
> + return get_phys_to_machine(pfn);
> + else
> + return IDENTITY_FRAME(pfn);
> +
> + if (unlikely(mfn == INVALID_P2M_ENTRY))
> + return get_phys_to_machine(pfn);
> +
> + return mfn;
>  }
>  
>  static inline unsigned long pfn_to_mfn(unsigned long pfn)
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> index 31ca515..0b43c45 100644
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -1158,20 +1158,16 @@ static void __init xen_cleanhighmap(unsigned long 
> vaddr,
>* instead of somewhere later and be confusing. */
>   xen_mc_flush();
>  }
> -static void __init xen_pagetable_p2m_copy(void)
> +
> +static void __init xen_pagetable_p2m_free(void)
>  {
>   unsigned long size;
>   unsigned long addr;
> - unsigned long new_mfn_list;
> -
> - if (xen_feature(XENFEAT_auto_translated_physmap))
> - return;
>  
>   size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
>  
> - new_mfn_list = xen_revector_p2m_tree();
>   /* No memory or already called. */
> - if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list)
> + if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list)
>   return;
>  
>   /* using __ka address and sticking INVALID_P2M_ENTRY! */
> @@ -1189,8 +1185,6 @@ static void __init xen_pagetable_p2m_copy(void)
>  
>   size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
>   memblock_free(__pa(xen_start_info->mfn_list), size);
> - /* And revector! Bye bye old array */
> - xen_start_info->mfn_list = new_mfn_list;
>  
>   /* At this stage, cleanup_highmap has already cleaned __ka space
>* from _brk_limit way up to the max_pfn_mapped (which is the end of
> @@ -1214,12 +1208,26 @@ static void __init xen_pagetable_p2m_copy(void)
>  }
>  #endif
>  
> -static void __init xen_pagetable_init(void)
> +static void __init xen_pagetable_p2m_setup(void)
>  {
> - paging_init();
> + if (xen_feature(XENFEAT_auto_translated_physmap))
> + return;
> +
> + xen_vmalloc_p2m_tree();
> +
>  #ifdef CONFIG_X86_64
> - xen_pagetable_p2m_copy();
> + xen_pagetable_p2m_free();
>  #endif
> + /* And revector! Bye bye old array */
> + xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
> +}
> +
> +static void __init xen_pagetable_init(void)
> +{
> + paging_init();
> +
> + xen_pagetable_p2m_setup();
> +
>   /* Allocate and initialize top and mid mfn levels for p2m structure */
>   xen_build_mfn_list_list();
>  
> diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
> index 328875a..7df446d 100644
> --- a/arch/x86/xen/p2m.c
> +++ b/arch/x86/xen/p2m.c
> @@ -3,21 +3,22 @@
>   * guests themselves, but it must also access and update the p2m array
>   * during suspend/resume when all the pages are reallocated.
>   *
> - * The p2m table is logically a flat array, but we implement it as a
> - * three-level tree to allow the address space to be sparse.

Re: [Xen-devel] [PATCH V3 7/8] xen: switch to linear virtual mapped sparse p2m list

2014-11-14 Thread Juergen Gross

On 11/14/2014 12:58 PM, David Vrabel wrote:

On 13/11/14 09:21, Juergen Gross wrote:

On 11/11/2014 06:47 PM, David Vrabel wrote:


Can you please test this with the following guests/scenarios.

* 64 bit dom0 with PCI devices with high MMIO BARs.


I'm not sure I have a machine available with this configuration.


We have a bunch of them in our test lab. Unfortunately, xapi doesn't
work on Linux 3.12 or later so I won't be able to test this series in
the short term.


I've found one. Stay tuned. :-)


Juergen


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH V3 7/8] xen: switch to linear virtual mapped sparse p2m list

2014-11-14 Thread David Vrabel
On 13/11/14 09:21, Juergen Gross wrote:
> On 11/11/2014 06:47 PM, David Vrabel wrote:
>>
>> Can you please test this with the following guests/scenarios.
>>
>> * 64 bit dom0 with PCI devices with high MMIO BARs.
> 
> I'm not sure I have a machine available with this configuration.

We have a bunch of them in our test lab. Unfortunately, xapi doesn't
work on Linux 3.12 or later so I won't be able to test this series in
the short term.

David



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH V3 7/8] xen: switch to linear virtual mapped sparse p2m list

2014-11-13 Thread Juergen Gross

On 11/11/2014 06:47 PM, David Vrabel wrote:

On 11/11/14 05:43, Juergen Gross wrote:

At start of the day the Xen hypervisor presents a contiguous mfn list
to a pv-domain. In order to support sparse memory this mfn list is
accessed via a three level p2m tree built early in the boot process.
Whenever the system needs the mfn associated with a pfn this tree is
used to find the mfn.

Instead of using a software walked tree for accessing a specific mfn
list entry this patch is creating a virtual address area for the
entire possible mfn list including memory holes. The holes are
covered by mapping a pre-defined page consisting only of "invalid
mfn" entries. Access to a mfn entry is possible by just using the
virtual base address of the mfn list and the pfn as index into that
list. This speeds up the (hot) path of determining the mfn of a
pfn.

Kernel build on a Dell Latitude E6440 (2 cores, HT) in 64 bit Dom0
showed following improvements:

Elapsed time: 32:50 ->  32:35
System:   18:07 ->  17:47
User:    104:00 -> 103:30

Tested on 64 bit dom0 and 32 bit domU.


Reviewed-by: David Vrabel 

Can you please test this with the following guests/scenarios.

* 64 bit dom0 with PCI devices with high MMIO BARs.


I'm not sure I have a machine available with this configuration.


* 32 bit domU with PCI devices assigned.
* 32 bit domU with 64 GiB of memory.
* domU that starts pre-ballooned and is subsequently ballooned up.
* 64 bit domU that is saved and restored (or local host migration)
* 32 bit domU that is saved and restored (or local host migration)


I'll try.


Juergen

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH V3 7/8] xen: switch to linear virtual mapped sparse p2m list

2014-11-11 Thread David Vrabel
On 11/11/14 05:43, Juergen Gross wrote:
> At start of the day the Xen hypervisor presents a contiguous mfn list
> to a pv-domain. In order to support sparse memory this mfn list is
> accessed via a three level p2m tree built early in the boot process.
> Whenever the system needs the mfn associated with a pfn this tree is
> used to find the mfn.
> 
> Instead of using a software walked tree for accessing a specific mfn
> list entry this patch is creating a virtual address area for the
> entire possible mfn list including memory holes. The holes are
> covered by mapping a pre-defined  page consisting only of "invalid
> mfn" entries. Access to a mfn entry is possible by just using the
> virtual base address of the mfn list and the pfn as index into that
> list. This speeds up the (hot) path of determining the mfn of a
> pfn.
> 
> Kernel build on a Dell Latitude E6440 (2 cores, HT) in 64 bit Dom0
> showed following improvements:
> 
> Elapsed time: 32:50 ->  32:35
> System:   18:07 ->  17:47
> User:    104:00 -> 103:30
> 
> Tested on 64 bit dom0 and 32 bit domU.

Reviewed-by: David Vrabel 

Can you please test this with the following guests/scenarios.

* 64 bit dom0 with PCI devices with high MMIO BARs.
* 32 bit domU with PCI devices assigned.
* 32 bit domU with 64 GiB of memory.
* domU that starts pre-ballooned and is subsequently ballooned up.
* 64 bit domU that is saved and restored (or local host migration)
* 32 bit domU that is saved and restored (or local host migration)

Thanks.

David

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH V3 7/8] xen: switch to linear virtual mapped sparse p2m list

2014-11-10 Thread Juergen Gross
At start of the day the Xen hypervisor presents a contiguous mfn list
to a pv-domain. In order to support sparse memory this mfn list is
accessed via a three level p2m tree built early in the boot process.
Whenever the system needs the mfn associated with a pfn this tree is
used to find the mfn.

Instead of using a software walked tree for accessing a specific mfn
list entry this patch is creating a virtual address area for the
entire possible mfn list including memory holes. The holes are
covered by mapping a pre-defined page consisting only of "invalid
mfn" entries. Access to a mfn entry is possible by just using the
virtual base address of the mfn list and the pfn as index into that
list. This speeds up the (hot) path of determining the mfn of a
pfn.

Kernel build on a Dell Latitude E6440 (2 cores, HT) in 64 bit Dom0
showed following improvements:

Elapsed time: 32:50 ->  32:35
System:   18:07 ->  17:47
User:    104:00 -> 103:30

Tested on 64 bit dom0 and 32 bit domU.

Signed-off-by: Juergen Gross 
---
 arch/x86/include/asm/xen/page.h |  14 +-
 arch/x86/xen/mmu.c  |  32 +-
 arch/x86/xen/p2m.c  | 732 +---
 arch/x86/xen/xen-ops.h  |   2 +-
 4 files changed, 342 insertions(+), 438 deletions(-)

diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 07d8a7b..4a227ec 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -72,7 +72,19 @@ extern unsigned long m2p_find_override_pfn(unsigned long 
mfn, unsigned long pfn)
  */
 static inline unsigned long __pfn_to_mfn(unsigned long pfn)
 {
-   return get_phys_to_machine(pfn);
+   unsigned long mfn;
+
+   if (pfn < xen_p2m_size)
+   mfn = xen_p2m_addr[pfn];
+   else if (unlikely(pfn < xen_max_p2m_pfn))
+   return get_phys_to_machine(pfn);
+   else
+   return IDENTITY_FRAME(pfn);
+
+   if (unlikely(mfn == INVALID_P2M_ENTRY))
+   return get_phys_to_machine(pfn);
+
+   return mfn;
 }
 
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 31ca515..0b43c45 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1158,20 +1158,16 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
 * instead of somewhere later and be confusing. */
xen_mc_flush();
 }
-static void __init xen_pagetable_p2m_copy(void)
+
+static void __init xen_pagetable_p2m_free(void)
 {
unsigned long size;
unsigned long addr;
-   unsigned long new_mfn_list;
-
-   if (xen_feature(XENFEAT_auto_translated_physmap))
-   return;
 
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
 
-   new_mfn_list = xen_revector_p2m_tree();
/* No memory or already called. */
-   if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list)
+   if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list)
return;
 
/* using __ka address and sticking INVALID_P2M_ENTRY! */
@@ -1189,8 +1185,6 @@ static void __init xen_pagetable_p2m_copy(void)
 
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
memblock_free(__pa(xen_start_info->mfn_list), size);
-   /* And revector! Bye bye old array */
-   xen_start_info->mfn_list = new_mfn_list;
 
/* At this stage, cleanup_highmap has already cleaned __ka space
 * from _brk_limit way up to the max_pfn_mapped (which is the end of
@@ -1214,12 +1208,26 @@ static void __init xen_pagetable_p2m_copy(void)
 }
 #endif
 
-static void __init xen_pagetable_init(void)
+static void __init xen_pagetable_p2m_setup(void)
 {
-   paging_init();
+   if (xen_feature(XENFEAT_auto_translated_physmap))
+   return;
+
+   xen_vmalloc_p2m_tree();
+
 #ifdef CONFIG_X86_64
-   xen_pagetable_p2m_copy();
+   xen_pagetable_p2m_free();
 #endif
+   /* And revector! Bye bye old array */
+   xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
+}
+
+static void __init xen_pagetable_init(void)
+{
+   paging_init();
+
+   xen_pagetable_p2m_setup();
+
/* Allocate and initialize top and mid mfn levels for p2m structure */
xen_build_mfn_list_list();
 
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 328875a..7df446d 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -3,21 +3,22 @@
  * guests themselves, but it must also access and update the p2m array
  * during suspend/resume when all the pages are reallocated.
  *
- * The p2m table is logically a flat array, but we implement it as a
- * three-level tree to allow the address space to be sparse.
+ * The logical flat p2m table is mapped to a linear kernel memory area.
+ * For accesses by Xen a three-level tree linked via mfns only is set up to
+ * allow the address space to be sparse.
  *
- *   Xen
- *