Re: [Xen-devel] [PATCH V3 7/8] xen: switch to linear virtual mapped sparse p2m list
On Thu, Nov 13, 2014 at 10:21:01AM +0100, Juergen Gross wrote: > On 11/11/2014 06:47 PM, David Vrabel wrote: > >On 11/11/14 05:43, Juergen Gross wrote: > >>At start of the day the Xen hypervisor presents a contiguous mfn list > >>to a pv-domain. In order to support sparse memory this mfn list is > >>accessed via a three level p2m tree built early in the boot process. > >>Whenever the system needs the mfn associated with a pfn this tree is > >>used to find the mfn. > >> > >>Instead of using a software walked tree for accessing a specific mfn > >>list entry this patch is creating a virtual address area for the > >>entire possible mfn list including memory holes. The holes are > >>covered by mapping a pre-defined page consisting only of "invalid > >>mfn" entries. Access to a mfn entry is possible by just using the > >>virtual base address of the mfn list and the pfn as index into that > >>list. This speeds up the (hot) path of determining the mfn of a > >>pfn. > >> > >>Kernel build on a Dell Latitude E6440 (2 cores, HT) in 64 bit Dom0 > >>showed following improvements: > >> > >>Elapsed time: 32:50 -> 32:35 > >>System: 18:07 -> 17:47 > >>User:104:00 -> 103:30 > >> > >>Tested on 64 bit dom0 and 32 bit domU. > > > >Reviewed-by: David Vrabel > > > >Can you please test this with the following guests/scenarios. > > > >* 64 bit dom0 with PCI devices with high MMIO BARs. > > I'm not sure I have a machine available with this configuration. > > >* 32 bit domU with PCI devices assigned. > >* 32 bit domU with 64 GiB of memory. > >* domU that starts pre-ballooned and is subsequently ballooned up. > >* 64 bit domU that is saved and restored (or local host migration) > >* 32 bit domU that is saved and restored (or local host migration) I would also add: try 64-bit domU with really bizarre memory sizes that are not odd. Like 9765431 or such. And naturally do the migration to make sure that the re-hook doesn't miss a page or such. > > I'll try. 
> > > Juergen ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH V3 7/8] xen: switch to linear virtual mapped sparse p2m list
On Tue, Nov 11, 2014 at 06:43:45AM +0100, Juergen Gross wrote: > At start of the day the Xen hypervisor presents a contiguous mfn list > to a pv-domain. In order to support sparse memory this mfn list is > accessed via a three level p2m tree built early in the boot process. > Whenever the system needs the mfn associated with a pfn this tree is > used to find the mfn. > > Instead of using a software walked tree for accessing a specific mfn > list entry this patch is creating a virtual address area for the > entire possible mfn list including memory holes. The holes are > covered by mapping a pre-defined page consisting only of "invalid > mfn" entries. Access to a mfn entry is possible by just using the > virtual base address of the mfn list and the pfn as index into that > list. This speeds up the (hot) path of determining the mfn of a > pfn. > > Kernel build on a Dell Latitude E6440 (2 cores, HT) in 64 bit Dom0 > showed following improvements: > > Elapsed time: 32:50 -> 32:35 > System: 18:07 -> 17:47 > User:104:00 -> 103:30 > > Tested on 64 bit dom0 and 32 bit domU. 
> > Signed-off-by: Juergen Gross > --- > arch/x86/include/asm/xen/page.h | 14 +- > arch/x86/xen/mmu.c | 32 +- > arch/x86/xen/p2m.c | 732 > +--- > arch/x86/xen/xen-ops.h | 2 +- > 4 files changed, 342 insertions(+), 438 deletions(-) > > diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h > index 07d8a7b..4a227ec 100644 > --- a/arch/x86/include/asm/xen/page.h > +++ b/arch/x86/include/asm/xen/page.h > @@ -72,7 +72,19 @@ extern unsigned long m2p_find_override_pfn(unsigned long > mfn, unsigned long pfn) > */ > static inline unsigned long __pfn_to_mfn(unsigned long pfn) > { > - return get_phys_to_machine(pfn); > + unsigned long mfn; > + > + if (pfn < xen_p2m_size) > + mfn = xen_p2m_addr[pfn]; > + else if (unlikely(pfn < xen_max_p2m_pfn)) > + return get_phys_to_machine(pfn); > + else > + return IDENTITY_FRAME(pfn); > + > + if (unlikely(mfn == INVALID_P2M_ENTRY)) > + return get_phys_to_machine(pfn); > + > + return mfn; > } > > static inline unsigned long pfn_to_mfn(unsigned long pfn) > diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c > index 31ca515..0b43c45 100644 > --- a/arch/x86/xen/mmu.c > +++ b/arch/x86/xen/mmu.c > @@ -1158,20 +1158,16 @@ static void __init xen_cleanhighmap(unsigned long > vaddr, >* instead of somewhere later and be confusing. */ > xen_mc_flush(); > } > -static void __init xen_pagetable_p2m_copy(void) > + > +static void __init xen_pagetable_p2m_free(void) > { > unsigned long size; > unsigned long addr; > - unsigned long new_mfn_list; > - > - if (xen_feature(XENFEAT_auto_translated_physmap)) > - return; > > size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); > > - new_mfn_list = xen_revector_p2m_tree(); > /* No memory or already called. */ > - if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list) > + if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list) > return; > > /* using __ka address and sticking INVALID_P2M_ENTRY! 
*/ > @@ -1189,8 +1185,6 @@ static void __init xen_pagetable_p2m_copy(void) > > size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); > memblock_free(__pa(xen_start_info->mfn_list), size); > - /* And revector! Bye bye old array */ > - xen_start_info->mfn_list = new_mfn_list; > > /* At this stage, cleanup_highmap has already cleaned __ka space >* from _brk_limit way up to the max_pfn_mapped (which is the end of > @@ -1214,12 +1208,26 @@ static void __init xen_pagetable_p2m_copy(void) > } > #endif > > -static void __init xen_pagetable_init(void) > +static void __init xen_pagetable_p2m_setup(void) > { > - paging_init(); > + if (xen_feature(XENFEAT_auto_translated_physmap)) > + return; > + > + xen_vmalloc_p2m_tree(); > + > #ifdef CONFIG_X86_64 > - xen_pagetable_p2m_copy(); > + xen_pagetable_p2m_free(); > #endif > + /* And revector! Bye bye old array */ > + xen_start_info->mfn_list = (unsigned long)xen_p2m_addr; > +} > + > +static void __init xen_pagetable_init(void) > +{ > + paging_init(); > + > + xen_pagetable_p2m_setup(); > + > /* Allocate and initialize top and mid mfn levels for p2m structure */ > xen_build_mfn_list_list(); > > diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c > index 328875a..7df446d 100644 > --- a/arch/x86/xen/p2m.c > +++ b/arch/x86/xen/p2m.c > @@ -3,21 +3,22 @@ > * guests themselves, but it must also access and update the p2m array > * during suspend/resume when all the pages are reallocated. > * > - * The p2m table is logically a flat array, but we implement it as a > - * three-level tree to allow the address space to be s
Re: [Xen-devel] [PATCH V3 7/8] xen: switch to linear virtual mapped sparse p2m list
On 11/14/2014 12:58 PM, David Vrabel wrote: On 13/11/14 09:21, Juergen Gross wrote: On 11/11/2014 06:47 PM, David Vrabel wrote: Can you please test this with the following guests/scenarios. * 64 bit dom0 with PCI devices with high MMIO BARs. I'm not sure I have a machine available with this configuration. We have a bunch of them in our test lab. Unfortunately, xapi doesn't work on Linux 3.12 or later so I won't be able to test this series in the short term. I've found one. Stay tuned. :-) Juergen ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH V3 7/8] xen: switch to linear virtual mapped sparse p2m list
On 13/11/14 09:21, Juergen Gross wrote: > On 11/11/2014 06:47 PM, David Vrabel wrote: >> >> Can you please test this with the following guests/scenarios. >> >> * 64 bit dom0 with PCI devices with high MMIO BARs. > > I'm not sure I have a machine available with this configuration. We have a bunch of them in our test lab. Unfortunately, xapi doesn't work on Linux 3.12 or later so I won't be able to test this series in the short term. David ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH V3 7/8] xen: switch to linear virtual mapped sparse p2m list
On 11/11/2014 06:47 PM, David Vrabel wrote: On 11/11/14 05:43, Juergen Gross wrote: At start of the day the Xen hypervisor presents a contiguous mfn list to a pv-domain. In order to support sparse memory this mfn list is accessed via a three level p2m tree built early in the boot process. Whenever the system needs the mfn associated with a pfn this tree is used to find the mfn. Instead of using a software walked tree for accessing a specific mfn list entry this patch is creating a virtual address area for the entire possible mfn list including memory holes. The holes are covered by mapping a pre-defined page consisting only of "invalid mfn" entries. Access to a mfn entry is possible by just using the virtual base address of the mfn list and the pfn as index into that list. This speeds up the (hot) path of determining the mfn of a pfn. Kernel build on a Dell Latitude E6440 (2 cores, HT) in 64 bit Dom0 showed following improvements: Elapsed time: 32:50 -> 32:35 System: 18:07 -> 17:47 User:104:00 -> 103:30 Tested on 64 bit dom0 and 32 bit domU. Reviewed-by: David Vrabel Can you please test this with the following guests/scenarios. * 64 bit dom0 with PCI devices with high MMIO BARs. I'm not sure I have a machine available with this configuration. * 32 bit domU with PCI devices assigned. * 32 bit domU with 64 GiB of memory. * domU that starts pre-ballooned and is subsequently ballooned up. * 64 bit domU that is saved and restored (or local host migration) * 32 bit domU that is saved and restored (or local host migration) I'll try. Juergen ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH V3 7/8] xen: switch to linear virtual mapped sparse p2m list
On 11/11/14 05:43, Juergen Gross wrote: > At start of the day the Xen hypervisor presents a contiguous mfn list > to a pv-domain. In order to support sparse memory this mfn list is > accessed via a three level p2m tree built early in the boot process. > Whenever the system needs the mfn associated with a pfn this tree is > used to find the mfn. > > Instead of using a software walked tree for accessing a specific mfn > list entry this patch is creating a virtual address area for the > entire possible mfn list including memory holes. The holes are > covered by mapping a pre-defined page consisting only of "invalid > mfn" entries. Access to a mfn entry is possible by just using the > virtual base address of the mfn list and the pfn as index into that > list. This speeds up the (hot) path of determining the mfn of a > pfn. > > Kernel build on a Dell Latitude E6440 (2 cores, HT) in 64 bit Dom0 > showed following improvements: > > Elapsed time: 32:50 -> 32:35 > System: 18:07 -> 17:47 > User:104:00 -> 103:30 > > Tested on 64 bit dom0 and 32 bit domU. Reviewed-by: David Vrabel Can you please test this with the following guests/scenarios. * 64 bit dom0 with PCI devices with high MMIO BARs. * 32 bit domU with PCI devices assigned. * 32 bit domU with 64 GiB of memory. * domU that starts pre-ballooned and is subsequently ballooned up. * 64 bit domU that is saved and restored (or local host migration) * 32 bit domU that is saved and restored (or local host migration) Thanks. David ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH V3 7/8] xen: switch to linear virtual mapped sparse p2m list
At start of the day the Xen hypervisor presents a contiguous mfn list to a pv-domain. In order to support sparse memory this mfn list is accessed via a three level p2m tree built early in the boot process. Whenever the system needs the mfn associated with a pfn this tree is used to find the mfn. Instead of using a software walked tree for accessing a specific mfn list entry this patch is creating a virtual address area for the entire possible mfn list including memory holes. The holes are covered by mapping a pre-defined page consisting only of "invalid mfn" entries. Access to a mfn entry is possible by just using the virtual base address of the mfn list and the pfn as index into that list. This speeds up the (hot) path of determining the mfn of a pfn. Kernel build on a Dell Latitude E6440 (2 cores, HT) in 64 bit Dom0 showed following improvements: Elapsed time: 32:50 -> 32:35 System: 18:07 -> 17:47 User:104:00 -> 103:30 Tested on 64 bit dom0 and 32 bit domU. Signed-off-by: Juergen Gross --- arch/x86/include/asm/xen/page.h | 14 +- arch/x86/xen/mmu.c | 32 +- arch/x86/xen/p2m.c | 732 +--- arch/x86/xen/xen-ops.h | 2 +- 4 files changed, 342 insertions(+), 438 deletions(-) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 07d8a7b..4a227ec 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -72,7 +72,19 @@ extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) */ static inline unsigned long __pfn_to_mfn(unsigned long pfn) { - return get_phys_to_machine(pfn); + unsigned long mfn; + + if (pfn < xen_p2m_size) + mfn = xen_p2m_addr[pfn]; + else if (unlikely(pfn < xen_max_p2m_pfn)) + return get_phys_to_machine(pfn); + else + return IDENTITY_FRAME(pfn); + + if (unlikely(mfn == INVALID_P2M_ENTRY)) + return get_phys_to_machine(pfn); + + return mfn; } static inline unsigned long pfn_to_mfn(unsigned long pfn) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 31ca515..0b43c45 
100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1158,20 +1158,16 @@ static void __init xen_cleanhighmap(unsigned long vaddr, * instead of somewhere later and be confusing. */ xen_mc_flush(); } -static void __init xen_pagetable_p2m_copy(void) + +static void __init xen_pagetable_p2m_free(void) { unsigned long size; unsigned long addr; - unsigned long new_mfn_list; - - if (xen_feature(XENFEAT_auto_translated_physmap)) - return; size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); - new_mfn_list = xen_revector_p2m_tree(); /* No memory or already called. */ - if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list) + if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list) return; /* using __ka address and sticking INVALID_P2M_ENTRY! */ @@ -1189,8 +1185,6 @@ static void __init xen_pagetable_p2m_copy(void) size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); memblock_free(__pa(xen_start_info->mfn_list), size); - /* And revector! Bye bye old array */ - xen_start_info->mfn_list = new_mfn_list; /* At this stage, cleanup_highmap has already cleaned __ka space * from _brk_limit way up to the max_pfn_mapped (which is the end of @@ -1214,12 +1208,26 @@ static void __init xen_pagetable_p2m_copy(void) } #endif -static void __init xen_pagetable_init(void) +static void __init xen_pagetable_p2m_setup(void) { - paging_init(); + if (xen_feature(XENFEAT_auto_translated_physmap)) + return; + + xen_vmalloc_p2m_tree(); + #ifdef CONFIG_X86_64 - xen_pagetable_p2m_copy(); + xen_pagetable_p2m_free(); #endif + /* And revector! 
Bye bye old array */ + xen_start_info->mfn_list = (unsigned long)xen_p2m_addr; +} + +static void __init xen_pagetable_init(void) +{ + paging_init(); + + xen_pagetable_p2m_setup(); + /* Allocate and initialize top and mid mfn levels for p2m structure */ xen_build_mfn_list_list(); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 328875a..7df446d 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -3,21 +3,22 @@ * guests themselves, but it must also access and update the p2m array * during suspend/resume when all the pages are reallocated. * - * The p2m table is logically a flat array, but we implement it as a - * three-level tree to allow the address space to be sparse. + * The logical flat p2m table is mapped to a linear kernel memory area. + * For accesses by Xen a three-level tree linked via mfns only is set up to + * allow the address space to be sparse. * - * Xen - *