Re: [PATCH v2 19/20] x86, mm: Make init_mem_mapping be able to be called several times

2013-03-11 Thread Yinghai Lu
On Mon, Mar 11, 2013 at 6:16 AM, Konrad Rzeszutek Wilk wrote:
> On Sat, Mar 09, 2013 at 10:44:46PM -0800, Yinghai Lu wrote:
>> Prepare to put page tables on local nodes.
>>
>> Move the call of init_mem_mapping into early_initmem_init.
>>
>> Rework alloc_low_pages to allocate page tables in the following order:
>>   BRK, local node, low range
>>
>> Still call load_cr3 only one time; otherwise we would break Xen 64-bit again.
>>
>
> We could also fix that. Now that the regression storm has passed
> and I am able to spend some time on it, we could make it a bit
> more resistant.

Never mind, we should only need to call load_cr3 one time,
as init_memory_mapping itself flushes the TLB every time on 64-bit.
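
On 64-bit every init_memory_mapping() pass ends up in
kernel_physical_mapping_init(), whose tail already does a full flush.
Roughly (a paraphrased sketch of arch/x86/mm/init_64.c, not the exact
code):

	unsigned long __meminit
	kernel_physical_mapping_init(unsigned long start, unsigned long end,
				     unsigned long page_size_mask)
	{
		unsigned long last_map_addr;

		/* ... build the direct mapping for [start, end) ... */

		/* every mapping pass ends with a full TLB flush,
		 * so an extra load_cr3() per call buys nothing */
		__flush_tlb_all();

		return last_map_addr;
	}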

Thanks

Yinghai


Re: [PATCH v2 19/20] x86, mm: Make init_mem_mapping be able to be called several times

2013-03-11 Thread Konrad Rzeszutek Wilk
On Sat, Mar 09, 2013 at 10:44:46PM -0800, Yinghai Lu wrote:
> Prepare to put page tables on local nodes.
> 
> Move the call of init_mem_mapping into early_initmem_init.
> 
> Rework alloc_low_pages to allocate page tables in the following order:
>   BRK, local node, low range
> 
> Still call load_cr3 only one time; otherwise we would break Xen 64-bit again.
> 

We could also fix that. Now that the regression storm has passed
and I am able to spend some time on it, we could make it a bit
more resistant.

> Signed-off-by: Yinghai Lu 
> Cc: Pekka Enberg 
> Cc: Jacob Shin 
> Cc: Konrad Rzeszutek Wilk 
> ---
>  arch/x86/include/asm/pgtable.h |    2 +-
>  arch/x86/kernel/setup.c        |    1 -
>  arch/x86/mm/init.c             |   88
>  arch/x86/mm/numa.c             |   24 +++
>  4 files changed, 79 insertions(+), 36 deletions(-)
> 
> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
> index 1e67223..868687c 100644
> --- a/arch/x86/include/asm/pgtable.h
> +++ b/arch/x86/include/asm/pgtable.h
> @@ -621,7 +621,7 @@ static inline int pgd_none(pgd_t pgd)
>  #ifndef __ASSEMBLY__
>  
>  extern int direct_gbpages;
> -void init_mem_mapping(void);
> +void init_mem_mapping(unsigned long begin, unsigned long end);
>  void early_alloc_pgt_buf(void);
>  
>  /* local pte updates need not use xchg for locking */
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index 86e1ec0..1cdc1a7 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -1105,7 +1105,6 @@ void __init setup_arch(char **cmdline_p)
>   acpi_boot_table_init();
>   early_acpi_boot_init();
>   early_initmem_init();
> - init_mem_mapping();
>   memblock.current_limit = get_max_mapped();
>   early_trap_pf_init();
>  
> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
> index 28b294f..8d0007a 100644
> --- a/arch/x86/mm/init.c
> +++ b/arch/x86/mm/init.c
> @@ -24,7 +24,10 @@ static unsigned long __initdata pgt_buf_start;
>  static unsigned long __initdata pgt_buf_end;
>  static unsigned long __initdata pgt_buf_top;
>  
> -static unsigned long min_pfn_mapped;
> +static unsigned long low_min_pfn_mapped;
> +static unsigned long low_max_pfn_mapped;
> +static unsigned long local_min_pfn_mapped;
> +static unsigned long local_max_pfn_mapped;
>  
>  static bool __initdata can_use_brk_pgt = true;
>  
> @@ -52,10 +55,17 @@ __ref void *alloc_low_pages(unsigned int num)
>  
>   if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) {
>   unsigned long ret;
> - if (min_pfn_mapped >= max_pfn_mapped)
> - panic("alloc_low_page: ran out of memory");
> - ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT,
> - max_pfn_mapped << PAGE_SHIFT,
> + if (local_min_pfn_mapped >= local_max_pfn_mapped) {
> + if (low_min_pfn_mapped >= low_max_pfn_mapped)
> + panic("alloc_low_page: ran out of memory");
> + ret = memblock_find_in_range(
> + low_min_pfn_mapped << PAGE_SHIFT,
> + low_max_pfn_mapped << PAGE_SHIFT,
> + PAGE_SIZE * num , PAGE_SIZE);
> + } else
> + ret = memblock_find_in_range(
> + local_min_pfn_mapped << PAGE_SHIFT,
> + local_max_pfn_mapped << PAGE_SHIFT,
>   PAGE_SIZE * num , PAGE_SIZE);
>   if (!ret)
>   panic("alloc_low_page: can not alloc memory");
> @@ -387,60 +397,75 @@ static unsigned long __init init_range_memory_mapping(
>  
>  /* (PUD_SHIFT-PMD_SHIFT)/2 */
>  #define STEP_SIZE_SHIFT 5
> -void __init init_mem_mapping(void)
> +void __init init_mem_mapping(unsigned long begin, unsigned long end)
>  {
> - unsigned long end, real_end, start, last_start;
> + unsigned long real_end, start, last_start;
>   unsigned long step_size;
>   unsigned long addr;
>   unsigned long mapped_ram_size = 0;
>   unsigned long new_mapped_ram_size;
> + bool is_low = false;
> +
> + if (!begin) {
> + probe_page_size_mask();
> + /* the ISA range is always mapped regardless of memory holes */
> + init_memory_mapping(0, ISA_END_ADDRESS);
> + begin = ISA_END_ADDRESS;
> + is_low = true;
> + }
>  
> - probe_page_size_mask();
> -
> -#ifdef CONFIG_X86_64
> - end = max_pfn << PAGE_SHIFT;
> -#else
> - end = max_low_pfn << PAGE_SHIFT;
> -#endif
> -
> - /* the ISA range is always mapped regardless of memory holes */
> - init_memory_mapping(0, ISA_END_ADDRESS);
> + if (begin >= end)
> + return;
>  
>   /* xen has big range in reserved near end of ram, skip it at first.*/
> - addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, PMD_SIZE);

[PATCH v2 19/20] x86, mm: Make init_mem_mapping be able to be called several times

2013-03-09 Thread Yinghai Lu
Prepare to put page tables on local nodes.

Move the call of init_mem_mapping into early_initmem_init.

Rework alloc_low_pages to allocate page tables in the following order:
  BRK, local node, low range

Still call load_cr3 only one time; otherwise we would break Xen 64-bit again.
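
Flattened out, the new fallback in alloc_low_pages() amounts to the
following (a simplified sketch of the hunk below, not the literal
nesting):

	if (pgt_buf_end + num <= pgt_buf_top && can_use_brk_pgt) {
		/* 1. BRK: carve the pages out of the early pgt buffer */
		pfn = pgt_buf_end;
		pgt_buf_end += num;
	} else if (local_min_pfn_mapped < local_max_pfn_mapped) {
		/* 2. already-mapped memory on the local node */
		ret = memblock_find_in_range(
			local_min_pfn_mapped << PAGE_SHIFT,
			local_max_pfn_mapped << PAGE_SHIFT,
			PAGE_SIZE * num, PAGE_SIZE);
	} else if (low_min_pfn_mapped < low_max_pfn_mapped) {
		/* 3. fall back to the already-mapped low range */
		ret = memblock_find_in_range(
			low_min_pfn_mapped << PAGE_SHIFT,
			low_max_pfn_mapped << PAGE_SHIFT,
			PAGE_SIZE * num, PAGE_SIZE);
	} else {
		panic("alloc_low_page: ran out of memory");
	}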

Signed-off-by: Yinghai Lu 
Cc: Pekka Enberg 
Cc: Jacob Shin 
Cc: Konrad Rzeszutek Wilk 
---
 arch/x86/include/asm/pgtable.h |    2 +-
 arch/x86/kernel/setup.c        |    1 -
 arch/x86/mm/init.c             |   88
 arch/x86/mm/numa.c             |   24 +++
 4 files changed, 79 insertions(+), 36 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1e67223..868687c 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -621,7 +621,7 @@ static inline int pgd_none(pgd_t pgd)
 #ifndef __ASSEMBLY__
 
 extern int direct_gbpages;
-void init_mem_mapping(void);
+void init_mem_mapping(unsigned long begin, unsigned long end);
 void early_alloc_pgt_buf(void);
 
 /* local pte updates need not use xchg for locking */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 86e1ec0..1cdc1a7 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1105,7 +1105,6 @@ void __init setup_arch(char **cmdline_p)
acpi_boot_table_init();
early_acpi_boot_init();
early_initmem_init();
-   init_mem_mapping();
memblock.current_limit = get_max_mapped();
early_trap_pf_init();
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 28b294f..8d0007a 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -24,7 +24,10 @@ static unsigned long __initdata pgt_buf_start;
 static unsigned long __initdata pgt_buf_end;
 static unsigned long __initdata pgt_buf_top;
 
-static unsigned long min_pfn_mapped;
+static unsigned long low_min_pfn_mapped;
+static unsigned long low_max_pfn_mapped;
+static unsigned long local_min_pfn_mapped;
+static unsigned long local_max_pfn_mapped;
 
 static bool __initdata can_use_brk_pgt = true;
 
@@ -52,10 +55,17 @@ __ref void *alloc_low_pages(unsigned int num)
 
if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) {
unsigned long ret;
-   if (min_pfn_mapped >= max_pfn_mapped)
-   panic("alloc_low_page: ran out of memory");
-   ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT,
-   max_pfn_mapped << PAGE_SHIFT,
+   if (local_min_pfn_mapped >= local_max_pfn_mapped) {
+   if (low_min_pfn_mapped >= low_max_pfn_mapped)
+   panic("alloc_low_page: ran out of memory");
+   ret = memblock_find_in_range(
+   low_min_pfn_mapped << PAGE_SHIFT,
+   low_max_pfn_mapped << PAGE_SHIFT,
+   PAGE_SIZE * num , PAGE_SIZE);
+   } else
+   ret = memblock_find_in_range(
+   local_min_pfn_mapped << PAGE_SHIFT,
+   local_max_pfn_mapped << PAGE_SHIFT,
PAGE_SIZE * num , PAGE_SIZE);
if (!ret)
panic("alloc_low_page: can not alloc memory");
@@ -387,60 +397,75 @@ static unsigned long __init init_range_memory_mapping(
 
 /* (PUD_SHIFT-PMD_SHIFT)/2 */
 #define STEP_SIZE_SHIFT 5
-void __init init_mem_mapping(void)
+void __init init_mem_mapping(unsigned long begin, unsigned long end)
 {
-   unsigned long end, real_end, start, last_start;
+   unsigned long real_end, start, last_start;
unsigned long step_size;
unsigned long addr;
unsigned long mapped_ram_size = 0;
unsigned long new_mapped_ram_size;
+   bool is_low = false;
+
+   if (!begin) {
+   probe_page_size_mask();
+   /* the ISA range is always mapped regardless of memory holes */
+   init_memory_mapping(0, ISA_END_ADDRESS);
+   begin = ISA_END_ADDRESS;
+   is_low = true;
+   }
 
-   probe_page_size_mask();
-
-#ifdef CONFIG_X86_64
-   end = max_pfn << PAGE_SHIFT;
-#else
-   end = max_low_pfn << PAGE_SHIFT;
-#endif
-
-   /* the ISA range is always mapped regardless of memory holes */
-   init_memory_mapping(0, ISA_END_ADDRESS);
+   if (begin >= end)
+   return;
 
/* xen has big range in reserved near end of ram, skip it at first.*/
-   addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, PMD_SIZE);
+   addr = memblock_find_in_range(begin, end, PMD_SIZE, PMD_SIZE);
real_end = addr + PMD_SIZE;
 
/* step_size need to be small so pgt_buf from BRK could cover it */
step_size = PMD_SIZE;
-   max_pfn_mapped = 0; /* will get exact value next */
-   min_pfn_mapped = real_end >> PAGE_SHIFT;