Re: [RFC PATCH v3 11/13] memory-hotplug : free memmap of sparse-vmemmap

2012-07-11 Thread Wen Congyang
At 07/11/2012 01:52 PM, Yasuaki Ishimatsu Wrote:
 2012/07/11 14:06, Wen Congyang wrote:
 Hi Wen,
 
 At 07/09/2012 06:33 PM, Yasuaki Ishimatsu Wrote:
 I don't think that all pages of virtual mapping in removed memory can be
 freed, since page which type is MIX_SECTION_INFO is difficult to free.
 So, the patch only frees page which type is SECTION_INFO at first.

 CC: David Rientjes rient...@google.com
 CC: Jiang Liu liu...@gmail.com
 CC: Len Brown len.br...@intel.com
 CC: Benjamin Herrenschmidt b...@kernel.crashing.org
 CC: Paul Mackerras pau...@samba.org
 CC: Christoph Lameter c...@linux.com
 Cc: Minchan Kim minchan@gmail.com
 CC: Andrew Morton a...@linux-foundation.org
 CC: KOSAKI Motohiro kosaki.motoh...@jp.fujitsu.com
 CC: Wen Congyang we...@cn.fujitsu.com
 Signed-off-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com

 ---
   arch/x86/mm/init_64.c |   91 
 ++
   include/linux/mm.h|2 +
   mm/memory_hotplug.c   |5 ++
   mm/sparse.c   |5 +-
   4 files changed, 101 insertions(+), 2 deletions(-)

 Index: linux-3.5-rc4/include/linux/mm.h
 ===
 --- linux-3.5-rc4.orig/include/linux/mm.h   2012-07-03 14:22:18.530011567 
 +0900
 +++ linux-3.5-rc4/include/linux/mm.h2012-07-03 14:22:20.83872 
 +0900
 @@ -1588,6 +1588,8 @@ int vmemmap_populate(struct page *start_
   void vmemmap_populate_print_last(void);
   void register_page_bootmem_memmap(unsigned long section_nr, struct page 
 *map,
   unsigned long size);
 +void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages);
 +void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages);

   enum mf_flags {
 MF_COUNT_INCREASED = 1  0,
 Index: linux-3.5-rc4/mm/sparse.c
 ===
 --- linux-3.5-rc4.orig/mm/sparse.c  2012-07-03 14:21:45.071429805 +0900
 +++ linux-3.5-rc4/mm/sparse.c   2012-07-03 14:22:21.000983767 +0900
 @@ -614,12 +614,13 @@ static inline struct page *kmalloc_secti
 /* This will make the necessary allocations eventually. */
 return sparse_mem_map_populate(pnum, nid);
   }
 -static void __kfree_section_memmap(struct page *memmap, unsigned long 
 nr_pages)
 +static void __kfree_section_memmap(struct page *page, unsigned long 
 nr_pages)
   {
 -   return; /* XXX: Not implemented yet */
 +   vmemmap_kfree(page, nr_pages);

 Hmm, I think you try to free the memory allocated in 
 kmalloc_section_memmap().
 
 Yes.
 

   }
   static void free_map_bootmem(struct page *page, unsigned long nr_pages)
   {
 +   vmemmap_free_bootmem(page, nr_pages);
   }

 Hmm, which function is the memory you try to free allocated in?
 
 The function try to free memory allocated from bootmem. The memory has
 been registered by get_page_bootmem(). So we can free the memory by
 put_page_bootmem().

OK, I will read these codes, and check it.

 

   #else
   static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
 Index: linux-3.5-rc4/arch/x86/mm/init_64.c
 ===
 --- linux-3.5-rc4.orig/arch/x86/mm/init_64.c2012-07-03 
 14:22:18.538011465 +0900
 +++ linux-3.5-rc4/arch/x86/mm/init_64.c 2012-07-03 14:22:21.007983103 
 +0900
 @@ -978,6 +978,97 @@ vmemmap_populate(struct page *start_page
 return 0;
   }

 +unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long 
 end,
 + struct page **pp)
 +{
 +   pgd_t *pgd;
 +   pud_t *pud;
 +   pmd_t *pmd;
 +   pte_t *pte;
 +   unsigned long next;
 +
 +   *pp = NULL;
 +
 +   pgd = pgd_offset_k(addr);
 +   if (pgd_none(*pgd))
 +   return (addr + PAGE_SIZE)  PAGE_MASK;

 Hmm, why not goto next pgd?
 
 Does it mean return (addr + PGDIR_SIZE)  PGDIR_MASK?
 

 +
 +   pud = pud_offset(pgd, addr);
 +   if (pud_none(*pud))
 +   return (addr + PAGE_SIZE)  PAGE_MASK;
 +
 +   if (!cpu_has_pse) {
 +   next = (addr + PAGE_SIZE)  PAGE_MASK;
 +   pmd = pmd_offset(pud, addr);
 +   if (pmd_none(*pmd))
 +   return next;
 +
 +   pte = pte_offset_kernel(pmd, addr);
 +   if (pte_none(*pte))
 +   return next;
 +
 +   *pp = pte_page(*pte);
 +   pte_clear(init_mm, addr, pte);

 I think you should flush tlb here.
 
 Thanks, I'll update it.
 

 +   } else {
 +   next = pmd_addr_end(addr, end);
 +
 +   pmd = pmd_offset(pud, addr);
 +   if (pmd_none(*pmd))
 +   return next;
 +
 +   *pp = pmd_page(*pmd);
 +   pmd_clear(pmd);
 +   }
 +
 +   return next;
 +}
 +
 +void __meminit
 +vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
 +{
 +   unsigned long addr = (unsigned long)memmap;
 +   unsigned long end = (unsigned long)(memmap + nr_pages);
 +   unsigned long next;
 +   unsigned int order;
 +   struct page *page;
 +

Re: [RFC PATCH v3 11/13] memory-hotplug : free memmap of sparse-vmemmap

2012-07-11 Thread Yasuaki Ishimatsu
Hi Wen,

2012/07/11 15:25, Wen Congyang wrote:
 At 07/11/2012 01:52 PM, Yasuaki Ishimatsu Wrote:
 2012/07/11 14:06, Wen Congyang wrote:
 Hi Wen,

 At 07/09/2012 06:33 PM, Yasuaki Ishimatsu Wrote:
 I don't think that all pages of virtual mapping in removed memory can be
 freed, since page which type is MIX_SECTION_INFO is difficult to free.
 So, the patch only frees page which type is SECTION_INFO at first.

 CC: David Rientjes rient...@google.com
 CC: Jiang Liu liu...@gmail.com
 CC: Len Brown len.br...@intel.com
 CC: Benjamin Herrenschmidt b...@kernel.crashing.org
 CC: Paul Mackerras pau...@samba.org
 CC: Christoph Lameter c...@linux.com
 Cc: Minchan Kim minchan@gmail.com
 CC: Andrew Morton a...@linux-foundation.org
 CC: KOSAKI Motohiro kosaki.motoh...@jp.fujitsu.com
 CC: Wen Congyang we...@cn.fujitsu.com
 Signed-off-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com

 ---
arch/x86/mm/init_64.c |   91 
 ++
include/linux/mm.h|2 +
mm/memory_hotplug.c   |5 ++
mm/sparse.c   |5 +-
4 files changed, 101 insertions(+), 2 deletions(-)

 Index: linux-3.5-rc4/include/linux/mm.h
 ===
 --- linux-3.5-rc4.orig/include/linux/mm.h  2012-07-03 14:22:18.530011567 
 +0900
 +++ linux-3.5-rc4/include/linux/mm.h   2012-07-03 14:22:20.83872 
 +0900
 @@ -1588,6 +1588,8 @@ int vmemmap_populate(struct page *start_
void vmemmap_populate_print_last(void);
void register_page_bootmem_memmap(unsigned long section_nr, struct page 
 *map,
  unsigned long size);
 +void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages);
 +void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages);

enum mf_flags {
MF_COUNT_INCREASED = 1  0,
 Index: linux-3.5-rc4/mm/sparse.c
 ===
 --- linux-3.5-rc4.orig/mm/sparse.c 2012-07-03 14:21:45.071429805 +0900
 +++ linux-3.5-rc4/mm/sparse.c  2012-07-03 14:22:21.000983767 +0900
 @@ -614,12 +614,13 @@ static inline struct page *kmalloc_secti
/* This will make the necessary allocations eventually. */
return sparse_mem_map_populate(pnum, nid);
}
 -static void __kfree_section_memmap(struct page *memmap, unsigned long 
 nr_pages)
 +static void __kfree_section_memmap(struct page *page, unsigned long 
 nr_pages)
{
 -  return; /* XXX: Not implemented yet */
 +  vmemmap_kfree(page, nr_pages);

 Hmm, I think you try to free the memory allocated in 
 kmalloc_section_memmap().

 Yes.


}
static void free_map_bootmem(struct page *page, unsigned long nr_pages)
{
 +  vmemmap_free_bootmem(page, nr_pages);
}

 Hmm, which function is the memory you try to free allocated in?

 The function try to free memory allocated from bootmem. The memory has
 been registered by get_page_bootmem(). So we can free the memory by
 put_page_bootmem().
 
 OK, I will read these codes, and check it.
 


#else
static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
 Index: linux-3.5-rc4/arch/x86/mm/init_64.c
 ===
 --- linux-3.5-rc4.orig/arch/x86/mm/init_64.c   2012-07-03 
 14:22:18.538011465 +0900
 +++ linux-3.5-rc4/arch/x86/mm/init_64.c2012-07-03 14:22:21.007983103 
 +0900
 @@ -978,6 +978,97 @@ vmemmap_populate(struct page *start_page
return 0;
}

 +unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long 
 end,
 +struct page **pp)
 +{
 +  pgd_t *pgd;
 +  pud_t *pud;
 +  pmd_t *pmd;
 +  pte_t *pte;
 +  unsigned long next;
 +
 +  *pp = NULL;
 +
 +  pgd = pgd_offset_k(addr);
 +  if (pgd_none(*pgd))
 +  return (addr + PAGE_SIZE)  PAGE_MASK;

 Hmm, why not goto next pgd?

 Does it mean return (addr + PGDIR_SIZE)  PGDIR_MASK?


 +
 +  pud = pud_offset(pgd, addr);
 +  if (pud_none(*pud))
 +  return (addr + PAGE_SIZE)  PAGE_MASK;
 +
 +  if (!cpu_has_pse) {
 +  next = (addr + PAGE_SIZE)  PAGE_MASK;
 +  pmd = pmd_offset(pud, addr);
 +  if (pmd_none(*pmd))
 +  return next;
 +
 +  pte = pte_offset_kernel(pmd, addr);
 +  if (pte_none(*pte))
 +  return next;
 +
 +  *pp = pte_page(*pte);
 +  pte_clear(init_mm, addr, pte);

 I think you should flush tlb here.

 Thanks, I'll update it.


 +  } else {
 +  next = pmd_addr_end(addr, end);
 +
 +  pmd = pmd_offset(pud, addr);
 +  if (pmd_none(*pmd))
 +  return next;
 +
 +  *pp = pmd_page(*pmd);
 +  pmd_clear(pmd);
 +  }
 +
 +  return next;
 +}
 +
 +void __meminit
 +vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
 +{
 +  unsigned long addr = (unsigned long)memmap;
 +  unsigned long end = (unsigned long)(memmap + nr_pages);
 +  unsigned long next;
 

Re: [RFC PATCH v3 11/13] memory-hotplug : free memmap of sparse-vmemmap

2012-07-11 Thread Wen Congyang
At 07/11/2012 02:48 PM, Yasuaki Ishimatsu Wrote:
 Hi Wen,
 
 2012/07/11 15:25, Wen Congyang wrote:
 At 07/11/2012 01:52 PM, Yasuaki Ishimatsu Wrote:
 2012/07/11 14:06, Wen Congyang wrote:
 Hi Wen,

 At 07/09/2012 06:33 PM, Yasuaki Ishimatsu Wrote:
 I don't think that all pages of virtual mapping in removed memory can be
 freed, since page which type is MIX_SECTION_INFO is difficult to free.
 So, the patch only frees page which type is SECTION_INFO at first.

 CC: David Rientjes rient...@google.com
 CC: Jiang Liu liu...@gmail.com
 CC: Len Brown len.br...@intel.com
 CC: Benjamin Herrenschmidt b...@kernel.crashing.org
 CC: Paul Mackerras pau...@samba.org
 CC: Christoph Lameter c...@linux.com
 Cc: Minchan Kim minchan@gmail.com
 CC: Andrew Morton a...@linux-foundation.org
 CC: KOSAKI Motohiro kosaki.motoh...@jp.fujitsu.com
 CC: Wen Congyang we...@cn.fujitsu.com
 Signed-off-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com

 ---
arch/x86/mm/init_64.c |   91 
 ++
include/linux/mm.h|2 +
mm/memory_hotplug.c   |5 ++
mm/sparse.c   |5 +-
4 files changed, 101 insertions(+), 2 deletions(-)

 Index: linux-3.5-rc4/include/linux/mm.h
 ===
 --- linux-3.5-rc4.orig/include/linux/mm.h 2012-07-03 14:22:18.530011567 
 +0900
 +++ linux-3.5-rc4/include/linux/mm.h  2012-07-03 14:22:20.83872 
 +0900
 @@ -1588,6 +1588,8 @@ int vmemmap_populate(struct page *start_
void vmemmap_populate_print_last(void);
void register_page_bootmem_memmap(unsigned long section_nr, struct 
 page *map,
 unsigned long size);
 +void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages);
 +void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages);

enum mf_flags {
   MF_COUNT_INCREASED = 1  0,
 Index: linux-3.5-rc4/mm/sparse.c
 ===
 --- linux-3.5-rc4.orig/mm/sparse.c2012-07-03 14:21:45.071429805 
 +0900
 +++ linux-3.5-rc4/mm/sparse.c 2012-07-03 14:22:21.000983767 +0900
 @@ -614,12 +614,13 @@ static inline struct page *kmalloc_secti
   /* This will make the necessary allocations eventually. */
   return sparse_mem_map_populate(pnum, nid);
}
 -static void __kfree_section_memmap(struct page *memmap, unsigned long 
 nr_pages)
 +static void __kfree_section_memmap(struct page *page, unsigned long 
 nr_pages)
{
 - return; /* XXX: Not implemented yet */
 + vmemmap_kfree(page, nr_pages);

 Hmm, I think you try to free the memory allocated in 
 kmalloc_section_memmap().

 Yes.


}
static void free_map_bootmem(struct page *page, unsigned long nr_pages)
{
 + vmemmap_free_bootmem(page, nr_pages);
}

 Hmm, which function is the memory you try to free allocated in?

 The function try to free memory allocated from bootmem. The memory has
 been registered by get_page_bootmem(). So we can free the memory by
 put_page_bootmem().

 OK, I will read these codes, and check it.



#else
static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
 Index: linux-3.5-rc4/arch/x86/mm/init_64.c
 ===
 --- linux-3.5-rc4.orig/arch/x86/mm/init_64.c  2012-07-03 
 14:22:18.538011465 +0900
 +++ linux-3.5-rc4/arch/x86/mm/init_64.c   2012-07-03 14:22:21.007983103 
 +0900
 @@ -978,6 +978,97 @@ vmemmap_populate(struct page *start_page
   return 0;
}

 +unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long 
 end,
 +   struct page **pp)
 +{
 + pgd_t *pgd;
 + pud_t *pud;
 + pmd_t *pmd;
 + pte_t *pte;
 + unsigned long next;
 +
 + *pp = NULL;
 +
 + pgd = pgd_offset_k(addr);
 + if (pgd_none(*pgd))
 + return (addr + PAGE_SIZE)  PAGE_MASK;

 Hmm, why not goto next pgd?

 Does it mean return (addr + PGDIR_SIZE)  PGDIR_MASK?


 +
 + pud = pud_offset(pgd, addr);
 + if (pud_none(*pud))
 + return (addr + PAGE_SIZE)  PAGE_MASK;
 +
 + if (!cpu_has_pse) {
 + next = (addr + PAGE_SIZE)  PAGE_MASK;
 + pmd = pmd_offset(pud, addr);
 + if (pmd_none(*pmd))
 + return next;
 +
 + pte = pte_offset_kernel(pmd, addr);
 + if (pte_none(*pte))
 + return next;
 +
 + *pp = pte_page(*pte);
 + pte_clear(init_mm, addr, pte);

 I think you should flush tlb here.

 Thanks, I'll update it.


 + } else {
 + next = pmd_addr_end(addr, end);
 +
 + pmd = pmd_offset(pud, addr);
 + if (pmd_none(*pmd))
 + return next;
 +
 + *pp = pmd_page(*pmd);
 + pmd_clear(pmd);
 + }
 +
 + return next;
 +}
 +
 +void __meminit
 +vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
 +{
 + unsigned long addr = (unsigned long)memmap;
 + unsigned long end = (unsigned long)(memmap + nr_pages);
 + unsigned 

Re: [RFC PATCH v3 11/13] memory-hotplug : free memmap of sparse-vmemmap

2012-07-10 Thread Wen Congyang
At 07/09/2012 06:33 PM, Yasuaki Ishimatsu Wrote:
 I don't think that all pages of virtual mapping in removed memory can be
 freed, since page which type is MIX_SECTION_INFO is difficult to free.
 So, the patch only frees page which type is SECTION_INFO at first.
 
 CC: David Rientjes rient...@google.com
 CC: Jiang Liu liu...@gmail.com
 CC: Len Brown len.br...@intel.com
 CC: Benjamin Herrenschmidt b...@kernel.crashing.org
 CC: Paul Mackerras pau...@samba.org
 CC: Christoph Lameter c...@linux.com
 Cc: Minchan Kim minchan@gmail.com
 CC: Andrew Morton a...@linux-foundation.org
 CC: KOSAKI Motohiro kosaki.motoh...@jp.fujitsu.com
 CC: Wen Congyang we...@cn.fujitsu.com
 Signed-off-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com
 
 ---
  arch/x86/mm/init_64.c |   91 
 ++
  include/linux/mm.h|2 +
  mm/memory_hotplug.c   |5 ++
  mm/sparse.c   |5 +-
  4 files changed, 101 insertions(+), 2 deletions(-)
 
 Index: linux-3.5-rc4/include/linux/mm.h
 ===
 --- linux-3.5-rc4.orig/include/linux/mm.h 2012-07-03 14:22:18.530011567 
 +0900
 +++ linux-3.5-rc4/include/linux/mm.h  2012-07-03 14:22:20.83872 +0900
 @@ -1588,6 +1588,8 @@ int vmemmap_populate(struct page *start_
  void vmemmap_populate_print_last(void);
  void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
 unsigned long size);
 +void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages);
 +void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages);
 
  enum mf_flags {
   MF_COUNT_INCREASED = 1  0,
 Index: linux-3.5-rc4/mm/sparse.c
 ===
 --- linux-3.5-rc4.orig/mm/sparse.c2012-07-03 14:21:45.071429805 +0900
 +++ linux-3.5-rc4/mm/sparse.c 2012-07-03 14:22:21.000983767 +0900
 @@ -614,12 +614,13 @@ static inline struct page *kmalloc_secti
   /* This will make the necessary allocations eventually. */
   return sparse_mem_map_populate(pnum, nid);
  }
 -static void __kfree_section_memmap(struct page *memmap, unsigned long 
 nr_pages)
 +static void __kfree_section_memmap(struct page *page, unsigned long nr_pages)
  {
 - return; /* XXX: Not implemented yet */
 + vmemmap_kfree(page, nr_pages);

Hmm, I think you try to free the memory allocated in kmalloc_section_memmap().

  }
  static void free_map_bootmem(struct page *page, unsigned long nr_pages)
  {
 + vmemmap_free_bootmem(page, nr_pages);
  }

Hmm, which function is the memory you try to free allocated in?

  #else
  static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
 Index: linux-3.5-rc4/arch/x86/mm/init_64.c
 ===
 --- linux-3.5-rc4.orig/arch/x86/mm/init_64.c  2012-07-03 14:22:18.538011465 
 +0900
 +++ linux-3.5-rc4/arch/x86/mm/init_64.c   2012-07-03 14:22:21.007983103 
 +0900
 @@ -978,6 +978,97 @@ vmemmap_populate(struct page *start_page
   return 0;
  }
 
 +unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long end,
 +   struct page **pp)
 +{
 + pgd_t *pgd;
 + pud_t *pud;
 + pmd_t *pmd;
 + pte_t *pte;
 + unsigned long next;
 +
 + *pp = NULL;
 +
 + pgd = pgd_offset_k(addr);
 + if (pgd_none(*pgd))
 + return (addr + PAGE_SIZE)  PAGE_MASK;

Hmm, why not goto next pgd?

 +
 + pud = pud_offset(pgd, addr);
 + if (pud_none(*pud))
 + return (addr + PAGE_SIZE)  PAGE_MASK;
 +
 + if (!cpu_has_pse) {
 + next = (addr + PAGE_SIZE)  PAGE_MASK;
 + pmd = pmd_offset(pud, addr);
 + if (pmd_none(*pmd))
 + return next;
 +
 + pte = pte_offset_kernel(pmd, addr);
 + if (pte_none(*pte))
 + return next;
 +
 + *pp = pte_page(*pte);
 + pte_clear(init_mm, addr, pte);

I think you should flush tlb here.

 + } else {
 + next = pmd_addr_end(addr, end);
 +
 + pmd = pmd_offset(pud, addr);
 + if (pmd_none(*pmd))
 + return next;
 +
 + *pp = pmd_page(*pmd);
 + pmd_clear(pmd);
 + }
 +
 + return next;
 +}
 +
 +void __meminit
 +vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
 +{
 + unsigned long addr = (unsigned long)memmap;
 + unsigned long end = (unsigned long)(memmap + nr_pages);
 + unsigned long next;
 + unsigned int order;
 + struct page *page;
 +
 + for (; addr  end; addr = next) {
 + page = NULL;
 + next = find_and_clear_pte_page(addr, end, page);
 + if (!page)
 + continue;
 +
 + if (is_vmalloc_addr(page_address(page)))
 + vfree(page_address(page));

Hmm, the memory is allocated in vmemmap_alloc_block(), 

Re: [RFC PATCH v3 11/13] memory-hotplug : free memmap of sparse-vmemmap

2012-07-10 Thread Yasuaki Ishimatsu
2012/07/11 14:06, Wen Congyang wrote:
Hi Wen,

 At 07/09/2012 06:33 PM, Yasuaki Ishimatsu Wrote:
 I don't think that all pages of virtual mapping in removed memory can be
 freed, since page which type is MIX_SECTION_INFO is difficult to free.
 So, the patch only frees page which type is SECTION_INFO at first.

 CC: David Rientjes rient...@google.com
 CC: Jiang Liu liu...@gmail.com
 CC: Len Brown len.br...@intel.com
 CC: Benjamin Herrenschmidt b...@kernel.crashing.org
 CC: Paul Mackerras pau...@samba.org
 CC: Christoph Lameter c...@linux.com
 Cc: Minchan Kim minchan@gmail.com
 CC: Andrew Morton a...@linux-foundation.org
 CC: KOSAKI Motohiro kosaki.motoh...@jp.fujitsu.com
 CC: Wen Congyang we...@cn.fujitsu.com
 Signed-off-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com

 ---
   arch/x86/mm/init_64.c |   91 
 ++
   include/linux/mm.h|2 +
   mm/memory_hotplug.c   |5 ++
   mm/sparse.c   |5 +-
   4 files changed, 101 insertions(+), 2 deletions(-)

 Index: linux-3.5-rc4/include/linux/mm.h
 ===
 --- linux-3.5-rc4.orig/include/linux/mm.h2012-07-03 14:22:18.530011567 
 +0900
 +++ linux-3.5-rc4/include/linux/mm.h 2012-07-03 14:22:20.83872 +0900
 @@ -1588,6 +1588,8 @@ int vmemmap_populate(struct page *start_
   void vmemmap_populate_print_last(void);
   void register_page_bootmem_memmap(unsigned long section_nr, struct page 
 *map,
unsigned long size);
 +void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages);
 +void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages);

   enum mf_flags {
  MF_COUNT_INCREASED = 1  0,
 Index: linux-3.5-rc4/mm/sparse.c
 ===
 --- linux-3.5-rc4.orig/mm/sparse.c   2012-07-03 14:21:45.071429805 +0900
 +++ linux-3.5-rc4/mm/sparse.c2012-07-03 14:22:21.000983767 +0900
 @@ -614,12 +614,13 @@ static inline struct page *kmalloc_secti
  /* This will make the necessary allocations eventually. */
  return sparse_mem_map_populate(pnum, nid);
   }
 -static void __kfree_section_memmap(struct page *memmap, unsigned long 
 nr_pages)
 +static void __kfree_section_memmap(struct page *page, unsigned long 
 nr_pages)
   {
 -return; /* XXX: Not implemented yet */
 +vmemmap_kfree(page, nr_pages);
 
 Hmm, I think you try to free the memory allocated in kmalloc_section_memmap().

Yes.

 
   }
   static void free_map_bootmem(struct page *page, unsigned long nr_pages)
   {
 +vmemmap_free_bootmem(page, nr_pages);
   }
 
 Hmm, which function is the memory you try to free allocated in?

The function try to free memory allocated from bootmem. The memory has
been registered by get_page_bootmem(). So we can free the memory by
put_page_bootmem().

 
   #else
   static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
 Index: linux-3.5-rc4/arch/x86/mm/init_64.c
 ===
 --- linux-3.5-rc4.orig/arch/x86/mm/init_64.c 2012-07-03 14:22:18.538011465 
 +0900
 +++ linux-3.5-rc4/arch/x86/mm/init_64.c  2012-07-03 14:22:21.007983103 
 +0900
 @@ -978,6 +978,97 @@ vmemmap_populate(struct page *start_page
  return 0;
   }

 +unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long end,
 +  struct page **pp)
 +{
 +pgd_t *pgd;
 +pud_t *pud;
 +pmd_t *pmd;
 +pte_t *pte;
 +unsigned long next;
 +
 +*pp = NULL;
 +
 +pgd = pgd_offset_k(addr);
 +if (pgd_none(*pgd))
 +return (addr + PAGE_SIZE)  PAGE_MASK;
 
 Hmm, why not goto next pgd?

Does it mean return (addr + PGDIR_SIZE)  PGDIR_MASK?

 
 +
 +pud = pud_offset(pgd, addr);
 +if (pud_none(*pud))
 +return (addr + PAGE_SIZE)  PAGE_MASK;
 +
 +if (!cpu_has_pse) {
 +next = (addr + PAGE_SIZE)  PAGE_MASK;
 +pmd = pmd_offset(pud, addr);
 +if (pmd_none(*pmd))
 +return next;
 +
 +pte = pte_offset_kernel(pmd, addr);
 +if (pte_none(*pte))
 +return next;
 +
 +*pp = pte_page(*pte);
 +pte_clear(init_mm, addr, pte);
 
 I think you should flush tlb here.

Thanks, I'll update it.

 
 +} else {
 +next = pmd_addr_end(addr, end);
 +
 +pmd = pmd_offset(pud, addr);
 +if (pmd_none(*pmd))
 +return next;
 +
 +*pp = pmd_page(*pmd);
 +pmd_clear(pmd);
 +}
 +
 +return next;
 +}
 +
 +void __meminit
 +vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
 +{
 +unsigned long addr = (unsigned long)memmap;
 +unsigned long end = (unsigned long)(memmap + nr_pages);
 +unsigned long next;
 +unsigned int order;
 +struct page *page;
 +
 +for (; addr  end; addr = next) {
 +page = NULL;
 +  

[RFC PATCH v3 11/13] memory-hotplug : free memmap of sparse-vmemmap

2012-07-09 Thread Yasuaki Ishimatsu
I don't think that all pages of virtual mapping in removed memory can be
freed, since page which type is MIX_SECTION_INFO is difficult to free.
So, the patch only frees page which type is SECTION_INFO at first.

CC: David Rientjes rient...@google.com
CC: Jiang Liu liu...@gmail.com
CC: Len Brown len.br...@intel.com
CC: Benjamin Herrenschmidt b...@kernel.crashing.org
CC: Paul Mackerras pau...@samba.org
CC: Christoph Lameter c...@linux.com
Cc: Minchan Kim minchan@gmail.com
CC: Andrew Morton a...@linux-foundation.org
CC: KOSAKI Motohiro kosaki.motoh...@jp.fujitsu.com
CC: Wen Congyang we...@cn.fujitsu.com
Signed-off-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com

---
 arch/x86/mm/init_64.c |   91 ++
 include/linux/mm.h|2 +
 mm/memory_hotplug.c   |5 ++
 mm/sparse.c   |5 +-
 4 files changed, 101 insertions(+), 2 deletions(-)

Index: linux-3.5-rc4/include/linux/mm.h
===
--- linux-3.5-rc4.orig/include/linux/mm.h   2012-07-03 14:22:18.530011567 
+0900
+++ linux-3.5-rc4/include/linux/mm.h2012-07-03 14:22:20.83872 +0900
@@ -1588,6 +1588,8 @@ int vmemmap_populate(struct page *start_
 void vmemmap_populate_print_last(void);
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
  unsigned long size);
+void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages);
+void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages);

 enum mf_flags {
MF_COUNT_INCREASED = 1  0,
Index: linux-3.5-rc4/mm/sparse.c
===
--- linux-3.5-rc4.orig/mm/sparse.c  2012-07-03 14:21:45.071429805 +0900
+++ linux-3.5-rc4/mm/sparse.c   2012-07-03 14:22:21.000983767 +0900
@@ -614,12 +614,13 @@ static inline struct page *kmalloc_secti
/* This will make the necessary allocations eventually. */
return sparse_mem_map_populate(pnum, nid);
 }
-static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
+static void __kfree_section_memmap(struct page *page, unsigned long nr_pages)
 {
-   return; /* XXX: Not implemented yet */
+   vmemmap_kfree(page, nr_pages);
 }
 static void free_map_bootmem(struct page *page, unsigned long nr_pages)
 {
+   vmemmap_free_bootmem(page, nr_pages);
 }
 #else
 static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
Index: linux-3.5-rc4/arch/x86/mm/init_64.c
===
--- linux-3.5-rc4.orig/arch/x86/mm/init_64.c2012-07-03 14:22:18.538011465 
+0900
+++ linux-3.5-rc4/arch/x86/mm/init_64.c 2012-07-03 14:22:21.007983103 +0900
@@ -978,6 +978,97 @@ vmemmap_populate(struct page *start_page
return 0;
 }

+unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long end,
+ struct page **pp)
+{
+   pgd_t *pgd;
+   pud_t *pud;
+   pmd_t *pmd;
+   pte_t *pte;
+   unsigned long next;
+
+   *pp = NULL;
+
+   pgd = pgd_offset_k(addr);
+   if (pgd_none(*pgd))
+   return (addr + PAGE_SIZE)  PAGE_MASK;
+
+   pud = pud_offset(pgd, addr);
+   if (pud_none(*pud))
+   return (addr + PAGE_SIZE)  PAGE_MASK;
+
+   if (!cpu_has_pse) {
+   next = (addr + PAGE_SIZE)  PAGE_MASK;
+   pmd = pmd_offset(pud, addr);
+   if (pmd_none(*pmd))
+   return next;
+
+   pte = pte_offset_kernel(pmd, addr);
+   if (pte_none(*pte))
+   return next;
+
+   *pp = pte_page(*pte);
+   pte_clear(init_mm, addr, pte);
+   } else {
+   next = pmd_addr_end(addr, end);
+
+   pmd = pmd_offset(pud, addr);
+   if (pmd_none(*pmd))
+   return next;
+
+   *pp = pmd_page(*pmd);
+   pmd_clear(pmd);
+   }
+
+   return next;
+}
+
+void __meminit
+vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
+{
+   unsigned long addr = (unsigned long)memmap;
+   unsigned long end = (unsigned long)(memmap + nr_pages);
+   unsigned long next;
+   unsigned int order;
+   struct page *page;
+
+   for (; addr  end; addr = next) {
+   page = NULL;
+   next = find_and_clear_pte_page(addr, end, page);
+   if (!page)
+   continue;
+
+   if (is_vmalloc_addr(page_address(page)))
+   vfree(page_address(page));
+   else {
+   order = next - addr;
+   free_pages((unsigned long)page_address(page),
+  get_order(order));
+   }
+   }
+}
+
+void __meminit
+vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
+{
+   unsigned long addr =