Re: [RFC PATCH v3 11/13] memory-hotplug : free memmap of sparse-vmemmap
At 07/11/2012 01:52 PM, Yasuaki Ishimatsu Wrote: 2012/07/11 14:06, Wen Congyang wrote: Hi Wen, At 07/09/2012 06:33 PM, Yasuaki Ishimatsu Wrote: I don't think that all pages of virtual mapping in removed memory can be freed, since page which type is MIX_SECTION_INFO is difficult to free. So, the patch only frees page which type is SECTION_INFO at first. CC: David Rientjes rient...@google.com CC: Jiang Liu liu...@gmail.com CC: Len Brown len.br...@intel.com CC: Benjamin Herrenschmidt b...@kernel.crashing.org CC: Paul Mackerras pau...@samba.org CC: Christoph Lameter c...@linux.com Cc: Minchan Kim minchan@gmail.com CC: Andrew Morton a...@linux-foundation.org CC: KOSAKI Motohiro kosaki.motoh...@jp.fujitsu.com CC: Wen Congyang we...@cn.fujitsu.com Signed-off-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com --- arch/x86/mm/init_64.c | 91 ++ include/linux/mm.h|2 + mm/memory_hotplug.c |5 ++ mm/sparse.c |5 +- 4 files changed, 101 insertions(+), 2 deletions(-) Index: linux-3.5-rc4/include/linux/mm.h === --- linux-3.5-rc4.orig/include/linux/mm.h 2012-07-03 14:22:18.530011567 +0900 +++ linux-3.5-rc4/include/linux/mm.h2012-07-03 14:22:20.83872 +0900 @@ -1588,6 +1588,8 @@ int vmemmap_populate(struct page *start_ void vmemmap_populate_print_last(void); void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, unsigned long size); +void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages); +void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages); enum mf_flags { MF_COUNT_INCREASED = 1 0, Index: linux-3.5-rc4/mm/sparse.c === --- linux-3.5-rc4.orig/mm/sparse.c 2012-07-03 14:21:45.071429805 +0900 +++ linux-3.5-rc4/mm/sparse.c 2012-07-03 14:22:21.000983767 +0900 @@ -614,12 +614,13 @@ static inline struct page *kmalloc_secti /* This will make the necessary allocations eventually. */ return sparse_mem_map_populate(pnum, nid); } -static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) +static void __kfree_section_memmap(struct page *page, unsigned long nr_pages) { - return; /* XXX: Not implemented yet */ + vmemmap_kfree(page, nr_pages); Hmm, I think you try to free the memory allocated in kmalloc_section_memmap(). Yes. } static void free_map_bootmem(struct page *page, unsigned long nr_pages) { + vmemmap_free_bootmem(page, nr_pages); } Hmm, which function is the memory you try to free allocated in? The function try to free memory allocated from bootmem. The memory has been registered by get_page_bootmem(). So we can free the memory by put_page_bootmem(). OK, I will read these codes, and check it. #else static struct page *__kmalloc_section_memmap(unsigned long nr_pages) Index: linux-3.5-rc4/arch/x86/mm/init_64.c === --- linux-3.5-rc4.orig/arch/x86/mm/init_64.c2012-07-03 14:22:18.538011465 +0900 +++ linux-3.5-rc4/arch/x86/mm/init_64.c 2012-07-03 14:22:21.007983103 +0900 @@ -978,6 +978,97 @@ vmemmap_populate(struct page *start_page return 0; } +unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long end, + struct page **pp) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long next; + + *pp = NULL; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return (addr + PAGE_SIZE) PAGE_MASK; Hmm, why not goto next pgd? Does it mean return (addr + PGDIR_SIZE) PGDIR_MASK? + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return (addr + PAGE_SIZE) PAGE_MASK; + + if (!cpu_has_pse) { + next = (addr + PAGE_SIZE) PAGE_MASK; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return next; + + pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) + return next; + + *pp = pte_page(*pte); + pte_clear(init_mm, addr, pte); I think you should flush tlb here. Thanks, I'll update it. + } else { + next = pmd_addr_end(addr, end); + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return next; + + *pp = pmd_page(*pmd); + pmd_clear(pmd); + } + + return next; +} + +void __meminit +vmemmap_kfree(struct page *memmap, unsigned long nr_pages) +{ + unsigned long addr = (unsigned long)memmap; + unsigned long end = (unsigned long)(memmap + nr_pages); + unsigned long next; + unsigned int order; + struct page *page; +
Re: [RFC PATCH v3 11/13] memory-hotplug : free memmap of sparse-vmemmap
Hi Wen, 2012/07/11 15:25, Wen Congyang wrote: At 07/11/2012 01:52 PM, Yasuaki Ishimatsu Wrote: 2012/07/11 14:06, Wen Congyang wrote: Hi Wen, At 07/09/2012 06:33 PM, Yasuaki Ishimatsu Wrote: I don't think that all pages of virtual mapping in removed memory can be freed, since page which type is MIX_SECTION_INFO is difficult to free. So, the patch only frees page which type is SECTION_INFO at first. CC: David Rientjes rient...@google.com CC: Jiang Liu liu...@gmail.com CC: Len Brown len.br...@intel.com CC: Benjamin Herrenschmidt b...@kernel.crashing.org CC: Paul Mackerras pau...@samba.org CC: Christoph Lameter c...@linux.com Cc: Minchan Kim minchan@gmail.com CC: Andrew Morton a...@linux-foundation.org CC: KOSAKI Motohiro kosaki.motoh...@jp.fujitsu.com CC: Wen Congyang we...@cn.fujitsu.com Signed-off-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com --- arch/x86/mm/init_64.c | 91 ++ include/linux/mm.h|2 + mm/memory_hotplug.c |5 ++ mm/sparse.c |5 +- 4 files changed, 101 insertions(+), 2 deletions(-) Index: linux-3.5-rc4/include/linux/mm.h === --- linux-3.5-rc4.orig/include/linux/mm.h 2012-07-03 14:22:18.530011567 +0900 +++ linux-3.5-rc4/include/linux/mm.h 2012-07-03 14:22:20.83872 +0900 @@ -1588,6 +1588,8 @@ int vmemmap_populate(struct page *start_ void vmemmap_populate_print_last(void); void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, unsigned long size); +void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages); +void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages); enum mf_flags { MF_COUNT_INCREASED = 1 0, Index: linux-3.5-rc4/mm/sparse.c === --- linux-3.5-rc4.orig/mm/sparse.c 2012-07-03 14:21:45.071429805 +0900 +++ linux-3.5-rc4/mm/sparse.c 2012-07-03 14:22:21.000983767 +0900 @@ -614,12 +614,13 @@ static inline struct page *kmalloc_secti /* This will make the necessary allocations eventually. */ return sparse_mem_map_populate(pnum, nid); } -static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) +static void __kfree_section_memmap(struct page *page, unsigned long nr_pages) { - return; /* XXX: Not implemented yet */ + vmemmap_kfree(page, nr_pages); Hmm, I think you try to free the memory allocated in kmalloc_section_memmap(). Yes. } static void free_map_bootmem(struct page *page, unsigned long nr_pages) { + vmemmap_free_bootmem(page, nr_pages); } Hmm, which function is the memory you try to free allocated in? The function try to free memory allocated from bootmem. The memory has been registered by get_page_bootmem(). So we can free the memory by put_page_bootmem(). OK, I will read these codes, and check it. #else static struct page *__kmalloc_section_memmap(unsigned long nr_pages) Index: linux-3.5-rc4/arch/x86/mm/init_64.c === --- linux-3.5-rc4.orig/arch/x86/mm/init_64.c 2012-07-03 14:22:18.538011465 +0900 +++ linux-3.5-rc4/arch/x86/mm/init_64.c2012-07-03 14:22:21.007983103 +0900 @@ -978,6 +978,97 @@ vmemmap_populate(struct page *start_page return 0; } +unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long end, +struct page **pp) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long next; + + *pp = NULL; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return (addr + PAGE_SIZE) PAGE_MASK; Hmm, why not goto next pgd? Does it mean return (addr + PGDIR_SIZE) PGDIR_MASK? + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return (addr + PAGE_SIZE) PAGE_MASK; + + if (!cpu_has_pse) { + next = (addr + PAGE_SIZE) PAGE_MASK; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return next; + + pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) + return next; + + *pp = pte_page(*pte); + pte_clear(init_mm, addr, pte); I think you should flush tlb here. Thanks, I'll update it. + } else { + next = pmd_addr_end(addr, end); + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return next; + + *pp = pmd_page(*pmd); + pmd_clear(pmd); + } + + return next; +} + +void __meminit +vmemmap_kfree(struct page *memmap, unsigned long nr_pages) +{ + unsigned long addr = (unsigned long)memmap; + unsigned long end = (unsigned long)(memmap + nr_pages); + unsigned long next;
Re: [RFC PATCH v3 11/13] memory-hotplug : free memmap of sparse-vmemmap
At 07/11/2012 02:48 PM, Yasuaki Ishimatsu Wrote: Hi Wen, 2012/07/11 15:25, Wen Congyang wrote: At 07/11/2012 01:52 PM, Yasuaki Ishimatsu Wrote: 2012/07/11 14:06, Wen Congyang wrote: Hi Wen, At 07/09/2012 06:33 PM, Yasuaki Ishimatsu Wrote: I don't think that all pages of virtual mapping in removed memory can be freed, since page which type is MIX_SECTION_INFO is difficult to free. So, the patch only frees page which type is SECTION_INFO at first. CC: David Rientjes rient...@google.com CC: Jiang Liu liu...@gmail.com CC: Len Brown len.br...@intel.com CC: Benjamin Herrenschmidt b...@kernel.crashing.org CC: Paul Mackerras pau...@samba.org CC: Christoph Lameter c...@linux.com Cc: Minchan Kim minchan@gmail.com CC: Andrew Morton a...@linux-foundation.org CC: KOSAKI Motohiro kosaki.motoh...@jp.fujitsu.com CC: Wen Congyang we...@cn.fujitsu.com Signed-off-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com --- arch/x86/mm/init_64.c | 91 ++ include/linux/mm.h|2 + mm/memory_hotplug.c |5 ++ mm/sparse.c |5 +- 4 files changed, 101 insertions(+), 2 deletions(-) Index: linux-3.5-rc4/include/linux/mm.h === --- linux-3.5-rc4.orig/include/linux/mm.h 2012-07-03 14:22:18.530011567 +0900 +++ linux-3.5-rc4/include/linux/mm.h 2012-07-03 14:22:20.83872 +0900 @@ -1588,6 +1588,8 @@ int vmemmap_populate(struct page *start_ void vmemmap_populate_print_last(void); void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, unsigned long size); +void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages); +void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages); enum mf_flags { MF_COUNT_INCREASED = 1 0, Index: linux-3.5-rc4/mm/sparse.c === --- linux-3.5-rc4.orig/mm/sparse.c2012-07-03 14:21:45.071429805 +0900 +++ linux-3.5-rc4/mm/sparse.c 2012-07-03 14:22:21.000983767 +0900 @@ -614,12 +614,13 @@ static inline struct page *kmalloc_secti /* This will make the necessary allocations eventually. */ return sparse_mem_map_populate(pnum, nid); } -static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) +static void __kfree_section_memmap(struct page *page, unsigned long nr_pages) { - return; /* XXX: Not implemented yet */ + vmemmap_kfree(page, nr_pages); Hmm, I think you try to free the memory allocated in kmalloc_section_memmap(). Yes. } static void free_map_bootmem(struct page *page, unsigned long nr_pages) { + vmemmap_free_bootmem(page, nr_pages); } Hmm, which function is the memory you try to free allocated in? The function try to free memory allocated from bootmem. The memory has been registered by get_page_bootmem(). So we can free the memory by put_page_bootmem(). OK, I will read these codes, and check it. #else static struct page *__kmalloc_section_memmap(unsigned long nr_pages) Index: linux-3.5-rc4/arch/x86/mm/init_64.c === --- linux-3.5-rc4.orig/arch/x86/mm/init_64.c 2012-07-03 14:22:18.538011465 +0900 +++ linux-3.5-rc4/arch/x86/mm/init_64.c 2012-07-03 14:22:21.007983103 +0900 @@ -978,6 +978,97 @@ vmemmap_populate(struct page *start_page return 0; } +unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long end, + struct page **pp) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long next; + + *pp = NULL; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return (addr + PAGE_SIZE) PAGE_MASK; Hmm, why not goto next pgd? Does it mean return (addr + PGDIR_SIZE) PGDIR_MASK? + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return (addr + PAGE_SIZE) PAGE_MASK; + + if (!cpu_has_pse) { + next = (addr + PAGE_SIZE) PAGE_MASK; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return next; + + pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) + return next; + + *pp = pte_page(*pte); + pte_clear(init_mm, addr, pte); I think you should flush tlb here. Thanks, I'll update it. + } else { + next = pmd_addr_end(addr, end); + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return next; + + *pp = pmd_page(*pmd); + pmd_clear(pmd); + } + + return next; +} + +void __meminit +vmemmap_kfree(struct page *memmap, unsigned long nr_pages) +{ + unsigned long addr = (unsigned long)memmap; + unsigned long end = (unsigned long)(memmap + nr_pages); + unsigned
Re: [RFC PATCH v3 11/13] memory-hotplug : free memmap of sparse-vmemmap
At 07/09/2012 06:33 PM, Yasuaki Ishimatsu Wrote: I don't think that all pages of virtual mapping in removed memory can be freed, since page which type is MIX_SECTION_INFO is difficult to free. So, the patch only frees page which type is SECTION_INFO at first. CC: David Rientjes rient...@google.com CC: Jiang Liu liu...@gmail.com CC: Len Brown len.br...@intel.com CC: Benjamin Herrenschmidt b...@kernel.crashing.org CC: Paul Mackerras pau...@samba.org CC: Christoph Lameter c...@linux.com Cc: Minchan Kim minchan@gmail.com CC: Andrew Morton a...@linux-foundation.org CC: KOSAKI Motohiro kosaki.motoh...@jp.fujitsu.com CC: Wen Congyang we...@cn.fujitsu.com Signed-off-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com --- arch/x86/mm/init_64.c | 91 ++ include/linux/mm.h|2 + mm/memory_hotplug.c |5 ++ mm/sparse.c |5 +- 4 files changed, 101 insertions(+), 2 deletions(-) Index: linux-3.5-rc4/include/linux/mm.h === --- linux-3.5-rc4.orig/include/linux/mm.h 2012-07-03 14:22:18.530011567 +0900 +++ linux-3.5-rc4/include/linux/mm.h 2012-07-03 14:22:20.83872 +0900 @@ -1588,6 +1588,8 @@ int vmemmap_populate(struct page *start_ void vmemmap_populate_print_last(void); void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, unsigned long size); +void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages); +void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages); enum mf_flags { MF_COUNT_INCREASED = 1 0, Index: linux-3.5-rc4/mm/sparse.c === --- linux-3.5-rc4.orig/mm/sparse.c2012-07-03 14:21:45.071429805 +0900 +++ linux-3.5-rc4/mm/sparse.c 2012-07-03 14:22:21.000983767 +0900 @@ -614,12 +614,13 @@ static inline struct page *kmalloc_secti /* This will make the necessary allocations eventually. */ return sparse_mem_map_populate(pnum, nid); } -static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) +static void __kfree_section_memmap(struct page *page, unsigned long nr_pages) { - return; /* XXX: Not implemented yet */ + vmemmap_kfree(page, nr_pages); Hmm, I think you try to free the memory allocated in kmalloc_section_memmap(). } static void free_map_bootmem(struct page *page, unsigned long nr_pages) { + vmemmap_free_bootmem(page, nr_pages); } Hmm, which function is the memory you try to free allocated in? #else static struct page *__kmalloc_section_memmap(unsigned long nr_pages) Index: linux-3.5-rc4/arch/x86/mm/init_64.c === --- linux-3.5-rc4.orig/arch/x86/mm/init_64.c 2012-07-03 14:22:18.538011465 +0900 +++ linux-3.5-rc4/arch/x86/mm/init_64.c 2012-07-03 14:22:21.007983103 +0900 @@ -978,6 +978,97 @@ vmemmap_populate(struct page *start_page return 0; } +unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long end, + struct page **pp) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long next; + + *pp = NULL; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return (addr + PAGE_SIZE) PAGE_MASK; Hmm, why not goto next pgd? + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return (addr + PAGE_SIZE) PAGE_MASK; + + if (!cpu_has_pse) { + next = (addr + PAGE_SIZE) PAGE_MASK; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return next; + + pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) + return next; + + *pp = pte_page(*pte); + pte_clear(init_mm, addr, pte); I think you should flush tlb here. + } else { + next = pmd_addr_end(addr, end); + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return next; + + *pp = pmd_page(*pmd); + pmd_clear(pmd); + } + + return next; +} + +void __meminit +vmemmap_kfree(struct page *memmap, unsigned long nr_pages) +{ + unsigned long addr = (unsigned long)memmap; + unsigned long end = (unsigned long)(memmap + nr_pages); + unsigned long next; + unsigned int order; + struct page *page; + + for (; addr end; addr = next) { + page = NULL; + next = find_and_clear_pte_page(addr, end, page); + if (!page) + continue; + + if (is_vmalloc_addr(page_address(page))) + vfree(page_address(page)); Hmm, the memory is allocated in vmemmap_alloc_block(),
Re: [RFC PATCH v3 11/13] memory-hotplug : free memmap of sparse-vmemmap
2012/07/11 14:06, Wen Congyang wrote: Hi Wen, At 07/09/2012 06:33 PM, Yasuaki Ishimatsu Wrote: I don't think that all pages of virtual mapping in removed memory can be freed, since page which type is MIX_SECTION_INFO is difficult to free. So, the patch only frees page which type is SECTION_INFO at first. CC: David Rientjes rient...@google.com CC: Jiang Liu liu...@gmail.com CC: Len Brown len.br...@intel.com CC: Benjamin Herrenschmidt b...@kernel.crashing.org CC: Paul Mackerras pau...@samba.org CC: Christoph Lameter c...@linux.com Cc: Minchan Kim minchan@gmail.com CC: Andrew Morton a...@linux-foundation.org CC: KOSAKI Motohiro kosaki.motoh...@jp.fujitsu.com CC: Wen Congyang we...@cn.fujitsu.com Signed-off-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com --- arch/x86/mm/init_64.c | 91 ++ include/linux/mm.h|2 + mm/memory_hotplug.c |5 ++ mm/sparse.c |5 +- 4 files changed, 101 insertions(+), 2 deletions(-) Index: linux-3.5-rc4/include/linux/mm.h === --- linux-3.5-rc4.orig/include/linux/mm.h2012-07-03 14:22:18.530011567 +0900 +++ linux-3.5-rc4/include/linux/mm.h 2012-07-03 14:22:20.83872 +0900 @@ -1588,6 +1588,8 @@ int vmemmap_populate(struct page *start_ void vmemmap_populate_print_last(void); void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, unsigned long size); +void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages); +void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages); enum mf_flags { MF_COUNT_INCREASED = 1 0, Index: linux-3.5-rc4/mm/sparse.c === --- linux-3.5-rc4.orig/mm/sparse.c 2012-07-03 14:21:45.071429805 +0900 +++ linux-3.5-rc4/mm/sparse.c2012-07-03 14:22:21.000983767 +0900 @@ -614,12 +614,13 @@ static inline struct page *kmalloc_secti /* This will make the necessary allocations eventually. */ return sparse_mem_map_populate(pnum, nid); } -static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) +static void __kfree_section_memmap(struct page *page, unsigned long nr_pages) { -return; /* XXX: Not implemented yet */ +vmemmap_kfree(page, nr_pages); Hmm, I think you try to free the memory allocated in kmalloc_section_memmap(). Yes. } static void free_map_bootmem(struct page *page, unsigned long nr_pages) { +vmemmap_free_bootmem(page, nr_pages); } Hmm, which function is the memory you try to free allocated in? The function try to free memory allocated from bootmem. The memory has been registered by get_page_bootmem(). So we can free the memory by put_page_bootmem(). #else static struct page *__kmalloc_section_memmap(unsigned long nr_pages) Index: linux-3.5-rc4/arch/x86/mm/init_64.c === --- linux-3.5-rc4.orig/arch/x86/mm/init_64.c 2012-07-03 14:22:18.538011465 +0900 +++ linux-3.5-rc4/arch/x86/mm/init_64.c 2012-07-03 14:22:21.007983103 +0900 @@ -978,6 +978,97 @@ vmemmap_populate(struct page *start_page return 0; } +unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long end, + struct page **pp) +{ +pgd_t *pgd; +pud_t *pud; +pmd_t *pmd; +pte_t *pte; +unsigned long next; + +*pp = NULL; + +pgd = pgd_offset_k(addr); +if (pgd_none(*pgd)) +return (addr + PAGE_SIZE) PAGE_MASK; Hmm, why not goto next pgd? Does it mean return (addr + PGDIR_SIZE) PGDIR_MASK? + +pud = pud_offset(pgd, addr); +if (pud_none(*pud)) +return (addr + PAGE_SIZE) PAGE_MASK; + +if (!cpu_has_pse) { +next = (addr + PAGE_SIZE) PAGE_MASK; +pmd = pmd_offset(pud, addr); +if (pmd_none(*pmd)) +return next; + +pte = pte_offset_kernel(pmd, addr); +if (pte_none(*pte)) +return next; + +*pp = pte_page(*pte); +pte_clear(init_mm, addr, pte); I think you should flush tlb here. Thanks, I'll update it. +} else { +next = pmd_addr_end(addr, end); + +pmd = pmd_offset(pud, addr); +if (pmd_none(*pmd)) +return next; + +*pp = pmd_page(*pmd); +pmd_clear(pmd); +} + +return next; +} + +void __meminit +vmemmap_kfree(struct page *memmap, unsigned long nr_pages) +{ +unsigned long addr = (unsigned long)memmap; +unsigned long end = (unsigned long)(memmap + nr_pages); +unsigned long next; +unsigned int order; +struct page *page; + +for (; addr end; addr = next) { +page = NULL; +
[RFC PATCH v3 11/13] memory-hotplug : free memmap of sparse-vmemmap
I don't think that all pages of virtual mapping in removed memory can be freed, since page which type is MIX_SECTION_INFO is difficult to free. So, the patch only frees page which type is SECTION_INFO at first. CC: David Rientjes rient...@google.com CC: Jiang Liu liu...@gmail.com CC: Len Brown len.br...@intel.com CC: Benjamin Herrenschmidt b...@kernel.crashing.org CC: Paul Mackerras pau...@samba.org CC: Christoph Lameter c...@linux.com Cc: Minchan Kim minchan@gmail.com CC: Andrew Morton a...@linux-foundation.org CC: KOSAKI Motohiro kosaki.motoh...@jp.fujitsu.com CC: Wen Congyang we...@cn.fujitsu.com Signed-off-by: Yasuaki Ishimatsu isimatu.yasu...@jp.fujitsu.com --- arch/x86/mm/init_64.c | 91 ++ include/linux/mm.h|2 + mm/memory_hotplug.c |5 ++ mm/sparse.c |5 +- 4 files changed, 101 insertions(+), 2 deletions(-) Index: linux-3.5-rc4/include/linux/mm.h === --- linux-3.5-rc4.orig/include/linux/mm.h 2012-07-03 14:22:18.530011567 +0900 +++ linux-3.5-rc4/include/linux/mm.h2012-07-03 14:22:20.83872 +0900 @@ -1588,6 +1588,8 @@ int vmemmap_populate(struct page *start_ void vmemmap_populate_print_last(void); void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, unsigned long size); +void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages); +void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages); enum mf_flags { MF_COUNT_INCREASED = 1 0, Index: linux-3.5-rc4/mm/sparse.c === --- linux-3.5-rc4.orig/mm/sparse.c 2012-07-03 14:21:45.071429805 +0900 +++ linux-3.5-rc4/mm/sparse.c 2012-07-03 14:22:21.000983767 +0900 @@ -614,12 +614,13 @@ static inline struct page *kmalloc_secti /* This will make the necessary allocations eventually. */ return sparse_mem_map_populate(pnum, nid); } -static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) +static void __kfree_section_memmap(struct page *page, unsigned long nr_pages) { - return; /* XXX: Not implemented yet */ + vmemmap_kfree(page, nr_pages); } static void free_map_bootmem(struct page *page, unsigned long nr_pages) { + vmemmap_free_bootmem(page, nr_pages); } #else static struct page *__kmalloc_section_memmap(unsigned long nr_pages) Index: linux-3.5-rc4/arch/x86/mm/init_64.c === --- linux-3.5-rc4.orig/arch/x86/mm/init_64.c2012-07-03 14:22:18.538011465 +0900 +++ linux-3.5-rc4/arch/x86/mm/init_64.c 2012-07-03 14:22:21.007983103 +0900 @@ -978,6 +978,97 @@ vmemmap_populate(struct page *start_page return 0; } +unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long end, + struct page **pp) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + unsigned long next; + + *pp = NULL; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return (addr + PAGE_SIZE) PAGE_MASK; + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return (addr + PAGE_SIZE) PAGE_MASK; + + if (!cpu_has_pse) { + next = (addr + PAGE_SIZE) PAGE_MASK; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return next; + + pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) + return next; + + *pp = pte_page(*pte); + pte_clear(init_mm, addr, pte); + } else { + next = pmd_addr_end(addr, end); + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return next; + + *pp = pmd_page(*pmd); + pmd_clear(pmd); + } + + return next; +} + +void __meminit +vmemmap_kfree(struct page *memmap, unsigned long nr_pages) +{ + unsigned long addr = (unsigned long)memmap; + unsigned long end = (unsigned long)(memmap + nr_pages); + unsigned long next; + unsigned int order; + struct page *page; + + for (; addr end; addr = next) { + page = NULL; + next = find_and_clear_pte_page(addr, end, page); + if (!page) + continue; + + if (is_vmalloc_addr(page_address(page))) + vfree(page_address(page)); + else { + order = next - addr; + free_pages((unsigned long)page_address(page), + get_order(order)); + } + } +} + +void __meminit +vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages) +{ + unsigned long addr =