Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-06 Thread Jianguo Wu
Hi Tang,

On 2012/12/7 9:42, Tang Chen wrote:

> Hi Wu,
> 
> I met some problems when I was digging into the code. It's very
> kind of you if you could help me with that. :)
> 
> If I misunderstood your code, please tell me.
> Please see below. :)
> 
> On 12/03/2012 10:23 AM, Jianguo Wu wrote:
>> Signed-off-by: Jianguo Wu
>> Signed-off-by: Jiang Liu
>> ---
>>   include/linux/mm.h  |1 +
>>   mm/sparse-vmemmap.c |  231 
>> +++
>>   mm/sparse.c |3 +-
>>   3 files changed, 234 insertions(+), 1 deletions(-)
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 5657670..1f26af5 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
>> long pages, int node);
>>   void vmemmap_populate_print_last(void);
>>   void register_page_bootmem_memmap(unsigned long section_nr, struct page 
>> *map,
>> unsigned long size);
>> +void vmemmap_free(struct page *memmap, unsigned long nr_pages);
>>
>>   enum mf_flags {
>>   MF_COUNT_INCREASED = 1<<  0,
>> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
>> index 1b7e22a..748732d 100644
>> --- a/mm/sparse-vmemmap.c
>> +++ b/mm/sparse-vmemmap.c
>> @@ -29,6 +29,10 @@
>>   #include
>>   #include
>>
>> +#ifdef CONFIG_MEMORY_HOTREMOVE
>> +#include
>> +#endif
>> +
>>   /*
>>* Allocate a block of memory to be used to back the virtual memory map
>>* or to back the page tables that are used to create the mapping.
>> @@ -224,3 +228,230 @@ void __init sparse_mem_maps_populate_node(struct page 
>> **map_map,
>>   vmemmap_buf_end = NULL;
>>   }
>>   }
>> +
>> +#ifdef CONFIG_MEMORY_HOTREMOVE
>> +
>> +#define PAGE_INUSE 0xFD
>> +
>> +static void vmemmap_free_pages(struct page *page, int order)
>> +{
>> +struct zone *zone;
>> +unsigned long magic;
>> +
>> +magic = (unsigned long) page->lru.next;
>> +if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
>> +put_page_bootmem(page);
>> +
>> +zone = page_zone(page);
>> +zone_span_writelock(zone);
>> +zone->present_pages++;
>> +zone_span_writeunlock(zone);
>> +totalram_pages++;
>> +} else
>> +free_pages((unsigned long)page_address(page), order);
> 
> Here, I think SECTION_INFO and MIX_SECTION_INFO pages are all allocated
> by bootmem, so I put this function this way.
> 
> I'm not sure if parameter order is necessary here. It will always be 0
> in your code. Is this OK to you ?
> 

parameter order is necessary in cpu_has_pse case:
vmemmap_pmd_remove
free_pagetable(pmd_page(*pmd), get_order(PMD_SIZE))

> static void free_pagetable(struct page *page)
> {
> struct zone *zone;
> bool bootmem = false;
> unsigned long magic;
> 
> /* bootmem page has reserved flag */
> if (PageReserved(page)) {
> __ClearPageReserved(page);
> bootmem = true;
> }
> 
> magic = (unsigned long) page->lru.next;
> if (magic == SECTION_INFO || magic == MIX_SECTION_INFO)
> put_page_bootmem(page);
> else
> __free_page(page);
> 
> /*
>  * SECTION_INFO pages and MIX_SECTION_INFO pages
>  * are all allocated by bootmem.
>  */
> if (bootmem) {
> zone = page_zone(page);
> zone_span_writelock(zone);
> zone->present_pages++;
> zone_span_writeunlock(zone);
> totalram_pages++;
> }
> }
> 
> (snip)
> 
>> +
>> +static void vmemmap_pte_remove(pmd_t *pmd, unsigned long addr, unsigned 
>> long end)
>> +{
>> +pte_t *pte;
>> +unsigned long next;
>> +void *page_addr;
>> +
>> +pte = pte_offset_kernel(pmd, addr);
>> +for (; addr<  end; pte++, addr += PAGE_SIZE) {
>> +next = (addr + PAGE_SIZE)&  PAGE_MASK;
>> +if (next>  end)
>> +next = end;
>> +
>> +if (pte_none(*pte))
> 
> Here, you checked xxx_none() in your vmemmap_xxx_remove(), but you used
> !xxx_present() in your x86_64 patches. Is it OK if I only check
> !xxx_present() ?

It is Ok.

> 
>> +continue;
>> +if (IS_ALIGNED(addr, PAGE_SIZE)&&
>> +IS_ALIGNED(next, PAGE_SIZE)) {
>> +vmemmap_free_pages(pte_page(*pte), 0);
>> +spin_lock(&init_mm.page_table_lock);
>> +pte_clear(&init_mm, addr, pte);
>> +spin_unlock(&init_mm.page_table_lock);
>> +} else {
>> +/*
>> + * Removed page structs are filled with 0xFD.
>> + */
>> +memset((void *)addr, PAGE_INUSE, next - addr);
>> +page_addr = page_address(pte_page(*pte));
>> +
>> +if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
>> +spin_lock(&init_mm.page_table_lock);
>> +pte_clear(&init_mm, addr, pte);
>> +  

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-06 Thread Tang Chen

Hi Wu,

I met some problems when I was digging into the code. It's very
kind of you if you could help me with that. :)

If I misunderstood your code, please tell me.
Please see below. :)

On 12/03/2012 10:23 AM, Jianguo Wu wrote:

Signed-off-by: Jianguo Wu
Signed-off-by: Jiang Liu
---
  include/linux/mm.h  |1 +
  mm/sparse-vmemmap.c |  231 +++
  mm/sparse.c |3 +-
  3 files changed, 234 insertions(+), 1 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5657670..1f26af5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
long pages, int node);
  void vmemmap_populate_print_last(void);
  void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
  unsigned long size);
+void vmemmap_free(struct page *memmap, unsigned long nr_pages);

  enum mf_flags {
MF_COUNT_INCREASED = 1<<  0,
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 1b7e22a..748732d 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -29,6 +29,10 @@
  #include
  #include

+#ifdef CONFIG_MEMORY_HOTREMOVE
+#include
+#endif
+
  /*
   * Allocate a block of memory to be used to back the virtual memory map
   * or to back the page tables that are used to create the mapping.
@@ -224,3 +228,230 @@ void __init sparse_mem_maps_populate_node(struct page 
**map_map,
vmemmap_buf_end = NULL;
}
  }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+
+#define PAGE_INUSE 0xFD
+
+static void vmemmap_free_pages(struct page *page, int order)
+{
+   struct zone *zone;
+   unsigned long magic;
+
+   magic = (unsigned long) page->lru.next;
+   if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+   put_page_bootmem(page);
+
+   zone = page_zone(page);
+   zone_span_writelock(zone);
+   zone->present_pages++;
+   zone_span_writeunlock(zone);
+   totalram_pages++;
+   } else
+   free_pages((unsigned long)page_address(page), order);


Here, I think SECTION_INFO and MIX_SECTION_INFO pages are all allocated
by bootmem, so I put this function this way.

I'm not sure if parameter order is necessary here. It will always be 0
in your code. Is this OK to you ?

static void free_pagetable(struct page *page)
{
struct zone *zone;
bool bootmem = false;
unsigned long magic;

/* bootmem page has reserved flag */
if (PageReserved(page)) {
__ClearPageReserved(page);
bootmem = true;
}

magic = (unsigned long) page->lru.next;
if (magic == SECTION_INFO || magic == MIX_SECTION_INFO)
put_page_bootmem(page);
else
__free_page(page);

/*
 * SECTION_INFO pages and MIX_SECTION_INFO pages
 * are all allocated by bootmem.
 */
if (bootmem) {
zone = page_zone(page);
zone_span_writelock(zone);
zone->present_pages++;
zone_span_writeunlock(zone);
totalram_pages++;
}
}

(snip)


+
+static void vmemmap_pte_remove(pmd_t *pmd, unsigned long addr, unsigned long 
end)
+{
+   pte_t *pte;
+   unsigned long next;
+   void *page_addr;
+
+   pte = pte_offset_kernel(pmd, addr);
+   for (; addr<  end; pte++, addr += PAGE_SIZE) {
+   next = (addr + PAGE_SIZE)&  PAGE_MASK;
+   if (next>  end)
+   next = end;
+
+   if (pte_none(*pte))


Here, you checked xxx_none() in your vmemmap_xxx_remove(), but you used
!xxx_present() in your x86_64 patches. Is it OK if I only check
!xxx_present() ?


+   continue;
+   if (IS_ALIGNED(addr, PAGE_SIZE)&&
+   IS_ALIGNED(next, PAGE_SIZE)) {
+   vmemmap_free_pages(pte_page(*pte), 0);
+   spin_lock(&init_mm.page_table_lock);
+   pte_clear(&init_mm, addr, pte);
+   spin_unlock(&init_mm.page_table_lock);
+   } else {
+   /*
+* Removed page structs are filled with 0xFD.
+*/
+   memset((void *)addr, PAGE_INUSE, next - addr);
+   page_addr = page_address(pte_page(*pte));
+
+   if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
+   spin_lock(&init_mm.page_table_lock);
+   pte_clear(&init_mm, addr, pte);
+   spin_unlock(&init_mm.page_table_lock);


Here, since we clear pte, we should also free the page, right ?


+   }
+   }
+   }
+
+   free_pte_table(pmd);
+   __flush_tlb_all();
+}
+
+static void vmemmap_pmd_remove(pud_t *pud, unsigned 

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-06 Thread Tang Chen

Hi Wu,

I met some problems when I was digging into the code. It's very
kind of you if you could help me with that. :)

If I misunderstood your code, please tell me.
Please see below. :)

On 12/03/2012 10:23 AM, Jianguo Wu wrote:

Signed-off-by: Jianguo Wu <wujian...@huawei.com>
Signed-off-by: Jiang Liu <jiang@huawei.com>
---
  include/linux/mm.h  |1 +
  mm/sparse-vmemmap.c |  231 +++
  mm/sparse.c |3 +-
  3 files changed, 234 insertions(+), 1 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5657670..1f26af5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
long pages, int node);
  void vmemmap_populate_print_last(void);
  void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
  unsigned long size);
+void vmemmap_free(struct page *memmap, unsigned long nr_pages);

  enum mf_flags {
	MF_COUNT_INCREASED = 1 << 0,
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 1b7e22a..748732d 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -29,6 +29,10 @@
  #include <asm/pgalloc.h>
  #include <asm/pgtable.h>

+#ifdef CONFIG_MEMORY_HOTREMOVE
+#include <asm/tlbflush.h>
+#endif
+
  /*
   * Allocate a block of memory to be used to back the virtual memory map
   * or to back the page tables that are used to create the mapping.
@@ -224,3 +228,230 @@ void __init sparse_mem_maps_populate_node(struct page 
**map_map,
vmemmap_buf_end = NULL;
}
  }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+
+#define PAGE_INUSE 0xFD
+
+static void vmemmap_free_pages(struct page *page, int order)
+{
+   struct zone *zone;
+   unsigned long magic;
+
+   magic = (unsigned long) page->lru.next;
+   if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+   put_page_bootmem(page);
+
+   zone = page_zone(page);
+   zone_span_writelock(zone);
+   zone->present_pages++;
+   zone_span_writeunlock(zone);
+   totalram_pages++;
+   } else
+   free_pages((unsigned long)page_address(page), order);


Here, I think SECTION_INFO and MIX_SECTION_INFO pages are all allocated
by bootmem, so I put this function this way.

I'm not sure if parameter order is necessary here. It will always be 0
in your code. Is this OK to you ?

static void free_pagetable(struct page *page)
{
struct zone *zone;
bool bootmem = false;
unsigned long magic;

/* bootmem page has reserved flag */
if (PageReserved(page)) {
__ClearPageReserved(page);
bootmem = true;
}

magic = (unsigned long) page-lru.next;
if (magic == SECTION_INFO || magic == MIX_SECTION_INFO)
put_page_bootmem(page);
else
__free_page(page);

/*
 * SECTION_INFO pages and MIX_SECTION_INFO pages
 * are all allocated by bootmem.
 */
if (bootmem) {
zone = page_zone(page);
zone_span_writelock(zone);
zone-present_pages++;
zone_span_writeunlock(zone);
totalram_pages++;
}
}

(snip)


+
+static void vmemmap_pte_remove(pmd_t *pmd, unsigned long addr, unsigned long 
end)
+{
+   pte_t *pte;
+   unsigned long next;
+   void *page_addr;
+
+   pte = pte_offset_kernel(pmd, addr);
+   for (; addr < end; pte++, addr += PAGE_SIZE) {
+   next = (addr + PAGE_SIZE) & PAGE_MASK;
+   if (next > end)
+   next = end;
+
+   if (pte_none(*pte))


Here, you checked xxx_none() in your vmemmap_xxx_remove(), but you used
!xxx_present() in your x86_64 patches. Is it OK if I only check
!xxx_present() ?


+   continue;
+   if (IS_ALIGNED(addr, PAGE_SIZE) &&
+   IS_ALIGNED(next, PAGE_SIZE)) {
+   vmemmap_free_pages(pte_page(*pte), 0);
+   spin_lock(&init_mm.page_table_lock);
+   pte_clear(&init_mm, addr, pte);
+   spin_unlock(&init_mm.page_table_lock);
+   } else {
+   /*
+* Removed page structs are filled with 0xFD.
+*/
+   memset((void *)addr, PAGE_INUSE, next - addr);
+   page_addr = page_address(pte_page(*pte));
+
+   if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
+   spin_lock(&init_mm.page_table_lock);
+   pte_clear(&init_mm, addr, pte);
+   spin_unlock(&init_mm.page_table_lock);


Here, since we clear pte, we should also free the page, right ?


+   }
+   }
+   }
+
+   

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-06 Thread Jianguo Wu
Hi Tang,

On 2012/12/7 9:42, Tang Chen wrote:

 Hi Wu,
 
 I met some problems when I was digging into the code. It's very
 kind of you if you could help me with that. :)
 
 If I misunderstood your code, please tell me.
 Please see below. :)
 
 On 12/03/2012 10:23 AM, Jianguo Wu wrote:
 Signed-off-by: Jianguo Wuwujian...@huawei.com
 Signed-off-by: Jiang Liujiang@huawei.com
 ---
   include/linux/mm.h  |1 +
   mm/sparse-vmemmap.c |  231 
 +++
   mm/sparse.c |3 +-
   3 files changed, 234 insertions(+), 1 deletions(-)

 diff --git a/include/linux/mm.h b/include/linux/mm.h
 index 5657670..1f26af5 100644
 --- a/include/linux/mm.h
 +++ b/include/linux/mm.h
 @@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
 long pages, int node);
   void vmemmap_populate_print_last(void);
   void register_page_bootmem_memmap(unsigned long section_nr, struct page 
 *map,
 unsigned long size);
 +void vmemmap_free(struct page *memmap, unsigned long nr_pages);

   enum mf_flags {
   MF_COUNT_INCREASED = 1  0,
 diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
 index 1b7e22a..748732d 100644
 --- a/mm/sparse-vmemmap.c
 +++ b/mm/sparse-vmemmap.c
 @@ -29,6 +29,10 @@
   #includeasm/pgalloc.h
   #includeasm/pgtable.h

 +#ifdef CONFIG_MEMORY_HOTREMOVE
 +#includeasm/tlbflush.h
 +#endif
 +
   /*
* Allocate a block of memory to be used to back the virtual memory map
* or to back the page tables that are used to create the mapping.
 @@ -224,3 +228,230 @@ void __init sparse_mem_maps_populate_node(struct page 
 **map_map,
   vmemmap_buf_end = NULL;
   }
   }
 +
 +#ifdef CONFIG_MEMORY_HOTREMOVE
 +
 +#define PAGE_INUSE 0xFD
 +
 +static void vmemmap_free_pages(struct page *page, int order)
 +{
 +struct zone *zone;
 +unsigned long magic;
 +
 +magic = (unsigned long) page-lru.next;
 +if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
 +put_page_bootmem(page);
 +
 +zone = page_zone(page);
 +zone_span_writelock(zone);
 +zone-present_pages++;
 +zone_span_writeunlock(zone);
 +totalram_pages++;
 +} else
 +free_pages((unsigned long)page_address(page), order);
 
 Here, I think SECTION_INFO and MIX_SECTION_INFO pages are all allocated
 by bootmem, so I put this function this way.
 
 I'm not sure if parameter order is necessary here. It will always be 0
 in your code. Is this OK to you ?
 

parameter order is necessary in cpu_has_pse case:
vmemmap_pmd_remove
free_pagetable(pmd_page(*pmd), get_order(PMD_SIZE))

 static void free_pagetable(struct page *page)
 {
 struct zone *zone;
 bool bootmem = false;
 unsigned long magic;
 
 /* bootmem page has reserved flag */
 if (PageReserved(page)) {
 __ClearPageReserved(page);
 bootmem = true;
 }
 
 magic = (unsigned long) page-lru.next;
 if (magic == SECTION_INFO || magic == MIX_SECTION_INFO)
 put_page_bootmem(page);
 else
 __free_page(page);
 
 /*
  * SECTION_INFO pages and MIX_SECTION_INFO pages
  * are all allocated by bootmem.
  */
 if (bootmem) {
 zone = page_zone(page);
 zone_span_writelock(zone);
 zone-present_pages++;
 zone_span_writeunlock(zone);
 totalram_pages++;
 }
 }
 
 (snip)
 
 +
 +static void vmemmap_pte_remove(pmd_t *pmd, unsigned long addr, unsigned 
 long end)
 +{
 +pte_t *pte;
 +unsigned long next;
 +void *page_addr;
 +
 +pte = pte_offset_kernel(pmd, addr);
 +for (; addr  end; pte++, addr += PAGE_SIZE) {
 +next = (addr + PAGE_SIZE)  PAGE_MASK;
 +if (next  end)
 +next = end;
 +
 +if (pte_none(*pte))
 
 Here, you checked xxx_none() in your vmemmap_xxx_remove(), but you used
 !xxx_present() in your x86_64 patches. Is it OK if I only check
 !xxx_present() ?

It is Ok.

 
 +continue;
 +if (IS_ALIGNED(addr, PAGE_SIZE)
 +IS_ALIGNED(next, PAGE_SIZE)) {
 +vmemmap_free_pages(pte_page(*pte), 0);
 +spin_lock(init_mm.page_table_lock);
 +pte_clear(init_mm, addr, pte);
 +spin_unlock(init_mm.page_table_lock);
 +} else {
 +/*
 + * Removed page structs are filled with 0xFD.
 + */
 +memset((void *)addr, PAGE_INUSE, next - addr);
 +page_addr = page_address(pte_page(*pte));
 +
 +if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
 +spin_lock(init_mm.page_table_lock);
 +pte_clear(init_mm, addr, pte);
 +spin_unlock(init_mm.page_table_lock);
 
 Here, since we clear pte, we should also free the page, right ?
 

Right, I forgot here, sorry.

 +}
 + 

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-04 Thread Jianguo Wu
Hi Tang,

On 2012/12/5 10:07, Tang Chen wrote:

> Hi Wu,
> 
> On 12/04/2012 08:20 PM, Jianguo Wu wrote:
> (snip)
>>>
>>> Seems that we have different ways to handle pages allocated by bootmem
>>> or by regular allocator. Is the checking way in [PATCH 09/12] available
>>> here ?
>>>
>>> +/* bootmem page has reserved flag */
>>> +if (PageReserved(page)) {
>>> ..
>>> +}
>>>
>>> If so, I think we can just merge these two functions.
>>
>> Hmm, direct mapping table isn't allocated by bootmem allocator such as 
>> memblock, can't be free by put_page_bootmem().
>> But I will try to merge these two functions.
>>
> 
> Oh, I didn't notice this, thanks. :)
> 
> (snip)
> 
 +
 +__split_large_page(kpte, address, pbase);
>>>
>>> Is this patch going to replace [PATCH 08/12] ?
>>>
>>
>> I wish to replace [PATCH 08/12], but need Congyang and Yasuaki to confirm 
>> first:)
>>
>>> If so, __split_large_page() was added and exported in [PATCH 09/12],
>>> then we should move it here, right ?
>>
>> yes.
>>
>> and what do you think about moving vmemmap_pud[pmd/pte]_remove() to 
>> arch/x86/mm/init_64.c,
>> to be consistent with vmemmap_populate() ?
> 
> It is a good idea since pud/pmd/pte related code could be platform
> dependent. And I'm also trying to move vmemmap_free() to
> arch/x86/mm/init_64.c too. I want to have a common interface just
> like vmemmap_populate(). :)
> 

Great.

>>
>> I will rework [PATCH 08/12] and [PATCH 09/12] soon.
> 
> I am rebasing the whole patch set now. And I think I chould finish part
> of your work too. A new patch-set is coming soon, and your rework is
> also welcome. :)
>

Since you are rebasing now, I will wait for your new patche-set :).

Thanks.
Jianguo Wu

> Thanks. :)
> 
> 
> 
> .
> 



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-04 Thread Tang Chen

Hi Wu,

On 12/04/2012 08:20 PM, Jianguo Wu wrote:
(snip)


Seems that we have different ways to handle pages allocated by bootmem
or by regular allocator. Is the checking way in [PATCH 09/12] available
here ?

+/* bootmem page has reserved flag */
+if (PageReserved(page)) {
..
+}

If so, I think we can just merge these two functions.


Hmm, direct mapping table isn't allocated by bootmem allocator such as 
memblock, can't be free by put_page_bootmem().
But I will try to merge these two functions.



Oh, I didn't notice this, thanks. :)

(snip)


+
+__split_large_page(kpte, address, pbase);


Is this patch going to replace [PATCH 08/12] ?



I wish to replace [PATCH 08/12], but need Congyang and Yasuaki to confirm 
first:)


If so, __split_large_page() was added and exported in [PATCH 09/12],
then we should move it here, right ?


yes.

and what do you think about moving vmemmap_pud[pmd/pte]_remove() to 
arch/x86/mm/init_64.c,
to be consistent with vmemmap_populate() ?


It is a good idea since pud/pmd/pte related code could be platform
dependent. And I'm also trying to move vmemmap_free() to
arch/x86/mm/init_64.c too. I want to have a common interface just
like vmemmap_populate(). :)



I will rework [PATCH 08/12] and [PATCH 09/12] soon.


I am rebasing the whole patch set now. And I think I chould finish part
of your work too. A new patch-set is coming soon, and your rework is
also welcome. :)

Thanks. :)


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-04 Thread Jianguo Wu
Hi Tang,

Thanks for your review and comments, Please see below for my reply.

On 2012/12/4 17:13, Tang Chen wrote:

> Hi Wu,
> 
> Sorry to make noise here. Please see below. :)
> 
> On 12/03/2012 10:23 AM, Jianguo Wu wrote:
>> Signed-off-by: Jianguo Wu
>> Signed-off-by: Jiang Liu
>> ---
>>   include/linux/mm.h  |1 +
>>   mm/sparse-vmemmap.c |  231 
>> +++
>>   mm/sparse.c |3 +-
>>   3 files changed, 234 insertions(+), 1 deletions(-)
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 5657670..1f26af5 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
>> long pages, int node);
>>   void vmemmap_populate_print_last(void);
>>   void register_page_bootmem_memmap(unsigned long section_nr, struct page 
>> *map,
>> unsigned long size);
>> +void vmemmap_free(struct page *memmap, unsigned long nr_pages);
>>
>>   enum mf_flags {
>>   MF_COUNT_INCREASED = 1<<  0,
>> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
>> index 1b7e22a..748732d 100644
>> --- a/mm/sparse-vmemmap.c
>> +++ b/mm/sparse-vmemmap.c
>> @@ -29,6 +29,10 @@
>>   #include
>>   #include
>>
>> +#ifdef CONFIG_MEMORY_HOTREMOVE
>> +#include
>> +#endif
>> +
>>   /*
>>* Allocate a block of memory to be used to back the virtual memory map
>>* or to back the page tables that are used to create the mapping.
>> @@ -224,3 +228,230 @@ void __init sparse_mem_maps_populate_node(struct page 
>> **map_map,
>>   vmemmap_buf_end = NULL;
>>   }
>>   }
>> +
>> +#ifdef CONFIG_MEMORY_HOTREMOVE
>> +
>> +#define PAGE_INUSE 0xFD
>> +
>> +static void vmemmap_free_pages(struct page *page, int order)
>> +{
>> +struct zone *zone;
>> +unsigned long magic;
>> +
>> +magic = (unsigned long) page->lru.next;
>> +if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
>> +put_page_bootmem(page);
>> +
>> +zone = page_zone(page);
>> +zone_span_writelock(zone);
>> +zone->present_pages++;
>> +zone_span_writeunlock(zone);
>> +totalram_pages++;
> 
> Seems that we have different ways to handle pages allocated by bootmem
> or by regular allocator. Is the checking way in [PATCH 09/12] available
> here ?
> 
> +/* bootmem page has reserved flag */
> +if (PageReserved(page)) {
> ..
> +}
> 
> If so, I think we can just merge these two functions.

Hmm, direct mapping table isn't allocated by bootmem allocator such as 
memblock, can't be free by put_page_bootmem().
But I will try to merge these two functions.

> 
>> +} else
>> +free_pages((unsigned long)page_address(page), order);
>> +}
>> +
>> +static void free_pte_table(pmd_t *pmd)
>> +{
>> +pte_t *pte, *pte_start;
>> +int i;
>> +
>> +pte_start = (pte_t *)pmd_page_vaddr(*pmd);
>> +for (i = 0; i<  PTRS_PER_PTE; i++) {
>> +pte = pte_start + i;
>> +if (pte_val(*pte))
>> +return;
>> +}
>> +
>> +/* free a pte talbe */
>> +vmemmap_free_pages(pmd_page(*pmd), 0);
>> +spin_lock(_mm.page_table_lock);
>> +pmd_clear(pmd);
>> +spin_unlock(_mm.page_table_lock);
>> +}
>> +
>> +static void free_pmd_table(pud_t *pud)
>> +{
>> +pmd_t *pmd, *pmd_start;
>> +int i;
>> +
>> +pmd_start = (pmd_t *)pud_page_vaddr(*pud);
>> +for (i = 0; i<  PTRS_PER_PMD; i++) {
>> +pmd = pmd_start + i;
>> +if (pmd_val(*pmd))
>> +return;
>> +}
>> +
>> +/* free a pmd talbe */
>> +vmemmap_free_pages(pud_page(*pud), 0);
>> +spin_lock(_mm.page_table_lock);
>> +pud_clear(pud);
>> +spin_unlock(_mm.page_table_lock);
>> +}
>> +
>> +static void free_pud_table(pgd_t *pgd)
>> +{
>> +pud_t *pud, *pud_start;
>> +int i;
>> +
>> +pud_start = (pud_t *)pgd_page_vaddr(*pgd);
>> +for (i = 0; i<  PTRS_PER_PUD; i++) {
>> +pud = pud_start + i;
>> +if (pud_val(*pud))
>> +return;
>> +}
>> +
>> +/* free a pud table */
>> +vmemmap_free_pages(pgd_page(*pgd), 0);
>> +spin_lock(_mm.page_table_lock);
>> +pgd_clear(pgd);
>> +spin_unlock(_mm.page_table_lock);
>> +}
> 
> All the free_xxx_table() are very similar to the functions in
> [PATCH 09/12]. Could we reuse them anyway ?

yes, we can reuse them.

> 
>> +
>> +static int split_large_page(pte_t *kpte, unsigned long address, pte_t 
>> *pbase)
>> +{
>> +struct page *page = pmd_page(*(pmd_t *)kpte);
>> +int i = 0;
>> +unsigned long magic;
>> +unsigned long section_nr;
>> +
>> +__split_large_page(kpte, address, pbase);
> 
> Is this patch going to replace [PATCH 08/12] ?
> 

I wish to replace [PATCH 08/12], but need Congyang and Yasuaki to confirm 
first:)

> If so, __split_large_page() was added and exported in [PATCH 09/12],
> then we should move it here, right ?

yes.

and what do you think about moving vmemmap_pud[pmd/pte]_remove() to 

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-04 Thread Tang Chen

On 11/27/2012 06:00 PM, Wen Congyang wrote:

  static int __remove_section(struct zone *zone, struct mem_section *ms)
  {
unsigned long flags;
@@ -330,9 +317,9 @@ static int __remove_section(struct zone *zone, struct 
mem_section *ms)
	pgdat_resize_lock(pgdat, &flags);
	sparse_remove_one_section(zone, ms);
	pgdat_resize_unlock(pgdat, &flags);
-   return 0;
+
+   return ret;


I think we don't need to change this line. :)

Reviewed-by: Tang Chen 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-04 Thread Tang Chen

Hi Wu,

Sorry to make noise here. Please see below. :)

On 12/03/2012 10:23 AM, Jianguo Wu wrote:

Signed-off-by: Jianguo Wu
Signed-off-by: Jiang Liu
---
  include/linux/mm.h  |1 +
  mm/sparse-vmemmap.c |  231 +++
  mm/sparse.c |3 +-
  3 files changed, 234 insertions(+), 1 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5657670..1f26af5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
long pages, int node);
  void vmemmap_populate_print_last(void);
  void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
  unsigned long size);
+void vmemmap_free(struct page *memmap, unsigned long nr_pages);

  enum mf_flags {
MF_COUNT_INCREASED = 1<<  0,
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 1b7e22a..748732d 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -29,6 +29,10 @@
  #include
  #include

+#ifdef CONFIG_MEMORY_HOTREMOVE
+#include
+#endif
+
  /*
   * Allocate a block of memory to be used to back the virtual memory map
   * or to back the page tables that are used to create the mapping.
@@ -224,3 +228,230 @@ void __init sparse_mem_maps_populate_node(struct page 
**map_map,
vmemmap_buf_end = NULL;
}
  }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+
+#define PAGE_INUSE 0xFD
+
+static void vmemmap_free_pages(struct page *page, int order)
+{
+   struct zone *zone;
+   unsigned long magic;
+
+   magic = (unsigned long) page->lru.next;
+   if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+   put_page_bootmem(page);
+
+   zone = page_zone(page);
+   zone_span_writelock(zone);
+   zone->present_pages++;
+   zone_span_writeunlock(zone);
+   totalram_pages++;


Seems that we have different ways to handle pages allocated by bootmem
or by regular allocator. Is the checking way in [PATCH 09/12] available
here ?

+   /* bootmem page has reserved flag */
+   if (PageReserved(page)) {
..
+   }

If so, I think we can just merge these two functions.


+   } else
+   free_pages((unsigned long)page_address(page), order);
+}
+
+static void free_pte_table(pmd_t *pmd)
+{
+   pte_t *pte, *pte_start;
+   int i;
+
+   pte_start = (pte_t *)pmd_page_vaddr(*pmd);
+   for (i = 0; i<  PTRS_PER_PTE; i++) {
+   pte = pte_start + i;
+   if (pte_val(*pte))
+   return;
+   }
+
+   /* free a pte talbe */
+   vmemmap_free_pages(pmd_page(*pmd), 0);
+   spin_lock(_mm.page_table_lock);
+   pmd_clear(pmd);
+   spin_unlock(_mm.page_table_lock);
+}
+
+static void free_pmd_table(pud_t *pud)
+{
+   pmd_t *pmd, *pmd_start;
+   int i;
+
+   pmd_start = (pmd_t *)pud_page_vaddr(*pud);
+   for (i = 0; i<  PTRS_PER_PMD; i++) {
+   pmd = pmd_start + i;
+   if (pmd_val(*pmd))
+   return;
+   }
+
+   /* free a pmd talbe */
+   vmemmap_free_pages(pud_page(*pud), 0);
+   spin_lock(_mm.page_table_lock);
+   pud_clear(pud);
+   spin_unlock(_mm.page_table_lock);
+}
+
+static void free_pud_table(pgd_t *pgd)
+{
+   pud_t *pud, *pud_start;
+   int i;
+
+   pud_start = (pud_t *)pgd_page_vaddr(*pgd);
+   for (i = 0; i<  PTRS_PER_PUD; i++) {
+   pud = pud_start + i;
+   if (pud_val(*pud))
+   return;
+   }
+
+   /* free a pud table */
+   vmemmap_free_pages(pgd_page(*pgd), 0);
+   spin_lock(_mm.page_table_lock);
+   pgd_clear(pgd);
+   spin_unlock(_mm.page_table_lock);
+}


All the free_xxx_table() are very similar to the functions in
[PATCH 09/12]. Could we reuse them anyway ?


+
+static int split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
+{
+   struct page *page = pmd_page(*(pmd_t *)kpte);
+   int i = 0;
+   unsigned long magic;
+   unsigned long section_nr;
+
+   __split_large_page(kpte, address, pbase);


Is this patch going to replace [PATCH 08/12] ?

If so, __split_large_page() was added and exported in [PATCH 09/12],
then we should move it here, right ?

If not, free_map_bootmem() and __kfree_section_memmap() were changed in
[PATCH 08/12], and we need to handle this.


+   __flush_tlb_all();
+
+   magic = (unsigned long) page->lru.next;
+   if (magic == SECTION_INFO) {
+   section_nr = pfn_to_section_nr(page_to_pfn(page));
+   while (i<  PTRS_PER_PMD) {
+   page++;
+   i++;
+   get_page_bootmem(section_nr, page, SECTION_INFO);
+   }
+   }
+
+   return 0;
+}
+
+static void vmemmap_pte_remove(pmd_t *pmd, unsigned long addr, unsigned long 
end)
+{
+   pte_t *pte;
+ 

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-04 Thread Tang Chen

Hi Wu,

Sorry to make noise here. Please see below. :)

On 12/03/2012 10:23 AM, Jianguo Wu wrote:

Signed-off-by: Jianguo Wuwujian...@huawei.com
Signed-off-by: Jiang Liujiang@huawei.com
---
  include/linux/mm.h  |1 +
  mm/sparse-vmemmap.c |  231 +++
  mm/sparse.c |3 +-
  3 files changed, 234 insertions(+), 1 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5657670..1f26af5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
long pages, int node);
  void vmemmap_populate_print_last(void);
  void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
  unsigned long size);
+void vmemmap_free(struct page *memmap, unsigned long nr_pages);

  enum mf_flags {
MF_COUNT_INCREASED = 1  0,
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 1b7e22a..748732d 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -29,6 +29,10 @@
  #includeasm/pgalloc.h
  #includeasm/pgtable.h

+#ifdef CONFIG_MEMORY_HOTREMOVE
+#includeasm/tlbflush.h
+#endif
+
  /*
   * Allocate a block of memory to be used to back the virtual memory map
   * or to back the page tables that are used to create the mapping.
@@ -224,3 +228,230 @@ void __init sparse_mem_maps_populate_node(struct page 
**map_map,
vmemmap_buf_end = NULL;
}
  }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+
+#define PAGE_INUSE 0xFD
+
+static void vmemmap_free_pages(struct page *page, int order)
+{
+   struct zone *zone;
+   unsigned long magic;
+
+   magic = (unsigned long) page->lru.next;
+   if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+   put_page_bootmem(page);
+
+   zone = page_zone(page);
+   zone_span_writelock(zone);
+   zone->present_pages++;
+   zone_span_writeunlock(zone);
+   totalram_pages++;


Seems that we have different ways to handle pages allocated by bootmem
or by regular allocator. Is the checking way in [PATCH 09/12] available
here ?

+   /* bootmem page has reserved flag */
+   if (PageReserved(page)) {
..
+   }

If so, I think we can just merge these two functions.


+   } else
+   free_pages((unsigned long)page_address(page), order);
+}
+
+static void free_pte_table(pmd_t *pmd)
+{
+   pte_t *pte, *pte_start;
+   int i;
+
+   pte_start = (pte_t *)pmd_page_vaddr(*pmd);
+   for (i = 0; i < PTRS_PER_PTE; i++) {
+   pte = pte_start + i;
+   if (pte_val(*pte))
+   return;
+   }
+
+   /* free a pte talbe */
+   vmemmap_free_pages(pmd_page(*pmd), 0);
+   spin_lock(init_mm.page_table_lock);
+   pmd_clear(pmd);
+   spin_unlock(init_mm.page_table_lock);
+}
+
+static void free_pmd_table(pud_t *pud)
+{
+   pmd_t *pmd, *pmd_start;
+   int i;
+
+   pmd_start = (pmd_t *)pud_page_vaddr(*pud);
+   for (i = 0; i  PTRS_PER_PMD; i++) {
+   pmd = pmd_start + i;
+   if (pmd_val(*pmd))
+   return;
+   }
+
+   /* free a pmd talbe */
+   vmemmap_free_pages(pud_page(*pud), 0);
+   spin_lock(init_mm.page_table_lock);
+   pud_clear(pud);
+   spin_unlock(init_mm.page_table_lock);
+}
+
+static void free_pud_table(pgd_t *pgd)
+{
+   pud_t *pud, *pud_start;
+   int i;
+
+   pud_start = (pud_t *)pgd_page_vaddr(*pgd);
+   for (i = 0; i  PTRS_PER_PUD; i++) {
+   pud = pud_start + i;
+   if (pud_val(*pud))
+   return;
+   }
+
+   /* free a pud table */
+   vmemmap_free_pages(pgd_page(*pgd), 0);
+   spin_lock(init_mm.page_table_lock);
+   pgd_clear(pgd);
+   spin_unlock(init_mm.page_table_lock);
+}


All the free_xxx_table() are very similar to the functions in
[PATCH 09/12]. Could we reuse them anyway ?


+
+static int split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
+{
+   struct page *page = pmd_page(*(pmd_t *)kpte);
+   int i = 0;
+   unsigned long magic;
+   unsigned long section_nr;
+
+   __split_large_page(kpte, address, pbase);


Is this patch going to replace [PATCH 08/12] ?

If so, __split_large_page() was added and exported in [PATCH 09/12],
then we should move it here, right ?

If not, free_map_bootmem() and __kfree_section_memmap() were changed in
[PATCH 08/12], and we need to handle this.


+   __flush_tlb_all();
+
+   magic = (unsigned long) page->lru.next;
+   if (magic == SECTION_INFO) {
+   section_nr = pfn_to_section_nr(page_to_pfn(page));
+   while (i < PTRS_PER_PMD) {
+   page++;
+   i++;
+   get_page_bootmem(section_nr, page, SECTION_INFO);
+   }
+   }
+
+   return 0;
+}
+
+static void 

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-04 Thread Tang Chen

On 11/27/2012 06:00 PM, Wen Congyang wrote:

  static int __remove_section(struct zone *zone, struct mem_section *ms)
  {
unsigned long flags;
@@ -330,9 +317,9 @@ static int __remove_section(struct zone *zone, struct 
mem_section *ms)
pgdat_resize_lock(pgdat,flags);
sparse_remove_one_section(zone, ms);
pgdat_resize_unlock(pgdat,flags);
-   return 0;
+
+   return ret;


I think we don't need to change this line. :)

Reviewed-by: Tang Chen tangc...@cn.fujitsu.com

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-04 Thread Jianguo Wu
Hi Tang,

Thanks for your review and comments, Please see below for my reply.

On 2012/12/4 17:13, Tang Chen wrote:

 Hi Wu,
 
 Sorry to make noise here. Please see below. :)
 
 On 12/03/2012 10:23 AM, Jianguo Wu wrote:
 Signed-off-by: Jianguo Wuwujian...@huawei.com
 Signed-off-by: Jiang Liujiang@huawei.com
 ---
   include/linux/mm.h  |1 +
   mm/sparse-vmemmap.c |  231 
 +++
   mm/sparse.c |3 +-
   3 files changed, 234 insertions(+), 1 deletions(-)

 diff --git a/include/linux/mm.h b/include/linux/mm.h
 index 5657670..1f26af5 100644
 --- a/include/linux/mm.h
 +++ b/include/linux/mm.h
 @@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
 long pages, int node);
   void vmemmap_populate_print_last(void);
   void register_page_bootmem_memmap(unsigned long section_nr, struct page 
 *map,
 unsigned long size);
 +void vmemmap_free(struct page *memmap, unsigned long nr_pages);

   enum mf_flags {
   MF_COUNT_INCREASED = 1  0,
 diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
 index 1b7e22a..748732d 100644
 --- a/mm/sparse-vmemmap.c
 +++ b/mm/sparse-vmemmap.c
 @@ -29,6 +29,10 @@
   #includeasm/pgalloc.h
   #includeasm/pgtable.h

 +#ifdef CONFIG_MEMORY_HOTREMOVE
 +#includeasm/tlbflush.h
 +#endif
 +
   /*
* Allocate a block of memory to be used to back the virtual memory map
* or to back the page tables that are used to create the mapping.
 @@ -224,3 +228,230 @@ void __init sparse_mem_maps_populate_node(struct page 
 **map_map,
   vmemmap_buf_end = NULL;
   }
   }
 +
 +#ifdef CONFIG_MEMORY_HOTREMOVE
 +
 +#define PAGE_INUSE 0xFD
 +
 +static void vmemmap_free_pages(struct page *page, int order)
 +{
 +struct zone *zone;
 +unsigned long magic;
 +
 +magic = (unsigned long) page-lru.next;
 +if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
 +put_page_bootmem(page);
 +
 +zone = page_zone(page);
 +zone_span_writelock(zone);
 +zone-present_pages++;
 +zone_span_writeunlock(zone);
 +totalram_pages++;
 
 Seems that we have different ways to handle pages allocated by bootmem
 or by regular allocator. Is the checking way in [PATCH 09/12] available
 here ?
 
 +/* bootmem page has reserved flag */
 +if (PageReserved(page)) {
 ..
 +}
 
 If so, I think we can just merge these two functions.

Hmm, direct mapping table isn't allocated by bootmem allocator such as 
memblock, can't be free by put_page_bootmem().
But I will try to merge these two functions.

 
 +} else
 +free_pages((unsigned long)page_address(page), order);
 +}
 +
 +static void free_pte_table(pmd_t *pmd)
 +{
 +pte_t *pte, *pte_start;
 +int i;
 +
 +pte_start = (pte_t *)pmd_page_vaddr(*pmd);
 +for (i = 0; i  PTRS_PER_PTE; i++) {
 +pte = pte_start + i;
 +if (pte_val(*pte))
 +return;
 +}
 +
 +/* free a pte talbe */
 +vmemmap_free_pages(pmd_page(*pmd), 0);
 +spin_lock(init_mm.page_table_lock);
 +pmd_clear(pmd);
 +spin_unlock(init_mm.page_table_lock);
 +}
 +
 +static void free_pmd_table(pud_t *pud)
 +{
 +pmd_t *pmd, *pmd_start;
 +int i;
 +
 +pmd_start = (pmd_t *)pud_page_vaddr(*pud);
 +for (i = 0; i  PTRS_PER_PMD; i++) {
 +pmd = pmd_start + i;
 +if (pmd_val(*pmd))
 +return;
 +}
 +
 +/* free a pmd talbe */
 +vmemmap_free_pages(pud_page(*pud), 0);
 +spin_lock(init_mm.page_table_lock);
 +pud_clear(pud);
 +spin_unlock(init_mm.page_table_lock);
 +}
 +
 +static void free_pud_table(pgd_t *pgd)
 +{
 +pud_t *pud, *pud_start;
 +int i;
 +
 +pud_start = (pud_t *)pgd_page_vaddr(*pgd);
 +for (i = 0; i  PTRS_PER_PUD; i++) {
 +pud = pud_start + i;
 +if (pud_val(*pud))
 +return;
 +}
 +
 +/* free a pud table */
 +vmemmap_free_pages(pgd_page(*pgd), 0);
 +spin_lock(init_mm.page_table_lock);
 +pgd_clear(pgd);
 +spin_unlock(init_mm.page_table_lock);
 +}
 
 All the free_xxx_table() are very similar to the functions in
 [PATCH 09/12]. Could we reuse them anyway ?

yes, we can reuse them.

 
 +
 +static int split_large_page(pte_t *kpte, unsigned long address, pte_t 
 *pbase)
 +{
 +struct page *page = pmd_page(*(pmd_t *)kpte);
 +int i = 0;
 +unsigned long magic;
 +unsigned long section_nr;
 +
 +__split_large_page(kpte, address, pbase);
 
 Is this patch going to replace [PATCH 08/12] ?
 

I wish to replace [PATCH 08/12], but need Congyang and Yasuaki to confirm 
first:)

 If so, __split_large_page() was added and exported in [PATCH 09/12],
 then we should move it here, right ?

yes.

and what do you think about moving vmemmap_pud[pmd/pte]_remove() to 
arch/x86/mm/init_64.c,
to be consistent with vmemmap_populate() ?

I will rework [PATCH 08/12] and [PATCH 09/12] soon.

Thanks,
Jianguo Wu.

 
 If not, free_map_bootmem() and 

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-04 Thread Tang Chen

Hi Wu,

On 12/04/2012 08:20 PM, Jianguo Wu wrote:
(snip)


Seems that we have different ways to handle pages allocated by bootmem
or by regular allocator. Is the checking way in [PATCH 09/12] available
here ?

+/* bootmem page has reserved flag */
+if (PageReserved(page)) {
..
+}

If so, I think we can just merge these two functions.


Hmm, direct mapping table isn't allocated by bootmem allocator such as 
memblock, can't be free by put_page_bootmem().
But I will try to merge these two functions.



Oh, I didn't notice this, thanks. :)

(snip)


+
+__split_large_page(kpte, address, pbase);


Is this patch going to replace [PATCH 08/12] ?



I wish to replace [PATCH 08/12], but need Congyang and Yasuaki to confirm 
first:)


If so, __split_large_page() was added and exported in [PATCH 09/12],
then we should move it here, right ?


yes.

and what do you think about moving vmemmap_pud[pmd/pte]_remove() to 
arch/x86/mm/init_64.c,
to be consistent with vmemmap_populate() ?


It is a good idea since pud/pmd/pte related code could be platform
dependent. And I'm also trying to move vmemmap_free() to
arch/x86/mm/init_64.c too. I want to have a common interface just
like vmemmap_populate(). :)



I will rework [PATCH 08/12] and [PATCH 09/12] soon.


I am rebasing the whole patch set now. And I think I chould finish part
of your work too. A new patch-set is coming soon, and your rework is
also welcome. :)

Thanks. :)


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-04 Thread Jianguo Wu
Hi Tang,

On 2012/12/5 10:07, Tang Chen wrote:

 Hi Wu,
 
 On 12/04/2012 08:20 PM, Jianguo Wu wrote:
 (snip)

 Seems that we have different ways to handle pages allocated by bootmem
 or by regular allocator. Is the checking way in [PATCH 09/12] available
 here ?

 +/* bootmem page has reserved flag */
 +if (PageReserved(page)) {
 ..
 +}

 If so, I think we can just merge these two functions.

 Hmm, direct mapping table isn't allocated by bootmem allocator such as 
 memblock, can't be free by put_page_bootmem().
 But I will try to merge these two functions.

 
 Oh, I didn't notice this, thanks. :)
 
 (snip)
 
 +
 +__split_large_page(kpte, address, pbase);

 Is this patch going to replace [PATCH 08/12] ?


 I wish to replace [PATCH 08/12], but need Congyang and Yasuaki to confirm 
 first:)

 If so, __split_large_page() was added and exported in [PATCH 09/12],
 then we should move it here, right ?

 yes.

 and what do you think about moving vmemmap_pud[pmd/pte]_remove() to 
 arch/x86/mm/init_64.c,
 to be consistent with vmemmap_populate() ?
 
 It is a good idea since pud/pmd/pte related code could be platform
 dependent. And I'm also trying to move vmemmap_free() to
 arch/x86/mm/init_64.c too. I want to have a common interface just
 like vmemmap_populate(). :)
 

Great.


 I will rework [PATCH 08/12] and [PATCH 09/12] soon.
 
 I am rebasing the whole patch set now. And I think I chould finish part
 of your work too. A new patch-set is coming soon, and your rework is
 also welcome. :)


Since you are rebasing now, I will wait for your new patche-set :).

Thanks.
Jianguo Wu

 Thanks. :)
 
 
 
 .
 



--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-02 Thread Jianguo Wu
Hi Congyang,

This is the new version.

Thanks,
Jianguo Wu.


Signed-off-by: Jianguo Wu 
Signed-off-by: Jiang Liu 
---
 include/linux/mm.h  |1 +
 mm/sparse-vmemmap.c |  231 +++
 mm/sparse.c |3 +-
 3 files changed, 234 insertions(+), 1 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5657670..1f26af5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
long pages, int node);
 void vmemmap_populate_print_last(void);
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
  unsigned long size);
+void vmemmap_free(struct page *memmap, unsigned long nr_pages);
 
 enum mf_flags {
MF_COUNT_INCREASED = 1 << 0,
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 1b7e22a..748732d 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -29,6 +29,10 @@
 #include 
 #include 
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+#include 
+#endif
+
 /*
  * Allocate a block of memory to be used to back the virtual memory map
  * or to back the page tables that are used to create the mapping.
@@ -224,3 +228,230 @@ void __init sparse_mem_maps_populate_node(struct page 
**map_map,
vmemmap_buf_end = NULL;
}
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+
+#define PAGE_INUSE 0xFD
+
+static void vmemmap_free_pages(struct page *page, int order)
+{
+   struct zone *zone;
+   unsigned long magic;
+
+   magic = (unsigned long) page->lru.next;
+   if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+   put_page_bootmem(page);
+
+   zone = page_zone(page);
+   zone_span_writelock(zone);
+   zone->present_pages++;
+   zone_span_writeunlock(zone);
+   totalram_pages++;
+   } else
+   free_pages((unsigned long)page_address(page), order);
+}
+
+static void free_pte_table(pmd_t *pmd)
+{
+   pte_t *pte, *pte_start;
+   int i;
+
+   pte_start = (pte_t *)pmd_page_vaddr(*pmd);
+   for (i = 0; i < PTRS_PER_PTE; i++) {
+   pte = pte_start + i;
+   if (pte_val(*pte))
+   return;
+   }
+
+   /* free a pte talbe */
+   vmemmap_free_pages(pmd_page(*pmd), 0);
+   spin_lock(&init_mm.page_table_lock);
+   pmd_clear(pmd);
+   spin_unlock(&init_mm.page_table_lock);
+}
+
+static void free_pmd_table(pud_t *pud)
+{
+   pmd_t *pmd, *pmd_start;
+   int i;
+
+   pmd_start = (pmd_t *)pud_page_vaddr(*pud);
+   for (i = 0; i < PTRS_PER_PMD; i++) {
+   pmd = pmd_start + i;
+   if (pmd_val(*pmd))
+   return;
+   }
+
+   /* free a pmd talbe */
+   vmemmap_free_pages(pud_page(*pud), 0);
+   spin_lock(_mm.page_table_lock);
+   pud_clear(pud);
+   spin_unlock(_mm.page_table_lock);
+}
+
+static void free_pud_table(pgd_t *pgd)
+{
+   pud_t *pud, *pud_start;
+   int i;
+
+   pud_start = (pud_t *)pgd_page_vaddr(*pgd);
+   for (i = 0; i < PTRS_PER_PUD; i++) {
+   pud = pud_start + i;
+   if (pud_val(*pud))
+   return;
+   }
+
+   /* free a pud table */
+   vmemmap_free_pages(pgd_page(*pgd), 0);
+   spin_lock(_mm.page_table_lock);
+   pgd_clear(pgd);
+   spin_unlock(_mm.page_table_lock);
+}
+
+static int split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
+{
+   struct page *page = pmd_page(*(pmd_t *)kpte);
+   int i = 0;
+   unsigned long magic;
+   unsigned long section_nr;
+
+   __split_large_page(kpte, address, pbase);
+   __flush_tlb_all();
+
+   magic = (unsigned long) page->lru.next;
+   if (magic == SECTION_INFO) {
+   section_nr = pfn_to_section_nr(page_to_pfn(page));
+   while (i < PTRS_PER_PMD) {
+   page++;
+   i++;
+   get_page_bootmem(section_nr, page, SECTION_INFO);
+   }
+   }
+
+   return 0;
+}
+
+static void vmemmap_pte_remove(pmd_t *pmd, unsigned long addr, unsigned long 
end)
+{
+   pte_t *pte;
+   unsigned long next;
+   void *page_addr;
+
+   pte = pte_offset_kernel(pmd, addr);
+   for (; addr < end; pte++, addr += PAGE_SIZE) {
+   next = (addr + PAGE_SIZE) & PAGE_MASK;
+   if (next > end)
+   next = end;
+
+   if (pte_none(*pte))
+   continue;
+   if (IS_ALIGNED(addr, PAGE_SIZE) &&
+   IS_ALIGNED(next, PAGE_SIZE)) {
+   vmemmap_free_pages(pte_page(*pte), 0);
+   spin_lock(_mm.page_table_lock);
+   pte_clear(_mm, addr, pte);
+   spin_unlock(_mm.page_table_lock);
+   } else {
+   /*
+

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-12-02 Thread Jianguo Wu
Hi Congyang,

This is the new version.

Thanks,
Jianguo Wu.


Signed-off-by: Jianguo Wu wujian...@huawei.com
Signed-off-by: Jiang Liu jiang@huawei.com
---
 include/linux/mm.h  |1 +
 mm/sparse-vmemmap.c |  231 +++
 mm/sparse.c |3 +-
 3 files changed, 234 insertions(+), 1 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5657670..1f26af5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
long pages, int node);
 void vmemmap_populate_print_last(void);
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
  unsigned long size);
+void vmemmap_free(struct page *memmap, unsigned long nr_pages);
 
 enum mf_flags {
MF_COUNT_INCREASED = 1  0,
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 1b7e22a..748732d 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -29,6 +29,10 @@
 #include asm/pgalloc.h
 #include asm/pgtable.h
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+#include asm/tlbflush.h
+#endif
+
 /*
  * Allocate a block of memory to be used to back the virtual memory map
  * or to back the page tables that are used to create the mapping.
@@ -224,3 +228,230 @@ void __init sparse_mem_maps_populate_node(struct page 
**map_map,
vmemmap_buf_end = NULL;
}
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+
+#define PAGE_INUSE 0xFD
+
+static void vmemmap_free_pages(struct page *page, int order)
+{
+   struct zone *zone;
+   unsigned long magic;
+
+   magic = (unsigned long) page-lru.next;
+   if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+   put_page_bootmem(page);
+
+   zone = page_zone(page);
+   zone_span_writelock(zone);
+   zone-present_pages++;
+   zone_span_writeunlock(zone);
+   totalram_pages++;
+   } else
+   free_pages((unsigned long)page_address(page), order);
+}
+
+static void free_pte_table(pmd_t *pmd)
+{
+   pte_t *pte, *pte_start;
+   int i;
+
+   pte_start = (pte_t *)pmd_page_vaddr(*pmd);
+   for (i = 0; i  PTRS_PER_PTE; i++) {
+   pte = pte_start + i;
+   if (pte_val(*pte))
+   return;
+   }
+
+   /* free a pte talbe */
+   vmemmap_free_pages(pmd_page(*pmd), 0);
+   spin_lock(init_mm.page_table_lock);
+   pmd_clear(pmd);
+   spin_unlock(init_mm.page_table_lock);
+}
+
+static void free_pmd_table(pud_t *pud)
+{
+   pmd_t *pmd, *pmd_start;
+   int i;
+
+   pmd_start = (pmd_t *)pud_page_vaddr(*pud);
+   for (i = 0; i  PTRS_PER_PMD; i++) {
+   pmd = pmd_start + i;
+   if (pmd_val(*pmd))
+   return;
+   }
+
+   /* free a pmd talbe */
+   vmemmap_free_pages(pud_page(*pud), 0);
+   spin_lock(init_mm.page_table_lock);
+   pud_clear(pud);
+   spin_unlock(init_mm.page_table_lock);
+}
+
+static void free_pud_table(pgd_t *pgd)
+{
+   pud_t *pud, *pud_start;
+   int i;
+
+   pud_start = (pud_t *)pgd_page_vaddr(*pgd);
+   for (i = 0; i  PTRS_PER_PUD; i++) {
+   pud = pud_start + i;
+   if (pud_val(*pud))
+   return;
+   }
+
+   /* free a pud table */
+   vmemmap_free_pages(pgd_page(*pgd), 0);
+   spin_lock(init_mm.page_table_lock);
+   pgd_clear(pgd);
+   spin_unlock(init_mm.page_table_lock);
+}
+
+static int split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
+{
+   struct page *page = pmd_page(*(pmd_t *)kpte);
+   int i = 0;
+   unsigned long magic;
+   unsigned long section_nr;
+
+   __split_large_page(kpte, address, pbase);
+   __flush_tlb_all();
+
+   magic = (unsigned long) page-lru.next;
+   if (magic == SECTION_INFO) {
+   section_nr = pfn_to_section_nr(page_to_pfn(page));
+   while (i  PTRS_PER_PMD) {
+   page++;
+   i++;
+   get_page_bootmem(section_nr, page, SECTION_INFO);
+   }
+   }
+
+   return 0;
+}
+
+static void vmemmap_pte_remove(pmd_t *pmd, unsigned long addr, unsigned long 
end)
+{
+   pte_t *pte;
+   unsigned long next;
+   void *page_addr;
+
+   pte = pte_offset_kernel(pmd, addr);
+   for (; addr < end; pte++, addr += PAGE_SIZE) {
+   next = (addr + PAGE_SIZE) & PAGE_MASK;
+   if (next > end)
+   next = end;
+
+   if (pte_none(*pte))
+   continue;
+   if (IS_ALIGNED(addr, PAGE_SIZE) &&
+   IS_ALIGNED(next, PAGE_SIZE)) {
+   vmemmap_free_pages(pte_page(*pte), 0);
+   spin_lock(init_mm.page_table_lock);
+   pte_clear(init_mm, addr, pte);
+

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-11-29 Thread Yasuaki Ishimatsu

Hi Jianguo,

2012/11/30 11:47, Jianguo Wu wrote:

Hi Congyang,

Thanks for your review and comments.

On 2012/11/30 9:45, Wen Congyang wrote:


At 11/28/2012 05:40 PM, Jianguo Wu Wrote:

Hi Congyang,

I think vmemmap's pgtable pages should be freed after all entries are cleared, 
I have a patch to do this.
The code logic is the same as [Patch v4 09/12] memory-hotplug: remove page 
table of x86_64 architecture.

How do you think about this?

Signed-off-by: Jianguo Wu 
Signed-off-by: Jiang Liu 
---
  include/linux/mm.h  |1 +
  mm/sparse-vmemmap.c |  214 +++
  mm/sparse.c |5 +-
  3 files changed, 218 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5657670..1f26af5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
long pages, int node);
  void vmemmap_populate_print_last(void);
  void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
  unsigned long size);
+void vmemmap_free(struct page *memmap, unsigned long nr_pages);

  enum mf_flags {
MF_COUNT_INCREASED = 1 << 0,
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 1b7e22a..242cb28 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -29,6 +29,10 @@
  #include 
  #include 

+#ifdef CONFIG_MEMORY_HOTREMOVE
+#include 
+#endif
+
  /*
   * Allocate a block of memory to be used to back the virtual memory map
   * or to back the page tables that are used to create the mapping.
@@ -224,3 +228,213 @@ void __init sparse_mem_maps_populate_node(struct page 
**map_map,
vmemmap_buf_end = NULL;
}
  }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static void vmemmap_free_pages(struct page *page, int order)
+{
+   struct zone *zone;
+   unsigned long magic;
+
+   magic = (unsigned long) page->lru.next;
+   if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+   put_page_bootmem(page);
+
+   zone = page_zone(page);
+   zone_span_writelock(zone);
+   zone->present_pages++;
+   zone_span_writeunlock(zone);
+   totalram_pages++;
+   } else {
+   if (is_vmalloc_addr(page_address(page)))
+   vfree(page_address(page));


Hmm, vmemmap doesn't use vmalloc() to allocate memory.



yes, this can be removed.


+   else
+   free_pages((unsigned long)page_address(page), order);
+   }
+}
+
+static void free_pte_table(pmd_t *pmd)
+{
+   pte_t *pte, *pte_start;
+   int i;
+
+   pte_start = (pte_t *)pmd_page_vaddr(*pmd);
+   for (i = 0; i < PTRS_PER_PTE; i++) {
+   pte = pte_start + i;
+   if (pte_val(*pte))
+   return;
+   }
+
+   /* free a pte talbe */
+   vmemmap_free_pages(pmd_page(*pmd), 0);
+   spin_lock(_mm.page_table_lock);
+   pmd_clear(pmd);
+   spin_unlock(_mm.page_table_lock);
+}
+
+static void free_pmd_table(pud_t *pud)
+{
+   pmd_t *pmd, *pmd_start;
+   int i;
+
+   pmd_start = (pmd_t *)pud_page_vaddr(*pud);
+   for (i = 0; i < PTRS_PER_PMD; i++) {
+   pmd = pmd_start + i;
+   if (pmd_val(*pmd))
+   return;
+   }
+
+   /* free a pmd talbe */
+   vmemmap_free_pages(pud_page(*pud), 0);
+   spin_lock(_mm.page_table_lock);
+   pud_clear(pud);
+   spin_unlock(_mm.page_table_lock);
+}
+
+static void free_pud_table(pgd_t *pgd)
+{
+   pud_t *pud, *pud_start;
+   int i;
+
+   pud_start = (pud_t *)pgd_page_vaddr(*pgd);
+   for (i = 0; i < PTRS_PER_PUD; i++) {
+   pud = pud_start + i;
+   if (pud_val(*pud))
+   return;
+   }
+
+   /* free a pud table */
+   vmemmap_free_pages(pgd_page(*pgd), 0);
+   spin_lock(_mm.page_table_lock);
+   pgd_clear(pgd);
+   spin_unlock(_mm.page_table_lock);
+}
+
+static int split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
+{
+   struct page *page = pmd_page(*(pmd_t *)kpte);
+   int i = 0;
+   unsigned long magic;
+   unsigned long section_nr;
+
+   __split_large_page(kpte, address, pbase);
+   __flush_tlb_all();
+
+   magic = (unsigned long) page->lru.next;
+   if (magic == SECTION_INFO) {
+   section_nr = pfn_to_section_nr(page_to_pfn(page));
+   while (i < PTRS_PER_PMD) {
+   page++;
+   i++;
+   get_page_bootmem(section_nr, page, SECTION_INFO);
+   }
+   }
+
+   return 0;
+}
+
+static void vmemmap_pte_remove(pmd_t *pmd, unsigned long addr, unsigned long 
end)
+{
+   pte_t *pte;
+   unsigned long next;
+
+   pte = pte_offset_kernel(pmd, addr);
+   for (; addr < end; pte++, addr += PAGE_SIZE) {
+   

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-11-29 Thread Jianguo Wu
Hi Congyang,

Thanks for your review and comments.

On 2012/11/30 9:45, Wen Congyang wrote:

> At 11/28/2012 05:40 PM, Jianguo Wu Wrote:
>> Hi Congyang,
>>
>> I think vmemmap's pgtable pages should be freed after all entries are 
>> cleared, I have a patch to do this.
>> The code logic is the same as [Patch v4 09/12] memory-hotplug: remove page 
>> table of x86_64 architecture.
>>
>> How do you think about this?
>>
>> Signed-off-by: Jianguo Wu 
>> Signed-off-by: Jiang Liu 
>> ---
>>  include/linux/mm.h  |1 +
>>  mm/sparse-vmemmap.c |  214 
>> +++
>>  mm/sparse.c |5 +-
>>  3 files changed, 218 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 5657670..1f26af5 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
>> long pages, int node);
>>  void vmemmap_populate_print_last(void);
>>  void register_page_bootmem_memmap(unsigned long section_nr, struct page 
>> *map,
>>unsigned long size);
>> +void vmemmap_free(struct page *memmap, unsigned long nr_pages);
>>  
>>  enum mf_flags {
>>  MF_COUNT_INCREASED = 1 << 0,
>> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
>> index 1b7e22a..242cb28 100644
>> --- a/mm/sparse-vmemmap.c
>> +++ b/mm/sparse-vmemmap.c
>> @@ -29,6 +29,10 @@
>>  #include 
>>  #include 
>>  
>> +#ifdef CONFIG_MEMORY_HOTREMOVE
>> +#include 
>> +#endif
>> +
>>  /*
>>   * Allocate a block of memory to be used to back the virtual memory map
>>   * or to back the page tables that are used to create the mapping.
>> @@ -224,3 +228,213 @@ void __init sparse_mem_maps_populate_node(struct page 
>> **map_map,
>>  vmemmap_buf_end = NULL;
>>  }
>>  }
>> +
>> +#ifdef CONFIG_MEMORY_HOTREMOVE
>> +static void vmemmap_free_pages(struct page *page, int order)
>> +{
>> +struct zone *zone;
>> +unsigned long magic;
>> +
>> +magic = (unsigned long) page->lru.next;
>> +if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
>> +put_page_bootmem(page);
>> +
>> +zone = page_zone(page);
>> +zone_span_writelock(zone);
>> +zone->present_pages++;
>> +zone_span_writeunlock(zone);
>> +totalram_pages++;
>> +} else {
>> +if (is_vmalloc_addr(page_address(page)))
>> +vfree(page_address(page));
> 
> Hmm, vmemmap doesn't use vmalloc() to allocate memory.
> 

yes, this can be removed.

>> +else
>> +free_pages((unsigned long)page_address(page), order);
>> +}
>> +}
>> +
>> +static void free_pte_table(pmd_t *pmd)
>> +{
>> +pte_t *pte, *pte_start;
>> +int i;
>> +
>> +pte_start = (pte_t *)pmd_page_vaddr(*pmd);
>> +for (i = 0; i < PTRS_PER_PTE; i++) {
>> +pte = pte_start + i;
>> +if (pte_val(*pte))
>> +return;
>> +}
>> +
>> +/* free a pte talbe */
>> +vmemmap_free_pages(pmd_page(*pmd), 0);
>> +spin_lock(_mm.page_table_lock);
>> +pmd_clear(pmd);
>> +spin_unlock(_mm.page_table_lock);
>> +}
>> +
>> +static void free_pmd_table(pud_t *pud)
>> +{
>> +pmd_t *pmd, *pmd_start;
>> +int i;
>> +
>> +pmd_start = (pmd_t *)pud_page_vaddr(*pud);
>> +for (i = 0; i < PTRS_PER_PMD; i++) {
>> +pmd = pmd_start + i;
>> +if (pmd_val(*pmd))
>> +return;
>> +}
>> +
>> +/* free a pmd talbe */
>> +vmemmap_free_pages(pud_page(*pud), 0);
>> +spin_lock(_mm.page_table_lock);
>> +pud_clear(pud);
>> +spin_unlock(_mm.page_table_lock);
>> +}
>> +
>> +static void free_pud_table(pgd_t *pgd)
>> +{
>> +pud_t *pud, *pud_start;
>> +int i;
>> +
>> +pud_start = (pud_t *)pgd_page_vaddr(*pgd);
>> +for (i = 0; i < PTRS_PER_PUD; i++) {
>> +pud = pud_start + i;
>> +if (pud_val(*pud))
>> +return;
>> +}
>> +
>> +/* free a pud table */
>> +vmemmap_free_pages(pgd_page(*pgd), 0);
>> +spin_lock(_mm.page_table_lock);
>> +pgd_clear(pgd);
>> +spin_unlock(_mm.page_table_lock);
>> +}
>> +
>> +static int split_large_page(pte_t *kpte, unsigned long address, pte_t 
>> *pbase)
>> +{
>> +struct page *page = pmd_page(*(pmd_t *)kpte);
>> +int i = 0;
>> +unsigned long magic;
>> +unsigned long section_nr;
>> +
>> +__split_large_page(kpte, address, pbase);
>> +__flush_tlb_all();
>> +
>> +magic = (unsigned long) page->lru.next;
>> +if (magic == SECTION_INFO) {
>> +section_nr = pfn_to_section_nr(page_to_pfn(page));
>> +while (i < PTRS_PER_PMD) {
>> +page++;
>> +i++;
>> +get_page_bootmem(section_nr, page, SECTION_INFO);
>> +}
>> +}
>> +
>> +return 0;
>> +}
>> +
>> +static void vmemmap_pte_remove(pmd_t *pmd, 

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-11-29 Thread Wen Congyang
At 11/28/2012 05:40 PM, Jianguo Wu Wrote:
> Hi Congyang,
> 
> I think vmemmap's pgtable pages should be freed after all entries are 
> cleared, I have a patch to do this.
> The code logic is the same as [Patch v4 09/12] memory-hotplug: remove page 
> table of x86_64 architecture.
> 
> How do you think about this?
> 
> Signed-off-by: Jianguo Wu 
> Signed-off-by: Jiang Liu 
> ---
>  include/linux/mm.h  |1 +
>  mm/sparse-vmemmap.c |  214 
> +++
>  mm/sparse.c |5 +-
>  3 files changed, 218 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 5657670..1f26af5 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
> long pages, int node);
>  void vmemmap_populate_print_last(void);
>  void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
> unsigned long size);
> +void vmemmap_free(struct page *memmap, unsigned long nr_pages);
>  
>  enum mf_flags {
>   MF_COUNT_INCREASED = 1 << 0,
> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
> index 1b7e22a..242cb28 100644
> --- a/mm/sparse-vmemmap.c
> +++ b/mm/sparse-vmemmap.c
> @@ -29,6 +29,10 @@
>  #include 
>  #include 
>  
> +#ifdef CONFIG_MEMORY_HOTREMOVE
> +#include 
> +#endif
> +
>  /*
>   * Allocate a block of memory to be used to back the virtual memory map
>   * or to back the page tables that are used to create the mapping.
> @@ -224,3 +228,213 @@ void __init sparse_mem_maps_populate_node(struct page 
> **map_map,
>   vmemmap_buf_end = NULL;
>   }
>  }
> +
> +#ifdef CONFIG_MEMORY_HOTREMOVE
> +static void vmemmap_free_pages(struct page *page, int order)
> +{
> + struct zone *zone;
> + unsigned long magic;
> +
> + magic = (unsigned long) page->lru.next;
> + if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
> + put_page_bootmem(page);
> +
> + zone = page_zone(page);
> + zone_span_writelock(zone);
> + zone->present_pages++;
> + zone_span_writeunlock(zone);
> + totalram_pages++;
> + } else {
> + if (is_vmalloc_addr(page_address(page)))
> + vfree(page_address(page));

Hmm, vmemmap doesn't use vmalloc() to allocate memory.

> + else
> + free_pages((unsigned long)page_address(page), order);
> + }
> +}
> +
> +static void free_pte_table(pmd_t *pmd)
> +{
> + pte_t *pte, *pte_start;
> + int i;
> +
> + pte_start = (pte_t *)pmd_page_vaddr(*pmd);
> + for (i = 0; i < PTRS_PER_PTE; i++) {
> + pte = pte_start + i;
> + if (pte_val(*pte))
> + return;
> + }
> +
> + /* free a pte talbe */
> + vmemmap_free_pages(pmd_page(*pmd), 0);
> + spin_lock(_mm.page_table_lock);
> + pmd_clear(pmd);
> + spin_unlock(_mm.page_table_lock);
> +}
> +
> +static void free_pmd_table(pud_t *pud)
> +{
> + pmd_t *pmd, *pmd_start;
> + int i;
> +
> + pmd_start = (pmd_t *)pud_page_vaddr(*pud);
> + for (i = 0; i < PTRS_PER_PMD; i++) {
> + pmd = pmd_start + i;
> + if (pmd_val(*pmd))
> + return;
> + }
> +
> + /* free a pmd talbe */
> + vmemmap_free_pages(pud_page(*pud), 0);
> + spin_lock(_mm.page_table_lock);
> + pud_clear(pud);
> + spin_unlock(_mm.page_table_lock);
> +}
> +
> +static void free_pud_table(pgd_t *pgd)
> +{
> + pud_t *pud, *pud_start;
> + int i;
> +
> + pud_start = (pud_t *)pgd_page_vaddr(*pgd);
> + for (i = 0; i < PTRS_PER_PUD; i++) {
> + pud = pud_start + i;
> + if (pud_val(*pud))
> + return;
> + }
> +
> + /* free a pud table */
> + vmemmap_free_pages(pgd_page(*pgd), 0);
> + spin_lock(_mm.page_table_lock);
> + pgd_clear(pgd);
> + spin_unlock(_mm.page_table_lock);
> +}
> +
> +static int split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
> +{
> + struct page *page = pmd_page(*(pmd_t *)kpte);
> + int i = 0;
> + unsigned long magic;
> + unsigned long section_nr;
> +
> + __split_large_page(kpte, address, pbase);
> + __flush_tlb_all();
> +
> + magic = (unsigned long) page->lru.next;
> + if (magic == SECTION_INFO) {
> + section_nr = pfn_to_section_nr(page_to_pfn(page));
> + while (i < PTRS_PER_PMD) {
> + page++;
> + i++;
> + get_page_bootmem(section_nr, page, SECTION_INFO);
> + }
> + }
> +
> + return 0;
> +}
> +
> +static void vmemmap_pte_remove(pmd_t *pmd, unsigned long addr, unsigned long 
> end)
> +{
> + pte_t *pte;
> + unsigned long next;
> +
> + pte = pte_offset_kernel(pmd, addr);
> + for (; addr < end; pte++, addr += PAGE_SIZE) {
> + next = 

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-11-29 Thread Wen Congyang
At 11/28/2012 05:40 PM, Jianguo Wu Wrote:
 Hi Congyang,
 
 I think vmemmap's pgtable pages should be freed after all entries are 
 cleared, I have a patch to do this.
 The code logic is the same as [Patch v4 09/12] memory-hotplug: remove page 
 table of x86_64 architecture.
 
 How do you think about this?
 
 Signed-off-by: Jianguo Wu wujian...@huawei.com
 Signed-off-by: Jiang Liu jiang@huawei.com
 ---
  include/linux/mm.h  |1 +
  mm/sparse-vmemmap.c |  214 
 +++
  mm/sparse.c |5 +-
  3 files changed, 218 insertions(+), 2 deletions(-)
 
 diff --git a/include/linux/mm.h b/include/linux/mm.h
 index 5657670..1f26af5 100644
 --- a/include/linux/mm.h
 +++ b/include/linux/mm.h
 @@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
 long pages, int node);
  void vmemmap_populate_print_last(void);
  void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
 unsigned long size);
 +void vmemmap_free(struct page *memmap, unsigned long nr_pages);
  
  enum mf_flags {
   MF_COUNT_INCREASED = 1  0,
 diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
 index 1b7e22a..242cb28 100644
 --- a/mm/sparse-vmemmap.c
 +++ b/mm/sparse-vmemmap.c
 @@ -29,6 +29,10 @@
  #include asm/pgalloc.h
  #include asm/pgtable.h
  
 +#ifdef CONFIG_MEMORY_HOTREMOVE
 +#include asm/tlbflush.h
 +#endif
 +
  /*
   * Allocate a block of memory to be used to back the virtual memory map
   * or to back the page tables that are used to create the mapping.
 @@ -224,3 +228,213 @@ void __init sparse_mem_maps_populate_node(struct page 
 **map_map,
   vmemmap_buf_end = NULL;
   }
  }
 +
 +#ifdef CONFIG_MEMORY_HOTREMOVE
 +static void vmemmap_free_pages(struct page *page, int order)
 +{
 + struct zone *zone;
 + unsigned long magic;
 +
 + magic = (unsigned long) page-lru.next;
 + if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
 + put_page_bootmem(page);
 +
 + zone = page_zone(page);
 + zone_span_writelock(zone);
 + zone-present_pages++;
 + zone_span_writeunlock(zone);
 + totalram_pages++;
 + } else {
 + if (is_vmalloc_addr(page_address(page)))
 + vfree(page_address(page));

Hmm, vmemmap doesn't use vmalloc() to allocate memory.

 + else
 + free_pages((unsigned long)page_address(page), order);
 + }
 +}
 +
 +static void free_pte_table(pmd_t *pmd)
 +{
 + pte_t *pte, *pte_start;
 + int i;
 +
 + pte_start = (pte_t *)pmd_page_vaddr(*pmd);
 + for (i = 0; i  PTRS_PER_PTE; i++) {
 + pte = pte_start + i;
 + if (pte_val(*pte))
 + return;
 + }
 +
 + /* free a pte talbe */
 + vmemmap_free_pages(pmd_page(*pmd), 0);
 + spin_lock(init_mm.page_table_lock);
 + pmd_clear(pmd);
 + spin_unlock(init_mm.page_table_lock);
 +}
 +
 +static void free_pmd_table(pud_t *pud)
 +{
 + pmd_t *pmd, *pmd_start;
 + int i;
 +
 + pmd_start = (pmd_t *)pud_page_vaddr(*pud);
 + for (i = 0; i  PTRS_PER_PMD; i++) {
 + pmd = pmd_start + i;
 + if (pmd_val(*pmd))
 + return;
 + }
 +
 + /* free a pmd talbe */
 + vmemmap_free_pages(pud_page(*pud), 0);
 + spin_lock(init_mm.page_table_lock);
 + pud_clear(pud);
 + spin_unlock(init_mm.page_table_lock);
 +}
 +
 +static void free_pud_table(pgd_t *pgd)
 +{
 + pud_t *pud, *pud_start;
 + int i;
 +
 + pud_start = (pud_t *)pgd_page_vaddr(*pgd);
 + for (i = 0; i  PTRS_PER_PUD; i++) {
 + pud = pud_start + i;
 + if (pud_val(*pud))
 + return;
 + }
 +
 + /* free a pud table */
 + vmemmap_free_pages(pgd_page(*pgd), 0);
 + spin_lock(init_mm.page_table_lock);
 + pgd_clear(pgd);
 + spin_unlock(init_mm.page_table_lock);
 +}
 +
 +static int split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
 +{
 + struct page *page = pmd_page(*(pmd_t *)kpte);
 + int i = 0;
 + unsigned long magic;
 + unsigned long section_nr;
 +
 + __split_large_page(kpte, address, pbase);
 + __flush_tlb_all();
 +
 + magic = (unsigned long) page-lru.next;
 + if (magic == SECTION_INFO) {
 + section_nr = pfn_to_section_nr(page_to_pfn(page));
 + while (i  PTRS_PER_PMD) {
 + page++;
 + i++;
 + get_page_bootmem(section_nr, page, SECTION_INFO);
 + }
 + }
 +
 + return 0;
 +}
 +
 +static void vmemmap_pte_remove(pmd_t *pmd, unsigned long addr, unsigned long 
 end)
 +{
 + pte_t *pte;
 + unsigned long next;
 +
 + pte = pte_offset_kernel(pmd, addr);
 + for (; addr  end; pte++, addr += PAGE_SIZE) {
 + next = (addr + PAGE_SIZE)  PAGE_MASK;
 + if (next  end)
 +   

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-11-29 Thread Jianguo Wu
Hi Congyang,

Thanks for your review and comments.

On 2012/11/30 9:45, Wen Congyang wrote:

 At 11/28/2012 05:40 PM, Jianguo Wu Wrote:
 Hi Congyang,

 I think vmemmap's pgtable pages should be freed after all entries are 
 cleared, I have a patch to do this.
 The code logic is the same as [Patch v4 09/12] memory-hotplug: remove page 
 table of x86_64 architecture.

 How do you think about this?

 Signed-off-by: Jianguo Wu wujian...@huawei.com
 Signed-off-by: Jiang Liu jiang@huawei.com
 ---
  include/linux/mm.h  |1 +
  mm/sparse-vmemmap.c |  214 
 +++
  mm/sparse.c |5 +-
  3 files changed, 218 insertions(+), 2 deletions(-)

 diff --git a/include/linux/mm.h b/include/linux/mm.h
 index 5657670..1f26af5 100644
 --- a/include/linux/mm.h
 +++ b/include/linux/mm.h
 @@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
 long pages, int node);
  void vmemmap_populate_print_last(void);
  void register_page_bootmem_memmap(unsigned long section_nr, struct page 
 *map,
unsigned long size);
 +void vmemmap_free(struct page *memmap, unsigned long nr_pages);
  
  enum mf_flags {
  MF_COUNT_INCREASED = 1  0,
 diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
 index 1b7e22a..242cb28 100644
 --- a/mm/sparse-vmemmap.c
 +++ b/mm/sparse-vmemmap.c
 @@ -29,6 +29,10 @@
  #include asm/pgalloc.h
  #include asm/pgtable.h
  
 +#ifdef CONFIG_MEMORY_HOTREMOVE
 +#include asm/tlbflush.h
 +#endif
 +
  /*
   * Allocate a block of memory to be used to back the virtual memory map
   * or to back the page tables that are used to create the mapping.
 @@ -224,3 +228,213 @@ void __init sparse_mem_maps_populate_node(struct page 
 **map_map,
  vmemmap_buf_end = NULL;
  }
  }
 +
 +#ifdef CONFIG_MEMORY_HOTREMOVE
 +static void vmemmap_free_pages(struct page *page, int order)
 +{
 +struct zone *zone;
 +unsigned long magic;
 +
 +magic = (unsigned long) page-lru.next;
 +if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
 +put_page_bootmem(page);
 +
 +zone = page_zone(page);
 +zone_span_writelock(zone);
 +zone-present_pages++;
 +zone_span_writeunlock(zone);
 +totalram_pages++;
 +} else {
 +if (is_vmalloc_addr(page_address(page)))
 +vfree(page_address(page));
 
 Hmm, vmemmap doesn't use vmalloc() to allocate memory.
 

yes, this can be removed.

 +else
 +free_pages((unsigned long)page_address(page), order);
 +}
 +}
 +
 +static void free_pte_table(pmd_t *pmd)
 +{
 +pte_t *pte, *pte_start;
 +int i;
 +
 +pte_start = (pte_t *)pmd_page_vaddr(*pmd);
 +for (i = 0; i  PTRS_PER_PTE; i++) {
 +pte = pte_start + i;
 +if (pte_val(*pte))
 +return;
 +}
 +
 +/* free a pte talbe */
 +vmemmap_free_pages(pmd_page(*pmd), 0);
 +spin_lock(init_mm.page_table_lock);
 +pmd_clear(pmd);
 +spin_unlock(init_mm.page_table_lock);
 +}
 +
 +static void free_pmd_table(pud_t *pud)
 +{
 +pmd_t *pmd, *pmd_start;
 +int i;
 +
 +pmd_start = (pmd_t *)pud_page_vaddr(*pud);
 +for (i = 0; i  PTRS_PER_PMD; i++) {
 +pmd = pmd_start + i;
 +if (pmd_val(*pmd))
 +return;
 +}
 +
 +/* free a pmd talbe */
 +vmemmap_free_pages(pud_page(*pud), 0);
 +spin_lock(init_mm.page_table_lock);
 +pud_clear(pud);
 +spin_unlock(init_mm.page_table_lock);
 +}
 +
 +static void free_pud_table(pgd_t *pgd)
 +{
 +pud_t *pud, *pud_start;
 +int i;
 +
 +pud_start = (pud_t *)pgd_page_vaddr(*pgd);
 +for (i = 0; i  PTRS_PER_PUD; i++) {
 +pud = pud_start + i;
 +if (pud_val(*pud))
 +return;
 +}
 +
 +/* free a pud table */
 +vmemmap_free_pages(pgd_page(*pgd), 0);
 +spin_lock(init_mm.page_table_lock);
 +pgd_clear(pgd);
 +spin_unlock(init_mm.page_table_lock);
 +}
 +
 +static int split_large_page(pte_t *kpte, unsigned long address, pte_t 
 *pbase)
 +{
 +struct page *page = pmd_page(*(pmd_t *)kpte);
 +int i = 0;
 +unsigned long magic;
 +unsigned long section_nr;
 +
 +__split_large_page(kpte, address, pbase);
 +__flush_tlb_all();
 +
 +magic = (unsigned long) page-lru.next;
 +if (magic == SECTION_INFO) {
 +section_nr = pfn_to_section_nr(page_to_pfn(page));
 +while (i  PTRS_PER_PMD) {
 +page++;
 +i++;
 +get_page_bootmem(section_nr, page, SECTION_INFO);
 +}
 +}
 +
 +return 0;
 +}
 +
 +static void vmemmap_pte_remove(pmd_t *pmd, unsigned long addr, unsigned 
 long end)
 +{
 +pte_t *pte;
 +unsigned long next;
 +
 +pte = pte_offset_kernel(pmd, addr);
 +for (; addr  end; pte++, addr += PAGE_SIZE) {
 +next = (addr + PAGE_SIZE)  

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-11-29 Thread Yasuaki Ishimatsu

Hi Jianguo,

2012/11/30 11:47, Jianguo Wu wrote:

Hi Congyang,

Thanks for your review and comments.

On 2012/11/30 9:45, Wen Congyang wrote:


At 11/28/2012 05:40 PM, Jianguo Wu Wrote:

Hi Congyang,

I think vmemmap's pgtable pages should be freed after all entries are cleared; 
I have a patch to do this.
The code logic is the same as [Patch v4 09/12] memory-hotplug: remove page 
table of x86_64 architecture.

What do you think about this?

Signed-off-by: Jianguo Wu <wujianguo@huawei.com>
Signed-off-by: Jiang Liu <jiang.liu@huawei.com>
---
  include/linux/mm.h  |1 +
  mm/sparse-vmemmap.c |  214 +++
  mm/sparse.c |5 +-
  3 files changed, 218 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5657670..1f26af5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
long pages, int node);
  void vmemmap_populate_print_last(void);
  void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
  unsigned long size);
+void vmemmap_free(struct page *memmap, unsigned long nr_pages);

  enum mf_flags {
MF_COUNT_INCREASED = 1  0,
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 1b7e22a..242cb28 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -29,6 +29,10 @@
  #include asm/pgalloc.h
  #include asm/pgtable.h

+#ifdef CONFIG_MEMORY_HOTREMOVE
+#include asm/tlbflush.h
+#endif
+
  /*
   * Allocate a block of memory to be used to back the virtual memory map
   * or to back the page tables that are used to create the mapping.
@@ -224,3 +228,213 @@ void __init sparse_mem_maps_populate_node(struct page 
**map_map,
vmemmap_buf_end = NULL;
}
  }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static void vmemmap_free_pages(struct page *page, int order)
+{
+   struct zone *zone;
+   unsigned long magic;
+
+   magic = (unsigned long) page-lru.next;
+   if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+   put_page_bootmem(page);
+
+   zone = page_zone(page);
+   zone_span_writelock(zone);
+   zone-present_pages++;
+   zone_span_writeunlock(zone);
+   totalram_pages++;
+   } else {
+   if (is_vmalloc_addr(page_address(page)))
+   vfree(page_address(page));


Hmm, vmemmap doesn't use vmalloc() to allocate memory.



yes, this can be removed.


+   else
+   free_pages((unsigned long)page_address(page), order);
+   }
+}
+
+static void free_pte_table(pmd_t *pmd)
+{
+   pte_t *pte, *pte_start;
+   int i;
+
+   pte_start = (pte_t *)pmd_page_vaddr(*pmd);
+   for (i = 0; i  PTRS_PER_PTE; i++) {
+   pte = pte_start + i;
+   if (pte_val(*pte))
+   return;
+   }
+
+   /* free a pte talbe */
+   vmemmap_free_pages(pmd_page(*pmd), 0);
+   spin_lock(init_mm.page_table_lock);
+   pmd_clear(pmd);
+   spin_unlock(init_mm.page_table_lock);
+}
+
+static void free_pmd_table(pud_t *pud)
+{
+   pmd_t *pmd, *pmd_start;
+   int i;
+
+   pmd_start = (pmd_t *)pud_page_vaddr(*pud);
+   for (i = 0; i  PTRS_PER_PMD; i++) {
+   pmd = pmd_start + i;
+   if (pmd_val(*pmd))
+   return;
+   }
+
+   /* free a pmd talbe */
+   vmemmap_free_pages(pud_page(*pud), 0);
+   spin_lock(init_mm.page_table_lock);
+   pud_clear(pud);
+   spin_unlock(init_mm.page_table_lock);
+}
+
+static void free_pud_table(pgd_t *pgd)
+{
+   pud_t *pud, *pud_start;
+   int i;
+
+   pud_start = (pud_t *)pgd_page_vaddr(*pgd);
+   for (i = 0; i  PTRS_PER_PUD; i++) {
+   pud = pud_start + i;
+   if (pud_val(*pud))
+   return;
+   }
+
+   /* free a pud table */
+   vmemmap_free_pages(pgd_page(*pgd), 0);
+   spin_lock(init_mm.page_table_lock);
+   pgd_clear(pgd);
+   spin_unlock(init_mm.page_table_lock);
+}
+
+static int split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
+{
+   struct page *page = pmd_page(*(pmd_t *)kpte);
+   int i = 0;
+   unsigned long magic;
+   unsigned long section_nr;
+
+   __split_large_page(kpte, address, pbase);
+   __flush_tlb_all();
+
+   magic = (unsigned long) page-lru.next;
+   if (magic == SECTION_INFO) {
+   section_nr = pfn_to_section_nr(page_to_pfn(page));
+   while (i  PTRS_PER_PMD) {
+   page++;
+   i++;
+   get_page_bootmem(section_nr, page, SECTION_INFO);
+   }
+   }
+
+   return 0;
+}
+
+static void vmemmap_pte_remove(pmd_t *pmd, unsigned long addr, unsigned long 
end)
+{
+   pte_t *pte;
+   unsigned long next;
+
+   

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-11-28 Thread Jianguo Wu
Hi Congyang,

I think vmemmap's pgtable pages should be freed after all entries are cleared; 
I have a patch to do this.
The code logic is the same as [Patch v4 09/12] memory-hotplug: remove page 
table of x86_64 architecture.

What do you think about this?

Signed-off-by: Jianguo Wu 
Signed-off-by: Jiang Liu 
---
 include/linux/mm.h  |1 +
 mm/sparse-vmemmap.c |  214 +++
 mm/sparse.c |5 +-
 3 files changed, 218 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5657670..1f26af5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
long pages, int node);
 void vmemmap_populate_print_last(void);
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
  unsigned long size);
+void vmemmap_free(struct page *memmap, unsigned long nr_pages);
 
 enum mf_flags {
MF_COUNT_INCREASED = 1 << 0,
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 1b7e22a..242cb28 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -29,6 +29,10 @@
 #include 
 #include 
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+#include 
+#endif
+
 /*
  * Allocate a block of memory to be used to back the virtual memory map
  * or to back the page tables that are used to create the mapping.
@@ -224,3 +228,213 @@ void __init sparse_mem_maps_populate_node(struct page 
**map_map,
vmemmap_buf_end = NULL;
}
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static void vmemmap_free_pages(struct page *page, int order)
+{
+   struct zone *zone;
+   unsigned long magic;
+
+   magic = (unsigned long) page->lru.next;
+   if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+   put_page_bootmem(page);
+
+   zone = page_zone(page);
+   zone_span_writelock(zone);
+   zone->present_pages++;
+   zone_span_writeunlock(zone);
+   totalram_pages++;
+   } else {
+   if (is_vmalloc_addr(page_address(page)))
+   vfree(page_address(page));
+   else
+   free_pages((unsigned long)page_address(page), order);
+   }
+}
+
+static void free_pte_table(pmd_t *pmd)
+{
+   pte_t *pte, *pte_start;
+   int i;
+
+   pte_start = (pte_t *)pmd_page_vaddr(*pmd);
+   for (i = 0; i < PTRS_PER_PTE; i++) {
+   pte = pte_start + i;
+   if (pte_val(*pte))
+   return;
+   }
+
+   /* free a pte talbe */
+   vmemmap_free_pages(pmd_page(*pmd), 0);
+   spin_lock(_mm.page_table_lock);
+   pmd_clear(pmd);
+   spin_unlock(_mm.page_table_lock);
+}
+
+static void free_pmd_table(pud_t *pud)
+{
+   pmd_t *pmd, *pmd_start;
+   int i;
+
+   pmd_start = (pmd_t *)pud_page_vaddr(*pud);
+   for (i = 0; i < PTRS_PER_PMD; i++) {
+   pmd = pmd_start + i;
+   if (pmd_val(*pmd))
+   return;
+   }
+
+   /* free a pmd talbe */
+   vmemmap_free_pages(pud_page(*pud), 0);
+   spin_lock(_mm.page_table_lock);
+   pud_clear(pud);
+   spin_unlock(_mm.page_table_lock);
+}
+
+static void free_pud_table(pgd_t *pgd)
+{
+   pud_t *pud, *pud_start;
+   int i;
+
+   pud_start = (pud_t *)pgd_page_vaddr(*pgd);
+   for (i = 0; i < PTRS_PER_PUD; i++) {
+   pud = pud_start + i;
+   if (pud_val(*pud))
+   return;
+   }
+
+   /* free a pud table */
+   vmemmap_free_pages(pgd_page(*pgd), 0);
+   spin_lock(_mm.page_table_lock);
+   pgd_clear(pgd);
+   spin_unlock(_mm.page_table_lock);
+}
+
+static int split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
+{
+   struct page *page = pmd_page(*(pmd_t *)kpte);
+   int i = 0;
+   unsigned long magic;
+   unsigned long section_nr;
+
+   __split_large_page(kpte, address, pbase);
+   __flush_tlb_all();
+
+   magic = (unsigned long) page->lru.next;
+   if (magic == SECTION_INFO) {
+   section_nr = pfn_to_section_nr(page_to_pfn(page));
+   while (i < PTRS_PER_PMD) {
+   page++;
+   i++;
+   get_page_bootmem(section_nr, page, SECTION_INFO);
+   }
+   }
+
+   return 0;
+}
+
+static void vmemmap_pte_remove(pmd_t *pmd, unsigned long addr, unsigned long 
end)
+{
+   pte_t *pte;
+   unsigned long next;
+
+   pte = pte_offset_kernel(pmd, addr);
+   for (; addr < end; pte++, addr += PAGE_SIZE) {
+   next = (addr + PAGE_SIZE) & PAGE_MASK;
+   if (next > end)
+   next = end;
+
+   if (pte_none(*pte))
+   continue;
+   if (IS_ALIGNED(addr, PAGE_SIZE) &&
+   IS_ALIGNED(end, 

Re: [Patch v4 08/12] memory-hotplug: remove memmap of sparse-vmemmap

2012-11-28 Thread Jianguo Wu
Hi Congyang,

I think vmemmap's pgtable pages should be freed after all entries are cleared; 
I have a patch to do this.
The code logic is the same as [Patch v4 09/12] memory-hotplug: remove page 
table of x86_64 architecture.

What do you think about this?

Signed-off-by: Jianguo Wu <wujianguo@huawei.com>
Signed-off-by: Jiang Liu <jiang.liu@huawei.com>
---
 include/linux/mm.h  |1 +
 mm/sparse-vmemmap.c |  214 +++
 mm/sparse.c |5 +-
 3 files changed, 218 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5657670..1f26af5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1642,6 +1642,7 @@ int vmemmap_populate(struct page *start_page, unsigned 
long pages, int node);
 void vmemmap_populate_print_last(void);
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
  unsigned long size);
+void vmemmap_free(struct page *memmap, unsigned long nr_pages);
 
 enum mf_flags {
MF_COUNT_INCREASED = 1  0,
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 1b7e22a..242cb28 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -29,6 +29,10 @@
 #include asm/pgalloc.h
 #include asm/pgtable.h
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+#include asm/tlbflush.h
+#endif
+
 /*
  * Allocate a block of memory to be used to back the virtual memory map
  * or to back the page tables that are used to create the mapping.
@@ -224,3 +228,213 @@ void __init sparse_mem_maps_populate_node(struct page 
**map_map,
vmemmap_buf_end = NULL;
}
 }
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static void vmemmap_free_pages(struct page *page, int order)
+{
+   struct zone *zone;
+   unsigned long magic;
+
+   magic = (unsigned long) page-lru.next;
+   if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+   put_page_bootmem(page);
+
+   zone = page_zone(page);
+   zone_span_writelock(zone);
+   zone-present_pages++;
+   zone_span_writeunlock(zone);
+   totalram_pages++;
+   } else {
+   if (is_vmalloc_addr(page_address(page)))
+   vfree(page_address(page));
+   else
+   free_pages((unsigned long)page_address(page), order);
+   }
+}
+
+static void free_pte_table(pmd_t *pmd)
+{
+   pte_t *pte, *pte_start;
+   int i;
+
+   pte_start = (pte_t *)pmd_page_vaddr(*pmd);
+   for (i = 0; i  PTRS_PER_PTE; i++) {
+   pte = pte_start + i;
+   if (pte_val(*pte))
+   return;
+   }
+
+   /* free a pte talbe */
+   vmemmap_free_pages(pmd_page(*pmd), 0);
+   spin_lock(init_mm.page_table_lock);
+   pmd_clear(pmd);
+   spin_unlock(init_mm.page_table_lock);
+}
+
+static void free_pmd_table(pud_t *pud)
+{
+   pmd_t *pmd, *pmd_start;
+   int i;
+
+   pmd_start = (pmd_t *)pud_page_vaddr(*pud);
+   for (i = 0; i  PTRS_PER_PMD; i++) {
+   pmd = pmd_start + i;
+   if (pmd_val(*pmd))
+   return;
+   }
+
+   /* free a pmd talbe */
+   vmemmap_free_pages(pud_page(*pud), 0);
+   spin_lock(init_mm.page_table_lock);
+   pud_clear(pud);
+   spin_unlock(init_mm.page_table_lock);
+}
+
+static void free_pud_table(pgd_t *pgd)
+{
+   pud_t *pud, *pud_start;
+   int i;
+
+   pud_start = (pud_t *)pgd_page_vaddr(*pgd);
+   for (i = 0; i  PTRS_PER_PUD; i++) {
+   pud = pud_start + i;
+   if (pud_val(*pud))
+   return;
+   }
+
+   /* free a pud table */
+   vmemmap_free_pages(pgd_page(*pgd), 0);
+   spin_lock(init_mm.page_table_lock);
+   pgd_clear(pgd);
+   spin_unlock(init_mm.page_table_lock);
+}
+
+static int split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
+{
+   struct page *page = pmd_page(*(pmd_t *)kpte);
+   int i = 0;
+   unsigned long magic;
+   unsigned long section_nr;
+
+   __split_large_page(kpte, address, pbase);
+   __flush_tlb_all();
+
+   magic = (unsigned long) page-lru.next;
+   if (magic == SECTION_INFO) {
+   section_nr = pfn_to_section_nr(page_to_pfn(page));
+   while (i  PTRS_PER_PMD) {
+   page++;
+   i++;
+   get_page_bootmem(section_nr, page, SECTION_INFO);
+   }
+   }
+
+   return 0;
+}
+
+static void vmemmap_pte_remove(pmd_t *pmd, unsigned long addr, unsigned long 
end)
+{
+   pte_t *pte;
+   unsigned long next;
+
+   pte = pte_offset_kernel(pmd, addr);
+   for (; addr  end; pte++, addr += PAGE_SIZE) {
+   next = (addr + PAGE_SIZE)  PAGE_MASK;
+   if (next  end)
+   next = end;
+
+   if (pte_none(*pte))
+   continue;
+