Re: [PATCH] mm, x86: Do not zero hugetlbfs pages at boot. -v2

2013-04-04 Thread Michal Hocko
On Wed 03-04-13 12:21:32, Robin Holt wrote:
> On Wed, Apr 03, 2013 at 04:00:49PM +0200, Michal Hocko wrote:
> > On Tue 02-04-13 21:43:44, Robin Holt wrote:
> > [...]
> > > diff --git a/mm/bootmem.c b/mm/bootmem.c
> > > index 2b0bcb0..b2e4027 100644
> > > --- a/mm/bootmem.c
> > > +++ b/mm/bootmem.c
> > > @@ -705,12 +705,16 @@ void * __init __alloc_bootmem(unsigned long size, 
> > > unsigned long align,
> > >  
> > >  void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
> > >   unsigned long size, unsigned long align,
> > > - unsigned long goal, unsigned long limit)
> > > + unsigned long goal, unsigned long limit,
> > > + int zeroed)
> > >  {
> > >   void *ptr;
> > >  
> > >   if (WARN_ON_ONCE(slab_is_available()))
> > > - return kzalloc(size, GFP_NOWAIT);
> > > + if (zeroed)
> > > + return kzalloc(size, GFP_NOWAIT);
> > > + else
> > > + return kmalloc(size, GFP_NOWAIT);
> > >  again:
> > >  
> > >   /* do not panic in alloc_bootmem_bdata() */
> > 
> > You need to update alloc_bootmem_bdata and alloc_bootmem_core as well.
> > Otherwise this is a no-op for early allocations when slab is not
> > available which is the case unless something is broken.
> 
> Michal,
> 
> Does this do what you would expect?  

yes, it looks right when I quickly glanced over it. I haven't checked
deeply yet. I would suggest reposting and adding more *bootmem people
into CC (e.g. Johannes Weiner, Yinghai Lu, Tejun Heo and maybe others).

> I compiled this for ia64, but I have not tested it at all.
> 
> Robin
> 
> ---
>  mm/bootmem.c | 30 +++---
>  1 file changed, 19 insertions(+), 11 deletions(-)
> 
> diff --git a/mm/bootmem.c b/mm/bootmem.c
> index b2e4027..350e0ab 100644
> --- a/mm/bootmem.c
> +++ b/mm/bootmem.c
> @@ -497,7 +497,8 @@ static unsigned long __init align_off(struct bootmem_data 
> *bdata,
>  
>  static void * __init alloc_bootmem_bdata(struct bootmem_data *bdata,
>   unsigned long size, unsigned long align,
> - unsigned long goal, unsigned long limit)
> + unsigned long goal, unsigned long limit,
> + int zeroed)
>  {
>   unsigned long fallback = 0;
>   unsigned long min, max, start, sidx, midx, step;
> @@ -584,7 +585,8 @@ find_block:
>  
>   region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
>   start_off);
> - memset(region, 0, size);
> + if (zeroed)
> + memset(region, 0, size);
>   /*
>* The min_count is set to 0 so that bootmem allocated blocks
>* are never reported as leaks.
> @@ -605,13 +607,18 @@ find_block:
>  static void * __init alloc_bootmem_core(unsigned long size,
>   unsigned long align,
>   unsigned long goal,
> - unsigned long limit)
> + unsigned long limit,
> + int zeroed)
>  {
>   bootmem_data_t *bdata;
>   void *region;
>  
> - if (WARN_ON_ONCE(slab_is_available()))
> - return kzalloc(size, GFP_NOWAIT);
> + if (WARN_ON_ONCE(slab_is_available())) {
> + if (zeroed)
> + return kzalloc(size, GFP_NOWAIT);
> + else
> + return kmalloc(size, GFP_NOWAIT);
> + }
>  
>   list_for_each_entry(bdata, &bdata_list, list) {
>   if (goal && bdata->node_low_pfn <= PFN_DOWN(goal))
> @@ -619,7 +626,7 @@ static void * __init alloc_bootmem_core(unsigned long 
> size,
>   if (limit && bdata->node_min_pfn >= PFN_DOWN(limit))
>   break;
>  
> - region = alloc_bootmem_bdata(bdata, size, align, goal, limit);
> + region = alloc_bootmem_bdata(bdata, size, align, goal, limit, 
> zeroed);
>   if (region)
>   return region;
>   }
> @@ -635,7 +642,7 @@ static void * __init ___alloc_bootmem_nopanic(unsigned 
> long size,
>   void *ptr;
>  
>  restart:
> - ptr = alloc_bootmem_core(size, align, goal, limit);
> + ptr = alloc_bootmem_core(size, align, goal, limit, 1);
>   if (ptr)
>   return ptr;
>   if (goal) {
> @@ -710,22 +717,23 @@ void * __init ___alloc_bootmem_node_nopanic(pg_data_t 
> *pgdat,
>  {
>   void *ptr;
>  
> - if (WARN_ON_ONCE(slab_is_available()))
> + if (WARN_ON_ONCE(slab_is_available())) {
>   if (zeroed)
>   return kzalloc(size, GFP_NOWAIT);
>   else
>   return kmalloc(size, GFP_NOWAIT);
> + }
>  again:
>  
>   /* do not panic in alloc_bootmem_bdata() */
> > >   if (limit && goal + size > limit)

Re: [PATCH] mm, x86: Do not zero hugetlbfs pages at boot. -v2

2013-04-04 Thread Michal Hocko
On Wed 03-04-13 12:00:12, Robin Holt wrote:
> On Wed, Apr 03, 2013 at 04:02:47PM +0200, Michal Hocko wrote:
> > On Tue 02-04-13 21:43:44, Robin Holt wrote:
> > [...]
> > > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > > index ca9a7c6..7683f6a 100644
> > > --- a/mm/hugetlb.c
> > > +++ b/mm/hugetlb.c
> > > @@ -1185,7 +1185,7 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
> > >   while (nr_nodes) {
> > >   void *addr;
> > >  
> > > - addr = __alloc_bootmem_node_nopanic(
> > > + addr = __alloc_bootmem_node_nopanic_notzeroed(
> > >   NODE_DATA(hstate_next_node_to_alloc(h,
> > >   &node_states[N_MEMORY])),
> > >   huge_page_size(h), huge_page_size(h), 0);
> > 
> > Ohh, and powerpc seems to have its own opinion how to allocate huge
> > pages. See arch/powerpc/mm/hugetlbpage.c
> 
> Do I need to address their allocations?  Can I leave that part of the
> changes as something powerpc can address if they are affected by this?

I mentioned powerpc basically because I encountered it as the only
alternative implementation of alloc_bootmem_huge_page. I haven't checked
how it does the job and now that I am looking closer it uses memblock
allocator so it would need a separate fix.
I guess you are right saying that this should be handled when the need
arises.

-- 
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] mm, x86: Do not zero hugetlbfs pages at boot. -v2

2013-04-03 Thread Robin Holt
On Wed, Apr 03, 2013 at 04:00:49PM +0200, Michal Hocko wrote:
> On Tue 02-04-13 21:43:44, Robin Holt wrote:
> [...]
> > diff --git a/mm/bootmem.c b/mm/bootmem.c
> > index 2b0bcb0..b2e4027 100644
> > --- a/mm/bootmem.c
> > +++ b/mm/bootmem.c
> > @@ -705,12 +705,16 @@ void * __init __alloc_bootmem(unsigned long size, 
> > unsigned long align,
> >  
> >  void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
> > unsigned long size, unsigned long align,
> > -   unsigned long goal, unsigned long limit)
> > +   unsigned long goal, unsigned long limit,
> > +   int zeroed)
> >  {
> > void *ptr;
> >  
> > if (WARN_ON_ONCE(slab_is_available()))
> > -   return kzalloc(size, GFP_NOWAIT);
> > +   if (zeroed)
> > +   return kzalloc(size, GFP_NOWAIT);
> > +   else
> > +   return kmalloc(size, GFP_NOWAIT);
> >  again:
> >  
> > /* do not panic in alloc_bootmem_bdata() */
> 
> You need to update alloc_bootmem_bdata and alloc_bootmem_core as well.
> Otherwise this is a no-op for early allocations when slab is not
> available which is the case unless something is broken.

Michal,

Does this do what you would expect?  I compiled this for ia64, but I
have not tested it at all.

Robin

---
 mm/bootmem.c | 30 +++---
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/mm/bootmem.c b/mm/bootmem.c
index b2e4027..350e0ab 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -497,7 +497,8 @@ static unsigned long __init align_off(struct bootmem_data 
*bdata,
 
 static void * __init alloc_bootmem_bdata(struct bootmem_data *bdata,
unsigned long size, unsigned long align,
-   unsigned long goal, unsigned long limit)
+   unsigned long goal, unsigned long limit,
+   int zeroed)
 {
unsigned long fallback = 0;
unsigned long min, max, start, sidx, midx, step;
@@ -584,7 +585,8 @@ find_block:
 
region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
start_off);
-   memset(region, 0, size);
+   if (zeroed)
+   memset(region, 0, size);
/*
 * The min_count is set to 0 so that bootmem allocated blocks
 * are never reported as leaks.
@@ -605,13 +607,18 @@ find_block:
 static void * __init alloc_bootmem_core(unsigned long size,
unsigned long align,
unsigned long goal,
-   unsigned long limit)
+   unsigned long limit,
+   int zeroed)
 {
bootmem_data_t *bdata;
void *region;
 
-   if (WARN_ON_ONCE(slab_is_available()))
-   return kzalloc(size, GFP_NOWAIT);
+   if (WARN_ON_ONCE(slab_is_available())) {
+   if (zeroed)
+   return kzalloc(size, GFP_NOWAIT);
+   else
+   return kmalloc(size, GFP_NOWAIT);
+   }
 
list_for_each_entry(bdata, &bdata_list, list) {
if (goal && bdata->node_low_pfn <= PFN_DOWN(goal))
@@ -619,7 +626,7 @@ static void * __init alloc_bootmem_core(unsigned long size,
if (limit && bdata->node_min_pfn >= PFN_DOWN(limit))
break;
 
-   region = alloc_bootmem_bdata(bdata, size, align, goal, limit);
+   region = alloc_bootmem_bdata(bdata, size, align, goal, limit, 
zeroed);
if (region)
return region;
}
@@ -635,7 +642,7 @@ static void * __init ___alloc_bootmem_nopanic(unsigned long 
size,
void *ptr;
 
 restart:
-   ptr = alloc_bootmem_core(size, align, goal, limit);
+   ptr = alloc_bootmem_core(size, align, goal, limit, 1);
if (ptr)
return ptr;
if (goal) {
@@ -710,22 +717,23 @@ void * __init ___alloc_bootmem_node_nopanic(pg_data_t 
*pgdat,
 {
void *ptr;
 
-   if (WARN_ON_ONCE(slab_is_available()))
+   if (WARN_ON_ONCE(slab_is_available())) {
if (zeroed)
return kzalloc(size, GFP_NOWAIT);
else
return kmalloc(size, GFP_NOWAIT);
+   }
 again:
 
/* do not panic in alloc_bootmem_bdata() */
if (limit && goal + size > limit)
limit = 0;
 
-   ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit);
+   ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit, 
zeroed);
if (ptr)
return ptr;
 
-   ptr = alloc_bootmem_core(size, align, goal, limit);
> -   ptr = alloc_bootmem_core(size, align, goal, limit, zeroed);

Re: [PATCH] mm, x86: Do not zero hugetlbfs pages at boot. -v2

2013-04-03 Thread Robin Holt
On Wed, Apr 03, 2013 at 04:02:47PM +0200, Michal Hocko wrote:
> On Tue 02-04-13 21:43:44, Robin Holt wrote:
> [...]
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index ca9a7c6..7683f6a 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -1185,7 +1185,7 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
> > while (nr_nodes) {
> > void *addr;
> >  
> > -   addr = __alloc_bootmem_node_nopanic(
> > +   addr = __alloc_bootmem_node_nopanic_notzeroed(
> > NODE_DATA(hstate_next_node_to_alloc(h,
> > &node_states[N_MEMORY])),
> > huge_page_size(h), huge_page_size(h), 0);
> 
> Ohh, and powerpc seems to have its own opinion how to allocate huge
> pages. See arch/powerpc/mm/hugetlbpage.c

Do I need to address their allocations?  Can I leave that part of the
changes as something powerpc can address if they are affected by this?

Robin
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] mm, x86: Do not zero hugetlbfs pages at boot. -v2

2013-04-03 Thread Michal Hocko
On Tue 02-04-13 21:43:44, Robin Holt wrote:
[...]
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index ca9a7c6..7683f6a 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1185,7 +1185,7 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
>   while (nr_nodes) {
>   void *addr;
>  
> - addr = __alloc_bootmem_node_nopanic(
> + addr = __alloc_bootmem_node_nopanic_notzeroed(
>   NODE_DATA(hstate_next_node_to_alloc(h,
>   &node_states[N_MEMORY])),
>   huge_page_size(h), huge_page_size(h), 0);

Ohh, and powerpc seems to have its own opinion how to allocate huge
pages. See arch/powerpc/mm/hugetlbpage.c

-- 
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] mm, x86: Do not zero hugetlbfs pages at boot. -v2

2013-04-03 Thread Michal Hocko
On Tue 02-04-13 21:43:44, Robin Holt wrote:
[...]
> diff --git a/mm/bootmem.c b/mm/bootmem.c
> index 2b0bcb0..b2e4027 100644
> --- a/mm/bootmem.c
> +++ b/mm/bootmem.c
> @@ -705,12 +705,16 @@ void * __init __alloc_bootmem(unsigned long size, 
> unsigned long align,
>  
>  void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
>   unsigned long size, unsigned long align,
> - unsigned long goal, unsigned long limit)
> + unsigned long goal, unsigned long limit,
> + int zeroed)
>  {
>   void *ptr;
>  
>   if (WARN_ON_ONCE(slab_is_available()))
> - return kzalloc(size, GFP_NOWAIT);
> + if (zeroed)
> + return kzalloc(size, GFP_NOWAIT);
> + else
> + return kmalloc(size, GFP_NOWAIT);
>  again:
>  
>   /* do not panic in alloc_bootmem_bdata() */

You need to update alloc_bootmem_bdata and alloc_bootmem_core as well.
Otherwise this is a no-op for early allocations when slab is not
available which is the case unless something is broken.

[...]
-- 
Michal Hocko
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/