On Sat, 21 Sep 2019, Xiaoming Ni wrote: > @ Nicolas Pitre > Can I make a v2 patch based on your advice ? > Or you will submit a patch for "GFP_WONTFAIL" yourself ?
Here's a patch implementing what I had in mind. This is compile tested only. ----- >8 Subject: [PATCH] mm: add __GFP_WONTFAIL and GFP_ONBOOT Some memory allocations are very unlikely to fail during system boot. Because of that, the code often doesn't bother to check for allocation failure, but this gets reported anyway. As an alternative to adding code to check for NULL that has almost no chance of ever being exercised, let's use a GFP flag to identify those cases and panic the kernel if allocation failure ever occurs. Conversion of one such instance is also included. Signed-off-by: Nicolas Pitre <n...@fluxnic.net> diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 34aa39d1ae..bd0a0e4807 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -3356,7 +3356,7 @@ static int __init con_init(void) } for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) { - vc_cons[currcons].d = vc = kzalloc(sizeof(struct vc_data), GFP_NOWAIT); + vc_cons[currcons].d = vc = kzalloc(sizeof(struct vc_data), GFP_ONBOOT); INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK); tty_port_init(&vc->port); visual_init(vc, currcons, 1); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index f33881688f..6f33575cd6 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -39,8 +39,9 @@ struct vm_area_struct; #define ___GFP_HARDWALL 0x100000u #define ___GFP_THISNODE 0x200000u #define ___GFP_ACCOUNT 0x400000u +#define ___GFP_WONTFAIL 0x800000u #ifdef CONFIG_LOCKDEP -#define ___GFP_NOLOCKDEP 0x800000u +#define ___GFP_NOLOCKDEP 0x1000000u #else #define ___GFP_NOLOCKDEP 0 #endif @@ -187,6 +188,12 @@ struct vm_area_struct; * definitely preferable to use the flag rather than opencode endless * loop around allocator. * Using this flag for costly allocations is _highly_ discouraged. + * + * %__GFP_WONTFAIL: No allocation error is expected what so ever. The + * caller presumes allocation will always succeed (e.g. the system is still + * booting, the allocation size is relatively small and memory should be + * plentiful) so testing for failure is skipped. If allocation ever fails + * then the kernel will simply panic. */ #define __GFP_IO ((__force gfp_t)___GFP_IO) #define __GFP_FS ((__force gfp_t)___GFP_FS) @@ -196,6 +203,7 @@ struct vm_area_struct; #define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL) #define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) #define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) +#define __GFP_WONTFAIL ((__force gfp_t)___GFP_WONTFAIL) /** * DOC: Action modifiers @@ -217,7 +225,7 @@ struct vm_area_struct; #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP)) +#define __GFP_BITS_SHIFT (24 + IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /** @@ -285,6 +293,9 @@ struct vm_area_struct; * available and will not wake kswapd/kcompactd on failure. The _LIGHT * version does not attempt reclaim/compaction at all and is by default used * in page fault path, while the non-light is used by khugepaged. + * + * %GFP_ONBOOT is for relatively small allocations that are not expected + * to fail while the system is booting. */ #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) @@ -300,6 +311,7 @@ struct vm_area_struct; #define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) #define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) +#define GFP_ONBOOT (GFP_NOWAIT | __GFP_WONTFAIL) /* Convert GFP flags to their corresponding migrate type */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ff5484fdbd..36dee09f7f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4625,6 +4625,14 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, fail: warn_alloc(gfp_mask, ac->nodemask, "page allocation failure: order:%u", order); + if (gfp_mask & __GFP_WONTFAIL) { + /* + * The assumption was wrong. This is never supposed to happen. + * Caller most likely won't check for a returned NULL either. + * So the only reasonable thing to do is to pannic. + */ + panic("Failed to allocate memory despite GFP_WONTFAIL\n"); + } got_pg: return page; }