> 
> No, I meant going back to idea of new gfp flag, but adjust the implementation in
> the allocator (different from what you posted in previous version) so that it
> only looks at the flag after it tries to allocate from pcplist and finds out
> it's empty. So, no inventing of new page allocator entry points or checks such
> as the one you wrote above, but adding the new gfp flag in a way that it doesn't
> affect existing fast paths.
>
OK, now I see. Please have a look at the patch below, so that we fully understand
each other, and let me know whether it is close to what you have in mind:

<snip>
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index c603237e006c..7e613560a502 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -39,8 +39,9 @@ struct vm_area_struct;
 #define ___GFP_HARDWALL                0x100000u
 #define ___GFP_THISNODE                0x200000u
 #define ___GFP_ACCOUNT         0x400000u
+#define ___GFP_NO_LOCKS                0x800000u
 #ifdef CONFIG_LOCKDEP
-#define ___GFP_NOLOCKDEP       0x800000u
+#define ___GFP_NOLOCKDEP       0x1000000u
 #else
 #define ___GFP_NOLOCKDEP       0
 #endif
@@ -215,16 +216,22 @@ struct vm_area_struct;
  * %__GFP_COMP address compound page metadata.
  *
  * %__GFP_ZERO returns a zeroed page on success.
+ *
+ * %__GFP_NO_LOCKS order-0 allocation without sleepable-locks.
+ * It obtains a page from the per-cpu-list and considered as
+ * lock-less. No other actions are performed, thus it returns
+ * NULL if per-cpu-list is empty.
  */
 #define __GFP_NOWARN   ((__force gfp_t)___GFP_NOWARN)
 #define __GFP_COMP     ((__force gfp_t)___GFP_COMP)
 #define __GFP_ZERO     ((__force gfp_t)___GFP_ZERO)
+#define __GFP_NO_LOCKS ((__force gfp_t)___GFP_NO_LOCKS)

 /* Disable lockdep for GFP context tracking */
 #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)

 /* Room for N __GFP_FOO bits */
-#define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP))
+#define __GFP_BITS_SHIFT (24 + IS_ENABLED(CONFIG_LOCKDEP))
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /**
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 67018d367b9f..d99af78237be 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -45,6 +45,7 @@
        {(unsigned long)__GFP_RECLAIMABLE,      "__GFP_RECLAIMABLE"},   \
        {(unsigned long)__GFP_MOVABLE,          "__GFP_MOVABLE"},       \
        {(unsigned long)__GFP_ACCOUNT,          "__GFP_ACCOUNT"},       \
+       {(unsigned long)__GFP_NO_LOCKS,         "__GFP_NO_LOCKS"},      \
        {(unsigned long)__GFP_WRITE,            "__GFP_WRITE"},         \
        {(unsigned long)__GFP_RECLAIM,          "__GFP_RECLAIM"},       \
        {(unsigned long)__GFP_DIRECT_RECLAIM,   "__GFP_DIRECT_RECLAIM"},\
diff --git a/mm/internal.h b/mm/internal.h
index 6345b08ce86c..5724fba921f9 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -569,6 +569,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone 
*zone,
 #define ALLOC_NOFRAGMENT         0x0
 #endif
 #define ALLOC_KSWAPD           0x800 /* allow waking of kswapd, 
__GFP_KSWAPD_RECLAIM set */
+#define ALLOC_NO_LOCKS         0x1000 /* Lock free allocation. */
 
 enum ttu_flags;
 struct tlbflush_unmap_batch;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index aff1f84bf268..19cd9794dd45 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2837,6 +2837,9 @@ static int rmqueue_bulk(struct zone *zone, unsigned int 
order,
 {
        int i, alloced = 0;
 
+       if (alloc_flags & ALLOC_NO_LOCKS)
+               return alloced;
+
        spin_lock(&zone->lock);
        for (i = 0; i < count; ++i) {
                struct page *page = __rmqueue(zone, order, migratetype,
@@ -3805,7 +3808,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int 
order, int alloc_flags,
                         * grow this zone if it contains deferred pages.
                         */
                        if (static_branch_unlikely(&deferred_pages)) {
-                               if (_deferred_grow_zone(zone, order))
+                               if (!(alloc_flags & ALLOC_NO_LOCKS) &&
+                                               _deferred_grow_zone(zone, 
order))
                                        goto try_this_zone;
                        }
 #endif
@@ -3850,7 +3854,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int 
order, int alloc_flags,
                                reserve_highatomic_pageblock(page, zone, order);

                        return page;
-               } else {
+               } else if (!(alloc_flags & ALLOC_NO_LOCKS)) {
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
                        /* Try again if zone has deferred pages */
                        if (static_branch_unlikely(&deferred_pages)) {
@@ -4846,6 +4850,9 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, 
unsigned int order,
        ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
                                        ac->highest_zoneidx, ac->nodemask);

+       if (gfp_mask & __GFP_NO_LOCKS)
+               *alloc_flags |= ALLOC_NO_LOCKS;
+
        return true;
 }

@@ -4886,6 +4893,10 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int 
order, int preferred_nid,
        if (likely(page))
                goto out;

+       /* Bypass slow path if __GFP_NO_LOCKS. */
+       if ((gfp_mask & __GFP_NO_LOCKS))
+               goto out;
+
        /*
         * Apply scoped allocation constraints. This is mainly about GFP_NOFS
         * resp. GFP_NOIO which has to be inherited for all allocation requests
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index a50dae2c4ae9..fee3221bcf6a 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -656,6 +656,7 @@ static const struct {
        { "__GFP_RECLAIMABLE",          "RC" },
        { "__GFP_MOVABLE",              "M" },
        { "__GFP_ACCOUNT",              "AC" },
+       { "__GFP_NO_LOCKS",             "NL" },
        { "__GFP_WRITE",                "WR" },
        { "__GFP_RECLAIM",              "R" },
        { "__GFP_DIRECT_RECLAIM",       "DR" },
<snip>

If not, could you please provide some snippets or pseudocode?

Thanks Vlastimil!

--
Vlad Rezki

Reply via email to