Inspired by [1], move the page fragment allocator out of page_alloc
into its own .c file and header file, as we are about to make more
changes to it so that it can replace another page_frag implementation
in sock.c.
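
For reference, here is a minimal usage sketch of the moved API; it is
illustrative only and not part of this patch (the zero-initialized
cache and the GFP_KERNEL context are assumptions). Callers now only
need the new header:

  #include <linux/page_frag_cache.h>

  /* a zero-initialized cache starts out empty (nc.va == NULL) */
  static struct page_frag_cache nc;

  static void frag_example(void)
  {
          /* carve a 256-byte fragment out of the cached page */
          void *va = page_frag_alloc(&nc, 256, GFP_KERNEL);

          if (!va)
                  return;

          /* ... use the fragment ... */

          page_frag_free(va);          /* drop the fragment's reference */
          page_frag_cache_drain(&nc);  /* release the page held by the cache */
  }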

As this patchset is going to replace 'struct page_frag' with
'struct page_frag_cache' in sched.h, including page_frag_cache.h
in sched.h would cause a compiler error due to the interdependency
between mm_types.h and mm.h when building asm-offsets.c, see [2].
Avoid the compiler error by moving 'struct page_frag_cache' to
mm_types_task.h, as suggested by Alexander, see [3].
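
In a simplified sketch (illustrative only; 'struct some_task_state' is
hypothetical and the exact sched.h include chain is an assumption based
on [2]/[3]), code that merely embeds the struct gets the complete type
from mm_types_task.h without pulling in mm.h, while code that calls the
allocator includes the new page_frag_cache.h:

  #include <linux/mm_types_task.h>      /* defines struct page_frag_cache */

  struct some_task_state {              /* hypothetical example struct */
          struct page_frag_cache frag;  /* complete type, no mm.h needed */
  };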

1. https://lore.kernel.org/all/20230411160902.4134381-3-dhowe...@redhat.com/
2. https://lore.kernel.org/all/15623dac-9358-4597-b3ee-3694a5956...@gmail.com/
3. https://lore.kernel.org/all/CAKgT0UdH1yD=LSCXFJ=ym_aia4oomd-2wxyko42bizawmt_...@mail.gmail.com/
CC: David Howells <dhowe...@redhat.com>
CC: Alexander Duyck <alexander.du...@gmail.com>
Signed-off-by: Yunsheng Lin <linyunsh...@huawei.com>
Acked-by: Andrew Morton <a...@linux-foundation.org>
Reviewed-by: Alexander Duyck <alexanderdu...@fb.com>
---
 include/linux/gfp.h                           |  22 ---
 include/linux/mm_types.h                      |  18 ---
 include/linux/mm_types_task.h                 |  18 +++
 include/linux/page_frag_cache.h               |  31 ++++
 include/linux/skbuff.h                        |   1 +
 mm/Makefile                                   |   1 +
 mm/page_alloc.c                               | 136 ----------------
 mm/page_frag_cache.c                          | 145 ++++++++++++++++++
 .../selftests/mm/page_frag/page_frag_test.c   |   2 +-
 9 files changed, 197 insertions(+), 177 deletions(-)
 create mode 100644 include/linux/page_frag_cache.h
 create mode 100644 mm/page_frag_cache.c
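
One detail carried over unchanged by the move, shown here only as a
worked example of the existing arithmetic: page_frag_alloc_align()
requires a power-of-two 'align' because it passes '-align' as
'align_mask', and __page_frag_alloc_align() rounds the fragment offset
down with 'offset &= align_mask'. For instance:

  align  = 64    ->  align_mask = -64 = 0xffffffc0 (32-bit unsigned)
  offset = 1000  (0x3e8)
  offset &= align_mask  ->  0x3c0 = 960, i.e. 1000 rounded down to a
                            multiple of 64

page_frag_alloc() passes ~0u as the mask, which leaves the offset
unchanged.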

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index a951de920e20..a0a6d25f883f 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -371,28 +371,6 @@ __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mas
 extern void __free_pages(struct page *page, unsigned int order);
 extern void free_pages(unsigned long addr, unsigned int order);
 
-struct page_frag_cache;
-void page_frag_cache_drain(struct page_frag_cache *nc);
-extern void __page_frag_cache_drain(struct page *page, unsigned int count);
-void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
-                             gfp_t gfp_mask, unsigned int align_mask);
-
-static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
-                                         unsigned int fragsz, gfp_t gfp_mask,
-                                         unsigned int align)
-{
-       WARN_ON_ONCE(!is_power_of_2(align));
-       return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
-}
-
-static inline void *page_frag_alloc(struct page_frag_cache *nc,
-                            unsigned int fragsz, gfp_t gfp_mask)
-{
-       return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
-}
-
-extern void page_frag_free(void *addr);
-
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6e3bdf8e38bc..92314ef2d978 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -521,9 +521,6 @@ static_assert(sizeof(struct ptdesc) <= sizeof(struct page));
  */
 #define STRUCT_PAGE_MAX_SHIFT  (order_base_2(sizeof(struct page)))
 
-#define PAGE_FRAG_CACHE_MAX_SIZE       __ALIGN_MASK(32768, ~PAGE_MASK)
-#define PAGE_FRAG_CACHE_MAX_ORDER      get_order(PAGE_FRAG_CACHE_MAX_SIZE)
-
 /*
  * page_private can be used on tail pages.  However, PagePrivate is only
  * checked by the VM on the head page.  So page_private on the tail pages
@@ -542,21 +539,6 @@ static inline void *folio_get_private(struct folio *folio)
        return folio->private;
 }
 
-struct page_frag_cache {
-       void * va;
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-       __u16 offset;
-       __u16 size;
-#else
-       __u32 offset;
-#endif
-       /* we maintain a pagecount bias, so that we dont dirty cache line
-        * containing page->_refcount every time we allocate a fragment.
-        */
-       unsigned int            pagecnt_bias;
-       bool pfmemalloc;
-};
-
 typedef unsigned long vm_flags_t;
 
 /*
diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index bff5706b76e1..0ac6daebdd5c 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -8,6 +8,7 @@
  * (These are defined separately to decouple sched.h from mm_types.h as much as possible.)
  */
 
+#include <linux/align.h>
 #include <linux/types.h>
 
 #include <asm/page.h>
@@ -43,6 +44,23 @@ struct page_frag {
 #endif
 };
 
+#define PAGE_FRAG_CACHE_MAX_SIZE       __ALIGN_MASK(32768, ~PAGE_MASK)
+#define PAGE_FRAG_CACHE_MAX_ORDER      get_order(PAGE_FRAG_CACHE_MAX_SIZE)
+struct page_frag_cache {
+       void *va;
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+       __u16 offset;
+       __u16 size;
+#else
+       __u32 offset;
+#endif
+       /* we maintain a pagecount bias, so that we dont dirty cache line
+        * containing page->_refcount every time we allocate a fragment.
+        */
+       unsigned int            pagecnt_bias;
+       bool pfmemalloc;
+};
+
 /* Track pages that require TLB flushes */
 struct tlbflush_unmap_batch {
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
new file mode 100644
index 000000000000..67ac8626ed9b
--- /dev/null
+++ b/include/linux/page_frag_cache.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_PAGE_FRAG_CACHE_H
+#define _LINUX_PAGE_FRAG_CACHE_H
+
+#include <linux/log2.h>
+#include <linux/mm_types_task.h>
+#include <linux/types.h>
+
+void page_frag_cache_drain(struct page_frag_cache *nc);
+void __page_frag_cache_drain(struct page *page, unsigned int count);
+void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz,
+                             gfp_t gfp_mask, unsigned int align_mask);
+
+static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
+                                         unsigned int fragsz, gfp_t gfp_mask,
+                                         unsigned int align)
+{
+       WARN_ON_ONCE(!is_power_of_2(align));
+       return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
+}
+
+static inline void *page_frag_alloc(struct page_frag_cache *nc,
+                                   unsigned int fragsz, gfp_t gfp_mask)
+{
+       return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
+}
+
+void page_frag_free(void *addr);
+
+#endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 39f1d16f3628..560e2b49f98b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -31,6 +31,7 @@
 #include <linux/in6.h>
 #include <linux/if_packet.h>
 #include <linux/llist.h>
+#include <linux/page_frag_cache.h>
 #include <net/flow.h>
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 #include <linux/netfilter/nf_conntrack_common.h>
diff --git a/mm/Makefile b/mm/Makefile
index d5639b036166..dba52bb0da8a 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -65,6 +65,7 @@ page-alloc-$(CONFIG_SHUFFLE_PAGE_ALLOCATOR) += shuffle.o
 memory-hotplug-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 
 obj-y += page-alloc.o
+obj-y += page_frag_cache.o
 obj-y += init-mm.o
 obj-y += memblock.o
 obj-y += $(memory-hotplug-y)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8afab64814dc..6ca2abce857b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4836,142 +4836,6 @@ void free_pages(unsigned long addr, unsigned int order)
 
 EXPORT_SYMBOL(free_pages);
 
-/*
- * Page Fragment:
- *  An arbitrary-length arbitrary-offset area of memory which resides
- *  within a 0 or higher order page.  Multiple fragments within that page
- *  are individually refcounted, in the page's reference counter.
- *
- * The page_frag functions below provide a simple allocation framework for
- * page fragments.  This is used by the network stack and network device
- * drivers to provide a backing region of memory for use as either an
- * sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
- */
-static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
-                                            gfp_t gfp_mask)
-{
-       struct page *page = NULL;
-       gfp_t gfp = gfp_mask;
-
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-       gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) |  __GFP_COMP |
-                  __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
-       page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
-                               PAGE_FRAG_CACHE_MAX_ORDER);
-       nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
-#endif
-       if (unlikely(!page))
-               page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
-
-       nc->va = page ? page_address(page) : NULL;
-
-       return page;
-}
-
-void page_frag_cache_drain(struct page_frag_cache *nc)
-{
-       if (!nc->va)
-               return;
-
-       __page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
-       nc->va = NULL;
-}
-EXPORT_SYMBOL(page_frag_cache_drain);
-
-void __page_frag_cache_drain(struct page *page, unsigned int count)
-{
-       VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
-
-       if (page_ref_sub_and_test(page, count))
-               free_unref_page(page, compound_order(page));
-}
-EXPORT_SYMBOL(__page_frag_cache_drain);
-
-void *__page_frag_alloc_align(struct page_frag_cache *nc,
-                             unsigned int fragsz, gfp_t gfp_mask,
-                             unsigned int align_mask)
-{
-       unsigned int size = PAGE_SIZE;
-       struct page *page;
-       int offset;
-
-       if (unlikely(!nc->va)) {
-refill:
-               page = __page_frag_cache_refill(nc, gfp_mask);
-               if (!page)
-                       return NULL;
-
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-               /* if size can vary use size else just use PAGE_SIZE */
-               size = nc->size;
-#endif
-               /* Even if we own the page, we do not use atomic_set().
-                * This would break get_page_unless_zero() users.
-                */
-               page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
-
-               /* reset page count bias and offset to start of new frag */
-               nc->pfmemalloc = page_is_pfmemalloc(page);
-               nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
-               nc->offset = size;
-       }
-
-       offset = nc->offset - fragsz;
-       if (unlikely(offset < 0)) {
-               page = virt_to_page(nc->va);
-
-               if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
-                       goto refill;
-
-               if (unlikely(nc->pfmemalloc)) {
-                       free_unref_page(page, compound_order(page));
-                       goto refill;
-               }
-
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-               /* if size can vary use size else just use PAGE_SIZE */
-               size = nc->size;
-#endif
-               /* OK, page count is 0, we can safely set it */
-               set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
-
-               /* reset page count bias and offset to start of new frag */
-               nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
-               offset = size - fragsz;
-               if (unlikely(offset < 0)) {
-                       /*
-                        * The caller is trying to allocate a fragment
-                        * with fragsz > PAGE_SIZE but the cache isn't big
-                        * enough to satisfy the request, this may
-                        * happen in low memory conditions.
-                        * We don't release the cache page because
-                        * it could make memory pressure worse
-                        * so we simply return NULL here.
-                        */
-                       return NULL;
-               }
-       }
-
-       nc->pagecnt_bias--;
-       offset &= align_mask;
-       nc->offset = offset;
-
-       return nc->va + offset;
-}
-EXPORT_SYMBOL(__page_frag_alloc_align);
-
-/*
- * Frees a page fragment allocated out of either a compound or order 0 page.
- */
-void page_frag_free(void *addr)
-{
-       struct page *page = virt_to_head_page(addr);
-
-       if (unlikely(put_page_testzero(page)))
-               free_unref_page(page, compound_order(page));
-}
-EXPORT_SYMBOL(page_frag_free);
-
 static void *make_alloc_exact(unsigned long addr, unsigned int order,
                size_t size)
 {
diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
new file mode 100644
index 000000000000..609a485cd02a
--- /dev/null
+++ b/mm/page_frag_cache.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Page fragment allocator
+ *
+ * Page Fragment:
+ *  An arbitrary-length arbitrary-offset area of memory which resides within a
+ *  0 or higher order page.  Multiple fragments within that page are
+ *  individually refcounted, in the page's reference counter.
+ *
+ * The page_frag functions provide a simple allocation framework for page
+ * fragments.  This is used by the network stack and network device drivers to
+ * provide a backing region of memory for use as either an sk_buff->head, or to
+ * be used in the "frags" portion of skb_shared_info.
+ */
+
+#include <linux/export.h>
+#include <linux/gfp_types.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/page_frag_cache.h>
+#include "internal.h"
+
+static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
+                                            gfp_t gfp_mask)
+{
+       struct page *page = NULL;
+       gfp_t gfp = gfp_mask;
+
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+       gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) |  __GFP_COMP |
+                  __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
+       page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
+                               PAGE_FRAG_CACHE_MAX_ORDER);
+       nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
+#endif
+       if (unlikely(!page))
+               page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+
+       nc->va = page ? page_address(page) : NULL;
+
+       return page;
+}
+
+void page_frag_cache_drain(struct page_frag_cache *nc)
+{
+       if (!nc->va)
+               return;
+
+       __page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
+       nc->va = NULL;
+}
+EXPORT_SYMBOL(page_frag_cache_drain);
+
+void __page_frag_cache_drain(struct page *page, unsigned int count)
+{
+       VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
+
+       if (page_ref_sub_and_test(page, count))
+               free_unref_page(page, compound_order(page));
+}
+EXPORT_SYMBOL(__page_frag_cache_drain);
+
+void *__page_frag_alloc_align(struct page_frag_cache *nc,
+                             unsigned int fragsz, gfp_t gfp_mask,
+                             unsigned int align_mask)
+{
+       unsigned int size = PAGE_SIZE;
+       struct page *page;
+       int offset;
+
+       if (unlikely(!nc->va)) {
+refill:
+               page = __page_frag_cache_refill(nc, gfp_mask);
+               if (!page)
+                       return NULL;
+
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+               /* if size can vary use size else just use PAGE_SIZE */
+               size = nc->size;
+#endif
+               /* Even if we own the page, we do not use atomic_set().
+                * This would break get_page_unless_zero() users.
+                */
+               page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
+
+               /* reset page count bias and offset to start of new frag */
+               nc->pfmemalloc = page_is_pfmemalloc(page);
+               nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
+               nc->offset = size;
+       }
+
+       offset = nc->offset - fragsz;
+       if (unlikely(offset < 0)) {
+               page = virt_to_page(nc->va);
+
+               if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
+                       goto refill;
+
+               if (unlikely(nc->pfmemalloc)) {
+                       free_unref_page(page, compound_order(page));
+                       goto refill;
+               }
+
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+               /* if size can vary use size else just use PAGE_SIZE */
+               size = nc->size;
+#endif
+               /* OK, page count is 0, we can safely set it */
+               set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
+
+               /* reset page count bias and offset to start of new frag */
+               nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
+               offset = size - fragsz;
+               if (unlikely(offset < 0)) {
+                       /*
+                        * The caller is trying to allocate a fragment
+                        * with fragsz > PAGE_SIZE but the cache isn't big
+                        * enough to satisfy the request, this may
+                        * happen in low memory conditions.
+                        * We don't release the cache page because
+                        * it could make memory pressure worse
+                        * so we simply return NULL here.
+                        */
+                       return NULL;
+               }
+       }
+
+       nc->pagecnt_bias--;
+       offset &= align_mask;
+       nc->offset = offset;
+
+       return nc->va + offset;
+}
+EXPORT_SYMBOL(__page_frag_alloc_align);
+
+/*
+ * Frees a page fragment allocated out of either a compound or order 0 page.
+ */
+void page_frag_free(void *addr)
+{
+       struct page *page = virt_to_head_page(addr);
+
+       if (unlikely(put_page_testzero(page)))
+               free_unref_page(page, compound_order(page));
+}
+EXPORT_SYMBOL(page_frag_free);
diff --git a/tools/testing/selftests/mm/page_frag/page_frag_test.c b/tools/testing/selftests/mm/page_frag/page_frag_test.c
index eeb2b6bc681a..fdf204550c9a 100644
--- a/tools/testing/selftests/mm/page_frag/page_frag_test.c
+++ b/tools/testing/selftests/mm/page_frag/page_frag_test.c
@@ -6,12 +6,12 @@
  * Copyright (C) 2024 Yunsheng Lin <linyunsh...@huawei.com>
  */
 
-#include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/cpumask.h>
 #include <linux/completion.h>
 #include <linux/ptr_ring.h>
 #include <linux/kthread.h>
+#include <linux/page_frag_cache.h>
 
 static struct ptr_ring ptr_ring;
 static int nr_objs = 512;
-- 
2.34.1

