If we know we have a folio, we can call put_folio() instead of put_page()
and save the overhead of calling compound_head().  put_folio() also skips
the devmap checks.

This commit looks like it should be a no-op, but actually saves 1312 bytes
of text with the distro-derived config that I'm testing.  Some functions
grow a little while others shrink.  I presume the compiler is making
different inlining decisions.

Signed-off-by: Matthew Wilcox (Oracle) <wi...@infradead.org>
Reviewed-by: Zi Yan <z...@nvidia.com>
---
 include/linux/mm.h | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 195c4740522d..824acedc1253 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -737,6 +737,11 @@ static inline int put_page_testzero(struct page *page)
        return page_ref_dec_and_test(page);
 }
 
+static inline int put_folio_testzero(struct folio *folio)
+{
+       return put_page_testzero(&folio->page);
+}
+
 /*
  * Try to grab a ref unless the page has a refcount of zero, return false if
  * that is the case.
@@ -1228,9 +1233,28 @@ static inline __must_check bool try_get_page(struct page *page)
        return true;
 }
 
+/**
+ * put_folio - Decrement the reference count on a folio.
+ * @folio: The folio.
+ *
+ * If the folio's reference count reaches zero, the memory will be
+ * released back to the page allocator and may be used by another
+ * allocation immediately.  Do not access the memory or the struct folio
+ * after calling put_folio() unless you can be sure that it wasn't the
+ * last reference.
+ *
+ * Context: May be called in process or interrupt context, but not in NMI
+ * context.  May be called while holding a spinlock.
+ */
+static inline void put_folio(struct folio *folio)
+{
+       if (put_folio_testzero(folio))
+               __put_page(&folio->page);
+}
+
 static inline void put_page(struct page *page)
 {
-       page = compound_head(page);
+       struct folio *folio = page_folio(page);
 
        /*
         * For devmap managed pages we need to catch refcount transition from
@@ -1238,13 +1262,12 @@ static inline void put_page(struct page *page)
         * need to inform the device driver through callback. See
         * include/linux/memremap.h and HMM for details.
         */
-       if (page_is_devmap_managed(page)) {
-               put_devmap_managed_page(page);
+       if (page_is_devmap_managed(&folio->page)) {
+               put_devmap_managed_page(&folio->page);
                return;
        }
 
-       if (put_page_testzero(page))
-               __put_page(page);
+       put_folio(folio);
 }
 
 /*
-- 
2.30.2

Reply via email to