From: "Matthew Wilcox (Oracle)" <wi...@infradead.org>

This replaces ->readpages with a saner interface:
 - Return the number of pages not read instead of an ignored error code.
 - Pages are already in the page cache when ->readahead is called.
 - Implementation looks up the pages in the page cache instead of
   having them passed in a linked list.

Signed-off-by: Matthew Wilcox (Oracle) <wi...@infradead.org>
Cc: linux-bt...@vger.kernel.org
Cc: linux-er...@lists.ozlabs.org
Cc: linux-e...@vger.kernel.org
Cc: linux-f2fs-de...@lists.sourceforge.net
Cc: linux-...@vger.kernel.org
Cc: cluster-devel@redhat.com
Cc: ocfs2-de...@oss.oracle.com
---
 Documentation/filesystems/locking.rst |  7 ++++++-
 Documentation/filesystems/vfs.rst     | 14 ++++++++++++++
 include/linux/fs.h                    |  2 ++
 include/linux/pagemap.h               | 12 ++++++++++++
 mm/readahead.c                        | 13 ++++++++++++-
 5 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 5057e4d9dcd1..3d10729caf44 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -239,6 +239,8 @@ prototypes::
        int (*readpage)(struct file *, struct page *);
        int (*writepages)(struct address_space *, struct writeback_control *);
        int (*set_page_dirty)(struct page *page);
+       unsigned (*readahead)(struct file *, struct address_space *,
+                                pgoff_t start, unsigned nr_pages);
        int (*readpages)(struct file *filp, struct address_space *mapping,
                        struct list_head *pages, unsigned nr_pages);
        int (*write_begin)(struct file *, struct address_space *mapping,
@@ -271,7 +273,8 @@ writepage:          yes, unlocks (see below)
 readpage:              yes, unlocks
 writepages:
 set_page_dirty         no
-readpages:
+readahead:             yes, unlocks
+readpages:             no
 write_begin:           locks the page           exclusive
 write_end:             yes, unlocks             exclusive
 bmap:
@@ -295,6 +298,8 @@ the request handler (/dev/loop).
 ->readpage() unlocks the page, either synchronously or via I/O
 completion.
 
+->readahead() unlocks the pages like ->readpage().
+
 ->readpages() populates the pagecache with the passed pages and starts
 I/O against them.  They come unlocked upon I/O completion.
 
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index 7d4d09dd5e6d..c2bc345f2169 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -706,6 +706,8 @@ cache in your filesystem.  The following members are defined:
                int (*readpage)(struct file *, struct page *);
                int (*writepages)(struct address_space *, struct writeback_control *);
                int (*set_page_dirty)(struct page *page);
+               unsigned (*readahead)(struct file *filp, struct address_space *mapping,
+                                pgoff_t start, unsigned nr_pages);
                int (*readpages)(struct file *filp, struct address_space *mapping,
                                 struct list_head *pages, unsigned nr_pages);
                int (*write_begin)(struct file *, struct address_space *mapping,
@@ -781,6 +783,18 @@ cache in your filesystem.  The following members are defined:
        If defined, it should set the PageDirty flag, and the
        PAGECACHE_TAG_DIRTY tag in the radix tree.
 
+``readahead``
+       Called by the VM to read pages associated with the address_space
+       object.  The pages are consecutive in the page cache and
+       are locked.  The implementation should decrement the page
+       refcount after attempting I/O on each page.  Usually the
+       page will be unlocked by the I/O completion handler.  If the
+       function does not attempt I/O on some pages, return the number
+       of pages not read so the caller can unlock and put those pages
+       for you.  Set PageUptodate if the I/O completes successfully.
+       Setting PageError on any page will be ignored; simply unlock
+       the page if an I/O error occurs.
+
 ``readpages``
        called by the VM to read pages associated with the address_space
        object.  This is essentially just a vector version of readpage.
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 41584f50af0d..3bfc142e7d10 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -375,6 +375,8 @@ struct address_space_operations {
         */
        int (*readpages)(struct file *filp, struct address_space *mapping,
                        struct list_head *pages, unsigned nr_pages);
+       unsigned (*readahead)(struct file *, struct address_space *,
+                       pgoff_t start, unsigned nr_pages);
 
        int (*write_begin)(struct file *, struct address_space *mapping,
                                loff_t pos, unsigned len, unsigned flags,
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ccb14b6a16b5..a2cf007826f2 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -630,6 +630,18 @@ static inline int add_to_page_cache(struct page *page,
        return error;
 }
 
+/*
+ * Only call this from a ->readahead implementation.
+ */
+static inline
+struct page *readahead_page(struct address_space *mapping, pgoff_t index)
+{
+       struct page *page = xa_load(&mapping->i_pages, index);
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
+
+       return page;
+}
+
 static inline unsigned long dir_pages(struct inode *inode)
 {
        return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >>
diff --git a/mm/readahead.c b/mm/readahead.c
index 7daef0038b14..b2ed0baf3a5d 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -121,7 +121,18 @@ static void read_pages(struct address_space *mapping, struct file *filp,
 
        blk_start_plug(&plug);
 
-       if (mapping->a_ops->readpages) {
+       if (mapping->a_ops->readahead) {
+               unsigned left = mapping->a_ops->readahead(filp, mapping,
+                               start, nr_pages);
+
+               while (left) {
+                       struct page *page = readahead_page(mapping,
+                                       start + nr_pages - left);
+                       unlock_page(page);
+                       put_page(page);
+                       left--;
+               }
+       } else if (mapping->a_ops->readpages) {
                mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
                /* Clean up the remaining pages */
                put_pages_list(pages);
-- 
2.24.1


Reply via email to