After looking at v2 yesterday I noticed a few things in the structure
that I really didn't like:

 - using a struct page * return value just to communicate status codes
 - the extremely long function names
 - a generally somewhat non-intuitive split of the helpers

I then hacked on top of it for a bit while sitting in a telephone
conference.  Below is my result, which passes a quick xfstests run.
Note that this includes the refactoring and the batch lookup changes
as I did it on top of your series:

diff --git a/mm/filemap.c b/mm/filemap.c
index f0ae9a6308cb4d..9e0aecd99950f4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1972,273 +1972,360 @@ static void shrink_readahead_size_eio(struct 
file_ra_state *ra)
        ra->ra_pages /= 4;
 }
 
-/**
- * generic_file_buffered_read - generic file read routine
- * @iocb:      the iocb to read
- * @iter:      data destination
- * @written:   already copied
- *
- * This is a generic file read routine, and uses the
- * mapping->a_ops->readpage() function for the actual low-level stuff.
- *
- * This is really ugly. But the goto's actually try to clarify some
- * of the logic when it comes to error handling etc.
- *
- * Return:
- * * total number of bytes copied, including those the were already @written
- * * negative error code if nothing was copied
- */
-ssize_t generic_file_buffered_read(struct kiocb *iocb,
-               struct iov_iter *iter, ssize_t written)
+static inline pgoff_t filemap_last_index(struct kiocb *iocb,
+               struct iov_iter *iter)
 {
-       struct file *filp = iocb->ki_filp;
-       struct address_space *mapping = filp->f_mapping;
-       struct inode *inode = mapping->host;
-       struct file_ra_state *ra = &filp->f_ra;
-       loff_t *ppos = &iocb->ki_pos;
-       pgoff_t index;
-       pgoff_t last_index;
-       pgoff_t prev_index;
-       unsigned long offset;      /* offset into pagecache page */
-       unsigned int prev_offset;
-       int error = 0;
-
-       if (unlikely(*ppos >= inode->i_sb->s_maxbytes))
-               return 0;
-       iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
-
-       index = *ppos >> PAGE_SHIFT;
-       prev_index = ra->prev_pos >> PAGE_SHIFT;
-       prev_offset = ra->prev_pos & (PAGE_SIZE-1);
-       last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
-       offset = *ppos & ~PAGE_MASK;
+       return (iocb->ki_pos + iter->count + PAGE_SIZE - 1) >> PAGE_SHIFT;
+}
 
-       for (;;) {
-               struct page *page;
-               pgoff_t end_index;
-               loff_t isize;
-               unsigned long nr, ret;
+static inline unsigned long filemap_nr_pages(struct kiocb *iocb,
+               struct iov_iter *iter)
+{
+       return filemap_last_index(iocb, iter) - (iocb->ki_pos >> PAGE_SHIFT);
+}
 
-               cond_resched();
-find_page:
-               if (fatal_signal_pending(current)) {
-                       error = -EINTR;
-                       goto out;
-               }
+static int __filemap_read_not_uptodate(struct file *file, struct page *page)
+{
+       int error;
 
-               page = find_get_page(mapping, index);
-               if (!page) {
-                       if (iocb->ki_flags & IOCB_NOWAIT)
-                               goto would_block;
-                       page_cache_sync_readahead(mapping,
-                                       ra, filp,
-                                       index, last_index - index);
-                       page = find_get_page(mapping, index);
-                       if (unlikely(page == NULL))
-                               goto no_cached_page;
-               }
-               if (PageReadahead(page)) {
-                       page_cache_async_readahead(mapping,
-                                       ra, filp, page,
-                                       index, last_index - index);
-               }
-               if (!PageUptodate(page)) {
-                       if (iocb->ki_flags & IOCB_NOWAIT) {
-                               put_page(page);
-                               goto would_block;
-                       }
+       error = lock_page_killable(page);
+       if (error)
+               return error;
 
+       if (!PageUptodate(page)) {
+               if (!page->mapping) {
                        /*
-                        * See comment in do_read_cache_page on why
-                        * wait_on_page_locked is used to avoid unnecessarily
-                        * serialisations and why it's safe.
+                        * invalidate_mapping_pages got it
                         */
-                       error = wait_on_page_locked_killable(page);
-                       if (unlikely(error))
-                               goto readpage_error;
-                       if (PageUptodate(page))
-                               goto page_ok;
-
-                       if (inode->i_blkbits == PAGE_SHIFT ||
-                                       !mapping->a_ops->is_partially_uptodate)
-                               goto page_not_up_to_date;
-                       /* pipes can't handle partially uptodate pages */
-                       if (unlikely(iov_iter_is_pipe(iter)))
-                               goto page_not_up_to_date;
-                       if (!trylock_page(page))
-                               goto page_not_up_to_date;
-                       /* Did it get truncated before we got the lock? */
-                       if (!page->mapping)
-                               goto page_not_up_to_date_locked;
-                       if (!mapping->a_ops->is_partially_uptodate(page,
-                                                       offset, iter->count))
-                               goto page_not_up_to_date_locked;
-                       unlock_page(page);
+                       error = AOP_TRUNCATED_PAGE;
+               } else {
+                       error = -EIO;
                }
-page_ok:
-               /*
-                * i_size must be checked after we know the page is Uptodate.
-                *
-                * Checking i_size after the check allows us to calculate
-                * the correct value for "nr", which means the zero-filled
-                * part of the page is not copied back to userspace (unless
-                * another truncate extends the file - this is desired though).
-                */
+       }
 
-               isize = i_size_read(inode);
-               end_index = (isize - 1) >> PAGE_SHIFT;
-               if (unlikely(!isize || index > end_index)) {
-                       put_page(page);
-                       goto out;
-               }
+       unlock_page(page);
+       if (error == -EIO)
+               shrink_readahead_size_eio(&file->f_ra);
+       return error;
+}
 
-               /* nr is the maximum number of bytes to copy from this page */
-               nr = PAGE_SIZE;
-               if (index == end_index) {
-                       nr = ((isize - 1) & ~PAGE_MASK) + 1;
-                       if (nr <= offset) {
-                               put_page(page);
-                               goto out;
-                       }
-               }
-               nr = nr - offset;
+static int __filemap_read_one_page(struct file *file, struct page *page)
+{
+       int error;
 
-               /* If users can be writing to this page using arbitrary
-                * virtual addresses, take care about potential aliasing
-                * before reading the page on the kernel side.
-                */
-               if (mapping_writably_mapped(mapping))
-                       flush_dcache_page(page);
+       /*
+        * A previous I/O error may have been due to temporary failures, e.g.
+        * multipath errors. PG_error will be set again if readpage fails.
+        */
+       ClearPageError(page);
 
-               /*
-                * When a sequential read accesses a page several times,
-                * only mark it as accessed the first time.
-                */
-               if (prev_index != index || offset != prev_offset)
-                       mark_page_accessed(page);
-               prev_index = index;
+       /*
+        * Start the actual read. The read will unlock the page.
+        */
+       error = file->f_mapping->a_ops->readpage(file, page);
+       if (!error && !PageUptodate(page))
+               return __filemap_read_not_uptodate(file, page);
+       return error;
+}
 
-               /*
-                * Ok, we have the page, and it's up-to-date, so
-                * now we can copy it to user space...
-                */
+static int filemap_read_one_page(struct kiocb *iocb, struct iov_iter *iter,
+               struct page *page, pgoff_t index)
+{
+       struct file *file = iocb->ki_filp;
+       struct address_space *mapping = file->f_mapping;
+       loff_t pos = max(iocb->ki_pos, (loff_t)index << PAGE_SHIFT);
+       loff_t count = iocb->ki_pos + iter->count - pos;
+       int error;
 
-               ret = copy_page_to_iter(page, offset, nr, iter);
-               offset += ret;
-               index += offset >> PAGE_SHIFT;
-               offset &= ~PAGE_MASK;
-               prev_offset = offset;
+       /*
+        * See comment in do_read_cache_page on why wait_on_page_locked is used
+        * to avoid unnecessary serialisations and why it's safe.
+        */
+       error = wait_on_page_locked_killable(page);
+       if (unlikely(error))
+               goto out_put_page;
 
-               put_page(page);
-               written += ret;
-               if (!iov_iter_count(iter))
-                       goto out;
-               if (ret < nr) {
-                       error = -EFAULT;
-                       goto out;
-               }
-               continue;
+       if (PageUptodate(page))
+               return 0;
+
+       if (mapping->host->i_blkbits == PAGE_SHIFT ||
+           !mapping->a_ops->is_partially_uptodate)
+               goto page_not_up_to_date;
+       /* pipes can't handle partially uptodate pages */
+       if (unlikely(iov_iter_is_pipe(iter)))
+               goto page_not_up_to_date;
+       if (!trylock_page(page))
+               goto page_not_up_to_date;
+       /* Did it get truncated before we got the lock? */
+       if (!page->mapping)
+               goto page_not_up_to_date_locked;
+       if (!mapping->a_ops->is_partially_uptodate(page, pos & ~PAGE_MASK,
+                       count))
+               goto page_not_up_to_date_locked;
+done:
+       unlock_page(page);
+       return 0;
 
 page_not_up_to_date:
-               /* Get exclusive access to the page ... */
-               error = lock_page_killable(page);
-               if (unlikely(error))
-                       goto readpage_error;
+       /* Get exclusive access to the page ... */
+       error = lock_page_killable(page);
+       if (unlikely(error))
+               goto out_put_page;
 
 page_not_up_to_date_locked:
-               /* Did it get truncated before we got the lock? */
-               if (!page->mapping) {
-                       unlock_page(page);
-                       put_page(page);
+       /* Did it get truncated before we got the lock? */
+       if (!page->mapping) {
+               error = AOP_TRUNCATED_PAGE;
+               unlock_page(page);
+               goto out_put_page;
+       }
+
+       /* Did somebody else fill it already? */
+       if (PageUptodate(page))
+               goto done;
+
+       error = __filemap_read_one_page(file, page);
+       if (error)
+               goto out_put_page;
+       return 0;
+
+out_put_page:
+       put_page(page);
+       return error;
+}
+
+static int filemap_read_get_pages(struct kiocb *iocb, struct iov_iter *iter,
+               struct page **pages, unsigned long max_pages)
+{
+       struct file *file = iocb->ki_filp;
+       struct address_space *mapping = file->f_mapping;
+       unsigned long nr = min(filemap_nr_pages(iocb, iter), max_pages);
+       pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
+       int ret;
+
+       if (fatal_signal_pending(current))
+               return -EINTR;
+
+       ret = find_get_pages_contig(mapping, index, nr, pages);
+       if (ret)
+               return ret;
+
+       if (iocb->ki_flags & IOCB_NOWAIT)
+               return -EAGAIN;
+
+       page_cache_sync_readahead(mapping, &file->f_ra, file, index,
+                                 filemap_nr_pages(iocb, iter));
+
+       ret = find_get_pages_contig(mapping, index, nr, pages);
+       if (ret)
+               return ret;
+
+       /*
+        * Ok, it wasn't cached, so we need to create a new page..
+        */
+       pages[0] = page_cache_alloc(mapping);
+       if (!pages[0])
+               return -ENOMEM;
+
+       ret = add_to_page_cache_lru(pages[0], mapping, index,
+                       mapping_gfp_constraint(mapping, GFP_KERNEL));
+       if (ret) {
+               if (ret == -EEXIST)
+                       ret = 0;
+               goto out_put_page;
+       }
+
+       ret = __filemap_read_one_page(file, pages[0]);
+       if (ret) {
+               if (ret == AOP_TRUNCATED_PAGE)
+                       ret = 0;
+               goto out_put_page;
+       }
+
+       return 1;
+
+out_put_page:
+       put_page(pages[0]);
+       return ret;
+}
+
+static int filemap_read_pages(struct kiocb *iocb, struct iov_iter *iter,
+               struct page **pages, unsigned int nr_pages)
+{
+       struct file *file = iocb->ki_filp;
+       struct address_space *mapping = file->f_mapping;
+       pgoff_t last_index = filemap_last_index(iocb, iter);
+       pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
+       unsigned int cur_page, j;
+       int err = 0;
+
+       for (cur_page = 0; cur_page < nr_pages; cur_page++, index++) {
+               struct page *page = pages[cur_page];
+
+               if (PageReadahead(page))
+                       page_cache_async_readahead(mapping, &file->f_ra, file,
+                                       page, index, last_index - index);
+
+               if (PageUptodate(page))
                        continue;
-               }
 
-               /* Did somebody else fill it already? */
-               if (PageUptodate(page)) {
-                       unlock_page(page);
-                       goto page_ok;
+               if (iocb->ki_flags & IOCB_NOWAIT) {
+                       put_page(page);
+                       err = -EAGAIN;
+                       goto out_put_pages;
                }
 
-readpage:
-               /*
-                * A previous I/O error may have been due to temporary
-                * failures, eg. multipath errors.
-                * PG_error will be set again if readpage fails.
-                */
-               ClearPageError(page);
-               /* Start the actual read. The read will unlock the page. */
-               error = mapping->a_ops->readpage(filp, page);
+               err = filemap_read_one_page(iocb, iter, page, index);
+               if (err)
+                       goto out_put_pages;
+       }
 
-               if (unlikely(error)) {
-                       if (error == AOP_TRUNCATED_PAGE) {
-                               put_page(page);
-                               error = 0;
-                               goto find_page;
-                       }
-                       goto readpage_error;
-               }
+       return cur_page;
 
-               if (!PageUptodate(page)) {
-                       error = lock_page_killable(page);
-                       if (unlikely(error))
-                               goto readpage_error;
-                       if (!PageUptodate(page)) {
-                               if (page->mapping == NULL) {
-                                       /*
-                                        * invalidate_mapping_pages got it
-                                        */
-                                       unlock_page(page);
-                                       put_page(page);
-                                       goto find_page;
-                               }
-                               unlock_page(page);
-                               shrink_readahead_size_eio(ra);
-                               error = -EIO;
-                               goto readpage_error;
-                       }
-                       unlock_page(page);
-               }
+out_put_pages:
+       for (j = cur_page + 1; j < nr_pages; j++)
+               put_page(pages[j]);
+       if (cur_page == 0) {
+               if (err == AOP_TRUNCATED_PAGE)
+                       err = 0;
+               return err;
+       }
+       return cur_page;
+}
 
-               goto page_ok;
+static int filemap_do_read(struct kiocb *iocb, struct iov_iter *iter,
+               struct page **pages, unsigned long max_pages)
+{
+       struct file *filp = iocb->ki_filp;
+       struct address_space *mapping = filp->f_mapping;
+       struct file_ra_state *ra = &filp->f_ra;
+       loff_t isize, end_offset;
+       int nr_pages, i;
+       bool writably_mapped;
 
-readpage_error:
-               /* UHHUH! A synchronous read error occurred. Report it */
-               put_page(page);
-               goto out;
+       cond_resched();
+
+was_truncated:
+       nr_pages = filemap_read_get_pages(iocb, iter, pages, max_pages);
+       if (nr_pages > 0)
+               nr_pages = filemap_read_pages(iocb, iter, pages, nr_pages);
+       if (nr_pages == 0)
+               goto was_truncated;
+       if (nr_pages < 0)
+               return nr_pages;
+
+       /*
+        * i_size must be checked after we know the pages are Uptodate.
+        *
+        * Checking i_size after the check allows us to calculate the correct
+        * value for "nr_pages", which means the zero-filled part of the page is
+        * not copied back to userspace (unless another truncate extends the
+        * file - this is desired though).
+        */
+       isize = i_size_read(mapping->host);
+       if (unlikely(iocb->ki_pos >= isize))
+               goto put_pages;
+
+       end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
+
+       while ((iocb->ki_pos >> PAGE_SHIFT) + nr_pages >
+              (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT)
+               put_page(pages[--nr_pages]);
+
+       /*
+        * Once we start copying data, we don't want to be touching any
+        * cachelines that might be contended:
+        */
+       writably_mapped = mapping_writably_mapped(mapping);
+
+       /*
+        * When a sequential read accesses a page several times, only mark it as
+        * accessed the first time.
+        */
+       if (iocb->ki_pos >> PAGE_SHIFT != ra->prev_pos >> PAGE_SHIFT)
+               mark_page_accessed(pages[0]);
+       for (i = 1; i < nr_pages; i++)
+               mark_page_accessed(pages[i]);
+
+       for (i = 0; i < nr_pages; i++) {
+               unsigned offset = iocb->ki_pos & ~PAGE_MASK;
+               unsigned bytes = min_t(loff_t, end_offset - iocb->ki_pos,
+                                              PAGE_SIZE - offset);
+               unsigned copied;
 
-no_cached_page:
                /*
-                * Ok, it wasn't cached, so we need to create a new
-                * page..
+                * If users can be writing to this page using arbitrary virtual
+                * addresses, take care about potential aliasing before reading
+                * the page on the kernel side.
                 */
-               page = page_cache_alloc(mapping);
-               if (!page) {
-                       error = -ENOMEM;
-                       goto out;
-               }
-               error = add_to_page_cache_lru(page, mapping, index,
-                               mapping_gfp_constraint(mapping, GFP_KERNEL));
-               if (error) {
-                       put_page(page);
-                       if (error == -EEXIST) {
-                               error = 0;
-                               goto find_page;
-                       }
-                       goto out;
-               }
-               goto readpage;
+               if (writably_mapped)
+                       flush_dcache_page(pages[i]);
+
+               copied = copy_page_to_iter(pages[i], offset, bytes, iter);
+
+               iocb->ki_pos += copied;
+               ra->prev_pos = iocb->ki_pos;
+
+               if (copied < bytes)
+                       return -EFAULT;
        }
 
-would_block:
-       error = -EAGAIN;
-out:
-       ra->prev_pos = prev_index;
-       ra->prev_pos <<= PAGE_SHIFT;
-       ra->prev_pos |= prev_offset;
+put_pages:
+       for (i = 0; i < nr_pages; i++)
+               put_page(pages[i]);
+       return 0;
+}
+
+/**
+ * generic_file_buffered_read - generic file read routine
+ * @iocb:      the iocb to read
+ * @iter:      data destination
+ * @written:   already copied
+ *
+ * This is a generic file read routine, and uses the
+ * mapping->a_ops->readpage() function for the actual low-level stuff.
+ *
+ * Return:
+ * * total number of bytes copied, including those that were already @written
+ * * negative error code if nothing was copied
+ */
+ssize_t generic_file_buffered_read(struct kiocb *iocb,
+               struct iov_iter *iter, ssize_t written)
+{
+       struct address_space *mapping = iocb->ki_filp->f_mapping;
+       size_t orig_count = iov_iter_count(iter);
+       struct inode *inode = mapping->host;
+       struct page *page_array[8], **pages = NULL;
+       unsigned max_pages = filemap_nr_pages(iocb, iter);
+       int error;
 
-       *ppos = ((loff_t)index << PAGE_SHIFT) + offset;
-       file_accessed(filp);
-       return written ? written : error;
+       if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes))
+               return 0;
+       iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
+
+       if (max_pages > ARRAY_SIZE(page_array))
+               pages = kmalloc_array(max_pages, sizeof(void *), GFP_KERNEL);
+
+       if (!pages) {
+               pages = page_array;
+               max_pages = ARRAY_SIZE(page_array);
+       }
+
+       do {
+               error = filemap_do_read(iocb, iter, pages, max_pages);
+               if (error)
+                       break;
+       } while (iov_iter_count(iter) && iocb->ki_pos < i_size_read(inode));
+
+       file_accessed(iocb->ki_filp);
+       written += orig_count - iov_iter_count(iter);
+
+       if (pages != page_array)
+               kfree(pages);
+
+       if (unlikely(!written))
+               return error;
+       return written;
 }
 EXPORT_SYMBOL_GPL(generic_file_buffered_read);
 

Reply via email to