The block layer spends a noticeable amount of time in iov_iter_npages(),
but for the bvec case the number of pages is already known and stored in
iter->nr_segs, so it can be returned directly as an optimisation.
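
A minimal sketch of the fast path from a caller's point of view, assuming
an array of single-page bio_vecs as used for registered buffers (bvec,
nr_pages and npages are illustrative names, not taken from the patch):

        struct iov_iter iter;
        int npages;

        /* describe the registered buffer as a bvec-backed iterator */
        iov_iter_bvec(&iter, WRITE, bvec, nr_pages, nr_pages * PAGE_SIZE);

        /*
         * With this patch the call below returns
         * min_t(int, i->nr_segs, maxpages) straight away instead of
         * walking every segment.
         */
        npages = iov_iter_npages(&iter, BIO_MAX_PAGES);

Since the per-segment walk being removed only incremented npages once per
bvec segment, the early return gives the same answer without the loop.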

Perf for an io_uring benchmark with registered buffers (i.e. bvec) shows
~1.5-2.0% of total cycles spent in iov_iter_npages(); this patch drops
that to ~0.2%.

Reviewed-by: Jens Axboe <ax...@kernel.dk>
Signed-off-by: Pavel Begunkov <asml.sile...@gmail.com>
---
 lib/iov_iter.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 1635111c5bd2..0fa7ac330acf 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1594,6 +1594,8 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
                return 0;
        if (unlikely(iov_iter_is_discard(i)))
                return 0;
+       if (unlikely(iov_iter_is_bvec(i)))
+               return min_t(int, i->nr_segs, maxpages);
 
        if (unlikely(iov_iter_is_pipe(i))) {
                struct pipe_inode_info *pipe = i->pipe;
@@ -1614,11 +1616,9 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
                        - p / PAGE_SIZE;
                if (npages >= maxpages)
                        return maxpages;
-       0;}),({
-               npages++;
-               if (npages >= maxpages)
-                       return maxpages;
-       }),({
+       0;}),
+               0 /* bvecs are handled above */
+       ,({
                unsigned long p = (unsigned long)v.iov_base;
                npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
                        - p / PAGE_SIZE;
-- 
2.24.0
