4.9-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Yan, Zheng <z...@redhat.com>

commit b50c2de51e611da90cf3cf04c058f7e9bbe79e93 upstream.

The dirfragtree is lazily updated, so it is not always accurate. An
infinite loop can happen in the following circumstance:

- the client sends a request to read frag A
- frag A has been fragmented into frags B and C, so the mds fills the
  reply with the contents of frag B
- the client wants to read the next frag, C, but
  ceph_choose_frag(frag value of C) returns frag A.

The fix is to use the previous readdir reply to calculate the next
readdir frag when possible.

Signed-off-by: "Yan, Zheng" <z...@redhat.com>
Signed-off-by: Ilya Dryomov <idryo...@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gre...@linuxfoundation.org>

---
 fs/ceph/dir.c |   19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -315,7 +315,7 @@ static int ceph_readdir(struct file *fil
        struct ceph_mds_client *mdsc = fsc->mdsc;
        int i;
        int err;
-       u32 ftype;
+       unsigned frag = -1;
        struct ceph_mds_reply_info_parsed *rinfo;
 
        dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
@@ -362,7 +362,6 @@ more:
        /* do we have the correct frag content buffered? */
        if (need_send_readdir(fi, ctx->pos)) {
                struct ceph_mds_request *req;
-               unsigned frag;
                int op = ceph_snap(inode) == CEPH_SNAPDIR ?
                        CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
 
@@ -373,8 +372,11 @@ more:
                }
 
                if (is_hash_order(ctx->pos)) {
-                       frag = ceph_choose_frag(ci, fpos_hash(ctx->pos),
-                                               NULL, NULL);
+                       /* fragtree isn't always accurate. choose frag
+                        * based on previous reply when possible. */
+                       if (frag == (unsigned)-1)
+                               frag = ceph_choose_frag(ci, fpos_hash(ctx->pos),
+                                                       NULL, NULL);
                } else {
                        frag = fpos_frag(ctx->pos);
                }
@@ -497,6 +499,7 @@ more:
                struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
                struct ceph_vino vino;
                ino_t ino;
+               u32 ftype;
 
                BUG_ON(rde->offset < ctx->pos);
 
@@ -519,15 +522,17 @@ more:
                ctx->pos++;
        }
 
+       ceph_mdsc_put_request(fi->last_readdir);
+       fi->last_readdir = NULL;
+
        if (fi->next_offset > 2) {
-               ceph_mdsc_put_request(fi->last_readdir);
-               fi->last_readdir = NULL;
+               frag = fi->frag;
                goto more;
        }
 
        /* more frags? */
        if (!ceph_frag_is_rightmost(fi->frag)) {
-               unsigned frag = ceph_frag_next(fi->frag);
+               frag = ceph_frag_next(fi->frag);
                if (is_hash_order(ctx->pos)) {
                        loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
                                                        fi->next_offset, true);


Reply via email to