On 2025/2/18 15:12, Hongbo Li wrote:


On 2025/2/18 8:39, Kent Overstreet wrote:
Hongbo - please go over this thoroughly. It turns out (and I should've
caught this before I merged it) your check code in fsck.c was completely
fubar, and wouldn't ever log an error when i_size was off, so bugs
elsewhere would have been masked.

-- >8 --

Error handling was wrong, causing unhandled transaction restart errors.

check_directory_size() was also inefficient, since keys in multiple
snapshots would be iterated over once for every snapshot. Convert it to
the same scheme used for i_sectors and subdir count checking.

Cc: Hongbo Li <[email protected]>
Signed-off-by: Kent Overstreet <[email protected]>
---
  fs/bcachefs/fsck.c | 78 ++++++++++++++++++----------------------------
  1 file changed, 31 insertions(+), 47 deletions(-)

diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 53a421ff136d..d406336e9a0a 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -823,6 +823,7 @@ struct inode_walker_entry {
      struct bch_inode_unpacked inode;
      u32            snapshot;
      u64            count;
+    u64            i_size;
  };
  struct inode_walker {
@@ -910,8 +911,9 @@ lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_
      if (k.k->p.snapshot != i->snapshot && !is_whiteout) {
          struct inode_walker_entry new = *i;
-        new.snapshot = k.k->p.snapshot;
-        new.count = 0;
+        new.snapshot    = k.k->p.snapshot;
+        new.count    = 0;
+        new.i_size    = 0;
          struct printbuf buf = PRINTBUF;
          bch2_bkey_val_to_text(&buf, c, k);
@@ -1116,37 +1118,6 @@ static int get_snapshot_root_inode(struct btree_trans *trans,
      return ret;
  }
-static int check_directory_size(struct btree_trans *trans,
-                struct bch_inode_unpacked *inode_u,
-                struct bkey_s_c inode_k, bool *write_inode)
-{
-    struct btree_iter iter;
-    struct bkey_s_c k;
-    u64 new_size = 0;
-    int ret;
-
-    for_each_btree_key_max_norestart(trans, iter, BTREE_ID_dirents,
-            SPOS(inode_k.k->p.offset, 0, inode_k.k->p.snapshot),
-            POS(inode_k.k->p.offset, U64_MAX),
-            0, k, ret) {
-        if (k.k->type != KEY_TYPE_dirent)
-            continue;
-
-        struct bkey_s_c_dirent dirent = bkey_s_c_to_dirent(k);
-        struct qstr name = bch2_dirent_get_name(dirent);
-
-        new_size += dirent_occupied_size(&name);
-    }
-    bch2_trans_iter_exit(trans, &iter);
-
-    if (!ret && inode_u->bi_size != new_size) {
-        inode_u->bi_size = new_size;
-        *write_inode = true;
-    }
-
-    return ret;
-}
-
  static int check_inode(struct btree_trans *trans,
                 struct btree_iter *iter,
                 struct bkey_s_c k,
@@ -1335,16 +1306,6 @@ static int check_inode(struct btree_trans *trans,
          u.bi_journal_seq = journal_cur_seq(&c->journal);
          do_update = true;
      }
-
-    if (S_ISDIR(u.bi_mode)) {
-        ret = check_directory_size(trans, &u, k, &do_update);
-
-        fsck_err_on(ret,
-                trans, directory_size_mismatch,
-                "directory inode %llu:%u with the mismatch directory size",
-                u.bi_inum, k.k->p.snapshot);
It only tries to fix the mismatched i_size, and only logs the error during recalculating. And why the i_size will be off?
-        ret = 0;
-    }
  do_update:
      if (do_update) {
          ret = __bch2_fsck_write_inode(trans, &u);
@@ -2017,10 +1978,31 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_
      return ret;
  }
-static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) +static int check_dir_i_size_notnested(struct btree_trans *trans, struct inode_walker *w)
+{
+    struct bch_fs *c = trans->c;
+    int ret = 0;
+
+    darray_for_each(w->inodes, i)
+        if (fsck_err_on(i->inode.bi_size != i->i_size,
+                trans, inode_dir_wrong_nlink,
+                "directory %llu:%u with wrong i_nlink: got %u, should be %llu", +                w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) {
This shows the wrong message. It should use directory_size_mismatch error and corresponding values.
+            i->inode.bi_size = i->i_size;
+            ret = bch2_fsck_write_inode(trans, &i->inode);
+            if (ret)
+                break;
+        }
+fsck_err:
+    bch_err_fn(c, ret);
+    return ret;
+}
+
+static int check_subdir_dirents_count(struct btree_trans *trans, struct inode_walker *w)
  {
      u32 restart_count = trans->restart_count;
      return check_subdir_count_notnested(trans, w) ?:
+        check_dir_i_size_notnested(trans, w) ?:
          trans_was_restarted(trans, restart_count);
  }
@@ -2367,7 +2349,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
          goto out;
      if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) {
-        ret = check_subdir_count(trans, dir);
+        ret = check_subdir_dirents_count(trans, dir);
          if (ret)
              goto err;
      }
@@ -2457,9 +2439,11 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
      if (ret)
          goto err;
-    if (d.v->d_type == DT_DIR)
-        for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
+    for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) {
+        if (d.v->d_type == DT_DIR)
              i->count++;
+        i->i_size += bkey_bytes(d.k);
We cannot rely on the value of bkey_bytes of the older version (the older is 0 for directory).
Sorry I am wrong, the older is not zero for directory. May be other problems cause the wrong results.

 The traversal order will affect the accounts
of i_size. We should recalculate the sub-entries.

Also I did the following test:

older version without dir i_size:

├── foo
│   ├── MAINTAINERS
│   ├── foo-2
│   │   ├── MAINTAINERS
│   │   └── profile
│   └── profile
└── lost+found

After fsck, it shows the i_size of foo-2 is still 0.

Thanks,
Hongbo


+    }
  out:
  err:
  fsck_err:


Reply via email to