On 2/4/26 3:18 PM, Al Viro wrote:
On Wed, Feb 04, 2026 at 01:16:15PM -0500, Waiman Long wrote:


Thanks for the detailed explanation. I am thinking about something like
the code diff below. Of course, there are other corner cases like unshare(2)
that still needs to be handled. Do you think something like this is viable?
Deadlocks aside, the immediate problem here is that consensus number is too
low.  Take three threads sharing the same fs_struct instance.  The first one
calls your get_fs_pwd_share(); then the remaining two threads call set_fs_pwd()
(e.g. by calling chdir(2) in userland code).  The reference stored into
fs->pwd_waiter by the first of those two gets overwritten by that stored
by the second.  When the caller of get_fs_pwd_share() gets to 
put_fs_pwd_share(),
only one of the sleepers gets woken up...

And it's very easy to end up with something as simple as chdir("foo") 
deadlocking -
we start with resolving the relative pathname we'd been given, audit wants to
record the current directory, on the theory that relative pathname is none too
useful in logs without knowing what had it been relative to.  Then, in the
same thread, you call set_fs_pwd() - after all, that's the main effect of 
chdir(2).
Deadlock...

IOW, it's not just unshare(2) that needs to be taken care of - chdir(2) would 
need
to be treated differently.

Now I realize that there is indeed a deadlock problem. Scrap that. Now I have a simpler idea that shouldn't have this type of deadlock problem. So what do you think about the sample code below?

Thanks,
Longman

=======================[ Cut here ]================================

diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index b8c46c5a38a0..daeeb80cf088 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -32,15 +32,19 @@ void set_fs_root(struct fs_struct *fs, const struct path *p>
 void set_fs_pwd(struct fs_struct *fs, const struct path *path)
 {
        struct path old_pwd;
+       int xrefs;

        path_get(path);
        write_seqlock(&fs->seq);
        old_pwd = fs->pwd;
        fs->pwd = *path;
+       xrefs = fs->pwd_xrefs + 1;
+       fs->pwd_xrefs = 0;
        write_sequnlock(&fs->seq);

        if (old_pwd.dentry)
-               path_put(&old_pwd);
+               while (xrefs--)
+                       path_put(&old_pwd);
 }

 static inline int replace_path(struct path *p, const struct path *old, const s>
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 0070764b790a..0d79d51de240 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -8,10 +8,11 @@
 #include <linux/seqlock.h>

 struct fs_struct {
-       int users;
        seqlock_t seq;
+       int users;
        int umask;
        int in_exec;
+       int pwd_xrefs;  /* Extra references of pwd */
        struct path root, pwd;
 } __randomize_layout;

@@ -40,6 +41,31 @@ static inline void get_fs_pwd(struct fs_struct *fs, struct p>
        read_sequnlock_excl(&fs->seq);
 }

+static inline void get_fs_pwd_share(struct fs_struct *fs, struct path *pwd)
+{
+       read_seqlock_excl(&fs->seq);
+       *pwd = fs->pwd;
+       if (fs->pwd_xrefs)
+               fs->pwd_xrefs--;
+       else
+               path_get(pwd);
+       read_sequnlock_excl(&fs->seq);
+}
+
+static inline void put_fs_pwd_share(struct fs_struct *fs, struct path *pwd)
+{
+       bool put = false;
+
+       read_seqlock_excl(&fs->seq);
+       if ((fs->pwd.dentry == pwd->dentry) && (fs->pwd.mnt == pwd->mnt))
+               fs->pwd_xrefs++;
+       else
+               put = true;
+       read_sequnlock_excl(&fs->seq);
+       if (put)
+               path_put(pwd);
+}
+
 extern bool current_chrooted(void);

 static inline int current_umask(void)


Reply via email to