FUSE passthrough backing files are not installed in the daemon's fd
table, which makes them invisible to lsof(8) and /proc/<pid>/fd.  This
complicates debugging and is the reason passthrough currently requires
CAP_SYS_ADMIN.

Install each backing file into the daemon's fd table on BACKING_OPEN
and close it on BACKING_CLOSE.  Add an fd field to struct fuse_backing
to carry the installed fd number.

BACKING_CLOSE always runs in the daemon's ioctl context, so close_fd()
targets the correct fd table directly.

To handle fds that remain open at connection teardown (e.g. on umount
or connection abort without an explicit BACKING_CLOSE), store a
reference to the daemon's task_struct in fc->daemon_task on the first
BACKING_OPEN.  During teardown, fuse_backing_files_free() queues one
task_work callback per remaining fd on the daemon task, so that
close_fd() runs in the daemon's own context.  If the daemon has already
exited, task_work_add() returns -ESRCH and nothing needs to be closed,
because its fd table was already torn down on exit.

This is a prerequisite for relaxing the CAP_SYS_ADMIN requirement on
FUSE passthrough; a follow-up patch will drop the capability check now
that backing fds are accountable via /proc/<pid>/fd.

Signed-off-by: Matan Cohen <[email protected]>
---
 fs/fuse/backing.c                             |  97 +++++-
 fs/fuse/fuse_i.h                              |  15 +
 .../selftests/filesystems/fuse/.gitignore     |   1 +
 .../selftests/filesystems/fuse/Makefile       |   2 +-
 .../filesystems/fuse/fuse_passthrough_test.c  | 297 ++++++++++++++++++
 5 files changed, 405 insertions(+), 7 deletions(-)
 create mode 100644 tools/testing/selftests/filesystems/fuse/fuse_passthrough_test.c

diff --git a/fs/fuse/backing.c b/fs/fuse/backing.c
index d95dfa48483f0..9c4ac0388491c 100644
--- a/fs/fuse/backing.c
+++ b/fs/fuse/backing.c
@@ -8,6 +8,8 @@
 #include "fuse_i.h"
 
 #include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/task_work.h>
 
 struct fuse_backing *fuse_backing_get(struct fuse_backing *fb)
 {
@@ -20,6 +22,7 @@ static void fuse_backing_free(struct fuse_backing *fb)
 {
        pr_debug("%s: fb=0x%p\n", __func__, fb);
 
+       WARN_ON_ONCE(fb->fd >= 0);
        if (fb->file)
                fput(fb->file);
        put_cred(fb->cred);
@@ -64,19 +67,63 @@ static struct fuse_backing *fuse_backing_id_remove(struct fuse_conn *fc,
        return fb;
 }
 
+struct fuse_backing_close_work {
+       struct callback_head cb;
+       int fd;
+};
+
+static void fuse_backing_close_fd(struct callback_head *cb)
+{
+       struct fuse_backing_close_work *w =
+               container_of(cb, struct fuse_backing_close_work, cb);
+       close_fd(w->fd);
+       kfree(w);
+}
+
 static int fuse_backing_id_free(int id, void *p, void *data)
 {
+       struct fuse_conn *fc = data;
        struct fuse_backing *fb = p;
 
        WARN_ON_ONCE(refcount_read(&fb->count) != 1);
+
+       if (fb->fd >= 0 && fc->daemon_task) {
+               struct fuse_backing_close_work *w;
+
+               w = kmalloc_obj(*w, GFP_ATOMIC);
+               if (w) {
+                       init_task_work(&w->cb, fuse_backing_close_fd);
+                       w->fd = fb->fd;
+                       /*
+                        * Schedule close_fd() to run in the daemon's context.
+                        * TWA_RESUME fires on the daemon's next return to
+                        * userspace -- in practice immediately after its
+                        * blocked read() unblocks with an error at teardown.
+                        * -ESRCH means the daemon already exited and closed
+                        * all its fds; nothing to do.
+                        */
+                       if (task_work_add(fc->daemon_task, &w->cb, TWA_RESUME))
+                               kfree(w);
+               } else {
+                       pr_warn_ratelimited("fuse: failed to close backing fd %d on teardown\n",
+                                           fb->fd);
+               }
+               fb->fd = -1;
+       }
+
        fuse_backing_free(fb);
        return 0;
 }
 
 void fuse_backing_files_free(struct fuse_conn *fc)
 {
-       idr_for_each(&fc->backing_files_map, fuse_backing_id_free, NULL);
+       idr_for_each(&fc->backing_files_map, fuse_backing_id_free, fc);
        idr_destroy(&fc->backing_files_map);
+
+       if (fc->daemon_task) {
+               put_task_struct(fc->daemon_task);
+               fc->daemon_task = NULL;
+       }
 }
 
 int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map)
@@ -117,21 +164,50 @@ int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map)
        if (!fb)
                goto out_fput;
 
-       fb->file = file;
+       /*
+        * Capture the daemon's task on the first BACKING_OPEN so that
+        * fuse_backing_files_free() can schedule fd closes via task_work
+        * during teardown, even when teardown runs outside daemon context.
+        */
+       if (!fc->daemon_task) {
+               spin_lock(&fc->lock);
+               if (!fc->daemon_task)
+                       fc->daemon_task = get_task_struct(current);
+               spin_unlock(&fc->lock);
+       }
+
+       fb->file = file;        /* fget_raw ref transferred */
+       fb->fd = -1;
        fb->cred = prepare_creds();
+       if (!fb->cred) {
+               res = -ENOMEM;
+               goto out_free;
+       }
        refcount_set(&fb->count, 1);
+       fb->fd = get_unused_fd_flags(O_CLOEXEC);
+       if (fb->fd < 0) {
+               res = fb->fd;
+               goto out_free;
+       }
+       get_file(file);
+       fd_install(fb->fd, file);
 
        res = fuse_backing_id_alloc(fc, fb);
-       if (res < 0) {
-               fuse_backing_free(fb);
-               fb = NULL;
-       }
+       if (res < 0)
+               goto out_close_fd;
 
 out:
        pr_debug("%s: fb=0x%p, ret=%i\n", __func__, fb, res);
 
        return res;
 
+out_close_fd:
+       close_fd(fb->fd);
+       fb->fd = -1;
+out_free:
+       fuse_backing_free(fb);
+       fb = NULL;
+       goto out;
 out_fput:
        fput(file);
        goto out;
@@ -158,6 +234,15 @@ int fuse_backing_close(struct fuse_conn *fc, int backing_id)
        if (!fb)
                goto out;
 
+       /*
+        * Close the fd installed in the daemon's fd table on BACKING_OPEN.
+        * BACKING_CLOSE always runs in the daemon's ioctl context, so
+        * close_fd() targets the correct fd table.
+        */
+       if (fb->fd >= 0) {
+               close_fd(fb->fd);
+               fb->fd = -1;
+       }
        fuse_backing_put(fb);
        err = 0;
 out:
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 17423d4e3cfa6..0edda751747a2 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -107,6 +107,14 @@ struct fuse_submount_lookup {
 struct fuse_backing {
        struct file *file;
        struct cred *cred;
+       /**
+        * fd: fd installed in the daemon's fd table on BACKING_OPEN, or -1.
+        * Valid only while this entry is in fc->backing_files_map.
+        * Closed by BACKING_CLOSE (daemon ioctl context) or by a task_work
+        * callback scheduled on fc->daemon_task during connection teardown.
+        * Must be -1 before fuse_backing_free() is called.
+        */
+       int fd;
 
        /** refcount */
        refcount_t count;
@@ -979,6 +987,13 @@ struct fuse_conn {
 #ifdef CONFIG_FUSE_PASSTHROUGH
        /** IDR for backing files ids */
        struct idr backing_files_map;
+       /**
+        * daemon_task: task_struct of the daemon, held with a reference.
+        * Used during connection teardown to schedule task_work that closes
+        * any remaining backing fds in the daemon's fd table.  Set on the
+        * first BACKING_OPEN ioctl; NULL if no backing files were registered.
+        */
+       struct task_struct *daemon_task;
 #endif
 
 #ifdef CONFIG_FUSE_IO_URING
diff --git a/tools/testing/selftests/filesystems/fuse/.gitignore b/tools/testing/selftests/filesystems/fuse/.gitignore
index 3e72e742d08e8..ef1ff428aa7a6 100644
--- a/tools/testing/selftests/filesystems/fuse/.gitignore
+++ b/tools/testing/selftests/filesystems/fuse/.gitignore
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 fuse_mnt
 fusectl_test
+fuse_passthrough_test
diff --git a/tools/testing/selftests/filesystems/fuse/Makefile b/tools/testing/selftests/filesystems/fuse/Makefile
index 612aad69a93aa..494042ebc4f2c 100644
--- a/tools/testing/selftests/filesystems/fuse/Makefile
+++ b/tools/testing/selftests/filesystems/fuse/Makefile
@@ -2,7 +2,7 @@
 
 CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
 
-TEST_GEN_PROGS := fusectl_test
+TEST_GEN_PROGS := fusectl_test fuse_passthrough_test
 TEST_GEN_FILES := fuse_mnt
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/fuse/fuse_passthrough_test.c b/tools/testing/selftests/filesystems/fuse/fuse_passthrough_test.c
new file mode 100644
index 0000000000000..31cfced89ca8e
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fuse/fuse_passthrough_test.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that FUSE backing files are visible in /proc/self/fd
+ *
+ * When FUSE passthrough mode registers a backing file via
+ * FUSE_DEV_IOC_BACKING_OPEN, that file should remain visible in
+ * /proc/self/fd of the daemon process so that tools like lsof can
+ * observe it. This test verifies that behavior.
+ *
+ * Requires: root (CAP_SYS_ADMIN), CONFIG_FUSE_PASSTHROUGH=y
+ */
+
+#define _GNU_SOURCE
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/fuse.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../../kselftest.h"
+
+#define FUSE_DEV       "/dev/fuse"
+#define BUF_SIZE       131072
+
+/*
+ * FUSE_PASSTHROUGH is bit 37.  The protocol splits flags across two
+ * 32-bit fields: flags holds bits 0-31, flags2 holds bits 32-63
+ * shifted right by 32.
+ */
+#define FUSE_PASSTHROUGH_FLAGS2        ((uint32_t)(FUSE_PASSTHROUGH >> 32))
+
+/* Count /proc/self/fd symlinks that resolve to @path. */
+static int count_proc_fd_refs(const char *path)
+{
+       DIR *dir;
+       struct dirent *de;
+       char link_target[PATH_MAX];
+       char proc_entry[PATH_MAX];
+       int count = 0;
+
+       dir = opendir("/proc/self/fd");
+       if (!dir)
+               return -1;
+
+       while ((de = readdir(dir)) != NULL) {
+               ssize_t len;
+
+               if (de->d_name[0] == '.')
+                       continue;
+
+               snprintf(proc_entry, sizeof(proc_entry),
+                        "/proc/self/fd/%s", de->d_name);
+               len = readlink(proc_entry, link_target, sizeof(link_target) - 1);
+               if (len < 0)
+                       continue;
+               link_target[len] = '\0';
+
+               if (strcmp(link_target, path) == 0)
+                       count++;
+       }
+
+       closedir(dir);
+       return count;
+}
+
+/*
+ * Minimal FUSE daemon: performs the INIT handshake with FUSE_PASSTHROUGH
+ * enabled, registers @backing_path as a backing file, writes the
+ * /proc/self/fd ref counts before and after BACKING_CLOSE into
+ * @result_pipe, then drains requests until the mount is torn down.
+ */
+static void run_daemon(int fuse_fd, const char *backing_path, int result_pipe)
+{
+       char buf[BUF_SIZE];
+       struct fuse_in_header *in_hdr;
+       struct {
+               struct fuse_out_header  hdr;
+               struct fuse_init_out    init;
+       } init_reply;
+       struct {
+               struct fuse_out_header  hdr;
+       } err_reply;
+       struct fuse_backing_map map = {};
+       ssize_t len;
+       int backing_fd;
+       int backing_id;
+       uint32_t close_id;
+       int fd_count_open = -1;
+       int fd_count_close = -1;
+
+       len = read(fuse_fd, buf, sizeof(buf));
+       if (len < 0) {
+               ksft_print_msg("daemon: read INIT: %s\n", strerror(errno));
+               goto out;
+       }
+
+       in_hdr = (struct fuse_in_header *)buf;
+       if (in_hdr->opcode != FUSE_INIT) {
+               ksft_print_msg("daemon: expected FUSE_INIT, got %u\n",
+                              in_hdr->opcode);
+               goto out;
+       }
+
+       memset(&init_reply, 0, sizeof(init_reply));
+       init_reply.hdr.len      = sizeof(init_reply);
+       init_reply.hdr.unique   = in_hdr->unique;
+       init_reply.init.major           = FUSE_KERNEL_VERSION;
+       init_reply.init.minor           = FUSE_KERNEL_MINOR_VERSION;
+       init_reply.init.max_readahead   = 65536;
+       init_reply.init.flags           = FUSE_INIT_EXT;
+       init_reply.init.max_write       = 65536;
+       init_reply.init.max_pages       = 256;
+       init_reply.init.flags2          = FUSE_PASSTHROUGH_FLAGS2;
+       init_reply.init.max_stack_depth = 1;
+
+       if (write(fuse_fd, &init_reply, sizeof(init_reply)) < 0) {
+               ksft_print_msg("daemon: write INIT reply: %s\n", strerror(errno));
+               goto out;
+       }
+
+       backing_fd = open(backing_path, O_RDWR);
+       if (backing_fd < 0) {
+               ksft_print_msg("daemon: open backing file: %s\n", strerror(errno));
+               goto out;
+       }
+
+       map.fd = backing_fd;
+       backing_id = ioctl(fuse_fd, FUSE_DEV_IOC_BACKING_OPEN, &map);
+       if (backing_id < 0) {
+               ksft_print_msg("daemon: FUSE_DEV_IOC_BACKING_OPEN: %s\n",
+                              strerror(errno));
+               close(backing_fd);
+               goto out;
+       }
+
+       /*
+        * Close our own fd.  The kernel now holds the only reference via
+        * its backing_files_map.  Check whether the file is still visible
+        * in /proc/self/fd -- it should be, via the fd installed by the kernel.
+        */
+       close(backing_fd);
+       fd_count_open = count_proc_fd_refs(backing_path);
+
+       close_id = (uint32_t)backing_id;
+       ioctl(fuse_fd, FUSE_DEV_IOC_BACKING_CLOSE, &close_id);
+       fd_count_close = count_proc_fd_refs(backing_path);
+
+out:
+       /* Signal results before draining so the parent can proceed to unmount. */
+       if (write(result_pipe, &fd_count_open, sizeof(fd_count_open)) < 0 ||
+           write(result_pipe, &fd_count_close, sizeof(fd_count_close)) < 0)
+               ksft_print_msg("daemon: write result pipe: %s\n", strerror(errno));
+
+       while (1) {
+               len = read(fuse_fd, buf, sizeof(buf));
+               if (len <= 0)
+                       break;
+
+               in_hdr = (struct fuse_in_header *)buf;
+               memset(&err_reply, 0, sizeof(err_reply));
+               err_reply.hdr.len    = sizeof(err_reply);
+               err_reply.hdr.error  = -ENOSYS;
+               err_reply.hdr.unique = in_hdr->unique;
+               if (write(fuse_fd, &err_reply, sizeof(err_reply)) < 0)
+                       break;
+       }
+}
+
+int main(void)
+{
+       char tmpdir[] = "/tmp/fuse_backing_test_XXXXXX";
+       char mntpoint[PATH_MAX];
+       char backing_path[PATH_MAX];
+       char mount_opts[64];
+       int fuse_fd = -1;
+       int pipe_fds[2] = {-1, -1};
+       int tmp_fd;
+       pid_t daemon_pid;
+       int fd_count_open = -1;
+       int fd_count_close = -1;
+       int status;
+
+       ksft_print_header();
+       ksft_set_plan(2);
+
+       if (geteuid() != 0)
+               ksft_exit_skip("requires root (CAP_SYS_ADMIN)\n");
+
+       if (!mkdtemp(tmpdir))
+               ksft_exit_fail_msg("mkdtemp: %s\n", strerror(errno));
+
+       snprintf(mntpoint, sizeof(mntpoint), "%s/mnt", tmpdir);
+       snprintf(backing_path, sizeof(backing_path), "%s/backing_file", tmpdir);
+
+       if (mkdir(mntpoint, 0700) < 0) {
+               ksft_print_msg("mkdir mntpoint: %s\n", strerror(errno));
+               goto cleanup_tmpdir;
+       }
+
+       tmp_fd = open(backing_path, O_CREAT | O_RDWR, 0600);
+       if (tmp_fd < 0) {
+               ksft_print_msg("create backing file: %s\n", strerror(errno));
+               goto cleanup_dirs;
+       }
+       close(tmp_fd);
+
+       fuse_fd = open(FUSE_DEV, O_RDWR);
+       if (fuse_fd < 0) {
+               ksft_print_msg("open %s: %s\n", FUSE_DEV, strerror(errno));
+               goto cleanup_files;
+       }
+
+       if (pipe(pipe_fds) < 0) {
+               ksft_print_msg("pipe: %s\n", strerror(errno));
+               goto cleanup_fuse;
+       }
+
+       daemon_pid = fork();
+       if (daemon_pid < 0) {
+               ksft_print_msg("fork: %s\n", strerror(errno));
+               goto cleanup_pipe;
+       }
+
+       if (daemon_pid == 0) {
+               close(pipe_fds[0]);
+               run_daemon(fuse_fd, backing_path, pipe_fds[1]);
+               close(pipe_fds[1]);
+               exit(0);
+       }
+
+       close(pipe_fds[1]);
+       pipe_fds[1] = -1;
+
+       snprintf(mount_opts, sizeof(mount_opts),
+                "fd=%d,rootmode=040000,user_id=0,group_id=0", fuse_fd);
+
+       if (mount("fuse.test", mntpoint, "fuse", MS_NOSUID | MS_NODEV,
+                 mount_opts) < 0) {
+               ksft_print_msg("mount: %s\n", strerror(errno));
+               kill(daemon_pid, SIGTERM);
+               waitpid(daemon_pid, &status, 0);
+               goto cleanup_pipe;
+       }
+
+       if (read(pipe_fds[0], &fd_count_open, sizeof(fd_count_open)) != sizeof(fd_count_open) ||
+           read(pipe_fds[0], &fd_count_close, sizeof(fd_count_close)) != sizeof(fd_count_close))
+               ksft_print_msg("read result pipe: %s\n", strerror(errno));
+
+       umount2(mntpoint, MNT_DETACH);
+       waitpid(daemon_pid, &status, 0);
+
+       if (fd_count_open == 1)
+               ksft_test_result_pass(
+                       "backing file visible in /proc/self/fd after BACKING_OPEN\n");
+       else if (fd_count_open == 0)
+               ksft_test_result_fail(
+                       "backing file NOT visible in /proc/self/fd after BACKING_OPEN\n");
+       else
+               ksft_test_result_fail(
+                       "BACKING_OPEN: unexpected fd_count=%d\n", fd_count_open);
+
+       if (fd_count_close == 0)
+               ksft_test_result_pass(
+                       "backing file removed from /proc/self/fd after BACKING_CLOSE\n");
+       else if (fd_count_close == 1)
+               ksft_test_result_fail(
+                       "backing file still visible in /proc/self/fd after BACKING_CLOSE\n");
+       else
+               ksft_test_result_fail(
+                       "BACKING_CLOSE: unexpected fd_count=%d\n", fd_count_close);
+
+cleanup_pipe:
+       if (pipe_fds[0] >= 0)
+               close(pipe_fds[0]);
+       if (pipe_fds[1] >= 0)
+               close(pipe_fds[1]);
+cleanup_fuse:
+       if (fuse_fd >= 0)
+               close(fuse_fd);
+cleanup_files:
+       unlink(backing_path);
+cleanup_dirs:
+       rmdir(mntpoint);
+cleanup_tmpdir:
+       rmdir(tmpdir);
+
+       ksft_finished();
+}
-- 
2.43.0


Reply via email to