The following pull request was submitted through Github. It can be accessed and reviewed at: https://github.com/lxc/lxcfs/pull/328
This e-mail was sent by the LXC bot, direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) === This makes running lxcfs way more reliable. It is opt-in for now. Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com>
From 20400f482bb66a5312a2dfaa2be4220184f590e5 Mon Sep 17 00:00:00 2001 From: Christian Brauner <christian.brau...@ubuntu.com> Date: Wed, 26 Feb 2020 14:08:08 +0100 Subject: [PATCH] lxcfs: add --pidfd option This makes running lxcfs way more reliable. It is opt-in for now. Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com> --- Makefile.am | 2 +- bindings.c | 210 +++++++++++++++++++++++++++++++-------------------- bindings.h | 5 +- configure.ac | 10 +++ lxcfs.c | 4 + utils.h | 23 ++++++ 6 files changed, 171 insertions(+), 83 deletions(-) diff --git a/Makefile.am b/Makefile.am index 41f3249..5f64cfb 100644 --- a/Makefile.am +++ b/Makefile.am @@ -7,7 +7,7 @@ AM_CFLAGS = -Wall -ggdb -D_GNU_SOURCE -DSBINDIR=\"$(SBINDIR)\" -pthread AM_CFLAGS += $(FUSE_CFLAGS) AM_CFLAGS += -DLIBDIR=\"$(LIBDIR)\" AM_LDFLAGS = $(FUSE_LIBS) -pthread -#AM_CFLAGS += -DDEBUG +AM_CFLAGS += -DDEBUG #AM_CFLAGS += -DVERBOSE AM_CFLAGS += -DRUNTIME_PATH=\"$(RUNTIME_PATH)\" diff --git a/bindings.c b/bindings.c index 7bf5d28..f5a6415 100644 --- a/bindings.c +++ b/bindings.c @@ -53,6 +53,8 @@ #include "proc_cpuview.h" #include "utils.h" +static bool can_use_pidfd; + /* Define pivot_root() if missing from the C library */ #ifndef HAVE_PIVOT_ROOT static int pivot_root(const char *new_root, const char *put_old) @@ -83,9 +85,10 @@ extern int pivot_root(const char *new_root, const char *put_old); * cached initpid. */ struct pidns_init_store { - ino_t ino; // inode number for /proc/$pid/ns/pid - pid_t initpid; // the pid of nit in that ns - long int ctime; // the time at which /proc/$initpid was created + ino_t ino; /* inode number for /proc/$pid/ns/pid */ + pid_t initpid; /* the pid of nit in that ns */ + int init_pidfd; + long int ctime; /* the time at which /proc/$initpid was created */ struct pidns_init_store *next; long int lastcheck; }; @@ -127,47 +130,62 @@ static void store_unlock(void) unlock_mutex(&pidns_store_mutex); } +/* /proc/ = 6 + * + + * <pid-as-str> = INTTYPE_TO_STRLEN(pid_t) + * + + * \0 = 1 + */ +#define LXCFS_PROC_PID_LEN \ + (STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(uint64_t) + +1) + /* Must be called under store_lock */ -static bool initpid_still_valid(struct pidns_init_store *e, struct stat *nsfdsb) +static bool initpid_still_valid(struct pidns_init_store *entry) { - struct stat initsb; - char fnam[100]; + bool valid = true; - snprintf(fnam, 100, "/proc/%d", e->initpid); - if (stat(fnam, &initsb) < 0) - return false; + if (entry->init_pidfd >= 0) { + if (pidfd_send_signal(entry->init_pidfd, 0, NULL, 0)) + valid = false; + } else { + struct stat st; + char path[LXCFS_PROC_PID_LEN]; - lxcfs_debug("Comparing ctime %ld == %ld for pid %d.\n", e->ctime, - initsb.st_ctime, e->initpid); + snprintf(path, sizeof(path), "/proc/%d", entry->initpid); - if (e->ctime != initsb.st_ctime) - return false; - return true; + if (stat(path, &st) || entry->ctime != st.st_ctime) + valid = false; + } + + return valid; } /* Must be called under store_lock */ -static void remove_initpid(struct pidns_init_store *e) +static void remove_initpid(struct pidns_init_store *entry) { - struct pidns_init_store *tmp; - int h; + struct pidns_init_store *it; + int ino_hash; - lxcfs_debug("Remove_initpid: removing entry for %d.\n", e->initpid); + lxcfs_debug("Removing cached entry for pid %d from init pid cache", + entry->initpid); - h = HASH(e->ino); - if (pidns_hash_table[h] == e) { - pidns_hash_table[h] = e->next; - free_disarm(e); + ino_hash = HASH(entry->ino); + if (pidns_hash_table[ino_hash] == entry) { + pidns_hash_table[ino_hash] = entry->next; + close_prot_errno_disarm(entry->init_pidfd); + free_disarm(entry); return; } - tmp = pidns_hash_table[h]; - while (tmp) { - if (tmp->next == e) { - tmp->next = e->next; - free_disarm(e); + it = pidns_hash_table[ino_hash]; + while (it) { + if (it->next == entry) { + it->next = entry->next; + close_prot_errno_disarm(entry->init_pidfd); + free_disarm(entry); return; } - tmp = tmp->next; + it = it->next; } } @@ -176,39 +194,39 @@ static void remove_initpid(struct pidns_init_store *e) static void prune_initpid_store(void) { static long int last_prune = 0; - struct pidns_init_store *e, *prev, *delme; long int now, threshold; - int i; if (!last_prune) { last_prune = time(NULL); return; } + now = time(NULL); if (now < last_prune + PURGE_SECS) return; - lxcfs_debug("%s\n", "Pruning."); + lxcfs_debug("Pruning init pid cache"); last_prune = now; threshold = now - 2 * PURGE_SECS; - for (i = 0; i < PIDNS_HASH_SIZE; i++) { - for (prev = NULL, e = pidns_hash_table[i]; e; ) { - if (e->lastcheck < threshold) { + for (int i = 0; i < PIDNS_HASH_SIZE; i++) { + for (struct pidns_init_store *entry = pidns_hash_table[i], *prev = NULL; entry;) { + if (entry->lastcheck < threshold) { + struct pidns_init_store *cur = entry; - lxcfs_debug("Removing cached entry for %d.\n", e->initpid); + lxcfs_debug("Removed cache entry for pid %d to init pid cache", cur->initpid); - delme = e; if (prev) - prev->next = e->next; + prev->next = entry->next; else - pidns_hash_table[i] = e->next; - e = e->next; - free_disarm(delme); + pidns_hash_table[i] = entry->next; + entry = entry->next; + close_prot_errno_disarm(cur->init_pidfd); + free_disarm(cur); } else { - prev = e; - e = e->next; + prev = entry; + entry = entry->next; } } } @@ -217,26 +235,37 @@ static void prune_initpid_store(void) /* Must be called under store_lock */ static void save_initpid(struct stat *sb, pid_t pid) { - struct pidns_init_store *e; - char fpath[100]; - struct stat procsb; - int h; + __do_free struct pidns_init_store *e = NULL; + __do_close_prot_errno int pidfd = -EBADF; + char path[LXCFS_PROC_PID_LEN]; + struct lxcfs_opts *opts = fuse_get_context()->private_data; + struct stat st; + int ino_hash; + + if (opts->use_pidfd && can_use_pidfd) { + pidfd = pidfd_open(pid, 0); + if (pidfd < 0) + return; + } - lxcfs_debug("Save_initpid: adding entry for %d.\n", pid); + snprintf(path, sizeof(path), "/proc/%d", pid); + if (stat(path, &st)) + return; - snprintf(fpath, 100, "/proc/%d", pid); - if (stat(fpath, &procsb) < 0) + e = malloc(sizeof(*e)); + if (!e) return; - do { - e = malloc(sizeof(*e)); - } while (!e); + e->ino = sb->st_ino; e->initpid = pid; - e->ctime = procsb.st_ctime; - h = HASH(e->ino); - e->next = pidns_hash_table[h]; + e->ctime = st.st_ctime; + ino_hash = HASH(e->ino); + e->next = pidns_hash_table[ino_hash]; e->lastcheck = time(NULL); - pidns_hash_table[h] = e; + e->init_pidfd = move_fd(pidfd); + pidns_hash_table[ino_hash] = move_ptr(e); + + lxcfs_debug("Added cache entry %d for pid %d to init pid cache", ino_hash, pid); } /* @@ -248,19 +277,19 @@ static void save_initpid(struct stat *sb, pid_t pid) */ static struct pidns_init_store *lookup_verify_initpid(struct stat *sb) { - int h = HASH(sb->st_ino); - struct pidns_init_store *e = pidns_hash_table[h]; - - while (e) { - if (e->ino == sb->st_ino) { - if (initpid_still_valid(e, sb)) { - e->lastcheck = time(NULL); - return e; + struct pidns_init_store *entry = pidns_hash_table[HASH(sb->st_ino)]; + + while (entry) { + if (entry->ino == sb->st_ino) { + if (initpid_still_valid(entry)) { + entry->lastcheck = time(NULL); + return entry; } - remove_initpid(e); + + remove_initpid(entry); return NULL; } - e = e->next; + entry = entry->next; } return NULL; @@ -358,31 +387,42 @@ static pid_t get_init_pid_for_task(pid_t task) return ret; } -pid_t lookup_initpid_in_store(pid_t qpid) +#define LXCFS_PROC_PID_NS_LEN \ + (STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(uint64_t) + \ + STRLITERALLEN("/ns/pid") + 1) + +pid_t lookup_initpid_in_store(pid_t pid) { pid_t answer = 0; - struct stat sb; - struct pidns_init_store *e; - char fnam[100]; + char path[LXCFS_PROC_PID_NS_LEN]; + struct stat st; + struct pidns_init_store *entry; + + snprintf(path, sizeof(path), "/proc/%d/ns/pid", pid); - snprintf(fnam, 100, "/proc/%d/ns/pid", qpid); store_lock(); - if (stat(fnam, &sb) < 0) + if (stat(path, &st)) goto out; - e = lookup_verify_initpid(&sb); - if (e) { - answer = e->initpid; + + entry = lookup_verify_initpid(&st); + if (entry) { + answer = entry->initpid; goto out; } - answer = get_init_pid_for_task(qpid); + + answer = get_init_pid_for_task(pid); if (answer > 0) - save_initpid(&sb, answer); + save_initpid(&st, answer); out: - /* we prune at end in case we are returning - * the value we were about to return */ + /* + * Prune at the end in case we're returning the value we were about to + * return. + */ prune_initpid_store(); + store_unlock(); + return answer; } @@ -663,8 +703,9 @@ static bool cgfs_setup_controllers(void) static void __attribute__((constructor)) lxcfs_init(void) { - __do_close_prot_errno int init_ns = -EBADF; + __do_close_prot_errno int init_ns = -EBADF, pidfd = -EBADF; int i = 0; + pid_t pid; char *cret; char cwd[MAXPATHLEN]; @@ -673,7 +714,8 @@ static void __attribute__((constructor)) lxcfs_init(void) log_exit("Failed to initialize cgroup support"); /* Preserve initial namespace. */ - init_ns = preserve_ns(getpid(), "mnt"); + pid = getpid(); + init_ns = preserve_ns(pid, "mnt"); if (init_ns < 0) log_exit("Failed to preserve initial mount namespace"); @@ -702,6 +744,12 @@ static void __attribute__((constructor)) lxcfs_init(void) __do_free char *controllers = lxc_string_join(",", (const char **)(*h)->controllers, false); fprintf(stderr, " %2d: fd: %3d: %s\n", i, (*h)->fd, controllers ?: ""); } + + pidfd = pidfd_open(pid, 0); + if (pidfd >= 0 && pidfd_send_signal(pidfd, 0, NULL, 0) == 0) { + can_use_pidfd = true; + lxcfs_error("Kernel supports pidfds"); + } } static void __attribute__((destructor)) lxcfs_exit(void) diff --git a/bindings.h b/bindings.h index 8e330ec..777fcd2 100644 --- a/bindings.h +++ b/bindings.h @@ -14,15 +14,16 @@ #define _FILE_OFFSET_BITS 64 #include <fuse.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <sys/stat.h> #include <sys/types.h> #include <unistd.h> +#include "cgroup_fuse.h" #include "config.h" #include "macro.h" -#include "cgroup_fuse.h" #include "proc_cpuview.h" #include "proc_fuse.h" #include "proc_loadavg.h" @@ -64,9 +65,11 @@ struct file_info { struct lxcfs_opts { bool swap_off; + bool use_pidfd; }; extern pid_t lookup_initpid_in_store(pid_t qpid); extern void prune_init_slice(char *cg); +extern bool supports_pidfd(void); #endif /* __LXCFS_BINDINGS_H */ diff --git a/configure.ac b/configure.ac index 63cd934..e22cf3c 100644 --- a/configure.ac +++ b/configure.ac @@ -171,4 +171,14 @@ AC_CHECK_FUNCS([strlcat], AC_DEFINE(HAVE_STRLCAT,1,[Have strlcat]), AM_CONDITIONAL(HAVE_STRLCAT, false)) +AC_CHECK_FUNCS([pidfd_open], + AM_CONDITIONAL(HAVE_PIDFD_OPEN, true) + AC_DEFINE(HAVE_PIDFD_OPEN,1,[Supports pidfd_open]), + AM_CONDITIONAL(HAVE_PIDFD_OPEN, false)) + +AC_CHECK_FUNCS([pidfd_send_signal], + AM_CONDITIONAL(HAVE_PIDFD_SEND_SIGNAL, true) + AC_DEFINE(HAVE_PIDFD_SEND_SIGNAL,1,[Supports pidfd_send_signal]), + AM_CONDITIONAL(HAVE_PIDFD_SEND_SIGNAL, false)) + AC_OUTPUT diff --git a/lxcfs.c b/lxcfs.c index ce5a24b..d825303 100644 --- a/lxcfs.c +++ b/lxcfs.c @@ -1102,6 +1102,7 @@ int main(int argc, char *argv[]) goto out; } opts->swap_off = false; + opts->use_pidfd = false; /* accomodate older init scripts */ swallow_arg(&argc, argv, "-s"); @@ -1112,6 +1113,9 @@ int main(int argc, char *argv[]) if (swallow_arg(&argc, argv, "-u")) opts->swap_off = true; + if (swallow_arg(&argc, argv, "--pidfd")) + opts->use_pidfd = true; + if (swallow_option(&argc, argv, "-o", &v)) { /* Parse multiple values */ for (; (token = strtok_r(v, ",", &saveptr)); v = NULL) { diff --git a/utils.h b/utils.h index 6c874b4..d5673ed 100644 --- a/utils.h +++ b/utils.h @@ -46,4 +46,27 @@ extern int read_file_fuse(const char *path, char *buf, size_t size, extern void prune_init_slice(char *cg); extern int wait_for_pid(pid_t pid); +#ifndef HAVE_PIDFD_OPEN +static inline int pidfd_open(pid_t pid, unsigned int flags) +{ +#ifdef __NR_pidfd_open + return syscall(__NR_pidfd_open, pid, flags); +#else + return ret_errno(ENOSYS); +#endif +} +#endif + +#ifndef HAVE_PIDFD_SEND_SIGNAL +static inline int pidfd_send_signal(int pidfd, int sig, siginfo_t *info, + unsigned int flags) +{ +#ifdef __NR_pidfd_send_signal + return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); +#else + return ret_errno(ENOSYS); +#endif +} +#endif + #endif /* __LXCFS_UTILS_H */
_______________________________________________ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel