Hey Yonghong, I have changed getname_kernel interface to specify the allocation type, all previous callers preserve GFP_KERNEL as the allocation type, after this change getname_kernel could be used with GFP_ATOMIC in ebpf helpers. If this change goes in, I could complete bpf_get_current_pidns_info helper. Let me know your thoughts.
>From dad6b281a744afb88660137a6a5f27057a72d7d5 Mon Sep 17 00:00:00 2001 From: Carlos <cneirabus...@gmail.com> Date: Mon, 22 Jul 2019 17:50:02 -0400 Subject: [PATCH] [bpf-next 1/1] BPF: getname_kernel specify allocation type A parameter to specify the allocation type has been added to getname_kernel, as currently all allocations done with getname_kernel could sleep, the reason for this change is that currently ebpf cannot call functions that could sleep and future work will need to call getname_kernel that currently could sleep on allocation. This change preserves the GFP_KERNEL allocation type on all existing code that calls getname_kernel. --- fs/coredump.c | 2 +- fs/exec.c | 2 +- fs/fs_parser.c | 2 +- fs/namei.c | 23 ++++++++++++++--------- fs/open.c | 2 +- include/linux/fs.h | 2 +- init/main.c | 2 +- kernel/umh.c | 2 +- 8 files changed, 21 insertions(+), 16 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index e42e17e55bfd..5c43a32f7ba2 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -684,7 +684,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) * If it doesn't exist, that's fine. If there's some * other problem, we'll catch it at the filp_open(). */ - do_unlinkat(AT_FDCWD, getname_kernel(cn.corename)); + do_unlinkat(AT_FDCWD, getname_kernel(cn.corename, GFP_KERNEL)); } /* diff --git a/fs/exec.c b/fs/exec.c index c71cbfe6826a..3811afb37d9a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -882,7 +882,7 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) struct file *open_exec(const char *name) { - struct filename *filename = getname_kernel(name); + struct filename *filename = getname_kernel(name, GFP_KERNEL); struct file *f = ERR_CAST(filename); if (!IS_ERR(filename)) { diff --git a/fs/fs_parser.c b/fs/fs_parser.c index 0d388faa25d1..2a8f27cbbf39 100644 --- a/fs/fs_parser.c +++ b/fs/fs_parser.c @@ -248,7 +248,7 @@ int fs_lookup_param(struct fs_context *fc, switch (param->type) { case fs_value_is_string: - f = getname_kernel(param->string); + f = getname_kernel(param->string, GFP_KERNEL); if (IS_ERR(f)) return PTR_ERR(f); put_f = true; diff --git a/fs/namei.c b/fs/namei.c index 209c51a5226c..0e0a9710bdfe 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -122,6 +122,11 @@ * PATH_MAX includes the nul terminator --RR. */ +/* [July 2019 neirac] getname_kernel added allocation type parameter as + * currently all allocations on getname_kernel could sleep, but ebpf + * needs non blocking allocations. + */ + #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) struct filename * @@ -210,12 +215,12 @@ getname(const char __user * filename) } struct filename * -getname_kernel(const char * filename) +getname_kernel(const char * filename, gfp_t flags) { struct filename *result; int len = strlen(filename) + 1; - result = __getname(); + result = kmem_cache_alloc(names_cachep, flags); if (unlikely(!result)) return ERR_PTR(-ENOMEM); @@ -225,7 +230,7 @@ getname_kernel(const char * filename) const size_t size = offsetof(struct filename, iname[1]); struct filename *tmp; - tmp = kmalloc(size, GFP_KERNEL); + tmp = kmalloc(size, flags); if (unlikely(!tmp)) { __putname(result); return ERR_PTR(-ENOMEM); @@ -2408,7 +2413,7 @@ struct dentry *kern_path_locked(const char *name, struct path *path) struct qstr last; int type; - filename = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path, + filename = filename_parentat(AT_FDCWD, getname_kernel(name, GFP_KERNEL), 0, path, &last, &type); if (IS_ERR(filename)) return ERR_CAST(filename); @@ -2429,7 +2434,7 @@ struct dentry *kern_path_locked(const char *name, struct path *path) int kern_path(const char *name, unsigned int flags, struct path *path) { - return filename_lookup(AT_FDCWD, getname_kernel(name), + return filename_lookup(AT_FDCWD, getname_kernel(name, GFP_KERNEL), flags, path, NULL); } EXPORT_SYMBOL(kern_path); @@ -2448,7 +2453,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, { struct path root = {.mnt = mnt, .dentry = dentry}; /* the first argument of filename_lookup() is ignored with root */ - return filename_lookup(AT_FDCWD, getname_kernel(name), + return filename_lookup(AT_FDCWD, getname_kernel(name, GFP_KERNEL), flags , path, &root); } EXPORT_SYMBOL(vfs_path_lookup); @@ -2749,7 +2754,7 @@ int kern_path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags) { - return filename_mountpoint(dfd, getname_kernel(name), path, flags); + return filename_mountpoint(dfd, getname_kernel(name, GFP_KERNEL), path, flags); } EXPORT_SYMBOL(kern_path_mountpoint); @@ -3583,7 +3588,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN) return ERR_PTR(-ELOOP); - filename = getname_kernel(name); + filename = getname_kernel(name, GFP_KERNEL); if (IS_ERR(filename)) return ERR_CAST(filename); @@ -3672,7 +3677,7 @@ static struct dentry *filename_create(int dfd, struct filename *name, struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, unsigned int lookup_flags) { - return filename_create(dfd, getname_kernel(pathname), + return filename_create(dfd, getname_kernel(pathname, GFP_KERNEL), path, lookup_flags); } EXPORT_SYMBOL(kern_path_create); diff --git a/fs/open.c b/fs/open.c index b5b80469b93d..cc0e23a605f3 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1030,7 +1030,7 @@ struct file *file_open_name(struct filename *name, int flags, umode_t mode) */ struct file *filp_open(const char *filename, int flags, umode_t mode) { - struct filename *name = getname_kernel(filename); + struct filename *name = getname_kernel(filename, GFP_KERNEL); struct file *file = ERR_CAST(name); if (!IS_ERR(name)) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 75f2ed289a3f..826ab644f9bf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2532,7 +2532,7 @@ extern int filp_close(struct file *, fl_owner_t id); extern struct filename *getname_flags(const char __user *, int, int *); extern struct filename *getname(const char __user *); -extern struct filename *getname_kernel(const char *); +extern struct filename *getname_kernel(const char *, gfp_t flags); extern void putname(struct filename *name); extern int finish_open(struct file *file, struct dentry *dentry, diff --git a/init/main.c b/init/main.c index ff5803b0841c..29972a5a0ebc 100644 --- a/init/main.c +++ b/init/main.c @@ -1046,7 +1046,7 @@ static int run_init_process(const char *init_filename) { argv_init[0] = init_filename; pr_info("Run %s as init process\n", init_filename); - return do_execve(getname_kernel(init_filename), + return do_execve(getname_kernel(init_filename, GFP_KERNEL), (const char __user *const __user *)argv_init, (const char __user *const __user *)envp_init); } diff --git a/kernel/umh.c b/kernel/umh.c index 7f255b5a8845..f99e08fc327c 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -109,7 +109,7 @@ static int call_usermodehelper_exec_async(void *data) if (!retval) current->flags |= PF_UMH; } else - retval = do_execve(getname_kernel(sub_info->path), + retval = do_execve(getname_kernel(sub_info->path, GFP_KERNEL), (const char __user *const __user *)sub_info->argv, (const char __user *const __user *)sub_info->envp); out: -- 2.11.0 On Tue, Jul 23, 2019 at 12:55:37PM -0400, Carlos Antonio Neira Bustos wrote: > Yonghong, > > I totally agree with this new approach as it isolates the allocation type per > caller.That way we should not have any regressions on existing code, and the > only caller using GFP_ATOMIC getname_kernel should be at this point the ebpf > helper, I'll also change __getname to use the allocation type requested by > getname_kernel flags. > > Thanks again for your help. > > > > On Tue, Jul 23, 2019 at 08:56:03AM -0700, Y Song wrote: > > On Tue, Jul 23, 2019 at 7:37 AM Carlos Antonio Neira Bustos > > <cneirabus...@gmail.com> wrote: > > > > > > Hey Yonghong, > > > I also needed to replace the call to __getname as allocations on the slab > > > cache > > > could sleep, that makes getname_kernel the only name cache consumer that > > > does not block. > > > What do you think ? > > > > Maybe the following alternative approach is better to preserve the > > existing behavior. > > Change getname_kernel(filename) to getname_kernel(filename, flags) and > > the *flags* will > > be used for kmem_cache_alloc and kmalloc. Existing behavior won't > > change and bpf helper > > will call getname_kernel(filename, GFP_ATOMIC). More code changes but > > cleaner and not > > unnecessarily overusing kernel atomic memory region. > > > > > > > > > > > > > From 639233cc05e96a81054b049d16d1b9193ae667e9 Mon Sep 17 00:00:00 2001 > > > From: Carlos <cneirabus...@gmail.com> > > > Date: Mon, 22 Jul 2019 17:50:02 -0400 > > > Subject: [PATCH] [bpf-next 1/1] BPF: getname_kernel don't block on > > > allocation. > > > > > > This patch calls directly kmem_cache_alloc to specify the allocation type > > > as GFP_ATOMIC, > > > the reason is that currently ebpf cannot call functions that could sleep > > > and future work > > > will need to call kern_path that currently could sleep on allocation. > > > --- > > > fs/namei.c | 8 ++++++-- > > > 1 file changed, 6 insertions(+), 2 deletions(-) > > > > > > diff --git a/fs/namei.c b/fs/namei.c > > > index 209c51a5226c..a4269ca52503 100644 > > > --- a/fs/namei.c > > > +++ b/fs/namei.c > > > @@ -122,6 +122,10 @@ > > > * PATH_MAX includes the nul terminator --RR. > > > */ > > > > > > +/* [July 2019 neirac] getname_kernel changed allocation type to > > > GFP_ATOMIC, > > > + * as ebpf needs to use kern_path. > > > + */ > > > + > > > #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, > > > iname)) > > > > > > struct filename * > > > @@ -215,7 +219,7 @@ getname_kernel(const char * filename) > > > struct filename *result; > > > int len = strlen(filename) + 1; > > > > > > - result = __getname(); > > > + result = kmem_cache_alloc(names_cachep, GFP_ATOMIC); > > > if (unlikely(!result)) > > > return ERR_PTR(-ENOMEM); > > > > > > @@ -225,7 +229,7 @@ getname_kernel(const char * filename) > > > const size_t size = offsetof(struct filename, iname[1]); > > > struct filename *tmp; > > > > > > - tmp = kmalloc(size, GFP_KERNEL); > > > + tmp = kmalloc(size, GFP_ATOMIC); > > > if (unlikely(!tmp)) { > > > __putname(result); > > > return ERR_PTR(-ENOMEM); > > > -- > > > 2.11.0 > > > > > > > > > > > > On Fri, Jul 19, 2019 at 09:06:54AM -0700, Y Song wrote: > > > > On Fri, Jul 19, 2019 at 6:02 AM cnb <cneirabus...@gmail.com> wrote: > > > > > > > > > > kern_path is the one, should I move this and dependencies to a new > > > > > file called bfp_namei.c, in there I'll change it to use GFP_ATOMIC or > > > > > create a new function to replace kern_path. What do you think? > > > > > > > > I think adding GFP_ATOMIC to function getname_kernel() is reasonable. > > > > > > > > const size_t size = offsetof(struct filename, iname[1]); > > > > struct filename *tmp; > > > > > > > > tmp = kmalloc(size, GFP_KERNEL); > > > > > > > > /* fs/open.c */ > > > > struct audit_names; > > > > struct filename { > > > > const char *name; /* pointer to actual string */ > > > > const __user char *uptr; /* original userland pointer */ > > > > int refcnt; > > > > struct audit_names *aname; > > > > const char iname[]; > > > > }; > > > > > > > > The size is pretty small. on x64, it should be 25 bytes. If the system > > > > cannot honor > > > > this 25 bytes in non-blocking mode, it is probably already in stress, > > > > bpf subsystem itself may not work reliably as it uses some GFP_ATOMIC > > > > as well for some map (e.g., update) helpers. > > > > > > > > So let us have this patch (adding GFP_ATOMIC) in the first series. We > > > > can > > > > think of alternatives (e.g., separate functions) if anybody objects. > > > > > > > > > > > > > > El jue., 18 de jul. de 2019 22:10, Y Song <ys114...@gmail.com> > > > > > escribió: > > > > >> > > > > >> On Thu, Jul 18, 2019 at 6:21 PM carlos antonio neira bustos > > > > >> <cneirabus...@gmail.com> wrote: > > > > >> > > > > > >> > Hi, > > > > >> > Yes, I'm still interested as I need this capability at $WORK, but > > > > >> > haven't had the time to re-write the dentry functions we need, as > > > > >> > currently they could sleep. > > > > >> > I could resume work on this next monday when I get back from > > > > >> > vacations. > > > > >> > I really appreciate your help and guidance on this. > > > > >> > > > > >> Great. Which dentry function are you referring to? dput? kern_path > > > > >> cannot be used as it uses > > > > >> kmalloc without GFP_ATOMIC. See > > > > >> https://github.com/iovisor/bcc/issues/1329. > > > > >> > > > > >> > > > > > >> > Bests > > > > >> > > > > > >> > > > > > >> > El jue., 18 de jul. de 2019 20:29, Y Song <ys114...@gmail.com> > > > > >> > escribió: > > > > >> >> > > > > >> >> Hi, Carlos, > > > > >> >> > > > > >> >> Are you still interested in upstreaming this patch? Looks like > > > > >> >> there > > > > >> >> still a desire > > > > >> >> to make bcc work inside the containers. > > > > >> >> > > > > >> >> The bpf-next will open next week. If you would like, could you > > > > >> >> submit again? > > > > >> >> I will review the patch on bpf-next as well to make sure we made > > > > >> >> forward progress. > > > > >> >> > > > > >> >> Please let me know. Thanks! > > > > >> >> > > > > >> >> Yonghong > > > > >> >> > > > > >> >> On Tue, Apr 16, 2019 at 6:31 PM Carlos Antonio Neira Bustos > > > > >> >> <cneirabus...@gmail.com> wrote: > > > > >> >> > > > > > >> >> > As a bpf program cannot sleep, I needed to add a spinlock to > > > > >> >> > kern_path, as > > > > >> >> > it calls getname_kernel() which may sleep. > > > > >> >> > The inode is accessed directly, as we are just interested in > > > > >> >> > the inode's s_dev. > > > > >> >> > Let me know if this approach is the correct one. > > > > >> >> > ------------------------------------------------------------------------------- > > > > >> >> > From 35b7bfcbf6524ec807f107302209a0fb07614cc8 Mon Sep 17 > > > > >> >> > 00:00:00 2001 > > > > >> >> > From: Carlos <cneirabus...@gmail.com> > > > > >> >> > Date: Tue, 16 Apr 2019 17:10:46 -0400 > > > > >> >> > Subject: [PATCH] [PATCH bpf-next 1/3] BPF: New helper to obtain > > > > >> >> > namespace data > > > > >> >> > from current task > > > > >> >> > > > > > >> >> > This helper obtains the active namespace from current and > > > > >> >> > returns pid, tgid, > > > > >> >> > device and namespace id as seen from that namespace, allowing > > > > >> >> > to instrument > > > > >> >> > a process inside a container. > > > > >> >> > Device is read from /proc/self/ns/pid, as in the future it's > > > > >> >> > possible that > > > > >> >> > different pid_ns files may belong to different devices, > > > > >> >> > according > > > > >> >> > to the discussion between Eric Biederman and Yonghong in 2017 > > > > >> >> > linux plumbers > > > > >> >> > conference. > > > > >> >> > Currently bpf_get_current_pid_tgid(), is used to do pid > > > > >> >> > filtering in bcc's > > > > >> >> > scripts but this helper returns the pid as seen by the root > > > > >> >> > namespace which is > > > > >> >> > fine when a bcc script is not executed inside a container. > > > > >> >> > When the process of interest is inside a container, pid > > > > >> >> > filtering will not work > > > > >> >> > if bpf_get_current_pid_tgid() is used. This helper addresses > > > > >> >> > this limitation > > > > >> >> > returning the pid as it's seen by the current namespace where > > > > >> >> > the script is > > > > >> >> > executing. > > > > >> >> > > > > > >> >> > This helper has the same use cases as > > > > >> >> > bpf_get_current_pid_tgid() as it can be > > > > >> >> > used to do pid filtering even inside a container. > > > > >> >> > > > > > >> >> > For example a bcc script using bpf_get_current_pid_tgid() > > > > >> >> > (tools/funccount.py): > > > > >> >> > > > > > >> >> > u32 pid = bpf_get_current_pid_tgid() >> 32; > > > > >> >> > if (pid != <pid_arg_passed_in>) > > > > >> >> > return 0; > > > > >> >> > Could be modified to use bpf_get_current_pidns_info() as > > > > >> >> > follows: > > > > >> >> > > > > > >> >> > struct bpf_pidns pidns; > > > > >> >> > bpf_get_current_pidns_info(&pidns, sizeof(struct > > > > >> >> > bpf_pidns)); > > > > >> >> > u32 pid = pidns.tgid; > > > > >> >> > u32 nsid = pidns.nsid; > > > > >> >> > if ((pid != <pid_arg_passed_in>) && (nsid != > > > > >> >> > <nsid_arg_passed_in>)) > > > > >> >> > return 0; > > > > >> >> > > > > > >> >> > To find out the name PID namespace id of a process, you could > > > > >> >> > use this command: > > > > >> >> > > > > > >> >> > $ ps -h -o pidns -p <pid_of_interest> > > > > >> >> > > > > > >> >> > Or this other command: > > > > >> >> > > > > > >> >> > $ ls -Li /proc/<pid_of_interest>/ns/pid > > > > >> >> > > > > > >> >> > Signed-off-by: Carlos Antonio Neira Bustos > > > > >> >> > <cneirabus...@gmail.com> > > > > >> >> > --- > > > > >> >> > include/linux/bpf.h | 1 + > > > > >> >> > include/uapi/linux/bpf.h | 25 ++++++++++++++++++- > > > > >> >> > kernel/bpf/core.c | 1 + > > > > >> >> > kernel/bpf/helpers.c | 65 > > > > >> >> > ++++++++++++++++++++++++++++++++++++++++++++++++ > > > > >> >> > kernel/trace/bpf_trace.c | 2 ++ > > > > >> >> > 5 files changed, 93 insertions(+), 1 deletion(-) > > > > >> >> > > > > > >> >> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > > > > >> >> > index e4d4c1771ab0..4393f8f088cc 100644 > > > > >> >> > --- a/include/linux/bpf.h > > > > >> >> > +++ b/include/linux/bpf.h > > > > >> >> > @@ -987,6 +987,7 @@ extern const struct bpf_func_proto > > > > >> >> > bpf_sk_redirect_map_proto; > > > > >> >> > extern const struct bpf_func_proto bpf_spin_lock_proto; > > > > >> >> > extern const struct bpf_func_proto bpf_spin_unlock_proto; > > > > >> >> > extern const struct bpf_func_proto bpf_get_local_storage_proto; > > > > >> >> > +extern const struct bpf_func_proto > > > > >> >> > bpf_get_current_pidns_info_proto; > > > > >> >> > > > > > >> >> > /* Shared helpers among cBPF and eBPF. */ > > > > >> >> > void bpf_user_rnd_init_once(void); > > > > >> >> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > > > >> >> > index 31a27dd337dc..7bf457875c31 100644 > > > > >> >> > --- a/include/uapi/linux/bpf.h > > > > >> >> > +++ b/include/uapi/linux/bpf.h > > > > >> >> > @@ -2500,6 +2500,18 @@ union bpf_attr { > > > > >> >> > * Return > > > > >> >> > * 0 if iph and th are a valid SYN cookie ACK, or > > > > >> >> > a negative error > > > > >> >> > * otherwise. > > > > >> >> > + * > > > > >> >> > + * int bpf_get_current_pidns_info(struct bpf_pidns_info > > > > >> >> > *pidns, u32 size_of_pidns) > > > > >> >> > + * Description > > > > >> >> > + * Copies into *pidns* pid, namespace id and tgid > > > > >> >> > as seen by the > > > > >> >> > + * current namespace and also device from > > > > >> >> > /proc/self/ns/pid. > > > > >> >> > + * *size_of_pidns* must be the size of *pidns* > > > > >> >> > + * > > > > >> >> > + * This helper is used when pid filtering is > > > > >> >> > needed inside a > > > > >> >> > + * container as bpf_get_current_tgid() helper > > > > >> >> > returns always the > > > > >> >> > + * pid id as seen by the root namespace. > > > > >> >> > + * Return > > > > >> >> > + * 0 on success -EINVAL on error. > > > > >> >> > */ > > > > >> >> > #define __BPF_FUNC_MAPPER(FN) \ > > > > >> >> > FN(unspec), \ > > > > >> >> > @@ -2602,7 +2614,8 @@ union bpf_attr { > > > > >> >> > FN(skb_ecn_set_ce), \ > > > > >> >> > FN(get_listener_sock), \ > > > > >> >> > FN(skc_lookup_tcp), \ > > > > >> >> > - FN(tcp_check_syncookie), > > > > >> >> > + FN(tcp_check_syncookie), \ > > > > >> >> > + FN(get_current_pidns_info), > > > > >> >> > > > > > >> >> > /* integer value in 'imm' field of BPF_CALL instruction > > > > >> >> > selects which helper > > > > >> >> > * function eBPF program intends to call > > > > >> >> > @@ -3298,4 +3311,14 @@ struct bpf_line_info { > > > > >> >> > struct bpf_spin_lock { > > > > >> >> > __u32 val; > > > > >> >> > }; > > > > >> >> > + > > > > >> >> > +/* helper bpf_get_current_pidns_info will store the following > > > > >> >> > + * data, dev will contain major/minor from /proc/self/pid. > > > > >> >> > +*/ > > > > >> >> > +struct bpf_pidns_info { > > > > >> >> > + __u32 dev; > > > > >> >> > + __u32 nsid; > > > > >> >> > + __u32 tgid; > > > > >> >> > + __u32 pid; > > > > >> >> > +}; > > > > >> >> > #endif /* _UAPI__LINUX_BPF_H__ */ > > > > >> >> > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c > > > > >> >> > index ace8c22c8b0e..ecbdc72ba459 100644 > > > > >> >> > --- a/kernel/bpf/core.c > > > > >> >> > +++ b/kernel/bpf/core.c > > > > >> >> > @@ -2046,6 +2046,7 @@ const struct bpf_func_proto > > > > >> >> > bpf_get_current_uid_gid_proto __weak; > > > > >> >> > const struct bpf_func_proto bpf_get_current_comm_proto __weak; > > > > >> >> > const struct bpf_func_proto bpf_get_current_cgroup_id_proto > > > > >> >> > __weak; > > > > >> >> > const struct bpf_func_proto bpf_get_local_storage_proto __weak; > > > > >> >> > +const struct bpf_func_proto bpf_get_current_pidns_info __weak; > > > > >> >> > > > > > >> >> > const struct bpf_func_proto * __weak > > > > >> >> > bpf_get_trace_printk_proto(void) > > > > >> >> > { > > > > >> >> > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c > > > > >> >> > index a411fc17d265..2de82d14424d 100644 > > > > >> >> > --- a/kernel/bpf/helpers.c > > > > >> >> > +++ b/kernel/bpf/helpers.c > > > > >> >> > @@ -18,6 +18,11 @@ > > > > >> >> > #include <linux/sched.h> > > > > >> >> > #include <linux/uidgid.h> > > > > >> >> > #include <linux/filter.h> > > > > >> >> > +#include <linux/pid_namespace.h> > > > > >> >> > +#include <linux/major.h> > > > > >> >> > +#include <linux/stat.h> > > > > >> >> > +#include <linux/namei.h> > > > > >> >> > +#include <linux/version.h> > > > > >> >> > > > > > >> >> > /* If kernel subsystem is allowing eBPF programs to call this > > > > >> >> > function, > > > > >> >> > * inside its own verifier_ops->get_func_proto() callback it > > > > >> >> > should return > > > > >> >> > @@ -317,6 +322,66 @@ void copy_map_value_locked(struct bpf_map > > > > >> >> > *map, void *dst, void *src, > > > > >> >> > preempt_enable(); > > > > >> >> > } > > > > >> >> > > > > > >> >> > +BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info > > > > >> >> > *, pidns_info, u32, > > > > >> >> > + size) > > > > >> >> > +{ > > > > >> >> > + const char *pidnspath = "/proc/self/ns/pid"; > > > > >> >> > + struct pid_namespace *pidns = NULL; > > > > >> >> > + DEFINE_SPINLOCK(bpf_spinlock); > > > > >> >> > + struct inode *inode; > > > > >> >> > + struct kstat ks; > > > > >> >> > + struct path kp; > > > > >> >> > + pid_t tgid = 0; > > > > >> >> > + pid_t pid = 0; > > > > >> >> > + int ret; > > > > >> >> > + > > > > >> >> > + if (unlikely(size != sizeof(struct bpf_pidns_info))) > > > > >> >> > + goto clear; > > > > >> >> > + > > > > >> >> > + pidns = task_active_pid_ns(current); > > > > >> >> > + > > > > >> >> > + if (unlikely(!pidns)) > > > > >> >> > + goto clear; > > > > >> >> > + > > > > >> >> > + pidns_info->nsid = pidns->ns.inum; > > > > >> >> > + pid = task_pid_nr_ns(current, pidns); > > > > >> >> > + > > > > >> >> > + if (unlikely(!pid)) > > > > >> >> > + goto clear; > > > > >> >> > + > > > > >> >> > + tgid = task_tgid_nr_ns(current, pidns); > > > > >> >> > + > > > > >> >> > + if (unlikely(!tgid)) > > > > >> >> > + goto clear; > > > > >> >> > + > > > > >> >> > + pidns_info->tgid = (u32) tgid; > > > > >> >> > + pidns_info->pid = (u32) pid; > > > > >> >> > + > > > > >> >> > + spin_lock(&bpf_spinlock); > > > > >> >> > + ret = kern_path(pidnspath, 0, &kp); > > > > >> >> > + if (ret) > > > > >> >> > + goto clear; > > > > >> >> > + inode = d_backing_inode(kp.dentry); > > > > >> >> > + pidns_info->dev = inode->i_sb->s_dev; > > > > >> >> > + spin_unlock(&bpf_spinlock); > > > > >> >> > + > > > > >> >> > + return 0; > > > > >> >> > + > > > > >> >> > + clear: > > > > >> >> > + if (pidns_info) > > > > >> >> > + memset((void *)pidns, 0, (size_t) size); > > > > >> >> > + > > > > >> >> > + return -EINVAL; > > > > >> >> > +} > > > > >> >> > + > > > > >> >> > +const struct bpf_func_proto bpf_get_current_pidns_info_proto = > > > > >> >> > { > > > > >> >> > + .func = bpf_get_current_pidns_info, > > > > >> >> > + .gpl_only = false, > > > > >> >> > + .ret_type = RET_INTEGER, > > > > >> >> > + .arg1_type = ARG_PTR_TO_UNINIT_MEM, > > > > >> >> > + .arg2_type = ARG_CONST_SIZE, > > > > >> >> > +}; > > > > >> >> > + > > > > >> >> > #ifdef CONFIG_CGROUPS > > > > >> >> > BPF_CALL_0(bpf_get_current_cgroup_id) > > > > >> >> > { > > > > >> >> > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > > > > >> >> > index d64c00afceb5..2ef0de78d4ec 100644 > > > > >> >> > --- a/kernel/trace/bpf_trace.c > > > > >> >> > +++ b/kernel/trace/bpf_trace.c > > > > >> >> > @@ -603,6 +603,8 @@ tracing_func_proto(enum bpf_func_id > > > > >> >> > func_id, const struct bpf_prog *prog) > > > > >> >> > case BPF_FUNC_get_current_cgroup_id: > > > > >> >> > return &bpf_get_current_cgroup_id_proto; > > > > >> >> > #endif > > > > >> >> > + case BPF_FUNC_get_current_pidns_info: > > > > >> >> > + return &bpf_get_current_pidns_info_proto; > > > > >> >> > default: > > > > >> >> > return NULL; > > > > >> >> > } > > > > >> >> > -- > > > > >> >> > 2.11.0 > > > > >> >> > > > > > > > > > > > -=-=-=-=-=-=-=-=-=-=-=- Links: You receive all messages sent to this group. View/Reply Online (#1750): https://lists.iovisor.org/g/iovisor-dev/message/1750 Mute This Topic: https://lists.iovisor.org/mt/31207586/21656 Group Owner: iovisor-dev+ow...@lists.iovisor.org Unsubscribe: https://lists.iovisor.org/g/iovisor-dev/unsub [arch...@mail-archive.com] -=-=-=-=-=-=-=-=-=-=-=-