Hi, I'll add the device major/minor numbers into the pidns_info structure and work on those 4 points. Thanks for your guidance on this.
Bests On Sat, Sep 30, 2017 at 9:55 PM, Y Song <ys114...@gmail.com> wrote: > On Thu, Sep 28, 2017 at 2:02 PM, carlos antonio neira bustos > <cneirabus...@gmail.com> wrote: > > Hi All, > > > > I'm still working this issue https://github.com/iovisor/bcc/issues/1329. > > I have added tests under samples/bpf, here is test calling this new > helper. > > > > Inside the container > > > > ping-10619 [000] d.s1 5319.547909: 0x00000001: ns_id 4026532197 tgid > 10619 > > pid 10619 > > > > Outside the container > > > > ping-12174 [000] d.s1 5480.582818: 0x00000001: ns_id 4026531836 tgid > 12174 > > pid 12174 > > > > > > Let me know if something needs to be changed. > > > > Thanks again for your help and comments. > > > > Here is the patch > > > > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > > index b69e7a5..34b608e 100644 > > --- a/include/linux/bpf.h > > +++ b/include/linux/bpf.h > > @@ -227,6 +227,12 @@ struct bpf_event_entry { > > struct rcu_head rcu; > > }; > > > > +struct bpf_current_pidns_info { > > + u64 ns_id; > > + u32 tgid; > > + u32 pid; > > +}; > > During linux plumbers conference, I talked to > Eric Biederman (kernel namespace maintainer). > The below is an example output of "stat -L /proc/self/ns/pid": > > -bash-4.3$ stat -L /proc/self/ns/pid > File: '/proc/self/ns/pid' > Size: 0 Blocks: 0 IO Block: 4096 regular empty > file > Device: 3h/3d Inode: 4026531836 Links: 1 > Access: (0444/-r--r--r--) Uid: ( 0/ root) Gid: ( 0/ root) > Context: system_u:object_r:nsfs_t:s0 > Access: 2017-09-28 21:21:41.496571299 -0700 > Modify: 2017-09-28 21:21:41.496571299 -0700 > Change: 2017-09-28 21:21:41.496571299 -0700 > Birth: - > -bash-4.3$ > > You will notice that there is a "Device" field with major and minor > number. Currently, all namespace files will have the same device. > However, in the future, it is possible (maybe under really rare > cases) that different pid_ns files may belong to different device. > > So he suggests that we should put device major/minor numbers > in the pidns_info structure as well. Could you help do that as well? > > > + > > u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); > > u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); > > > > @@ -375,6 +381,8 @@ extern const struct bpf_func_proto > > bpf_skb_vlan_push_proto; > > extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; > > extern const struct bpf_func_proto bpf_get_stackid_proto; > > > > +extern const struct bpf_func_proto bpf_get_current_pidns_info_proto; > > + > > /* Shared helpers among cBPF and eBPF. */ > > void bpf_user_rnd_init_once(void); > > u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > index e99e3e6..c1b94fa 100644 > > --- a/include/uapi/linux/bpf.h > > +++ b/include/uapi/linux/bpf.h > > @@ -539,6 +539,15 @@ union bpf_attr { > > * @mode: operation mode (enum bpf_adj_room_mode) > > * @flags: reserved for future use > > * Return: 0 on success or negative error code > > + * > > + * int bpf_get_current_pidns_info(void *buf, int size_of_buf) > > + * stores the following namespace data into > > + * bpf_current_pins_info struct: > > + * namespace id > > + * tgid inside namespace > > + * pid inside namespace > > + * Return: 0 on success or negative error > > + * > > */ > > #define __BPF_FUNC_MAPPER(FN) \ > > FN(unspec), \ > > @@ -591,7 +600,9 @@ union bpf_attr { > > FN(get_socket_uid), \ > > FN(set_hash), \ > > FN(setsockopt), \ > > - FN(skb_adjust_room), > > + FN(skb_adjust_room), \ > > + FN(get_current_pidns_info), > > + > > > > /* integer value in 'imm' field of BPF_CALL instruction selects which > > helper > > * function eBPF program intends to call > > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c > > index ad5f559..c81ffa0 100644 > > --- a/kernel/bpf/core.c > > +++ b/kernel/bpf/core.c > > @@ -1379,6 +1379,9 @@ const struct bpf_func_proto > > bpf_get_current_pid_tgid_proto __weak; > > const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; > > const struct bpf_func_proto bpf_get_current_comm_proto __weak; > > > > +const struct bpf_func_proto bpf_get_current_pidns_info __weak; > > + > > + > > const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) > > { > > return NULL; > > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c > > index 3d24e23..682d623 100644 > > --- a/kernel/bpf/helpers.c > > +++ b/kernel/bpf/helpers.c > > @@ -18,6 +18,7 @@ > > #include <linux/sched.h> > > #include <linux/uidgid.h> > > #include <linux/filter.h> > > +#include <linux/pid_namespace.h> > > > > /* If kernel subsystem is allowing eBPF programs to call this function, > > * inside its own verifier_ops->get_func_proto() callback it should > return > > @@ -179,3 +180,45 @@ const struct bpf_func_proto > bpf_get_current_comm_proto > > = { > > .arg1_type = ARG_PTR_TO_UNINIT_MEM, > > .arg2_type = ARG_CONST_SIZE, > > }; > > + > > +BPF_CALL_2(bpf_get_current_pidns_info, void *, buf, u32, size) > > +{ > > + > > + if (!buf) > > + return -EINVAL; > > + > > + struct task_struct *ts = current; > > + struct task_struct *ns_task = NULL; > > + const struct cred *cred = NULL; > > + pid_t pid = 0; > > + > > + if (unlikely(!ts)) > > + return -EINVAL; > > + > > + ((struct bpf_current_pidns_info*)buf)->ns_id = > > + ts->nsproxy->pid_ns_for_children->ns.inum; > > + > > + pid = task_pid_nr_ns(ts, ts->nsproxy->pid_ns_for_children); > > + > > + if (!pid) > > + return -EINVAL; > > + > > + ns_task = find_task_by_pid_ns(pid, > > ts->nsproxy->pid_ns_for_children); > > > > I did not check the code. But pid name space could be deeply nested. > Have you tested the case for more than two level nesting? > > > + if (unlikely(!ns_task)) > > + return -EINVAL; > > + > > + ((struct bpf_current_pidns_info*)buf)->tgid = ns_task->tgid; > > + ((struct bpf_current_pidns_info*)buf)->pid = ns_task->pid; > > + > > + return 0; > > +} > > + > > + > > +const struct bpf_func_proto bpf_get_current_pidns_info_proto = { > > + .func = bpf_get_current_pidns_info, > > + .gpl_only = false, > > + .ret_type = RET_INTEGER, > > + .arg1_type = ARG_PTR_TO_UNINIT_MEM, > > + .arg2_type = ARG_CONST_SIZE, > > +}; > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > > index dc498b6..06297e7 100644 > > --- a/kernel/trace/bpf_trace.c > > +++ b/kernel/trace/bpf_trace.c > > @@ -487,6 +487,8 @@ static const struct bpf_func_proto > > *tracing_func_proto(enum bpf_func_id func_id) > > return &bpf_get_prandom_u32_proto; > > case BPF_FUNC_probe_read_str: > > return &bpf_probe_read_str_proto; > > + case BPF_FUNC_get_current_pidns_info: > > + return &bpf_get_current_pidns_info_proto; > > default: > > return NULL; > > } > > diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile > > index 87246be..0af924f 100644 > > --- a/samples/bpf/Makefile > > +++ b/samples/bpf/Makefile > > @@ -37,6 +37,7 @@ hostprogs-y += xdp_tx_iptunnel > > hostprogs-y += test_map_in_map > > hostprogs-y += per_socket_stats_example > > hostprogs-y += load_sock_ops > > +hostprogs-y += trace_ns_info > > > > # Libbpf dependencies > > LIBBPF := ../../tools/lib/bpf/bpf.o > > @@ -78,7 +79,7 @@ lwt_len_hist-objs := bpf_load.o $(LIBBPF) > > lwt_len_hist_user.o > > xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o > > test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o > > per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o > > - > > +trace_ns_info-objs := bpf_load.o $(LIBBPF) trace_ns_info_user.o > > # Tell kbuild to always build the programs > > always := $(hostprogs-y) > > always += sockex1_kern.o > > @@ -119,6 +120,7 @@ always += tcp_bufs_kern.o > > always += tcp_cong_kern.o > > always += tcp_iw_kern.o > > always += tcp_clamp_kern.o > > +always += trace_ns_info_user_kern.o > > > > HOSTCFLAGS += -I$(objtree)/usr/include > > HOSTCFLAGS += -I$(srctree)/tools/lib/ > > @@ -155,6 +157,7 @@ HOSTLOADLIBES_tc_l2_redirect += -l elf > > HOSTLOADLIBES_lwt_len_hist += -l elf > > HOSTLOADLIBES_xdp_tx_iptunnel += -lelf > > HOSTLOADLIBES_test_map_in_map += -lelf > > +HOSTLOADLIBES_trace_ns_info += -lelf > > > > # Allows pointing LLC/CLANG to a LLVM backend with bpf support, > redefine on > > cmdline: > > # make samples/bpf/ LLC=~/git/llvm/build/bin/llc > > CLANG=~/git/llvm/build/bin/clang > > diff --git a/samples/bpf/trace_ns_info_user.c > > b/samples/bpf/trace_ns_info_user.c > > new file mode 100644 > > index 0000000..0dce083 > > --- /dev/null > > +++ b/samples/bpf/trace_ns_info_user.c > > @@ -0,0 +1,32 @@ > > +/* Copyright (c) 2017 cneirabus...@gmail.com > > + * > > + * This program is free software; you can redistribute it and/or > > + * modify it under the terms of version 2 of the GNU General Public > > + * License as published by the Free Software Foundation. > > + */ > > + > > +#include <stdio.h> > > +#include <linux/bpf.h> > > +#include <unistd.h> > > +#include "libbpf.h" > > +#include "bpf_load.h" > > + > > +int main(int ac, char **argv) > > +{ > > + FILE *f; > > + char filename[256]; > > + > > + snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]); > > + printf("loading %s\n",filename); > > + > > + > > + if (load_bpf_file(filename)) { > > + printf("%s", bpf_log_buf); > > + return 1; > > + } > > + > > + f = popen("taskset 1 ping localhost", "r"); > > + (void) f; > > + read_trace_pipe(); > > The test does not really test return values. > The following test is probably better: > bpf prog: > get pidns_info and put it into the map. > > user space app: > 1. get device, nsid, pid from getpid(), "stat -L /proc/<pid>/ns/pid". > I think you could use "stat" syscall to do this as well. > 2. attach bpf prog to some kernel operation, for example, > an tracepoint or kernel function. > 3. add some operation to trigger the tracepoint or kernel funciton > (there are some examples in kernel:samples/bpf you can look.) > 4. get the pidns_info from the bpf map and compare > the result is the same. > > > + return 0; > > +} > > diff --git a/samples/bpf/trace_ns_info_user_kern.c > > b/samples/bpf/trace_ns_info_user_kern.c > > new file mode 100644 > > index 0000000..f8ed052 > > --- /dev/null > > +++ b/samples/bpf/trace_ns_info_user_kern.c > > @@ -0,0 +1,40 @@ > > +/* Copyright (c) 2017 cneirabus...@gmail.com > > + * > > + * This program is free software; you can redistribute it and/or > > + * modify it under the terms of version 2 of the GNU General Public > > + * License as published by the Free Software Foundation. > > + */ > > +#include <linux/skbuff.h> > > +#include <linux/netdevice.h> > > +#include <linux/version.h> > > +#include <uapi/linux/bpf.h> > > +#include "bpf_helpers.h" > > + > > +struct pidns_info { > > + u64 ns_id; > > + u32 ns_tgid; > > + u32 ns_pid; > > +}; > > + > > +/* kprobe is NOT a stable ABI > > + * kernel functions can be removed, renamed or completely change > semantics. > > + * Number of arguments and their positions can change, etc. > > + * In such case this bpf+kprobe example will no longer be meaningful > > + */ > > +SEC("kprobe/__netif_receive_skb_core") > > +int bpf_prog1(struct pt_regs *ctx) > > +{ > > + struct pidns_info nsinfo; > > + int ok=0; > > + char fmt[] = "ns_id %lld tgid %d pid %d\n"; > > + ok = bpf_get_current_pidns_info(&nsinfo,sizeof (nsinfo)); > > + > > + if(!ok) > > + bpf_trace_printk(fmt, sizeof(fmt), nsinfo.ns_id, > > + nsinfo.ns_tgid, nsinfo.ns_pid); > > + > > + return 0; > > +} > > + > > +char _license[] SEC("license") = "GPL"; > > +u32 _version SEC("version") = LINUX_VERSION_CODE; > > diff --git a/tools/testing/selftests/bpf/bpf_helpers.h > > b/tools/testing/selftests/bpf/bpf_helpers.h > > index d50ac34..202102d 100644 > > --- a/tools/testing/selftests/bpf/bpf_helpers.h > > +++ b/tools/testing/selftests/bpf/bpf_helpers.h > > @@ -64,6 +64,8 @@ static int (*bpf_setsockopt)(void *ctx, int level, int > > optname, void *optval, > > int optlen) = > > (void *) BPF_FUNC_setsockopt; > > > > +static int (*bpf_get_current_pidns_info)(void *buf, int buf_size) = > > + (void *) BPF_FUNC_get_current_pidns_info; > > /* llvm builtin functions that eBPF C program may use to > > * emit BPF_LD_ABS and BPF_LD_IND instructions > > */ >
_______________________________________________ iovisor-dev mailing list iovisor-dev@lists.iovisor.org https://lists.iovisor.org/mailman/listinfo/iovisor-dev