Hi,

I'll add the device major/minor numbers into the pidns_info structure and
work on those 4 points.
Thanks for your guidance on this.

Bests

On Sat, Sep 30, 2017 at 9:55 PM, Y Song <ys114...@gmail.com> wrote:

> On Thu, Sep 28, 2017 at 2:02 PM, carlos antonio neira bustos
> <cneirabus...@gmail.com> wrote:
> > Hi All,
> >
> > I'm still working this issue https://github.com/iovisor/bcc/issues/1329.
> > I have added tests under samples/bpf, here is test calling this new
> helper.
> >
> > Inside the container
> >
> > ping-10619 [000] d.s1  5319.547909: 0x00000001: ns_id 4026532197 tgid
> 10619
> > pid 10619
> >
> > Outside the container
> >
> > ping-12174 [000] d.s1  5480.582818: 0x00000001: ns_id 4026531836 tgid
> 12174
> > pid 12174
> >
> >
> > Let me know if something needs to be changed.
> >
> > Thanks again for your help and comments.
> >
> > Here is the patch
> >
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index b69e7a5..34b608e 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -227,6 +227,12 @@ struct bpf_event_entry {
> >   struct rcu_head rcu;
> >  };
> >
> > +struct bpf_current_pidns_info {
> > +        u64 ns_id;
> > +        u32 tgid;
> > +        u32 pid;
> > +};
>
> During linux plumbers conference, I talked to
> Eric Biederman (kernel namespace maintainer).
> The below is an example output of "stat -L /proc/self/ns/pid":
>
> -bash-4.3$ stat -L /proc/self/ns/pid
>   File: '/proc/self/ns/pid'
>   Size: 0             Blocks: 0          IO Block: 4096   regular empty
> file
> Device: 3h/3d    Inode: 4026531836  Links: 1
> Access: (0444/-r--r--r--)  Uid: (    0/    root)   Gid: (    0/    root)
> Context: system_u:object_r:nsfs_t:s0
> Access: 2017-09-28 21:21:41.496571299 -0700
> Modify: 2017-09-28 21:21:41.496571299 -0700
> Change: 2017-09-28 21:21:41.496571299 -0700
>  Birth: -
> -bash-4.3$
>
> You will notice that there is a "Device" field with major and minor
> number. Currently, all namespace files will have the same device.
> However, in the future, it is possible (maybe under really rare
> cases) that different pid_ns files may belong to different device.
>
> So he suggests that we should put device major/minor numbers
> in the pidns_info structure as well. Could you help do that as well?
>
> > +
> >  u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
> >  u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
> >
> > @@ -375,6 +381,8 @@ extern const struct bpf_func_proto
> > bpf_skb_vlan_push_proto;
> >  extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
> >  extern const struct bpf_func_proto bpf_get_stackid_proto;
> >
> > +extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;
> > +
> >  /* Shared helpers among cBPF and eBPF. */
> >  void bpf_user_rnd_init_once(void);
> >  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index e99e3e6..c1b94fa 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -539,6 +539,15 @@ union bpf_attr {
> >   *     @mode: operation mode (enum bpf_adj_room_mode)
> >   *     @flags: reserved for future use
> >   *     Return: 0 on success or negative error code
> > + *
> > + * int bpf_get_current_pidns_info(void *buf, int size_of_buf)
> > + *     stores the following  namespace data into
> > + *     bpf_current_pins_info struct:
> > + *     namespace id
> > + *     tgid inside namespace
> > + *     pid  inside namespace
> > + *     Return: 0 on success or negative error
> > + *
> >   */
> >  #define __BPF_FUNC_MAPPER(FN)  \
> >   FN(unspec),   \
> > @@ -591,7 +600,9 @@ union bpf_attr {
> >   FN(get_socket_uid),  \
> >   FN(set_hash),   \
> >   FN(setsockopt),   \
> > - FN(skb_adjust_room),
> > + FN(skb_adjust_room),            \
> > + FN(get_current_pidns_info),
> > +
> >
> >  /* integer value in 'imm' field of BPF_CALL instruction selects which
> > helper
> >   * function eBPF program intends to call
> > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> > index ad5f559..c81ffa0 100644
> > --- a/kernel/bpf/core.c
> > +++ b/kernel/bpf/core.c
> > @@ -1379,6 +1379,9 @@ const struct bpf_func_proto
> > bpf_get_current_pid_tgid_proto __weak;
> >  const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
> >  const struct bpf_func_proto bpf_get_current_comm_proto __weak;
> >
> > +const struct bpf_func_proto bpf_get_current_pidns_info __weak;
> > +
> > +
> >  const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
> >  {
> >   return NULL;
> > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > index 3d24e23..682d623 100644
> > --- a/kernel/bpf/helpers.c
> > +++ b/kernel/bpf/helpers.c
> > @@ -18,6 +18,7 @@
> >  #include <linux/sched.h>
> >  #include <linux/uidgid.h>
> >  #include <linux/filter.h>
> > +#include <linux/pid_namespace.h>
> >
> >  /* If kernel subsystem is allowing eBPF programs to call this function,
> >   * inside its own verifier_ops->get_func_proto() callback it should
> return
> > @@ -179,3 +180,45 @@ const struct bpf_func_proto
> bpf_get_current_comm_proto
> > = {
> >   .arg1_type = ARG_PTR_TO_UNINIT_MEM,
> >   .arg2_type = ARG_CONST_SIZE,
> >  };
> > +
> > +BPF_CALL_2(bpf_get_current_pidns_info, void *, buf, u32, size)
> > +{
> > +
> > +        if (!buf)
> > +                return -EINVAL;
> > +
> > +        struct task_struct *ts = current;
> > +        struct task_struct *ns_task = NULL;
> > +        const struct cred  *cred = NULL;
> > +        pid_t pid = 0;
> > +
> > +        if (unlikely(!ts))
> > +         return -EINVAL;
> > +
> > +        ((struct bpf_current_pidns_info*)buf)->ns_id =
> > +         ts->nsproxy->pid_ns_for_children->ns.inum;
> > +
> > +        pid = task_pid_nr_ns(ts, ts->nsproxy->pid_ns_for_children);
> > +
> > +        if (!pid)
> > +        return -EINVAL;
> > +
> > +        ns_task = find_task_by_pid_ns(pid,
> > ts->nsproxy->pid_ns_for_children);
> >
>
> I did not check the code. But pid name space could be deeply nested.
> Have you tested the case for more than two level nesting?
>
> > +        if (unlikely(!ns_task))
> > +                return -EINVAL;
> > +
> > +        ((struct bpf_current_pidns_info*)buf)->tgid = ns_task->tgid;
> > +        ((struct bpf_current_pidns_info*)buf)->pid = ns_task->pid;
> > +
> > +        return 0;
> > +}
> > +
> > +
> > +const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
> > +        .func           = bpf_get_current_pidns_info,
> > +        .gpl_only       = false,
> > +        .ret_type       = RET_INTEGER,
> > +        .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
> > +        .arg2_type      = ARG_CONST_SIZE,
> > +};
> > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > index dc498b6..06297e7 100644
> > --- a/kernel/trace/bpf_trace.c
> > +++ b/kernel/trace/bpf_trace.c
> > @@ -487,6 +487,8 @@ static const struct bpf_func_proto
> > *tracing_func_proto(enum bpf_func_id func_id)
> >    return &bpf_get_prandom_u32_proto;
> >   case BPF_FUNC_probe_read_str:
> >    return &bpf_probe_read_str_proto;
> > + case BPF_FUNC_get_current_pidns_info:
> > +  return &bpf_get_current_pidns_info_proto;
> >   default:
> >    return NULL;
> >   }
> > diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
> > index 87246be..0af924f 100644
> > --- a/samples/bpf/Makefile
> > +++ b/samples/bpf/Makefile
> > @@ -37,6 +37,7 @@ hostprogs-y += xdp_tx_iptunnel
> >  hostprogs-y += test_map_in_map
> >  hostprogs-y += per_socket_stats_example
> >  hostprogs-y += load_sock_ops
> > +hostprogs-y += trace_ns_info
> >
> >  # Libbpf dependencies
> >  LIBBPF := ../../tools/lib/bpf/bpf.o
> > @@ -78,7 +79,7 @@ lwt_len_hist-objs := bpf_load.o $(LIBBPF)
> > lwt_len_hist_user.o
> >  xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o
> >  test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
> >  per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
> > -
> > +trace_ns_info-objs := bpf_load.o $(LIBBPF) trace_ns_info_user.o
> >  # Tell kbuild to always build the programs
> >  always := $(hostprogs-y)
> >  always += sockex1_kern.o
> > @@ -119,6 +120,7 @@ always += tcp_bufs_kern.o
> >  always += tcp_cong_kern.o
> >  always += tcp_iw_kern.o
> >  always += tcp_clamp_kern.o
> > +always += trace_ns_info_user_kern.o
> >
> >  HOSTCFLAGS += -I$(objtree)/usr/include
> >  HOSTCFLAGS += -I$(srctree)/tools/lib/
> > @@ -155,6 +157,7 @@ HOSTLOADLIBES_tc_l2_redirect += -l elf
> >  HOSTLOADLIBES_lwt_len_hist += -l elf
> >  HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
> >  HOSTLOADLIBES_test_map_in_map += -lelf
> > +HOSTLOADLIBES_trace_ns_info += -lelf
> >
> >  # Allows pointing LLC/CLANG to a LLVM backend with bpf support,
> redefine on
> > cmdline:
> >  #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc
> > CLANG=~/git/llvm/build/bin/clang
> > diff --git a/samples/bpf/trace_ns_info_user.c
> > b/samples/bpf/trace_ns_info_user.c
> > new file mode 100644
> > index 0000000..0dce083
> > --- /dev/null
> > +++ b/samples/bpf/trace_ns_info_user.c
> > @@ -0,0 +1,32 @@
> > +/* Copyright (c) 2017 cneirabus...@gmail.com
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of version 2 of the GNU General Public
> > + * License as published by the Free Software Foundation.
> > + */
> > +
> > +#include <stdio.h>
> > +#include <linux/bpf.h>
> > +#include <unistd.h>
> > +#include "libbpf.h"
> > +#include "bpf_load.h"
> > +
> > +int main(int ac, char **argv)
> > +{
> > + FILE *f;
> > + char filename[256];
> > +
> > + snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]);
> > +        printf("loading %s\n",filename);
> > +
> > +
> > + if (load_bpf_file(filename)) {
> > +  printf("%s", bpf_log_buf);
> > +  return 1;
> > + }
> > +
> > + f = popen("taskset 1 ping  localhost", "r");
> > + (void) f;
> > + read_trace_pipe();
>
> The test does not really test return values.
> The following test is probably better:
> bpf prog:
>     get pidns_info and put it into the map.
>
> user space app:
>    1. get device, nsid, pid from getpid(), "stat -L /proc/<pid>/ns/pid".
>        I think you could use "stat" syscall to do this as well.
>    2. attach bpf prog to some kernel operation, for example,
>        an tracepoint or kernel function.
>    3. add some operation to trigger the tracepoint or kernel funciton
>       (there are some examples in kernel:samples/bpf you can look.)
>    4. get the pidns_info from the bpf map and compare
>        the result is the same.
>
> > + return 0;
> > +}
> > diff --git a/samples/bpf/trace_ns_info_user_kern.c
> > b/samples/bpf/trace_ns_info_user_kern.c
> > new file mode 100644
> > index 0000000..f8ed052
> > --- /dev/null
> > +++ b/samples/bpf/trace_ns_info_user_kern.c
> > @@ -0,0 +1,40 @@
> > +/* Copyright (c) 2017 cneirabus...@gmail.com
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of version 2 of the GNU General Public
> > + * License as published by the Free Software Foundation.
> > + */
> > +#include <linux/skbuff.h>
> > +#include <linux/netdevice.h>
> > +#include <linux/version.h>
> > +#include <uapi/linux/bpf.h>
> > +#include "bpf_helpers.h"
> > +
> > +struct pidns_info {
> > +        u64 ns_id;
> > +        u32 ns_tgid;
> > +        u32 ns_pid;
> > +};
> > +
> > +/* kprobe is NOT a stable ABI
> > + * kernel functions can be removed, renamed or completely change
> semantics.
> > + * Number of arguments and their positions can change, etc.
> > + * In such case this bpf+kprobe example will no longer be meaningful
> > + */
> > +SEC("kprobe/__netif_receive_skb_core")
> > +int bpf_prog1(struct pt_regs *ctx)
> > +{
> > +        struct pidns_info nsinfo;
> > +        int ok=0;
> > +        char fmt[] = "ns_id %lld tgid %d pid %d\n";
> > + ok = bpf_get_current_pidns_info(&nsinfo,sizeof (nsinfo));
> > +
> > +       if(!ok)
> > +              bpf_trace_printk(fmt, sizeof(fmt), nsinfo.ns_id,
> > +                                 nsinfo.ns_tgid, nsinfo.ns_pid);
> > +
> > + return 0;
> > +}
> > +
> > +char _license[] SEC("license") = "GPL";
> > +u32 _version SEC("version") = LINUX_VERSION_CODE;
> > diff --git a/tools/testing/selftests/bpf/bpf_helpers.h
> > b/tools/testing/selftests/bpf/bpf_helpers.h
> > index d50ac34..202102d 100644
> > --- a/tools/testing/selftests/bpf/bpf_helpers.h
> > +++ b/tools/testing/selftests/bpf/bpf_helpers.h
> > @@ -64,6 +64,8 @@ static int (*bpf_setsockopt)(void *ctx, int level, int
> > optname, void *optval,
> >          int optlen) =
> >   (void *) BPF_FUNC_setsockopt;
> >
> > +static int (*bpf_get_current_pidns_info)(void *buf, int buf_size) =
> > + (void *) BPF_FUNC_get_current_pidns_info;
> >  /* llvm builtin functions that eBPF C program may use to
> >   * emit BPF_LD_ABS and BPF_LD_IND instructions
> >   */
>
_______________________________________________
iovisor-dev mailing list
iovisor-dev@lists.iovisor.org
https://lists.iovisor.org/mailman/listinfo/iovisor-dev

Reply via email to