Re: [PATCH bpf-next v3 2/7] bpf: introduce bpf subcommand BPF_TASK_FD_QUERY

2018-05-23 Thread Yonghong Song



On 5/23/18 2:04 PM, Alexei Starovoitov wrote:

On Wed, May 23, 2018 at 10:13:22AM -0700, Martin KaFai Lau wrote:

+   __u32   prog_id;/* output: prod_id */
+   __u32   attach_info;/* output: BPF_ATTACH_* */
+   __u64   probe_offset;   /* output: probe_offset */
+   __u64   probe_addr; /* output: probe_addr */
+   } task_fd_query;
  } __attribute__((aligned(8)));
  
  /* The description below is an attempt at providing documentation to eBPF

@@ -2458,4 +2475,14 @@ struct bpf_fib_lookup {
__u8dmac[6]; /* ETH_ALEN */
  };
  
+/* used by  based query */

+enum {

Nit. Instead of a comment, is it better to give this
enum a descriptive name?


+   BPF_ATTACH_RAW_TRACEPOINT,  /* tp name */
+   BPF_ATTACH_TRACEPOINT,  /* tp name */
+   BPF_ATTACH_KPROBE,  /* (symbol + offset) or addr */
+   BPF_ATTACH_KRETPROBE,   /* (symbol + offset) or addr */
+   BPF_ATTACH_UPROBE,  /* filename + offset */
+   BPF_ATTACH_URETPROBE,   /* filename + offset */
+};


One more nit here.
Can we come up with better names for the above?
'attach' is a verb. I cannot help but read above as it's an action
for the kernel to attach to something and not the type of event
where the program was attached to.
Since we pass task+fd into that BPF_TASK_FD_QUERY command how
about returning BPF_FD_TYPE_KPROBE, BPF_FD_TYPE_TRACEPOINT, ... ?


Okay will use BPF_FD_TYPE_*... which is indeed better than
BPF_ATTACH_*.


Re: [PATCH bpf-next v3 2/7] bpf: introduce bpf subcommand BPF_TASK_FD_QUERY

2018-05-23 Thread Yonghong Song



On 5/23/18 10:13 AM, Martin KaFai Lau wrote:

On Tue, May 22, 2018 at 09:30:46AM -0700, Yonghong Song wrote:

Currently, suppose a userspace application has loaded a bpf program
and attached it to a tracepoint/kprobe/uprobe, and a bpf
introspection tool, e.g., bpftool, wants to show which bpf program
is attached to which tracepoint/kprobe/uprobe. Such attachment
information will be really useful to understand the overall bpf
deployment in the system.

There is a name field (16 bytes) for each program, which could
be used to encode the attachment point. There are some drawbacks
for this approaches. First, bpftool user (e.g., an admin) may not
really understand the association between the name and the
attachment point. Second, if one program is attached to multiple
places, encoding a proper name which can imply all these
attachments becomes difficult.

This patch introduces a new bpf subcommand BPF_TASK_FD_QUERY.
Given a pid and fd, if the  is associated with a
tracepoint/kprobe/uprobe perf event, BPF_TASK_FD_QUERY will return
. prog_id
. tracepoint name, or
. k[ret]probe funcname + offset or kernel addr, or
. u[ret]probe filename + offset
to the userspace.
The user can use "bpftool prog" to find more information about
bpf program itself with prog_id.

LGTM, some comments inline.



Signed-off-by: Yonghong Song 
---
  include/linux/trace_events.h |  16 ++
  include/uapi/linux/bpf.h |  27 ++
  kernel/bpf/syscall.c | 124 +++
  kernel/trace/bpf_trace.c |  48 +
  kernel/trace/trace_kprobe.c  |  29 ++
  kernel/trace/trace_uprobe.c  |  22 
  6 files changed, 266 insertions(+)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 2bde3ef..eab806d 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -473,6 +473,9 @@ int perf_event_query_prog_array(struct perf_event *event, 
void __user *info);
  int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
  int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog 
*prog);
  struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
+int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
+   u32 *attach_info, const char **buf,
+   u64 *probe_offset, u64 *probe_addr);

The first arg is 'const struct perf_event *event' while...


  #else
  static inline unsigned int trace_call_bpf(struct trace_event_call *call, void 
*ctx)
  {
@@ -504,6 +507,12 @@ static inline struct bpf_raw_event_map 
*bpf_find_raw_tracepoint(const char *name
  {
return NULL;
  }
+static inline int bpf_get_perf_event_info(const struct file *file, u32 
*prog_id,

this one has 'const struct file *file'?


Thanks for catching this. Will correct this in the next revision.




+ u32 *attach_info, const char **buf,
+ u64 *probe_offset, u64 *probe_addr)
+{
+   return -EOPNOTSUPP;
+}
  #endif
  
  enum {

@@ -560,10 +569,17 @@ extern void perf_trace_del(struct perf_event *event, int 
flags);
  #ifdef CONFIG_KPROBE_EVENTS
  extern int  perf_kprobe_init(struct perf_event *event, bool is_retprobe);
  extern void perf_kprobe_destroy(struct perf_event *event);
+extern int bpf_get_kprobe_info(const struct perf_event *event,
+  u32 *attach_info, const char **symbol,
+  u64 *probe_offset, u64 *probe_addr,
+  bool perf_type_tracepoint);
  #endif
  #ifdef CONFIG_UPROBE_EVENTS
  extern int  perf_uprobe_init(struct perf_event *event, bool is_retprobe);
  extern void perf_uprobe_destroy(struct perf_event *event);
+extern int bpf_get_uprobe_info(const struct perf_event *event,
+  u32 *attach_info, const char **filename,
+  u64 *probe_offset, bool perf_type_tracepoint);
  #endif
  extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
 char *filter_str);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 97446bb..a602150 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -97,6 +97,7 @@ enum bpf_cmd {
BPF_RAW_TRACEPOINT_OPEN,
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
+   BPF_TASK_FD_QUERY,
  };
  
  enum bpf_map_type {

@@ -379,6 +380,22 @@ union bpf_attr {
__u32   btf_log_size;
__u32   btf_log_level;
};
+
+   struct {
+   int pid;/* input: pid */
+   int fd; /* input: fd */

Should fd and pid be always positive?
The current fd (like map_fd) in bpf_attr is using __u32.


Will change both pid and fd to __u32. In kernel fd is unsigned, but
pid_t is 

Re: [PATCH bpf-next v3 2/7] bpf: introduce bpf subcommand BPF_TASK_FD_QUERY

2018-05-23 Thread Alexei Starovoitov
On Wed, May 23, 2018 at 10:13:22AM -0700, Martin KaFai Lau wrote:
> > +   __u32   prog_id;/* output: prod_id */
> > +   __u32   attach_info;/* output: BPF_ATTACH_* */
> > +   __u64   probe_offset;   /* output: probe_offset */
> > +   __u64   probe_addr; /* output: probe_addr */
> > +   } task_fd_query;
> >  } __attribute__((aligned(8)));
> >  
> >  /* The description below is an attempt at providing documentation to eBPF
> > @@ -2458,4 +2475,14 @@ struct bpf_fib_lookup {
> > __u8dmac[6]; /* ETH_ALEN */
> >  };
> >  
> > +/* used by  based query */
> > +enum {
> Nit. Instead of a comment, is it better to give this
> enum a descriptive name?
> 
> > +   BPF_ATTACH_RAW_TRACEPOINT,  /* tp name */
> > +   BPF_ATTACH_TRACEPOINT,  /* tp name */
> > +   BPF_ATTACH_KPROBE,  /* (symbol + offset) or addr */
> > +   BPF_ATTACH_KRETPROBE,   /* (symbol + offset) or addr */
> > +   BPF_ATTACH_UPROBE,  /* filename + offset */
> > +   BPF_ATTACH_URETPROBE,   /* filename + offset */
> > +};

One more nit here.
Can we come up with better names for the above?
'attach' is a verb. I cannot help but read above as it's an action
for the kernel to attach to something and not the type of event
where the program was attached to.
Since we pass task+fd into that BPF_TASK_FD_QUERY command how
about returning BPF_FD_TYPE_KPROBE, BPF_FD_TYPE_TRACEPOINT, ... ?



Re: [PATCH bpf-next v3 2/7] bpf: introduce bpf subcommand BPF_TASK_FD_QUERY

2018-05-23 Thread Martin KaFai Lau
On Tue, May 22, 2018 at 09:30:46AM -0700, Yonghong Song wrote:
> Currently, suppose a userspace application has loaded a bpf program
> and attached it to a tracepoint/kprobe/uprobe, and a bpf
> introspection tool, e.g., bpftool, wants to show which bpf program
> is attached to which tracepoint/kprobe/uprobe. Such attachment
> information will be really useful to understand the overall bpf
> deployment in the system.
> 
> There is a name field (16 bytes) for each program, which could
> be used to encode the attachment point. There are some drawbacks
> for this approaches. First, bpftool user (e.g., an admin) may not
> really understand the association between the name and the
> attachment point. Second, if one program is attached to multiple
> places, encoding a proper name which can imply all these
> attachments becomes difficult.
> 
> This patch introduces a new bpf subcommand BPF_TASK_FD_QUERY.
> Given a pid and fd, if the  is associated with a
> tracepoint/kprobe/uprobe perf event, BPF_TASK_FD_QUERY will return
>. prog_id
>. tracepoint name, or
>. k[ret]probe funcname + offset or kernel addr, or
>. u[ret]probe filename + offset
> to the userspace.
> The user can use "bpftool prog" to find more information about
> bpf program itself with prog_id.
LGTM, some comments inline.

> 
> Signed-off-by: Yonghong Song 
> ---
>  include/linux/trace_events.h |  16 ++
>  include/uapi/linux/bpf.h |  27 ++
>  kernel/bpf/syscall.c | 124 
> +++
>  kernel/trace/bpf_trace.c |  48 +
>  kernel/trace/trace_kprobe.c  |  29 ++
>  kernel/trace/trace_uprobe.c  |  22 
>  6 files changed, 266 insertions(+)
> 
> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index 2bde3ef..eab806d 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -473,6 +473,9 @@ int perf_event_query_prog_array(struct perf_event *event, 
> void __user *info);
>  int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
>  int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog 
> *prog);
>  struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
> +int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
> + u32 *attach_info, const char **buf,
> + u64 *probe_offset, u64 *probe_addr);
The first arg is 'const struct perf_event *event' while...

>  #else
>  static inline unsigned int trace_call_bpf(struct trace_event_call *call, 
> void *ctx)
>  {
> @@ -504,6 +507,12 @@ static inline struct bpf_raw_event_map 
> *bpf_find_raw_tracepoint(const char *name
>  {
>   return NULL;
>  }
> +static inline int bpf_get_perf_event_info(const struct file *file, u32 
> *prog_id,
this one has 'const struct file *file'?

> +   u32 *attach_info, const char **buf,
> +   u64 *probe_offset, u64 *probe_addr)
> +{
> + return -EOPNOTSUPP;
> +}
>  #endif
>  
>  enum {
> @@ -560,10 +569,17 @@ extern void perf_trace_del(struct perf_event *event, 
> int flags);
>  #ifdef CONFIG_KPROBE_EVENTS
>  extern int  perf_kprobe_init(struct perf_event *event, bool is_retprobe);
>  extern void perf_kprobe_destroy(struct perf_event *event);
> +extern int bpf_get_kprobe_info(const struct perf_event *event,
> +u32 *attach_info, const char **symbol,
> +u64 *probe_offset, u64 *probe_addr,
> +bool perf_type_tracepoint);
>  #endif
>  #ifdef CONFIG_UPROBE_EVENTS
>  extern int  perf_uprobe_init(struct perf_event *event, bool is_retprobe);
>  extern void perf_uprobe_destroy(struct perf_event *event);
> +extern int bpf_get_uprobe_info(const struct perf_event *event,
> +u32 *attach_info, const char **filename,
> +u64 *probe_offset, bool perf_type_tracepoint);
>  #endif
>  extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
>char *filter_str);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 97446bb..a602150 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -97,6 +97,7 @@ enum bpf_cmd {
>   BPF_RAW_TRACEPOINT_OPEN,
>   BPF_BTF_LOAD,
>   BPF_BTF_GET_FD_BY_ID,
> + BPF_TASK_FD_QUERY,
>  };
>  
>  enum bpf_map_type {
> @@ -379,6 +380,22 @@ union bpf_attr {
>   __u32   btf_log_size;
>   __u32   btf_log_level;
>   };
> +
> + struct {
> + int pid;/* input: pid */
> + int fd; /* input: fd */
Should fd and pid be always positive?
The current fd (like map_fd) in bpf_attr is using __u32.

> + __u32   flags;  /* input: flags */
> +