[PATCH 4/6] seccomp: add a way to access filters via bpf fds

2015-09-04 Thread Tycho Andersen
This patch adds a way for a process that is "real root" to access the
seccomp filters of another process. The process first does a
PTRACE_SECCOMP_GET_FILTER_FD to get an fd with that process' seccomp filter
attached, and then iterates on this with PTRACE_SECCOMP_NEXT_FILTER using
bpf(BPF_PROG_DUMP) to dump the actual program at each step.

Signed-off-by: Tycho Andersen 
CC: Kees Cook 
CC: Will Drewry 
CC: Oleg Nesterov 
CC: Andy Lutomirski 
CC: Pavel Emelyanov 
CC: Serge E. Hallyn 
CC: Alexei Starovoitov 
CC: Daniel Borkmann 
---
 include/linux/bpf.h | 12 ++
 include/linux/seccomp.h | 14 +++
 include/uapi/linux/ptrace.h |  3 +++
 kernel/bpf/syscall.c| 26 -
 kernel/ptrace.c |  7 ++
 kernel/seccomp.c| 57 +
 6 files changed, 118 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4383476..30682dc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -157,6 +157,8 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl);
 void bpf_register_map_type(struct bpf_map_type_list *tl);
 
 struct bpf_prog *bpf_prog_get(u32 ufd);
+int bpf_prog_set(u32 ufd, struct bpf_prog *new);
+int bpf_new_fd(struct bpf_prog *prog, int flags);
 void bpf_prog_put(struct bpf_prog *prog);
 void bpf_prog_put_rcu(struct bpf_prog *prog);
 
@@ -175,6 +177,16 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd)
return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline int bpf_prog_set(u32 ufd, struct bpf_prog *new)
+{
+   return -EINVAL;
+}
+
+static inline int bpf_new_fd(struct bpf_prog *prog, int flags)
+{
+   return -EINVAL;
+}
+
 static inline void bpf_prog_put(struct bpf_prog *prog)
 {
 }
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index f426503..d1a86ed 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -95,4 +95,18 @@ static inline void get_seccomp_filter(struct task_struct 
*tsk)
return;
 }
 #endif /* CONFIG_SECCOMP_FILTER */
+
+#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
+extern long seccomp_get_filter_fd(struct task_struct *child);
+extern long seccomp_next_filter(struct task_struct *child, u32 fd);
+#else
+static inline long seccomp_get_filter_fd(struct task_struct *child)
+{
+   return -EINVAL;
+}
+static inline long seccomp_next_filter(struct task_struct *child, u32 fd)
+{
+   return -EINVAL;
+}
+#endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */
 #endif /* _LINUX_SECCOMP_H */
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index a7a6979..dfd7d2e 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -23,6 +23,9 @@
 
 #define PTRACE_SYSCALL   24
 
+#define PTRACE_SECCOMP_GET_FILTER_FD   40
+#define PTRACE_SECCOMP_NEXT_FILTER 41
+
 /* 0x4200-0x4300 are reserved for architecture-independent additions.  */
 #define PTRACE_SETOPTIONS  0x4200
 #define PTRACE_GETEVENTMSG 0x4201
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ee580d0..58e7421 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -506,6 +506,30 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_get);
 
+int bpf_prog_set(u32 ufd, struct bpf_prog *new)
+{
+   struct fd f;
+   struct bpf_prog *prog;
+
+   f = fdget(ufd);
+
+   prog = get_prog(f);
+   if (!IS_ERR(prog) && prog)
+   bpf_prog_put(prog);
+
+   atomic_inc(>aux->refcnt);
+   f.file->private_data = new;
+   fdput(f);
+   return 0;
+}
+EXPORT_SYMBOL_GPL(bpf_prog_set);
+
+int bpf_new_fd(struct bpf_prog *prog, int flags)
+{
+   return anon_inode_getfd("bpf-prog", _prog_fops, prog, flags);
+}
+EXPORT_SYMBOL_GPL(bpf_new_fd);
+
 /* last field in 'union bpf_attr' used by this command */
 #defineBPF_PROG_LOAD_LAST_FIELD kern_version
 
@@ -572,7 +596,7 @@ static int bpf_prog_load(union bpf_attr *attr)
if (err < 0)
goto free_used_maps;
 
-   err = anon_inode_getfd("bpf-prog", _prog_fops, prog, O_RDWR | 
O_CLOEXEC);
+   err = bpf_new_fd(prog, O_RDWR | O_CLOEXEC);
if (err < 0)
/* failed to allocate fd */
goto free_used_maps;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 787320d..4e4b534 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -1016,6 +1016,13 @@ int ptrace_request(struct task_struct *child, long 
request,
break;
}
 #endif
+
+   case PTRACE_SECCOMP_GET_FILTER_FD:
+   return seccomp_get_filter_fd(child);
+
+   case PTRACE_SECCOMP_NEXT_FILTER:
+   return seccomp_next_filter(child, data);
+
default:
break;
}

Re: [PATCH 4/6] seccomp: add a way to access filters via bpf fds

2015-09-04 Thread Kees Cook
On Fri, Sep 4, 2015 at 9:04 AM, Tycho Andersen
 wrote:
> This patch adds a way for a process that is "real root" to access the
> seccomp filters of another process. The process first does a
> PTRACE_SECCOMP_GET_FILTER_FD to get an fd with that process' seccomp filter
> attached, and then iterates on this with PTRACE_SECCOMP_NEXT_FILTER using
> bpf(BPF_PROG_DUMP) to dump the actual program at each step.

Why is this a new ptrace interface instead of a new seccomp interface?
I would expect this to only be valid for "current", otherwise we could
run into races as the ptracee adds filters. i.e. it is not safe to
examine seccomp filters from tasks other than current.

-Kees

>
> Signed-off-by: Tycho Andersen 
> CC: Kees Cook 
> CC: Will Drewry 
> CC: Oleg Nesterov 
> CC: Andy Lutomirski 
> CC: Pavel Emelyanov 
> CC: Serge E. Hallyn 
> CC: Alexei Starovoitov 
> CC: Daniel Borkmann 
> ---
>  include/linux/bpf.h | 12 ++
>  include/linux/seccomp.h | 14 +++
>  include/uapi/linux/ptrace.h |  3 +++
>  kernel/bpf/syscall.c| 26 -
>  kernel/ptrace.c |  7 ++
>  kernel/seccomp.c| 57 
> +
>  6 files changed, 118 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 4383476..30682dc 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -157,6 +157,8 @@ void bpf_register_prog_type(struct bpf_prog_type_list 
> *tl);
>  void bpf_register_map_type(struct bpf_map_type_list *tl);
>
>  struct bpf_prog *bpf_prog_get(u32 ufd);
> +int bpf_prog_set(u32 ufd, struct bpf_prog *new);
> +int bpf_new_fd(struct bpf_prog *prog, int flags);
>  void bpf_prog_put(struct bpf_prog *prog);
>  void bpf_prog_put_rcu(struct bpf_prog *prog);
>
> @@ -175,6 +177,16 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd)
> return ERR_PTR(-EOPNOTSUPP);
>  }
>
> +static inline int bpf_prog_set(u32 ufd, struct bpf_prog *new)
> +{
> +   return -EINVAL;
> +}
> +
> +static inline int bpf_new_fd(struct bpf_prog *prog, int flags)
> +{
> +   return -EINVAL;
> +}
> +
>  static inline void bpf_prog_put(struct bpf_prog *prog)
>  {
>  }
> diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
> index f426503..d1a86ed 100644
> --- a/include/linux/seccomp.h
> +++ b/include/linux/seccomp.h
> @@ -95,4 +95,18 @@ static inline void get_seccomp_filter(struct task_struct 
> *tsk)
> return;
>  }
>  #endif /* CONFIG_SECCOMP_FILTER */
> +
> +#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
> +extern long seccomp_get_filter_fd(struct task_struct *child);
> +extern long seccomp_next_filter(struct task_struct *child, u32 fd);
> +#else
> +static inline long seccomp_get_filter_fd(struct task_struct *child)
> +{
> +   return -EINVAL;
> +}
> +static inline long seccomp_next_filter(struct task_struct *child, u32 fd)
> +{
> +   return -EINVAL;
> +}
> +#endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */
>  #endif /* _LINUX_SECCOMP_H */
> diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
> index a7a6979..dfd7d2e 100644
> --- a/include/uapi/linux/ptrace.h
> +++ b/include/uapi/linux/ptrace.h
> @@ -23,6 +23,9 @@
>
>  #define PTRACE_SYSCALL   24
>
> +#define PTRACE_SECCOMP_GET_FILTER_FD   40
> +#define PTRACE_SECCOMP_NEXT_FILTER 41
> +
>  /* 0x4200-0x4300 are reserved for architecture-independent additions.  */
>  #define PTRACE_SETOPTIONS  0x4200
>  #define PTRACE_GETEVENTMSG 0x4201
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index ee580d0..58e7421 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -506,6 +506,30 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
>  }
>  EXPORT_SYMBOL_GPL(bpf_prog_get);
>
> +int bpf_prog_set(u32 ufd, struct bpf_prog *new)
> +{
> +   struct fd f;
> +   struct bpf_prog *prog;
> +
> +   f = fdget(ufd);
> +
> +   prog = get_prog(f);
> +   if (!IS_ERR(prog) && prog)
> +   bpf_prog_put(prog);
> +
> +   atomic_inc(>aux->refcnt);
> +   f.file->private_data = new;
> +   fdput(f);
> +   return 0;
> +}
> +EXPORT_SYMBOL_GPL(bpf_prog_set);
> +
> +int bpf_new_fd(struct bpf_prog *prog, int flags)
> +{
> +   return anon_inode_getfd("bpf-prog", _prog_fops, prog, flags);
> +}
> +EXPORT_SYMBOL_GPL(bpf_new_fd);
> +
>  /* last field in 'union bpf_attr' used by this command */
>  #defineBPF_PROG_LOAD_LAST_FIELD kern_version
>
> @@ -572,7 +596,7 @@ static int bpf_prog_load(union bpf_attr *attr)
> if (err < 0)
> goto free_used_maps;
>
> -   err = anon_inode_getfd("bpf-prog", _prog_fops, prog, O_RDWR | 
> O_CLOEXEC);
> +   err = bpf_new_fd(prog, 

Re: [PATCH 4/6] seccomp: add a way to access filters via bpf fds

2015-09-04 Thread Tycho Andersen
On Fri, Sep 04, 2015 at 01:29:49PM -0700, Alexei Starovoitov wrote:
> On Fri, Sep 04, 2015 at 01:26:42PM -0700, Kees Cook wrote:
> > On Fri, Sep 4, 2015 at 9:04 AM, Tycho Andersen
> >  wrote:
> > > This patch adds a way for a process that is "real root" to access the
> > > seccomp filters of another process. The process first does a
> > > PTRACE_SECCOMP_GET_FILTER_FD to get an fd with that process' seccomp 
> > > filter
> > > attached, and then iterates on this with PTRACE_SECCOMP_NEXT_FILTER using
> > > bpf(BPF_PROG_DUMP) to dump the actual program at each step.
> > 
> > Why is this a new ptrace interface instead of a new seccomp interface?
> > I would expect this to only be valid for "current", otherwise we could
> > run into races as the ptracee adds filters.

The task has to be in the stopped state in order for the filters to be
inspected, so I think this isn't a problem.

> > i.e. it is not safe to
> > examine seccomp filters from tasks other than current.
> 
> same question.
> I thought we discussed to add a command to seccomp() syscall for that?

We did initially, although at the end Andy posted about not allowing
tasks to see some filters that had been installed:

On Mon, 10 Aug 2015 14:36:09 -0700 Andy Lutomirski
 wrote:
> On Mon, Aug 10, 2015 at 2:31 PM, Tycho Andersen
>  wrote:
> > On Mon, Aug 10, 2015 at 02:18:29PM -0700, Kees Cook wrote:
> >> On Sun, Aug 9, 2015 at 7:20 PM, Andy Lutomirski
> >>  wrote:
> >> > On Fri, Aug 7, 2015 at 10:59 AM, Tycho Andersen
> >> >  wrote:
> >> >> On Fri, Aug 07, 2015 at 10:36:02AM -0700, Andy Lutomirski wrote:
> >> >>> On Fri, Aug 7, 2015 at 8:45 AM, Tycho Andersen
> >> >>>  wrote:
> >> >>> >
> >> >>> > fd = seccomp(SECCOMP_GET_FILTER_FD);
> >> >>>
> >> >>>
> >> >
> >> > Let me propose something crazy.  First, some scenarios:
> >> >
> >> > Scenario A: A program runs in a sandbox.  It tries to interrogate
> >> > its
> >> > seccomp filter.  I think it would be fine, and maybe even
> >> > beneficial,
> >> > if this didn't work (or pretended there was no filter).
> >> >
> >> > Scenario B: A shell runs in a sandbox.  The user fires up some
> >> > program
> >> > in that shell and then, *still in that shell*, checkpoints it
> >> > with
> >> > CRIU.  I think CRIU shouldn't see the seccomp filter.
> >> >
> >> > Scenario C: A program runs in a sandbox.  CRIU, *in a different
> >> > sandbox*, tries to checkpoint that program.  IMO this is doomed
> >> > to
> >> > eventual failure no matter what the kernel does: restoring the
> >> > program
> >> > isn't going to work as expected.
> >> >
> >> > Scenario D: A shell runs in a sandbox.  Someone fires up a
> >> > seccomp-using program (with in inner filter) under that shell and
> >> > then
> >> > separately checkpoints it from inside the shell.  I think CRIU
> >> > should
> >> > see the inner filter but not the outer filter.
> >> >
> >> > So here's my crazy proposal: If process A tries to read process
> >> > B's
> >> > seccomp state, the kernel could check whether B's seccomp state
> >> > is a
> >> > descendent of A's.  If so, then the attempt to read B's seccomp
> >> > state
> >> > should see only the part that stricly descends from A's state.
> >> > (If
> >> > B's and A's states are the same under a referential equality
> >> > test,
> >> > then this means A should see no seccomp state at all).  If, on
> >> > the
> >> > other hand, B's state is not a descendent of A's state, then the
> >> > read
> >> > call should fail.
> >> >
> >> > Thoughts?
> >> >
> >> > At the very least, this means that no one ever needs to worry
> >> > about
> >> > preventing seccomp state reads in their seccomp filter program,
> >> > since
> >> > the filter is invisible from inside the sandbox using that
> >> > filter.
> >>
> >> I don't oppose this idea, but I think our first pass at CRIU can
> >> just
> >> be to work from the init namespace. I don't mind rejecting
> >> filter-reading requests when CAP_SYS_ADMIN is missing or something
> >> like that.
> >
> > The problem is that we can't do a CAP_SYS_ADMIN check, at least with
> > the proposed interface, since the seccomp() syscall will only tell
> > the
> > process about it's own filters. I was going to just have the
> > PT_SUSPEND_SECCOMP flag we use also affect whether seccomp() would
> > allow a task to inspect its own filters.
> >
> > If we do that, the result is that without PT_SUSPEND_SECCOMP, a task
> > always gets EACCES (or an empty list, whichever is better), and with
> > PT_SUSPEND_SECCOMP it always gets the real filters.
> 
> Hmm.  IMO this interface has the downside that we can't really
> implement my crazy proposal on top of it.  Why do we need the task to
> be able to read its own filters at all?  Wouldn't it be easier if the
> ptracer could read the tracee's filters directly?  (Even if it
> required that the tracee be stopped?)

Re: [PATCH 4/6] seccomp: add a way to access filters via bpf fds

2015-09-04 Thread Alexei Starovoitov
On Fri, Sep 04, 2015 at 01:26:42PM -0700, Kees Cook wrote:
> On Fri, Sep 4, 2015 at 9:04 AM, Tycho Andersen
>  wrote:
> > This patch adds a way for a process that is "real root" to access the
> > seccomp filters of another process. The process first does a
> > PTRACE_SECCOMP_GET_FILTER_FD to get an fd with that process' seccomp filter
> > attached, and then iterates on this with PTRACE_SECCOMP_NEXT_FILTER using
> > bpf(BPF_PROG_DUMP) to dump the actual program at each step.
> 
> Why is this a new ptrace interface instead of a new seccomp interface?
> I would expect this to only be valid for "current", otherwise we could
> run into races as the ptracee adds filters. i.e. it is not safe to
> examine seccomp filters from tasks other than current.

same question.
I thought we discussed to add a command to seccomp() syscall for that?

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html