Re: [PATCH v6 6/9] seccomp: add "seccomp" syscall
Hi Kees, On Wed, Jun 11, 2014 at 5:25 AM, Kees Cook wrote: > This adds the new "seccomp" syscall with both an "operation" and "flags" > parameter for future expansion. The third argument is a pointer value, > used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must > be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). I assume there'll be another iteration of these patches. With that next iteration, could you write a man page (or at least free text structured like a man page) that comprehensively documents the user-space API? Thanks, Michael > Signed-off-by: Kees Cook > Cc: linux-...@vger.kernel.org > --- > arch/x86/syscalls/syscall_32.tbl |1 + > arch/x86/syscalls/syscall_64.tbl |1 + > include/linux/syscalls.h |2 ++ > include/uapi/asm-generic/unistd.h |4 ++- > include/uapi/linux/seccomp.h |4 +++ > kernel/seccomp.c | 63 > - > kernel/sys_ni.c |3 ++ > 7 files changed, 69 insertions(+), 9 deletions(-) > > diff --git a/arch/x86/syscalls/syscall_32.tbl > b/arch/x86/syscalls/syscall_32.tbl > index d6b867921612..7527eac24122 100644 > --- a/arch/x86/syscalls/syscall_32.tbl > +++ b/arch/x86/syscalls/syscall_32.tbl > @@ -360,3 +360,4 @@ > 351i386sched_setattr sys_sched_setattr > 352i386sched_getattr sys_sched_getattr > 353i386renameat2 sys_renameat2 > +354i386seccomp sys_seccomp > diff --git a/arch/x86/syscalls/syscall_64.tbl > b/arch/x86/syscalls/syscall_64.tbl > index ec255a1646d2..16272a6c12b7 100644 > --- a/arch/x86/syscalls/syscall_64.tbl > +++ b/arch/x86/syscalls/syscall_64.tbl > @@ -323,6 +323,7 @@ > 314common sched_setattr sys_sched_setattr > 315common sched_getattr sys_sched_getattr > 316common renameat2 sys_renameat2 > +317common seccomp sys_seccomp > > # > # x32-specific system call numbers start at 512 to avoid cache impact > diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h > index b0881a0ed322..1713977ee26f 100644 > --- a/include/linux/syscalls.h > +++ b/include/linux/syscalls.h > @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, > asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, > unsigned long idx1, unsigned long idx2); > asmlinkage long sys_finit_module(int fd, const char __user *uargs, int > flags); > +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, > + const char __user *uargs); > #endif > diff --git a/include/uapi/asm-generic/unistd.h > b/include/uapi/asm-generic/unistd.h > index 333640608087..65acbf0e2867 100644 > --- a/include/uapi/asm-generic/unistd.h > +++ b/include/uapi/asm-generic/unistd.h > @@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) > __SYSCALL(__NR_sched_getattr, sys_sched_getattr) > #define __NR_renameat2 276 > __SYSCALL(__NR_renameat2, sys_renameat2) > +#define __NR_seccomp 277 > +__SYSCALL(__NR_seccomp, sys_seccomp) > > #undef __NR_syscalls > -#define __NR_syscalls 277 > +#define __NR_syscalls 278 > > /* > * All syscalls below here should go away really, > diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h > index ac2dc9f72973..b258878ba754 100644 > --- a/include/uapi/linux/seccomp.h > +++ b/include/uapi/linux/seccomp.h > @@ -10,6 +10,10 @@ > #define SECCOMP_MODE_STRICT1 /* uses hard-coded filter. */ > #define SECCOMP_MODE_FILTER2 /* uses user-supplied filter. */ > > +/* Valid operations for seccomp syscall. */ > +#define SECCOMP_SET_MODE_STRICT0 > +#define SECCOMP_SET_MODE_FILTER1 > + > /* > * All BPF programs must return a 32-bit value. > * The bottom 16-bits are for optional return data. > diff --git a/kernel/seccomp.c b/kernel/seccomp.c > index 39d32c2904fc..c0cafa9e84af 100644 > --- a/kernel/seccomp.c > +++ b/kernel/seccomp.c > @@ -19,6 +19,7 @@ > #include > #include > #include > +#include > > /* #define SECCOMP_DEBUG 1 */ > > @@ -301,8 +302,8 @@ free_prog: > * > * Returns filter on success and ERR_PTR otherwise. > */ > -static > -struct seccomp_filter *seccomp_prepare_user_filter(char __user *user_filter) > +static struct seccomp_filter * > +seccomp_prepare_user_filter(const char __user *user_filter) > { > struct sock_fprog fprog; > struct seccomp_filter *filter = ERR_PTR(-EFAULT); > @@ -325,19 +326,25 @@ out: > > /** > * seccomp_attach_filter: validate and attach filter > + * @flags: flags to change filter behavior > * @filter: seccomp filter to add to the current process > * > * Caller must be holding current->sighand->siglock lock. > * > * Returns 0 on success, -ve on error. > */ > -static long seccomp_attach_filter(struct seccomp_filter *filter) > +static long seccomp_attach_filter(unsigned int flags, > +
Re: [PATCH v6 6/9] seccomp: add seccomp syscall
Hi Kees, On Wed, Jun 11, 2014 at 5:25 AM, Kees Cook keesc...@chromium.org wrote: This adds the new seccomp syscall with both an operation and flags parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). I assume there'll be another iteration of these patches. With that next iteration, could you write a man page (or at least free text structured like a man page) that comprehensively documents the user-space API? Thanks, Michael Signed-off-by: Kees Cook keesc...@chromium.org Cc: linux-...@vger.kernel.org --- arch/x86/syscalls/syscall_32.tbl |1 + arch/x86/syscalls/syscall_64.tbl |1 + include/linux/syscalls.h |2 ++ include/uapi/asm-generic/unistd.h |4 ++- include/uapi/linux/seccomp.h |4 +++ kernel/seccomp.c | 63 - kernel/sys_ni.c |3 ++ 7 files changed, 69 insertions(+), 9 deletions(-) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..7527eac24122 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351i386sched_setattr sys_sched_setattr 352i386sched_getattr sys_sched_getattr 353i386renameat2 sys_renameat2 +354i386seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..16272a6c12b7 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314common sched_setattr sys_sched_setattr 315common sched_getattr sys_sched_getattr 316common renameat2 sys_renameat2 +317common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0881a0ed322..1713977ee26f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 333640608087..65acbf0e2867 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) __SYSCALL(__NR_sched_getattr, sys_sched_getattr) #define __NR_renameat2 276 __SYSCALL(__NR_renameat2, sys_renameat2) +#define __NR_seccomp 277 +__SYSCALL(__NR_seccomp, sys_seccomp) #undef __NR_syscalls -#define __NR_syscalls 277 +#define __NR_syscalls 278 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index ac2dc9f72973..b258878ba754 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -10,6 +10,10 @@ #define SECCOMP_MODE_STRICT1 /* uses hard-coded filter. */ #define SECCOMP_MODE_FILTER2 /* uses user-supplied filter. */ +/* Valid operations for seccomp syscall. */ +#define SECCOMP_SET_MODE_STRICT0 +#define SECCOMP_SET_MODE_FILTER1 + /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 39d32c2904fc..c0cafa9e84af 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -19,6 +19,7 @@ #include linux/sched.h #include linux/seccomp.h #include linux/slab.h +#include linux/syscalls.h /* #define SECCOMP_DEBUG 1 */ @@ -301,8 +302,8 @@ free_prog: * * Returns filter on success and ERR_PTR otherwise. */ -static -struct seccomp_filter *seccomp_prepare_user_filter(char __user *user_filter) +static struct seccomp_filter * +seccomp_prepare_user_filter(const char __user *user_filter) { struct sock_fprog fprog; struct seccomp_filter *filter = ERR_PTR(-EFAULT); @@ -325,19 +326,25 @@ out: /** * seccomp_attach_filter: validate and attach filter + * @flags: flags to change filter behavior * @filter: seccomp filter to add to the current process * * Caller must be holding current-sighand-siglock lock. * * Returns 0 on success, -ve on error. */ -static long seccomp_attach_filter(struct seccomp_filter *filter) +static long seccomp_attach_filter(unsigned int flags, + struct
Re: [PATCH v6 6/9] seccomp: add "seccomp" syscall
On Fri, Jun 13, 2014 at 2:42 PM, Andy Lutomirski wrote: > On Fri, Jun 13, 2014 at 2:37 PM, Alexei Starovoitov wrote: >> On Fri, Jun 13, 2014 at 2:25 PM, Andy Lutomirski wrote: >>> On Fri, Jun 13, 2014 at 2:22 PM, Alexei Starovoitov >>> wrote: On Tue, Jun 10, 2014 at 8:25 PM, Kees Cook wrote: > This adds the new "seccomp" syscall with both an "operation" and "flags" > parameter for future expansion. The third argument is a pointer value, > used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must > be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). > > Signed-off-by: Kees Cook > Cc: linux-...@vger.kernel.org > --- > arch/x86/syscalls/syscall_32.tbl |1 + > arch/x86/syscalls/syscall_64.tbl |1 + > include/linux/syscalls.h |2 ++ > include/uapi/asm-generic/unistd.h |4 ++- > include/uapi/linux/seccomp.h |4 +++ > kernel/seccomp.c | 63 > - > kernel/sys_ni.c |3 ++ > 7 files changed, 69 insertions(+), 9 deletions(-) > > diff --git a/arch/x86/syscalls/syscall_32.tbl > b/arch/x86/syscalls/syscall_32.tbl > index d6b867921612..7527eac24122 100644 > --- a/arch/x86/syscalls/syscall_32.tbl > +++ b/arch/x86/syscalls/syscall_32.tbl > @@ -360,3 +360,4 @@ > 351i386sched_setattr sys_sched_setattr > 352i386sched_getattr sys_sched_getattr > 353i386renameat2 sys_renameat2 > +354i386seccomp sys_seccomp > diff --git a/arch/x86/syscalls/syscall_64.tbl > b/arch/x86/syscalls/syscall_64.tbl > index ec255a1646d2..16272a6c12b7 100644 > --- a/arch/x86/syscalls/syscall_64.tbl > +++ b/arch/x86/syscalls/syscall_64.tbl > @@ -323,6 +323,7 @@ > 314common sched_setattr sys_sched_setattr > 315common sched_getattr sys_sched_getattr > 316common renameat2 sys_renameat2 > +317common seccomp sys_seccomp > > # > # x32-specific system call numbers start at 512 to avoid cache impact > diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h > index b0881a0ed322..1713977ee26f 100644 > --- a/include/linux/syscalls.h > +++ b/include/linux/syscalls.h > @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, > asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, > unsigned long idx1, unsigned long idx2); > asmlinkage long sys_finit_module(int fd, const char __user *uargs, int > flags); > +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, > + const char __user *uargs); It looks odd to add 'flags' argument to syscall that is not even used. It don't think it will be extensible this way. 'uargs' is used only in 2nd command as well and it's not 'char __user *' but rather 'struct sock_fprog __user *' I think it makes more sense to define only first argument as 'int op' and the rest as variable length array. Something like: long sys_seccomp(unsigned int op, struct nlattr *attrs, int len); then different commands can interpret 'attrs' differently. if op == mode_strict, then attrs == NULL, len == 0 if op == mode_filter, then attrs->nla_type == seccomp_bpf_filter and nla_data(attrs) is 'struct sock_fprog' >>> >>> Eww. If the operation doesn't imply the type, then I think we've >>> totally screwed up. >>> If we decide to add new types of filters or new commands, the syscall prototype won't need to change. New commands can be added preserving backward compatibility. The basic TLV concept has been around forever in netlink world. imo makes sense to use it with new syscalls. Passing 'struct xxx' into syscalls is the thing of the past. TLV style is more extensible. Fields of structures can become optional in the future, new fields added, etc. 'struct nlattr' brings the same benefits to kernel api as protobuf did to user land. >>> >>> I see no reason to bring nl_attr into this. >>> >>> Admittedly, I've never dealt with nl_attr, but everything >>> netlink-related I've even been involved in has involved some sort of >>> API atrocity. >> >> netlink has a lot of legacy and there is genetlink which is not pretty >> either because of extra socket creation, binding, dealing with packet >> loss issues, but the key concept of variable length encoding is sound. >> Right now seccomp has two commands and they already don't fit >> into single syscall neatly. Are you saying there should be two syscalls >> here? What about another seccomp related command? Another syscall? >> imo all seccomp related commands needs to be mux/demux-ed under >> one
Re: [PATCH v6 6/9] seccomp: add "seccomp" syscall
On Fri, Jun 13, 2014 at 2:42 PM, Andy Lutomirski wrote: > On Fri, Jun 13, 2014 at 2:37 PM, Alexei Starovoitov wrote: >> On Fri, Jun 13, 2014 at 2:25 PM, Andy Lutomirski wrote: >>> On Fri, Jun 13, 2014 at 2:22 PM, Alexei Starovoitov >>> wrote: On Tue, Jun 10, 2014 at 8:25 PM, Kees Cook wrote: > This adds the new "seccomp" syscall with both an "operation" and "flags" > parameter for future expansion. The third argument is a pointer value, > used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must > be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). > > Signed-off-by: Kees Cook > Cc: linux-...@vger.kernel.org > --- > arch/x86/syscalls/syscall_32.tbl |1 + > arch/x86/syscalls/syscall_64.tbl |1 + > include/linux/syscalls.h |2 ++ > include/uapi/asm-generic/unistd.h |4 ++- > include/uapi/linux/seccomp.h |4 +++ > kernel/seccomp.c | 63 > - > kernel/sys_ni.c |3 ++ > 7 files changed, 69 insertions(+), 9 deletions(-) > > diff --git a/arch/x86/syscalls/syscall_32.tbl > b/arch/x86/syscalls/syscall_32.tbl > index d6b867921612..7527eac24122 100644 > --- a/arch/x86/syscalls/syscall_32.tbl > +++ b/arch/x86/syscalls/syscall_32.tbl > @@ -360,3 +360,4 @@ > 351i386sched_setattr sys_sched_setattr > 352i386sched_getattr sys_sched_getattr > 353i386renameat2 sys_renameat2 > +354i386seccomp sys_seccomp > diff --git a/arch/x86/syscalls/syscall_64.tbl > b/arch/x86/syscalls/syscall_64.tbl > index ec255a1646d2..16272a6c12b7 100644 > --- a/arch/x86/syscalls/syscall_64.tbl > +++ b/arch/x86/syscalls/syscall_64.tbl > @@ -323,6 +323,7 @@ > 314common sched_setattr sys_sched_setattr > 315common sched_getattr sys_sched_getattr > 316common renameat2 sys_renameat2 > +317common seccomp sys_seccomp > > # > # x32-specific system call numbers start at 512 to avoid cache impact > diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h > index b0881a0ed322..1713977ee26f 100644 > --- a/include/linux/syscalls.h > +++ b/include/linux/syscalls.h > @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, > asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, > unsigned long idx1, unsigned long idx2); > asmlinkage long sys_finit_module(int fd, const char __user *uargs, int > flags); > +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, > + const char __user *uargs); It looks odd to add 'flags' argument to syscall that is not even used. It don't think it will be extensible this way. 'uargs' is used only in 2nd command as well and it's not 'char __user *' but rather 'struct sock_fprog __user *' I think it makes more sense to define only first argument as 'int op' and the rest as variable length array. Something like: long sys_seccomp(unsigned int op, struct nlattr *attrs, int len); then different commands can interpret 'attrs' differently. if op == mode_strict, then attrs == NULL, len == 0 if op == mode_filter, then attrs->nla_type == seccomp_bpf_filter and nla_data(attrs) is 'struct sock_fprog' >>> >>> Eww. If the operation doesn't imply the type, then I think we've >>> totally screwed up. >>> If we decide to add new types of filters or new commands, the syscall prototype won't need to change. New commands can be added preserving backward compatibility. The basic TLV concept has been around forever in netlink world. imo makes sense to use it with new syscalls. Passing 'struct xxx' into syscalls is the thing of the past. TLV style is more extensible. Fields of structures can become optional in the future, new fields added, etc. 'struct nlattr' brings the same benefits to kernel api as protobuf did to user land. >>> >>> I see no reason to bring nl_attr into this. >>> >>> Admittedly, I've never dealt with nl_attr, but everything >>> netlink-related I've even been involved in has involved some sort of >>> API atrocity. >> >> netlink has a lot of legacy and there is genetlink which is not pretty >> either because of extra socket creation, binding, dealing with packet >> loss issues, but the key concept of variable length encoding is sound. >> Right now seccomp has two commands and they already don't fit >> into single syscall neatly. Are you saying there should be two syscalls >> here? What about another seccomp related command? Another syscall? >> imo all seccomp related commands needs to be mux/demux-ed under >> one
Re: [PATCH v6 6/9] seccomp: add "seccomp" syscall
On Fri, Jun 13, 2014 at 2:22 PM, Alexei Starovoitov wrote: > On Tue, Jun 10, 2014 at 8:25 PM, Kees Cook wrote: >> This adds the new "seccomp" syscall with both an "operation" and "flags" >> parameter for future expansion. The third argument is a pointer value, >> used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must >> be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). >> >> Signed-off-by: Kees Cook >> Cc: linux-...@vger.kernel.org >> --- >> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h >> index b0881a0ed322..1713977ee26f 100644 >> --- a/include/linux/syscalls.h >> +++ b/include/linux/syscalls.h >> @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, >> asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, >> unsigned long idx1, unsigned long idx2); >> asmlinkage long sys_finit_module(int fd, const char __user *uargs, int >> flags); >> +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, >> + const char __user *uargs); > > It looks odd to add 'flags' argument to syscall that is not even used. FWIW, "flags" is given use in the next patch to support the tsync option. -Kees -- Kees Cook Chrome OS Security -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v6 6/9] seccomp: add "seccomp" syscall
On Fri, Jun 13, 2014 at 2:37 PM, Alexei Starovoitov wrote: > On Fri, Jun 13, 2014 at 2:25 PM, Andy Lutomirski wrote: >> On Fri, Jun 13, 2014 at 2:22 PM, Alexei Starovoitov >> wrote: >>> On Tue, Jun 10, 2014 at 8:25 PM, Kees Cook wrote: This adds the new "seccomp" syscall with both an "operation" and "flags" parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). Signed-off-by: Kees Cook Cc: linux-...@vger.kernel.org --- arch/x86/syscalls/syscall_32.tbl |1 + arch/x86/syscalls/syscall_64.tbl |1 + include/linux/syscalls.h |2 ++ include/uapi/asm-generic/unistd.h |4 ++- include/uapi/linux/seccomp.h |4 +++ kernel/seccomp.c | 63 - kernel/sys_ni.c |3 ++ 7 files changed, 69 insertions(+), 9 deletions(-) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..7527eac24122 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351i386sched_setattr sys_sched_setattr 352i386sched_getattr sys_sched_getattr 353i386renameat2 sys_renameat2 +354i386seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..16272a6c12b7 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314common sched_setattr sys_sched_setattr 315common sched_getattr sys_sched_getattr 316common renameat2 sys_renameat2 +317common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0881a0ed322..1713977ee26f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); >>> >>> It looks odd to add 'flags' argument to syscall that is not even used. >>> It don't think it will be extensible this way. >>> 'uargs' is used only in 2nd command as well and it's not 'char __user *' >>> but rather 'struct sock_fprog __user *' >>> I think it makes more sense to define only first argument as 'int op' and >>> the >>> rest as variable length array. >>> Something like: >>> long sys_seccomp(unsigned int op, struct nlattr *attrs, int len); >>> then different commands can interpret 'attrs' differently. >>> if op == mode_strict, then attrs == NULL, len == 0 >>> if op == mode_filter, then attrs->nla_type == seccomp_bpf_filter >>> and nla_data(attrs) is 'struct sock_fprog' >> >> Eww. If the operation doesn't imply the type, then I think we've >> totally screwed up. >> >>> If we decide to add new types of filters or new commands, the syscall >>> prototype >>> won't need to change. New commands can be added preserving backward >>> compatibility. >>> The basic TLV concept has been around forever in netlink world. imo makes >>> sense to use it with new syscalls. Passing 'struct xxx' into syscalls >>> is the thing >>> of the past. TLV style is more extensible. Fields of structures can become >>> optional in the future, new fields added, etc. >>> 'struct nlattr' brings the same benefits to kernel api as protobuf did >>> to user land. >> >> I see no reason to bring nl_attr into this. >> >> Admittedly, I've never dealt with nl_attr, but everything >> netlink-related I've even been involved in has involved some sort of >> API atrocity. > > netlink has a lot of legacy and there is genetlink which is not pretty > either because of extra socket creation, binding, dealing with packet > loss issues, but the key concept of variable length encoding is sound. > Right now seccomp has two commands and they already don't fit > into single syscall neatly. Are you saying there should be two syscalls > here? What about another seccomp related command? Another syscall? > imo all seccomp related commands needs to be mux/demux-ed under > one syscall. What is the way to mux/demux potentially very different > commands under one syscall? I cannot think of anything better than > TLV style. 'struct
Re: [PATCH v6 6/9] seccomp: add "seccomp" syscall
On Fri, Jun 13, 2014 at 2:25 PM, Andy Lutomirski wrote: > On Fri, Jun 13, 2014 at 2:22 PM, Alexei Starovoitov wrote: >> On Tue, Jun 10, 2014 at 8:25 PM, Kees Cook wrote: >>> This adds the new "seccomp" syscall with both an "operation" and "flags" >>> parameter for future expansion. The third argument is a pointer value, >>> used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must >>> be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). >>> >>> Signed-off-by: Kees Cook >>> Cc: linux-...@vger.kernel.org >>> --- >>> arch/x86/syscalls/syscall_32.tbl |1 + >>> arch/x86/syscalls/syscall_64.tbl |1 + >>> include/linux/syscalls.h |2 ++ >>> include/uapi/asm-generic/unistd.h |4 ++- >>> include/uapi/linux/seccomp.h |4 +++ >>> kernel/seccomp.c | 63 >>> - >>> kernel/sys_ni.c |3 ++ >>> 7 files changed, 69 insertions(+), 9 deletions(-) >>> >>> diff --git a/arch/x86/syscalls/syscall_32.tbl >>> b/arch/x86/syscalls/syscall_32.tbl >>> index d6b867921612..7527eac24122 100644 >>> --- a/arch/x86/syscalls/syscall_32.tbl >>> +++ b/arch/x86/syscalls/syscall_32.tbl >>> @@ -360,3 +360,4 @@ >>> 351i386sched_setattr sys_sched_setattr >>> 352i386sched_getattr sys_sched_getattr >>> 353i386renameat2 sys_renameat2 >>> +354i386seccomp sys_seccomp >>> diff --git a/arch/x86/syscalls/syscall_64.tbl >>> b/arch/x86/syscalls/syscall_64.tbl >>> index ec255a1646d2..16272a6c12b7 100644 >>> --- a/arch/x86/syscalls/syscall_64.tbl >>> +++ b/arch/x86/syscalls/syscall_64.tbl >>> @@ -323,6 +323,7 @@ >>> 314common sched_setattr sys_sched_setattr >>> 315common sched_getattr sys_sched_getattr >>> 316common renameat2 sys_renameat2 >>> +317common seccomp sys_seccomp >>> >>> # >>> # x32-specific system call numbers start at 512 to avoid cache impact >>> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h >>> index b0881a0ed322..1713977ee26f 100644 >>> --- a/include/linux/syscalls.h >>> +++ b/include/linux/syscalls.h >>> @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, >>> asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, >>> unsigned long idx1, unsigned long idx2); >>> asmlinkage long sys_finit_module(int fd, const char __user *uargs, int >>> flags); >>> +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, >>> + const char __user *uargs); >> >> It looks odd to add 'flags' argument to syscall that is not even used. >> It don't think it will be extensible this way. >> 'uargs' is used only in 2nd command as well and it's not 'char __user *' >> but rather 'struct sock_fprog __user *' >> I think it makes more sense to define only first argument as 'int op' and the >> rest as variable length array. >> Something like: >> long sys_seccomp(unsigned int op, struct nlattr *attrs, int len); >> then different commands can interpret 'attrs' differently. >> if op == mode_strict, then attrs == NULL, len == 0 >> if op == mode_filter, then attrs->nla_type == seccomp_bpf_filter >> and nla_data(attrs) is 'struct sock_fprog' > > Eww. If the operation doesn't imply the type, then I think we've > totally screwed up. > >> If we decide to add new types of filters or new commands, the syscall >> prototype >> won't need to change. New commands can be added preserving backward >> compatibility. >> The basic TLV concept has been around forever in netlink world. imo makes >> sense to use it with new syscalls. Passing 'struct xxx' into syscalls >> is the thing >> of the past. TLV style is more extensible. Fields of structures can become >> optional in the future, new fields added, etc. >> 'struct nlattr' brings the same benefits to kernel api as protobuf did >> to user land. > > I see no reason to bring nl_attr into this. > > Admittedly, I've never dealt with nl_attr, but everything > netlink-related I've even been involved in has involved some sort of > API atrocity. netlink has a lot of legacy and there is genetlink which is not pretty either because of extra socket creation, binding, dealing with packet loss issues, but the key concept of variable length encoding is sound. Right now seccomp has two commands and they already don't fit into single syscall neatly. Are you saying there should be two syscalls here? What about another seccomp related command? Another syscall? imo all seccomp related commands needs to be mux/demux-ed under one syscall. What is the way to mux/demux potentially very different commands under one syscall? I cannot think of anything better than TLV style. 'struct nlattr' is what we have today and I think it works fine. I'm not suggesting to bring the whole netlink into the picture, but rather TLV style of encoding different arguments for
Re: [PATCH v6 6/9] seccomp: add "seccomp" syscall
On Fri, Jun 13, 2014 at 2:22 PM, Alexei Starovoitov wrote: > On Tue, Jun 10, 2014 at 8:25 PM, Kees Cook wrote: >> This adds the new "seccomp" syscall with both an "operation" and "flags" >> parameter for future expansion. The third argument is a pointer value, >> used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must >> be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). >> >> Signed-off-by: Kees Cook >> Cc: linux-...@vger.kernel.org >> --- >> arch/x86/syscalls/syscall_32.tbl |1 + >> arch/x86/syscalls/syscall_64.tbl |1 + >> include/linux/syscalls.h |2 ++ >> include/uapi/asm-generic/unistd.h |4 ++- >> include/uapi/linux/seccomp.h |4 +++ >> kernel/seccomp.c | 63 >> - >> kernel/sys_ni.c |3 ++ >> 7 files changed, 69 insertions(+), 9 deletions(-) >> >> diff --git a/arch/x86/syscalls/syscall_32.tbl >> b/arch/x86/syscalls/syscall_32.tbl >> index d6b867921612..7527eac24122 100644 >> --- a/arch/x86/syscalls/syscall_32.tbl >> +++ b/arch/x86/syscalls/syscall_32.tbl >> @@ -360,3 +360,4 @@ >> 351i386sched_setattr sys_sched_setattr >> 352i386sched_getattr sys_sched_getattr >> 353i386renameat2 sys_renameat2 >> +354i386seccomp sys_seccomp >> diff --git a/arch/x86/syscalls/syscall_64.tbl >> b/arch/x86/syscalls/syscall_64.tbl >> index ec255a1646d2..16272a6c12b7 100644 >> --- a/arch/x86/syscalls/syscall_64.tbl >> +++ b/arch/x86/syscalls/syscall_64.tbl >> @@ -323,6 +323,7 @@ >> 314common sched_setattr sys_sched_setattr >> 315common sched_getattr sys_sched_getattr >> 316common renameat2 sys_renameat2 >> +317common seccomp sys_seccomp >> >> # >> # x32-specific system call numbers start at 512 to avoid cache impact >> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h >> index b0881a0ed322..1713977ee26f 100644 >> --- a/include/linux/syscalls.h >> +++ b/include/linux/syscalls.h >> @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, >> asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, >> unsigned long idx1, unsigned long idx2); >> asmlinkage long sys_finit_module(int fd, const char __user *uargs, int >> flags); >> +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, >> + const char __user *uargs); > > It looks odd to add 'flags' argument to syscall that is not even used. > It don't think it will be extensible this way. > 'uargs' is used only in 2nd command as well and it's not 'char __user *' > but rather 'struct sock_fprog __user *' > I think it makes more sense to define only first argument as 'int op' and the > rest as variable length array. > Something like: > long sys_seccomp(unsigned int op, struct nlattr *attrs, int len); > then different commands can interpret 'attrs' differently. > if op == mode_strict, then attrs == NULL, len == 0 > if op == mode_filter, then attrs->nla_type == seccomp_bpf_filter > and nla_data(attrs) is 'struct sock_fprog' Eww. If the operation doesn't imply the type, then I think we've totally screwed up. > If we decide to add new types of filters or new commands, the syscall > prototype > won't need to change. New commands can be added preserving backward > compatibility. > The basic TLV concept has been around forever in netlink world. imo makes > sense to use it with new syscalls. Passing 'struct xxx' into syscalls > is the thing > of the past. TLV style is more extensible. Fields of structures can become > optional in the future, new fields added, etc. > 'struct nlattr' brings the same benefits to kernel api as protobuf did > to user land. I see no reason to bring nl_attr into this. Admittedly, I've never dealt with nl_attr, but everything netlink-related I've even been involved in has involved some sort of API atrocity. --Andy -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v6 6/9] seccomp: add "seccomp" syscall
On Tue, Jun 10, 2014 at 8:25 PM, Kees Cook wrote: > This adds the new "seccomp" syscall with both an "operation" and "flags" > parameter for future expansion. The third argument is a pointer value, > used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must > be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). > > Signed-off-by: Kees Cook > Cc: linux-...@vger.kernel.org > --- > arch/x86/syscalls/syscall_32.tbl |1 + > arch/x86/syscalls/syscall_64.tbl |1 + > include/linux/syscalls.h |2 ++ > include/uapi/asm-generic/unistd.h |4 ++- > include/uapi/linux/seccomp.h |4 +++ > kernel/seccomp.c | 63 > - > kernel/sys_ni.c |3 ++ > 7 files changed, 69 insertions(+), 9 deletions(-) > > diff --git a/arch/x86/syscalls/syscall_32.tbl > b/arch/x86/syscalls/syscall_32.tbl > index d6b867921612..7527eac24122 100644 > --- a/arch/x86/syscalls/syscall_32.tbl > +++ b/arch/x86/syscalls/syscall_32.tbl > @@ -360,3 +360,4 @@ > 351i386sched_setattr sys_sched_setattr > 352i386sched_getattr sys_sched_getattr > 353i386renameat2 sys_renameat2 > +354i386seccomp sys_seccomp > diff --git a/arch/x86/syscalls/syscall_64.tbl > b/arch/x86/syscalls/syscall_64.tbl > index ec255a1646d2..16272a6c12b7 100644 > --- a/arch/x86/syscalls/syscall_64.tbl > +++ b/arch/x86/syscalls/syscall_64.tbl > @@ -323,6 +323,7 @@ > 314common sched_setattr sys_sched_setattr > 315common sched_getattr sys_sched_getattr > 316common renameat2 sys_renameat2 > +317common seccomp sys_seccomp > > # > # x32-specific system call numbers start at 512 to avoid cache impact > diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h > index b0881a0ed322..1713977ee26f 100644 > --- a/include/linux/syscalls.h > +++ b/include/linux/syscalls.h > @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, > asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, > unsigned long idx1, unsigned long idx2); > asmlinkage long sys_finit_module(int fd, const char __user *uargs, int > flags); > +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, > + const char __user *uargs); It looks odd to add 'flags' argument to syscall that is not even used. It don't think it will be extensible this way. 'uargs' is used only in 2nd command as well and it's not 'char __user *' but rather 'struct sock_fprog __user *' I think it makes more sense to define only first argument as 'int op' and the rest as variable length array. Something like: long sys_seccomp(unsigned int op, struct nlattr *attrs, int len); then different commands can interpret 'attrs' differently. if op == mode_strict, then attrs == NULL, len == 0 if op == mode_filter, then attrs->nla_type == seccomp_bpf_filter and nla_data(attrs) is 'struct sock_fprog' If we decide to add new types of filters or new commands, the syscall prototype won't need to change. New commands can be added preserving backward compatibility. The basic TLV concept has been around forever in netlink world. imo makes sense to use it with new syscalls. Passing 'struct xxx' into syscalls is the thing of the past. TLV style is more extensible. Fields of structures can become optional in the future, new fields added, etc. 'struct nlattr' brings the same benefits to kernel api as protobuf did to user land. > #endif > diff --git a/include/uapi/asm-generic/unistd.h > b/include/uapi/asm-generic/unistd.h > index 333640608087..65acbf0e2867 100644 > --- a/include/uapi/asm-generic/unistd.h > +++ b/include/uapi/asm-generic/unistd.h > @@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) > __SYSCALL(__NR_sched_getattr, sys_sched_getattr) > #define __NR_renameat2 276 > __SYSCALL(__NR_renameat2, sys_renameat2) > +#define __NR_seccomp 277 > +__SYSCALL(__NR_seccomp, sys_seccomp) > > #undef __NR_syscalls > -#define __NR_syscalls 277 > +#define __NR_syscalls 278 > > /* > * All syscalls below here should go away really, > diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h > index ac2dc9f72973..b258878ba754 100644 > --- a/include/uapi/linux/seccomp.h > +++ b/include/uapi/linux/seccomp.h > @@ -10,6 +10,10 @@ > #define SECCOMP_MODE_STRICT1 /* uses hard-coded filter. */ > #define SECCOMP_MODE_FILTER2 /* uses user-supplied filter. */ > > +/* Valid operations for seccomp syscall. */ > +#define SECCOMP_SET_MODE_STRICT0 > +#define SECCOMP_SET_MODE_FILTER1 > + > /* > * All BPF programs must return a 32-bit value. > * The bottom 16-bits are for optional return data. > diff --git a/kernel/seccomp.c b/kernel/seccomp.c > index 39d32c2904fc..c0cafa9e84af 100644 > --- a/kernel/seccomp.c > +++ b/kernel/seccomp.c > @@ -19,6 +19,7 @@ >
Re: [PATCH v6 6/9] seccomp: add "seccomp" syscall
On Tue, Jun 10, 2014 at 8:25 PM, Kees Cook wrote: > This adds the new "seccomp" syscall with both an "operation" and "flags" > parameter for future expansion. The third argument is a pointer value, > used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must > be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). Question for the linux-abi people: What's the preferred way to do this these days? This syscall is a general purpose "adjust the seccomp state" thing. The alternative would be a specific new syscall to add a filter with a flags argument. --Andy -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v6 6/9] seccomp: add seccomp syscall
On Tue, Jun 10, 2014 at 8:25 PM, Kees Cook keesc...@chromium.org wrote: This adds the new seccomp syscall with both an operation and flags parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). Question for the linux-abi people: What's the preferred way to do this these days? This syscall is a general purpose adjust the seccomp state thing. The alternative would be a specific new syscall to add a filter with a flags argument. --Andy -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v6 6/9] seccomp: add seccomp syscall
On Tue, Jun 10, 2014 at 8:25 PM, Kees Cook keesc...@chromium.org wrote: This adds the new seccomp syscall with both an operation and flags parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). Signed-off-by: Kees Cook keesc...@chromium.org Cc: linux-...@vger.kernel.org --- arch/x86/syscalls/syscall_32.tbl |1 + arch/x86/syscalls/syscall_64.tbl |1 + include/linux/syscalls.h |2 ++ include/uapi/asm-generic/unistd.h |4 ++- include/uapi/linux/seccomp.h |4 +++ kernel/seccomp.c | 63 - kernel/sys_ni.c |3 ++ 7 files changed, 69 insertions(+), 9 deletions(-) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..7527eac24122 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351i386sched_setattr sys_sched_setattr 352i386sched_getattr sys_sched_getattr 353i386renameat2 sys_renameat2 +354i386seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..16272a6c12b7 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314common sched_setattr sys_sched_setattr 315common sched_getattr sys_sched_getattr 316common renameat2 sys_renameat2 +317common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0881a0ed322..1713977ee26f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); It looks odd to add 'flags' argument to syscall that is not even used. It don't think it will be extensible this way. 'uargs' is used only in 2nd command as well and it's not 'char __user *' but rather 'struct sock_fprog __user *' I think it makes more sense to define only first argument as 'int op' and the rest as variable length array. Something like: long sys_seccomp(unsigned int op, struct nlattr *attrs, int len); then different commands can interpret 'attrs' differently. if op == mode_strict, then attrs == NULL, len == 0 if op == mode_filter, then attrs-nla_type == seccomp_bpf_filter and nla_data(attrs) is 'struct sock_fprog' If we decide to add new types of filters or new commands, the syscall prototype won't need to change. New commands can be added preserving backward compatibility. The basic TLV concept has been around forever in netlink world. imo makes sense to use it with new syscalls. Passing 'struct xxx' into syscalls is the thing of the past. TLV style is more extensible. Fields of structures can become optional in the future, new fields added, etc. 'struct nlattr' brings the same benefits to kernel api as protobuf did to user land. #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 333640608087..65acbf0e2867 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) __SYSCALL(__NR_sched_getattr, sys_sched_getattr) #define __NR_renameat2 276 __SYSCALL(__NR_renameat2, sys_renameat2) +#define __NR_seccomp 277 +__SYSCALL(__NR_seccomp, sys_seccomp) #undef __NR_syscalls -#define __NR_syscalls 277 +#define __NR_syscalls 278 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index ac2dc9f72973..b258878ba754 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -10,6 +10,10 @@ #define SECCOMP_MODE_STRICT1 /* uses hard-coded filter. */ #define SECCOMP_MODE_FILTER2 /* uses user-supplied filter. */ +/* Valid operations for seccomp syscall. */ +#define SECCOMP_SET_MODE_STRICT0 +#define SECCOMP_SET_MODE_FILTER1 + /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 39d32c2904fc..c0cafa9e84af 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -19,6 +19,7 @@ #include linux/sched.h #include linux/seccomp.h #include
Re: [PATCH v6 6/9] seccomp: add seccomp syscall
On Fri, Jun 13, 2014 at 2:22 PM, Alexei Starovoitov a...@plumgrid.com wrote: On Tue, Jun 10, 2014 at 8:25 PM, Kees Cook keesc...@chromium.org wrote: This adds the new seccomp syscall with both an operation and flags parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). Signed-off-by: Kees Cook keesc...@chromium.org Cc: linux-...@vger.kernel.org --- arch/x86/syscalls/syscall_32.tbl |1 + arch/x86/syscalls/syscall_64.tbl |1 + include/linux/syscalls.h |2 ++ include/uapi/asm-generic/unistd.h |4 ++- include/uapi/linux/seccomp.h |4 +++ kernel/seccomp.c | 63 - kernel/sys_ni.c |3 ++ 7 files changed, 69 insertions(+), 9 deletions(-) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..7527eac24122 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351i386sched_setattr sys_sched_setattr 352i386sched_getattr sys_sched_getattr 353i386renameat2 sys_renameat2 +354i386seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..16272a6c12b7 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314common sched_setattr sys_sched_setattr 315common sched_getattr sys_sched_getattr 316common renameat2 sys_renameat2 +317common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0881a0ed322..1713977ee26f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); It looks odd to add 'flags' argument to syscall that is not even used. It don't think it will be extensible this way. 'uargs' is used only in 2nd command as well and it's not 'char __user *' but rather 'struct sock_fprog __user *' I think it makes more sense to define only first argument as 'int op' and the rest as variable length array. Something like: long sys_seccomp(unsigned int op, struct nlattr *attrs, int len); then different commands can interpret 'attrs' differently. if op == mode_strict, then attrs == NULL, len == 0 if op == mode_filter, then attrs-nla_type == seccomp_bpf_filter and nla_data(attrs) is 'struct sock_fprog' Eww. If the operation doesn't imply the type, then I think we've totally screwed up. If we decide to add new types of filters or new commands, the syscall prototype won't need to change. New commands can be added preserving backward compatibility. The basic TLV concept has been around forever in netlink world. imo makes sense to use it with new syscalls. Passing 'struct xxx' into syscalls is the thing of the past. TLV style is more extensible. Fields of structures can become optional in the future, new fields added, etc. 'struct nlattr' brings the same benefits to kernel api as protobuf did to user land. I see no reason to bring nl_attr into this. Admittedly, I've never dealt with nl_attr, but everything netlink-related I've even been involved in has involved some sort of API atrocity. --Andy -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v6 6/9] seccomp: add seccomp syscall
On Fri, Jun 13, 2014 at 2:25 PM, Andy Lutomirski l...@amacapital.net wrote: On Fri, Jun 13, 2014 at 2:22 PM, Alexei Starovoitov a...@plumgrid.com wrote: On Tue, Jun 10, 2014 at 8:25 PM, Kees Cook keesc...@chromium.org wrote: This adds the new seccomp syscall with both an operation and flags parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). Signed-off-by: Kees Cook keesc...@chromium.org Cc: linux-...@vger.kernel.org --- arch/x86/syscalls/syscall_32.tbl |1 + arch/x86/syscalls/syscall_64.tbl |1 + include/linux/syscalls.h |2 ++ include/uapi/asm-generic/unistd.h |4 ++- include/uapi/linux/seccomp.h |4 +++ kernel/seccomp.c | 63 - kernel/sys_ni.c |3 ++ 7 files changed, 69 insertions(+), 9 deletions(-) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..7527eac24122 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351i386sched_setattr sys_sched_setattr 352i386sched_getattr sys_sched_getattr 353i386renameat2 sys_renameat2 +354i386seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..16272a6c12b7 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314common sched_setattr sys_sched_setattr 315common sched_getattr sys_sched_getattr 316common renameat2 sys_renameat2 +317common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0881a0ed322..1713977ee26f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); It looks odd to add 'flags' argument to syscall that is not even used. It don't think it will be extensible this way. 'uargs' is used only in 2nd command as well and it's not 'char __user *' but rather 'struct sock_fprog __user *' I think it makes more sense to define only first argument as 'int op' and the rest as variable length array. Something like: long sys_seccomp(unsigned int op, struct nlattr *attrs, int len); then different commands can interpret 'attrs' differently. if op == mode_strict, then attrs == NULL, len == 0 if op == mode_filter, then attrs-nla_type == seccomp_bpf_filter and nla_data(attrs) is 'struct sock_fprog' Eww. If the operation doesn't imply the type, then I think we've totally screwed up. If we decide to add new types of filters or new commands, the syscall prototype won't need to change. New commands can be added preserving backward compatibility. The basic TLV concept has been around forever in netlink world. imo makes sense to use it with new syscalls. Passing 'struct xxx' into syscalls is the thing of the past. TLV style is more extensible. Fields of structures can become optional in the future, new fields added, etc. 'struct nlattr' brings the same benefits to kernel api as protobuf did to user land. I see no reason to bring nl_attr into this. Admittedly, I've never dealt with nl_attr, but everything netlink-related I've even been involved in has involved some sort of API atrocity. netlink has a lot of legacy and there is genetlink which is not pretty either because of extra socket creation, binding, dealing with packet loss issues, but the key concept of variable length encoding is sound. Right now seccomp has two commands and they already don't fit into single syscall neatly. Are you saying there should be two syscalls here? What about another seccomp related command? Another syscall? imo all seccomp related commands needs to be mux/demux-ed under one syscall. What is the way to mux/demux potentially very different commands under one syscall? I cannot think of anything better than TLV style. 'struct nlattr' is what we have today and I think it works fine. I'm not suggesting to bring the whole netlink into the picture, but rather TLV style of encoding different arguments for different commands. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More
Re: [PATCH v6 6/9] seccomp: add seccomp syscall
On Fri, Jun 13, 2014 at 2:37 PM, Alexei Starovoitov a...@plumgrid.com wrote: On Fri, Jun 13, 2014 at 2:25 PM, Andy Lutomirski l...@amacapital.net wrote: On Fri, Jun 13, 2014 at 2:22 PM, Alexei Starovoitov a...@plumgrid.com wrote: On Tue, Jun 10, 2014 at 8:25 PM, Kees Cook keesc...@chromium.org wrote: This adds the new seccomp syscall with both an operation and flags parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). Signed-off-by: Kees Cook keesc...@chromium.org Cc: linux-...@vger.kernel.org --- arch/x86/syscalls/syscall_32.tbl |1 + arch/x86/syscalls/syscall_64.tbl |1 + include/linux/syscalls.h |2 ++ include/uapi/asm-generic/unistd.h |4 ++- include/uapi/linux/seccomp.h |4 +++ kernel/seccomp.c | 63 - kernel/sys_ni.c |3 ++ 7 files changed, 69 insertions(+), 9 deletions(-) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..7527eac24122 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351i386sched_setattr sys_sched_setattr 352i386sched_getattr sys_sched_getattr 353i386renameat2 sys_renameat2 +354i386seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..16272a6c12b7 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314common sched_setattr sys_sched_setattr 315common sched_getattr sys_sched_getattr 316common renameat2 sys_renameat2 +317common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0881a0ed322..1713977ee26f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); It looks odd to add 'flags' argument to syscall that is not even used. It don't think it will be extensible this way. 'uargs' is used only in 2nd command as well and it's not 'char __user *' but rather 'struct sock_fprog __user *' I think it makes more sense to define only first argument as 'int op' and the rest as variable length array. Something like: long sys_seccomp(unsigned int op, struct nlattr *attrs, int len); then different commands can interpret 'attrs' differently. if op == mode_strict, then attrs == NULL, len == 0 if op == mode_filter, then attrs-nla_type == seccomp_bpf_filter and nla_data(attrs) is 'struct sock_fprog' Eww. If the operation doesn't imply the type, then I think we've totally screwed up. If we decide to add new types of filters or new commands, the syscall prototype won't need to change. New commands can be added preserving backward compatibility. The basic TLV concept has been around forever in netlink world. imo makes sense to use it with new syscalls. Passing 'struct xxx' into syscalls is the thing of the past. TLV style is more extensible. Fields of structures can become optional in the future, new fields added, etc. 'struct nlattr' brings the same benefits to kernel api as protobuf did to user land. I see no reason to bring nl_attr into this. Admittedly, I've never dealt with nl_attr, but everything netlink-related I've even been involved in has involved some sort of API atrocity. netlink has a lot of legacy and there is genetlink which is not pretty either because of extra socket creation, binding, dealing with packet loss issues, but the key concept of variable length encoding is sound. Right now seccomp has two commands and they already don't fit into single syscall neatly. Are you saying there should be two syscalls here? What about another seccomp related command? Another syscall? imo all seccomp related commands needs to be mux/demux-ed under one syscall. What is the way to mux/demux potentially very different commands under one syscall? I cannot think of anything better than TLV style. 'struct nlattr' is what we have today and I think it works fine. I'm not suggesting to bring the whole netlink into the picture, but rather TLV style of encoding different arguments for different commands. I'm unconvinced. These are simple
Re: [PATCH v6 6/9] seccomp: add seccomp syscall
On Fri, Jun 13, 2014 at 2:22 PM, Alexei Starovoitov a...@plumgrid.com wrote: On Tue, Jun 10, 2014 at 8:25 PM, Kees Cook keesc...@chromium.org wrote: This adds the new seccomp syscall with both an operation and flags parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). Signed-off-by: Kees Cook keesc...@chromium.org Cc: linux-...@vger.kernel.org --- diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0881a0ed322..1713977ee26f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); It looks odd to add 'flags' argument to syscall that is not even used. FWIW, flags is given use in the next patch to support the tsync option. -Kees -- Kees Cook Chrome OS Security -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH v6 6/9] seccomp: add seccomp syscall
On Fri, Jun 13, 2014 at 2:42 PM, Andy Lutomirski l...@amacapital.net wrote: On Fri, Jun 13, 2014 at 2:37 PM, Alexei Starovoitov a...@plumgrid.com wrote: On Fri, Jun 13, 2014 at 2:25 PM, Andy Lutomirski l...@amacapital.net wrote: On Fri, Jun 13, 2014 at 2:22 PM, Alexei Starovoitov a...@plumgrid.com wrote: On Tue, Jun 10, 2014 at 8:25 PM, Kees Cook keesc...@chromium.org wrote: This adds the new seccomp syscall with both an operation and flags parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). Signed-off-by: Kees Cook keesc...@chromium.org Cc: linux-...@vger.kernel.org --- arch/x86/syscalls/syscall_32.tbl |1 + arch/x86/syscalls/syscall_64.tbl |1 + include/linux/syscalls.h |2 ++ include/uapi/asm-generic/unistd.h |4 ++- include/uapi/linux/seccomp.h |4 +++ kernel/seccomp.c | 63 - kernel/sys_ni.c |3 ++ 7 files changed, 69 insertions(+), 9 deletions(-) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..7527eac24122 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351i386sched_setattr sys_sched_setattr 352i386sched_getattr sys_sched_getattr 353i386renameat2 sys_renameat2 +354i386seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..16272a6c12b7 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314common sched_setattr sys_sched_setattr 315common sched_getattr sys_sched_getattr 316common renameat2 sys_renameat2 +317common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0881a0ed322..1713977ee26f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); It looks odd to add 'flags' argument to syscall that is not even used. It don't think it will be extensible this way. 'uargs' is used only in 2nd command as well and it's not 'char __user *' but rather 'struct sock_fprog __user *' I think it makes more sense to define only first argument as 'int op' and the rest as variable length array. Something like: long sys_seccomp(unsigned int op, struct nlattr *attrs, int len); then different commands can interpret 'attrs' differently. if op == mode_strict, then attrs == NULL, len == 0 if op == mode_filter, then attrs-nla_type == seccomp_bpf_filter and nla_data(attrs) is 'struct sock_fprog' Eww. If the operation doesn't imply the type, then I think we've totally screwed up. If we decide to add new types of filters or new commands, the syscall prototype won't need to change. New commands can be added preserving backward compatibility. The basic TLV concept has been around forever in netlink world. imo makes sense to use it with new syscalls. Passing 'struct xxx' into syscalls is the thing of the past. TLV style is more extensible. Fields of structures can become optional in the future, new fields added, etc. 'struct nlattr' brings the same benefits to kernel api as protobuf did to user land. I see no reason to bring nl_attr into this. Admittedly, I've never dealt with nl_attr, but everything netlink-related I've even been involved in has involved some sort of API atrocity. netlink has a lot of legacy and there is genetlink which is not pretty either because of extra socket creation, binding, dealing with packet loss issues, but the key concept of variable length encoding is sound. Right now seccomp has two commands and they already don't fit into single syscall neatly. Are you saying there should be two syscalls here? What about another seccomp related command? Another syscall? imo all seccomp related commands needs to be mux/demux-ed under one syscall. What is the way to mux/demux potentially very different commands under one syscall? I cannot think of anything better than TLV style. 'struct nlattr' is what we have today and I think it works fine. I'm not suggesting to bring the whole netlink into the picture, but rather TLV style of encoding
Re: [PATCH v6 6/9] seccomp: add seccomp syscall
On Fri, Jun 13, 2014 at 2:42 PM, Andy Lutomirski l...@amacapital.net wrote: On Fri, Jun 13, 2014 at 2:37 PM, Alexei Starovoitov a...@plumgrid.com wrote: On Fri, Jun 13, 2014 at 2:25 PM, Andy Lutomirski l...@amacapital.net wrote: On Fri, Jun 13, 2014 at 2:22 PM, Alexei Starovoitov a...@plumgrid.com wrote: On Tue, Jun 10, 2014 at 8:25 PM, Kees Cook keesc...@chromium.org wrote: This adds the new seccomp syscall with both an operation and flags parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). Signed-off-by: Kees Cook keesc...@chromium.org Cc: linux-...@vger.kernel.org --- arch/x86/syscalls/syscall_32.tbl |1 + arch/x86/syscalls/syscall_64.tbl |1 + include/linux/syscalls.h |2 ++ include/uapi/asm-generic/unistd.h |4 ++- include/uapi/linux/seccomp.h |4 +++ kernel/seccomp.c | 63 - kernel/sys_ni.c |3 ++ 7 files changed, 69 insertions(+), 9 deletions(-) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..7527eac24122 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351i386sched_setattr sys_sched_setattr 352i386sched_getattr sys_sched_getattr 353i386renameat2 sys_renameat2 +354i386seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..16272a6c12b7 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314common sched_setattr sys_sched_setattr 315common sched_getattr sys_sched_getattr 316common renameat2 sys_renameat2 +317common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0881a0ed322..1713977ee26f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); It looks odd to add 'flags' argument to syscall that is not even used. It don't think it will be extensible this way. 'uargs' is used only in 2nd command as well and it's not 'char __user *' but rather 'struct sock_fprog __user *' I think it makes more sense to define only first argument as 'int op' and the rest as variable length array. Something like: long sys_seccomp(unsigned int op, struct nlattr *attrs, int len); then different commands can interpret 'attrs' differently. if op == mode_strict, then attrs == NULL, len == 0 if op == mode_filter, then attrs-nla_type == seccomp_bpf_filter and nla_data(attrs) is 'struct sock_fprog' Eww. If the operation doesn't imply the type, then I think we've totally screwed up. If we decide to add new types of filters or new commands, the syscall prototype won't need to change. New commands can be added preserving backward compatibility. The basic TLV concept has been around forever in netlink world. imo makes sense to use it with new syscalls. Passing 'struct xxx' into syscalls is the thing of the past. TLV style is more extensible. Fields of structures can become optional in the future, new fields added, etc. 'struct nlattr' brings the same benefits to kernel api as protobuf did to user land. I see no reason to bring nl_attr into this. Admittedly, I've never dealt with nl_attr, but everything netlink-related I've even been involved in has involved some sort of API atrocity. netlink has a lot of legacy and there is genetlink which is not pretty either because of extra socket creation, binding, dealing with packet loss issues, but the key concept of variable length encoding is sound. Right now seccomp has two commands and they already don't fit into single syscall neatly. Are you saying there should be two syscalls here? What about another seccomp related command? Another syscall? imo all seccomp related commands needs to be mux/demux-ed under one syscall. What is the way to mux/demux potentially very different commands under one syscall? I cannot think of anything better than TLV style. 'struct nlattr' is what we have today and I think it works fine. I'm not suggesting to bring the whole netlink into the picture, but rather TLV style of encoding
[PATCH v6 6/9] seccomp: add "seccomp" syscall
This adds the new "seccomp" syscall with both an "operation" and "flags" parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). Signed-off-by: Kees Cook Cc: linux-...@vger.kernel.org --- arch/x86/syscalls/syscall_32.tbl |1 + arch/x86/syscalls/syscall_64.tbl |1 + include/linux/syscalls.h |2 ++ include/uapi/asm-generic/unistd.h |4 ++- include/uapi/linux/seccomp.h |4 +++ kernel/seccomp.c | 63 - kernel/sys_ni.c |3 ++ 7 files changed, 69 insertions(+), 9 deletions(-) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..7527eac24122 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351i386sched_setattr sys_sched_setattr 352i386sched_getattr sys_sched_getattr 353i386renameat2 sys_renameat2 +354i386seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..16272a6c12b7 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314common sched_setattr sys_sched_setattr 315common sched_getattr sys_sched_getattr 316common renameat2 sys_renameat2 +317common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0881a0ed322..1713977ee26f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 333640608087..65acbf0e2867 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) __SYSCALL(__NR_sched_getattr, sys_sched_getattr) #define __NR_renameat2 276 __SYSCALL(__NR_renameat2, sys_renameat2) +#define __NR_seccomp 277 +__SYSCALL(__NR_seccomp, sys_seccomp) #undef __NR_syscalls -#define __NR_syscalls 277 +#define __NR_syscalls 278 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index ac2dc9f72973..b258878ba754 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -10,6 +10,10 @@ #define SECCOMP_MODE_STRICT1 /* uses hard-coded filter. */ #define SECCOMP_MODE_FILTER2 /* uses user-supplied filter. */ +/* Valid operations for seccomp syscall. */ +#define SECCOMP_SET_MODE_STRICT0 +#define SECCOMP_SET_MODE_FILTER1 + /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 39d32c2904fc..c0cafa9e84af 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -19,6 +19,7 @@ #include #include #include +#include /* #define SECCOMP_DEBUG 1 */ @@ -301,8 +302,8 @@ free_prog: * * Returns filter on success and ERR_PTR otherwise. */ -static -struct seccomp_filter *seccomp_prepare_user_filter(char __user *user_filter) +static struct seccomp_filter * +seccomp_prepare_user_filter(const char __user *user_filter) { struct sock_fprog fprog; struct seccomp_filter *filter = ERR_PTR(-EFAULT); @@ -325,19 +326,25 @@ out: /** * seccomp_attach_filter: validate and attach filter + * @flags: flags to change filter behavior * @filter: seccomp filter to add to the current process * * Caller must be holding current->sighand->siglock lock. * * Returns 0 on success, -ve on error. */ -static long seccomp_attach_filter(struct seccomp_filter *filter) +static long seccomp_attach_filter(unsigned int flags, + struct seccomp_filter *filter) { unsigned long total_insns; struct seccomp_filter *walker; BUG_ON(!spin_is_locked(>sighand->siglock)); + /* Validate flags. */ + if (flags != 0) + return -EINVAL; + /* Validate resulting filter length. */ total_insns = filter->len; for (walker = current->seccomp.filter; walker; walker = filter->prev) @@ -541,6 +548,7 @@ out: #ifdef CONFIG_SECCOMP_FILTER /** * seccomp_set_mode_filter:
[PATCH v6 6/9] seccomp: add seccomp syscall
This adds the new seccomp syscall with both an operation and flags parameter for future expansion. The third argument is a pointer value, used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...). Signed-off-by: Kees Cook keesc...@chromium.org Cc: linux-...@vger.kernel.org --- arch/x86/syscalls/syscall_32.tbl |1 + arch/x86/syscalls/syscall_64.tbl |1 + include/linux/syscalls.h |2 ++ include/uapi/asm-generic/unistd.h |4 ++- include/uapi/linux/seccomp.h |4 +++ kernel/seccomp.c | 63 - kernel/sys_ni.c |3 ++ 7 files changed, 69 insertions(+), 9 deletions(-) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index d6b867921612..7527eac24122 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -360,3 +360,4 @@ 351i386sched_setattr sys_sched_setattr 352i386sched_getattr sys_sched_getattr 353i386renameat2 sys_renameat2 +354i386seccomp sys_seccomp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index ec255a1646d2..16272a6c12b7 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -323,6 +323,7 @@ 314common sched_setattr sys_sched_setattr 315common sched_getattr sys_sched_getattr 316common renameat2 sys_renameat2 +317common seccomp sys_seccomp # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0881a0ed322..1713977ee26f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); +asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, + const char __user *uargs); #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 333640608087..65acbf0e2867 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) __SYSCALL(__NR_sched_getattr, sys_sched_getattr) #define __NR_renameat2 276 __SYSCALL(__NR_renameat2, sys_renameat2) +#define __NR_seccomp 277 +__SYSCALL(__NR_seccomp, sys_seccomp) #undef __NR_syscalls -#define __NR_syscalls 277 +#define __NR_syscalls 278 /* * All syscalls below here should go away really, diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index ac2dc9f72973..b258878ba754 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -10,6 +10,10 @@ #define SECCOMP_MODE_STRICT1 /* uses hard-coded filter. */ #define SECCOMP_MODE_FILTER2 /* uses user-supplied filter. */ +/* Valid operations for seccomp syscall. */ +#define SECCOMP_SET_MODE_STRICT0 +#define SECCOMP_SET_MODE_FILTER1 + /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 39d32c2904fc..c0cafa9e84af 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -19,6 +19,7 @@ #include linux/sched.h #include linux/seccomp.h #include linux/slab.h +#include linux/syscalls.h /* #define SECCOMP_DEBUG 1 */ @@ -301,8 +302,8 @@ free_prog: * * Returns filter on success and ERR_PTR otherwise. */ -static -struct seccomp_filter *seccomp_prepare_user_filter(char __user *user_filter) +static struct seccomp_filter * +seccomp_prepare_user_filter(const char __user *user_filter) { struct sock_fprog fprog; struct seccomp_filter *filter = ERR_PTR(-EFAULT); @@ -325,19 +326,25 @@ out: /** * seccomp_attach_filter: validate and attach filter + * @flags: flags to change filter behavior * @filter: seccomp filter to add to the current process * * Caller must be holding current-sighand-siglock lock. * * Returns 0 on success, -ve on error. */ -static long seccomp_attach_filter(struct seccomp_filter *filter) +static long seccomp_attach_filter(unsigned int flags, + struct seccomp_filter *filter) { unsigned long total_insns; struct seccomp_filter *walker; BUG_ON(!spin_is_locked(current-sighand-siglock)); + /* Validate flags. */ + if (flags != 0) + return -EINVAL; + /* Validate resulting filter length. */ total_insns = filter-len; for (walker = current-seccomp.filter; walker; walker = filter-prev) @@ -541,6 +548,7