On Thu, Dec 6, 2018 at 5:47 PM Kees Cook <keesc...@chromium.org> wrote: > > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov > <david.abdurachma...@gmail.com> wrote: > > > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > > > Signed-off-by: David Abdurachmanov <david.abdurachma...@gmail.com> > > --- > > arch/riscv/Kconfig | 14 ++++++++++++++ > > arch/riscv/include/asm/thread_info.h | 5 ++++- > > arch/riscv/kernel/entry.S | 27 +++++++++++++++++++++++++-- > > arch/riscv/kernel/ptrace.c | 8 ++++++++ > > 4 files changed, 51 insertions(+), 3 deletions(-) > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > index a4f48f757204..49cd8e251547 100644 > > --- a/arch/riscv/Kconfig > > +++ b/arch/riscv/Kconfig > > @@ -29,6 +29,7 @@ config RISCV > > select GENERIC_SMP_IDLE_THREAD > > select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A > > select HAVE_ARCH_AUDITSYSCALL > > + select HAVE_ARCH_SECCOMP_FILTER > > select HAVE_MEMBLOCK_NODE_MAP > > select HAVE_DMA_CONTIGUOUS > > select HAVE_FUTEX_CMPXCHG if FUTEX > > @@ -228,6 +229,19 @@ menu "Kernel features" > > > > source "kernel/Kconfig.hz" > > > > +config SECCOMP > > + bool "Enable seccomp to safely compute untrusted bytecode" > > + help > > + This kernel feature is useful for number crunching applications > > + that may need to compute untrusted bytecode during their > > + execution. By using pipes or other transports made available to > > + the process as file descriptors supporting the read/write > > + syscalls, it's possible to isolate those applications in > > + their own address space using seccomp. Once seccomp is > > + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled > > + and the task is only allowed to execute a few safe syscalls > > + defined by each seccomp mode. 
> > + > > endmenu > > > > menu "Boot options" > > diff --git a/arch/riscv/include/asm/thread_info.h > > b/arch/riscv/include/asm/thread_info.h > > index 1c9cc8389928..1fd6e4130cab 100644 > > --- a/arch/riscv/include/asm/thread_info.h > > +++ b/arch/riscv/include/asm/thread_info.h > > @@ -81,6 +81,7 @@ struct thread_info { > > #define TIF_MEMDIE 5 /* is terminating due to OOM killer > > */ > > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint > > instrumentation */ > > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ > > +#define TIF_SECCOMP 8 /* syscall secure computing > > */ > > > > #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) > > #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) > > @@ -88,11 +89,13 @@ struct thread_info { > > #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) > > #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) > > #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) > > +#define _TIF_SECCOMP (1 << TIF_SECCOMP) > > > > #define _TIF_WORK_MASK \ > > (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED) > > > > #define _TIF_SYSCALL_WORK \ > > - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) > > + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT \ > > + _TIF_SECCOMP ) > > > > #endif /* _ASM_RISCV_THREAD_INFO_H */ > > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > > index 355166f57205..e88ccbfa61ee 100644 > > --- a/arch/riscv/kernel/entry.S > > +++ b/arch/riscv/kernel/entry.S > > @@ -207,8 +207,25 @@ check_syscall_nr: > > /* Check to make sure we don't jump to a bogus syscall number. */ > > li t0, __NR_syscalls > > la s0, sys_ni_syscall > > - /* Syscall number held in a7 */ > > - bgeu a7, t0, 1f > > + /* > > + * The tracer can change syscall number to valid/invalid value. > > + * We use syscall_set_nr helper in syscall_trace_enter thus we > > + * cannot trust the current value in a7 and have to reload from > > + * the current task pt_regs. 
> > + */ > > + REG_L a7, PT_A7(sp) > > + /* > > + * Syscall number held in a7. > > + * If syscall number is above allowed value, redirect to ni_syscall. > > + */ > > + bge a7, t0, 1f > > + /* > > + * Check if syscall is rejected by tracer or seccomp, i.e., a7 == > > -1. > > + * If yes, we pretend it was executed. > > + */ > > + li t1, -1 > > + beq a7, t1, ret_from_syscall_rejected > > + /* Call syscall */ > > la s0, sys_call_table > > slli t0, a7, RISCV_LGPTR > > add s0, s0, t0 > > @@ -219,6 +236,12 @@ check_syscall_nr: > > ret_from_syscall: > > /* Set user a0 to kernel a0 */ > > REG_S a0, PT_A0(sp) > > + /* > > + * We didn't execute the actual syscall. > > + * Seccomp already set return value for the current task pt_regs. > > + * (If it was configured with SECCOMP_RET_ERRNO/TRACE) > > + */ > > +ret_from_syscall_rejected: > > /* Trace syscalls, but only if requested by the user. */ > > REG_L t0, TASK_TI_FLAGS(tp) > > andi t0, t0, _TIF_SYSCALL_WORK > > diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c > > index c1b51539c3e2..598e48b8ca2b 100644 > > --- a/arch/riscv/kernel/ptrace.c > > +++ b/arch/riscv/kernel/ptrace.c > > @@ -160,6 +160,14 @@ void do_syscall_trace_enter(struct pt_regs *regs) > > if (tracehook_report_syscall_entry(regs)) > > syscall_set_nr(current, regs, -1); > > > > + /* > > + * Do the secure computing after ptrace; failures should be fast. > > + * If this fails we might have return value in a0 from seccomp > > + * (via SECCOMP_RET_ERRNO/TRACE). > > + */ > > + if (secure_computing(NULL) == -1) > > + syscall_set_nr(current, regs, -1); > > On a -1 return, this should return immediately -- it should not > continue to process trace_sys_enter(), etc.
Oops! No idea how I missed that. Will fix it. > -Kees > > > + > > #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS > > if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) > > trace_sys_enter(regs, syscall_get_nr(current, regs)); > > -- > > 2.19.2 > > > > > -- > Kees Cook