Here is a trivial module to implement the seccomp guts via utrace. I haven't tested it at all. (AFAIK seccomp was only ever used by cpushare, and that project might be defunct now.)
I'm not sure what Ingo had in mind for integrating this. If it's just to reimplement the existing prctl interface, then this is about all you need--just s/_xxx// and fiddle the config et al to build this and not the old stuff. If the approach would be incremental, to leave the old stuff in place, then it might make more sense just to do a fresh new thing not providing that prctl interface at all. A new thing could be a module, and define some /sys files or whatnot for its "constrain me now" hook. I think a sensible thing would not require asm/seccomp.h at all, and instead just let the userland setup feed in a set of syscall numbers. It could be that flexible while still being quite simple so that one could audit that setup code and be confident it has no holes. Then future versions of cpushare (or whatever) would not need any special kernel support for new arch's nor to change the syscall set it wants to allow. Thanks, Roland ===== #include <linux/sched.h> #include <linux/utrace.h> #include <linux/signal.h> #include <linux/err.h> #include <linux/module.h> #include <linux/compat.h> #include <linux/prctl.h> #include <asm/seccomp.h> #include <asm/syscall.h> MODULE_DESCRIPTION("secure computing"); MODULE_LICENSE("GPL"); static int insecure_signal; module_param_named(signal, insecure_signal, int, SIGKILL); /* * If it's an accepted syscall, run it normally. * If not, send ourselves a SIGKILL and abort the syscall. 
*/ static u32 secure_syscall_entry(u32 action, struct utrace_engine *engine, struct task_struct *task, struct pt_regs *regs) { int callno = syscall_get_nr(task, regs); #ifdef CONFIG_COMPAT if (is_compat_task()) switch (callno) { case __NR_seccomp_read_32: case __NR_seccomp_write_32: case __NR_seccomp_exit_32: case __NR_seccomp_sigreturn_32: return UTRACE_RESUME | UTRACE_SYSCALL_RUN; } else #endif switch (callno) { case __NR_seccomp_read: case __NR_seccomp_write: case __NR_seccomp_exit: case __NR_seccomp_sigreturn: return UTRACE_RESUME | UTRACE_SYSCALL_RUN; } force_sig(insecure_signal, task); return UTRACE_RESUME | UTRACE_SYSCALL_ABORT; } static const struct utrace_engine_ops secure_syscall_ops = { .report_syscall_entry = secure_syscall_entry }; /* * Set up a utrace engine to call secure_syscall_entry() for each system call. * Also act like prctl(PR_SET_TSC, PR_TSC_SIGSEGV). */ static int enable_secure_syscall(void) { struct utrace_engine *engine; int ret; engine = utrace_attach_task(current, UTRACE_ATTACH_CREATE | UTRACE_ATTACH_EXCLUSIVE | UTRACE_ATTACH_MATCH_OPS, &secure_syscall_ops, NULL); if (IS_ERR(engine)) { ret = PTR_ERR(engine); return ret == -EEXIST ? -EPERM : ret; } ret = utrace_set_events(current, engine, UTRACE_EVENT(SYSCALL_ENTRY)); WARN_ON(ret); /* Should never happen on current. */ /* * This is the only outside ref on the engine. * The engine dies automatically when this task gets reaped. */ utrace_engine_put(engine); #ifdef SET_TSC_CTL if (!ret) SET_TSC_CTL(PR_TSC_SIGSEGV); #endif return ret; } long prctl_get_seccomp_xxx(void) { struct utrace_engine *engine = utrace_attach_task( current, UTRACE_ATTACH_MATCH_OPS, &secure_syscall_ops, NULL); if (engine == ERR_PTR(-ENOENT)) return 0; if (!IS_ERR(engine)) /* * I wonder how he managed to call prctl() with it enabled. * That should be impossible. 
*/ return 1; return PTR_ERR(engine); } long prctl_set_seccomp_xxx(unsigned long seccomp_mode) { if (seccomp_mode != 1) return -EINVAL; return enable_secure_syscall(); }