I added a syscall struct using priority levels as described in the libseccomp man page. The priority numbers are based on how frequently each syscall appears in a sample strace of a regular qemu guest run under libvirt.
Libseccomp generates linear BPF code to filter system calls; those rules are
read one after another. The priority system places the most common rules first
in order to reduce the overhead when processing them.

Also, since this is just a first RFC, the whitelist is a little raw. We might
need your help to improve, test and fine-tune the set of system calls.

v2: Fixed some style issues
    Removed code from vl.c and created qemu-seccomp.[ch]
    Now using ARRAY_SIZE macro
    Added more syscalls without priority/frequency set yet

Signed-off-by: Eduardo Otubo <ot...@linux.vnet.ibm.com>
---
 qemu-seccomp.c |   87 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 qemu-seccomp.h |   16 ++++++++++
 vl.c           |    7 +++++
 3 files changed, 110 insertions(+)
 create mode 100644 qemu-seccomp.c
 create mode 100644 qemu-seccomp.h

diff --git a/qemu-seccomp.c b/qemu-seccomp.c
new file mode 100644
--- /dev/null
+++ b/qemu-seccomp.c
@@ -0,0 +1,87 @@
+#include <stdio.h>
+#include <seccomp.h>
+#include "qemu-seccomp.h"
+
+/*
+ * Syscalls the process may still issue once the filter is loaded, each
+ * paired with the priority hint handed to seccomp_syscall_priority().
+ * Higher values mark the more frequent syscalls.  List each syscall only
+ * once: seccomp_rule_add() is called for every entry.
+ */
+static const struct QemuSeccompSyscall seccomp_whitelist[] = {
+    { SCMP_SYS(timer_settime), 255 },
+    { SCMP_SYS(timer_gettime), 254 },
+    { SCMP_SYS(futex), 253 },
+    { SCMP_SYS(select), 252 },
+    { SCMP_SYS(recvfrom), 251 },
+    { SCMP_SYS(sendto), 250 },
+    { SCMP_SYS(read), 249 },
+    { SCMP_SYS(brk), 248 },
+    { SCMP_SYS(clone), 247 },
+    { SCMP_SYS(mmap), 247 },
+    { SCMP_SYS(mprotect), 246 },
+    { SCMP_SYS(ioctl), 245 },
+    { SCMP_SYS(recvmsg), 245 },
+    { SCMP_SYS(sendmsg), 245 },
+    { SCMP_SYS(accept), 245 },
+    { SCMP_SYS(connect), 245 },
+    { SCMP_SYS(bind), 245 },
+    { SCMP_SYS(listen), 245 },
+    { SCMP_SYS(eventfd), 245 },
+    { SCMP_SYS(rt_sigprocmask), 245 },
+    { SCMP_SYS(write), 244 },
+    { SCMP_SYS(fcntl), 243 },
+    { SCMP_SYS(tgkill), 242 },
+    { SCMP_SYS(rt_sigaction), 242 },
+    { SCMP_SYS(pipe2), 242 },
+    { SCMP_SYS(munmap), 242 },
+    { SCMP_SYS(mremap), 242 },
+    { SCMP_SYS(getsockname), 242 },
+    { SCMP_SYS(getpeername), 242 },
+    { SCMP_SYS(fdatasync), 242 },
+    { SCMP_SYS(close), 242 }
+};
+
+#define seccomp_whitelist_count ARRAY_SIZE(seccomp_whitelist)
+
+/*
+ * Build the whitelist filter and load it into the kernel.
+ *
+ * The filter is initialised with SCMP_ACT_KILL as its default action and
+ * gets one SCMP_ACT_ALLOW rule per seccomp_whitelist entry.
+ *
+ * Returns the result of the last libseccomp call; a negative value means
+ * failure, in which case an error message is also printed to stderr.
+ */
+int seccomp_start(void)
+{
+    int rc = 0;
+    unsigned int i = 0;
+
+    rc = seccomp_init(SCMP_ACT_KILL);
+    if (rc < 0) {
+        goto seccomp_return;
+    }
+
+    for (i = 0; i < seccomp_whitelist_count; i++) {
+        rc = seccomp_rule_add(SCMP_ACT_ALLOW, seccomp_whitelist[i].num, 0);
+        if (rc < 0) {
+            goto seccomp_return;
+        }
+        rc = seccomp_syscall_priority(seccomp_whitelist[i].num,
+                                      seccomp_whitelist[i].priority);
+        if (rc < 0) {
+            goto seccomp_return;
+        }
+    }
+
+    rc = seccomp_load();
+
+  seccomp_return:
+    seccomp_release();
+    if (rc < 0) {
+        fprintf(stderr,
+            "ERROR: failed to configure the seccomp syscall filter in the kernel\n");
+    }
+    return rc;
+}
diff --git a/qemu-seccomp.h b/qemu-seccomp.h
new file mode 100644
--- /dev/null
+++ b/qemu-seccomp.h
@@ -0,0 +1,16 @@
+#ifndef QEMU_SECCOMP_H
+#define QEMU_SECCOMP_H
+
+#include <seccomp.h>
+#include "osdep.h"
+
+/* One whitelisted syscall and the priority hint passed to libseccomp. */
+struct QemuSeccompSyscall {
+    int32_t num;
+    uint8_t priority;
+};
+
+/* Install the syscall whitelist filter; negative return value on failure. */
+int seccomp_start(void);
+
+#endif /* QEMU_SECCOMP_H */
diff --git a/vl.c b/vl.c
index 204d85b..315afaf 100644
--- a/vl.c
+++ b/vl.c
@@ -61,6 +61,9 @@
 
 #include <linux/ppdev.h>
 #include <linux/parport.h>
+#ifdef CONFIG_LIBSECCOMP
+#include "qemu-seccomp.h"
+#endif
 #endif
 #ifdef __sun__
 #include <sys/stat.h>
@@ -2296,6 +2299,10 @@ int main(int argc, char **argv, char **envp)
     const char *trace_events = NULL;
     const char *trace_file = NULL;
 
+#ifdef CONFIG_LIBSECCOMP
+    seccomp_start();
+#endif
+
     atexit(qemu_run_exit_notifiers);
     error_set_progname(argv[0]);
 
-- 
1.7.9.5