=== added file 'init/seccomp_filter.c'
--- init/seccomp_filter.c	1970-01-01 00:00:00 +0000
+++ init/seccomp_filter.c	2013-02-19 23:45:47 +0000
@@ -0,0 +1,312 @@
+/* seccomp addon for upstart
+ *
+ * Author: David Gaarenstroom <david.gaarenstroom@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+/* C Library includes */
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Linux specific includes */
+#include <linux/audit.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <linux/unistd.h>
+#include <sys/syscall.h>
+#include <sys/prctl.h>
+
+#include <nih/logging.h>
+
+/* Project includes */
+#include "seccomp_filter.h"
+#include "errno-from-name.h"
+#include "syscall-from-name.h"
+
+/* ARCH_NR: AUDIT_ARCH_* value the filter requires; arm, i386 and x86_64 only */
+#if defined(__arm__)
+#define ARCH_NR		AUDIT_ARCH_ARM
+#elif defined(__i386__)
+#define ARCH_NR		AUDIT_ARCH_I386
+#elif defined(__x86_64__)
+#define ARCH_NR		AUDIT_ARCH_X86_64
+#else
+#warning "Platform does not support seccomp filter yet"
+#define ARCH_NR		0	/* fallback; the arch check then compares against 0 */
+#endif
+
+#define syscall_nr	(offsetof(struct seccomp_data, nr))	/* offset of syscall number */
+#define arch_nr		(offsetof(struct seccomp_data, arch))	/* offset of audit arch */
+/* BPF shorthands: load word at offset a; skip t (equal to v) or f insns; return a */
+#define LOAD_TO_K(a)	(struct sock_filter)BPF_STMT(BPF_LD+BPF_W+BPF_ABS, a)
+#define JMP_EQ_K(v,t,f)	(struct sock_filter)BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, v, t, f)
+#define RET_K(a)	(struct sock_filter)BPF_STMT(BPF_RET+BPF_K, a)
+#define RET_ALLOW	RET_K(SECCOMP_RET_ALLOW)
+#define RET_KILL	RET_K(SECCOMP_RET_KILL)
+
+/**
+ * ACT_TYPE:
+ *
+ * Pairs the textual name of a rule policy with its seccomp action value.
+ */
+typedef struct {
+	int len;		/* length of @text in bytes, terminator excluded */
+	const char *text;	/* policy name; lookup keys are not NUL-terminated */
+	__u32 action;		/* SECCOMP_RET_* value for this policy */
+} ACT_TYPE;
+
+/**
+ * actions:
+ *
+ * Table translating a policy name into its seccomp return action.
+ */
+static const ACT_TYPE actions[] = {
+	/* Must stay ordered by len first, then by text (it is bsearch-ed) */
+	{ .len = 4, .text = "kill",  .action = SECCOMP_RET_KILL  },
+	{ .len = 4, .text = "trap",  .action = SECCOMP_RET_TRAP  },
+	{ .len = 5, .text = "allow", .action = SECCOMP_RET_ALLOW },
+	{ .len = 5, .text = "errno", .action = SECCOMP_RET_ERRNO },
+	{ .len = 5, .text = "trace", .action = SECCOMP_RET_TRACE },
+};
+
+#define ACT_SIZE	(sizeof(actions[0]))
+#define ACT_COUNT	(sizeof(actions) / sizeof(*actions))
+
+/**
+ * ACT_CMP:
+ * @l: left ACT_TYPE struct for comparison
+ * @r: right ACT_TYPE struct for comparison
+ *
+ * Order two ACT_TYPE structs by "len" first and by the bytes of "text"
+ * second, i.e. the order the actions table is kept in (bsearch callback).
+ */
+static int ACT_CMP(const void *l, const void *r)
+{
+	const ACT_TYPE *lhs = l;
+	const ACT_TYPE *rhs = r;
+
+	if (lhs->len != rhs->len)
+		return lhs->len - rhs->len;
+
+	return memcmp(lhs->text, rhs->text, lhs->len);
+}
+
+/**
+ * install_seccomp_filter:
+ * @rcount: rule count
+ * @rules: series of @separators terminated rule strings
+ * @invert: invert rule-handling (disable specified rules, enable unlisted)
+ * @nnp: set PR_SET_NO_NEW_PRIVS
+ * @separators: rule separator characters
+ *
+ * Install a custom seccomp filter, based on the set of rules given.
+ * A rule is "syscall", "syscall:policy" or "syscall:policy(arg)", where
+ * arg is an errno name or an integer (used by the errno and trap policies).
+ *
+ * If @invert is 0, any syscall explicitly listed without a policy is allowed
+ * and syscalls not listed at all are denied. Otherwise, listed syscalls are
+ * denied by default and syscalls not listed are allowed.
+ *
+ * Returns: 0 if the filter was installed, 1 if PR_SET_SECCOMP failed.
+ * Rules that cannot be parsed only produce a warning.
+ */
+int install_seccomp_filter(int rcount, const char *rules, int invert, int nnp,
+			   const char *separators)
+{
+	size_t j = 0;
+	const char *rule;
+	struct sock_fprog prog;
+
+	/* A negative count would give the array below a non-positive size */
+	if (rcount < 0)
+		rcount = 0;
+
+	/* Estimated filter size:
+	 * - Header: 4 statements
+	 * - Custom: 2 statements per rule
+	 * - Footer: 16 or 17 statements */
+#ifdef __NR_sigreturn
+	struct sock_filter f[4 + 2 * rcount + 17];
+#else
+	struct sock_filter f[4 + 2 * rcount + 16];
+#endif
+
+	/* Validate Architecture */
+	f[j++] = LOAD_TO_K(arch_nr);
+	f[j++] = JMP_EQ_K(ARCH_NR, 1, 0);
+	f[j++] = RET_KILL;
+
+	/* Examine syscall */
+	f[j++] = LOAD_TO_K(syscall_nr);
+
+	/* Point to the first user-supplied rule */
+	rule = rules;
+
+	/* Iterate over all user-supplied rules */
+	for (int i = 0; rule && i < rcount; i++) {
+		const struct syscall_name *sc;
+		__u32 action;
+		int len = strlen(rule);
+		const char *policy;
+		const char *nextrule;
+
+		/* Find end of syscall rule */
+		nextrule = strpbrk(rule, separators);
+		if (nextrule != NULL) {
+			/* Update length */
+			len = nextrule - rule;
+			/* Point to the next rule */
+			nextrule += strspn(nextrule, separators);
+		}
+
+		/* Set default actiontype */
+		action = invert ? SECCOMP_RET_KILL : SECCOMP_RET_ALLOW;
+
+		/* Check whether an action type was supplied */
+		policy = memchr(rule, ':', len);
+		if (policy != NULL) {
+			int policylen;
+			int retdata = EACCES;
+			const char *arg;
+			ACT_TYPE key;
+			const ACT_TYPE *res;
+
+			/* Split "name:policy": the policy starts past the
+			 * colon, the syscall name ends just before it */
+			policy++;
+			policylen = (rule + len) - policy;
+			len = policy - rule - 1;
+
+			/* Parse optional return data (for trap/errno) */
+			arg = memchr(policy, '(', policylen);
+			if (arg != NULL) {
+				int arglen;
+				const char *endp;
+				const struct errno_name *errno_entry;
+
+				/* Split "policy(arg": the argument starts
+				 * past the paren, the policy ends before it */
+				arg++;
+				arglen = (policy + policylen) - arg;
+				policylen = arg - policy - 1;
+
+				/* Find close paren for keylookup */
+				endp = memchr(arg, ')', arglen);
+				if (endp != NULL)
+					arglen = endp - arg;
+
+				/* Lookup errno name, if possible */
+				errno_entry = lookup_errno(arg, arglen);
+				if (errno_entry) {
+					retdata = errno_entry->id;
+				} else {
+					char *ch;
+
+					/* No errno name, assume int value (ch
+					 * stays at arg if there are no digits) */
+					retdata = strtol(arg, &ch, 0);
+					if (ch == arg || *ch != ')') {
+						nih_warn
+						    ("Unknown policy argument: \"%.*s\"\n",
+						     arglen, arg);
+						retdata = EACCES;
+					}
+				}
+			}
+
+			/* Lookup supplied policy */
+			key.text = policy;
+			key.len = policylen;
+			res = bsearch(&key, actions, ACT_COUNT, ACT_SIZE,
+				      ACT_CMP);
+			if (res != NULL) {
+				action = res->action;
+				if (res->action == SECCOMP_RET_ERRNO
+				    || res->action == SECCOMP_RET_TRAP)
+					action |= retdata & SECCOMP_RET_DATA;
+			} else {
+				nih_warn("Unknown rule policy: \"%.*s\"\n",
+					 policylen, policy);
+			}
+		}
+
+		/* Lookup the syscall */
+		sc = lookup_syscall(rule, len);
+		if (sc != NULL) {
+			f[j++] = JMP_EQ_K(sc->id, 0, 1);
+			f[j++] = RET_K(action);
+		} else
+			nih_warn("Unknown syscall: \"%.*s\"\n", len, rule);
+
+		/* Point to the next rule */
+		rule = nextrule;
+	}
+
+	/* Predefined ruleset: allow exit, exit_group, sigreturn, rt_sigreturn */
+#ifdef __NR_sigreturn
+	f[j++] = JMP_EQ_K(__NR_sigreturn, 14, 0);
+#endif
+	f[j++] = JMP_EQ_K(__NR_rt_sigreturn, 13, 0);
+	f[j++] = JMP_EQ_K(__NR_exit, 12, 0);
+	f[j++] = JMP_EQ_K(__NR_exit_group, 11, 0);
+
+	/* Allow execve and related calls: read, open, close, fstat, mmap,
+	 * mprotect, munmap, brk, access, execve, arch_prctl */
+	f[j++] = JMP_EQ_K(__NR_read, 10, 0);
+	f[j++] = JMP_EQ_K(__NR_open, 9, 0);
+	f[j++] = JMP_EQ_K(__NR_close, 8, 0);
+	f[j++] = JMP_EQ_K(__NR_fstat, 7, 0);
+	f[j++] = JMP_EQ_K(__NR_mmap, 6, 0);
+	f[j++] = JMP_EQ_K(__NR_mprotect, 5, 0);
+	f[j++] = JMP_EQ_K(__NR_munmap, 4, 0);
+	f[j++] = JMP_EQ_K(__NR_brk, 3, 0);
+	f[j++] = JMP_EQ_K(__NR_access, 2, 0);
+	f[j++] = JMP_EQ_K(__NR_execve, 1, 0);
+	f[j++] = JMP_EQ_K(__NR_arch_prctl, 0, 1);
+	f[j++] = RET_ALLOW;	/* landing point of every predefined match above */
+
+	/* Default policy */
+	f[j++] = invert ? RET_ALLOW : RET_KILL;
+
+	if (nnp && prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		nih_warn("prctl(NO_NEW_PRIVS): %s", strerror(errno));
+		/* Although it is vital that setting NO_NEW_PRIVS does not
+		 * fail (when not run as root), continue to try to install
+		 * the seccomp filter anyway.
+		 */
+	}
+
+	prog.len = j;
+	prog.filter = f;
+
+	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
+		/* Save errno: the logging calls below may overwrite it */
+		int saved_errno = errno;
+
+		nih_warn("prctl(PR_SET_SECCOMP): %s", strerror(saved_errno));
+		if (saved_errno == EINVAL)
+			nih_warn("SECCOMP_FILTER is not available. :(\n");
+		return 1;
+	}
+
+	return 0;
+}

