Introduce a new setsockopt() command: int fd; setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER_EBPF, &fd, sizeof(fd));
fd is associated with an eBPF program previously loaded via: fd = syscall(__NR_bpf, BPF_PROG_LOAD, BPF_PROG_TYPE_SOCKET_FILTER, &prog, sizeof(prog)); setsockopt() calls bpf_prog_get(), which increments the refcnt of the program so that it doesn't get unloaded while the socket is using it. The same eBPF program can be attached to different sockets. Program exit automatically closes the socket, which calls sk_filter_uncharge(), which decrements the refcnt of the eBPF program. Signed-off-by: Alexei Starovoitov <a...@plumgrid.com> --- arch/alpha/include/uapi/asm/socket.h | 2 + arch/avr32/include/uapi/asm/socket.h | 2 + arch/cris/include/uapi/asm/socket.h | 2 + arch/frv/include/uapi/asm/socket.h | 2 + arch/ia64/include/uapi/asm/socket.h | 2 + arch/m32r/include/uapi/asm/socket.h | 2 + arch/mips/include/uapi/asm/socket.h | 2 + arch/mn10300/include/uapi/asm/socket.h | 2 + arch/parisc/include/uapi/asm/socket.h | 2 + arch/powerpc/include/uapi/asm/socket.h | 2 + arch/s390/include/uapi/asm/socket.h | 2 + arch/sparc/include/uapi/asm/socket.h | 2 + arch/xtensa/include/uapi/asm/socket.h | 2 + include/linux/filter.h | 1 + include/uapi/asm-generic/socket.h | 2 + net/core/filter.c | 112 ++++++++++++++++++++++++++++++++ net/core/sock.c | 13 ++++ 17 files changed, 154 insertions(+) diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 3de1394bcab8..8c83c376b5ba 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -87,4 +87,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index 6e6cd159924b..498ef7220466 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -80,4 +80,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _UAPI__ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/uapi/asm/socket.h 
b/arch/cris/include/uapi/asm/socket.h index ed94e5ed0a23..0d5120724780 100644 --- a/arch/cris/include/uapi/asm/socket.h +++ b/arch/cris/include/uapi/asm/socket.h @@ -82,6 +82,8 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index ca2c6e6f31c6..81fba267c285 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -80,5 +80,7 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index a1b49bac7951..9cbb2e82fa7c 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -89,4 +89,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h index 6c9a24b3aefa..587ac2fb4106 100644 --- a/arch/m32r/include/uapi/asm/socket.h +++ b/arch/m32r/include/uapi/asm/socket.h @@ -80,4 +80,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index a14baa218c76..ab1aed2306db 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -98,4 +98,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index 6aa3ce1854aa..1c4f916d0ef1 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -80,4 +80,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 
fe35ceacf0e7..d189bb79ca07 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -79,4 +79,6 @@ #define SO_BPF_EXTENSIONS 0x4029 +#define SO_ATTACH_FILTER_EBPF 0x402a + #endif /* _UAPI_ASM_SOCKET_H */ diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index a9c3e2e18c05..88488f24ae7f 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -87,4 +87,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index e031332096d7..c5f26af90366 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -86,4 +86,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 54d9608681b6..667ed3fa63f2 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -76,6 +76,8 @@ #define SO_BPF_EXTENSIONS 0x0032 +#define SO_ATTACH_FILTER_EBPF 0x0033 + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index 39acec0cf0b1..24f3e4434979 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -91,4 +91,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 822b310e75e1..5a310ed28fbb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -73,6 +73,7 @@ int sk_unattached_filter_create(struct sk_filter **pfp, void sk_unattached_filter_destroy(struct sk_filter *fp); int 
sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); +int sk_attach_filter_ebpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); int sk_chk_filter(const struct sock_filter *filter, unsigned int flen); diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index ea0796bdcf88..f41844e9ac07 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -82,4 +82,6 @@ #define SO_BPF_EXTENSIONS 48 +#define SO_ATTACH_FILTER_EBPF 49 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/core/filter.c b/net/core/filter.c index 255dba1bb678..ea929fed67b4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -44,6 +44,7 @@ #include <linux/ratelimit.h> #include <linux/seccomp.h> #include <linux/if_vlan.h> +#include <linux/bpf.h> /** * sk_filter - run a packet through a socket filter @@ -1117,6 +1118,117 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) } EXPORT_SYMBOL_GPL(sk_attach_filter); +int sk_attach_filter_ebpf(u32 ufd, struct sock *sk) +{ + struct sk_filter *fp, *old_fp; + + if (sock_flag(sk, SOCK_FILTER_LOCKED)) + return -EPERM; + + fp = bpf_prog_get(ufd); + if (!fp) + return -EINVAL; + + if (fp->info->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) { + /* valid fd, but invalid program type */ + sk_filter_release(fp); + return -EINVAL; + } + + old_fp = rcu_dereference_protected(sk->sk_filter, + sock_owned_by_user(sk)); + rcu_assign_pointer(sk->sk_filter, fp); + + if (old_fp) + sk_filter_uncharge(sk, old_fp); + + return 0; +} + +static struct bpf_func_proto sock_filter_funcs[] = { + [BPF_FUNC_map_lookup_elem] = { + .func = bpf_map_lookup_elem, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_OR_NULL, + .arg1_type = ARG_CONST_MAP_ID, + .arg2_type = ARG_PTR_TO_MAP_KEY, + }, + [BPF_FUNC_map_update_elem] = { + .func = bpf_map_update_elem, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_ID, + .arg2_type = ARG_PTR_TO_MAP_KEY, + .arg3_type = 
ARG_PTR_TO_MAP_VALUE, + }, + [BPF_FUNC_map_delete_elem] = { + .func = bpf_map_delete_elem, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_ID, + .arg2_type = ARG_PTR_TO_MAP_KEY, + }, +}; + +/* allow socket filters to call + * bpf_map_lookup_elem(), bpf_map_update_elem(), bpf_map_delete_elem() + */ +static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func_id) +{ + if (func_id < 0 || func_id >= ARRAY_SIZE(sock_filter_funcs)) + return NULL; + return &sock_filter_funcs[func_id]; +} + +static const struct bpf_context_access { + int size; + enum bpf_access_type type; +} sock_filter_ctx_access[] = { + [offsetof(struct sk_buff, mark)] = { + FIELD_SIZEOF(struct sk_buff, mark), BPF_READ + }, + [offsetof(struct sk_buff, protocol)] = { + FIELD_SIZEOF(struct sk_buff, protocol), BPF_READ + }, + [offsetof(struct sk_buff, queue_mapping)] = { + FIELD_SIZEOF(struct sk_buff, queue_mapping), BPF_READ + }, +}; + +/* allow socket filters to access to 'mark', 'protocol' and 'queue_mapping' + * fields of 'struct sk_buff' + */ +static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type) +{ + const struct bpf_context_access *access; + + if (off < 0 || off >= ARRAY_SIZE(sock_filter_ctx_access)) + return false; + + access = &sock_filter_ctx_access[off]; + if (access->size == size && (access->type & type)) + return true; + + return false; +} + +static struct bpf_verifier_ops sock_filter_ops = { + .get_func_proto = sock_filter_func_proto, + .is_valid_access = sock_filter_is_valid_access, +}; + +static struct bpf_prog_type_list tl = { + .ops = &sock_filter_ops, + .type = BPF_PROG_TYPE_SOCKET_FILTER, +}; + +static int __init register_sock_filter_ops(void) +{ + bpf_register_prog_type(&tl); + return 0; +} +late_initcall(register_sock_filter_ops); + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/net/core/sock.c b/net/core/sock.c index 026e01f70274..005d5683ef5c 100644 --- a/net/core/sock.c +++ 
b/net/core/sock.c @@ -895,6 +895,19 @@ set_rcvbuf: } break; + case SO_ATTACH_FILTER_EBPF: + ret = -EINVAL; + if (optlen == sizeof(u32)) { + u32 ufd; + + ret = -EFAULT; + if (copy_from_user(&ufd, optval, sizeof(ufd))) + break; + + ret = sk_attach_filter_ebpf(ufd, sk); + } + break; + case SO_DETACH_FILTER: ret = sk_detach_filter(sk); break; -- 1.7.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/