This adds an nftables frontend for the IMR->BPF translator. This doesn't work via UMH yet.
AFAIU it should be possible to get transparent ebpf translation for nftables, similar to the bpfilter/iptables UMH. However, at this time I think it's better to get IMR "right". nftjit.ko currently needs libnftnl/libmnl but that's convenience on my end and not a "must have". Signed-off-by: Florian Westphal <f...@strlen.de> --- net/bpfilter/Makefile | 7 +- net/bpfilter/nftables.c | 679 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 685 insertions(+), 1 deletion(-) create mode 100644 net/bpfilter/nftables.c diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile index 5a85ef7d7a4d..a4064986dc2f 100644 --- a/net/bpfilter/Makefile +++ b/net/bpfilter/Makefile @@ -3,7 +3,12 @@ # Makefile for the Linux BPFILTER layer. # -hostprogs-y := bpfilter.ko +hostprogs-y := nftjit.ko bpfilter.ko always := $(hostprogs-y) bpfilter.ko-objs := bpfilter.o tgts.o targets.o tables.o init.o ctor.o sockopt.o gen.o + +NFT_LIBS = -lnftnl +nftjit.ko-objs := tgts.o targets.o tables.o init.o ctor.o gen.o nftables.o imr.o +HOSTLOADLIBES_nftjit.ko = `pkg-config --libs libnftnl libmnl` + HOSTCFLAGS += -I. -Itools/include/ diff --git a/net/bpfilter/nftables.c b/net/bpfilter/nftables.c new file mode 100644 index 000000000000..5a756ccd03a1 --- /dev/null +++ b/net/bpfilter/nftables.c @@ -0,0 +1,679 @@ +/* + * based on previous code from: + * + * Copyright (c) 2013 Arturo Borrero Gonzalez <art...@netfilter.org> + * Copyright (c) 2013 Pablo Neira Ayuso <pa...@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <time.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <errno.h>
+#include <sys/syscall.h>
+#include <utils.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+
+#include <libmnl/libmnl.h>
+#include <libnftnl/common.h>
+#include <libnftnl/ruleset.h>
+#include <libnftnl/table.h>
+#include <libnftnl/chain.h>
+#include <libnftnl/set.h>
+#include <libnftnl/expr.h>
+#include <libnftnl/rule.h>
+
+#include <linux/if_ether.h>
+
+#include "bpfilter_mod.h"
+#include "imr.h"
+
+/* Hack, we don't link bpfilter.o */
+extern long int syscall (long int __sysno, ...);
+
+/*
+ * The bpf(2) syscall number differs between architectures; 321 is only
+ * correct on x86_64, so it is used solely as a fallback there.
+ */
+#ifndef __NR_bpf
+# if defined(__x86_64__)
+#  define __NR_bpf 321
+# else
+#  error "__NR_bpf is not defined for this architecture"
+# endif
+#endif
+
+/* Thin wrapper around the raw bpf(2) system call. */
+int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
+{
+	return syscall(__NR_bpf, cmd, attr, size);
+}
+
+/* Netlink sequence number, set once in main() and used for all requests. */
+static int seq;
+
+/* Report an allocation failure and terminate the process. */
+static void memory_allocation_error(void)
+{
+	perror("allocation failed");
+	exit(1);
+}
+
+/*
+ * Map an nftables register number to an IMR register number.
+ *
+ * Returns the IMR register, or -1 for registers that have no mapping
+ * (NFT_REG_4 and every 32 bit register past NFT_REG32_02).
+ */
+static int nft_reg_to_imr_reg(int nfreg)
+{
+	switch (nfreg) {
+	case NFT_REG_VERDICT:
+		return IMR_REG_0;
+	/* old register numbers, 4 128 bit registers. */
+	case NFT_REG_1:
+		return IMR_REG_4;
+	case NFT_REG_2:
+		return IMR_REG_6;
+	case NFT_REG_3:
+		return IMR_REG_8;
+	case NFT_REG_4:
+		return -1;
+	/* new register numbers, 16 32 bit registers, map to old ones */
+	case NFT_REG32_00:
+		return IMR_REG_4;
+	case NFT_REG32_01:
+		return IMR_REG_5;
+	case NFT_REG32_02:
+		return IMR_REG_6;
+	default:
+		return -1;
+	}
+}
+
+/*
+ * Translate an "immediate" expression.
+ *
+ * A data immediate (4 or 8 bytes) is stored in the destination register;
+ * a verdict immediate is appended to the current rule.  Jumps/gotos to
+ * other chains and other data sizes are rejected with -EOPNOTSUPP.
+ *
+ * Returns 0 on success or a negative value on failure.
+ */
+static int netlink_parse_immediate(const struct nftnl_expr *nle, void *out)
+{
+	struct imr_state *state = out;
+	struct imr_object *o = NULL;
+
+	if (nftnl_expr_is_set(nle, NFTNL_EXPR_IMM_DATA)) {
+		uint32_t len = 0;
+		int reg;
+
+		nftnl_expr_get(nle, NFTNL_EXPR_IMM_DATA, &len);
+
+		switch (len) {
+		case sizeof(uint32_t):
+			o = imr_object_alloc_imm32(nftnl_expr_get_u32(nle, NFTNL_EXPR_IMM_DATA));
+			break;
+		case sizeof(uint64_t):
+			o = imr_object_alloc_imm64(nftnl_expr_get_u64(nle, NFTNL_EXPR_IMM_DATA));
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+
+		/* Never hand a failed allocation to the register file. */
+		if (!o)
+			return -ENOMEM;
+
+		reg = nft_reg_to_imr_reg(nftnl_expr_get_u32(nle,
+							     NFTNL_EXPR_IMM_DREG));
+		if (reg < 0) {
+			imr_object_free(o);
+			return reg;
+		}
+
+		imr_register_store(state, reg, o);
+		return 0;
+	}
+
+	if (nftnl_expr_is_set(nle, NFTNL_EXPR_IMM_VERDICT)) {
+		uint32_t verdict;
+		int ret;
+
+		if (nftnl_expr_is_set(nle, NFTNL_EXPR_IMM_CHAIN))
+			return -EOPNOTSUPP;
+
+		verdict = nftnl_expr_get_u32(nle, NFTNL_EXPR_IMM_VERDICT);
+
+		switch (verdict) {
+		case NF_ACCEPT:
+			o = imr_object_alloc_verdict(IMR_VERDICT_PASS);
+			break;
+		case NF_DROP:
+			o = imr_object_alloc_verdict(IMR_VERDICT_DROP);
+			break;
+		default:
+			/* Unknown verdicts are translated to a drop. */
+			fprintf(stderr, "Unhandled verdict %d\n", (int)verdict);
+			o = imr_object_alloc_verdict(IMR_VERDICT_DROP);
+			break;
+		}
+
+		if (!o)
+			return -ENOMEM;
+
+		ret = imr_state_add_obj(state, o);
+		if (ret < 0)
+			imr_object_free(o);
+
+		return ret;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Translate a "cmp" expression into an IMR relational object comparing
+ * the contents of the source register with an immediate value.
+ *
+ * Only EQ/NEQ and immediates of 1, 2, 4 or 8 bytes are supported.
+ *
+ * Returns 0 on success or a negative value on failure.
+ */
+static int netlink_parse_cmp(const struct nftnl_expr *nle, void *out)
+{
+	struct imr_object *o, *imm, *left;
+	struct imr_state *state = out;
+	enum imr_relop op;
+	uint32_t tmp, len = 0;
+	int ret;
+
+	switch (nftnl_expr_get_u32(nle, NFTNL_EXPR_CMP_OP)) {
+	case NFT_CMP_EQ:
+		op = IMR_RELOP_EQ;
+		break;
+	case NFT_CMP_NEQ:
+		op = IMR_RELOP_NE;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	/* len stays 0 if the attribute is missing, which is rejected below. */
+	nftnl_expr_get(nle, NFTNL_EXPR_CMP_DATA, &len);
+	switch (len) {
+	case sizeof(uint64_t):
+		imm = imr_object_alloc_imm64(nftnl_expr_get_u64(nle, NFTNL_EXPR_CMP_DATA));
+		break;
+	case sizeof(uint32_t):
+		imm = imr_object_alloc_imm32(nftnl_expr_get_u32(nle, NFTNL_EXPR_CMP_DATA));
+		break;
+	case sizeof(uint16_t):
+		tmp = nftnl_expr_get_u16(nle, NFTNL_EXPR_CMP_DATA);
+
+		imm = imr_object_alloc_imm32(tmp);
+		break;
+	case sizeof(uint8_t):
+		tmp = nftnl_expr_get_u8(nle, NFTNL_EXPR_CMP_DATA);
+
+		imm = imr_object_alloc_imm32(tmp);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (!imm)
+		return -ENOMEM;
+
+	ret = nft_reg_to_imr_reg(nftnl_expr_get_u32(nle, NFTNL_EXPR_CMP_SREG));
+	if (ret < 0) {
+		imr_object_free(imm);
+		return ret;
+	}
+
+	left = imr_register_load(state, ret);
+	if (!left) {
+		imr_object_free(imm);
+		return -EINVAL;
+	}
+
+	o = imr_object_alloc_relational(op, left, imm);
+	if (!o) {
+		/*
+		 * NOTE(review): 'left' is not released here because the
+		 * ownership rules of imr_register_load() are not visible
+		 * from this file -- confirm against imr.c.
+		 */
+		imr_object_free(imm);
+		return -ENOMEM;
+	}
+
+	ret = imr_state_add_obj(state, o);
+	if (ret < 0)
+		imr_object_free(o);
+
+	return ret;
+}
+
+/*
+ * Translate a "payload" load expression: the described packet bytes are
+ * represented as an IMR payload object stored in the destination register.
+ *
+ * Payload writes (SREG set) and payload flags are not supported.
+ *
+ * Returns 0 on success or a negative value on failure.
+ */
+static int netlink_parse_payload(const struct nftnl_expr *nle, void *out)
+{
+	struct imr_state *state = out;
+	enum imr_payload_base imr_base;
+	uint32_t base, offset, len;
+	struct imr_object *payload;
+	int ret;
+
+	if (nftnl_expr_is_set(nle, NFTNL_EXPR_PAYLOAD_SREG) ||
+	    nftnl_expr_is_set(nle, NFTNL_EXPR_PAYLOAD_FLAGS))
+		return -EOPNOTSUPP;
+
+	base = nftnl_expr_get_u32(nle, NFTNL_EXPR_PAYLOAD_BASE);
+	offset = nftnl_expr_get_u32(nle, NFTNL_EXPR_PAYLOAD_OFFSET);
+	len = nftnl_expr_get_u32(nle, NFTNL_EXPR_PAYLOAD_LEN);
+
+	printf("payload: base %u off %u len %u\n", base, offset, len);
+
+	ret = nft_reg_to_imr_reg(nftnl_expr_get_u32(nle, NFTNL_EXPR_PAYLOAD_DREG));
+	if (ret < 0)
+		return ret;
+
+	switch (base) {
+	case NFT_PAYLOAD_LL_HEADER:
+		imr_base = IMR_PAYLOAD_BASE_LL;
+		break;
+	case NFT_PAYLOAD_NETWORK_HEADER:
+		imr_base = IMR_PAYLOAD_BASE_NH;
+		break;
+	case NFT_PAYLOAD_TRANSPORT_HEADER:
+		imr_base = IMR_PAYLOAD_BASE_TH;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	payload = imr_object_alloc_payload(imr_base, offset, len);
+	if (!payload)
+		return -ENOMEM;
+
+	imr_register_store(state, ret, payload);
+	return 0;
+}
+
+/* Expression name -> translator dispatch table. */
+static const struct {
+	const char *name;
+	int (*parse)(const struct nftnl_expr *nle,
+		     void *);
+} netlink_parsers[] = {
+	{ .name = "immediate", .parse = netlink_parse_immediate },
+	{ .name = "cmp", .parse = netlink_parse_cmp },
+	{ .name = "payload", .parse = netlink_parse_payload },
+};
+
+/*
+ * nftnl_expr_foreach() callback: translate one expression of a rule.
+ *
+ * A translator failure or an expression type without a translator is
+ * reported as an error.  Silently skipping either would produce a rule
+ * that matches different packets than the nftables original.
+ *
+ * Returns 0 on success or a negative value, which stops the iteration.
+ */
+static int expr_parse_cb(struct nftnl_expr *expr, void *data)
+{
+	const char *name = nftnl_expr_get_str(expr, NFTNL_EXPR_NAME);
+	struct imr_state *state = data;
+	unsigned int i;
+	int ret;
+
+	if (!name)
+		return -1;
+
+	for (i = 0; i < ARRAY_SIZE(netlink_parsers); i++) {
+		if (strcmp(netlink_parsers[i].name, name))
+			continue;
+
+		printf("parse: %s\n", name);
+		ret = netlink_parsers[i].parse(expr, state);
+		if (ret < 0) {
+			fprintf(stderr, "cannot translate \"%s\" expression: %d\n",
+				name, ret);
+			return ret;
+		}
+
+		return 0;
+	}
+
+	fprintf(stderr, "unsupported expression \"%s\"\n", name);
+	return -EOPNOTSUPP;
+}
+
+/*
+ * nftnl_rule_list_foreach() callback: translate one nftables rule into
+ * one IMR rule.
+ *
+ * Returns a negative value if the rule cannot be started or any of its
+ * expressions cannot be translated; otherwise the result of
+ * imr_state_rule_end().
+ */
+static int rule_parse_cb(struct nftnl_rule *rule, void *data)
+{
+	struct imr_state *state = data;
+	int ret;
+
+	ret = imr_state_rule_begin(state);
+	if (ret < 0)
+		return ret;
+
+	ret = nftnl_expr_foreach(rule, expr_parse_cb, data);
+	if (ret < 0)
+		return ret;
+
+	return imr_state_rule_end(state);
+}
+
+/*
+ * Send one netlink request and feed all replies to @cb until the
+ * callback runner reports completion or an error.
+ *
+ * Returns -1 if sending fails, 0 if receiving stopped with EAGAIN,
+ * otherwise the last value returned by mnl_cb_run()/mnl_socket_recvfrom().
+ */
+static int
+mnl_talk(struct mnl_socket *nf_sock, const void *data, unsigned int len,
+	 int (*cb)(const struct nlmsghdr *nlh, void *data), void *cb_data)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	uint32_t portid = mnl_socket_get_portid(nf_sock);
+	int ret;
+
+	if (mnl_socket_sendto(nf_sock, data, len) < 0)
+		return -1;
+
+	ret = mnl_socket_recvfrom(nf_sock, buf, sizeof(buf));
+	while (ret > 0) {
+		ret = mnl_cb_run(buf, ret, seq, portid, cb, cb_data);
+		if (ret <= 0)
+			break;
+
+		ret = mnl_socket_recvfrom(nf_sock, buf, sizeof(buf));
+	}
+
+	if (ret < 0 && errno == EAGAIN)
+		return 0;
+
+	return ret;
+}
+
+/*
+ * Rule
+ */
+
+/*
+ * Dump callback: parse one rule message and append it to the rule list.
+ * Messages that fail to parse are dropped without aborting the dump.
+ */
+static int rule_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nftnl_rule_list *nlr_list = data;
+	struct nftnl_rule *r;
+
+	r = nftnl_rule_alloc();
+	if (r == NULL)
+		memory_allocation_error();
+
+	if (nftnl_rule_nlmsg_parse(nlh, r) < 0)
+		goto err_free;
+
+	nftnl_rule_list_add_tail(r, nlr_list);
+	return MNL_CB_OK;
+
+err_free:
+	nftnl_rule_free(r);
+	return MNL_CB_OK;
+}
+
+/* Dump all rules of @family.  Returns the list, or NULL on failure. */
+static struct nftnl_rule_list *mnl_rule_dump(struct mnl_socket *nf_sock,
+					     int family)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh;
+	struct nftnl_rule_list *nlr_list;
+	int ret;
+
+	nlr_list = nftnl_rule_list_alloc();
+	if (nlr_list == NULL)
+		memory_allocation_error();
+
+	nlh = nftnl_rule_nlmsg_build_hdr(buf, NFT_MSG_GETRULE, family,
+					 NLM_F_DUMP, seq);
+
+	ret = mnl_talk(nf_sock, nlh, nlh->nlmsg_len, rule_cb, nlr_list);
+	if (ret < 0)
+		goto err;
+
+	return nlr_list;
+err:
+	nftnl_rule_list_free(nlr_list);
+	return NULL;
+}
+
+/*
+ * Chain
+ */
+
+/*
+ * Dump callback: parse one chain message and append it to the chain list.
+ * Messages that fail to parse are dropped without aborting the dump.
+ */
+static int chain_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nftnl_chain_list *nlc_list = data;
+	struct nftnl_chain *c;
+
+	c = nftnl_chain_alloc();
+	if (c == NULL)
+		memory_allocation_error();
+
+	if (nftnl_chain_nlmsg_parse(nlh, c) < 0)
+		goto err_free;
+
+	nftnl_chain_list_add_tail(c, nlc_list);
+	return MNL_CB_OK;
+
+err_free:
+	nftnl_chain_free(c);
+	return MNL_CB_OK;
+}
+
+/* Dump all chains of @family.  Returns the list, or NULL on failure. */
+static struct nftnl_chain_list *mnl_chain_dump(struct mnl_socket *nf_sock,
+					       int family)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh;
+	struct nftnl_chain_list *nlc_list;
+	int ret;
+
+	nlc_list = nftnl_chain_list_alloc();
+	if (nlc_list == NULL)
+		memory_allocation_error();
+
+	nlh = nftnl_chain_nlmsg_build_hdr(buf, NFT_MSG_GETCHAIN, family,
+					  NLM_F_DUMP, seq);
+
+	ret = mnl_talk(nf_sock, nlh, nlh->nlmsg_len, chain_cb, nlc_list);
+	if (ret < 0)
+		goto err;
+
+	return nlc_list;
+err:
+	nftnl_chain_list_free(nlc_list);
+	return NULL;
+}
+
+/*
+ * Table
+ */
+
+/*
+ * Callback: parse one table message and append it to the table list
+ * passed as @data.  A parse failure aborts the request.
+ */
+static int table_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nftnl_table_list *nlt_list = data;
+	struct nftnl_table *t;
+
+	t = nftnl_table_alloc();
+	if (t == NULL)
+		memory_allocation_error();
+
+	if (nftnl_table_nlmsg_parse(nlh, t) < 0)
+		goto err_free;
+
+	nftnl_table_list_add_tail(t, nlt_list);
+
+	return MNL_CB_OK;
+
+err_free:
+	nftnl_table_free(t);
+	return MNL_CB_ERROR;
+}
+
+/*
+ * Set elements
+ */
+
+/* Callback: merge the elements carried by @nlh into the set in @data. */
+static int set_elem_cb(const struct nlmsghdr *nlh, void *data)
+{
+	nftnl_set_elems_nlmsg_parse(nlh, data);
+	return MNL_CB_OK;
+}
+
+/* Fetch the elements of @nls from the kernel.  Returns mnl_talk()'s result. */
+static int mnl_setelem_get(struct mnl_socket *nf_sock, struct nftnl_set *nls)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh;
+	uint32_t family = nftnl_set_get_u32(nls, NFTNL_SET_FAMILY);
+
+	nlh = nftnl_set_nlmsg_build_hdr(buf, NFT_MSG_GETSETELEM, family,
+					NLM_F_DUMP|NLM_F_ACK, seq);
+	nftnl_set_nlmsg_build_payload(nlh, nls);
+
+	return mnl_talk(nf_sock, nlh, nlh->nlmsg_len, set_elem_cb, nls);
+}
+
+/*
+ * Set
+ */
+
+/*
+ * Dump callback: parse one set message and append it to the set list.
+ * Messages that fail to parse are dropped without aborting the dump.
+ */
+static int set_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nftnl_set_list *nls_list = data;
+	struct nftnl_set *s;
+
+	s = nftnl_set_alloc();
+	if (s == NULL)
+		memory_allocation_error();
+
+	if (nftnl_set_nlmsg_parse(nlh, s) < 0)
+		goto err_free;
+
+	nftnl_set_list_add_tail(s, nls_list);
+	return MNL_CB_OK;
+
+err_free:
+	nftnl_set_free(s);
+	return MNL_CB_OK;
+}
+
+/*
+ * Dump all sets of @family together with their elements.
+ * Returns the list, or NULL if the dump or any element fetch fails.
+ */
+static struct nftnl_set_list *
+mnl_set_dump(struct mnl_socket *nf_sock, int family)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh;
+	struct nftnl_set *s;
+	struct nftnl_set_list *nls_list;
+	struct nftnl_set *si;
+	struct nftnl_set_list_iter *i;
+	int ret;
+
+	/* An empty set object is only needed to build the request payload. */
+	s = nftnl_set_alloc();
+	if (s == NULL)
+		memory_allocation_error();
+
+	nlh = nftnl_set_nlmsg_build_hdr(buf, NFT_MSG_GETSET, family,
+					NLM_F_DUMP|NLM_F_ACK, seq);
+	nftnl_set_nlmsg_build_payload(nlh, s);
+	nftnl_set_free(s);
+
+	nls_list = nftnl_set_list_alloc();
+	if (nls_list == NULL)
+		memory_allocation_error();
+
+	ret = mnl_talk(nf_sock, nlh, nlh->nlmsg_len, set_cb, nls_list);
+	if (ret < 0)
+		goto err;
+
+	i = nftnl_set_list_iter_create(nls_list);
+	if (i == NULL)
+		memory_allocation_error();
+
+	si = nftnl_set_list_iter_next(i);
+	while (si != NULL) {
+		if (mnl_setelem_get(nf_sock, si) != 0) {
+			perror("E: Unable to get set elements");
+			nftnl_set_list_iter_destroy(i);
+			goto err;
+		}
+		si = nftnl_set_list_iter_next(i);
+	}
+
+	nftnl_set_list_iter_destroy(i);
+
+	return nls_list;
+err:
+	nftnl_set_list_free(nls_list);
+	return NULL;
+}
+
+/*
+ * Request table @table of @family and return a new ruleset whose table
+ * list holds the reply.  Returns NULL if the request fails.
+ *
+ * NFTNL_RULESET_TABLELIST takes a table list, so the reply is collected
+ * in a list rather than storing the bare table object in the ruleset.
+ */
+static struct nftnl_ruleset *mnl_table_ruleset(struct mnl_socket *nf_sock,
+					       int family,
+					       const char *table)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nftnl_table_list *nlt_list;
+	struct nftnl_ruleset *rs;
+	struct nftnl_table *t;
+	struct nlmsghdr *nlh;
+	int ret;
+
+	nlh = nftnl_table_nlmsg_build_hdr(buf, NFT_MSG_GETTABLE, family,
+					  NLM_F_ACK, seq);
+	t = nftnl_table_alloc();
+	if (t == NULL)
+		memory_allocation_error();
+
+	nftnl_table_set(t, NFTNL_TABLE_NAME, table);
+	nftnl_table_nlmsg_build_payload(nlh, t);
+	nftnl_table_free(t);
+
+	nlt_list = nftnl_table_list_alloc();
+	if (nlt_list == NULL)
+		memory_allocation_error();
+
+	rs = nftnl_ruleset_alloc();
+	if (rs == NULL)
+		memory_allocation_error();
+
+	ret = mnl_talk(nf_sock, nlh, nlh->nlmsg_len, table_cb, nlt_list);
+	if (ret < 0)
+		goto err;
+
+	nftnl_ruleset_set(rs, NFTNL_RULESET_TABLELIST, nlt_list);
+	return rs;
+err:
+	nftnl_table_list_free(nlt_list);
+	nftnl_ruleset_free(rs);
+	return NULL;
+}
+
+/*
+ * Build a ruleset for @family: the "filter" table plus every chain, set
+ * and rule dump that succeeds.  The result is also printed to stdout.
+ *
+ * Returns NULL only if the table request fails; a failed chain, set or
+ * rule dump just leaves that part of the ruleset unset.
+ */
+static struct nftnl_ruleset *mnl_ruleset_dump(struct mnl_socket *nf_sock, int family)
+{
+	struct nftnl_ruleset *rs;
+	struct nftnl_chain_list *c;
+	struct nftnl_set_list *s;
+	struct nftnl_rule_list *r;
+	uint32_t type = NFTNL_OUTPUT_DEFAULT;
+
+	rs = mnl_table_ruleset(nf_sock, family, "filter");
+	if (!rs)
+		return NULL;
+
+	c = mnl_chain_dump(nf_sock, family);
+	if (c != NULL)
+		nftnl_ruleset_set(rs, NFTNL_RULESET_CHAINLIST, c);
+
+	s = mnl_set_dump(nf_sock, family);
+	if (s != NULL)
+		nftnl_ruleset_set(rs, NFTNL_RULESET_SETLIST, s);
+
+	r = mnl_rule_dump(nf_sock, family);
+	if (r != NULL)
+		nftnl_ruleset_set(rs, NFTNL_RULESET_RULELIST, r);
+
+	nftnl_ruleset_fprintf(stdout, rs, type, 0);
+	return rs;
+}
+
+/*
+ * Emit the rule "ether type ne 0x800 accept" so that everything which is
+ * not IPv4 is passed before the translated rules run.
+ *
+ * Returns the result of imr_state_rule_end() on success, -ENOMEM on
+ * allocation failure, or the error of the failing imr_state_* call.
+ */
+static int nft_ipv4_only(struct imr_state *state)
+{
+	struct imr_object *eth_p_ip, *lltype, *relop, *verdict;
+	int ret;
+
+	ret = imr_state_rule_begin(state);
+	if (ret < 0)
+		return ret;
+
+	lltype = imr_object_alloc_payload(IMR_PAYLOAD_BASE_LL,
+					  offsetof(struct ethhdr, h_proto),
+					  sizeof(uint16_t));
+	if (!lltype)
+		return -ENOMEM;
+
+	eth_p_ip = imr_object_alloc_imm32(htons(ETH_P_IP));
+	if (!eth_p_ip) {
+		imr_object_free(lltype);
+		return -ENOMEM;
+	}
+
+	relop = imr_object_alloc_relational(IMR_RELOP_NE, lltype, eth_p_ip);
+	if (!relop) {
+		imr_object_free(eth_p_ip);
+		imr_object_free(lltype);
+		return -ENOMEM;
+	}
+
+	ret = imr_state_add_obj(state, relop);
+	if (ret != 0) {
+		/* Same convention as the immediate parser: free on failure. */
+		if (ret < 0)
+			imr_object_free(relop);
+		return ret;
+	}
+
+	verdict = imr_object_alloc_verdict(IMR_VERDICT_PASS);
+	if (!verdict)
+		return -ENOMEM;
+
+	ret = imr_state_add_obj(state, verdict);
+	if (ret != 0) {
+		if (ret < 0)
+			imr_object_free(verdict);
+		return ret;
+	}
+
+	return imr_state_rule_end(state);
+}
+
+/*
+ * Translate the rule list of @rs into IMR, print the IMR state to stdout
+ * and hand it to imr_do_bpf().
+ *
+ * Returns 0 on success, -ENOENT if the ruleset carries no rule list,
+ * -ENOMEM on allocation failure, or the error of the failing step.
+ */
+static int nft2imr(const struct nftnl_ruleset *rs)
+{
+	struct nftnl_rule_list *l = nftnl_ruleset_get(rs, NFTNL_RULESET_RULELIST);
+	struct imr_state *state;
+	int ret;
+
+	/* The rule list is unset when the rule dump failed. */
+	if (!l) {
+		fprintf(stderr, "ruleset contains no rule list\n");
+		return -ENOENT;
+	}
+
+	state = imr_state_alloc();
+	if (!state)
+		return -ENOMEM;
+
+	ret = nft_ipv4_only(state);
+	if (ret != 0)
+		goto out;
+
+	ret = nftnl_rule_list_foreach(l, rule_parse_cb, state);
+	if (ret < 0)
+		goto out;
+
+	imr_state_print(stdout, state);
+	/* NOTE(review): result of imr_do_bpf() is not checked; its prototype is not visible here. */
+	imr_do_bpf(state);
+	ret = 0;
+out:
+	imr_state_free(state);
+	return ret;
+}
+
+/*
+ * Dump the IPv4 nftables ruleset over netlink and translate it.
+ * Exits with EXIT_SUCCESS on success and EXIT_FAILURE otherwise.
+ */
+int main(int argc, char *argv[])
+{
+	struct mnl_socket *nl;
+	struct nftnl_ruleset *rs;
+	int ret;
+
+	if (argc > 2) {
+		fprintf(stderr, "%s {json}\n",
+			argv[0]);
+		exit(EXIT_FAILURE);
+	}
+
+	nl = mnl_socket_open(NETLINK_NETFILTER);
+	if (nl == NULL) {
+		perror("mnl_socket_open");
+		exit(EXIT_FAILURE);
+	}
+
+	if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) {
+		perror("mnl_socket_bind");
+		mnl_socket_close(nl);
+		exit(EXIT_FAILURE);
+	}
+
+	seq = time(NULL);
+
+	rs = mnl_ruleset_dump(nl, NFPROTO_IPV4);
+	if (rs == NULL) {
+		perror("ruleset_dump");
+		mnl_socket_close(nl);
+		exit(EXIT_FAILURE);
+	}
+
+	ret = nft2imr(rs);
+
+	nftnl_ruleset_free(rs);
+	mnl_socket_close(nl);
+
+	/* A negative errno is not a meaningful process exit status. */
+	return ret ? EXIT_FAILURE : EXIT_SUCCESS;
+}
-- 
2.16.1