From: Paul Blakey <pa...@mellanox.com> Add tc flower interface that will be used to offload flows via tc flower classifier. Depending on the flag used (skip_sw/hw) flower will pass those to HW or handle them itself.
Co-authored-by: Shahar Klein <shah...@mellanox.com> Signed-off-by: Shahar Klein <shah...@mellanox.com> Signed-off-by: Paul Blakey <pa...@mellanox.com> Reviewed-by: Roi Dayan <r...@mellanox.com> --- lib/automake.mk | 2 + lib/tc.c | 984 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/tc.h | 108 +++++++ 3 files changed, 1094 insertions(+) create mode 100644 lib/tc.c create mode 100644 lib/tc.h diff --git a/lib/automake.mk b/lib/automake.mk index abc9d0d..a6141ea 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -351,6 +351,8 @@ if LINUX lib_libopenvswitch_la_SOURCES += \ lib/dpif-netlink.c \ lib/dpif-netlink.h \ + lib/tc.h \ + lib/tc.c \ lib/if-notifier.c \ lib/if-notifier.h \ lib/netdev-linux.c \ diff --git a/lib/tc.c b/lib/tc.c new file mode 100644 index 0000000..acda7fe --- /dev/null +++ b/lib/tc.c @@ -0,0 +1,984 @@ +/* + * Copyright (c) 2016 Mellanox Technologies, Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <config.h> + +#include <errno.h> +#include <linux/rtnetlink.h> +#include <net/if.h> +#include <linux/tc_act/tc_gact.h> +#include <linux/tc_act/tc_mirred.h> +#include <linux/tc_act/tc_vlan.h> +#include <linux/tc_act/tc_tunnel_key.h> +#include <linux/gen_stats.h> +#include "timeval.h" +#include "netlink-socket.h" +#include "netlink.h" +#include "rtnetlink.h" +#include "openvswitch/vlog.h" +#include "openvswitch/ofpbuf.h" +#include "tc.h" +#include "util.h" +#include "byte-order.h" + +VLOG_DEFINE_THIS_MODULE(tc); + +static struct vlog_rate_limit parse_err = VLOG_RATE_LIMIT_INIT(5, 5); + +/* Returns tc handle 'major':'minor'. */ +static unsigned int +tc_make_handle(unsigned int major, unsigned int minor) +{ + return TC_H_MAKE(major << 16, minor); +} + +static struct tcmsg * +tc_make_req(int ifindex, int type, unsigned int flags, struct ofpbuf *request) +{ + struct tcmsg *tcmsg; + struct nlmsghdr *nlmsghdr; + + ofpbuf_init(request, 512); + + nl_msg_reserve(request, NLMSG_HDRLEN + sizeof *tcmsg); + nlmsghdr = nl_msg_put_uninit(request, NLMSG_HDRLEN); + nlmsghdr->nlmsg_len = 0; + nlmsghdr->nlmsg_type = type; + nlmsghdr->nlmsg_flags = NLM_F_REQUEST | flags; + nlmsghdr->nlmsg_seq = 0; + nlmsghdr->nlmsg_pid = 0; + + tcmsg = ofpbuf_put_zeros(request, sizeof *tcmsg); + tcmsg->tcm_family = AF_UNSPEC; + tcmsg->tcm_ifindex = ifindex; + + return tcmsg; +} + +static int +tc_transact(struct ofpbuf *request, struct ofpbuf **replyp) +{ + int error = nl_transact(NETLINK_ROUTE, request, replyp); + + ofpbuf_uninit(request); + return error; +} + +static const struct nl_policy tca_policy[] = { + [TCA_KIND] = { .type = NL_A_STRING, .optional = false, }, + [TCA_OPTIONS] = { .type = NL_A_NESTED, .optional = false, }, + [TCA_STATS] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct tc_stats), .optional = true, }, + [TCA_STATS2] = { .type = NL_A_NESTED, .optional = true, }, +}; + +static const struct nl_policy tca_flower_policy[] = { + [TCA_FLOWER_CLASSID] = { .type = NL_A_U32, .optional = true, }, + [TCA_FLOWER_INDEV] = { .type = NL_A_STRING, .max_len = IFNAMSIZ, + .optional = true, }, + [TCA_FLOWER_KEY_ETH_SRC] = { .type = NL_A_UNSPEC, + .min_len = ETH_ALEN, .optional = true, }, + [TCA_FLOWER_KEY_ETH_DST] = { .type = NL_A_UNSPEC, + .min_len = ETH_ALEN, .optional = true, }, + [TCA_FLOWER_KEY_ETH_SRC_MASK] = { .type = NL_A_UNSPEC, + .min_len = ETH_ALEN, + .optional = true, }, + [TCA_FLOWER_KEY_ETH_DST_MASK] = { .type = NL_A_UNSPEC, + .min_len = ETH_ALEN, + .optional = true, }, + [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NL_A_U16, .optional = false, }, + [TCA_FLOWER_FLAGS] = { .type = NL_A_U32, .optional = false, }, + [TCA_FLOWER_ACT] = { .type = NL_A_NESTED, .optional = false, }, + [TCA_FLOWER_KEY_IP_PROTO] = { .type = NL_A_U8, .optional = true, }, + [TCA_FLOWER_KEY_IPV4_SRC] = { .type = NL_A_U32, .optional = true, }, + [TCA_FLOWER_KEY_IPV4_DST] = {.type = NL_A_U32, .optional = true, }, + [TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NL_A_U32, .optional = true, }, + [TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NL_A_U32, .optional = true, }, + [TCA_FLOWER_KEY_IPV6_SRC] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct in6_addr), + .optional = true, }, + [TCA_FLOWER_KEY_IPV6_DST] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct in6_addr), + .optional = true, }, + [TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct in6_addr), + .optional = true, }, + [TCA_FLOWER_KEY_IPV6_DST_MASK] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct in6_addr), + .optional = true, }, + [TCA_FLOWER_KEY_TCP_SRC] = { .type = NL_A_U16, .optional = true, }, + [TCA_FLOWER_KEY_TCP_DST] = { .type = NL_A_U16, .optional = true, }, + [TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NL_A_U16, .optional = true, }, + [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NL_A_U16, .optional = true, }, + [TCA_FLOWER_KEY_UDP_SRC] = { .type = NL_A_U16, .optional = true, }, + [TCA_FLOWER_KEY_UDP_DST] = { .type = NL_A_U16, .optional = true, }, + [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NL_A_U16, .optional = true, }, + [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NL_A_U16, .optional = true, }, + [TCA_FLOWER_KEY_VLAN_ID] = { .type = NL_A_U16, .optional = true, }, + [TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NL_A_U8, .optional = true, }, + [TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NL_A_U16, .optional = true, }, + [TCA_FLOWER_KEY_ENC_KEY_ID] = { .type = NL_A_BE32, .optional = true, }, + [TCA_FLOWER_KEY_ENC_IPV4_SRC] = { .type = NL_A_BE32, .optional = true, }, + [TCA_FLOWER_KEY_ENC_IPV4_DST] = { .type = NL_A_BE32, .optional = true, }, + [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NL_A_BE16, + .optional = true, }, +}; + +static void +nl_parse_flower_eth(struct nlattr **attrs, struct tc_flower *flower) +{ + const struct eth_addr *eth; + + if (attrs[TCA_FLOWER_KEY_ETH_SRC_MASK]) { + eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_SRC], ETH_ALEN); + memcpy(&flower->key.src_mac, eth, sizeof flower->key.src_mac); + + eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_SRC_MASK], ETH_ALEN); + memcpy(&flower->mask.src_mac, eth, sizeof flower->mask.src_mac); + } + if (attrs[TCA_FLOWER_KEY_ETH_DST_MASK]) { + eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_DST], ETH_ALEN); + memcpy(&flower->key.dst_mac, eth, sizeof flower->key.dst_mac); + + eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ETH_DST_MASK], ETH_ALEN); + memcpy(&flower->mask.dst_mac, eth, sizeof flower->mask.dst_mac); + } +} + +static void +nl_parse_flower_vlan(struct nlattr **attrs, struct tc_flower *flower) +{ + if (flower->key.eth_type != htons(ETH_P_8021Q)) { + return; + } + + flower->key.encap_eth_type = + nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ETH_TYPE]); + + if (attrs[TCA_FLOWER_KEY_VLAN_ID]) { + flower->key.vlan_id = + nl_attr_get_u16(attrs[TCA_FLOWER_KEY_VLAN_ID]); + } + if (attrs[TCA_FLOWER_KEY_VLAN_PRIO]) { + flower->key.vlan_prio = + nl_attr_get_u8(attrs[TCA_FLOWER_KEY_VLAN_PRIO]); + } +} + +static void +nl_parse_flower_tunnel(struct nlattr **attrs, struct tc_flower *flower) +{ + if (attrs[TCA_FLOWER_KEY_ENC_KEY_ID]) { + ovs_be32 id = nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_KEY_ID]); + + flower->tunnel.id = be32_to_be64(id); + } + if (attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC]) { + flower->tunnel.ipv4_src = + nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC]); + } + if (attrs[TCA_FLOWER_KEY_ENC_IPV4_DST]) { + flower->tunnel.ipv4_dst = + nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_DST]); + } + if (attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]) { + flower->tunnel.tp_dst = + nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]); + } +} + +static void +nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower) { + uint8_t ip_proto = 0; + const size_t ipv6_size = sizeof flower->mask.ipv6.ipv6_src; + struct tc_flower_key *key = &flower->key; + struct tc_flower_key *mask = &flower->mask; + + if (attrs[TCA_FLOWER_KEY_IP_PROTO]) { + ip_proto = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_IP_PROTO]); + key->ip_proto = ip_proto; + mask->ip_proto = UINT8_MAX; + } + + if (attrs[TCA_FLOWER_KEY_IPV4_SRC_MASK]) { + key->ipv4.ipv4_src = + nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_SRC]); + mask->ipv4.ipv4_src = + nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_SRC_MASK]); + } + if (attrs[TCA_FLOWER_KEY_IPV4_DST_MASK]) { + key->ipv4.ipv4_dst = + nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_DST]); + mask->ipv4.ipv4_dst = + nl_attr_get_be32(attrs[TCA_FLOWER_KEY_IPV4_DST_MASK]); + } + if (attrs[TCA_FLOWER_KEY_IPV6_SRC_MASK]) { + struct nlattr *attr = attrs[TCA_FLOWER_KEY_IPV6_SRC]; + struct nlattr *attr_mask = attrs[TCA_FLOWER_KEY_IPV6_SRC_MASK]; + const void *data = nl_attr_get_unspec(attr, ipv6_size); + const void *mask_data = nl_attr_get_unspec(attr_mask, ipv6_size); + + memcpy(&key->ipv6.ipv6_src, data, ipv6_size); + memcpy(&mask->ipv6.ipv6_src, mask_data, ipv6_size); + } + if (attrs[TCA_FLOWER_KEY_IPV6_DST_MASK]) { + struct nlattr *attr = attrs[TCA_FLOWER_KEY_IPV6_DST]; + struct nlattr *attr_mask = attrs[TCA_FLOWER_KEY_IPV6_DST_MASK]; + const void *data = nl_attr_get_unspec(attr, ipv6_size); + const void *mask_data = nl_attr_get_unspec(attr_mask, ipv6_size); + + memcpy(&key->ipv6.ipv6_dst, data, ipv6_size); + memcpy(&mask->ipv6.ipv6_dst, mask_data, ipv6_size); + } + + if (ip_proto == IPPROTO_TCP) { + if (attrs[TCA_FLOWER_KEY_TCP_SRC_MASK]) { + key->src_port = + nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_SRC]); + mask->src_port = + nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_SRC_MASK]); + } + if (attrs[TCA_FLOWER_KEY_TCP_DST_MASK]) { + key->dst_port = + nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_DST]); + mask->dst_port = + nl_attr_get_be16(attrs[TCA_FLOWER_KEY_TCP_DST_MASK]); + } + } else if (ip_proto == IPPROTO_UDP) { + if (attrs[TCA_FLOWER_KEY_UDP_SRC_MASK]) { + key->src_port = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_SRC]); + mask->src_port = + nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_SRC_MASK]); + } + if (attrs[TCA_FLOWER_KEY_UDP_DST_MASK]) { + key->dst_port = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_DST]); + mask->dst_port = + nl_attr_get_be16(attrs[TCA_FLOWER_KEY_UDP_DST_MASK]); + } + } +} + +static const struct nl_policy tunnel_key_policy[] = { + [TCA_TUNNEL_KEY_PARMS] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct tc_tunnel_key), + .optional = false, }, + [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NL_A_BE32, .optional = true, }, + [TCA_TUNNEL_KEY_ENC_DST_PORT] = { .type = NL_A_BE16, .optional = true, }, + [TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NL_A_BE32, .optional = true, }, + [TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NL_A_BE32, .optional = true, }, +}; + +static int +nl_parse_act_tunnel_key(struct nlattr *options, struct tc_flower *flower) +{ + struct nlattr *tun_attrs[ARRAY_SIZE(tunnel_key_policy)]; + const struct nlattr *tun_parms; + const struct tc_tunnel_key *tun; + + if (!nl_parse_nested(options, tunnel_key_policy, tun_attrs, + ARRAY_SIZE(tunnel_key_policy))) { + VLOG_ERR_RL(&parse_err, "failed to parse tunnel_key action options"); + return EPROTO; + } + + tun_parms = tun_attrs[TCA_TUNNEL_KEY_PARMS]; + tun = nl_attr_get_unspec(tun_parms, sizeof *tun); + if (tun->t_action == TCA_TUNNEL_KEY_ACT_SET) { + struct nlattr *id = tun_attrs[TCA_TUNNEL_KEY_ENC_KEY_ID]; + struct nlattr *dst_port = tun_attrs[TCA_TUNNEL_KEY_ENC_DST_PORT]; + struct nlattr *ipv4_src = tun_attrs[TCA_TUNNEL_KEY_ENC_IPV4_SRC]; + struct nlattr *ipv4_dst = tun_attrs[TCA_TUNNEL_KEY_ENC_IPV4_DST]; + + flower->set.set = true; + flower->set.ipv4_src = ipv4_src ? nl_attr_get_be32(ipv4_src) : 0; + flower->set.ipv4_dst = ipv4_dst ? nl_attr_get_be32(ipv4_dst) : 0; + flower->set.id = id ? be32_to_be64(nl_attr_get_be32(id)) : 0; + flower->set.tp_dst = dst_port ? nl_attr_get_be16(dst_port) : 0; + } else if (tun->t_action == TCA_TUNNEL_KEY_ACT_RELEASE) { + flower->tunnel.tunnel = true; + } else { + VLOG_ERR_RL(&parse_err, "unknown tunnel actions: %d, %d", + tun->action, tun->t_action); + return EINVAL; + } + return 0; +} + +static const struct nl_policy gact_policy[] = { + [TCA_GACT_PARMS] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct tc_gact), + .optional = false, }, + [TCA_GACT_TM] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct tcf_t), + .optional = false, }, +}; + +static void +nl_parse_tcf(const struct tcf_t *tm, struct tc_flower *flower) +{ + unsigned long long int lastuse = tm->lastuse * 10; + unsigned long long int now = time_msec(); + + flower->lastused = now - lastuse; +} + +static int +nl_parse_act_drop(struct nlattr *options, struct tc_flower *flower) +{ + struct nlattr *gact_attrs[ARRAY_SIZE(gact_policy)]; + const struct tc_gact *p; + struct nlattr *gact_parms; + const struct tcf_t *tm; + + if (!nl_parse_nested(options, gact_policy, gact_attrs, + ARRAY_SIZE(gact_policy))) { + VLOG_ERR_RL(&parse_err, "failed to parse gact action options"); + return EPROTO; + } + + gact_parms = gact_attrs[TCA_GACT_PARMS]; + p = nl_attr_get_unspec(gact_parms, sizeof *p); + + if (p->action == TC_ACT_SHOT) { + } else { + VLOG_ERR_RL(&parse_err, "unknown gact action: %d", p->action); + return EINVAL; + } + + tm = nl_attr_get_unspec(gact_attrs[TCA_GACT_TM], sizeof *tm); + nl_parse_tcf(tm, flower); + + return 0; +} + +static const struct nl_policy mirred_policy[] = { + [TCA_MIRRED_PARMS] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct tc_mirred), + .optional = false, }, + [TCA_MIRRED_TM] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct tcf_t), + .optional = false, }, +}; + +static int +nl_parse_act_mirred(struct nlattr *options, struct tc_flower *flower) +{ + + struct nlattr *mirred_attrs[ARRAY_SIZE(mirred_policy)]; + const struct tc_mirred *m; + const struct nlattr *mirred_parms; + const struct tcf_t *tm; + struct nlattr *mirred_tm; + + if (!nl_parse_nested(options, mirred_policy, mirred_attrs, + ARRAY_SIZE(mirred_policy))) { + VLOG_ERR_RL(&parse_err, "failed to parse mirred action options"); + return EPROTO; + } + + mirred_parms = mirred_attrs[TCA_MIRRED_PARMS]; + m = nl_attr_get_unspec(mirred_parms, sizeof *m); + + if (m->action != TC_ACT_STOLEN || m->eaction != TCA_EGRESS_REDIR) { + VLOG_ERR_RL(&parse_err, "unknown mirred action: %d, %d, %d", + m->action, m->eaction, m->ifindex); + return EINVAL; + } + + flower->ifindex_out = m->ifindex; + + mirred_tm = mirred_attrs[TCA_MIRRED_TM]; + tm = nl_attr_get_unspec(mirred_tm, sizeof *tm); + nl_parse_tcf(tm, flower); + + return 0; +} + +static const struct nl_policy vlan_policy[] = { + [TCA_VLAN_PARMS] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct tc_vlan), + .optional = false, }, + [TCA_VLAN_PUSH_VLAN_ID] = { .type = NL_A_U16, .optional = true, }, + [TCA_VLAN_PUSH_VLAN_PROTOCOL] = { .type = NL_A_U16, .optional = true, }, + [TCA_VLAN_PUSH_VLAN_PRIORITY] = { .type = NL_A_U8, .optional = true, }, +}; + +static int +nl_parse_act_vlan(struct nlattr *options, struct tc_flower *flower) +{ + struct nlattr *vlan_attrs[ARRAY_SIZE(vlan_policy)]; + const struct tc_vlan *v; + const struct nlattr *vlan_parms; + + if (!nl_parse_nested(options, vlan_policy, vlan_attrs, + ARRAY_SIZE(vlan_policy))) { + VLOG_ERR_RL(&parse_err, "failed to parse vlan action options"); + return EPROTO; + } + + vlan_parms = vlan_attrs[TCA_VLAN_PARMS]; + v = nl_attr_get_unspec(vlan_parms, sizeof *v); + if (v->v_action == TCA_VLAN_ACT_PUSH) { + struct nlattr *vlan_id = vlan_attrs[TCA_VLAN_PUSH_VLAN_ID]; + struct nlattr *vlan_prio = vlan_attrs[TCA_VLAN_PUSH_VLAN_PRIORITY]; + + flower->vlan_push_id = nl_attr_get_u16(vlan_id); + flower->vlan_push_prio = nl_attr_get_u8(vlan_prio); + } else if (v->v_action == TCA_VLAN_ACT_POP) { + flower->vlan_pop = 1; + } else { + VLOG_ERR_RL(&parse_err, "unknown vlan action: %d, %d", + v->action, v->v_action); + return EINVAL; + } + return 0; +} + +static const struct nl_policy act_policy[] = { + [TCA_ACT_KIND] = { .type = NL_A_STRING, .optional = false, }, + [TCA_ACT_OPTIONS] = { .type = NL_A_NESTED, .optional = false, }, + [TCA_ACT_STATS] = { .type = NL_A_NESTED, .optional = false, }, +}; + +static const struct nl_policy stats_policy[] = { + [TCA_STATS_BASIC] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct gnet_stats_basic), + .optional = false, }, +}; + +static int +nl_parse_single_action(struct nlattr *action, struct tc_flower *flower) +{ + struct nlattr *act_options; + struct nlattr *act_stats; + const struct nlattr *stats_basic; + const char *act_kind; + struct nlattr *action_attrs[ARRAY_SIZE(act_policy)]; + struct nlattr *stats_attrs[ARRAY_SIZE(stats_policy)]; + struct ovs_flow_stats *stats = &flower->stats; + const struct gnet_stats_basic *bs; + + if (!nl_parse_nested(action, act_policy, action_attrs, + ARRAY_SIZE(act_policy))) { + VLOG_ERR_RL(&parse_err, "failed to parse single action options"); + return EPROTO; + } + + act_kind = nl_attr_get_string(action_attrs[TCA_ACT_KIND]); + act_options = action_attrs[TCA_ACT_OPTIONS]; + + if (!strcmp(act_kind, "gact")) { + nl_parse_act_drop(act_options, flower); + } else if (!strcmp(act_kind, "mirred")) { + nl_parse_act_mirred(act_options, flower); + } else if (!strcmp(act_kind, "vlan")) { + nl_parse_act_vlan(act_options, flower); + } else if (!strcmp(act_kind, "tunnel_key")) { + nl_parse_act_tunnel_key(act_options, flower); + } else { + VLOG_ERR_RL(&parse_err, "unknown tc action kind: %s", act_kind); + return EINVAL; + } + + act_stats = action_attrs[TCA_ACT_STATS]; + + if (!nl_parse_nested(act_stats, stats_policy, stats_attrs, + ARRAY_SIZE(stats_policy))) { + VLOG_ERR_RL(&parse_err, "failed to parse action stats policy"); + return EPROTO; + } + + stats_basic = stats_attrs[TCA_STATS_BASIC]; + bs = nl_attr_get_unspec(stats_basic, sizeof *bs); + + stats->n_packets.lo = bs->packets; + stats->n_packets.hi = 0; + stats->n_bytes.hi = bs->bytes >> 32; + stats->n_bytes.lo = bs->bytes & 0x00000000FFFFFFFF; + + return 0; +} + +#define TCA_ACT_MIN_PRIO 1 + +static int +nl_parse_flower_actions(struct nlattr **attrs, struct tc_flower *flower) +{ + const struct nlattr *actions = attrs[TCA_FLOWER_ACT]; + static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO + 1] = {}; + struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)]; + const int max_size = ARRAY_SIZE(actions_orders_policy); + + for (int i = TCA_ACT_MIN_PRIO; i < max_size; i++) { + actions_orders_policy[i].type = NL_A_NESTED; + actions_orders_policy[i].optional = true; + } + + if (!nl_parse_nested(actions, actions_orders_policy, actions_orders, + ARRAY_SIZE(actions_orders_policy))) { + VLOG_ERR_RL(&parse_err, "failed to parse flower order of actions"); + return EPROTO; + } + + for (int i = TCA_ACT_MIN_PRIO; i < max_size; i++) { + if (actions_orders[i]) { + int err = nl_parse_single_action(actions_orders[i], flower); + + if (err) { + return err; + } + } + } + + return 0; +} + +static int +nl_parse_flower_options(struct nlattr *nl_options, struct tc_flower *flower) +{ + struct nlattr *attrs[ARRAY_SIZE(tca_flower_policy)]; + + if (!nl_parse_nested(nl_options, tca_flower_policy, + attrs, ARRAY_SIZE(tca_flower_policy))) { + VLOG_ERR_RL(&parse_err, "failed to parse flower classifier options"); + return EPROTO; + } + + nl_parse_flower_eth(attrs, flower); + nl_parse_flower_vlan(attrs, flower); + nl_parse_flower_ip(attrs, flower); + nl_parse_flower_tunnel(attrs, flower); + return nl_parse_flower_actions(attrs, flower); +} + +int +parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tc_flower *flower) +{ + struct tcmsg *tc; + struct nlattr *ta[ARRAY_SIZE(tca_policy)]; + const char *kind; + + memset(flower, 0, sizeof *flower); + if (NLMSG_HDRLEN + (sizeof *tc) > reply->size) { + return EPROTO; + } + + tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc); + flower->handle = tc->tcm_handle; + flower->key.eth_type = (OVS_FORCE ovs_be16) tc_get_minor(tc->tcm_info); + flower->mask.eth_type = OVS_BE16_MAX; + flower->prio = tc_get_major(tc->tcm_info); + + if (!flower->handle) { + return EAGAIN; + } + + if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc, + tca_policy, ta, ARRAY_SIZE(ta))) { + VLOG_ERR_RL(&parse_err, "failed to parse tca policy"); + return EPROTO; + } + + kind = nl_attr_get_string(ta[TCA_KIND]); + if (strcmp(kind, "flower")) { + VLOG_ERR_RL(&parse_err, "failed to parse filter: not flower"); + return EPROTO; + } + + return nl_parse_flower_options(ta[TCA_OPTIONS], flower); +} + +int +tc_dump_flower_start(int ifindex, struct nl_dump *dump) +{ + struct ofpbuf request; + struct tcmsg *tcmsg; + + tcmsg = tc_make_request(ifindex, RTM_GETTFILTER, NLM_F_DUMP, &request); + tcmsg->tcm_parent = tc_make_handle(0xffff, 0); + tcmsg->tcm_info = TC_H_UNSPEC; + tcmsg->tcm_handle = 0; + + nl_dump_start(dump, NETLINK_ROUTE, &request); + ofpbuf_uninit(&request); + + return 0; +} + +int +tc_flush(int ifindex) +{ + struct ofpbuf request; + struct tcmsg *tcmsg; + + tcmsg = tc_make_request(ifindex, RTM_DELTFILTER, NLM_F_ACK, &request); + tcmsg->tcm_parent = tc_make_handle(0xffff, 0); + tcmsg->tcm_info = TC_H_UNSPEC; + + return tc_transact(&request, NULL); +} + +int +tc_del_filter(int ifindex, int prio, int handle) +{ + struct ofpbuf request; + struct tcmsg *tcmsg; + struct ofpbuf *reply; + + tcmsg = tc_make_request(ifindex, RTM_DELTFILTER, NLM_F_ECHO, &request); + tcmsg->tcm_parent = tc_make_handle(0xffff, 0); + tcmsg->tcm_info = tc_make_handle(prio, 0); + tcmsg->tcm_handle = handle; + + return tc_transact(&request, &reply); +} + +int +tc_get_flower(int ifindex, int prio, int handle, struct tc_flower *flower) +{ + struct ofpbuf request; + int error = 0; + struct tcmsg *tcmsg; + struct ofpbuf *reply; + + tcmsg = tc_make_request(ifindex, RTM_GETTFILTER, NLM_F_ECHO, &request); + tcmsg->tcm_parent = tc_make_handle(0xffff, 0); + tcmsg->tcm_info = tc_make_handle(prio, 0); + tcmsg->tcm_handle = handle; + + error = tc_transact(&request, &reply); + if (error) { + return error; + } + + parse_netlink_to_tc_flower(reply, flower); + return error; +} + +static void +nl_msg_put_act_push_vlan(struct ofpbuf *request, uint16_t vid, uint8_t prio) +{ + size_t offset; + + nl_msg_put_string(request, TCA_ACT_KIND, "vlan"); + offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS); + { + struct tc_vlan parm = { .action = TC_ACT_PIPE, + .v_action = TCA_VLAN_ACT_PUSH }; + + nl_msg_put_unspec(request, TCA_VLAN_PARMS, &parm, sizeof parm); + nl_msg_put_u16(request, TCA_VLAN_PUSH_VLAN_ID, vid); + nl_msg_put_u8(request, TCA_VLAN_PUSH_VLAN_PRIORITY, prio); + } + nl_msg_end_nested(request, offset); +} + +static void +nl_msg_put_act_pop_vlan(struct ofpbuf *request) +{ + size_t offset; + + nl_msg_put_string(request, TCA_ACT_KIND, "vlan"); + offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS); + { + struct tc_vlan parm = { .action = TC_ACT_PIPE, + .v_action = TCA_VLAN_ACT_POP }; + + nl_msg_put_unspec(request, TCA_VLAN_PARMS, &parm, sizeof parm); + } + nl_msg_end_nested(request, offset); +} + +static void +nl_msg_put_act_tunnel_key_release(struct ofpbuf *request) +{ + size_t offset; + + nl_msg_put_string(request, TCA_ACT_KIND, "tunnel_key"); + offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS); + { + struct tc_tunnel_key tun = { .action = TC_ACT_PIPE, + .t_action = TCA_TUNNEL_KEY_ACT_RELEASE }; + + nl_msg_put_unspec(request, TCA_TUNNEL_KEY_PARMS, &tun, sizeof tun); + } + nl_msg_end_nested(request, offset); +} + +static void +nl_msg_put_act_tunnel_key_set(struct ofpbuf *request, ovs_be64 id, + ovs_be32 ipv4_src, ovs_be32 ipv4_dst, + ovs_be16 tp_dst) +{ + size_t offset; + + nl_msg_put_string(request, TCA_ACT_KIND, "tunnel_key"); + offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS); + { + struct tc_tunnel_key tun = { .action = TC_ACT_PIPE, + .t_action = TCA_TUNNEL_KEY_ACT_SET }; + + nl_msg_put_unspec(request, TCA_TUNNEL_KEY_PARMS, &tun, sizeof tun); + + ovs_be32 id32 = be64_to_be32(id); + nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_KEY_ID, id32); + nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_SRC, ipv4_src); + nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_DST, ipv4_dst); + nl_msg_put_be16(request, TCA_TUNNEL_KEY_ENC_DST_PORT, tp_dst); + } + nl_msg_end_nested(request, offset); +} + +static void +nl_msg_put_act_drop(struct ofpbuf *request) +{ + size_t offset; + + nl_msg_put_string(request, TCA_ACT_KIND, "gact"); + offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS); + { + struct tc_gact p = { .action = TC_ACT_SHOT }; + + nl_msg_put_unspec(request, TCA_GACT_PARMS, &p, sizeof p); + } + nl_msg_end_nested(request, offset); +} + +static void +nl_msg_put_act_redirect(struct ofpbuf *request, int ifindex) +{ + size_t offset; + + nl_msg_put_string(request, TCA_ACT_KIND, "mirred"); + offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS); + { + struct tc_mirred m = { .action = TC_ACT_STOLEN, + .eaction = TCA_EGRESS_REDIR, + .ifindex = ifindex }; + + nl_msg_put_unspec(request, TCA_MIRRED_PARMS, &m, sizeof m); + } + nl_msg_end_nested(request, offset); +} + +static void +nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) +{ + size_t offset; + size_t act_offset; + + offset = nl_msg_start_nested(request, TCA_FLOWER_ACT); + { + uint16_t act_index = 1; + + if (flower->set.set) { + act_offset = nl_msg_start_nested(request, act_index++); + nl_msg_put_act_tunnel_key_set(request, flower->set.id, + flower->set.ipv4_src, + flower->set.ipv4_dst, + flower->set.tp_dst); + nl_msg_end_nested(request, act_offset); + } + if (flower->tunnel.tunnel) { + act_offset = nl_msg_start_nested(request, act_index++); + nl_msg_put_act_tunnel_key_release(request); + nl_msg_end_nested(request, act_offset); + } + if (flower->vlan_pop) { + act_offset = nl_msg_start_nested(request, act_index++); + nl_msg_put_act_pop_vlan(request); + nl_msg_end_nested(request, act_offset); + } + if (flower->vlan_push_id) { + act_offset = nl_msg_start_nested(request, act_index++); + nl_msg_put_act_push_vlan(request, + flower->vlan_push_id, + flower->vlan_push_prio); + nl_msg_end_nested(request, act_offset); + } + if (flower->ifindex_out) { + act_offset = nl_msg_start_nested(request, act_index++); + nl_msg_put_act_redirect(request, flower->ifindex_out); + nl_msg_end_nested(request, act_offset); + } else { + act_offset = nl_msg_start_nested(request, act_index++); + nl_msg_put_act_drop(request); + nl_msg_end_nested(request, act_offset); + } + } + nl_msg_end_nested(request, offset); +} + +static void +nl_msg_put_masked_value(struct ofpbuf *request, uint16_t type, + uint16_t mask_type, const void *data, + const void *mask_data, size_t len) +{ + if (mask_type != TCA_FLOWER_UNSPEC) { + if (is_all_zeros(mask_data, len)) { + return; + } + nl_msg_put_unspec(request, mask_type, mask_data, len); + } + nl_msg_put_unspec(request, type, data, len); +} + +static void +nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower) +{ + ovs_be32 ipv4_src = flower->tunnel.ipv4_src; + ovs_be32 ipv4_dst = flower->tunnel.ipv4_dst; + ovs_be16 tp_dst = flower->tunnel.tp_dst; + ovs_be32 id = be64_to_be32(flower->tunnel.id); + + nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_KEY_ID, id); + nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_SRC, ipv4_src); + nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_DST, ipv4_dst); + nl_msg_put_be16(request, TCA_FLOWER_KEY_ENC_UDP_DST_PORT, tp_dst); +} + +static void +nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower) +{ + uint16_t host_eth_type = ntohs(flower->key.eth_type); + + nl_msg_put_masked_value(request, + TCA_FLOWER_KEY_ETH_DST, + TCA_FLOWER_KEY_ETH_DST_MASK, + &flower->key.dst_mac, + &flower->mask.dst_mac, ETH_ALEN); + nl_msg_put_masked_value(request, + TCA_FLOWER_KEY_ETH_SRC, + TCA_FLOWER_KEY_ETH_SRC_MASK, + &flower->key.src_mac, + &flower->mask.src_mac, ETH_ALEN); + + if (host_eth_type == ETH_P_IP || host_eth_type == ETH_P_IPV6) { + if (flower->mask.ip_proto && flower->key.ip_proto) { + nl_msg_put_u8(request, TCA_FLOWER_KEY_IP_PROTO, + flower->key.ip_proto); + } + if (flower->key.ip_proto == IPPROTO_UDP) { + nl_msg_put_masked_value(request, + TCA_FLOWER_KEY_UDP_SRC, + TCA_FLOWER_KEY_UDP_SRC_MASK, + &flower->key.src_port, + &flower->mask.src_port, 2); + nl_msg_put_masked_value(request, + TCA_FLOWER_KEY_UDP_DST, + TCA_FLOWER_KEY_UDP_DST_MASK, + &flower->key.dst_port, + &flower->mask.dst_port, 2); + } else if (flower->key.ip_proto == IPPROTO_TCP) { + nl_msg_put_masked_value(request, + TCA_FLOWER_KEY_TCP_SRC, + TCA_FLOWER_KEY_TCP_SRC_MASK, + &flower->key.src_port, + &flower->mask.src_port, 2); + nl_msg_put_masked_value(request, + TCA_FLOWER_KEY_TCP_DST, + TCA_FLOWER_KEY_TCP_DST_MASK, + &flower->key.dst_port, + &flower->mask.dst_port, 2); + } + } + if (host_eth_type == ETH_P_IP) { + nl_msg_put_masked_value(request, + TCA_FLOWER_KEY_IPV4_SRC, + TCA_FLOWER_KEY_IPV4_SRC_MASK, + &flower->key.ipv4.ipv4_src, + &flower->mask.ipv4.ipv4_src, + sizeof flower->key.ipv4.ipv4_src); + nl_msg_put_masked_value(request, + TCA_FLOWER_KEY_IPV4_DST, + TCA_FLOWER_KEY_IPV4_DST_MASK, + &flower->key.ipv4.ipv4_dst, + &flower->mask.ipv4.ipv4_dst, + sizeof flower->key.ipv4.ipv4_dst); + } else if (host_eth_type == ETH_P_IPV6) { + nl_msg_put_masked_value(request, + TCA_FLOWER_KEY_IPV6_SRC, + TCA_FLOWER_KEY_IPV6_SRC_MASK, + &flower->key.ipv6.ipv6_src, + &flower->mask.ipv6.ipv6_src, + sizeof flower->key.ipv6.ipv6_src); + nl_msg_put_masked_value(request, + TCA_FLOWER_KEY_IPV6_DST, + TCA_FLOWER_KEY_IPV6_DST_MASK, + &flower->key.ipv6.ipv6_dst, + &flower->mask.ipv6.ipv6_dst, + sizeof flower->key.ipv6.ipv6_dst); + } + + nl_msg_put_be16(request, TCA_FLOWER_KEY_ETH_TYPE, flower->key.eth_type); + + if (host_eth_type == ETH_P_8021Q) { + if (flower->key.vlan_id || flower->key.vlan_prio) { + nl_msg_put_u16(request, TCA_FLOWER_KEY_VLAN_ID, + flower->key.vlan_id); + nl_msg_put_u8(request, TCA_FLOWER_KEY_VLAN_PRIO, + flower->key.vlan_prio); + } + if (flower->key.encap_eth_type) { + nl_msg_put_be16(request, TCA_FLOWER_KEY_VLAN_ETH_TYPE, + flower->key.encap_eth_type); + } + } + + nl_msg_put_u32(request, TCA_FLOWER_FLAGS, 0); + + if (flower->tunnel.tunnel) { + nl_msg_put_flower_tunnel(request, flower); + } + + nl_msg_put_flower_acts(request, flower); +} + +int +tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle, + struct tc_flower *flower) +{ + struct ofpbuf request; + struct tcmsg *tcmsg; + struct ofpbuf *reply; + int error = 0; + size_t basic_offset; + uint16_t eth_type = (OVS_FORCE uint16_t) flower->key.eth_type; + + tcmsg = tc_make_request(ifindex, RTM_NEWTFILTER, + NLM_F_CREATE | NLM_F_ECHO, &request); + tcmsg->tcm_parent = tc_make_handle(0xffff, 0); + tcmsg->tcm_info = tc_make_handle(prio, eth_type); + tcmsg->tcm_handle = handle; + + nl_msg_put_string(&request, TCA_KIND, "flower"); + basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS); + { + nl_msg_put_flower_options(&request, flower); + } + nl_msg_end_nested(&request, basic_offset); + + error = tc_transact(&request, &reply); + if (!error) { + struct tcmsg *tc = + ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc); + + flower->prio = tc_get_major(tc->tcm_info); + flower->handle = tc->tcm_handle; + } + + return error; +} diff --git a/lib/tc.h b/lib/tc.h new file mode 100644 index 0000000..d377101 --- /dev/null +++ b/lib/tc.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2016 Mellanox Technologies, Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TC_H +#define TC_H 1 + +#include "odp-netlink.h" +#include "netlink-socket.h" + + +struct tc_flower_key { + ovs_be16 eth_type; + uint8_t ip_proto; + + struct eth_addr dst_mac; + struct eth_addr src_mac; + + ovs_be16 src_port; + ovs_be16 dst_port; + + uint16_t vlan_id; + uint8_t vlan_prio; + + ovs_be16 encap_eth_type; + uint8_t encap_ip_proto; + union { + struct { + ovs_be32 ipv4_src; + ovs_be32 ipv4_dst; + } encap_ipv4; + struct { + ovs_be32 ipv6_src[4]; + ovs_be32 ipv6_dst[4]; + } encap_ipv6; + }; + + union { + struct { + ovs_be32 ipv4_src; + ovs_be32 ipv4_dst; + } ipv4; + struct { + ovs_be32 ipv6_src[4]; + ovs_be32 ipv6_dst[4]; + } ipv6; + }; +}; + +struct tc_flower { + uint32_t handle; + uint32_t prio; + + struct tc_flower_key key; + struct tc_flower_key mask; + + uint8_t vlan_pop; + uint16_t vlan_push_id; + uint8_t vlan_push_prio; + + int ifindex; + int ifindex_out; + + struct ovs_flow_stats stats; + uint64_t lastused; + + struct { + bool set; + ovs_be32 ipv4_src; + ovs_be32 ipv4_dst; + ovs_be64 id; + ovs_be16 tp_src; + ovs_be16 tp_dst; + } set; + + struct { + bool tunnel; + ovs_be32 ipv4_src; + ovs_be32 ipv4_dst; + ovs_be64 id; + ovs_be16 tp_src; + ovs_be16 tp_dst; + } tunnel; +}; + +int tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle, + struct tc_flower *flower); +int tc_del_filter(int ifindex, int prio, int handle); +int tc_get_flower(int ifindex, int prio, int handle, + struct tc_flower *flower); +int tc_flush(int ifindex); +int tc_dump_flower_start(int ifindex, struct nl_dump *dump); +int parse_netlink_to_tc_flower(struct ofpbuf *reply, + struct tc_flower *flower); + +#endif /* tc.h */ -- 2.7.4 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev