sent v6 ________________________________ From: Ilya Maximets <i.maxim...@ovn.org> Sent: Wednesday, December 18, 2019 11:38 PM To: Paul Blakey <pa...@mellanox.com>; Roi Dayan <r...@mellanox.com>; Simon Horman <simon.hor...@netronome.com>; Oz Shlomo <o...@mellanox.com>; Marcelo Ricardo Leitner <mleit...@redhat.com>; Justin Pettit <jpet...@nicira.com>; Ilya Maximets <i.maxim...@ovn.org>; Ben Pfaff <b...@ovn.org>; d...@openvswitch.org <d...@openvswitch.org> Subject: Re: [ovs-dev][PATCH v5 07/10] netdev-offload-tc: Add recirculation support via tc chains
On 16.12.2019 16:53, Paul Blakey wrote: > Each recirculation id will create a tc chain, and we translate > the recirculation action to a tc goto chain action. > > We check for kernel support for this by probing OvS Datapath for the > tc recirc id sharing feature. If supported, we can offload rules > that match on recirc_id, and recirculation action safely. > > Signed-off-by: Paul Blakey <pa...@mellanox.com> > Reviewed-by: Roi Dayan <r...@mellanox.com> > > --- > > Changelog: > V4->V5: > Always enable recirc_id sharing to avoid bugs with opening an > existing datapath, and enabling offload later. > V3->V4: > Always try to enable recirc_id sharing (on dpif_open) if hardware offload > is enabled. > V2->V3: > Merged part of probe for recirc_id support in here to help future git > bisect. > Added tunnel released check to avoid bug with mirroring > Removed cascading condition in netdev_tc_flow_put() check of recirc_id > support > > V1->V2: > moved make_tc_id_chain helper to tc.h as static inline > updated is_tc_id_eq with chain compare instead of find_ufid > --- > lib/dpif-netlink.c | 14 ++++++++++++++ > lib/netdev-offload-tc.c | 35 +++++++++++++++++++++++++---------- > lib/tc.c | 49 > +++++++++++++++++++++++++++++++++++++++++++------ > lib/tc.h | 18 +++++++++++++++++- > 4 files changed, 99 insertions(+), 17 deletions(-) > > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c > index ef06dd4..5d785e4 100644 > --- a/lib/dpif-netlink.c > +++ b/lib/dpif-netlink.c > @@ -110,6 +110,8 @@ static int dpif_netlink_dp_transact(const struct > dpif_netlink_dp *request, > static int dpif_netlink_dp_get(const struct dpif *, > struct dpif_netlink_dp *reply, > struct ofpbuf **bufp); > +static int > +dpif_netlink_set_features(struct dpif *dpif_, uint32_t new_features); > > struct dpif_netlink_flow { > /* Generic Netlink header. */ > @@ -363,7 +365,9 @@ dpif_netlink_open(const struct dpif_class *class > OVS_UNUSED, const char *name, > } > > error = open_dpif(&dp, dpifp); > + dpif_netlink_set_features(*dpifp, OVS_DP_F_TC_RECIRC_SHARING); > ofpbuf_delete(buf); > + > return error; > } > > @@ -1638,6 +1642,7 @@ dpif_netlink_netdev_match_to_dpif_flow(struct match > *match, > .mask = &match->wc.masks, > .support = { > .max_vlan_headers = 2, > + .recirc = true, > }, > }; > size_t offset; > @@ -2037,6 +2042,7 @@ parse_flow_put(struct dpif_netlink *dpif, struct > dpif_flow_put *put) > struct offload_info info; > ovs_be16 dst_port = 0; > uint8_t csum_on = false; > + bool recirc_act; > int err; > > if (put->flags & DPIF_FP_PROBE) { > @@ -2076,9 +2082,17 @@ parse_flow_put(struct dpif_netlink *dpif, struct > dpif_flow_put *put) > csum_on = tnl_cfg->csum; > } > netdev_close(outdev); > + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_RECIRC) { > + recirc_act = true; > } > } > > + if ((recirc_act || match.flow.recirc_id) > + && !(dpif->user_features & OVS_DP_F_TC_RECIRC_SHARING)) { > + err = EOPNOTSUPP; You need to check this in netdev-offload-tc, because it could be used not only from the system datapath. Maybe pass it with something like this: diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h index 97a500647..bdca6cf3f 100644 --- a/lib/netdev-offload.h +++ b/lib/netdev-offload.h @@ -65,7 +65,8 @@ struct offload_info { const struct dpif_class *dpif_class; ovs_be16 tp_dst_port; /* Destination port for tunnel in SET action */ uint8_t tunnel_csum_on; /* Tunnel header with checksum */ - + bool recirc_id_shared_with_tc; /* Indicates whever tc chains will be in + * sync with datapath recirc ids. */ /* * The flow mark id assigened to the flow. If any pkts hit the flow, * it will be in the pkt meta data. --- (better name/comment are welcome) > + goto out; > + } > + > info.dpif_class = dpif_class; > info.tp_dst_port = dst_port; > info.tunnel_csum_on = csum_on; > diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c > index 15b39e6..e3b0415 100644 > --- a/lib/netdev-offload-tc.c > +++ b/lib/netdev-offload-tc.c > @@ -38,6 +38,7 @@ > #include "tc.h" > #include "unaligned.h" > #include "util.h" > +#include "dpif-provider.h" > > VLOG_DEFINE_THIS_MODULE(netdev_offload_tc); > > @@ -206,9 +207,12 @@ static void > add_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 *ufid, > struct tcf_id *id) > { > - size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0); > - size_t tc_hash = hash_int(hash_int(id->prio, id->handle), id->ifindex); > struct ufid_tc_data *new_data = xzalloc(sizeof *new_data); > + size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0); > + size_t tc_hash; > + > + tc_hash = hash_int(hash_int(id->prio, id->handle), id->ifindex); > + tc_hash = hash_int(id->chain, tc_hash); > > new_data->ufid = *ufid; > new_data->id = *id; > @@ -252,8 +256,11 @@ get_ufid_tc_mapping(const ovs_u128 *ufid, struct tcf_id > *id) > static bool > find_ufid(struct netdev *netdev, struct tcf_id *id, ovs_u128 *ufid) > { > - size_t tc_hash = hash_int(hash_int(id->prio, id->handle), id->ifindex); > struct ufid_tc_data *data; > + size_t tc_hash; > + > + tc_hash = hash_int(hash_int(id->prio, id->handle), id->ifindex); > + tc_hash = hash_int(id->chain, tc_hash); > > ovs_mutex_lock(&ufid_lock); > HMAP_FOR_EACH_WITH_HASH (data, tc_to_ufid_node, tc_hash, &tc_to_ufid) { > @@ -739,6 +746,10 @@ parse_tc_flower_to_match(struct tc_flower *flower, > nl_msg_put_u32(buf, OVS_ACTION_ATTR_OUTPUT, > odp_to_u32(outport)); > } > break; > + case TC_ACT_GOTO: { > + nl_msg_put_u32(buf, OVS_ACTION_ATTR_RECIRC, action->chain); > + } > + break; > } > } > } > @@ -799,6 +810,7 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump, > > match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX); > match->flow.in_port.odp_port = dump->port; > + match_set_recirc_id(match, id.chain); > > return true; > } > @@ -983,12 +995,6 @@ test_key_and_mask(struct match *match) > return EOPNOTSUPP; > } > > - if (mask->recirc_id && key->recirc_id) { > - VLOG_DBG_RL(&rl, "offloading attribute recirc_id isn't supported"); > - return EOPNOTSUPP; > - } > - mask->recirc_id = 0; > - > if (mask->dp_hash) { > VLOG_DBG_RL(&rl, "offloading attribute dp_hash isn't supported"); > return EOPNOTSUPP; > @@ -1156,6 +1162,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match > *match, > uint32_t block_id = 0; > struct nlattr *nla; > struct tcf_id id; > + uint32_t chain; > size_t left; > int prio = 0; > int ifindex; > @@ -1170,6 +1177,9 @@ netdev_tc_flow_put(struct netdev *netdev, struct match > *match, > > memset(&flower, 0, sizeof flower); > > + chain = key->recirc_id; > + mask->recirc_id = 0; > + > if (flow_tnl_dst_is_set(&key->tunnel)) { > VLOG_DBG_RL(&rl, > "tunnel: id %#" PRIx64 " src " IP_FMT > @@ -1420,6 +1430,10 @@ netdev_tc_flow_put(struct netdev *netdev, struct match > *match, > if (err) { > return err; > } > + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_RECIRC) { > + action->type = TC_ACT_GOTO; > + action->chain = nl_attr_get_u32(nla); > + flower.action_count++; > } else { > VLOG_DBG_RL(&rl, "unsupported put action type: %d", > nl_attr_type(nla)); > @@ -1443,7 +1457,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match > *match, > flower.act_cookie.len = sizeof *ufid; > > block_id = get_block_id_from_netdev(netdev); > - id = tc_make_tcf_id(ifindex, block_id, prio, hook); > + id = tc_make_tcf_id_chain(ifindex, block_id, chain, prio, hook); > err = tc_replace_flower(&id, &flower); > if (!err) { > if (stats) { > @@ -1491,6 +1505,7 @@ netdev_tc_flow_get(struct netdev *netdev, > > match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX); > match->flow.in_port.odp_port = in_port; > + match_set_recirc_id(match, id.chain); > > return 0; > } > diff --git a/lib/tc.c b/lib/tc.c > index 7a4acce..dafa90e 100644 > --- a/lib/tc.c > +++ b/lib/tc.c > @@ -51,6 +51,7 @@ > #endif > > #if TCA_MAX < 14 > +#define TCA_CHAIN 11 > #define TCA_INGRESS_BLOCK 13 > #endif > > @@ -207,6 +208,10 @@ static void request_from_tcf_id(struct tcf_id *id, > uint16_t eth_type, > TC_EGRESS_PARENT : ingress_parent; > tcmsg->tcm_info = tc_make_handle(id->prio, eth_type); > tcmsg->tcm_handle = id->handle; > + > + if (id->chain) { > + nl_msg_put_u32(request, TCA_CHAIN, id->chain); > + } > } > > int > @@ -286,6 +291,7 @@ tc_add_del_qdisc(int ifindex, bool add, uint32_t block_id, > static const struct nl_policy tca_policy[] = { > [TCA_KIND] = { .type = NL_A_STRING, .optional = false, }, > [TCA_OPTIONS] = { .type = NL_A_NESTED, .optional = false, }, > + [TCA_CHAIN] = { .type = NL_A_U32, .optional = true, }, > [TCA_STATS] = { .type = NL_A_UNSPEC, > .min_len = sizeof(struct tc_stats), .optional = true, }, > [TCA_STATS2] = { .type = NL_A_NESTED, .optional = true, }, > @@ -1135,12 +1141,13 @@ nl_parse_tcf(const struct tcf_t *tm, struct tc_flower > *flower) > } > > static int > -nl_parse_act_drop(struct nlattr *options, struct tc_flower *flower) > +nl_parse_act_gact(struct nlattr *options, struct tc_flower *flower) > { > struct nlattr *gact_attrs[ARRAY_SIZE(gact_policy)]; > const struct tc_gact *p; > struct nlattr *gact_parms; > const struct tcf_t *tm; > + struct tc_action *action; > > if (!nl_parse_nested(options, gact_policy, gact_attrs, > ARRAY_SIZE(gact_policy))) { > @@ -1151,7 +1158,11 @@ nl_parse_act_drop(struct nlattr *options, struct > tc_flower *flower) > gact_parms = gact_attrs[TCA_GACT_PARMS]; > p = nl_attr_get_unspec(gact_parms, sizeof *p); > > - if (p->action != TC_ACT_SHOT) { > + if (TC_ACT_EXT_CMP(p->action, TC_ACT_GOTO_CHAIN)) { > + action = &flower->actions[flower->action_count++]; > + action->chain = p->action & TC_ACT_EXT_VAL_MASK; > + action->type = TC_ACT_GOTO; > + } else if (p->action != TC_ACT_SHOT) { > VLOG_ERR_RL(&error_rl, "unknown gact action: %d", p->action); > return EINVAL; > } > @@ -1429,7 +1440,7 @@ nl_parse_single_action(struct nlattr *action, struct > tc_flower *flower) > act_cookie = action_attrs[TCA_ACT_COOKIE]; > > if (!strcmp(act_kind, "gact")) { > - err = nl_parse_act_drop(act_options, flower); > + err = nl_parse_act_gact(act_options, flower); > } else if (!strcmp(act_kind, "mirred")) { > err = nl_parse_act_mirred(act_options, flower); > } else if (!strcmp(act_kind, "vlan")) { > @@ -1580,6 +1591,10 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, > struct tcf_id *id, > return EPROTO; > } > > + if (ta[TCA_CHAIN]) { > + id->chain = nl_attr_get_u32(ta[TCA_CHAIN]); > + } > + > kind = nl_attr_get_string(ta[TCA_KIND]); > if (strcmp(kind, "flower")) { > VLOG_DBG_ONCE("Unsupported filter: %s", kind); > @@ -1876,7 +1891,7 @@ nl_msg_put_act_tunnel_key_set(struct ofpbuf *request, > bool id_present, > } > > static void > -nl_msg_put_act_drop(struct ofpbuf *request) > +nl_msg_put_act_gact(struct ofpbuf *request, uint32_t chain) > { > size_t offset; > > @@ -1885,6 +1900,10 @@ nl_msg_put_act_drop(struct ofpbuf *request) > { > struct tc_gact p = { .action = TC_ACT_SHOT }; > > + if (chain) { > + p.action = TC_ACT_GOTO_CHAIN | chain; > + } > + > nl_msg_put_unspec(request, TCA_GACT_PARMS, &p, sizeof p); > } > nl_msg_end_nested(request, offset); > @@ -2230,12 +2249,30 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct > tc_flower *flower) > nl_msg_end_nested(request, act_offset); > } > break; > + case TC_ACT_GOTO: { > + if (released) { > + /* We don't support tunnel release + output + goto > + * for now, as next chain by default will try and match > + * the tunnel metadata that was released/unset. > + * > + * This will happen with tunnel + mirror ports. > + */ > + return -EOPNOTSUPP; > + } > + > + act_offset = nl_msg_start_nested(request, act_index++); > + nl_msg_put_act_gact(request, action->chain); > + nl_msg_put_act_cookie(request, &flower->act_cookie); > + nl_msg_end_nested(request, act_offset); > + } > + break; > } > } > } > - if (!ifindex) { > + > + if (!flower->action_count) { > act_offset = nl_msg_start_nested(request, act_index++); > - nl_msg_put_act_drop(request); > + nl_msg_put_act_gact(request, 0); > nl_msg_put_act_cookie(request, &flower->act_cookie); > nl_msg_put_act_flags(request); > nl_msg_end_nested(request, act_offset); > diff --git a/lib/tc.h b/lib/tc.h > index da9a766..9154fd8 100644 > --- a/lib/tc.h > +++ b/lib/tc.h > @@ -156,10 +156,13 @@ enum tc_action_type { > TC_ACT_MPLS_POP, > TC_ACT_MPLS_PUSH, > TC_ACT_MPLS_SET, > + TC_ACT_GOTO, > }; > > struct tc_action { > union { > + int chain; > + > struct { > int ifindex_out; > bool ingress; > @@ -214,6 +217,7 @@ struct tcf_id { > enum tc_qdisc_hook hook; > uint32_t block_id; > int ifindex; > + uint32_t chain; > uint16_t prio; > uint32_t handle; > }; > @@ -233,6 +237,17 @@ tc_make_tcf_id(int ifindex, uint32_t block_id, uint16_t > prio, > return id; > } > > +static inline struct tcf_id > +tc_make_tcf_id_chain(int ifindex, uint32_t block_id, uint32_t chain, > + uint16_t prio, enum tc_qdisc_hook hook) > +{ > + struct tcf_id id = tc_make_tcf_id(ifindex, block_id, prio, hook); > + > + id.chain = chain; > + > + return id; > +} > + > static inline bool > is_tcf_id_eq(struct tcf_id *id1, struct tcf_id *id2) > { > @@ -241,7 +256,8 @@ is_tcf_id_eq(struct tcf_id *id1, struct tcf_id *id2) > && id1->handle == id2->handle > && id1->hook == id2->hook > && id1->block_id == id2->block_id > - && id1->ifindex == id2->ifindex; > + && id1->ifindex == id2->ifindex > + && id1->chain == id2->chain; > } > > struct tc_flower { > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev