Sugesh, So the issue is that instead of recirculating on the datapath, we can compute the “recirculation” actions on the tunnel push translation time, as these actions only depend on the tunnel attributes? This explains why the recirculation in this case is unnecessary, and mentioning this in the commit message would make the motivation/correctness easier to understand.
It seems to me that OVS_ACTION_ATTR_START_COMBINE/OVS_ACTION_ATTR_END_COMBINE semantics is to clone the packet and execute the actions in between. A cleaner way of doing this would be with (e.g.) “OVS_ACTION_ATTR_CLONE” that would be a nested attribute, the length of which would span to the end of the nested attributes (i.e., the actions produced by the tunnel output). This would be likely also faster to execute as the end does not need to be searched. Semantically this would be the same as the sample action with probability of 1, but simpler. Obviously the removal of the final cloning could be avoided like now. Finally, translating the tunnel output looks a lot like translating patch port output. I wonder if it would be possible to share some of the code via some new helpers rather than duplicating it? Jarno > On Dec 30, 2016, at 7:36 AM, Sugesh Chandran <sugesh.chand...@intel.com> > wrote: > > Currently the tunnel packets are recirculated in OVS after the tunnel > encapsulation. This patch combines the post tunnel actions with the tunnel > push to avoid unnecessary recirculation. > Two new datapath actions OVS_ATTR_COMBINE_START and OVS_ATTR_COMBINE_END are > added to implement this feature. These dummy new actions used as a delimiters > for combined action subset. > In our testing this patch improves the VxLAN encapsulation performance upto > 30%. > > Signed-off-by: Sugesh Chandran <sugesh.chand...@intel.com> > Signed-off-by: Zoltán Balogh <zoltan.bal...@ericsson.com> > Co-authored-by: Zoltán Balogh <zoltan.bal...@ericsson.com> > --- > datapath/linux/compat/include/linux/openvswitch.h | 2 + > lib/dpif-netdev.c | 22 ++---- > lib/dpif.c | 2 + > lib/odp-execute.c | 59 ++++++++++++++- > lib/odp-util.c | 68 ++++++++++++++++- > lib/odp-util.h | 6 ++ > ofproto/ofproto-dpif-sflow.c | 2 + > ofproto/ofproto-dpif-xlate.c | 89 +++++++++++++++++++++++ > 8 files changed, 228 insertions(+), 22 deletions(-) > > diff --git a/datapath/linux/compat/include/linux/openvswitch.h > b/datapath/linux/compat/include/linux/openvswitch.h > index 12260d8..4e9c7f7 100644 > --- a/datapath/linux/compat/include/linux/openvswitch.h > +++ b/datapath/linux/compat/include/linux/openvswitch.h > @@ -818,6 +818,8 @@ enum ovs_action_attr { > #ifndef __KERNEL__ > OVS_ACTION_ATTR_TUNNEL_PUSH, /* struct ovs_action_push_tnl*/ > OVS_ACTION_ATTR_TUNNEL_POP, /* u32 port number. */ > + OVS_ACTION_ATTR_START_COMBINE, > + OVS_ACTION_ATTR_END_COMBINE, > #endif > __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted > * from userspace. */ > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 0b73056..2e59bd1 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -4430,24 +4430,8 @@ dp_execute_cb(void *aux_, struct dp_packet_batch > *packets_, > > case OVS_ACTION_ATTR_TUNNEL_PUSH: > if (*depth < MAX_RECIRC_DEPTH) { > - struct dp_packet_batch tnl_pkt; > - struct dp_packet_batch *orig_packets_ = packets_; > - int err; > - > - if (!may_steal) { > - dp_packet_batch_clone(&tnl_pkt, packets_); > - packets_ = &tnl_pkt; > - dp_packet_batch_reset_cutlen(orig_packets_); > - } > - > dp_packet_batch_apply_cutlen(packets_); > - > - err = push_tnl_action(pmd, a, packets_); > - if (!err) { > - (*depth)++; > - dp_netdev_recirculate(pmd, packets_); > - (*depth)--; > - } > + push_tnl_action(pmd, a, packets_); > return; > } > break; > @@ -4597,6 +4581,10 @@ dp_execute_cb(void *aux_, struct dp_packet_batch > *packets_, > break; > } > > + case OVS_ACTION_ATTR_START_COMBINE: > + break; > + case OVS_ACTION_ATTR_END_COMBINE: > + break; > case OVS_ACTION_ATTR_PUSH_VLAN: > case OVS_ACTION_ATTR_POP_VLAN: > case OVS_ACTION_ATTR_PUSH_MPLS: > diff --git a/lib/dpif.c b/lib/dpif.c > index 53958c5..6477f33 100644 > --- a/lib/dpif.c > +++ b/lib/dpif.c > @@ -1187,6 +1187,8 @@ dpif_execute_helper_cb(void *aux_, struct > dp_packet_batch *packets_, > case OVS_ACTION_ATTR_SAMPLE: > case OVS_ACTION_ATTR_TRUNC: > case OVS_ACTION_ATTR_UNSPEC: > + case OVS_ACTION_ATTR_START_COMBINE: > + case OVS_ACTION_ATTR_END_COMBINE: > case __OVS_ACTION_ATTR_MAX: > OVS_NOT_REACHED(); > } > diff --git a/lib/odp-execute.c b/lib/odp-execute.c > index 4fcff16..c475f98 100644 > --- a/lib/odp-execute.c > +++ b/lib/odp-execute.c > @@ -532,6 +532,8 @@ requires_datapath_assistance(const struct nlattr *a) > case OVS_ACTION_ATTR_USERSPACE: > case OVS_ACTION_ATTR_RECIRC: > case OVS_ACTION_ATTR_CT: > + case OVS_ACTION_ATTR_START_COMBINE: > + case OVS_ACTION_ATTR_END_COMBINE: > return true; > > case OVS_ACTION_ATTR_SET: > @@ -553,6 +555,29 @@ requires_datapath_assistance(const struct nlattr *a) > return false; > } > > +static inline size_t > +find_combine_action_end(const struct nlattr **actions_, size_t *actions_len, > + const struct nlattr **comb_start, > + bool *comb_last_action) > +{ > + const struct nlattr *a; > + const struct nlattr *actions = *actions_; > + unsigned int left; > + bool last_action; > + size_t comb_size = nl_attr_get_u32(actions); /* Size of combined actions > */ > + *actions_len -= NLA_ALIGN(actions->nla_len); > + actions = nl_attr_next(actions); > + > + *comb_start = actions; > + a = (void *) ((uint8_t *) actions + NLA_ALIGN(comb_size)); > + left = (*actions_len) - comb_size; > + last_action = (left <= NLA_ALIGN(a->nla_len)); > + *actions_ = a; > + *comb_last_action = last_action ? true : false; > + *actions_len = left; > + return comb_size; > +} > + > void > odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal, > const struct nlattr *actions, size_t actions_len, > @@ -561,7 +586,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch > *batch, bool steal, > struct dp_packet **packets = batch->packets; > int cnt = batch->count; > const struct nlattr *a; > - unsigned int left; > + size_t left; > int i; > > NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) { > @@ -573,7 +598,35 @@ odp_execute_actions(void *dp, struct dp_packet_batch > *batch, bool steal, > /* Allow 'dp_execute_action' to steal the packet data if we do > * not need it any more. */ > bool may_steal = steal && last_action; > - > + if ((enum ovs_action_attr) type == > + OVS_ACTION_ATTR_START_COMBINE) { > + size_t comb_action_len; > + const struct nlattr *comb_actions = NULL; > + bool comb_last_action = true; > + comb_action_len = find_combine_action_end(&a, &left, > + &comb_actions, > + > &comb_last_action); > + if (OVS_UNLIKELY(!comb_action_len)) { > + /* No combine present, execute current action */ > + goto execute; > + } > + if (OVS_LIKELY(comb_last_action)) { > + odp_execute_actions(dp, batch, 1, comb_actions, > + comb_action_len, dp_execute_action); > + return; > + } > + else { > + /* Packets to be duplicated for the sub action */ > + struct dp_packet_batch comb_pkt_batch; > + dp_packet_batch_clone(&comb_pkt_batch, batch); > + dp_packet_batch_reset_cutlen(batch); > + odp_execute_actions(dp, &comb_pkt_batch, steal, > + comb_actions, comb_action_len, > + dp_execute_action); > + } > + continue; > + } > +execute: > dp_execute_action(dp, batch, a, may_steal); > > if (last_action) { > @@ -683,6 +736,8 @@ odp_execute_actions(void *dp, struct dp_packet_batch > *batch, bool steal, > case OVS_ACTION_ATTR_RECIRC: > case OVS_ACTION_ATTR_CT: > case OVS_ACTION_ATTR_UNSPEC: > + case OVS_ACTION_ATTR_START_COMBINE: > + case OVS_ACTION_ATTR_END_COMBINE: > case __OVS_ACTION_ATTR_MAX: > OVS_NOT_REACHED(); > } > diff --git a/lib/odp-util.c b/lib/odp-util.c > index 427dd65..9cb8629 100644 > --- a/lib/odp-util.c > +++ b/lib/odp-util.c > @@ -121,6 +121,8 @@ odp_action_len(uint16_t type) > case OVS_ACTION_ATTR_SET_MASKED: return ATTR_LEN_VARIABLE; > case OVS_ACTION_ATTR_SAMPLE: return ATTR_LEN_VARIABLE; > case OVS_ACTION_ATTR_CT: return ATTR_LEN_VARIABLE; > + case OVS_ACTION_ATTR_START_COMBINE: return sizeof(uint32_t); > + case OVS_ACTION_ATTR_END_COMBINE: return 0; > > case OVS_ACTION_ATTR_UNSPEC: > case __OVS_ACTION_ATTR_MAX: > @@ -509,7 +511,7 @@ format_odp_tnl_push_header(struct ds *ds, struct > ovs_action_push_tnl *data) > gnh->oam ? "oam," : "", > gnh->critical ? "crit," : "", > ntohl(get_16aligned_be32(&gnh->vni)) >> 8); > - > + > if (gnh->opt_len) { > ds_put_cstr(ds, ",options("); > format_geneve_opts(gnh->options, NULL, gnh->opt_len * 4, > @@ -865,6 +867,17 @@ format_odp_action(struct ds *ds, const struct nlattr *a) > case OVS_ACTION_ATTR_CT: > format_odp_conntrack_action(ds, a); > break; > + case OVS_ACTION_ATTR_START_COMBINE: { > + uint32_t comb_len; > + comb_len = nl_attr_get_u32(a); > + if (comb_len) { > + ds_put_cstr(ds,"{"); > + } > + break; > + } > + case OVS_ACTION_ATTR_END_COMBINE: > + ds_put_cstr(ds,"}"); > + break; > case OVS_ACTION_ATTR_UNSPEC: > case __OVS_ACTION_ATTR_MAX: > default: > @@ -873,6 +886,34 @@ format_odp_action(struct ds *ds, const struct nlattr *a) > } > } > > +static bool > +is_action_combined(const struct nlattr *a, size_t len) > +{ > + bool last_action = false; > + last_action = (len <= NLA_ALIGN(a->nla_len)); > + enum ovs_action_attr type = nl_attr_type(a); > + if (type == OVS_ACTION_ATTR_START_COMBINE) { > + uint32_t comb_len; > + comb_len = nl_attr_get_u32(a); > + if (comb_len) { > + return true; > + } > + } > + else if (type == OVS_ACTION_ATTR_END_COMBINE) { > + return true; > + } > + if (!last_action) { > + type = nl_attr_type(nl_attr_next(a)); > + if (type == OVS_ACTION_ATTR_END_COMBINE) { > + return true; > + } > + } > + else { > + return true; > + } > + return false; > +} > + > void > format_odp_actions(struct ds *ds, const struct nlattr *actions, > size_t actions_len) > @@ -882,10 +923,10 @@ format_odp_actions(struct ds *ds, const struct nlattr > *actions, > unsigned int left; > > NL_ATTR_FOR_EACH (a, left, actions, actions_len) { > - if (a != actions) { > + format_odp_action(ds, a); > + if (!is_action_combined(a, left)) { > ds_put_char(ds, ','); > } > - format_odp_action(ds, a); > } > if (left) { > int i; > @@ -5338,6 +5379,27 @@ odp_put_tunnel_action(const struct flow_tnl *tunnel, > } > > void > +odp_put_combine_start_action(struct ofpbuf *odp_actions, uint32_t action_len) > +{ > + nl_msg_put_u32(odp_actions, OVS_ACTION_ATTR_START_COMBINE, action_len); > +} > + > +void > +odp_update_combine_start_action(struct nlattr *start_combine, > + uint32_t combine_size) > +{ > + uint32_t *combine_len; > + combine_len = (uint32_t *)(start_combine + 1); > + *combine_len = combine_size; > +} > + > +void > +odp_put_combine_end_action(struct ofpbuf *odp_actions) > +{ > + nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_END_COMBINE); > +} > + > +void > odp_put_tnl_push_action(struct ofpbuf *odp_actions, > struct ovs_action_push_tnl *data) > { > diff --git a/lib/odp-util.h b/lib/odp-util.h > index 42011bc..b77faee 100644 > --- a/lib/odp-util.h > +++ b/lib/odp-util.h > @@ -308,4 +308,10 @@ void odp_put_tunnel_action(const struct flow_tnl *tunnel, > > void odp_put_tnl_push_action(struct ofpbuf *odp_actions, > struct ovs_action_push_tnl *data); > +void odp_put_combine_end_action(struct ofpbuf *odp_actions); > +void odp_put_combine_start_action(struct ofpbuf *odp_actions, > + uint32_t action_len); > +void > +odp_update_combine_start_action(struct nlattr *start_combine, > + uint32_t combine_size); > #endif /* odp-util.h */ > diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c > index 37992b4..893c1a5 100644 > --- a/ofproto/ofproto-dpif-sflow.c > +++ b/ofproto/ofproto-dpif-sflow.c > @@ -1163,6 +1163,8 @@ dpif_sflow_read_actions(const struct flow *flow, > } > case OVS_ACTION_ATTR_SAMPLE: > case OVS_ACTION_ATTR_UNSPEC: > + case OVS_ACTION_ATTR_START_COMBINE: > + case OVS_ACTION_ATTR_END_COMBINE: > case __OVS_ACTION_ATTR_MAX: > default: > break; > diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c > index 2977be5..f26896f 100644 > --- a/ofproto/ofproto-dpif-xlate.c > +++ b/ofproto/ofproto-dpif-xlate.c > @@ -410,6 +410,9 @@ const char *xlate_strerror(enum xlate_error error) > static void xlate_action_set(struct xlate_ctx *ctx); > static void xlate_commit_actions(struct xlate_ctx *ctx); > > +static int > +combine_tunnel_actions(struct xlate_ctx *ctx, struct xport *out_dev); > + > static void > ctx_trigger_freeze(struct xlate_ctx *ctx) > { > @@ -2865,7 +2868,22 @@ build_tunnel_send(struct xlate_ctx *ctx, const struct > xport *xport, > } > tnl_push_data.tnl_port = odp_to_u32(tunnel_odp_port); > tnl_push_data.out_port = odp_to_u32(out_dev->odp_port); > + > + struct nlattr *start_combine; > + uint32_t action_size = 0; > + uint32_t push_action_size = 0; > + start_combine = ofpbuf_tail(ctx->odp_actions); > + odp_put_combine_start_action(ctx->odp_actions, 0); > + action_size = ctx->odp_actions->size; > odp_put_tnl_push_action(ctx->odp_actions, &tnl_push_data); > + push_action_size = ctx->odp_actions->size; > + combine_tunnel_actions(ctx, out_dev); > + if (ctx->odp_actions->size > push_action_size) { > + /* Update the combine action only when combined */ > + odp_update_combine_start_action(start_combine, > + (ctx->odp_actions->size - action_size)); > + odp_put_combine_end_action(ctx->odp_actions); > + } > return 0; > } > > @@ -2903,6 +2921,77 @@ xlate_flow_is_protected(const struct xlate_ctx *ctx, > const struct flow *flow, co > xport_in->xbundle->protected && xport_out->xbundle->protected); > } > > +static int > +combine_tunnel_actions(struct xlate_ctx *ctx, struct xport *out_dev) > +{ > + struct flow *flow = &ctx->xin->flow; > + struct flow old_flow = ctx->xin->flow; > + bool old_conntrack = ctx->conntracked; > + bool old_was_mpls = ctx->was_mpls; > + ovs_version_t old_version = ctx->xin->tables_version; > + struct ofpbuf old_stack = ctx->stack; > + union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)]; > + struct ofpbuf old_action_set = ctx->action_set; > + uint64_t actset_stub[1024 / 8]; > + const struct xbridge *xbridge_old = ctx->xbridge; > + > + ofpbuf_use_stub(&ctx->stack, new_stack, sizeof new_stack); > + ofpbuf_use_stub(&ctx->action_set, actset_stub, sizeof actset_stub); > + > + struct xbridge *xbridge = NULL; > + > + xbridge = out_dev->xbridge; > + > + ctx->xbridge = xbridge; /* peer bridge replaced*/ > + flow->in_port.ofp_port = out_dev->ofp_port; /* peer port replaced */ > + flow->metadata = htonll(0); > + memset(&flow->tunnel, 0, sizeof flow->tunnel); > + memset(flow->regs, 0, sizeof flow->regs); > + flow->actset_output = OFPP_UNSET; > + ctx->conntracked = false; > + clear_conntrack(flow); > + > + /* The bridge is now known so obtain its table version. */ > + ctx->xin->tables_version > + = ofproto_dpif_get_tables_version(ctx->xbridge->ofproto); > + > + xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true); > + if (!ctx->freezing) { > + xlate_action_set(ctx); > + } > + if (ctx->freezing) { > + finish_freezing(ctx); > + } > + > + ctx->xin->flow = old_flow; > + ctx->xbridge = xbridge_old; > + ofpbuf_uninit(&ctx->action_set); > + ctx->action_set = old_action_set; > + ofpbuf_uninit(&ctx->stack); > + ctx->stack = old_stack; > + > + /* Restore calling bridge's lookup version. */ > + ctx->xin->tables_version = old_version; > + > + /* The peer bridge popping MPLS should have no effect on the original > + * bridge. */ > + ctx->was_mpls = old_was_mpls; > + > + /* The peer bridge's conntrack execution should have no effect on the > + * original bridge. */ > + ctx->conntracked = old_conntrack; > + > + /* The fact that the peer bridge exits (for any reason) does not mean > + * that the original bridge should exit. Specifically, if the peer > + * bridge freezes translation, the original bridge must continue > + * processing with the original, not the frozen packet! */ > + ctx->exit = false; > + > + /* Peer bridge errors do not propagate back. */ > + ctx->error = XLATE_OK; > + > + return 0; > +} > > static void > compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, > -- > 2.7.4 > > _______________________________________________ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev