Introduce the capability to split multicast group openflow actions created in consider_mc_group routine in multiple buffers if the single buffer size is over netlink buffer size limits.
Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2232152 Signed-off-by: Lorenzo Bianconi <lorenzo.bianc...@redhat.com> --- controller/physical.c | 272 +++++++++++++++++++++++++++++----------- controller/pinctrl.c | 66 ++++++++++ include/ovn/actions.h | 3 + tests/ovn-controller.at | 45 +++++++ 4 files changed, 313 insertions(+), 73 deletions(-) diff --git a/controller/physical.c b/controller/physical.c index 75257bc85..663fd1c8d 100644 --- a/controller/physical.c +++ b/controller/physical.c @@ -1969,6 +1969,22 @@ local_output_pb(int64_t tunnel_key, struct ofpbuf *ofpacts) put_resubmit(OFTABLE_CHECK_LOOPBACK, ofpacts); } +static void +mc_split_buf_controller_action(struct ofpbuf *ofpacts, uint32_t index, + uint8_t table_id, uint32_t port_key) +{ + size_t oc_offset = encode_start_controller_op( + ACTION_OPCODE_MG_SPLIT_BUF, false, NX_CTLR_NO_METER, ofpacts); + ovs_be32 val = htonl(index); + ofpbuf_put(ofpacts, &val, sizeof val); + val = htonl(port_key); + ofpbuf_put(ofpacts, &val, sizeof val); + ofpbuf_put(ofpacts, &table_id, sizeof table_id); + encode_finish_controller_op(oc_offset, ofpacts); +} + +#define MC_OFPACTS_MAX_MSG_SIZE 1024 +#define MC_BUF_START_ID 0x9000 static void consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name, enum mf_field_id mff_ovn_geneve, @@ -1990,9 +2006,6 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name, struct sset remote_chassis = SSET_INITIALIZER(&remote_chassis); struct sset vtep_chassis = SSET_INITIALIZER(&vtep_chassis); - struct match match; - match_outport_dp_and_port_keys(&match, dp_key, mc->tunnel_key); - /* Go through all of the ports in the multicast group: * * - For remote ports, add the chassis to 'remote_chassis' or @@ -2013,10 +2026,18 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name, * set the output port to be the router patch port for which * the redirect port was added. */ - struct ofpbuf ofpacts, remote_ofpacts, remote_ofpacts_ramp; + struct ofpbuf ofpacts, remote_ofpacts, remote_ofpacts_ramp, reset_ofpacts; ofpbuf_init(&ofpacts, 0); ofpbuf_init(&remote_ofpacts, 0); ofpbuf_init(&remote_ofpacts_ramp, 0); + ofpbuf_init(&reset_ofpacts, 0); + + bool local_ports = false, remote_ports = false, remote_ramp_ports = false; + + put_load(0, MFF_REG6, 0, 32, &reset_ofpacts); + + /* local port loop. */ + uint32_t local_flow_index = MC_BUF_START_ID; for (size_t i = 0; i < mc->n_ports; i++) { struct sbrec_port_binding *port = mc->ports[i]; @@ -2040,19 +2061,15 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name, if (ldp->is_transit_switch) { local_output_pb(port->tunnel_key, &ofpacts); } else { - local_output_pb(port->tunnel_key, &remote_ofpacts); - local_output_pb(port->tunnel_key, &remote_ofpacts_ramp); + remote_ramp_ports = true; + remote_ports = true; } } if (!strcmp(port->type, "remote")) { if (port->chassis) { - put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, - &remote_ofpacts); - tunnel_to_chassis(mff_ovn_geneve, port->chassis->name, - chassis_tunnels, mc->datapath, - port->tunnel_key, &remote_ofpacts); + remote_ports = true; } } else if (!strcmp(port->type, "localport")) { - local_output_pb(port->tunnel_key, &remote_ofpacts); + remote_ports = true; } else if ((port->chassis == chassis || is_additional_chassis(port, chassis)) && (local_binding_get_primary_pb(local_bindings, lport_name) @@ -2095,87 +2112,196 @@ consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name, } } } - } - /* Table 40, priority 100. - * ======================= - * - * Handle output to the local logical ports in the multicast group, if - * any. */ - bool local_ports = ofpacts.size > 0; - if (local_ports) { - /* Following delivery to local logical ports, restore the multicast - * group as the logical output port. */ - put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts); + local_ports |= !!ofpacts.size; + if (!local_ports) { + continue; + } - ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, - mc->header_.uuid.parts[0], - &match, &ofpacts, &mc->header_.uuid); + /* do not overcome max netlink message size used by ovs-vswitchd to + * send netlink configuration to the kernel. */ + if (ofpacts.size > MC_OFPACTS_MAX_MSG_SIZE || i == mc->n_ports - 1) { + struct match match; + + match_outport_dp_and_port_keys(&match, dp_key, mc->tunnel_key); + + bool is_first = local_flow_index == MC_BUF_START_ID; + if (!is_first) { + match_set_reg(&match, MFF_REG6 - MFF_REG0, local_flow_index); + } + + if (i == mc->n_ports - 1) { + /* Following delivery to local logical ports, restore the + * multicast group as the logical output port. */ + put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts); + } else { + mc_split_buf_controller_action(&ofpacts, ++local_flow_index, + OFTABLE_LOCAL_OUTPUT, + mc->tunnel_key); + } + + /* reset MFF_REG6. */ + ofpbuf_insert(&ofpacts, 0, reset_ofpacts.data, reset_ofpacts.size); + + /* Table 40, priority 100. + * ========================== + * + * Handle output to the local logical ports in the multicast group, + * if any. */ + ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, + is_first ? 100 : 110, + mc->header_.uuid.parts[0], &match, &ofpacts, + &mc->header_.uuid); + ofpbuf_clear(&ofpacts); + } } - /* Table 39, priority 100. - * ======================= - * - * Handle output to the remote chassis in the multicast group, if - * any. */ - if (!sset_is_empty(&remote_chassis) || - !sset_is_empty(&vtep_chassis) || remote_ofpacts.size > 0) { - if (remote_ofpacts.size > 0) { - /* Following delivery to logical patch ports, restore the - * multicast group as the logical output port. */ - put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, - &remote_ofpacts); - - if (get_vtep_port(local_datapaths, mc->datapath->tunnel_key)) { - struct match match_ramp; - match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, 0, - MLF_RCV_FROM_RAMP); + /* remote port loop. */ + struct ofpbuf remote_ofpacts_last; + ofpbuf_init(&remote_ofpacts_last, 0); + if (remote_ports) { + put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &remote_ofpacts_last); + } + fanout_to_chassis(mff_ovn_geneve, &remote_chassis, chassis_tunnels, + mc->datapath, mc->tunnel_key, false, + &remote_ofpacts_last); + fanout_to_chassis(mff_ovn_geneve, &vtep_chassis, chassis_tunnels, + mc->datapath, mc->tunnel_key, true, + &remote_ofpacts_last); + remote_ports |= !!remote_ofpacts_last.size; + if (remote_ports && local_ports) { + put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts_last); + } - put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, - &remote_ofpacts_ramp); + bool has_vetp = get_vtep_port(local_datapaths, mc->datapath->tunnel_key); + uint32_t reverse_remap_flow_index = MC_BUF_START_ID; + uint32_t reverse_flow_index = MC_BUF_START_ID; - /* MCAST traffic which was originally received from RAMP_SWITCH - * is not allowed to be re-sent to remote_chassis. - * Process "patch" port binding for routing in - * OFTABLE_REMOTE_OUTPUT and resubmit packets to - * OFTABLE_LOCAL_OUTPUT for local delivery. */ + for (size_t i = 0; i < mc->n_ports; i++) { + struct sbrec_port_binding *port = mc->ports[i]; - match_outport_dp_and_port_keys(&match_ramp, dp_key, - mc->tunnel_key); + if (port->datapath != mc->datapath) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); + VLOG_WARN_RL(&rl, UUID_FMT": multicast group contains ports " + "in wrong datapath", + UUID_ARGS(&mc->header_.uuid)); + continue; + } + + if (!strcmp(port->type, "patch")) { + if (!ldp->is_transit_switch) { + local_output_pb(port->tunnel_key, &remote_ofpacts); + local_output_pb(port->tunnel_key, &remote_ofpacts_ramp); + } + } if (!strcmp(port->type, "remote")) { + if (port->chassis) { + put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32, + &remote_ofpacts); + tunnel_to_chassis(mff_ovn_geneve, port->chassis->name, + chassis_tunnels, mc->datapath, + port->tunnel_key, &remote_ofpacts); + } + } else if (!strcmp(port->type, "localport")) { + local_output_pb(port->tunnel_key, &remote_ofpacts); + } - /* Match packets coming from RAMP_SWITCH and allowed for - * loopback processing (including routing). */ - match_set_reg_masked(&match_ramp, MFF_LOG_FLAGS - MFF_REG0, - MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK, - MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK); + /* do not overcome max netlink message size used by ovs-vswitchd to + * send netlink configuration to the kernel. */ + if (remote_ofpacts.size > MC_OFPACTS_MAX_MSG_SIZE || + i == mc->n_ports - 1) { + struct match match; + match_outport_dp_and_port_keys(&match, dp_key, mc->tunnel_key); + if (has_vetp) { + match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, 0, + MLF_RCV_FROM_RAMP); + } - put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts_ramp); + bool is_first = reverse_flow_index == MC_BUF_START_ID; + if (!is_first) { + match_set_reg(&match, MFF_REG6 - MFF_REG0, reverse_flow_index); + } + + if (i == mc->n_ports - 1) { + ofpbuf_put(&remote_ofpacts, remote_ofpacts_last.data, + remote_ofpacts_last.size); + } else { + mc_split_buf_controller_action(&remote_ofpacts, + ++reverse_flow_index, + OFTABLE_REMOTE_OUTPUT, + mc->tunnel_key); + } - ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 120, - mc->header_.uuid.parts[0], &match_ramp, - &remote_ofpacts_ramp, &mc->header_.uuid); + /* reset MFF_REG6. */ + ofpbuf_insert(&remote_ofpacts, 0, reset_ofpacts.data, + reset_ofpacts.size); + if (remote_ports) { + /* Table 39, priority 100. + * ======================= + * + * Handle output to the remote chassis in the multicast group, + * if any. */ + ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, + is_first ? 100 : 110, + mc->header_.uuid.parts[0], + &match, &remote_ofpacts, &mc->header_.uuid); } + ofpbuf_clear(&remote_ofpacts); } - fanout_to_chassis(mff_ovn_geneve, &remote_chassis, chassis_tunnels, - mc->datapath, mc->tunnel_key, false, - &remote_ofpacts); - fanout_to_chassis(mff_ovn_geneve, &vtep_chassis, chassis_tunnels, - mc->datapath, mc->tunnel_key, true, - &remote_ofpacts); + if (!remote_ports || !remote_ramp_ports || !has_vetp) { + continue; + } - if (remote_ofpacts.size) { - if (local_ports) { - put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts); + if (remote_ofpacts_ramp.size > MC_OFPACTS_MAX_MSG_SIZE || + i == mc->n_ports - 1) { + /* MCAST traffic which was originally received from RAMP_SWITCH + * is not allowed to be re-sent to remote_chassis. + * Process "patch" port binding for routing in + * OFTABLE_REMOTE_OUTPUT and resubmit packets to + * OFTABLE_LOCAL_OUTPUT for local delivery. */ + struct match match_ramp; + match_outport_dp_and_port_keys(&match_ramp, dp_key, + mc->tunnel_key); + + /* Match packets coming from RAMP_SWITCH and allowed for + * loopback processing (including routing). */ + match_set_reg_masked(&match_ramp, MFF_LOG_FLAGS - MFF_REG0, + MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK, + MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK); + + bool is_first = reverse_remap_flow_index == MC_BUF_START_ID; + if (!is_first) { + match_set_reg(&match_ramp, MFF_REG6 - MFF_REG0, + reverse_remap_flow_index); } - ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100, - mc->header_.uuid.parts[0], - &match, &remote_ofpacts, &mc->header_.uuid); + + if (i == mc->n_ports - 1) { + put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, + &remote_ofpacts_ramp); + put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts_ramp); + } else { + mc_split_buf_controller_action(&remote_ofpacts_ramp, + ++reverse_remap_flow_index, + OFTABLE_REMOTE_OUTPUT, + mc->tunnel_key); + } + + /* reset MFF_REG6. */ + ofpbuf_insert(&remote_ofpacts_ramp, 0, reset_ofpacts.data, + reset_ofpacts.size); + ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, + is_first ? 120 : 130, + mc->header_.uuid.parts[0], &match_ramp, + &remote_ofpacts_ramp, &mc->header_.uuid); + ofpbuf_clear(&remote_ofpacts_ramp); } } + ofpbuf_uninit(&ofpacts); ofpbuf_uninit(&remote_ofpacts); + ofpbuf_uninit(&remote_ofpacts_last); ofpbuf_uninit(&remote_ofpacts_ramp); + ofpbuf_uninit(&reset_ofpacts); sset_destroy(&remote_chassis); sset_destroy(&vtep_chassis); } diff --git a/controller/pinctrl.c b/controller/pinctrl.c index ff5a3444c..996f78d7b 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c @@ -211,6 +211,10 @@ static void send_mac_binding_buffered_pkts(struct rconn *swconn) static void pinctrl_rarp_activation_strategy_handler(const struct match *md); +static void pinctrl_mg_split_buff_handler( + struct rconn *swconn, struct dp_packet *pkt, + const struct match *md, struct ofpbuf *userdata); + static void init_activated_ports(void); static void destroy_activated_ports(void); static void wait_activated_ports(void); @@ -3283,6 +3287,11 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg) ovs_mutex_unlock(&pinctrl_mutex); break; + case ACTION_OPCODE_MG_SPLIT_BUF: + pinctrl_mg_split_buff_handler(swconn, &packet, &pin.flow_metadata, + &userdata); + break; + default: VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32, ntohl(ah->opcode)); @@ -8148,6 +8157,63 @@ pinctrl_rarp_activation_strategy_handler(const struct match *md) notify_pinctrl_main(); } +static void +pinctrl_mg_split_buff_handler(struct rconn *swconn, struct dp_packet *pkt, + const struct match *md, struct ofpbuf *userdata) +{ + ovs_be32 *index = ofpbuf_try_pull(userdata, sizeof *index); + if (!index) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_WARN_RL(&rl, "%s: missing index field", __func__); + return; + } + + ovs_be32 *mg = ofpbuf_try_pull(userdata, sizeof *mg); + if (!mg) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_WARN_RL(&rl, "%s: missing multicast group field", __func__); + return; + } + + uint8_t *table_id = ofpbuf_try_pull(userdata, sizeof *table_id); + if (!table_id) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_WARN_RL(&rl, "%s: missing table_id field", __func__); + return; + } + + struct ofpbuf ofpacts; + ofpbuf_init(&ofpacts, 4096); + reload_metadata(&ofpacts, md); + + /* reload pkt_mark field */ + const struct mf_field *pkt_mark_field = mf_from_id(MFF_PKT_MARK); + union mf_value pkt_mark_value; + mf_get_value(pkt_mark_field, &md->flow, &pkt_mark_value); + ofpact_put_set_field(&ofpacts, pkt_mark_field, &pkt_mark_value, NULL); + + put_load(ntohl(*index), MFF_REG6, 0, 32, &ofpacts); + put_load(ntohl(*mg), MFF_LOG_OUTPORT, 0, 32, &ofpacts); + + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts); + resubmit->in_port = OFPP_CONTROLLER; + resubmit->table_id = *table_id; + + struct ofputil_packet_out po = { + .packet = dp_packet_data(pkt), + .packet_len = dp_packet_size(pkt), + .buffer_id = UINT32_MAX, + .ofpacts = ofpacts.data, + .ofpacts_len = ofpacts.size, + }; + match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); + enum ofp_version version = rconn_get_version(swconn); + enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version); + queue_msg(swconn, ofputil_encode_packet_out(&po, proto)); + + ofpbuf_uninit(&ofpacts); +} + static struct hmap put_fdbs; /* MAC learning (fdb) related functions. Runs within the main diff --git a/include/ovn/actions.h b/include/ovn/actions.h index 04bb6ffd0..49cfe0624 100644 --- a/include/ovn/actions.h +++ b/include/ovn/actions.h @@ -747,6 +747,9 @@ enum action_opcode { /* activation_strategy_rarp() */ ACTION_OPCODE_ACTIVATION_STRATEGY_RARP, + + /* multicast group split buffer action. */ + ACTION_OPCODE_MG_SPLIT_BUF, }; /* Header. */ diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at index 4212d601a..7d1645e53 100644 --- a/tests/ovn-controller.at +++ b/tests/ovn-controller.at @@ -2651,3 +2651,48 @@ OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=0 | grep -c in_por OVN_CLEANUP([hv1]) AT_CLEANUP ]) + +OVN_FOR_EACH_NORTHD([ +AT_SETUP([ovn-controller - multicast group buffer split]) +AT_KEYWORDS([ovn]) +ovn_start + +net_add n1 + +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 + +for i in $(seq 1 20); do + ovs-vsctl -- add-port br-int hv1-vif$i -- \ + set interface hv1-vif$i external-ids:iface-id=sw0-p$i \ + options:tx_pcap=hv1/vif${i}-tx.pcap \ + options:rxq_pcap=hv1/vif${i}-rx.pcap \ + ofport-request=$i +done + + +check ovn-nbctl ls-add sw0 +for i in $(seq 1 20); do + check ovn-nbctl lsp-add sw0 sw0-p$i + check ovn-nbctl lsp-set-addresses sw0-p$i "unknown" +done + +wait_for_ports_up +ovn-nbctl --wait=hv sync + +OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=40 | grep -c "controller(userdata=00.00.00.1b.00.00.00.00.00.00.90.01"], [0],[dnl +3 +]) + +for i in $(seq 1 10); do + check ovn-nbctl lsp-del sw0-p$i +done +ovn-nbctl --wait=hv sync + +AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=40 | grep -q controller], [1]) + +OVN_CLEANUP([hv1]) +AT_CLEANUP +]) -- 2.41.0 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev