The split buf was causing issues with overloaded ovn-controller. That would happen when there was a lof of multicast traffic, due to the split nature the traffic could hit ovn-controller pinctrl thread multiple times. The original commit aimed to workaround limitation within Linux kernel, which would reject too long actions. The limitation should be fixed [0] so there isn't a need for the workaround anymore.
This partially reverts [1] and [2] both of them used the split buf action. Note that this should solve the ovn-controller overload issues, however if the multicast group is big enough there might be still an issue with resubmit limit. [0] https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/commit/?id=a1e64addf3ff [1] 325c7b203d8b ("controller: split mg action in table 39 and 40 to fit kernel netlink buffer size") [2] 7c3f7f415f1d ("northd, controller: Flood ARP and NA packet on transit router.") Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2025-February/053455.html Reported-at: https://issues.redhat.com/browse/FDP-1636 Signed-off-by: Ales Musil <[email protected]> --- TODO.rst | 1 + controller/lflow.h | 3 - controller/physical.c | 131 +++++++++------------------------------- controller/pinctrl.c | 7 ++- include/ovn/actions.h | 2 +- lib/actions.c | 1 - tests/ovn-controller.at | 55 ----------------- tests/ovn.at | 61 +------------------ 8 files changed, 36 insertions(+), 225 deletions(-) diff --git a/TODO.rst b/TODO.rst index 741ac6027..abcf6c526 100644 --- a/TODO.rst +++ b/TODO.rst @@ -200,6 +200,7 @@ when the feature/action will move from ``Deprecated`` to ``Removed``. * 26.03 Deprecated * ``ct_lb`` action, should be removed in 26.09. + * ``SPLIT_BUF_ACTION`` action, should be removed in 28.09. * 26.03 Removed diff --git a/controller/lflow.h b/controller/lflow.h index a97abd824..d82acc0d8 100644 --- a/controller/lflow.h +++ b/controller/lflow.h @@ -105,9 +105,6 @@ struct uuid; #define OFTABLE_GET_REMOTE_FDB 87 #define OFTABLE_LEARN_REMOTE_FDB 88 -/* Common defines shared between some controller components. */ -#define CHASSIS_FLOOD_INDEX_START 0x8000 - struct lflow_ctx_in { struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath; diff --git a/controller/physical.c b/controller/physical.c index d161dc437..c055e664b 100644 --- a/controller/physical.c +++ b/controller/physical.c @@ -225,18 +225,6 @@ match_set_chassis_flood_outport(struct match *match, } } -static void -match_set_chassis_flood_remote(struct match *match, uint32_t index) -{ - match_init_catchall(match); - match_set_reg(match, MFF_REG6 - MFF_REG0, index); - /* Match if the packet wasn't already received from tunnel. - * This prevents from looping it back to the tunnel again. */ - match_set_reg_masked(match, MFF_LOG_FLAGS - MFF_REG0, 0, - MLF_RX_FROM_TUNNEL); -} - - static void put_stack(enum mf_field_id field, struct ofpact_stack *stack) { @@ -245,27 +233,6 @@ put_stack(enum mf_field_id field, struct ofpact_stack *stack) stack->subfield.n_bits = stack->subfield.field->n_bits; } -/* Split the ofpacts buffer to prevent overflow of the - * MAX_ACTIONS_BUFSIZE netlink buffer size supported by the kernel. - * In order to avoid all the action buffers to be squashed together by - * ovs, add a controller action for each configured openflow. - */ -static void -put_split_buf_function(uint32_t index, uint32_t outport, uint8_t stage, - struct ofpbuf *ofpacts) -{ - ovs_be32 values[2] = { - htonl(index), - htonl(outport) - }; - size_t oc_offset = - encode_start_controller_op(ACTION_OPCODE_SPLIT_BUF_ACTION, false, - NX_CTLR_NO_METER, ofpacts); - ofpbuf_put(ofpacts, values, sizeof values); - ofpbuf_put(ofpacts, &stage, sizeof stage); - encode_finish_controller_op(oc_offset, ofpacts); -} - static const struct sbrec_port_binding * get_localnet_port(const struct hmap *local_datapaths, int64_t tunnel_key) { @@ -2600,11 +2567,7 @@ local_set_ct_zone_and_output_pb(int tunnel_key, int64_t zone_id, put_resubmit(OFTABLE_CHECK_LOOPBACK, ofpacts); } -#define MC_OFPACTS_MAX_MSG_SIZE 8192 -#define MC_BUF_START_ID 0x9000 - -struct mc_buf_split_ctx { - uint8_t index; +struct mc_flow_ctx { uint8_t stage; uint16_t prio; uint32_t flags; @@ -2612,18 +2575,17 @@ struct mc_buf_split_ctx { struct ofpbuf ofpacts; }; -enum mc_buf_split_type { - MC_BUF_SPLIT_LOCAL, - MC_BUF_SPLIT_REMOTE, - MC_BUF_SPLIT_REMOTE_RAMP, - MC_BUF_SPLIT_MAX, +enum mc_flows_type { + MC_FLOWS_LOCAL, + MC_FLOWS_REMOTE, + MC_FLOWS_REMOTE_RAMP, + MC_FLOWS_MAX, }; static void mc_ofctrl_add_flow(const struct sbrec_multicast_group *mc, - struct mc_buf_split_ctx *ctx, bool split, + struct mc_flow_ctx *ctx, struct ovn_desired_flow_table *flow_table) - { struct match match = MATCH_CATCHALL_INITIALIZER; @@ -2631,24 +2593,10 @@ mc_ofctrl_add_flow(const struct sbrec_multicast_group *mc, mc->tunnel_key); match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, ctx->flags, ctx->flags_mask); - - uint16_t prio = ctx->prio; - if (ctx->index) { - match_set_reg(&match, MFF_REG6 - MFF_REG0, - MC_BUF_START_ID + ctx->index); - prio += 10; - } - - if (split) { - put_split_buf_function(MC_BUF_START_ID + ctx->index + 1, - mc->tunnel_key, ctx->stage, &ctx->ofpacts); - } - - ofctrl_add_flow(flow_table, ctx->stage, prio, mc->header_.uuid.parts[0], - &match, &ctx->ofpacts, &mc->header_.uuid); + ofctrl_add_flow(flow_table, ctx->stage, ctx->prio, + mc->header_.uuid.parts[0], &match, &ctx->ofpacts, + &mc->header_.uuid); ofpbuf_clear(&ctx->ofpacts); - /* reset MFF_REG6. */ - put_load(0, MFF_REG6, 0, 32, &ctx->ofpacts); } static void @@ -2687,7 +2635,7 @@ consider_mc_group(const struct physical_ctx *ctx, */ bool has_vtep = has_vtep_port(ctx->local_datapaths, mc->datapath->tunnel_key); - struct mc_buf_split_ctx mc_split[MC_BUF_SPLIT_MAX] = { + struct mc_flow_ctx mc_flows[MC_FLOWS_MAX] = { { .stage = OFTABLE_LOCAL_OUTPUT, .prio = 100, @@ -2704,15 +2652,14 @@ consider_mc_group(const struct physical_ctx *ctx, .flags_mask = MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK, }, }; - for (size_t i = 0; i < MC_BUF_SPLIT_MAX; i++) { - struct mc_buf_split_ctx *split_ctx = &mc_split[i]; - ofpbuf_init(&split_ctx->ofpacts, 0); - put_load(0, MFF_REG6, 0, 32, &split_ctx->ofpacts); + for (size_t i = 0; i < MC_FLOWS_MAX; i++) { + struct mc_flow_ctx *flow_ctx = &mc_flows[i]; + ofpbuf_init(&flow_ctx->ofpacts, 0); } - struct mc_buf_split_ctx *local_ctx = &mc_split[MC_BUF_SPLIT_LOCAL]; - struct mc_buf_split_ctx *remote_ctx = &mc_split[MC_BUF_SPLIT_REMOTE]; - struct mc_buf_split_ctx *ramp_ctx = &mc_split[MC_BUF_SPLIT_REMOTE_RAMP]; + struct mc_flow_ctx *local_ctx = &mc_flows[MC_FLOWS_LOCAL]; + struct mc_flow_ctx *remote_ctx = &mc_flows[MC_FLOWS_REMOTE]; + struct mc_flow_ctx *ramp_ctx = &mc_flows[MC_FLOWS_REMOTE_RAMP]; for (size_t i = 0; i < mc->n_ports; i++) { struct sbrec_port_binding *port = mc->ports[i]; @@ -2796,17 +2743,6 @@ consider_mc_group(const struct physical_ctx *ctx, } } } - - for (size_t n = 0; n < MC_BUF_SPLIT_MAX; n++) { - struct mc_buf_split_ctx *split_ctx = &mc_split[n]; - if (!has_vtep && n == MC_BUF_SPLIT_REMOTE_RAMP) { - continue; - } - if (split_ctx->ofpacts.size >= MC_OFPACTS_MAX_MSG_SIZE) { - mc_ofctrl_add_flow(mc, split_ctx, true, flow_table); - split_ctx->index++; - } - } } bool local_lports = local_ctx->ofpacts.size > 0; @@ -2815,7 +2751,7 @@ consider_mc_group(const struct physical_ctx *ctx, if (local_lports) { put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &local_ctx->ofpacts); - mc_ofctrl_add_flow(mc, local_ctx, false, flow_table); + mc_ofctrl_add_flow(mc, local_ctx, flow_table); } if (remote_ports) { @@ -2831,31 +2767,28 @@ consider_mc_group(const struct physical_ctx *ctx, remote_ports = remote_ctx->ofpacts.size > 0; if (remote_ports) { put_resubmit(OFTABLE_REMOTE_VTEP_OUTPUT, &remote_ctx->ofpacts); - mc_ofctrl_add_flow(mc, remote_ctx, false, flow_table); + mc_ofctrl_add_flow(mc, remote_ctx, flow_table); } if (ramp_ports && has_vtep) { put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ramp_ctx->ofpacts); put_resubmit(OFTABLE_REMOTE_VTEP_OUTPUT, &ramp_ctx->ofpacts); - mc_ofctrl_add_flow(mc, ramp_ctx, false, flow_table); + mc_ofctrl_add_flow(mc, ramp_ctx, flow_table); } - for (size_t i = 0; i < MC_BUF_SPLIT_MAX; i++) { - ofpbuf_uninit(&mc_split[i].ofpacts); + for (size_t i = 0; i < MC_FLOWS_MAX; i++) { + ofpbuf_uninit(&mc_flows[i].ofpacts); } sset_destroy(&remote_chassis); sset_destroy(&vtep_chassis); } -#define CHASSIS_FLOOD_MAX_MSG_SIZE MC_OFPACTS_MAX_MSG_SIZE - static void physical_eval_remote_chassis_flows(const struct physical_ctx *ctx, struct ofpbuf *egress_ofpacts, struct ovn_desired_flow_table *flow_table) { struct match match = MATCH_CATCHALL_INITIALIZER; - uint32_t index = CHASSIS_FLOOD_INDEX_START; struct chassis_tunnel *prev = NULL; uint8_t actions_stub[256]; @@ -2892,18 +2825,6 @@ physical_eval_remote_chassis_flows(const struct physical_ctx *ctx, ofpact_put_OUTPUT(egress_ofpacts)->port = tun->ofport; prev = tun; - if (egress_ofpacts->size > CHASSIS_FLOOD_MAX_MSG_SIZE) { - match_set_chassis_flood_remote(&match, index++); - put_split_buf_function(index, 0, OFTABLE_FLOOD_REMOTE_CHASSIS, - egress_ofpacts); - - ofctrl_add_flow(flow_table, OFTABLE_FLOOD_REMOTE_CHASSIS, 100, 0, - &match, egress_ofpacts, hc_uuid); - - ofpbuf_clear(egress_ofpacts); - prev = NULL; - } - ofpbuf_clear(&ingress_ofpacts); put_load(1, MFF_LOG_FLAGS, MLF_RX_FROM_TUNNEL_BIT, 1, @@ -2944,12 +2865,14 @@ physical_eval_remote_chassis_flows(const struct physical_ctx *ctx, } if (egress_ofpacts->size > 0) { - match_set_chassis_flood_remote(&match, index++); - + match_init_catchall(&match); + /* Match if the packet wasn't already received from tunnel. + * This prevents from looping it back to the tunnel again. */ + match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, 0, + MLF_RX_FROM_TUNNEL); ofctrl_add_flow(flow_table, OFTABLE_FLOOD_REMOTE_CHASSIS, 100, 0, &match, egress_ofpacts, hc_uuid); } - ofpbuf_uninit(&ingress_ofpacts); } diff --git a/controller/pinctrl.c b/controller/pinctrl.c index 8c4561374..2370fa5fe 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c @@ -3781,10 +3781,15 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg) ovs_mutex_unlock(&pinctrl_mutex); break; - case ACTION_OPCODE_SPLIT_BUF_ACTION: + case ACTION_OPCODE_SPLIT_BUF_ACTION: { + char *opc_str = ovnact_op_to_string(ntohl(ah->opcode)); + VLOG_WARN_RL(&rl, "pinctrl received deprecated packet-in | opcode=%s", + opc_str); + free(opc_str); pinctrl_split_buf_action_handler(swconn, &packet, &pin.flow_metadata, &userdata); break; + } /* Deprecated actions. */ case ACTION_OPCODE_PUT_ICMP4_FRAG_MTU: diff --git a/include/ovn/actions.h b/include/ovn/actions.h index 78c2e8ebe..d3f0bfd04 100644 --- a/include/ovn/actions.h +++ b/include/ovn/actions.h @@ -804,7 +804,7 @@ OVNACTS /* activation_strategy() */ \ ACTION_OPCODE(ACTIVATION_STRATEGY) \ \ - /* split buffer action. */ \ + /* DEPRECATED: split buffer action. */ \ ACTION_OPCODE(SPLIT_BUF_ACTION) \ \ /* "dhcp_relay_req_chk(relay_ip, server_ip)". diff --git a/lib/actions.c b/lib/actions.c index f0758752f..53f4d20a9 100644 --- a/lib/actions.c +++ b/lib/actions.c @@ -5637,7 +5637,6 @@ encode_FLOOD_REMOTE(const struct ovnact_null *null OVS_UNUSED, const struct ovnact_encode_params *ep, struct ofpbuf *ofpacts) { - put_load(CHASSIS_FLOOD_INDEX_START, MFF_REG6, 0, 32, ofpacts); emit_resubmit(ofpacts, ep->flood_remote_table); } diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at index 6a7ffa5fe..c734e0413 100644 --- a/tests/ovn-controller.at +++ b/tests/ovn-controller.at @@ -2847,57 +2847,6 @@ OVN_CLEANUP([hv1]) AT_CLEANUP ]) -OVN_FOR_EACH_NORTHD([ -AT_SETUP([ovn-controller - multicast group buffer split]) -AT_KEYWORDS([ovn]) -ovn_start - -net_add n1 - -sim_add hv1 -as hv1 -ovs-vsctl add-br br-phys -ovn_attach n1 br-phys 192.168.0.1 - -for i in $(seq 1 320); do - OVS_VSCTL(add-port br-int hv1-vif$i -- \ - set interface hv1-vif$i external-ids:iface-id=sw0-p$i \ - options:tx_pcap=hv1/vif${i}-tx.pcap \ - options:rxq_pcap=hv1/vif${i}-rx.pcap \ - ofport-request=$i) -done -RUN_OVS_VSCTL - -check ovn-nbctl ls-add sw0 -for i in $(seq 1 320); do - OVN_NBCTL(lsp-add sw0 sw0-p$i) - OVN_NBCTL(lsp-set-addresses sw0-p$i "unknown") -done -RUN_OVN_NBCTL - -wait_for_ports_up -check ovn-nbctl --wait=hv sync - -OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_LOCAL_OUTPUT | grep reg15=0x8000,metadata=0x1 | grep -c "controller(userdata=00.00.00.1b"], [0],[dnl -3 -]) - -for i in $(seq 1 280); do - OVN_NBCTL(lsp-del sw0-p$i) -done -RUN_OVN_NBCTL -check ovn-nbctl --wait=hv sync - -AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_LOCAL_OUTPUT | grep -q controller], [1]) - -# Since we add many ports in one nbctl command, we might get some warnings on slow systems. -OVN_CLEANUP([hv1 -/Unreasonably long/d -/faults:/d -/context switches/d]) -AT_CLEANUP -]) - AT_SETUP([ovn-controller - ssl/tls ciphers using command line options]) AT_KEYWORDS([ovn]) AT_SKIP_IF([test "$HAVE_OPENSSL" = no]) @@ -3706,10 +3655,6 @@ AT_CHECK([grep -c 'move:NXM_NX_REG14\[[0..11\]]->NXM_NX_TUN_ID\[[12..23\]],move: 1 ]) -AT_CHECK([grep -c "reg6=0x8000" flood_flows], [0], [dnl -1 -]) - AT_CHECK([grep -c "reg10=0/0x10000" flood_flows], [0], [dnl 1 ]) diff --git a/tests/ovn.at b/tests/ovn.at index 0a4c7c6db..060305662 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -2319,7 +2319,7 @@ ct_proto(); Syntax error at `ct_proto' expecting action. flood_remote; - encodes as set_field:0x8000->reg6,resubmit(,OFTABLE_FLOOD_REMOTE_CHASSIS) + encodes as resubmit(,OFTABLE_FLOOD_REMOTE_CHASSIS) flood_remote(); Syntax error at `(' expecting `;'. @@ -39578,65 +39578,6 @@ OVN_CLEANUP([hv1]) AT_CLEANUP ]) -OVN_FOR_EACH_NORTHD([ -AT_SETUP([multicast group buffer split]) -AT_KEYWORDS([ovn-mc-split]) -AT_SKIP_IF([test $HAVE_SCAPY = no]) -ovn_start - -net_add n -sim_add hv1 - -as hv1 -check ovs-vsctl add-br br-phys -ovn_attach n br-phys 192.168.0.1 -check ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys - -for i in $(seq 1 320); do - OVS_VSCTL( add-port br-int hv1-vif$i -- \ - set interface hv1-vif$i external-ids:iface-id=sw0-port$i \ - options:tx_pcap=hv1/vif$i-tx.pcap \ - options:rxq_pcap=hv1/vif$i-rx.pcap) -done -RUN_OVS_VSCTL - -check ovn-nbctl ls-add sw0 - -check ovn-nbctl lsp-add sw0 sw0-port1 -check ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:01 10.0.0.2" -for i in $(seq 2 320); do - OVN_NBCTL(lsp-add sw0 sw0-port$i -- lsp-set-addresses sw0-port$i "unknown") -done -RUN_OVN_NBCTL - -check ovn-nbctl --wait=hv sync -wait_for_ports_up -OVN_POPULATE_ARP - -AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_LOCAL_OUTPUT | grep -c controller], [0],[dnl -9 -]) - -arp_req=$(fmt_pkt "Ether(dst='ff:ff:ff:ff:ff:ff', src='50:54:00:00:00:01')/ARP(pdst='10.0.0.254', psrc='10.0.0.1')") -echo $arp_req > expected -as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $arp_req - -OVS_WAIT_UNTIL([test $($PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif2-tx.pcap | wc -l) -ge 1]) -OVN_CHECK_PACKETS([hv1/vif2-tx.pcap], [expected]) - -OVS_WAIT_UNTIL([test $($PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif100-tx.pcap | wc -l) -ge 1]) -OVN_CHECK_PACKETS([hv1/vif100-tx.pcap], [expected]) - -OVS_WAIT_UNTIL([test $($PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif150-tx.pcap | wc -l) -ge 1]) -OVN_CHECK_PACKETS([hv1/vif150-tx.pcap], [expected]) - -OVS_WAIT_UNTIL([test $($PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" hv1/vif300-tx.pcap | wc -l) -ge 1]) -OVN_CHECK_PACKETS([hv1/vif150-tx.pcap], [expected]) - -OVN_CLEANUP([hv1]) -AT_CLEANUP -]) - OVN_FOR_EACH_NORTHD([ AT_SETUP([virtual port claim race condition]) AT_KEYWORDS([virtual ports]) -- 2.51.0 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
