On 10/7/25 8:37 AM, Ales Musil wrote:
> The split buf was causing issues with overloaded ovn-controller. That
> would happen when there was a lof of multicast traffic, due to the
> split nature the traffic could hit ovn-controller pinctrl thread
> multiple times. The original commit aimed to workaround limitation
> within Linux kernel, which would reject too long actions. The limitation
> should be fixed [0] so there isn't a need for the workaround anymore.
>
> This partially reverts [1] and [2] both of them used the split buf action.
>
> Note that this should solve the ovn-controller overload issues, however
> if the multicast group is big enough there might be still an issue
> with resubmit limit.
>
> [0]
> https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git/commit/?id=a1e64addf3ff
> [1] 325c7b203d8b ("controller: split mg action in table 39 and 40 to fit
> kernel netlink buffer size")
> [2] 7c3f7f415f1d ("northd, controller: Flood ARP and NA packet on transit
> router.")
> Reported-at:
> https://mail.openvswitch.org/pipermail/ovs-discuss/2025-February/053455.html
> Reported-at: https://issues.redhat.com/browse/FDP-1636
> Signed-off-by: Ales Musil <[email protected]>
> ---
Hi Ales,
Thanks for the follow up!
I only have one question below. Once that is cleared up I'm OK with
acking the patch.
> TODO.rst | 1 +
> controller/lflow.h | 3 -
> controller/physical.c | 131 +++++++++-------------------------------
> controller/pinctrl.c | 7 ++-
> include/ovn/actions.h | 2 +-
> lib/actions.c | 1 -
> tests/ovn-controller.at | 55 -----------------
> tests/ovn.at | 61 +------------------
> 8 files changed, 36 insertions(+), 225 deletions(-)
>
> diff --git a/TODO.rst b/TODO.rst
> index 741ac6027..abcf6c526 100644
> --- a/TODO.rst
> +++ b/TODO.rst
> @@ -200,6 +200,7 @@ when the feature/action will move from ``Deprecated`` to
> ``Removed``.
> * 26.03 Deprecated
>
> * ``ct_lb`` action, should be removed in 26.09.
> + * ``SPLIT_BUF_ACTION`` action, should be removed in 28.09.
>
> * 26.03 Removed
>
> diff --git a/controller/lflow.h b/controller/lflow.h
> index a97abd824..d82acc0d8 100644
> --- a/controller/lflow.h
> +++ b/controller/lflow.h
> @@ -105,9 +105,6 @@ struct uuid;
> #define OFTABLE_GET_REMOTE_FDB 87
> #define OFTABLE_LEARN_REMOTE_FDB 88
>
> -/* Common defines shared between some controller components. */
> -#define CHASSIS_FLOOD_INDEX_START 0x8000
> -
>
> struct lflow_ctx_in {
> struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath;
> diff --git a/controller/physical.c b/controller/physical.c
> index d161dc437..c055e664b 100644
> --- a/controller/physical.c
> +++ b/controller/physical.c
> @@ -225,18 +225,6 @@ match_set_chassis_flood_outport(struct match *match,
> }
> }
>
> -static void
> -match_set_chassis_flood_remote(struct match *match, uint32_t index)
> -{
> - match_init_catchall(match);
> - match_set_reg(match, MFF_REG6 - MFF_REG0, index);
> - /* Match if the packet wasn't already received from tunnel.
> - * This prevents from looping it back to the tunnel again. */
> - match_set_reg_masked(match, MFF_LOG_FLAGS - MFF_REG0, 0,
> - MLF_RX_FROM_TUNNEL);
> -}
> -
> -
> static void
> put_stack(enum mf_field_id field, struct ofpact_stack *stack)
> {
> @@ -245,27 +233,6 @@ put_stack(enum mf_field_id field, struct ofpact_stack
> *stack)
> stack->subfield.n_bits = stack->subfield.field->n_bits;
> }
>
> -/* Split the ofpacts buffer to prevent overflow of the
> - * MAX_ACTIONS_BUFSIZE netlink buffer size supported by the kernel.
> - * In order to avoid all the action buffers to be squashed together by
> - * ovs, add a controller action for each configured openflow.
> - */
> -static void
> -put_split_buf_function(uint32_t index, uint32_t outport, uint8_t stage,
> - struct ofpbuf *ofpacts)
> -{
> - ovs_be32 values[2] = {
> - htonl(index),
> - htonl(outport)
> - };
> - size_t oc_offset =
> - encode_start_controller_op(ACTION_OPCODE_SPLIT_BUF_ACTION, false,
> - NX_CTLR_NO_METER, ofpacts);
> - ofpbuf_put(ofpacts, values, sizeof values);
> - ofpbuf_put(ofpacts, &stage, sizeof stage);
> - encode_finish_controller_op(oc_offset, ofpacts);
> -}
> -
> static const struct sbrec_port_binding *
> get_localnet_port(const struct hmap *local_datapaths, int64_t tunnel_key)
> {
> @@ -2600,11 +2567,7 @@ local_set_ct_zone_and_output_pb(int tunnel_key,
> int64_t zone_id,
> put_resubmit(OFTABLE_CHECK_LOOPBACK, ofpacts);
> }
>
> -#define MC_OFPACTS_MAX_MSG_SIZE 8192
> -#define MC_BUF_START_ID 0x9000
> -
> -struct mc_buf_split_ctx {
> - uint8_t index;
> +struct mc_flow_ctx {
> uint8_t stage;
> uint16_t prio;
> uint32_t flags;
> @@ -2612,18 +2575,17 @@ struct mc_buf_split_ctx {
> struct ofpbuf ofpacts;
> };
>
> -enum mc_buf_split_type {
> - MC_BUF_SPLIT_LOCAL,
> - MC_BUF_SPLIT_REMOTE,
> - MC_BUF_SPLIT_REMOTE_RAMP,
> - MC_BUF_SPLIT_MAX,
> +enum mc_flows_type {
> + MC_FLOWS_LOCAL,
> + MC_FLOWS_REMOTE,
> + MC_FLOWS_REMOTE_RAMP,
> + MC_FLOWS_MAX,
> };
>
> static void
> mc_ofctrl_add_flow(const struct sbrec_multicast_group *mc,
> - struct mc_buf_split_ctx *ctx, bool split,
> + struct mc_flow_ctx *ctx,
> struct ovn_desired_flow_table *flow_table)
> -
> {
> struct match match = MATCH_CATCHALL_INITIALIZER;
>
> @@ -2631,24 +2593,10 @@ mc_ofctrl_add_flow(const struct sbrec_multicast_group
> *mc,
> mc->tunnel_key);
> match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0,
> ctx->flags, ctx->flags_mask);
> -
> - uint16_t prio = ctx->prio;
> - if (ctx->index) {
> - match_set_reg(&match, MFF_REG6 - MFF_REG0,
> - MC_BUF_START_ID + ctx->index);
> - prio += 10;
> - }
> -
> - if (split) {
> - put_split_buf_function(MC_BUF_START_ID + ctx->index + 1,
> - mc->tunnel_key, ctx->stage, &ctx->ofpacts);
> - }
> -
> - ofctrl_add_flow(flow_table, ctx->stage, prio, mc->header_.uuid.parts[0],
> - &match, &ctx->ofpacts, &mc->header_.uuid);
> + ofctrl_add_flow(flow_table, ctx->stage, ctx->prio,
> + mc->header_.uuid.parts[0], &match, &ctx->ofpacts,
> + &mc->header_.uuid);
> ofpbuf_clear(&ctx->ofpacts);
> - /* reset MFF_REG6. */
> - put_load(0, MFF_REG6, 0, 32, &ctx->ofpacts);
> }
>
> static void
> @@ -2687,7 +2635,7 @@ consider_mc_group(const struct physical_ctx *ctx,
> */
> bool has_vtep = has_vtep_port(ctx->local_datapaths,
> mc->datapath->tunnel_key);
> - struct mc_buf_split_ctx mc_split[MC_BUF_SPLIT_MAX] = {
> + struct mc_flow_ctx mc_flows[MC_FLOWS_MAX] = {
> {
> .stage = OFTABLE_LOCAL_OUTPUT,
> .prio = 100,
> @@ -2704,15 +2652,14 @@ consider_mc_group(const struct physical_ctx *ctx,
> .flags_mask = MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK,
> },
> };
> - for (size_t i = 0; i < MC_BUF_SPLIT_MAX; i++) {
> - struct mc_buf_split_ctx *split_ctx = &mc_split[i];
> - ofpbuf_init(&split_ctx->ofpacts, 0);
> - put_load(0, MFF_REG6, 0, 32, &split_ctx->ofpacts);
> + for (size_t i = 0; i < MC_FLOWS_MAX; i++) {
> + struct mc_flow_ctx *flow_ctx = &mc_flows[i];
> + ofpbuf_init(&flow_ctx->ofpacts, 0);
> }
>
> - struct mc_buf_split_ctx *local_ctx = &mc_split[MC_BUF_SPLIT_LOCAL];
> - struct mc_buf_split_ctx *remote_ctx = &mc_split[MC_BUF_SPLIT_REMOTE];
> - struct mc_buf_split_ctx *ramp_ctx = &mc_split[MC_BUF_SPLIT_REMOTE_RAMP];
> + struct mc_flow_ctx *local_ctx = &mc_flows[MC_FLOWS_LOCAL];
> + struct mc_flow_ctx *remote_ctx = &mc_flows[MC_FLOWS_REMOTE];
> + struct mc_flow_ctx *ramp_ctx = &mc_flows[MC_FLOWS_REMOTE_RAMP];
>
> for (size_t i = 0; i < mc->n_ports; i++) {
> struct sbrec_port_binding *port = mc->ports[i];
> @@ -2796,17 +2743,6 @@ consider_mc_group(const struct physical_ctx *ctx,
> }
> }
> }
> -
> - for (size_t n = 0; n < MC_BUF_SPLIT_MAX; n++) {
> - struct mc_buf_split_ctx *split_ctx = &mc_split[n];
> - if (!has_vtep && n == MC_BUF_SPLIT_REMOTE_RAMP) {
> - continue;
> - }
> - if (split_ctx->ofpacts.size >= MC_OFPACTS_MAX_MSG_SIZE) {
> - mc_ofctrl_add_flow(mc, split_ctx, true, flow_table);
> - split_ctx->index++;
> - }
> - }
> }
>
> bool local_lports = local_ctx->ofpacts.size > 0;
> @@ -2815,7 +2751,7 @@ consider_mc_group(const struct physical_ctx *ctx,
>
> if (local_lports) {
> put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
> &local_ctx->ofpacts);
> - mc_ofctrl_add_flow(mc, local_ctx, false, flow_table);
> + mc_ofctrl_add_flow(mc, local_ctx, flow_table);
> }
>
> if (remote_ports) {
> @@ -2831,31 +2767,28 @@ consider_mc_group(const struct physical_ctx *ctx,
> remote_ports = remote_ctx->ofpacts.size > 0;
> if (remote_ports) {
> put_resubmit(OFTABLE_REMOTE_VTEP_OUTPUT, &remote_ctx->ofpacts);
> - mc_ofctrl_add_flow(mc, remote_ctx, false, flow_table);
> + mc_ofctrl_add_flow(mc, remote_ctx, flow_table);
> }
>
> if (ramp_ports && has_vtep) {
> put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ramp_ctx->ofpacts);
> put_resubmit(OFTABLE_REMOTE_VTEP_OUTPUT, &ramp_ctx->ofpacts);
> - mc_ofctrl_add_flow(mc, ramp_ctx, false, flow_table);
> + mc_ofctrl_add_flow(mc, ramp_ctx, flow_table);
> }
>
> - for (size_t i = 0; i < MC_BUF_SPLIT_MAX; i++) {
> - ofpbuf_uninit(&mc_split[i].ofpacts);
> + for (size_t i = 0; i < MC_FLOWS_MAX; i++) {
> + ofpbuf_uninit(&mc_flows[i].ofpacts);
> }
> sset_destroy(&remote_chassis);
> sset_destroy(&vtep_chassis);
> }
>
> -#define CHASSIS_FLOOD_MAX_MSG_SIZE MC_OFPACTS_MAX_MSG_SIZE
> -
> static void
> physical_eval_remote_chassis_flows(const struct physical_ctx *ctx,
> struct ofpbuf *egress_ofpacts,
> struct ovn_desired_flow_table *flow_table)
> {
> struct match match = MATCH_CATCHALL_INITIALIZER;
> - uint32_t index = CHASSIS_FLOOD_INDEX_START;
> struct chassis_tunnel *prev = NULL;
>
> uint8_t actions_stub[256];
> @@ -2892,18 +2825,6 @@ physical_eval_remote_chassis_flows(const struct
> physical_ctx *ctx,
> ofpact_put_OUTPUT(egress_ofpacts)->port = tun->ofport;
> prev = tun;
>
> - if (egress_ofpacts->size > CHASSIS_FLOOD_MAX_MSG_SIZE) {
> - match_set_chassis_flood_remote(&match, index++);
> - put_split_buf_function(index, 0, OFTABLE_FLOOD_REMOTE_CHASSIS,
> - egress_ofpacts);
> -
> - ofctrl_add_flow(flow_table, OFTABLE_FLOOD_REMOTE_CHASSIS, 100, 0,
> - &match, egress_ofpacts, hc_uuid);
> -
> - ofpbuf_clear(egress_ofpacts);
> - prev = NULL;
> - }
> -
>
> ofpbuf_clear(&ingress_ofpacts);
> put_load(1, MFF_LOG_FLAGS, MLF_RX_FROM_TUNNEL_BIT, 1,
> @@ -2944,12 +2865,14 @@ physical_eval_remote_chassis_flows(const struct
> physical_ctx *ctx,
> }
>
> if (egress_ofpacts->size > 0) {
> - match_set_chassis_flood_remote(&match, index++);
> -
> + match_init_catchall(&match);
> + /* Match if the packet wasn't already received from tunnel.
> + * This prevents from looping it back to the tunnel again. */
> + match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, 0,
> + MLF_RX_FROM_TUNNEL);
> ofctrl_add_flow(flow_table, OFTABLE_FLOOD_REMOTE_CHASSIS, 100, 0,
> &match, egress_ofpacts, hc_uuid);
> }
> -
Nit: I think I'd keep this empty line.
> ofpbuf_uninit(&ingress_ofpacts);
> }
>
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index 8c4561374..2370fa5fe 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -3781,10 +3781,15 @@ process_packet_in(struct rconn *swconn, const struct
> ofp_header *msg)
> ovs_mutex_unlock(&pinctrl_mutex);
> break;
>
> - case ACTION_OPCODE_SPLIT_BUF_ACTION:
We should probably move the /* Deprecated actions. */ comment here.
> + case ACTION_OPCODE_SPLIT_BUF_ACTION: {
> + char *opc_str = ovnact_op_to_string(ntohl(ah->opcode));
> + VLOG_WARN_RL(&rl, "pinctrl received deprecated packet-in |
> opcode=%s",
> + opc_str);
> + free(opc_str);
> pinctrl_split_buf_action_handler(swconn, &packet, &pin.flow_metadata,
> &userdata);
I'm wondering whether we should just remove the handler completely.
The handler will reinject the packet with "index" loaded in REG6. But
we are removing all the openflow rules that match on REG6.
While typing this though I was thinking that maybe the case you're
trying to cover is when ovn-controller hasn't updated the openflow rules
yet (e.g., waiting for the first full SB update to be received) but
pinctrl is already running (ofctrl_put() returns early). Was that the
case you were handling here?
> break;
> + }
>
> /* Deprecated actions. */
> case ACTION_OPCODE_PUT_ICMP4_FRAG_MTU:
> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> index 78c2e8ebe..d3f0bfd04 100644
> --- a/include/ovn/actions.h
> +++ b/include/ovn/actions.h
> @@ -804,7 +804,7 @@ OVNACTS
> /* activation_strategy() */
> \
> ACTION_OPCODE(ACTIVATION_STRATEGY)
> \
>
> \
> - /* split buffer action. */
> \
> + /* DEPRECATED: split buffer action. */
> \
> ACTION_OPCODE(SPLIT_BUF_ACTION)
> \
>
> \
> /* "dhcp_relay_req_chk(relay_ip, server_ip)".
> diff --git a/lib/actions.c b/lib/actions.c
> index f0758752f..53f4d20a9 100644
> --- a/lib/actions.c
> +++ b/lib/actions.c
> @@ -5637,7 +5637,6 @@ encode_FLOOD_REMOTE(const struct ovnact_null *null
> OVS_UNUSED,
> const struct ovnact_encode_params *ep,
> struct ofpbuf *ofpacts)
> {
> - put_load(CHASSIS_FLOOD_INDEX_START, MFF_REG6, 0, 32, ofpacts);
> emit_resubmit(ofpacts, ep->flood_remote_table);
> }
>
> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> index 6a7ffa5fe..c734e0413 100644
> --- a/tests/ovn-controller.at
> +++ b/tests/ovn-controller.at
> @@ -2847,57 +2847,6 @@ OVN_CLEANUP([hv1])
> AT_CLEANUP
> ])
>
> -OVN_FOR_EACH_NORTHD([
> -AT_SETUP([ovn-controller - multicast group buffer split])
> -AT_KEYWORDS([ovn])
> -ovn_start
> -
> -net_add n1
> -
> -sim_add hv1
> -as hv1
> -ovs-vsctl add-br br-phys
> -ovn_attach n1 br-phys 192.168.0.1
> -
> -for i in $(seq 1 320); do
> - OVS_VSCTL(add-port br-int hv1-vif$i -- \
> - set interface hv1-vif$i external-ids:iface-id=sw0-p$i \
> - options:tx_pcap=hv1/vif${i}-tx.pcap \
> - options:rxq_pcap=hv1/vif${i}-rx.pcap \
> - ofport-request=$i)
> -done
> -RUN_OVS_VSCTL
> -
> -check ovn-nbctl ls-add sw0
> -for i in $(seq 1 320); do
> - OVN_NBCTL(lsp-add sw0 sw0-p$i)
> - OVN_NBCTL(lsp-set-addresses sw0-p$i "unknown")
> -done
> -RUN_OVN_NBCTL
> -
> -wait_for_ports_up
> -check ovn-nbctl --wait=hv sync
> -
> -OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int
> table=OFTABLE_LOCAL_OUTPUT | grep reg15=0x8000,metadata=0x1 | grep -c
> "controller(userdata=00.00.00.1b"], [0],[dnl
> -3
> -])
> -
> -for i in $(seq 1 280); do
> - OVN_NBCTL(lsp-del sw0-p$i)
> -done
> -RUN_OVN_NBCTL
> -check ovn-nbctl --wait=hv sync
> -
> -AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_LOCAL_OUTPUT |
> grep -q controller], [1])
> -
> -# Since we add many ports in one nbctl command, we might get some warnings
> on slow systems.
> -OVN_CLEANUP([hv1
> -/Unreasonably long/d
> -/faults:/d
> -/context switches/d])
> -AT_CLEANUP
> -])
> -
> AT_SETUP([ovn-controller - ssl/tls ciphers using command line options])
> AT_KEYWORDS([ovn])
> AT_SKIP_IF([test "$HAVE_OPENSSL" = no])
> @@ -3706,10 +3655,6 @@ AT_CHECK([grep -c
> 'move:NXM_NX_REG14\[[0..11\]]->NXM_NX_TUN_ID\[[12..23\]],move:
> 1
> ])
>
> -AT_CHECK([grep -c "reg6=0x8000" flood_flows], [0], [dnl
> -1
> -])
> -
> AT_CHECK([grep -c "reg10=0/0x10000" flood_flows], [0], [dnl
> 1
> ])
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 0a4c7c6db..060305662 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -2319,7 +2319,7 @@ ct_proto();
> Syntax error at `ct_proto' expecting action.
>
> flood_remote;
> - encodes as set_field:0x8000->reg6,resubmit(,OFTABLE_FLOOD_REMOTE_CHASSIS)
> + encodes as resubmit(,OFTABLE_FLOOD_REMOTE_CHASSIS)
>
> flood_remote();
> Syntax error at `(' expecting `;'.
> @@ -39578,65 +39578,6 @@ OVN_CLEANUP([hv1])
> AT_CLEANUP
> ])
>
> -OVN_FOR_EACH_NORTHD([
> -AT_SETUP([multicast group buffer split])
> -AT_KEYWORDS([ovn-mc-split])
> -AT_SKIP_IF([test $HAVE_SCAPY = no])
> -ovn_start
> -
> -net_add n
> -sim_add hv1
> -
> -as hv1
> -check ovs-vsctl add-br br-phys
> -ovn_attach n br-phys 192.168.0.1
> -check ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
> -
> -for i in $(seq 1 320); do
> - OVS_VSCTL( add-port br-int hv1-vif$i -- \
> - set interface hv1-vif$i external-ids:iface-id=sw0-port$i \
> - options:tx_pcap=hv1/vif$i-tx.pcap \
> - options:rxq_pcap=hv1/vif$i-rx.pcap)
> -done
> -RUN_OVS_VSCTL
> -
> -check ovn-nbctl ls-add sw0
> -
> -check ovn-nbctl lsp-add sw0 sw0-port1
> -check ovn-nbctl lsp-set-addresses sw0-port1 "50:54:00:00:00:01 10.0.0.2"
> -for i in $(seq 2 320); do
> - OVN_NBCTL(lsp-add sw0 sw0-port$i -- lsp-set-addresses sw0-port$i
> "unknown")
> -done
> -RUN_OVN_NBCTL
> -
> -check ovn-nbctl --wait=hv sync
> -wait_for_ports_up
> -OVN_POPULATE_ARP
> -
> -AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_LOCAL_OUTPUT |
> grep -c controller], [0],[dnl
> -9
> -])
> -
> -arp_req=$(fmt_pkt "Ether(dst='ff:ff:ff:ff:ff:ff',
> src='50:54:00:00:00:01')/ARP(pdst='10.0.0.254', psrc='10.0.0.1')")
> -echo $arp_req > expected
> -as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $arp_req
> -
> -OVS_WAIT_UNTIL([test $($PYTHON "$ovs_srcdir/utilities/ovs-pcap.in"
> hv1/vif2-tx.pcap | wc -l) -ge 1])
> -OVN_CHECK_PACKETS([hv1/vif2-tx.pcap], [expected])
> -
> -OVS_WAIT_UNTIL([test $($PYTHON "$ovs_srcdir/utilities/ovs-pcap.in"
> hv1/vif100-tx.pcap | wc -l) -ge 1])
> -OVN_CHECK_PACKETS([hv1/vif100-tx.pcap], [expected])
> -
> -OVS_WAIT_UNTIL([test $($PYTHON "$ovs_srcdir/utilities/ovs-pcap.in"
> hv1/vif150-tx.pcap | wc -l) -ge 1])
> -OVN_CHECK_PACKETS([hv1/vif150-tx.pcap], [expected])
> -
> -OVS_WAIT_UNTIL([test $($PYTHON "$ovs_srcdir/utilities/ovs-pcap.in"
> hv1/vif300-tx.pcap | wc -l) -ge 1])
> -OVN_CHECK_PACKETS([hv1/vif150-tx.pcap], [expected])
> -
> -OVN_CLEANUP([hv1])
> -AT_CLEANUP
> -])
> -
> OVN_FOR_EACH_NORTHD([
> AT_SETUP([virtual port claim race condition])
> AT_KEYWORDS([virtual ports])
Regards,
Dumitru
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev