Introduce the capability to split multicast group openflow actions
created in consider_mc_group routine in multiple buffers if the
single buffer size is over netlink buffer size limits.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2232152
Signed-off-by: Lorenzo Bianconi <lorenzo.bianc...@redhat.com>
---
 controller/physical.c   | 272 +++++++++++++++++++++++++++++-----------
 controller/pinctrl.c    |  66 ++++++++++
 include/ovn/actions.h   |   3 +
 tests/ovn-controller.at |  45 +++++++
 4 files changed, 313 insertions(+), 73 deletions(-)

diff --git a/controller/physical.c b/controller/physical.c
index 75257bc85..663fd1c8d 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -1969,6 +1969,22 @@ local_output_pb(int64_t tunnel_key, struct ofpbuf 
*ofpacts)
     put_resubmit(OFTABLE_CHECK_LOOPBACK, ofpacts);
 }
 
+static void
+mc_split_buf_controller_action(struct ofpbuf *ofpacts, uint32_t index,
+                               uint8_t table_id, uint32_t port_key)
+{
+    size_t oc_offset = encode_start_controller_op(
+            ACTION_OPCODE_MG_SPLIT_BUF, false, NX_CTLR_NO_METER, ofpacts);
+    ovs_be32 val = htonl(index);
+    ofpbuf_put(ofpacts, &val, sizeof val);
+    val = htonl(port_key);
+    ofpbuf_put(ofpacts, &val, sizeof val);
+    ofpbuf_put(ofpacts, &table_id, sizeof table_id);
+    encode_finish_controller_op(oc_offset, ofpacts);
+}
+
+#define MC_OFPACTS_MAX_MSG_SIZE     1024
+#define MC_BUF_START_ID             0x9000
 static void
 consider_mc_group(struct ovsdb_idl_index *sbrec_port_binding_by_name,
                   enum mf_field_id mff_ovn_geneve,
@@ -1990,9 +2006,6 @@ consider_mc_group(struct ovsdb_idl_index 
*sbrec_port_binding_by_name,
     struct sset remote_chassis = SSET_INITIALIZER(&remote_chassis);
     struct sset vtep_chassis = SSET_INITIALIZER(&vtep_chassis);
 
-    struct match match;
-    match_outport_dp_and_port_keys(&match, dp_key, mc->tunnel_key);
-
     /* Go through all of the ports in the multicast group:
      *
      *    - For remote ports, add the chassis to 'remote_chassis' or
@@ -2013,10 +2026,18 @@ consider_mc_group(struct ovsdb_idl_index 
*sbrec_port_binding_by_name,
      *      set the output port to be the router patch port for which
      *      the redirect port was added.
      */
-    struct ofpbuf ofpacts, remote_ofpacts, remote_ofpacts_ramp;
+    struct ofpbuf ofpacts, remote_ofpacts, remote_ofpacts_ramp, reset_ofpacts;
     ofpbuf_init(&ofpacts, 0);
     ofpbuf_init(&remote_ofpacts, 0);
     ofpbuf_init(&remote_ofpacts_ramp, 0);
+    ofpbuf_init(&reset_ofpacts, 0);
+
+    bool local_ports = false, remote_ports = false, remote_ramp_ports = false;
+
+    put_load(0, MFF_REG6, 0, 32, &reset_ofpacts);
+
+    /* local port loop. */
+    uint32_t local_flow_index = MC_BUF_START_ID;
     for (size_t i = 0; i < mc->n_ports; i++) {
         struct sbrec_port_binding *port = mc->ports[i];
 
@@ -2040,19 +2061,15 @@ consider_mc_group(struct ovsdb_idl_index 
*sbrec_port_binding_by_name,
             if (ldp->is_transit_switch) {
                 local_output_pb(port->tunnel_key, &ofpacts);
             } else {
-                local_output_pb(port->tunnel_key, &remote_ofpacts);
-                local_output_pb(port->tunnel_key, &remote_ofpacts_ramp);
+                remote_ramp_ports = true;
+                remote_ports = true;
             }
         } if (!strcmp(port->type, "remote")) {
             if (port->chassis) {
-                put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
-                         &remote_ofpacts);
-                tunnel_to_chassis(mff_ovn_geneve, port->chassis->name,
-                                  chassis_tunnels, mc->datapath,
-                                  port->tunnel_key, &remote_ofpacts);
+                remote_ports = true;
             }
         } else if (!strcmp(port->type, "localport")) {
-            local_output_pb(port->tunnel_key, &remote_ofpacts);
+            remote_ports = true;
         } else if ((port->chassis == chassis
                     || is_additional_chassis(port, chassis))
                    && (local_binding_get_primary_pb(local_bindings, lport_name)
@@ -2095,87 +2112,196 @@ consider_mc_group(struct ovsdb_idl_index 
*sbrec_port_binding_by_name,
                 }
             }
         }
-    }
 
-    /* Table 40, priority 100.
-     * =======================
-     *
-     * Handle output to the local logical ports in the multicast group, if
-     * any. */
-    bool local_ports = ofpacts.size > 0;
-    if (local_ports) {
-        /* Following delivery to local logical ports, restore the multicast
-         * group as the logical output port. */
-        put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
+        local_ports |= !!ofpacts.size;
+        if (!local_ports) {
+            continue;
+        }
 
-        ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100,
-                        mc->header_.uuid.parts[0],
-                        &match, &ofpacts, &mc->header_.uuid);
+        /* do not overcome max netlink message size used by ovs-vswitchd to
+         * send netlink configuration to the kernel. */
+        if (ofpacts.size > MC_OFPACTS_MAX_MSG_SIZE || i == mc->n_ports - 1) {
+            struct match match;
+
+            match_outport_dp_and_port_keys(&match, dp_key, mc->tunnel_key);
+
+            bool is_first = local_flow_index == MC_BUF_START_ID;
+            if (!is_first) {
+                match_set_reg(&match, MFF_REG6 - MFF_REG0, local_flow_index);
+            }
+
+            if (i == mc->n_ports - 1) {
+                /* Following delivery to local logical ports, restore the
+                 * multicast group as the logical output port. */
+                put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
+            } else {
+                mc_split_buf_controller_action(&ofpacts, ++local_flow_index,
+                                               OFTABLE_LOCAL_OUTPUT,
+                                               mc->tunnel_key);
+            }
+
+            /* reset MFF_REG6. */
+            ofpbuf_insert(&ofpacts, 0, reset_ofpacts.data, reset_ofpacts.size);
+
+            /* Table 40, priority 100.
+             * ==========================
+             *
+             * Handle output to the local logical ports in the multicast group,
+             * if any. */
+            ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT,
+                            is_first ? 100 : 110,
+                            mc->header_.uuid.parts[0], &match, &ofpacts,
+                            &mc->header_.uuid);
+            ofpbuf_clear(&ofpacts);
+        }
     }
 
-    /* Table 39, priority 100.
-     * =======================
-     *
-     * Handle output to the remote chassis in the multicast group, if
-     * any. */
-    if (!sset_is_empty(&remote_chassis) ||
-            !sset_is_empty(&vtep_chassis) || remote_ofpacts.size > 0) {
-        if (remote_ofpacts.size > 0) {
-            /* Following delivery to logical patch ports, restore the
-             * multicast group as the logical output port. */
-            put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
-                     &remote_ofpacts);
-
-            if (get_vtep_port(local_datapaths, mc->datapath->tunnel_key)) {
-                struct match match_ramp;
-                match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, 0,
-                                     MLF_RCV_FROM_RAMP);
+    /* remote port loop. */
+    struct ofpbuf remote_ofpacts_last;
+    ofpbuf_init(&remote_ofpacts_last, 0);
+    if (remote_ports) {
+        put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32, &remote_ofpacts_last);
+    }
+    fanout_to_chassis(mff_ovn_geneve, &remote_chassis, chassis_tunnels,
+                      mc->datapath, mc->tunnel_key, false,
+                      &remote_ofpacts_last);
+    fanout_to_chassis(mff_ovn_geneve, &vtep_chassis, chassis_tunnels,
+                      mc->datapath, mc->tunnel_key, true,
+                      &remote_ofpacts_last);
+    remote_ports |= !!remote_ofpacts_last.size;
+    if (remote_ports && local_ports) {
+        put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts_last);
+    }
 
-                put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
-                         &remote_ofpacts_ramp);
+    bool has_vetp = get_vtep_port(local_datapaths, mc->datapath->tunnel_key);
+    uint32_t reverse_remap_flow_index = MC_BUF_START_ID;
+    uint32_t reverse_flow_index = MC_BUF_START_ID;
 
-                /* MCAST traffic which was originally received from RAMP_SWITCH
-                 * is not allowed to be re-sent to remote_chassis.
-                 * Process "patch" port binding for routing in
-                 * OFTABLE_REMOTE_OUTPUT and resubmit packets to
-                 * OFTABLE_LOCAL_OUTPUT for local delivery. */
+    for (size_t i = 0; i < mc->n_ports; i++) {
+        struct sbrec_port_binding *port = mc->ports[i];
 
-                match_outport_dp_and_port_keys(&match_ramp, dp_key,
-                                               mc->tunnel_key);
+        if (port->datapath != mc->datapath) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+            VLOG_WARN_RL(&rl, UUID_FMT": multicast group contains ports "
+                         "in wrong datapath",
+                         UUID_ARGS(&mc->header_.uuid));
+            continue;
+        }
+
+        if (!strcmp(port->type, "patch")) {
+            if (!ldp->is_transit_switch) {
+                local_output_pb(port->tunnel_key, &remote_ofpacts);
+                local_output_pb(port->tunnel_key, &remote_ofpacts_ramp);
+            }
+        } if (!strcmp(port->type, "remote")) {
+            if (port->chassis) {
+                put_load(port->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
+                         &remote_ofpacts);
+                tunnel_to_chassis(mff_ovn_geneve, port->chassis->name,
+                                  chassis_tunnels, mc->datapath,
+                                  port->tunnel_key, &remote_ofpacts);
+            }
+        } else if (!strcmp(port->type, "localport")) {
+            local_output_pb(port->tunnel_key, &remote_ofpacts);
+        }
 
-                /* Match packets coming from RAMP_SWITCH and allowed for
-                * loopback processing (including routing). */
-                match_set_reg_masked(&match_ramp, MFF_LOG_FLAGS - MFF_REG0,
-                                     MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK,
-                                     MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK);
+        /* do not overcome max netlink message size used by ovs-vswitchd to
+         * send netlink configuration to the kernel. */
+        if (remote_ofpacts.size > MC_OFPACTS_MAX_MSG_SIZE ||
+            i == mc->n_ports - 1) {
+            struct match match;
+            match_outport_dp_and_port_keys(&match, dp_key, mc->tunnel_key);
+            if (has_vetp) {
+                match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0, 0,
+                                     MLF_RCV_FROM_RAMP);
+            }
 
-                put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts_ramp);
+            bool is_first = reverse_flow_index == MC_BUF_START_ID;
+            if (!is_first) {
+                match_set_reg(&match, MFF_REG6 - MFF_REG0, reverse_flow_index);
+            }
+
+            if (i == mc->n_ports - 1) {
+                ofpbuf_put(&remote_ofpacts, remote_ofpacts_last.data,
+                           remote_ofpacts_last.size);
+            } else {
+                mc_split_buf_controller_action(&remote_ofpacts,
+                                               ++reverse_flow_index,
+                                               OFTABLE_REMOTE_OUTPUT,
+                                               mc->tunnel_key);
+            }
 
-                ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 120,
-                                mc->header_.uuid.parts[0], &match_ramp,
-                                &remote_ofpacts_ramp, &mc->header_.uuid);
+            /* reset MFF_REG6. */
+            ofpbuf_insert(&remote_ofpacts, 0, reset_ofpacts.data,
+                          reset_ofpacts.size);
+            if (remote_ports) {
+                /* Table 39, priority 100.
+                 * =======================
+                 *
+                 * Handle output to the remote chassis in the multicast group,
+                 * if any. */
+                ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT,
+                                is_first ? 100 : 110,
+                                mc->header_.uuid.parts[0],
+                                &match, &remote_ofpacts, &mc->header_.uuid);
             }
+            ofpbuf_clear(&remote_ofpacts);
         }
 
-        fanout_to_chassis(mff_ovn_geneve, &remote_chassis, chassis_tunnels,
-                          mc->datapath, mc->tunnel_key, false,
-                          &remote_ofpacts);
-        fanout_to_chassis(mff_ovn_geneve, &vtep_chassis, chassis_tunnels,
-                          mc->datapath, mc->tunnel_key, true,
-                          &remote_ofpacts);
+        if (!remote_ports || !remote_ramp_ports || !has_vetp) {
+            continue;
+        }
 
-        if (remote_ofpacts.size) {
-            if (local_ports) {
-                put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts);
+        if (remote_ofpacts_ramp.size > MC_OFPACTS_MAX_MSG_SIZE ||
+            i == mc->n_ports - 1) {
+            /* MCAST traffic which was originally received from RAMP_SWITCH
+             * is not allowed to be re-sent to remote_chassis.
+             * Process "patch" port binding for routing in
+             * OFTABLE_REMOTE_OUTPUT and resubmit packets to
+             * OFTABLE_LOCAL_OUTPUT for local delivery. */
+            struct match match_ramp;
+            match_outport_dp_and_port_keys(&match_ramp, dp_key,
+                                           mc->tunnel_key);
+
+            /* Match packets coming from RAMP_SWITCH and allowed for
+            * loopback processing (including routing). */
+            match_set_reg_masked(&match_ramp, MFF_LOG_FLAGS - MFF_REG0,
+                                 MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK,
+                                 MLF_RCV_FROM_RAMP | MLF_ALLOW_LOOPBACK);
+
+            bool is_first = reverse_remap_flow_index == MC_BUF_START_ID;
+            if (!is_first) {
+                match_set_reg(&match_ramp, MFF_REG6 - MFF_REG0,
+                              reverse_remap_flow_index);
             }
-            ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT, 100,
-                            mc->header_.uuid.parts[0],
-                            &match, &remote_ofpacts, &mc->header_.uuid);
+
+            if (i == mc->n_ports - 1) {
+                put_load(mc->tunnel_key, MFF_LOG_OUTPORT, 0, 32,
+                         &remote_ofpacts_ramp);
+                put_resubmit(OFTABLE_LOCAL_OUTPUT, &remote_ofpacts_ramp);
+            } else {
+                mc_split_buf_controller_action(&remote_ofpacts_ramp,
+                                               ++reverse_remap_flow_index,
+                                               OFTABLE_REMOTE_OUTPUT,
+                                               mc->tunnel_key);
+            }
+
+            /* reset MFF_REG6. */
+            ofpbuf_insert(&remote_ofpacts_ramp, 0, reset_ofpacts.data,
+                          reset_ofpacts.size);
+            ofctrl_add_flow(flow_table, OFTABLE_REMOTE_OUTPUT,
+                            is_first ? 120 : 130,
+                            mc->header_.uuid.parts[0], &match_ramp,
+                            &remote_ofpacts_ramp, &mc->header_.uuid);
+            ofpbuf_clear(&remote_ofpacts_ramp);
         }
     }
+
     ofpbuf_uninit(&ofpacts);
     ofpbuf_uninit(&remote_ofpacts);
+    ofpbuf_uninit(&remote_ofpacts_last);
     ofpbuf_uninit(&remote_ofpacts_ramp);
+    ofpbuf_uninit(&reset_ofpacts);
     sset_destroy(&remote_chassis);
     sset_destroy(&vtep_chassis);
 }
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index ff5a3444c..996f78d7b 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -211,6 +211,10 @@ static void send_mac_binding_buffered_pkts(struct rconn 
*swconn)
 
 static void pinctrl_rarp_activation_strategy_handler(const struct match *md);
 
+static void pinctrl_mg_split_buff_handler(
+        struct rconn *swconn, struct dp_packet *pkt,
+        const struct match *md, struct ofpbuf *userdata);
+
 static void init_activated_ports(void);
 static void destroy_activated_ports(void);
 static void wait_activated_ports(void);
@@ -3283,6 +3287,11 @@ process_packet_in(struct rconn *swconn, const struct 
ofp_header *msg)
         ovs_mutex_unlock(&pinctrl_mutex);
         break;
 
+    case ACTION_OPCODE_MG_SPLIT_BUF:
+        pinctrl_mg_split_buff_handler(swconn, &packet, &pin.flow_metadata,
+                                      &userdata);
+        break;
+
     default:
         VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
                      ntohl(ah->opcode));
@@ -8148,6 +8157,63 @@ pinctrl_rarp_activation_strategy_handler(const struct 
match *md)
     notify_pinctrl_main();
 }
 
+static void
+pinctrl_mg_split_buff_handler(struct rconn *swconn, struct dp_packet *pkt,
+                              const struct match *md, struct ofpbuf *userdata)
+{
+    ovs_be32 *index = ofpbuf_try_pull(userdata, sizeof *index);
+    if (!index) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+        VLOG_WARN_RL(&rl, "%s: missing index field", __func__);
+        return;
+    }
+
+    ovs_be32 *mg = ofpbuf_try_pull(userdata, sizeof *mg);
+    if (!mg) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+        VLOG_WARN_RL(&rl, "%s: missing multicast group field", __func__);
+        return;
+    }
+
+    uint8_t *table_id = ofpbuf_try_pull(userdata, sizeof *table_id);
+    if (!table_id) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+        VLOG_WARN_RL(&rl, "%s: missing table_id field", __func__);
+        return;
+    }
+
+    struct ofpbuf ofpacts;
+    ofpbuf_init(&ofpacts, 4096);
+    reload_metadata(&ofpacts, md);
+
+    /* reload pkt_mark field */
+    const struct mf_field *pkt_mark_field = mf_from_id(MFF_PKT_MARK);
+    union mf_value pkt_mark_value;
+    mf_get_value(pkt_mark_field, &md->flow, &pkt_mark_value);
+    ofpact_put_set_field(&ofpacts, pkt_mark_field, &pkt_mark_value, NULL);
+
+    put_load(ntohl(*index), MFF_REG6, 0, 32, &ofpacts);
+    put_load(ntohl(*mg), MFF_LOG_OUTPORT, 0, 32, &ofpacts);
+
+    struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
+    resubmit->in_port = OFPP_CONTROLLER;
+    resubmit->table_id = *table_id;
+
+    struct ofputil_packet_out po = {
+        .packet = dp_packet_data(pkt),
+        .packet_len = dp_packet_size(pkt),
+        .buffer_id = UINT32_MAX,
+        .ofpacts = ofpacts.data,
+        .ofpacts_len = ofpacts.size,
+    };
+    match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
+    enum ofp_version version = rconn_get_version(swconn);
+    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
+    queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
+
+    ofpbuf_uninit(&ofpacts);
+}
+
 static struct hmap put_fdbs;
 
 /* MAC learning (fdb) related functions.  Runs within the main
diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index 04bb6ffd0..49cfe0624 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -747,6 +747,9 @@ enum action_opcode {
 
     /* activation_strategy_rarp() */
     ACTION_OPCODE_ACTIVATION_STRATEGY_RARP,
+
+    /* multicast group split buffer action. */
+    ACTION_OPCODE_MG_SPLIT_BUF,
 };
 
 /* Header. */
diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
index 4212d601a..7d1645e53 100644
--- a/tests/ovn-controller.at
+++ b/tests/ovn-controller.at
@@ -2651,3 +2651,48 @@ OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int 
table=0 | grep -c in_por
 OVN_CLEANUP([hv1])
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([ovn-controller - multicast group buffer split])
+AT_KEYWORDS([ovn])
+ovn_start
+
+net_add n1
+
+sim_add hv1
+as hv1
+ovs-vsctl add-br br-phys
+ovn_attach n1 br-phys 192.168.0.1
+
+for i in $(seq 1 20); do
+    ovs-vsctl -- add-port br-int hv1-vif$i -- \
+        set interface hv1-vif$i external-ids:iface-id=sw0-p$i \
+        options:tx_pcap=hv1/vif${i}-tx.pcap \
+        options:rxq_pcap=hv1/vif${i}-rx.pcap \
+        ofport-request=$i
+done
+
+
+check ovn-nbctl ls-add sw0
+for i in $(seq 1 20); do
+    check ovn-nbctl lsp-add sw0 sw0-p$i
+    check ovn-nbctl lsp-set-addresses sw0-p$i "unknown"
+done
+
+wait_for_ports_up
+ovn-nbctl --wait=hv sync
+
+OVS_WAIT_FOR_OUTPUT([as hv1 ovs-ofctl dump-flows br-int table=40 | grep -c 
"controller(userdata=00.00.00.1b.00.00.00.00.00.00.90.01"], [0],[dnl
+3
+])
+
+for i in $(seq 1 10); do
+    check ovn-nbctl lsp-del sw0-p$i
+done
+ovn-nbctl --wait=hv sync
+
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=40 | grep -q controller], 
[1])
+
+OVN_CLEANUP([hv1])
+AT_CLEANUP
+])
-- 
2.41.0

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to