RFC 4541 "Considerations for Internet Group Management Protocol (IGMP)
and Multicast Listener Discovery (MLD) Snooping Switches" [0] states:

For IGMP packets:
    2.1.1.  IGMP Forwarding Rules
       1) A snooping switch should forward IGMP Membership Reports only to
          those ports where multicast routers are attached.

          Alternatively stated: a snooping switch should not forward IGMP
          Membership Reports to ports on which only hosts are attached.  An
          administrative control may be provided to override this
          restriction, allowing the report messages to be flooded to other
          ports.

          This is the main IGMP snooping functionality for the control path.

          Sending membership reports to other hosts can result, for IGMPv1
          and IGMPv2, in unintentionally preventing a host from joining a
          specific multicast group.

          When an IGMPv1 or IGMPv2 host receives a membership report for a
          group address that it intends to join, the host will suppress its
          own membership report for the same group.  This join or message
          suppression is a requirement for IGMPv1 and IGMPv2 hosts.
          However, if a switch does not receive a membership report from the
          host it will not forward multicast data to it.

In OVN this translates to forwarding reports only on:
a. ports where mrouters have been learned (IGMP queries were received on
   those ports)
b. ports connecting a multicast enabled logical switch to a multicast
   relay enabled logical router (OVN mrouter)
c. ports configured with mcast_flood_reports=true (see the example
   below)
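
For illustration, snooping and case (c) could be configured along these
lines ("sw0" and "lsp0" are made-up names; the option names are the
ones used by this patch and its tests):

    # Enable IGMP/MLD snooping on the logical switch.
    ovn-nbctl set logical_switch sw0 other_config:mcast_snoop=true
    # Always flood reports to this port (e.g., towards a known mrouter).
    ovn-nbctl lsp-set-options lsp0 mcast_flood_reports=true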

    2.1.2.  Data Forwarding Rules

       1) Packets with a destination IP address outside 224.0.0.X which are
          not IGMP should be forwarded according to group-based port
          membership tables and must also be forwarded on router ports.

In OVN this translates to forwarding traffic for a group G to:
a. all ports where G was learned
b. all ports where an (external or OVN) mrouter was learned
c. all ports configured with mcast_flood=true (see the example below)
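
Case (c) here corresponds to the mcast_flood port option, e.g. (again
with a made-up port name):

    ovn-nbctl lsp-set-options lsp0 mcast_flood=true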

IGMP queries are already snooped by ovn-controller.  Just propagate the
information about where mrouters are to the Southbound DB by means of a
custom IGMP_Group entry (address="mrouters").
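
Once learned, mrouter entries should then be visible in the Southbound
DB, e.g. (illustrative; the exact output depends on the schema):

    ovn-sbctl find IGMP_Group address=mrouters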

Snooped queries are then re-injected into the OVS pipeline with outport
set to MC_FLOOD_L2 (flood them only in the L2 domain).

Snooped reports are re-injected into the OVS pipeline with outport set
to MC_MROUTER_FLOOD (flood them to snooped mrouters and OVN mrouters).
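
In OpenFlow terms this is a packet-out of the snooped packet with
actions roughly equivalent to (a simplified rendering of the
pinctrl_forward_pkt() helper added below; metadata/reg14/reg15 are
OVN's logical datapath/inport/outport fields):

    set_field:<datapath tunnel key>->metadata
    set_field:<original inport key>->reg14
    set_field:<multicast group tunnel key>->reg15
    resubmit(,OFTABLE_REMOTE_OUTPUT)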

The same behavior applies to IPv6 as well (MLD).

[0] https://datatracker.ietf.org/doc/html/rfc4541

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1933990
Reported-at: https://github.com/ovn-org/ovn/issues/126
Signed-off-by: Dumitru Ceara <dce...@redhat.com>
---
 controller/ip-mcast.c   |  129 ++++++++++++++++++++++++++++++++-----
 controller/ip-mcast.h   |   14 ++++
 controller/pinctrl.c    |  129 +++++++++++++++++++++++++++++++++----
 lib/ip-mcast-index.h    |    5 +
 lib/mcast-group-index.h |    4 -
 northd/northd.c         |   54 +++++++++++----
 northd/ovn-northd.8.xml |    6 --
 tests/ovn-macros.at     |   25 +++++++
 tests/ovn.at            |  164 ++++++++++++++++++++++++++++++++++++++++++++++-
 9 files changed, 472 insertions(+), 58 deletions(-)

diff --git a/controller/ip-mcast.c b/controller/ip-mcast.c
index 9b0b4465a..a870fb29e 100644
--- a/controller/ip-mcast.c
+++ b/controller/ip-mcast.c
@@ -16,6 +16,7 @@
 #include <config.h>
 
 #include "ip-mcast.h"
+#include "ip-mcast-index.h"
 #include "lport.h"
 #include "lib/ovn-sb-idl.h"
 
@@ -27,6 +28,18 @@ struct igmp_group_port {
     const struct sbrec_port_binding *port;
 };
 
+static const struct sbrec_igmp_group *
+igmp_group_lookup_(struct ovsdb_idl_index *igmp_groups,
+                   const char *addr_str,
+                   const struct sbrec_datapath_binding *datapath,
+                   const struct sbrec_chassis *chassis);
+
+static struct sbrec_igmp_group *
+igmp_group_create_(struct ovsdb_idl_txn *idl_txn,
+                   const char *addr_str,
+                   const struct sbrec_datapath_binding *datapath,
+                   const struct sbrec_chassis *chassis);
+
 struct ovsdb_idl_index *
 igmp_group_index_create(struct ovsdb_idl *idl)
 {
@@ -54,17 +67,16 @@ igmp_group_lookup(struct ovsdb_idl_index *igmp_groups,
         return NULL;
     }
 
-    struct sbrec_igmp_group *target =
-        sbrec_igmp_group_index_init_row(igmp_groups);
-
-    sbrec_igmp_group_index_set_address(target, addr_str);
-    sbrec_igmp_group_index_set_datapath(target, datapath);
-    sbrec_igmp_group_index_set_chassis(target, chassis);
+    return igmp_group_lookup_(igmp_groups, addr_str, datapath, chassis);
+}
 
-    const struct sbrec_igmp_group *g =
-        sbrec_igmp_group_index_find(igmp_groups, target);
-    sbrec_igmp_group_index_destroy_row(target);
-    return g;
+const struct sbrec_igmp_group *
+igmp_mrouter_lookup(struct ovsdb_idl_index *igmp_groups,
+                    const struct sbrec_datapath_binding *datapath,
+                    const struct sbrec_chassis *chassis)
+{
+    return igmp_group_lookup_(igmp_groups, OVN_IGMP_GROUP_MROUTERS,
+                              datapath, chassis);
 }
 
 /* Creates and returns a new IGMP group based on an IPv4 (mapped in IPv6) or
@@ -82,13 +94,16 @@ igmp_group_create(struct ovsdb_idl_txn *idl_txn,
         return NULL;
     }
 
-    struct sbrec_igmp_group *g = sbrec_igmp_group_insert(idl_txn);
-
-    sbrec_igmp_group_set_address(g, addr_str);
-    sbrec_igmp_group_set_datapath(g, datapath);
-    sbrec_igmp_group_set_chassis(g, chassis);
+    return igmp_group_create_(idl_txn, addr_str, datapath, chassis);
+}
 
-    return g;
+struct sbrec_igmp_group *
+igmp_mrouter_create(struct ovsdb_idl_txn *idl_txn,
+                    const struct sbrec_datapath_binding *datapath,
+                    const struct sbrec_chassis *chassis)
+{
+    return igmp_group_create_(idl_txn, OVN_IGMP_GROUP_MROUTERS, datapath,
+                              chassis);
 }
 
 void
@@ -140,6 +155,54 @@ igmp_group_update_ports(const struct sbrec_igmp_group *g,
     hmap_destroy(&old_ports);
 }
 
+void
+igmp_mrouter_update_ports(const struct sbrec_igmp_group *g,
+                          struct ovsdb_idl_index *datapaths,
+                          struct ovsdb_idl_index *port_bindings,
+                          const struct mcast_snooping *ms)
+    OVS_REQ_RDLOCK(ms->rwlock)
+{
+    struct igmp_group_port *old_ports_storage =
+        (g->n_ports ? xmalloc(g->n_ports * sizeof *old_ports_storage) : NULL);
+
+    struct hmap old_ports = HMAP_INITIALIZER(&old_ports);
+
+    for (size_t i = 0; i < g->n_ports; i++) {
+        struct igmp_group_port *old_port = &old_ports_storage[i];
+
+        old_port->port = g->ports[i];
+        hmap_insert(&old_ports, &old_port->hmap_node,
+                    old_port->port->tunnel_key);
+    }
+
+    struct mcast_mrouter_bundle *bundle;
+    uint64_t dp_key = g->datapath->tunnel_key;
+
+    LIST_FOR_EACH (bundle, mrouter_node, &ms->mrouter_lru) {
+        uint32_t port_key = (uintptr_t)bundle->port;
+        const struct sbrec_port_binding *sbrec_port =
+            lport_lookup_by_key(datapaths, port_bindings, dp_key, port_key);
+        if (!sbrec_port) {
+            continue;
+        }
+
+        struct hmap_node *node = hmap_first_with_hash(&old_ports, port_key);
+        if (!node) {
+            sbrec_igmp_group_update_ports_addvalue(g, sbrec_port);
+        } else {
+            hmap_remove(&old_ports, node);
+        }
+    }
+
+    struct igmp_group_port *igmp_port;
+    HMAP_FOR_EACH_POP (igmp_port, hmap_node, &old_ports) {
+        sbrec_igmp_group_update_ports_delvalue(g, igmp_port->port);
+    }
+
+    free(old_ports_storage);
+    hmap_destroy(&old_ports);
+}
+
 void
 igmp_group_delete(const struct sbrec_igmp_group *g)
 {
@@ -162,3 +225,37 @@ igmp_group_cleanup(struct ovsdb_idl_txn *ovnsb_idl_txn,
 
     return true;
 }
+
+static const struct sbrec_igmp_group *
+igmp_group_lookup_(struct ovsdb_idl_index *igmp_groups,
+                   const char *addr_str,
+                   const struct sbrec_datapath_binding *datapath,
+                   const struct sbrec_chassis *chassis)
+{
+    struct sbrec_igmp_group *target =
+        sbrec_igmp_group_index_init_row(igmp_groups);
+
+    sbrec_igmp_group_index_set_address(target, addr_str);
+    sbrec_igmp_group_index_set_datapath(target, datapath);
+    sbrec_igmp_group_index_set_chassis(target, chassis);
+
+    const struct sbrec_igmp_group *g =
+        sbrec_igmp_group_index_find(igmp_groups, target);
+    sbrec_igmp_group_index_destroy_row(target);
+    return g;
+}
+
+static struct sbrec_igmp_group *
+igmp_group_create_(struct ovsdb_idl_txn *idl_txn,
+                   const char *addr_str,
+                   const struct sbrec_datapath_binding *datapath,
+                   const struct sbrec_chassis *chassis)
+{
+    struct sbrec_igmp_group *g = sbrec_igmp_group_insert(idl_txn);
+
+    sbrec_igmp_group_set_address(g, addr_str);
+    sbrec_igmp_group_set_datapath(g, datapath);
+    sbrec_igmp_group_set_chassis(g, chassis);
+
+    return g;
+}
diff --git a/controller/ip-mcast.h b/controller/ip-mcast.h
index b3447d4c7..326f39db1 100644
--- a/controller/ip-mcast.h
+++ b/controller/ip-mcast.h
@@ -30,12 +30,20 @@ const struct sbrec_igmp_group *igmp_group_lookup(
     const struct in6_addr *address,
     const struct sbrec_datapath_binding *datapath,
     const struct sbrec_chassis *chassis);
+const struct sbrec_igmp_group *igmp_mrouter_lookup(
+    struct ovsdb_idl_index *igmp_groups,
+    const struct sbrec_datapath_binding *datapath,
+    const struct sbrec_chassis *chassis);
 
 struct sbrec_igmp_group *igmp_group_create(
     struct ovsdb_idl_txn *idl_txn,
     const struct in6_addr *address,
     const struct sbrec_datapath_binding *datapath,
     const struct sbrec_chassis *chassis);
+struct sbrec_igmp_group *igmp_mrouter_create(
+    struct ovsdb_idl_txn *idl_txn,
+    const struct sbrec_datapath_binding *datapath,
+    const struct sbrec_chassis *chassis);
 
 void igmp_group_update_ports(const struct sbrec_igmp_group *g,
                              struct ovsdb_idl_index *datapaths,
@@ -43,6 +51,12 @@ void igmp_group_update_ports(const struct sbrec_igmp_group *g,
                              const struct mcast_snooping *ms,
                              const struct mcast_group *mc_group)
     OVS_REQ_RDLOCK(ms->rwlock);
+void
+igmp_mrouter_update_ports(const struct sbrec_igmp_group *g,
+                          struct ovsdb_idl_index *datapaths,
+                          struct ovsdb_idl_index *port_bindings,
+                          const struct mcast_snooping *ms)
+    OVS_REQ_RDLOCK(ms->rwlock);
 
 void igmp_group_delete(const struct sbrec_igmp_group *g);
 
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index f954362b7..4e1478102 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -624,6 +624,39 @@ set_actions_and_enqueue_msg(struct rconn *swconn,
     ofpbuf_uninit(&ofpacts);
 }
 
+/* Forwards a packet to 'out_port_key' even if that's on a remote
+ * hypervisor, i.e., the packet is re-injected in table OFTABLE_REMOTE_OUTPUT.
+ */
+static void
+pinctrl_forward_pkt(struct rconn *swconn, int64_t dp_key,
+                    int64_t in_port_key, int64_t out_port_key,
+                    const struct dp_packet *pkt)
+{
+    /* Reinject the packet and flood it to all registered mrouters. */
+    uint64_t ofpacts_stub[4096 / 8];
+    struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
+    enum ofp_version version = rconn_get_version(swconn);
+    put_load(dp_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts);
+    put_load(in_port_key, MFF_LOG_INPORT, 0, 32, &ofpacts);
+    put_load(out_port_key, MFF_LOG_OUTPORT, 0, 32, &ofpacts);
+
+    struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
+    resubmit->in_port = OFPP_CONTROLLER;
+    resubmit->table_id = OFTABLE_REMOTE_OUTPUT;
+
+    struct ofputil_packet_out po = {
+        .packet = dp_packet_data(pkt),
+        .packet_len = dp_packet_size(pkt),
+        .buffer_id = UINT32_MAX,
+        .ofpacts = ofpacts.data,
+        .ofpacts_len = ofpacts.size,
+    };
+    match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
+    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
+    queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
+    ofpbuf_uninit(&ofpacts);
+}
+
 static struct shash ipv6_prefixd;
 
 enum {
@@ -5139,6 +5172,7 @@ ip_mcast_sync(struct ovsdb_idl_txn *ovnsb_idl_txn,
         }
     }
 
+    const struct sbrec_igmp_group *sbrec_ip_mrouter;
     const struct sbrec_igmp_group *sbrec_igmp;
 
     /* Then flush any IGMP_Group entries that are not needed anymore:
@@ -5174,7 +5208,9 @@ ip_mcast_sync(struct ovsdb_idl_txn *ovnsb_idl_txn,
             continue;
         }
 
-        if (ip_parse(sbrec_igmp->address, &group_v4_addr)) {
+        if (!strcmp(sbrec_igmp->address, OVN_IGMP_GROUP_MROUTERS)) {
+            continue;
+        } else if (ip_parse(sbrec_igmp->address, &group_v4_addr)) {
             group_addr = in6_addr_mapped_ipv4(group_v4_addr);
         } else if (!ipv6_parse(sbrec_igmp->address, &group_addr)) {
             continue;
@@ -5213,6 +5249,8 @@ ip_mcast_sync(struct ovsdb_idl_txn *ovnsb_idl_txn,
         struct mcast_group *mc_group;
 
         ovs_rwlock_rdlock(&ip_ms->ms->rwlock);
+
+        /* Groups. */
         LIST_FOR_EACH (mc_group, group_node, &ip_ms->ms->group_lru) {
             if (ovs_list_is_empty(&mc_group->bundle_lru)) {
                 continue;
@@ -5228,6 +5266,20 @@ ip_mcast_sync(struct ovsdb_idl_txn *ovnsb_idl_txn,
                                     sbrec_port_binding_by_key, ip_ms->ms,
                                     mc_group);
         }
+
+        /* Mrouters. */
+        sbrec_ip_mrouter = igmp_mrouter_lookup(sbrec_igmp_groups,
+                                               local_dp->datapath,
+                                               chassis);
+        if (!sbrec_ip_mrouter) {
+            sbrec_ip_mrouter = igmp_mrouter_create(ovnsb_idl_txn,
+                                                   local_dp->datapath,
+                                                   chassis);
+        }
+        igmp_mrouter_update_ports(sbrec_ip_mrouter,
+                                  sbrec_datapath_binding_by_key,
+                                  sbrec_port_binding_by_key, ip_ms->ms);
+
         ovs_rwlock_unlock(&ip_ms->ms->rwlock);
     }
 
@@ -5236,12 +5288,35 @@ ip_mcast_sync(struct ovsdb_idl_txn *ovnsb_idl_txn,
     }
 }
 
+/* Reinject the packet and flood it to all registered mrouters (also those
+ * who are not local to this chassis). */
+static void
+ip_mcast_forward_report(struct rconn *swconn, struct ip_mcast_snoop *ip_ms,
+                        uint32_t orig_in_port_key,
+                        const struct dp_packet *report)
+{
+    pinctrl_forward_pkt(swconn, ip_ms->dp_key, orig_in_port_key,
+                        OVN_MCAST_MROUTER_FLOOD_TUNNEL_KEY, report);
+}
+
+
+static void
+ip_mcast_forward_query(struct rconn *swconn, struct ip_mcast_snoop *ip_ms,
+                       uint32_t orig_in_port_key,
+                       const struct dp_packet *query)
+{
+    pinctrl_forward_pkt(swconn, ip_ms->dp_key, orig_in_port_key,
+                        OVN_MCAST_FLOOD_L2_TUNNEL_KEY, query);
+}
+
 static bool
-pinctrl_ip_mcast_handle_igmp(struct ip_mcast_snoop *ip_ms,
+pinctrl_ip_mcast_handle_igmp(struct rconn *swconn,
+                             struct ip_mcast_snoop *ip_ms,
                              const struct flow *ip_flow,
                              struct dp_packet *pkt_in,
-                             void *port_key_data)
+                             uint32_t in_port_key)
 {
+    void *port_key_data = (void *)(uintptr_t)in_port_key;
     const struct igmp_header *igmp;
     size_t offset;
 
@@ -5255,6 +5330,8 @@ pinctrl_ip_mcast_handle_igmp(struct ip_mcast_snoop *ip_ms,
 
     ovs_be32 ip4 = ip_flow->igmp_group_ip4;
     bool group_change = false;
+    bool report = false;
+    bool query = false;
 
     /* Only default VLAN is supported for now. */
     ovs_rwlock_wrlock(&ip_ms->ms->rwlock);
@@ -5264,36 +5341,48 @@ pinctrl_ip_mcast_handle_igmp(struct ip_mcast_snoop *ip_ms,
         group_change =
             mcast_snooping_add_group4(ip_ms->ms, ip4, IP_MCAST_VLAN,
                                       port_key_data);
+        report = true;
         break;
     case IGMP_HOST_LEAVE_MESSAGE:
         group_change =
             mcast_snooping_leave_group4(ip_ms->ms, ip4, IP_MCAST_VLAN,
                                         port_key_data);
+        report = true;
         break;
     case IGMP_HOST_MEMBERSHIP_QUERY:
-        /* Shouldn't be receiving any of these since we are the multicast
-         * router. Store them for now.
-         */
         group_change =
             mcast_snooping_add_mrouter(ip_ms->ms, IP_MCAST_VLAN,
                                        port_key_data);
+        query = true;
         break;
     case IGMPV3_HOST_MEMBERSHIP_REPORT:
         group_change =
             mcast_snooping_add_report(ip_ms->ms, pkt_in, IP_MCAST_VLAN,
                                       port_key_data);
+        report = true;
         break;
     }
     ovs_rwlock_unlock(&ip_ms->ms->rwlock);
+
+    /* Forward reports to all registered mrouters and flood queries to
+     * the whole L2 domain.
+     */
+    if (report) {
+        ip_mcast_forward_report(swconn, ip_ms, in_port_key, pkt_in);
+    } else if (query) {
+        ip_mcast_forward_query(swconn, ip_ms, in_port_key, pkt_in);
+    }
     return group_change;
 }
 
 static bool
-pinctrl_ip_mcast_handle_mld(struct ip_mcast_snoop *ip_ms,
+pinctrl_ip_mcast_handle_mld(struct rconn *swconn,
+                            struct ip_mcast_snoop *ip_ms,
                             const struct flow *ip_flow,
                             struct dp_packet *pkt_in,
-                            void *port_key_data)
+                            uint32_t in_port_key)
 {
+    void *port_key_data = (void *)(uintptr_t)in_port_key;
     const struct mld_header *mld;
     size_t offset;
 
@@ -5310,6 +5399,8 @@ pinctrl_ip_mcast_handle_mld(struct ip_mcast_snoop *ip_ms,
     }
 
     bool group_change = false;
+    bool report = false;
+    bool query = false;
 
     /* Only default VLAN is supported for now. */
     ovs_rwlock_wrlock(&ip_ms->ms->rwlock);
@@ -5323,6 +5414,7 @@ pinctrl_ip_mcast_handle_mld(struct ip_mcast_snoop *ip_ms,
                 mcast_snooping_add_mrouter(ip_ms->ms, IP_MCAST_VLAN,
                                            port_key_data);
         }
+        query = true;
         break;
     case MLD_REPORT:
     case MLD_DONE:
@@ -5330,14 +5422,24 @@ pinctrl_ip_mcast_handle_mld(struct ip_mcast_snoop *ip_ms,
         group_change =
             mcast_snooping_add_mld(ip_ms->ms, pkt_in, IP_MCAST_VLAN,
                                    port_key_data);
+        report = true;
         break;
     }
     ovs_rwlock_unlock(&ip_ms->ms->rwlock);
+
+    /* Forward reports to all registered mrouters and flood queries to
+     * the whole L2 domain.
+     */
+    if (report) {
+        ip_mcast_forward_report(swconn, ip_ms, in_port_key, pkt_in);
+    } else if (query) {
+        ip_mcast_forward_query(swconn, ip_ms, in_port_key, pkt_in);
+    }
     return group_change;
 }
 
 static void
-pinctrl_ip_mcast_handle(struct rconn *swconn OVS_UNUSED,
+pinctrl_ip_mcast_handle(struct rconn *swconn,
                         const struct flow *ip_flow,
                         struct dp_packet *pkt_in,
                         const struct match *md,
@@ -5366,18 +5468,17 @@ pinctrl_ip_mcast_handle(struct rconn *swconn OVS_UNUSED,
     }
 
     uint32_t port_key = md->flow.regs[MFF_LOG_INPORT - MFF_REG0];
-    void *port_key_data = (void *)(uintptr_t)port_key;
 
     switch (dl_type) {
     case ETH_TYPE_IP:
-        if (pinctrl_ip_mcast_handle_igmp(ip_ms, ip_flow, pkt_in,
-                                         port_key_data)) {
+        if (pinctrl_ip_mcast_handle_igmp(swconn, ip_ms, ip_flow, pkt_in,
+                                         port_key)) {
             notify_pinctrl_main();
         }
         break;
     case ETH_TYPE_IPV6:
-        if (pinctrl_ip_mcast_handle_mld(ip_ms, ip_flow, pkt_in,
-                                        port_key_data)) {
+        if (pinctrl_ip_mcast_handle_mld(swconn, ip_ms, ip_flow, pkt_in,
+                                        port_key)) {
             notify_pinctrl_main();
         }
         break;
diff --git a/lib/ip-mcast-index.h b/lib/ip-mcast-index.h
index 3ac65798b..87a7bf28d 100644
--- a/lib/ip-mcast-index.h
+++ b/lib/ip-mcast-index.h
@@ -20,6 +20,11 @@ struct ovsdb_idl;
 
 struct sbrec_datapath_binding;
 
+/* Fixed group name to denote an IGMP_Group that actually stores
+ * a list of learned mrouters.
+ */
+#define OVN_IGMP_GROUP_MROUTERS "mrouters"
+
 #define OVN_MCAST_MIN_IDLE_TIMEOUT_S           15
 #define OVN_MCAST_MAX_IDLE_TIMEOUT_S           3600
 #define OVN_MCAST_DEFAULT_IDLE_TIMEOUT_S       300
diff --git a/lib/mcast-group-index.h b/lib/mcast-group-index.h
index e8d82b126..9664a94dd 100644
--- a/lib/mcast-group-index.h
+++ b/lib/mcast-group-index.h
@@ -32,10 +32,6 @@ enum ovn_mcast_tunnel_keys {
     OVN_MCAST_MROUTER_FLOOD_TUNNEL_KEY,  /* For L3 multicast traffic that must
                                           * be relayed (multicast routed).
                                           */
-    OVN_MCAST_MROUTER_STATIC_TUNNEL_KEY, /* For multicast reports that need to
-                                          * be forwarded statically towards
-                                          * mrouters.
-                                          */
     OVN_MCAST_STATIC_TUNNEL_KEY,         /* For switches:
                                           * - for L3 multicast traffic that
                                           *   needs to be forwarded
diff --git a/northd/northd.c b/northd/northd.c
index 964af992f..690a86104 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -4506,10 +4506,6 @@ static const struct multicast_group mc_flood =
 static const struct multicast_group mc_mrouter_flood =
     { MC_MROUTER_FLOOD, OVN_MCAST_MROUTER_FLOOD_TUNNEL_KEY };
 
-#define MC_MROUTER_STATIC "_MC_mrouter_static"
-static const struct multicast_group mc_mrouter_static =
-    { MC_MROUTER_STATIC, OVN_MCAST_MROUTER_STATIC_TUNNEL_KEY };
-
 #define MC_STATIC "_MC_static"
 static const struct multicast_group mc_static =
     { MC_STATIC, OVN_MCAST_STATIC_TUNNEL_KEY };
@@ -4803,6 +4799,22 @@ ovn_igmp_group_allocate_id(struct ovn_igmp_group *igmp_group)
     return true;
 }
 
+static void
+ovn_igmp_mrouter_aggregate_ports(struct ovn_igmp_group *igmp_group,
+                                 struct hmap *mcast_groups)
+{
+    struct ovn_igmp_group_entry *entry;
+
+    LIST_FOR_EACH_POP (entry, list_node, &igmp_group->entries) {
+        ovn_multicast_add_ports(mcast_groups, igmp_group->datapath,
+                                &mc_mrouter_flood, entry->ports,
+                                entry->n_ports);
+
+        ovn_igmp_group_destroy_entry(entry);
+        free(entry);
+    }
+}
+
 static void
 ovn_igmp_group_aggregate_ports(struct ovn_igmp_group *igmp_group,
                                struct hmap *mcast_groups)
@@ -8287,13 +8299,6 @@ build_lswitch_destination_lookup_bmcast(struct ovn_datapath *od,
 
         if (mcast_sw_info->enabled) {
             ds_clear(actions);
-            if (mcast_sw_info->flood_reports) {
-                ds_put_cstr(actions,
-                            "clone { "
-                                "outport = \""MC_MROUTER_STATIC"\"; "
-                                "output; "
-                            "};");
-            }
             ds_put_cstr(actions, "igmp;");
             /* Punt IGMP traffic to controller. */
             ovn_lflow_metered(lflows, od, S_SWITCH_IN_L2_LKUP, 100,
@@ -15091,10 +15096,11 @@ build_mcast_groups(struct lflow_input *input_data,
             }
 
             /* If this port is configured to always flood multicast reports
-             * add it to the MC_MROUTER_STATIC group.
+             * add it to the MC_MROUTER_FLOOD group (all reports must be
+             * flooded to statically configured or learned mrouters).
              */
             if (op->mcast_info.flood_reports) {
-                ovn_multicast_add(mcast_groups, &mc_mrouter_static, op);
+                ovn_multicast_add(mcast_groups, &mc_mrouter_flood, op);
                 op->od->mcast_info.sw.flood_reports = true;
             }
 
@@ -15130,7 +15136,10 @@ build_mcast_groups(struct lflow_input *input_data,
         }
 
         struct in6_addr group_address;
-        if (!ovn_igmp_group_get_address(sb_igmp, &group_address)) {
+        if (!strcmp(sb_igmp->address, OVN_IGMP_GROUP_MROUTERS)) {
+            /* Use all-zeros IP to denote a group corresponding to mrouters. */
+            memset(&group_address, 0, sizeof group_address);
+        } else if (!ovn_igmp_group_get_address(sb_igmp, &group_address)) {
             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
             VLOG_WARN_RL(&rl, "invalid IGMP group address: %s",
                          sb_igmp->address);
@@ -15188,6 +15197,12 @@ build_mcast_groups(struct lflow_input *input_data,
             LIST_FOR_EACH (igmp_group, list_node, &od->mcast_info.groups) {
                 struct in6_addr *address = &igmp_group->address;
 
+                /* Skip mrouter entries. */
+                if (!strcmp(igmp_group->mcgroup.name,
+                            OVN_IGMP_GROUP_MROUTERS)) {
+                    continue;
+                }
+
                 /* For IPv6 only relay routable multicast groups
                  * (RFC 4291 2.7).
                  */
@@ -15215,10 +15230,21 @@ build_mcast_groups(struct lflow_input *input_data,
 
     /* Walk the aggregated IGMP groups and allocate IDs for new entries.
      * Then store the ports in the associated multicast group.
+     * Mrouter entries are also stored as IGMP groups, deal with those
+     * explicitly.
      */
     struct ovn_igmp_group *igmp_group;
     HMAP_FOR_EACH_SAFE (igmp_group, hmap_node, igmp_groups) {
 
+        /* If this is a mrouter entry just aggregate the mrouter ports
+         * into the MC_MROUTER mcast_group and destroy the igmp_group;
+         * no more processing needed. */
+        if (!strcmp(igmp_group->mcgroup.name, OVN_IGMP_GROUP_MROUTERS)) {
+            ovn_igmp_mrouter_aggregate_ports(igmp_group, mcast_groups);
+            ovn_igmp_group_destroy(igmp_groups, igmp_group);
+            continue;
+        }
+
         if (!ovn_igmp_group_allocate_id(igmp_group)) {
             /* If we ran out of keys just destroy the entry. */
             ovn_igmp_group_destroy(igmp_groups, igmp_group);
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index a87df24bd..3d1e7357d 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -1662,11 +1662,7 @@ output;
       <li>
         A priority-100 flow that punts all IGMP/MLD packets to
         <code>ovn-controller</code> if multicast snooping is enabled on the
-        logical switch. The flow also forwards the IGMP/MLD packets to the
-        <code>MC_MROUTER_STATIC</code> multicast group, which
-        <code>ovn-northd</code> populates with all the logical ports that
-        have <ref column="options" table="Logical_Switch_Port"/>
-        <code>:mcast_flood_reports='true'</code>.
+        logical switch.
       </li>
 
       <li>
diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at
index 77e89f6b4..bc7f53fd9 100644
--- a/tests/ovn-macros.at
+++ b/tests/ovn-macros.at
@@ -624,6 +624,31 @@ store_igmp_v3_query() {
     echo ${packet} >> ${outfile}
 }
 
+# send_igmp_v3_query INPORT HV ETH_SRC IP_SRC IP_CSUM OUTFILE
+#
+# This shell function builds and sends an IGMPv3 general query from
+# ETH_SRC and IP_SRC and stores the hexdump of the packet in OUTFILE.
+#
+send_igmp_v3_query() {
+    local inport=$1 hv=$2 eth_src=$3 ip_src=$4 ip_chksum=$5 outfile=$6
+
+    local eth_dst=01005e000001
+    local ip_dst=$(ip_to_hex 224 0 0 1)
+    local ip_ttl=01
+    local igmp_type=11
+    local max_resp=0a
+    local igmp_chksum=eeeb
+    local addr=00000000
+
+    local eth=${eth_dst}${eth_src}0800
+    local ip=4500002000004000${ip_ttl}02${ip_chksum}${ip_src}${ip_dst}
+    local igmp=${igmp_type}${max_resp}${igmp_chksum}${addr}000a0000
+    local packet=${eth}${ip}${igmp}
+
+    echo ${packet} >> ${outfile}
+    check as $hv ovs-appctl netdev-dummy/receive ${inport} ${packet}
+}
+
 # send_ip_multicast_pkt INPORT HV ETH_SRC ETH_DST IP_SRC IP_DST IP_LEN TTL
 #    IP_CHKSUM IP_PROTO DATA
 #
diff --git a/tests/ovn.at b/tests/ovn.at
index 9512fd033..b552dd246 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -21214,8 +21214,8 @@ send_igmp_v3_report hv1-vif2 hv1 \
     $(ip_to_hex 239 0 1 68) 04 e9b9 \
     expected_reports
 
-# Check that the IGMP Group is learned.
-wait_row_count IGMP_Group 1 address=239.0.1.68
+# Check that the IGMP Group is learned (on sw1-p12 and on sw2-rtr).
+wait_row_count IGMP_Group 2 address=239.0.1.68
 check ovn-nbctl --wait=hv sync
 
 AS_BOX([IGMP traffic test 8])
@@ -21276,7 +21276,7 @@ ch=$(fetch_column Chassis _uuid name=hv3)
 ovn-sbctl create IGMP_Group address=239.0.1.42 datapath=$dp chassis=$ch
 
 ovn-nbctl --wait=hv sync
-wait_row_count IGMP_Group 1 address=239.0.1.68
+wait_row_count IGMP_Group 2 address=239.0.1.68
 wait_row_count IGMP_Group 1 address=239.0.1.42
 
 OVN_CLEANUP([hv1], [hv2], [hv3])
@@ -21971,8 +21971,8 @@ send_mld_v2_report hv1-vif2 hv1 \
     ff0adeadbeef00000000000000000001 04 c0e4 \
     expected_reports
 
-# Check that the IP multicast group is learned.
-wait_row_count IGMP_Group 1 address='"ff0a:dead:beef::1"'
+# Check that the IP multicast group is learned (on sw1-p12 and on sw2-rtr).
+wait_row_count IGMP_Group 2 address='"ff0a:dead:beef::1"'
 check ovn-nbctl --wait=hv sync
 
 # Send traffic from sw1-p21
@@ -22018,6 +22018,160 @@ OVN_CLEANUP([hv1], [hv2])
 AT_CLEANUP
 ])
 
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([IGMP external querier])
+AT_KEYWORDS([IP-multicast])
+
+ovn_start
+
+# Logical network:
+# A logical switch (mcast snoop enabled) connected to a provider (localnet)
+# network.
+# One external multicast querier, connected to the switch via the localnet
+# port.
+
+check ovn-nbctl                                                  \
+    -- ls-add sw1                                                \
+      -- lsp-add sw1 sw1-ln                                      \
+        -- lsp-set-type sw1-ln localnet                          \
+        -- lsp-set-options sw1-ln network_name=phys-1            \
+      -- lsp-add sw1 sw1-p1                                      \
+      -- lsp-add sw1 sw1-p2                                      \
+      -- lsp-add sw1 sw1-p3                                      \
+      -- lsp-add sw1 sw1-p4                                      \
+      -- set logical_switch sw1 other_config:mcast_snoop=true    \
+      -- set logical_switch sw1 other_config:mcast_querier=false
+
+net_add n1
+
+sim_add hv1
+as hv1
+check ovs-vsctl add-br br-phys-1
+ovn_attach n1 br-phys-1 192.168.1.1
+check ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys-1:br-phys-1
+
+check ovs-vsctl -- add-port br-int hv1-vif1 -- \
+    set interface hv1-vif1 external-ids:iface-id=sw1-p1 \
+    options:tx_pcap=hv1/vif1-tx.pcap \
+    options:rxq_pcap=hv1/vif1-rx.pcap
+check ovs-vsctl -- add-port br-int hv1-vif2 -- \
+    set interface hv1-vif2 external-ids:iface-id=sw1-p2 \
+    options:tx_pcap=hv1/vif2-tx.pcap \
+    options:rxq_pcap=hv1/vif2-rx.pcap
+
+
+sim_add hv2
+as hv2
+check ovs-vsctl add-br br-phys-1
+ovn_attach n1 br-phys-1 192.168.1.2
+check ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys-1:br-phys-1
+
+check ovs-vsctl -- add-port br-int hv2-vif1 -- \
+    set interface hv2-vif1 external-ids:iface-id=sw1-p3 \
+    options:tx_pcap=hv2/vif1-tx.pcap \
+    options:rxq_pcap=hv2/vif1-rx.pcap
+check ovs-vsctl -- add-port br-int hv2-vif2 -- \
+    set interface hv2-vif2 external-ids:iface-id=sw1-p4 \
+    options:tx_pcap=hv2/vif2-tx.pcap \
+    options:rxq_pcap=hv2/vif2-rx.pcap
+
+wait_for_ports_up
+check ovn-nbctl --wait=hv sync
+
+AT_CAPTURE_FILE([exp])
+AT_CAPTURE_FILE([rcv])
+check_packets() {
+    > exp
+    > rcv
+    if test "$1" = --uniq; then
+        sort="sort -u"; shift
+    else
+        sort=sort
+    fi
+    for tuple in "$@"; do
+        set $tuple; pcap=$1 type=$2
+        echo "--- $pcap" | tee -a exp >> rcv
+        $sort "$type" >> exp
+        $PYTHON "$ovs_srcdir/utilities/ovs-pcap.in" $pcap | $sort >> rcv
+        echo | tee -a exp >> rcv
+    done
+
+    $at_diff exp rcv >/dev/null
+}
+
+OVN_POPULATE_ARP
+
+sw1=$(fetch_column Datapath_Binding _uuid external_ids:name=sw1)
+
+dnl Send IGMP query from localnet1 (send on both hypervisors to simulate
+dnl the multicast query packet in the fabric).
+send_igmp_v3_query br-phys-1_n1 hv1 000000000142 $(ip_to_hex 10 0 0 42) 8fb1
+send_igmp_v3_query br-phys-1_n1 hv2 000000000142 $(ip_to_hex 10 0 0 42) 8fb1 expected
+
+dnl Wait for the mrouter to be learned by both chassis.
+wait_row_count IGMP_Group 2 address=mrouters datapath=$sw1
+
+dnl The query should have been flooded in the broadcast domain.
+: > expected_empty
+
+OVS_WAIT_UNTIL(
+  [check_packets 'hv1/vif1-tx.pcap expected' \
+                 'hv1/vif2-tx.pcap expected' \
+                 'hv1/br-phys-1_n1-tx.pcap expected_empty' \
+                 'hv2/vif1-tx.pcap expected' \
+                 'hv2/vif2-tx.pcap expected' \
+                 'hv2/br-phys-1_n1-tx.pcap expected_empty'],
+  [$at_diff -F'^---' exp rcv])
+
+as hv1 reset_pcap_file hv1-vif1 hv1/vif1
+as hv1 reset_pcap_file hv1-vif2 hv1/vif2
+as hv1 reset_pcap_file br-phys-1_n1 hv1/br-phys-1_n1
+as hv2 reset_pcap_file hv2-vif1 hv2/vif1
+as hv2 reset_pcap_file hv2-vif2 hv2/vif2
+as hv2 reset_pcap_file br-phys-1_n1 hv2/br-phys-1_n1
+
+: > expected
+
+dnl Send IGMP query from sw1-p4.
+send_igmp_v3_query hv2-vif2 hv2 000000000002 $(ip_to_hex 10 0 0 2) 8fb1 expected
+
+OVS_WAIT_UNTIL(
+  [check_packets 'hv1/vif1-tx.pcap expected' \
+                 'hv1/vif2-tx.pcap expected' \
+                 'hv1/br-phys-1_n1-tx.pcap expected_empty' \
+                 'hv2/vif1-tx.pcap expected' \
+                 'hv2/vif2-tx.pcap expected_empty' \
+                 'hv2/br-phys-1_n1-tx.pcap expected'],
+  [$at_diff -F'^---' exp rcv])
+
+as hv1 reset_pcap_file hv1-vif1 hv1/vif1
+as hv1 reset_pcap_file hv1-vif2 hv1/vif2
+as hv1 reset_pcap_file br-phys-1_n1 hv1/br-phys-1_n1
+as hv2 reset_pcap_file hv2-vif1 hv2/vif1
+as hv2 reset_pcap_file hv2-vif2 hv2/vif2
+as hv2 reset_pcap_file br-phys-1_n1 hv2/br-phys-1_n1
+
+: > expected
+
+dnl Send a join from sw1-p1: should be forwarded to sw1-ln and sw1-p4.
+send_igmp_v3_report hv1-vif1 hv1 \
+   000000000001 $(ip_to_hex 10 0 0 2) f9f7 \
+   $(ip_to_hex 239 0 1 68) 04 e9b9 \
+   expected
+
+OVS_WAIT_UNTIL(
+  [check_packets 'hv1/vif1-tx.pcap expected_empty' \
+                 'hv1/vif2-tx.pcap expected_empty' \
+                 'hv1/br-phys-1_n1-tx.pcap expected' \
+                 'hv2/vif1-tx.pcap expected_empty' \
+                 'hv2/vif2-tx.pcap expected' \
+                 'hv2/br-phys-1_n1-tx.pcap expected_empty' ],
+  [$at_diff -F'^---' exp rcv])
+
+OVN_CLEANUP([hv1], [hv2])
+AT_CLEANUP
+])
+
 OVN_FOR_EACH_NORTHD([
 AT_SETUP([unixctl socket])
 ovn_start
