In some usecases such as VM migration or when VMs reuse IP addresses,
VMs become unreachable externally because
external switches/routers on localnet
have stale port-mac or arp caches. The problem resolves
after some time when the caches ageout which could be
minutes for port-mac bindings or hours for arp caches.

To fix this, send some gratuitous ARPs when a logical
port on a localnet datapath gets added. Such gratuitous arps
help on a best-effort basis to update the mac-port bindings and arp-caches
of external switches and routers on the localnet.

Reported-at: https://bugs.launchpad.net/networking-ovn/+bug/1545897
Reported-by: Kyle Mestery <mest...@mestery.com>
Signed-off-by: Ramu Ramamurthy <ramu.ramamur...@us.ibm.com>
---
 ovn/controller/ovn-controller.c |   2 +-
 ovn/controller/pinctrl.c        | 217 +++++++++++++++++++++++++++++++++++++++-
 ovn/controller/pinctrl.h        |   5 +-
 tests/ovn.at                    |  61 +++++++++++
 4 files changed, 282 insertions(+), 3 deletions(-)

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 6027011..3f0aea4 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -335,7 +335,7 @@ main(int argc, char *argv[])
 
             enum mf_field_id mff_ovn_geneve = ofctrl_run(br_int);
 
-            pinctrl_run(&ctx, &lports, br_int);
+            pinctrl_run(&ctx, &lports, br_int, chassis_id, &local_datapaths);
 
             struct hmap flow_table = HMAP_INITIALIZER(&flow_table);
             lflow_run(&ctx, &lports, &mcgroups, &local_datapaths,
diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c
index 3fcab99..57be002 100644
--- a/ovn/controller/pinctrl.c
+++ b/ovn/controller/pinctrl.c
@@ -24,6 +24,7 @@
 #include "ofp-actions.h"
 #include "ovn/lib/actions.h"
 #include "ovn/lib/logical-fields.h"
+#include "ovn/lib/ovn-util.h"
 #include "ofp-msgs.h"
 #include "ofp-print.h"
 #include "ofp-util.h"
@@ -54,6 +55,13 @@ static void run_put_arps(struct controller_ctx *,
 static void wait_put_arps(struct controller_ctx *);
 static void flush_put_arps(void);
 
+static void init_send_garps(void);
+static void destroy_send_garps(void);
+static void send_garp_run(const struct lport_index *lports,
+                          const struct ovsrec_bridge *br_int,
+                          const char *chassis_id,
+                          struct hmap *local_datapaths);
+
 COVERAGE_DEFINE(pinctrl_drop_put_arp);
 
 void
@@ -62,6 +70,7 @@ pinctrl_init(void)
     swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP13_VERSION);
     conn_seq_no = 0;
     init_put_arps();
+    init_send_garps();
 }
 
 static ovs_be32
@@ -266,7 +275,9 @@ pinctrl_recv(const struct ofp_header *oh, enum ofptype type)
 
 void
 pinctrl_run(struct controller_ctx *ctx, const struct lport_index *lports,
-            const struct ovsrec_bridge *br_int)
+            const struct ovsrec_bridge *br_int,
+            const char *chassis_id,
+            struct hmap *local_datapaths)
 {
     if (br_int) {
         char *target;
@@ -307,6 +318,9 @@ pinctrl_run(struct controller_ctx *ctx, const struct 
lport_index *lports,
     }
 
     run_put_arps(ctx, lports);
+    if (rconn_is_connected(swconn)) {
+        send_garp_run(lports, br_int, chassis_id, local_datapaths);
+    }
 }
 
 void
@@ -322,6 +336,7 @@ pinctrl_destroy(void)
 {
     rconn_destroy(swconn);
     destroy_put_arps();
+    destroy_send_garps();
 }
 
 /* Implementation of the "put_arp" OVN action.  This action sends a packet to
@@ -484,3 +499,203 @@ flush_put_arps(void)
         free(pa);
     }
 }
+
+/*
+ * Send gratuitous arp for vifs on localnets
+ *
+ * When a new vif on localnet is added, gratuitous arps
+ * are sent announcing the port's mac,ip mapping.
+ * On localnet, such announcements are needed for
+ * switches and routers on the broadcast segment to
+ * update their port-mac and arp tables.
+ */
+struct garp_data {
+    int ofport;         /* ofport used for this garp */
+    struct eth_addr ea; /* ethernet address of port */
+    ovs_be32 ipv4;      /* ipv4 address of port */
+    long long int announce_time; /* next announcement in ms */
+    int backoff; /* backoff for the next announcement */
+};
+
+/* Contains garps to be sent */
+static struct shash send_garp_data;
+
+static void
+init_send_garps(void)
+{
+    shash_init(&send_garp_data);
+}
+
+static void
+destroy_send_garps(void)
+{
+    shash_destroy(&send_garp_data);
+}
+
+/* Add a new vif to announce garps */
+static void
+send_garp_add(const struct sbrec_port_binding *binding_rec,
+              int ofport)
+{
+    int i;
+    for (i = 0; i < binding_rec->n_mac; i++) {
+        struct lport_addresses laddrs;
+        if (extract_lport_addresses(binding_rec->mac[i], &laddrs,
+                                    false)) {
+            /* the mac address must be valid */
+            struct garp_data *garp = xmalloc(sizeof(struct garp_data));
+            garp->ofport = ofport;
+            garp->ea = laddrs.ea;
+            garp->ipv4 = 0;
+            if (laddrs.n_ipv4_addrs) {
+                garp->ipv4 = laddrs.ipv4_addrs[0].addr;
+                free(laddrs.ipv4_addrs);
+            }
+            garp->backoff = 1;
+            garp->announce_time = time_msec()+1000;
+            shash_add(&send_garp_data, binding_rec->logical_port, garp);
+            break;
+        }
+    }
+}
+
+/* Remove a vif from garp announce */
+static void
+send_garp_delete(const char *lp)
+{
+    struct garp_data *garp = shash_find_and_delete(&send_garp_data, lp);
+    free(garp);
+}
+
+static void
+send_garp(struct garp_data *garp, long long int current_time)
+{
+    if (garp->backoff > 32 || current_time < garp->announce_time) {
+        return;
+    }
+    /* Compose a GARP packet. */
+    uint64_t packet_stub[128 / 8];
+    struct dp_packet packet;
+    dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub);
+    if (garp->ipv4) {
+        /* A broadcast arp reply is used here, A gratuitous arp
+         * request is another option
+         */
+        compose_arp(&packet, ARP_OP_REPLY,
+                    garp->ea, eth_addr_zero,
+                    true, garp->ipv4, garp->ipv4);
+    } else {
+        /* there is only a mac on the port, so send rarp */
+        compose_rarp(&packet, garp->ea);
+    }
+    /* Compose actions, the garp packet goes through ingress pipeline
+     * as if it is received on the input port.
+     */
+    uint64_t ofpacts_stub[4096 / 8];
+    struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
+    enum ofp_version version = rconn_get_version(swconn);
+    ofpact_put_OUTPUT(&ofpacts)->port = OFPP_TABLE;
+
+    struct ofputil_packet_out po = {
+        .packet = dp_packet_data(&packet),
+        .packet_len = dp_packet_size(&packet),
+        .buffer_id = UINT32_MAX,
+        .in_port = u16_to_ofp(garp->ofport),
+        .ofpacts = ofpacts.data,
+        .ofpacts_len = ofpacts.size,
+    };
+    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
+    queue_msg(ofputil_encode_packet_out(&po, proto));
+
+    dp_packet_uninit(&packet);
+    ofpbuf_uninit(&ofpacts);
+
+    /* set the next announcement */
+    garp->backoff *= 2;
+    garp->announce_time = current_time + garp->backoff*1000;
+}
+
+/* get ofport map for local vifs on localnet logical switch */
+static void
+get_localnet_vif_to_ofport(const struct ovsrec_bridge *br_int,
+                           const struct lport_index *lports,
+                           const char *this_chassis_id,
+                           struct simap *localnet_vif_to_ofport,
+                           struct hmap *local_datapaths)
+{
+    for (int i = 0; i < br_int->n_ports; i++) {
+        const struct ovsrec_port *port_rec = br_int->ports[i];
+        if (!strcmp(port_rec->name, br_int->name)) {
+            continue;
+        }
+        const char *chassis_id = smap_get(&port_rec->external_ids,
+                                          "ovn-chassis-id");
+        if (chassis_id && !strcmp(chassis_id, this_chassis_id)) {
+            continue;
+        }
+        for (int j = 0; j < port_rec->n_interfaces; j++) {
+            const struct ovsrec_interface *iface_rec = port_rec->interfaces[j];
+            if (!iface_rec->n_ofport) {
+                continue;
+            }
+            int64_t ofport = iface_rec->ofport[0];
+            if (ofport < 1 || ofport > ofp_to_u16(OFPP_MAX)) {
+                continue;
+            }
+            const char *iface_id = smap_get(&iface_rec->external_ids,
+                                            "iface-id");
+            if (!iface_id) {
+                continue;
+            }
+            const struct sbrec_port_binding *pb = lport_lookup_by_name(lports,
+                                                                       
iface_id);
+            if (!pb) {
+                continue;
+            }
+            struct local_datapath *ld =
+                CONTAINER_OF(hmap_first_with_hash(local_datapaths,
+                                                  pb->datapath->tunnel_key),
+                             struct local_datapath, hmap_node);
+            if (ld && ld->localnet_port) {
+                simap_put(localnet_vif_to_ofport, iface_id, ofport);
+            }
+        }
+    }
+}
+
+static void
+send_garp_run(const struct lport_index *lports,
+              const struct ovsrec_bridge *br_int, const char *chassis_id,
+              struct hmap *local_datapaths)
+{
+    struct simap localnet_vif_to_ofport = 
SIMAP_INITIALIZER(&localnet_vif_to_ofport);
+
+    /* get the ofport map for local vifs on localnet */
+    get_localnet_vif_to_ofport(br_int, lports, chassis_id, 
&localnet_vif_to_ofport,
+                               local_datapaths);
+    /* for deleted ports, remove from send_garp_data */
+    struct shash_node *iter, *next;
+    SHASH_FOR_EACH_SAFE(iter, next, &send_garp_data) {
+        if (!simap_contains(&localnet_vif_to_ofport, iter->name)) {
+            send_garp_delete(iter->name);
+        }
+    }
+    /* for new ports, add to send_garp_data */
+    struct simap_node *node, *node_next;
+    SIMAP_FOR_EACH_SAFE(node, node_next, &localnet_vif_to_ofport) {
+        const struct sbrec_port_binding *pb = lport_lookup_by_name(lports,
+                                                                   node->name);
+        if (pb && !shash_find(&send_garp_data, node->name)) {
+            send_garp_add(pb, node->data);
+        }
+    }
+    /* FIXME
+     * handle the case that the address on a port has changed
+     */
+    /* send garps */
+    long long int current_time = time_msec();
+    SHASH_FOR_EACH(iter, &send_garp_data) {
+        send_garp(iter->data, current_time);
+    }
+    simap_destroy(&localnet_vif_to_ofport);
+}
diff --git a/ovn/controller/pinctrl.h b/ovn/controller/pinctrl.h
index 945e76b..52f5df0 100644
--- a/ovn/controller/pinctrl.h
+++ b/ovn/controller/pinctrl.h
@@ -21,13 +21,16 @@
 
 #include "meta-flow.h"
 
+struct hmap;
 struct lport_index;
 struct ovsrec_bridge;
 struct controller_ctx;
 
 void pinctrl_init(void);
 void pinctrl_run(struct controller_ctx *, const struct lport_index *,
-                 const struct ovsrec_bridge *br_int);
+                 const struct ovsrec_bridge *,
+                 const char *chassis_id,
+                 struct hmap *local_datapaths);
 void pinctrl_wait(struct controller_ctx *);
 void pinctrl_destroy(void);
 
diff --git a/tests/ovn.at b/tests/ovn.at
index 22121e1..fcd278c 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -2141,3 +2141,64 @@ OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
 OVS_APP_EXIT_AND_WAIT([ovsdb-server])
 
 AT_CLEANUP
+
+AT_SETUP([ovn -- gratuitous arp generate])
+AT_KEYWORDS([ovn])
+
+ovn_start
+ovn-nbctl lswitch-add lsw0
+net_add n1
+sim_add hv
+as hv
+ovs-vsctl \
+    -- add-br br-phys \
+    -- add-br br-eth0
+
+ovn_attach n1 br-phys 192.168.0.1
+
+AT_CHECK([ovs-vsctl set Open_vSwitch . 
external-ids:ovn-bridge-mappings=physnet1:br-eth0])
+AT_CHECK([ovs-vsctl add-port br-eth0 snoopvif -- set Interface snoopvif 
options:tx_pcap=hv/snoopvif-tx.pcap options:rxq_pcap=hv/snoopvif-rx.pcap])
+
+# create a vif
+AT_CHECK([ovn-nbctl lport-add lsw0 localvif1])
+AT_CHECK([ovn-nbctl lport-set-addresses localvif1 "f0:00:00:00:00:01 
192.168.1.2"])
+AT_CHECK([ovn-nbctl lport-set-port-security localvif1 "f0:00:00:00:00:01"])
+
+# create a localnet port
+AT_CHECK([ovn-nbctl lport-add lsw0 ln_port])
+AT_CHECK([ovn-nbctl lport-set-addresses ln_port unknown])
+AT_CHECK([ovn-nbctl lport-set-type ln_port localnet])
+AT_CHECK([ovn-nbctl lport-set-options ln_port network_name=physnet1])
+
+AT_CHECK([ovs-vsctl add-port br-int localvif1 -- set Interface localvif1 
external_ids:iface-id=localvif1])
+
+#wait for packet to be received
+OVS_WAIT_UNTIL([test `wc -c < "hv/snoopvif-tx.pcap"` -ge 50])
+trim_zeros() {
+    sed 's/\(00\)\{1,\}$//'
+}
+$PYTHON "$top_srcdir/utilities/ovs-pcap.in" hv/snoopvif-tx.pcap | trim_zeros > 
packets
+expected="fffffffffffff0000000000108060001080006040002f00000000001c0a80102000000000000c0a80102"
+echo $expected > expout
+AT_CHECK([sort packets], [0], [expout])
+cat packets
+
+as hv
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([ovn-northd])
+
+as main
+OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+AT_CLEANUP
-- 
2.3.9

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to