In some usecases such as VM migration or when VMs reuse IP addresses, VMs become unreachable externally because external switches/routers on localnet have stale port-mac or arp caches. The problem resolves after some time when the caches ageout which could be minutes for port-mac bindings or hours for arp caches.
To fix this, send some gratuitous ARPs when a logical port on a localnet datapath gets added. Such gratuitous arps help on a best-effort basis to update the mac-port bindings and arp-caches of external switches and routers on the localnet. Reported-at: https://bugs.launchpad.net/networking-ovn/+bug/1545897 Reported-by: Kyle Mestery <mest...@mestery.com> Signed-off-by: Ramu Ramamurthy <ramu.ramamur...@us.ibm.com> --- ovn/controller/ovn-controller.c | 2 +- ovn/controller/pinctrl.c | 217 +++++++++++++++++++++++++++++++++++++++- ovn/controller/pinctrl.h | 5 +- tests/ovn.at | 61 +++++++++++ 4 files changed, 282 insertions(+), 3 deletions(-) diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c index 6027011..3f0aea4 100644 --- a/ovn/controller/ovn-controller.c +++ b/ovn/controller/ovn-controller.c @@ -335,7 +335,7 @@ main(int argc, char *argv[]) enum mf_field_id mff_ovn_geneve = ofctrl_run(br_int); - pinctrl_run(&ctx, &lports, br_int); + pinctrl_run(&ctx, &lports, br_int, chassis_id, &local_datapaths); struct hmap flow_table = HMAP_INITIALIZER(&flow_table); lflow_run(&ctx, &lports, &mcgroups, &local_datapaths, diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c index 3fcab99..57be002 100644 --- a/ovn/controller/pinctrl.c +++ b/ovn/controller/pinctrl.c @@ -24,6 +24,7 @@ #include "ofp-actions.h" #include "ovn/lib/actions.h" #include "ovn/lib/logical-fields.h" +#include "ovn/lib/ovn-util.h" #include "ofp-msgs.h" #include "ofp-print.h" #include "ofp-util.h" @@ -54,6 +55,13 @@ static void run_put_arps(struct controller_ctx *, static void wait_put_arps(struct controller_ctx *); static void flush_put_arps(void); +static void init_send_garps(void); +static void destroy_send_garps(void); +static void send_garp_run(const struct lport_index *lports, + const struct ovsrec_bridge *br_int, + const char *chassis_id, + struct hmap *local_datapaths); + COVERAGE_DEFINE(pinctrl_drop_put_arp); void @@ -62,6 +70,7 @@ pinctrl_init(void) swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP13_VERSION); conn_seq_no = 0; init_put_arps(); + init_send_garps(); } static ovs_be32 @@ -266,7 +275,9 @@ pinctrl_recv(const struct ofp_header *oh, enum ofptype type) void pinctrl_run(struct controller_ctx *ctx, const struct lport_index *lports, - const struct ovsrec_bridge *br_int) + const struct ovsrec_bridge *br_int, + const char *chassis_id, + struct hmap *local_datapaths) { if (br_int) { char *target; @@ -307,6 +318,9 @@ pinctrl_run(struct controller_ctx *ctx, const struct lport_index *lports, } run_put_arps(ctx, lports); + if (rconn_is_connected(swconn)) { + send_garp_run(lports, br_int, chassis_id, local_datapaths); + } } void @@ -322,6 +336,7 @@ pinctrl_destroy(void) { rconn_destroy(swconn); destroy_put_arps(); + destroy_send_garps(); } /* Implementation of the "put_arp" OVN action. This action sends a packet to @@ -484,3 +499,203 @@ flush_put_arps(void) free(pa); } } + +/* + * Send gratuitous arp for vifs on localnets + * + * When a new vif on localnet is added, gratuitous arps + * are sent announcing the port's mac,ip mapping. + * On localnet, such announcements are needed for + * switches and routers on the broadcast segment to + * update their port-mac and arp tables. + */ +struct garp_data { + int ofport; /* ofport used for this garp */ + struct eth_addr ea; /* ethernet address of port */ + ovs_be32 ipv4; /* ipv4 address of port */ + long long int announce_time; /* next announcement in ms */ + int backoff; /* backoff for the next announcement */ +}; + +/* Contains garps to be sent */ +static struct shash send_garp_data; + +static void +init_send_garps(void) +{ + shash_init(&send_garp_data); +} + +static void +destroy_send_garps(void) +{ + shash_destroy(&send_garp_data); +} + +/* Add a new vif to announce garps */ +static void +send_garp_add(const struct sbrec_port_binding *binding_rec, + int ofport) +{ + int i; + for (i = 0; i < binding_rec->n_mac; i++) { + struct lport_addresses laddrs; + if (extract_lport_addresses(binding_rec->mac[i], &laddrs, + false)) { + /* the mac address must be valid */ + struct garp_data *garp = xmalloc(sizeof(struct garp_data)); + garp->ofport = ofport; + garp->ea = laddrs.ea; + garp->ipv4 = 0; + if (laddrs.n_ipv4_addrs) { + garp->ipv4 = laddrs.ipv4_addrs[0].addr; + free(laddrs.ipv4_addrs); + } + garp->backoff = 1; + garp->announce_time = time_msec()+1000; + shash_add(&send_garp_data, binding_rec->logical_port, garp); + break; + } + } +} + +/* Remove a vif from garp announce */ +static void +send_garp_delete(const char *lp) +{ + struct garp_data *garp = shash_find_and_delete(&send_garp_data, lp); + free(garp); +} + +static void +send_garp(struct garp_data *garp, long long int current_time) +{ + if (garp->backoff > 32 || current_time < garp->announce_time) { + return; + } + /* Compose a GARP packet. */ + uint64_t packet_stub[128 / 8]; + struct dp_packet packet; + dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); + if (garp->ipv4) { + /* A broadcast arp reply is used here, A gratuitous arp + * request is another option + */ + compose_arp(&packet, ARP_OP_REPLY, + garp->ea, eth_addr_zero, + true, garp->ipv4, garp->ipv4); + } else { + /* there is only a mac on the port, so send rarp */ + compose_rarp(&packet, garp->ea); + } + /* Compose actions, the garp packet goes through ingress pipeline + * as if it is received on the input port. + */ + uint64_t ofpacts_stub[4096 / 8]; + struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); + enum ofp_version version = rconn_get_version(swconn); + ofpact_put_OUTPUT(&ofpacts)->port = OFPP_TABLE; + + struct ofputil_packet_out po = { + .packet = dp_packet_data(&packet), + .packet_len = dp_packet_size(&packet), + .buffer_id = UINT32_MAX, + .in_port = u16_to_ofp(garp->ofport), + .ofpacts = ofpacts.data, + .ofpacts_len = ofpacts.size, + }; + enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version); + queue_msg(ofputil_encode_packet_out(&po, proto)); + + dp_packet_uninit(&packet); + ofpbuf_uninit(&ofpacts); + + /* set the next announcement */ + garp->backoff *= 2; + garp->announce_time = current_time + garp->backoff*1000; +} + +/* get ofport map for local vifs on localnet logical switch */ +static void +get_localnet_vif_to_ofport(const struct ovsrec_bridge *br_int, + const struct lport_index *lports, + const char *this_chassis_id, + struct simap *localnet_vif_to_ofport, + struct hmap *local_datapaths) +{ + for (int i = 0; i < br_int->n_ports; i++) { + const struct ovsrec_port *port_rec = br_int->ports[i]; + if (!strcmp(port_rec->name, br_int->name)) { + continue; + } + const char *chassis_id = smap_get(&port_rec->external_ids, + "ovn-chassis-id"); + if (chassis_id && !strcmp(chassis_id, this_chassis_id)) { + continue; + } + for (int j = 0; j < port_rec->n_interfaces; j++) { + const struct ovsrec_interface *iface_rec = port_rec->interfaces[j]; + if (!iface_rec->n_ofport) { + continue; + } + int64_t ofport = iface_rec->ofport[0]; + if (ofport < 1 || ofport > ofp_to_u16(OFPP_MAX)) { + continue; + } + const char *iface_id = smap_get(&iface_rec->external_ids, + "iface-id"); + if (!iface_id) { + continue; + } + const struct sbrec_port_binding *pb = lport_lookup_by_name(lports, + iface_id); + if (!pb) { + continue; + } + struct local_datapath *ld = + CONTAINER_OF(hmap_first_with_hash(local_datapaths, + pb->datapath->tunnel_key), + struct local_datapath, hmap_node); + if (ld && ld->localnet_port) { + simap_put(localnet_vif_to_ofport, iface_id, ofport); + } + } + } +} + +static void +send_garp_run(const struct lport_index *lports, + const struct ovsrec_bridge *br_int, const char *chassis_id, + struct hmap *local_datapaths) +{ + struct simap localnet_vif_to_ofport = SIMAP_INITIALIZER(&localnet_vif_to_ofport); + + /* get the ofport map for local vifs on localnet */ + get_localnet_vif_to_ofport(br_int, lports, chassis_id, &localnet_vif_to_ofport, + local_datapaths); + /* for deleted ports, remove from send_garp_data */ + struct shash_node *iter, *next; + SHASH_FOR_EACH_SAFE(iter, next, &send_garp_data) { + if (!simap_contains(&localnet_vif_to_ofport, iter->name)) { + send_garp_delete(iter->name); + } + } + /* for new ports, add to send_garp_data */ + struct simap_node *node, *node_next; + SIMAP_FOR_EACH_SAFE(node, node_next, &localnet_vif_to_ofport) { + const struct sbrec_port_binding *pb = lport_lookup_by_name(lports, + node->name); + if (pb && !shash_find(&send_garp_data, node->name)) { + send_garp_add(pb, node->data); + } + } + /* FIXME + * handle the case that the address on a port has changed + */ + /* send garps */ + long long int current_time = time_msec(); + SHASH_FOR_EACH(iter, &send_garp_data) { + send_garp(iter->data, current_time); + } + simap_destroy(&localnet_vif_to_ofport); +} diff --git a/ovn/controller/pinctrl.h b/ovn/controller/pinctrl.h index 945e76b..52f5df0 100644 --- a/ovn/controller/pinctrl.h +++ b/ovn/controller/pinctrl.h @@ -21,13 +21,16 @@ #include "meta-flow.h" +struct hmap; struct lport_index; struct ovsrec_bridge; struct controller_ctx; void pinctrl_init(void); void pinctrl_run(struct controller_ctx *, const struct lport_index *, - const struct ovsrec_bridge *br_int); + const struct ovsrec_bridge *, + const char *chassis_id, + struct hmap *local_datapaths); void pinctrl_wait(struct controller_ctx *); void pinctrl_destroy(void); diff --git a/tests/ovn.at b/tests/ovn.at index 22121e1..fcd278c 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -2141,3 +2141,64 @@ OVS_APP_EXIT_AND_WAIT([ovs-vswitchd]) OVS_APP_EXIT_AND_WAIT([ovsdb-server]) AT_CLEANUP + +AT_SETUP([ovn -- gratuitous arp generate]) +AT_KEYWORDS([ovn]) + +ovn_start +ovn-nbctl lswitch-add lsw0 +net_add n1 +sim_add hv +as hv +ovs-vsctl \ + -- add-br br-phys \ + -- add-br br-eth0 + +ovn_attach n1 br-phys 192.168.0.1 + +AT_CHECK([ovs-vsctl set Open_vSwitch . external-ids:ovn-bridge-mappings=physnet1:br-eth0]) +AT_CHECK([ovs-vsctl add-port br-eth0 snoopvif -- set Interface snoopvif options:tx_pcap=hv/snoopvif-tx.pcap options:rxq_pcap=hv/snoopvif-rx.pcap]) + +# create a vif +AT_CHECK([ovn-nbctl lport-add lsw0 localvif1]) +AT_CHECK([ovn-nbctl lport-set-addresses localvif1 "f0:00:00:00:00:01 192.168.1.2"]) +AT_CHECK([ovn-nbctl lport-set-port-security localvif1 "f0:00:00:00:00:01"]) + +# create a localnet port +AT_CHECK([ovn-nbctl lport-add lsw0 ln_port]) +AT_CHECK([ovn-nbctl lport-set-addresses ln_port unknown]) +AT_CHECK([ovn-nbctl lport-set-type ln_port localnet]) +AT_CHECK([ovn-nbctl lport-set-options ln_port network_name=physnet1]) + +AT_CHECK([ovs-vsctl add-port br-int localvif1 -- set Interface localvif1 external_ids:iface-id=localvif1]) + +#wait for packet to be received +OVS_WAIT_UNTIL([test `wc -c < "hv/snoopvif-tx.pcap"` -ge 50]) +trim_zeros() { + sed 's/\(00\)\{1,\}$//' +} +$PYTHON "$top_srcdir/utilities/ovs-pcap.in" hv/snoopvif-tx.pcap | trim_zeros > packets +expected="fffffffffffff0000000000108060001080006040002f00000000001c0a80102000000000000c0a80102" +echo $expected > expout +AT_CHECK([sort packets], [0], [expout]) +cat packets + +as hv +OVS_APP_EXIT_AND_WAIT([ovn-controller]) +OVS_APP_EXIT_AND_WAIT([ovs-vswitchd]) +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as ovn-sb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as ovn-nb +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +as northd +OVS_APP_EXIT_AND_WAIT([ovn-northd]) + +as main +OVS_APP_EXIT_AND_WAIT([ovs-vswitchd]) +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +AT_CLEANUP -- 2.3.9 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev