-------- HV1 -------------- HV2 -------------- --- vif1 -- ls --- localnet --- ls --- vif2--- ---------------------------------------------- In this configuration, with one logical switch with two vif (vif1 on hv1 and vif2 on hv2), and a localnet port (localnet), traffic from vif1 to vif2 goes through localnet. A vif entry update in fdb was lost if the sb transaction was failing. A transaction could fail in the following scenario.
- When packet from vif1 is received by br-int on hv1, vif1_mac is inserted in FDB with port=vif1 [1]. - When the packet is received on hv2 by localnet, if it is received before the FDB update [1], then vif1_mac is inserted in FDB with port=localnet [2]. This transaction, when sent, will fail [A]. The transaction might be sent later (in some time) as sent by ovn-controller (while packet was forwarded by OVS w/o OVN intervention). - When vif2 replies (e.g. ARP reply, echo reply), vif2_mac is inserted in FDB [3] with port=vif2. If the reply packet is handled in hv2 before FDB update [2] is sent, both updates [2] and [3] are sent at the same time. Hence, if the transaction fails [A], vif2_mac is not inserted in FDB. - Finally, when reply hits hv1, if vif2_mac is not in FDB, an entry for vif2_mac will be inserted in FDB with port=localnet. Hence, at the end of the game, the FDB entry for vif2_mac is localnet, and vif2 stops receiving packets until it sends one (and updates FDB with port=vif2). This patch fixes this by resubmitting FDB updates for VIF entries if the transaction failed. Reported-at: https://issues.redhat.com/browse/FDP-1341 Signed-off-by: Xavier Simonart <xsimo...@redhat.com> --- controller/mac-cache.c | 1 + controller/mac-cache.h | 3 ++ controller/ovn-controller.c | 4 +- controller/pinctrl.c | 44 +++++++++++++++---- controller/pinctrl.h | 1 + tests/ovn.at | 84 ++++++++++++++++++++++++++++++------- 6 files changed, 113 insertions(+), 24 deletions(-) diff --git a/controller/mac-cache.c b/controller/mac-cache.c index 3237677dc..76ed69afc 100644 --- a/controller/mac-cache.c +++ b/controller/mac-cache.c @@ -290,6 +290,7 @@ fdb_add(struct hmap *map, struct fdb_data fdb_data, long long timestamp) fdb->data = fdb_data; fdb->timestamp = timestamp; + fdb->resend_on_commit_failure = false; return fdb; } diff --git a/controller/mac-cache.h b/controller/mac-cache.h index bdaec064b..392dc5bad 100644 --- a/controller/mac-cache.h +++ b/controller/mac-cache.h @@ -88,6 +88,9 @@ struct fdb { /* Reference to the SB FDB record. */ const struct sbrec_fdb *sbrec_fdb; long long timestamp; + /* Set to true when FDB update has been sent, if it must be resent in + * case of transaction failure. */ + bool resend_on_commit_failure; }; struct mac_cache_stats { diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index 5303c6551..3ec985f9a 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -6368,6 +6368,7 @@ main(int argc, char *argv[]) /* Main loop. */ bool sb_monitor_all = false; struct tracked_acl_ids *tracked_acl_ids = NULL; + int ovnsb_txn_status = 1; while (!exit_args.exiting) { ovsrcu_quiesce_end(); @@ -6667,6 +6668,7 @@ main(int argc, char *argv[]) time_msec()); pinctrl_update(ovnsb_idl_loop.idl); pinctrl_run(ovnsb_idl_txn, + ovnsb_txn_status, sbrec_datapath_binding_by_key, sbrec_port_binding_by_key, sbrec_port_binding_by_name, @@ -6880,7 +6882,7 @@ main(int argc, char *argv[]) poll_immediate_wake(); } - int ovnsb_txn_status = ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop); + ovnsb_txn_status = ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop); if (!ovnsb_txn_status) { VLOG_INFO("OVNSB commit failed, force recompute next time."); engine_set_force_recompute_immediate(); diff --git a/controller/pinctrl.c b/controller/pinctrl.c index 68042a820..9aba33ffa 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c @@ -373,9 +373,10 @@ static void run_put_fdb(struct ovsdb_idl_txn *ovnsb_idl_txn, struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac, struct ovsdb_idl_index *sbrec_port_binding_by_key, struct ovsdb_idl_index *sbrec_datapath_binding_by_key, - const struct fdb *fdb) + struct fdb *fdb) OVS_REQUIRES(pinctrl_mutex); static void run_put_fdbs(struct ovsdb_idl_txn *ovnsb_idl_txn, + int ovnsb_txn_status, struct ovsdb_idl_index *sbrec_port_binding_by_key, struct ovsdb_idl_index *sbrec_datapath_binding_by_key, struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac) @@ -4049,6 +4050,7 @@ pinctrl_update(const struct ovsdb_idl *idl) /* Called by ovn-controller. */ void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, + int ovnsb_txn_status, struct ovsdb_idl_index *sbrec_datapath_binding_by_key, struct ovsdb_idl_index *sbrec_port_binding_by_key, struct ovsdb_idl_index *sbrec_port_binding_by_name, @@ -4094,7 +4096,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, chassis); bfd_monitor_run(ovnsb_idl_txn, bfd_table, sbrec_port_binding_by_name, chassis); - run_put_fdbs(ovnsb_idl_txn, sbrec_port_binding_by_key, + run_put_fdbs(ovnsb_idl_txn, ovnsb_txn_status, sbrec_port_binding_by_key, sbrec_datapath_binding_by_key, sbrec_fdb_by_dp_key_mac); run_activated_ports(ovnsb_idl_txn, sbrec_datapath_binding_by_key, sbrec_port_binding_by_key, chassis); @@ -8564,8 +8566,9 @@ run_put_fdb(struct ovsdb_idl_txn *ovnsb_idl_txn, struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac, struct ovsdb_idl_index *sbrec_port_binding_by_key, struct ovsdb_idl_index *sbrec_datapath_binding_by_key, - const struct fdb *fdb) + struct fdb *fdb) { + bool new_entry_is_localnet = false; /* Convert ethernet argument to string form for database. */ char mac_string[ETH_ADDR_STRLEN + 1]; snprintf(mac_string, sizeof mac_string, @@ -8576,6 +8579,14 @@ run_put_fdb(struct ovsdb_idl_txn *ovnsb_idl_txn, const struct sbrec_port_binding *new_entry_pb = NULL; const struct sbrec_fdb *sb_fdb = fdb_lookup(sbrec_fdb_by_dp_key_mac, fdb->data.dp_key, mac_string); + + new_entry_pb = lport_lookup_by_key( + sbrec_datapath_binding_by_key, sbrec_port_binding_by_key, + fdb->data.dp_key, fdb->data.port_key); + if (new_entry_pb) { + new_entry_is_localnet = !strcmp(new_entry_pb->type, "localnet"); + } + if (!sb_fdb) { sb_fdb = sbrec_fdb_insert(ovnsb_idl_txn); sbrec_fdb_set_dp_key(sb_fdb, fdb->data.dp_key); @@ -8585,14 +8596,15 @@ run_put_fdb(struct ovsdb_idl_txn *ovnsb_idl_txn, sb_entry_pb = lport_lookup_by_key( sbrec_datapath_binding_by_key, sbrec_port_binding_by_key, sb_fdb->dp_key, sb_fdb->port_key); - new_entry_pb = lport_lookup_by_key( - sbrec_datapath_binding_by_key, sbrec_port_binding_by_key, - fdb->data.dp_key, fdb->data.port_key); } /* Do not have localnet overwrite a previous vif entry */ - if (!sb_entry_pb || !new_entry_pb || strcmp(sb_entry_pb->type, "") || - strcmp(new_entry_pb->type, "localnet")) { + if (new_entry_is_localnet) { + if (!sb_entry_pb || strcmp(sb_entry_pb->type, "")) { + sbrec_fdb_set_port_key(sb_fdb, fdb->data.port_key); + } + } else { sbrec_fdb_set_port_key(sb_fdb, fdb->data.port_key); + fdb->resend_on_commit_failure = true; } /* For backward compatibility check if timestamp column is available @@ -8604,23 +8616,37 @@ run_put_fdb(struct ovsdb_idl_txn *ovnsb_idl_txn, static void run_put_fdbs(struct ovsdb_idl_txn *ovnsb_idl_txn, + int ovnsb_txn_status, struct ovsdb_idl_index *sbrec_port_binding_by_key, struct ovsdb_idl_index *sbrec_datapath_binding_by_key, struct ovsdb_idl_index *sbrec_fdb_by_dp_key_mac) OVS_REQUIRES(pinctrl_mutex) { + /* If commit is in fly, do nothing.*/ if (!ovnsb_idl_txn) { return; } long long now = time_msec(); struct fdb *fdb; + + /* If commit did not fail, remove from resend list */ + if (ovnsb_txn_status) { + HMAP_FOR_EACH_SAFE (fdb, hmap_node, &put_fdbs) { + if (fdb->resend_on_commit_failure) { + fdb_remove(&put_fdbs, fdb); + } + } + } + HMAP_FOR_EACH_SAFE (fdb, hmap_node, &put_fdbs) { if (now >= fdb->timestamp) { run_put_fdb(ovnsb_idl_txn, sbrec_fdb_by_dp_key_mac, sbrec_port_binding_by_key, sbrec_datapath_binding_by_key, fdb); - fdb_remove(&put_fdbs, fdb); + if (!fdb->resend_on_commit_failure) { + fdb_remove(&put_fdbs, fdb); + } } } } diff --git a/controller/pinctrl.h b/controller/pinctrl.h index 80384ac9b..625563d7e 100644 --- a/controller/pinctrl.h +++ b/controller/pinctrl.h @@ -43,6 +43,7 @@ struct sbrec_mac_binding_table; void pinctrl_init(void); void pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, + int ovnsb_txn_status, struct ovsdb_idl_index *sbrec_datapath_binding_by_key, struct ovsdb_idl_index *sbrec_port_binding_by_key, struct ovsdb_idl_index *sbrec_port_binding_by_name, diff --git a/tests/ovn.at b/tests/ovn.at index fadc62a76..e9ffd9fa2 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -39545,7 +39545,7 @@ OVN_FOR_EACH_NORTHD([ AT_SETUP([pod to pod with localnet_learn_fdb]) AT_SKIP_IF([test $HAVE_SCAPY = no]) -# 6 VIFs, 3 per HV: vif11, vif12, vif13 on hv1. +# 8 VIFs, 4 per HV: vif11, vif12, vif13 and vif 14 on hv1. # vif11 will exchange packets with vif21, vif12 w/ vif22 and so on. # ovn_start @@ -39558,7 +39558,7 @@ check ovn-nbctl lsp-add ls0 ln_port -- \ lsp-set-type ln_port localnet -- \ lsp-set-options ln_port network_name=physnet1 -for i in $(seq 1 3); do +for i in $(seq 1 4); do check ovn-nbctl lsp-add ls0 vif1$i -- \ lsp-set-addresses vif1$i unknown check ovn-nbctl lsp-add ls0 vif2$i -- \ @@ -39573,7 +39573,7 @@ for hv in 1 2; do ovs-vsctl add-br br-phys ovn_attach n1 br-phys 192.168.0.${hv} - for i in $(seq 1 3); do + for i in $(seq 1 4); do ovs-vsctl -- add-port br-int vif${hv}${i} -- \ set interface vif${hv}${i} external-ids:iface-id=vif${hv}${i} \ options:tx_pcap=hv${hv}/vif${hv}${i}-tx.pcap \ @@ -39584,7 +39584,7 @@ for hv in 1 2; do set interface ext0 \ options:tx_pcap=hv${hv}/ext0-tx.pcap \ options:rxq_pcap=hv${hv}/ext0-rx.pcap \ - ofport-request=4 + ofport-request=5 ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys done @@ -39596,9 +39596,11 @@ ln_port_key=$(fetch_column port_binding tunnel_key logical_port=ln_port) vif11_key=$(fetch_column port_binding tunnel_key logical_port=vif11) vif12_key=$(fetch_column port_binding tunnel_key logical_port=vif12) vif13_key=$(fetch_column port_binding tunnel_key logical_port=vif13) +vif14_key=$(fetch_column port_binding tunnel_key logical_port=vif14) vif21_key=$(fetch_column port_binding tunnel_key logical_port=vif21) vif22_key=$(fetch_column port_binding tunnel_key logical_port=vif22) vif23_key=$(fetch_column port_binding tunnel_key logical_port=vif23) +vif24_key=$(fetch_column port_binding tunnel_key logical_port=vif24) ensure_controller_run() { # We want to make sure controller could run at least one full loop. @@ -39667,14 +39669,17 @@ for i in 2 3; do done wait_column "$vif21_key" fdb port_key mac='"00:00:00:00:10:21"' +# Expect flows for 10:11 and 10:21 check_flow_count hv1 2 check_flow_count hv2 2 - # We now enable localnet_learn_fdb # We check how it behaves with existing vif entries in fdb check ovn-nbctl --wait=hv set logical_switch_port ln_port options:localnet_learn_fdb=true AS_BOX([$(date +%H:%M:%S.%03N) vif11 <=> vif21 after learn_fdb]) +# Expect flows for 10:11 and 10:21 (x2 as now using learn fdb i.e. also have flows with MLF_LOCALNET_BIT. +check_flow_count hv1 4 +check_flow_count hv2 4 send_packet hv1 11 21 wait_for_packets hv2 vif21 @@ -39696,6 +39701,7 @@ wait_column "$vif21_key" fdb port_key mac='"00:00:00:00:10:21"' AT_CHECK([test 1 = `cat hv1/ovs-vswitchd.log | grep NXT_PACKET_IN2 | wc -l`]) AT_CHECK([test 1 = `cat hv2/ovs-vswitchd.log | grep NXT_PACKET_IN2 | wc -l`]) +# Expect flows for 10:11, and 10:21 check_flow_count hv1 4 check_flow_count hv2 4 @@ -39719,9 +39725,53 @@ AT_CHECK([test 1 = `cat hv2/ovs-vswitchd.log | grep NXT_PACKET_IN2 | wc -l`]) wait_column "$vif11_key" fdb port_key mac='"00:00:00:00:10:11"' wait_column "$vif21_key" fdb port_key mac='"00:00:00:00:10:21"' -# We will now create fdb entries AFTER enabing localnet_learn_fdb +# We will now create fdb entries AFTER enabling localnet_learn_fdb # We make ovn_controller (hv1 or hv2) to sleep to control who writes first to fdb # as otherwise no guarentee. + +send_packet hv1 14 99 +ensure_controller_run hv1 + +for i in 1 2 3; do + wait_for_packets hv1 vif1${i} +done +for i in 1 2 3; do + wait_for_packets hv2 vif2${i} +done + +# In this test we will cause transaction errors in hv2 for a FDB update containing vif24 update +# Make both hv2 ovn-controller and sb to sleep (i.e. be very slow ...) +sleep_sb +sleep_controller hv2 + +AS_BOX([$(date +%H:%M:%S.%03N) Sending ARP 14 -> 24]) +arp_req=$(fmt_pkt "Ether(dst='ff:ff:ff:ff:ff:ff', src='00:00:00:00:10:14')/ARP(op=1, pdst='192.168.10.24', psrc='192.168.10.14')") +as hv1 ovs-appctl netdev-dummy/receive vif14 $arp_req +ensure_controller_run hv1 + +AS_BOX([$(date +%H:%M:%S.%03N) Sending ARP 24 -> 14]) +arp_req=$(fmt_pkt "Ether(dst='00:00:00:00:10:14', src='00:00:00:00:10:24')/ARP(op=2, pdst='192.168.10.14', psrc='192.168.10.24')") +as hv2 ovs-appctl netdev-dummy/receive vif24 $arp_req +sleep 1 + +# hv2 ovn-controller has been sleeping for 1 sec. +# When it makes up, it should receive PUT_FDB for 00:00:10:14 (localnet) and 00:00:10:24 (vif24). +# hv2 will highly probably handle both within same loop when it wakes up. +wake_up_controller hv2 + +# hv2 sent update for 00:00:10:14 (localnet) and 00:00:10:24 (vif24) to sb, but +# they will fail as hv1 as already sent an update for 00:00:10:14 (vif14) to sb ... which is also sleeping +# When sb wakes up, it will see the hv2 update as a transaction error. +ensure_controller_run hv2 +wake_up_sb +echo "ln=$ln_port_key, vif14=$vif14_key, vif22=$vif24_key" +wait_column "$vif14_key" fdb port_key mac='"00:00:00:00:10:14"' +wait_column "$vif24_key" fdb port_key mac='"00:00:00:00:10:24"' + +# Expect flows for 10:11, 10:14, 10:21 and 10:24 +check_flow_count hv1 8 +check_flow_count hv2 8 + AS_BOX([$(date +%H:%M:%S.%03N) vif12 <=> vif22]) # We make sure that the fdb update by the vif is done after the localnet update sleep_controller hv1 @@ -39751,8 +39801,9 @@ wait_column "$ln_port_key" fdb port_key mac='"00:00:00:00:10:22"' wake_up_controller hv2 wait_column "$vif22_key" fdb port_key mac='"00:00:00:00:10:22"' -check_flow_count hv1 8 -check_flow_count hv2 8 +# Expect flows for 10:11, 10:12, 10:14, 10:21, 10:22 and 10:24 +check_flow_count hv1 12 +check_flow_count hv2 12 AS_BOX([$(date +%H:%M:%S.%03N) vif13 <=> vif23 ]) # Now we do the other way around: we make sure that the localnet update is done after the vif update. @@ -39800,9 +39851,11 @@ wait_column "$vif23_key" fdb port_key mac='"00:00:00:00:10:23"' # - vif11 <=> vif21: 1 PACKET_IN # - vif12 <=> vif22: 2 PACKET_IN # - vif13 <=> vif23: 2 PACKET_IN +# - vif14 <=> 99: 1 PACKET_IN +# - vif24 <=> vif14: 1 PACKET_IN # controller + . -AT_CHECK([test 5 = `cat hv1/ovs-vswitchd.log | grep NXT_PACKET_IN2 | wc -l`]) -AT_CHECK([test 5 = `cat hv2/ovs-vswitchd.log | grep NXT_PACKET_IN2 | wc -l`]) +AT_CHECK([test 7 = `cat hv1/ovs-vswitchd.log | grep NXT_PACKET_IN2 | wc -l`]) +AT_CHECK([test 7 = `cat hv2/ovs-vswitchd.log | grep NXT_PACKET_IN2 | wc -l`]) # Send a few more packets send_packet hv1 13 23 @@ -39823,19 +39876,22 @@ for i in 1 2; do done # The last packets should have gone through the fast path -AT_CHECK([test 5 = `cat hv1/ovs-vswitchd.log | grep NXT_PACKET_IN2 | wc -l`]) -AT_CHECK([test 5 = `cat hv2/ovs-vswitchd.log | grep NXT_PACKET_IN2 | wc -l`]) +AT_CHECK([test 7 = `cat hv1/ovs-vswitchd.log | grep NXT_PACKET_IN2 | wc -l`]) +AT_CHECK([test 7 = `cat hv2/ovs-vswitchd.log | grep NXT_PACKET_IN2 | wc -l`]) # Check that there are no bad surprises wait_column "$vif11_key" fdb port_key mac='"00:00:00:00:10:11"' wait_column "$vif12_key" fdb port_key mac='"00:00:00:00:10:12"' wait_column "$vif13_key" fdb port_key mac='"00:00:00:00:10:13"' +wait_column "$vif14_key" fdb port_key mac='"00:00:00:00:10:14"' wait_column "$vif21_key" fdb port_key mac='"00:00:00:00:10:21"' wait_column "$vif22_key" fdb port_key mac='"00:00:00:00:10:22"' wait_column "$vif23_key" fdb port_key mac='"00:00:00:00:10:23"' +wait_column "$vif24_key" fdb port_key mac='"00:00:00:00:10:24"' -check_flow_count hv1 12 -check_flow_count hv2 12 +# Expect flows for 10:11, 10:12, 10:13, 10:14, 10:21, 10:22, 10:23 and 10:24 +check_flow_count hv1 16 +check_flow_count hv2 16 OVN_CLEANUP([hv1]) AT_CLEANUP -- 2.47.1 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev