On Thu, Jul 13, 2017 at 3:13 PM, Miguel Angel Ajo <majop...@redhat.com> wrote:
> When ovn-controller is restarted, ovn-controller removes the old > Chassis entry from the SBDB and a new one is inserted. > > This cleared the Gateway_Chassis chassis column in the SBDB and then > ovn-northd removed the empty-column Gateway_Chassis entry. > Such event made the other (non-restarted and master gateway chassis) > believe that he was a single (non-HA) gateway, turning off BFD and > releasing the port for a tiny time frame causing unnecessary downtime. > > Signed-off-by: Miguel Angel Ajo <majop...@redhat.com> > --- > ovn/northd/ovn-northd.c | 34 +++++++++++--------- > tests/ovn.at | 82 ++++++++++++++++++++++++++++++ > +++++++++++++++++++ > 2 files changed, 102 insertions(+), 14 deletions(-) > > diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c > index 9a1e6c1..62a73f3 100644 > --- a/ovn/northd/ovn-northd.c > +++ b/ovn/northd/ovn-northd.c > @@ -1684,11 +1684,22 @@ gateway_chassis_equal(const struct > nbrec_gateway_chassis *nb_gwc, > const struct sbrec_chassis *nb_gwc_c, > const struct sbrec_gateway_chassis *sb_gwc) > { > - return !strcmp(nb_gwc->name, sb_gwc->name) > - && !strcmp(nb_gwc_c->name, sb_gwc->chassis->name) > - && nb_gwc->priority == sb_gwc->priority > - && smap_equal(&nb_gwc->options, &sb_gwc->options) > - && smap_equal(&nb_gwc->external_ids, &sb_gwc->external_ids); > + bool equal = !strcmp(nb_gwc->name, sb_gwc->name) > + && nb_gwc->priority == sb_gwc->priority > + && smap_equal(&nb_gwc->options, &sb_gwc->options) > + && smap_equal(&nb_gwc->external_ids, > &sb_gwc->external_ids); > + > A note here: instead of continuing with the boolean operations (the if and the return are not strictly necessary), I did it this way so I could add a note with the reasoning. I'm perfectly fine if we want to change it back to a full boolean form. > + if (!equal) { > + return false; > + } > + > + /* If everything else matched and we were unable to find the SBDB > + * Chassis entry at this time, assume a match and return true. 
> + * This happens when an ovn-controller is restarting and the Chassis > + * entry is gone away momentarily */ > + return !nb_gwc_c > + || (sb_gwc->chassis && !strcmp(nb_gwc_c->name, > + sb_gwc->chassis->name)); > } > > static bool > @@ -1723,11 +1734,10 @@ sbpb_gw_chassis_needs_update( > chassis_lookup_by_name(chassis_index, > lrp->gateway_chassis[n]-> > chassis_name); > > - if (chassis) { > - lrp_gwc_c[lrp_n_gateway_chassis] = chassis; > - lrp_gwc[lrp_n_gateway_chassis] = lrp->gateway_chassis[n]; > - lrp_n_gateway_chassis++; > - } else { > + lrp_gwc_c[lrp_n_gateway_chassis] = chassis; > + lrp_gwc[lrp_n_gateway_chassis] = lrp->gateway_chassis[n]; > + lrp_n_gateway_chassis++; > + if (!chassis) { > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); > VLOG_WARN_RL( > &rl, "Chassis name %s referenced in NBDB via > Gateway_Chassis " > @@ -1807,10 +1817,6 @@ copy_gw_chassis_from_nbrp_to_sbpb( > const struct sbrec_chassis *chassis = > chassis_lookup_by_name(chassis_index, lrp_gwc->chassis_name); > > - if (!chassis) { > - continue; > - } > - > gw_chassis = xrealloc(gw_chassis, (n_gwc + 1) * sizeof > *gw_chassis); > > struct sbrec_gateway_chassis *pb_gwc = > diff --git a/tests/ovn.at b/tests/ovn.at > index 5a0b761..07b822d 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -8080,3 +8080,85 @@ AT_CHECK([grep $garp hv2_br_phys_tx | sort], [0], > []) > OVN_CLEANUP([hv1],[hv2],[hv3]) > > AT_CLEANUP > + > +AT_SETUP([ovn -- ensure one gw controller restart in HA doesn't bounce > the master]) > +AT_SKIP_IF([test $HAVE_PYTHON = no]) > +ovn_start > + > +net_add n1 > + > +# create two gateways with external network connectivity > +for i in 1 2; do > + sim_add gw$i > + as gw$i > + ovs-vsctl add-br br-phys > + ovn_attach n1 br-phys 192.168.0.$i > + ovs-vsctl set open . 
external-ids:ovn-bridge-mappings=phys:br-phys > +done > + > +ovn-nbctl ls-add inside > +ovn-nbctl ls-add outside > + > +# create one hypervisors with a vif port the internal network > +sim_add hv1 > +as hv1 > +ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.11 > +ovs-vsctl -- add-port br-int hv1-vif1 -- \ > + set interface hv1-vif1 external-ids:iface-id=inside1 \ > + options:tx_pcap=hv1/vif1-tx.pcap \ > + options:rxq_pcap=hv1/vif1-rx.pcap \ > + ofport-request=1 > + > +ovn-nbctl lsp-add inside inside1 \ > + -- lsp-set-addresses inside1 "f0:00:00:01:22:01 192.168.1.101" > + > + > +ovn_populate_arp > + > +ovn-nbctl create Logical_Router name=R1 > + > +# Connect inside to R1 > +ovn-nbctl lrp-add R1 inside 00:00:01:01:02:03 192.168.1.1/24 > +ovn-nbctl lsp-add inside rp-inside -- set Logical_Switch_Port rp-inside \ > + type=router options:router-port=inside \ > + -- lsp-set-addresses rp-inside router > + > +# Connect outside to R1 as distributed router gateway port on gw1+gw2 > +ovn-nbctl lrp-add R1 outside 00:00:02:01:02:04 192.168.0.101/24 > + > +ovn-nbctl --id=@gc0 create Gateway_Chassis \ > + name=outside_gw1 chassis_name=gw1 priority=20 -- \ > + --id=@gc1 create Gateway_Chassis \ > + name=outside_gw2 chassis_name=gw2 priority=10 -- \ > + set Logical_Router_Port outside 'gateway_chassis=[@gc0,@gc1]' > + > +ovn-nbctl lsp-add outside rp-outside -- set Logical_Switch_Port > rp-outside \ > + type=router options:router-port=outside \ > + -- lsp-set-addresses rp-outside router > + > +# Create localnet port in outside > +ovn-nbctl lsp-add outside ln-outside > +ovn-nbctl lsp-set-addresses ln-outside unknown > +ovn-nbctl lsp-set-type ln-outside localnet > +ovn-nbctl lsp-set-options ln-outside network_name=phys > + > +# Allow some time for ovn-northd and ovn-controller to catch up. 
> +ovn-nbctl --wait=hv sync > + > +# currently when ovn-controller is restarted, the old entry is deleted > +# and a new one is created, which leaves the Gateway_Chassis with > +# an empty chassis for a while. NOTE: restarting ovn-controller in tests > +# doesn't have the same effect because "name" is conserved, and the > +# Chassis entry is not replaced. > + > +gw2_chassis=$(ovn-sbctl --bare --columns=_uuid find Chassis name=gw2) > +ovn-sbctl destroy Chassis $gw2_chassis > + > +sleep 2 > + > +AT_CHECK([grep "Releasing lport" gw1/ovn-controller.log], [1], []) > + > +OVN_CLEANUP([gw1],[gw2],[hv1]) > + > +AT_CLEANUP > -- > 1.8.3.1 > > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev