On Thu, Jul 13, 2017 at 3:13 PM, Miguel Angel Ajo <majop...@redhat.com>
wrote:

> When ovn-controller is restarted, ovn-controller removes the old
> Chassis entry from the SBDB and a new one is inserted.
>
> This cleared the Gateway_Chassis chassis column in the SBDB and then
> ovn-northd removed the empty-column Gateway_Chassis entry.
> Such event made the other (non-restarted and master gateway chassis)
> believe that he was a single (non-HA) gateway, turning off BFD and
> releasing the port for a tiny time frame causing unnecesary downtime.
>
> Signed-off-by: Miguel Angel Ajo <majop...@redhat.com>
> ---
>  ovn/northd/ovn-northd.c | 34 +++++++++++---------
>  tests/ovn.at            | 82 ++++++++++++++++++++++++++++++
> +++++++++++++++++++
>  2 files changed, 102 insertions(+), 14 deletions(-)
>
> diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
> index 9a1e6c1..62a73f3 100644
> --- a/ovn/northd/ovn-northd.c
> +++ b/ovn/northd/ovn-northd.c
> @@ -1684,11 +1684,22 @@ gateway_chassis_equal(const struct
> nbrec_gateway_chassis *nb_gwc,
>                        const struct sbrec_chassis *nb_gwc_c,
>                        const struct sbrec_gateway_chassis *sb_gwc)
>  {
> -    return !strcmp(nb_gwc->name, sb_gwc->name)
> -           && !strcmp(nb_gwc_c->name, sb_gwc->chassis->name)
> -           && nb_gwc->priority == sb_gwc->priority
> -           && smap_equal(&nb_gwc->options, &sb_gwc->options)
> -           && smap_equal(&nb_gwc->external_ids, &sb_gwc->external_ids);
> +    bool equal = !strcmp(nb_gwc->name, sb_gwc->name)
> +                 && nb_gwc->priority == sb_gwc->priority
> +                 && smap_equal(&nb_gwc->options, &sb_gwc->options)
> +                 && smap_equal(&nb_gwc->external_ids,
> &sb_gwc->external_ids);
> +
>

Some notes here, instead of continuing with the boolean operations (the if
and the return is not necessary) I did it this way so I could add a note
with the reasoning.

I'm perfectly fine if we want to change it back to a full bolean form.


> +    if (!equal) {
> +        return false;
> +    }
> +
> +    /* If everything else matched and we were unable to find the SBDB
> +     * Chassis entry at this time, assume a match and return true.
> +     * This happens when an ovn-controller is restarting and the Chassis
> +     * entry is gone away momentarily */
> +    return !nb_gwc_c
> +           || (sb_gwc->chassis && !strcmp(nb_gwc_c->name,
> +                                          sb_gwc->chassis->name));
>  }
>
>  static bool
> @@ -1723,11 +1734,10 @@ sbpb_gw_chassis_needs_update(
>              chassis_lookup_by_name(chassis_index,
>                                     lrp->gateway_chassis[n]->
> chassis_name);
>
> -        if (chassis) {
> -            lrp_gwc_c[lrp_n_gateway_chassis] = chassis;
> -            lrp_gwc[lrp_n_gateway_chassis] = lrp->gateway_chassis[n];
> -            lrp_n_gateway_chassis++;
> -        } else {
> +        lrp_gwc_c[lrp_n_gateway_chassis] = chassis;
> +        lrp_gwc[lrp_n_gateway_chassis] = lrp->gateway_chassis[n];
> +        lrp_n_gateway_chassis++;
> +        if (!chassis) {
>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
>              VLOG_WARN_RL(
>                  &rl, "Chassis name %s referenced in NBDB via
> Gateway_Chassis "
> @@ -1807,10 +1817,6 @@ copy_gw_chassis_from_nbrp_to_sbpb(
>          const struct sbrec_chassis *chassis =
>              chassis_lookup_by_name(chassis_index, lrp_gwc->chassis_name);
>
> -        if (!chassis) {
> -            continue;
> -        }
> -
>          gw_chassis = xrealloc(gw_chassis, (n_gwc + 1) * sizeof
> *gw_chassis);
>
>          struct sbrec_gateway_chassis *pb_gwc =
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 5a0b761..07b822d 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -8080,3 +8080,85 @@ AT_CHECK([grep $garp hv2_br_phys_tx | sort], [0],
> [])
>  OVN_CLEANUP([hv1],[hv2],[hv3])
>
>  AT_CLEANUP
> +
> +AT_SETUP([ovn -- ensure one gw controller restart in HA doesn't bounce
> the master])
> +AT_SKIP_IF([test $HAVE_PYTHON = no])
> +ovn_start
> +
> +net_add n1
> +
> +# create two gateways with external network connectivity
> +for i in 1 2; do
> +    sim_add gw$i
> +    as gw$i
> +    ovs-vsctl add-br br-phys
> +    ovn_attach n1 br-phys 192.168.0.$i
> +    ovs-vsctl set open . external-ids:ovn-bridge-mappings=phys:br-phys
> +done
> +
> +ovn-nbctl ls-add inside
> +ovn-nbctl ls-add outside
> +
> +# create one hypervisors with a vif port the internal network
> +sim_add hv1
> +as hv1
> +ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.11
> +ovs-vsctl -- add-port br-int hv1-vif1 -- \
> +    set interface hv1-vif1 external-ids:iface-id=inside1 \
> +    options:tx_pcap=hv1/vif1-tx.pcap \
> +    options:rxq_pcap=hv1/vif1-rx.pcap \
> +    ofport-request=1
> +
> +ovn-nbctl lsp-add inside inside1 \
> +        -- lsp-set-addresses inside1 "f0:00:00:01:22:01 192.168.1.101"
> +
> +
> +ovn_populate_arp
> +
> +ovn-nbctl create Logical_Router name=R1
> +
> +# Connect inside to R1
> +ovn-nbctl lrp-add R1 inside 00:00:01:01:02:03 192.168.1.1/24
> +ovn-nbctl lsp-add inside rp-inside -- set Logical_Switch_Port rp-inside \
> +    type=router options:router-port=inside \
> +    -- lsp-set-addresses rp-inside router
> +
> +# Connect outside to R1 as distributed router gateway port on gw1+gw2
> +ovn-nbctl lrp-add R1 outside 00:00:02:01:02:04 192.168.0.101/24
> +
> +ovn-nbctl --id=@gc0 create Gateway_Chassis \
> +                    name=outside_gw1 chassis_name=gw1 priority=20 -- \
> +          --id=@gc1 create Gateway_Chassis \
> +                    name=outside_gw2 chassis_name=gw2 priority=10 -- \
> +          set Logical_Router_Port outside 'gateway_chassis=[@gc0,@gc1]'
> +
> +ovn-nbctl lsp-add outside rp-outside -- set Logical_Switch_Port
> rp-outside \
> +    type=router options:router-port=outside \
> +    -- lsp-set-addresses rp-outside router
> +
> +# Create localnet port in outside
> +ovn-nbctl lsp-add outside ln-outside
> +ovn-nbctl lsp-set-addresses ln-outside unknown
> +ovn-nbctl lsp-set-type ln-outside localnet
> +ovn-nbctl lsp-set-options ln-outside network_name=phys
> +
> +# Allow some time for ovn-northd and ovn-controller to catch up.
> +ovn-nbctl --wait=hv sync
> +
> +# currently when ovn-controller is restarted, the old entry is deleted
> +# and a new one is created, which leaves the Gateway_Chassis with
> +# an empty chassis for a while. NOTE: restarting ovn-controller in tests
> +# doesn't have the same effect because "name" is conserved, and the
> +# Chassis entry is not replaced.
> +
> +gw2_chassis=$(ovn-sbctl --bare --columns=_uuid find Chassis name=gw2)
> +ovn-sbctl destroy Chassis $gw2_chassis
> +
> +sleep 2
> +
> +AT_CHECK([grep "Releasing lport" gw1/ovn-controller.log], [1], [])
> +
> +OVN_CLEANUP([gw1],[gw2],[hv1])
> +
> +AT_CLEANUP
> --
> 1.8.3.1
>
>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to