Thanks for the patch. A minor detail might have been overlooked On Fri, Oct 10, 2025 at 11:27 AM Rosemarie O'Riorden via dev < [email protected]> wrote:
> If you set a chassis' encap ip to be the same as that of another > chassis, OVN should detect it, log an error, and delete the chassis > record. > > But this was not the case; OVN would allow the transaction. However, the > commit to the southbound database would fail, because there was already > an encap record with that same ip and type (from the other chassis). > The OVS db on the problematic chassis would then rapidly increase in > size. Essentially, the controller would get stuck in a loop of deleting > and recreating the tunnels. > > This change makes it so when encaps are change, before building encaps, > the controller detects if there is already an existing encap tunnel of > the same ip and type. If so, it logs an error, refuses to build the > encaps, and deletes the associated chassis and chassis private record. > > Reported-at: https://issues.redhat.com/browse/FDP-1481 > Co-authored-by: Nicholas Hubbard <[email protected]> > Signed-off-by: Nicholas Hubbard <[email protected]> > Signed-off-by: Rosemarie O'Riorden <[email protected]> > --- > v3: > - Update misleading comment > - Add deletion of chassis private record > - Move declaration function name to same line as return type > - Delete "Transaction causes multiple rows" error from exceptions of > affected > test > - Update my own error to be more specific in exceptions of affected test > - Update test to not be racy; Check for db row counts instead of saving > file > size after change > - Update commit message to reflect changes > > v2: Fix typo in title. > --- > controller/chassis.c | 99 ++++++++++++++++++++++++++----------- > controller/chassis.h | 3 +- > controller/ovn-controller.c | 5 +- > lib/chassis-index.c | 20 ++++++++ > lib/chassis-index.h | 3 ++ > tests/ovn-controller.at | 36 ++++++++++++-- > 6 files changed, 130 insertions(+), 36 deletions(-) > > diff --git a/controller/chassis.c b/controller/chassis.c > index 07bef1c56..12117259d 100644 > --- a/controller/chassis.c > +++ b/controller/chassis.c > @@ -77,6 +77,12 @@ struct ovs_chassis_cfg { > bool ct_label_flush; > }; > > +enum chassis_update_status { > + CHASSIS_UPDATED, > + CHASSIS_NOT_UPDATED, > + CHASSIS_NEED_DELETE > +}; > + > static void > ovs_chassis_cfg_init(struct ovs_chassis_cfg *cfg) > { > @@ -699,7 +705,8 @@ chassis_build_encaps(struct ovsdb_idl_txn > *ovnsb_idl_txn, > const char *encap_ip_default, > const char *chassis_id, > const char *encap_csum, > - size_t *n_encap) > + size_t *n_encap, > + struct ovsdb_idl_index *sbrec_encaps) > { > size_t tunnel_count = 0; > > @@ -713,6 +720,18 @@ chassis_build_encaps(struct ovsdb_idl_txn > *ovnsb_idl_txn, > > SSET_FOR_EACH (encap_ip, encap_ip_set) { > SSET_FOR_EACH (encap_type, encap_type_set) { > + const struct sbrec_encap *encap_rec = > + encap_lookup_by_ip_and_type(sbrec_encaps, > + encap_ip, encap_type); > + if (encap_rec && strcmp(encap_rec->chassis_name, chassis_id)) > { > + static struct vlog_rate_limit rl = > VLOG_RATE_LIMIT_INIT(5, 1); > + VLOG_WARN_RL(&rl, "'%s' already has encap ip '%s' and " > + "type '%s', cannot duplicate on '%s'", > + encap_rec->chassis_name, encap_rec->ip, > + encap_rec->type, chassis_id); > + continue; > if there are multiple encap_ips specified and one of them is a duplicate then the loop that this patch fixes will still happen. > + } > + > struct sbrec_encap *encap = sbrec_encap_insert(ovnsb_idl_txn); > > sbrec_encap_set_type(encap, encap_type); > @@ -778,7 +797,7 @@ update_supported_sset(struct sset *supported) > > static void > remove_unsupported_options(const struct sbrec_chassis *chassis_rec, > - bool *updated) > + enum chassis_update_status *update_status) > { > struct sset supported_options = SSET_INITIALIZER(&supported_options); > update_supported_sset(&supported_options); > @@ -789,7 +808,7 @@ remove_unsupported_options(const struct sbrec_chassis > *chassis_rec, > VLOG_WARN("Removing unsupported key \"%s\" from chassis > record.", > node->key); > sbrec_chassis_update_other_config_delkey(chassis_rec, > node->key); > - *updated = true; > + *update_status = CHASSIS_UPDATED; > } > } > > @@ -827,25 +846,28 @@ chassis_get_record(struct ovsdb_idl_txn > *ovnsb_idl_txn, > } > > /* Update a Chassis record based on the config in the ovs config. > - * Returns true if 'chassis_rec' was updated, false otherwise. > + * Returns CHASSIS_UPDATED if 'chassis_rec' was updated, > CHASSIS_NEED_DELETE if > + * another chassis exists with an encap with same IP and type as > 'chassis', and > + * returns CHASSIS_NOT_UPDATED otherwise. > */ > -static bool > +static enum chassis_update_status > chassis_update(const struct sbrec_chassis *chassis_rec, > struct ovsdb_idl_txn *ovnsb_idl_txn, > const struct ovs_chassis_cfg *ovs_cfg, > const char *chassis_id, > - const struct sset *transport_zones) > + const struct sset *transport_zones, > + struct ovsdb_idl_index *sbrec_encaps) > { > - bool updated = false; > + enum chassis_update_status update_status = CHASSIS_NOT_UPDATED; > > if (strcmp(chassis_id, chassis_rec->name)) { > sbrec_chassis_set_name(chassis_rec, chassis_id); > - updated = true; > + update_status = CHASSIS_UPDATED; > } > > if (strcmp(ovs_cfg->hostname, chassis_rec->hostname)) { > sbrec_chassis_set_hostname(chassis_rec, ovs_cfg->hostname); > - updated = true; > + update_status = CHASSIS_UPDATED; > } > > if (chassis_other_config_changed(ovs_cfg, chassis_rec)) { > @@ -856,12 +878,12 @@ chassis_update(const struct sbrec_chassis > *chassis_rec, > sbrec_chassis_verify_other_config(chassis_rec); > sbrec_chassis_set_other_config(chassis_rec, &other_config); > smap_destroy(&other_config); > - updated = true; > + update_status = CHASSIS_UPDATED; > } > > update_chassis_transport_zones(transport_zones, chassis_rec); > > - remove_unsupported_options(chassis_rec, &updated); > + remove_unsupported_options(chassis_rec, &update_status); > > /* If any of the encaps should change, update them. */ > bool tunnels_changed = > @@ -871,20 +893,27 @@ chassis_update(const struct sbrec_chassis > *chassis_rec, > ovs_cfg->encap_csum, > chassis_rec); > if (!tunnels_changed) { > - return updated; > + return update_status; > } > > struct sbrec_encap **encaps; > size_t n_encap; > > encaps = > - chassis_build_encaps(ovnsb_idl_txn, &ovs_cfg->encap_type_set, > + chassis_build_encaps(ovnsb_idl_txn, > + &ovs_cfg->encap_type_set, > &ovs_cfg->encap_ip_set, > ovs_cfg->encap_ip_default, chassis_id, > - ovs_cfg->encap_csum, &n_encap); > + ovs_cfg->encap_csum, &n_encap, > + sbrec_encaps); > sbrec_chassis_set_encaps(chassis_rec, encaps, n_encap); > free(encaps); > - return true; > + + if (!n_encap) { + return CHASSIS_NEED_DELETE; > + } > + > + return CHASSIS_UPDATED; > } > > /* If this is a chassis_private config update after we initialized the > record > @@ -935,7 +964,8 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > const char *chassis_id, > const struct ovsrec_bridge *br_int, > const struct sset *transport_zones, > - const struct sbrec_chassis_private **chassis_private) > + const struct sbrec_chassis_private **chassis_private, > + struct ovsdb_idl_index *sbrec_encaps) > { > struct ovs_chassis_cfg ovs_cfg; > > @@ -956,22 +986,31 @@ chassis_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > * modified in the ovs table. > */ > if (chassis_rec && ovnsb_idl_txn) { > - bool updated = chassis_update(chassis_rec, ovnsb_idl_txn, > &ovs_cfg, > - chassis_id, transport_zones); > - > - if (!existed || updated) { > - ovsdb_idl_txn_add_comment(ovnsb_idl_txn, > - "ovn-controller: %s chassis '%s'", > - !existed ? "registering" : > "updating", > - chassis_id); > - } > + enum chassis_update_status update_status = > + chassis_update(chassis_rec, ovnsb_idl_txn, &ovs_cfg, > + chassis_id, > + transport_zones, sbrec_encaps); > + > + *chassis_private = chassis_private_get_record(ovnsb_idl_txn, > + sbrec_chassis_private_by_name, chassis_id); > > - *chassis_private = > - chassis_private_get_record(ovnsb_idl_txn, > - sbrec_chassis_private_by_name, > + if (update_status == CHASSIS_NEED_DELETE) { > + sbrec_chassis_delete(chassis_rec); > + chassis_rec = NULL; > + if (*chassis_private) { > + sbrec_chassis_private_delete(*chassis_private); > + } > + } else { > + if (!existed || update_status == CHASSIS_UPDATED) { > + ovsdb_idl_txn_add_comment(ovnsb_idl_txn, > + "ovn-controller: %s chassis > '%s'", > + !existed ? "registering" > + : "updating", chassis_id); > + } > + if (*chassis_private) { > + chassis_private_update(*chassis_private, chassis_rec, > chassis_id); > - if (*chassis_private) { > - chassis_private_update(*chassis_private, chassis_rec, > chassis_id); > + } > } > } > > diff --git a/controller/chassis.h b/controller/chassis.h > index 03cc2f906..14251bdad 100644 > --- a/controller/chassis.h > +++ b/controller/chassis.h > @@ -44,7 +44,8 @@ const struct sbrec_chassis *chassis_run( > const struct ovsrec_open_vswitch_table *, > const char *chassis_id, const struct ovsrec_bridge *br_int, > const struct sset *transport_zones, > - const struct sbrec_chassis_private **chassis_private); > + const struct sbrec_chassis_private **chassis_private, > + struct ovsdb_idl_index *sbrec_chassis_encaps); > bool chassis_cleanup(struct ovsdb_idl_txn *ovs_idl_txn, > struct ovsdb_idl_txn *ovnsb_idl_txn, > const struct ovsrec_open_vswitch_table *, > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > index 6fbf3a227..67a2c447c 100644 > --- a/controller/ovn-controller.c > +++ b/controller/ovn-controller.c > @@ -6684,6 +6684,9 @@ main(int argc, char *argv[]) > struct ovsdb_idl_index *sbrec_learned_route_index_by_datapath > = ovsdb_idl_index_create1(ovnsb_idl_loop.idl, > &sbrec_learned_route_col_datapath); > + struct ovsdb_idl_index *sbrec_encaps > + = ovsdb_idl_index_create2(ovnsb_idl_loop.idl, > + &sbrec_encap_col_type, > &sbrec_encap_col_ip); > > ovsdb_idl_track_add_all(ovnsb_idl_loop.idl); > ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, > @@ -7401,7 +7404,7 @@ main(int argc, char *argv[]) > sbrec_chassis_private_by_name, > ovs_table, chassis_id, > br_int, &transport_zones, > - &chassis_private); > + &chassis_private, sbrec_encaps); > } > > /* If any OVS feature support changed, force a full recompute. > diff --git a/lib/chassis-index.c b/lib/chassis-index.c > index 4b38036cb..953cc6ed4 100644 > --- a/lib/chassis-index.c > +++ b/lib/chassis-index.c > @@ -115,3 +115,23 @@ ha_chassis_group_lookup_by_name( > > return retval; > } > + > +/* Finds and returns the encap with the given ip and type, or NULL if no > such > + * encap exists. */ > +const struct sbrec_encap * > +encap_lookup_by_ip_and_type(struct ovsdb_idl_index *sbrec_chassis_encaps, > + const char *ip, const char *type) > +{ > + struct sbrec_encap *target = sbrec_encap_index_init_row( > + sbrec_chassis_encaps); > + > + sbrec_encap_index_set_ip(target, ip); > + sbrec_encap_index_set_type(target, type); > + > + struct sbrec_encap *retval = sbrec_encap_index_find( > + sbrec_chassis_encaps, target); > + > + sbrec_encap_index_destroy_row(target); > + > + return retval; > +} > diff --git a/lib/chassis-index.h b/lib/chassis-index.h > index bc654da13..7161458b2 100644 > --- a/lib/chassis-index.h > +++ b/lib/chassis-index.h > @@ -35,5 +35,8 @@ chassis_private_lookup_by_name( > struct ovsdb_idl_index *ha_chassis_group_index_create(struct ovsdb_idl > *idl); > const struct sbrec_ha_chassis_group *ha_chassis_group_lookup_by_name( > struct ovsdb_idl_index *sbrec_ha_chassis_grp_by_name, const char > *name); > +const struct sbrec_encap *encap_lookup_by_ip_and_type( > + struct ovsdb_idl_index *sbrec_chassis_encaps, > + const char *ip, const char *type); > > #endif /* lib/chassis-index.h */ > diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at > index 7bc1aca59..6713f1a2b 100644 > --- a/tests/ovn-controller.at > +++ b/tests/ovn-controller.at > @@ -277,7 +277,8 @@ ovs-vsctl -- set Open_vSwitch . > external-ids:hostname="${new_sysid}" \ > -- set Open_vSwitch . > external-ids:ovn-remote="${current_remote}" > > OVS_WAIT_UNTIL([ > - grep -q 'Transaction causes multiple rows in \\"Encap\\" table to > have identical values' hv/ovn-controller.log > + grep -q "'hv' already has encap ip '192.168.0.1' and type 'geneve', " > \ > + "cannot duplicate on 'hv-foo'" hv/ovn-controller.log > ]) > > # Destroy the stale entries manually and ovn-controller should now be able > @@ -305,12 +306,12 @@ check ovn-sbctl destroy chassis_private . -- destroy > chassis . > # - could not create datapath br-int of unknown type foobar > # - unknown datapath type bar > # - could not create datapath br-int of unknown type bar > -# - Transaction causes multiple rows ... > # - failed to create bridge br-int: Address family not supported by > protocol # due to previous errors. > +# - 'hv' already has encap ip '192.168.0.1' and type 'geneve', cannot > duplicate on 'hv-foo' > OVN_CLEANUP_SBOX([hv], ["/foo/d > /bar/d > -/Transaction causes multiple rows/d > -/failed to create bridge/d"]) > +/failed to create bridge/d > +/already has encap ip.*cannot duplicate on/d"]) > > OVN_CLEANUP_VSWITCH([main]) > as ovn-sb > @@ -3821,3 +3822,30 @@ wait_ports 0 > OVN_CLEANUP([hv1 > /Invalid VXLAN port number.*/d]) > AT_CLEANUP > + > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([ovn-controller - two encaps same IP and type]) > +AT_KEYWORDS([ovn]) > +ovn_start > +net_add n1 > + > +sim_add hv1 > +as hv1 > +check ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.11 > + > +sim_add hv2 > +as hv2 > +check ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.12 > + > +# Set hv2's encap IP to be the same as hv1's. > +check ovs-vsctl set Open_vSwitch . > external-ids:ovn-encap-ip="192.168.0.11" > if you change this line to check ovs-vsctl set Open_vSwitch . external-ids:ovn-encap-ip="192.168.0.11,192.168.0.12" the controller does not add the duplicated encap-ip but ovn-controller gets stuck in a loop trying to and failing. > +wait_row_count Encap 0 ip="192.168.0.12" > + > +# Make sure hv2 has been deleted because of the misconfiguration. > +wait_row_count Chassis 0 name="hv2" > +wait_row_count Chassis_Private 0 name="hv2" > + > +AT_CLEANUP > +]) > -- > 2.51.0 > > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > > Thanks, Jacob Tanenbaum _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
