On Mon, Jan 27, 2025 at 8:23 PM Lucas Vargas Dias via dev < [email protected]> wrote:
> CMS like neutron uses NB Global nb_cfg to liveness check > of chassis. It will increment nb_cfg of Chassis_Private > and with monitor all enabled, all chassis exchange between them > messages of update Chassis_Private. So, CPU load of ovn-controller > will be high in scenario with many chassis. > To fix it, each chassis monitor its Chassis_Private only. > > Signed-off-by: Lucas Vargas Dias <[email protected]> > --- > Hello Lucas, thank you for the patch and sorry for the delays in reviews. Of course the test should be kept too. controller/ovn-controller.c | 24 +++++++++++++++----- > tests/ovn-controller.at | 45 +++++++++++++++++++++++++++++++++++++ > 2 files changed, 64 insertions(+), 5 deletions(-) > > diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c > index 854e54854..a34966802 100644 > --- a/controller/ovn-controller.c > +++ b/controller/ovn-controller.c > @@ -264,7 +264,13 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, > ovsdb_idl_condition_add_clause_true(&ce); > ovsdb_idl_condition_add_clause_true(&ip_mcast); > ovsdb_idl_condition_add_clause_true(&igmp); > - ovsdb_idl_condition_add_clause_true(&chprv); > + if (chassis) { > + /* Monitors Chassis_Private record for current chassis only. > */ > + sbrec_chassis_private_add_clause_name(&chprv, OVSDB_F_EQ, > + chassis->name); > + } else { > + ovsdb_idl_condition_add_clause_true(&chprv); > + } > With this the condition becomes the same for both monitor_all and !monitor_all, what do you think about the following instead? @@ -258,6 +258,16 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, * avoid the unnecessarily extra wake-ups of ovn-controller. */ ovsdb_idl_condition_add_clause_true(&ldpg); + if (chassis) { + /* Always monitor Chassis_Private record for current chassis only. */ + sbrec_chassis_private_add_clause_name(&chprv, OVSDB_F_EQ, + chassis->name); + } else { + /* During initialization, we monitor all records in Chassis_Private so + * that we don't try to recreate existing ones. */ + ovsdb_idl_condition_add_clause_true(&chprv); + } + if (monitor_all) { ovsdb_idl_condition_add_clause_true(&pb); ovsdb_idl_condition_add_clause_true(&lf); @@ -268,7 +278,6 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, ovsdb_idl_condition_add_clause_true(&ce); ovsdb_idl_condition_add_clause_true(&ip_mcast); ovsdb_idl_condition_add_clause_true(&igmp); - ovsdb_idl_condition_add_clause_true(&chprv); ovsdb_idl_condition_add_clause_true(&tv); ovsdb_idl_condition_add_clause_true(&nh); goto out; @@ -306,16 +315,9 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl, sbrec_igmp_group_add_clause_chassis(&igmp, OVSDB_F_EQ, &chassis->header_.uuid); - /* Monitors Chassis_Private record for current chassis only. */ - sbrec_chassis_private_add_clause_name(&chprv, OVSDB_F_EQ, - chassis->name); - sbrec_chassis_template_var_add_clause_chassis(&tv, OVSDB_F_EQ, chassis->name); } else { - /* During initialization, we monitor all records in Chassis_Private so - * that we don't try to recreate existing ones. */ - ovsdb_idl_condition_add_clause_true(&chprv); /* Also, to avoid traffic disruption (e.g., conntrack flushing for * zones that are used by OVN but not yet known due to the SB initial * contents not being available), monitor all port bindings > ovsdb_idl_condition_add_clause_true(&tv); > goto out; > } > @@ -603,7 +609,8 @@ static void > update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl, > bool *monitor_all_p, bool *reset_ovnsb_idl_min_index, > struct controller_engine_ctx *ctx, > - unsigned int *sb_cond_seqno) > + unsigned int *sb_cond_seqno, > + struct ovsdb_idl_index *sbrec_chassis_by_name) > { > const struct ovsrec_open_vswitch *cfg = > ovsrec_open_vswitch_first(ovs_idl); > if (!cfg) { > @@ -629,12 +636,18 @@ update_sb_db(struct ovsdb_idl *ovs_idl, struct > ovsdb_idl *ovnsb_idl, > get_chassis_external_id_value_bool( > &cfg->external_ids, chassis_id, "ovn-monitor-all", false); > if (monitor_all) { > + const struct sbrec_chassis *chassis = NULL; > + if (chassis_id && sbrec_chassis_by_name) { > + chassis = > + chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id); > + } > + > /* Always call update_sb_monitors when monitor_all is true. > * Otherwise, don't call it here, because there would be > unnecessary > * extra cost. Instead, it is called after the engine execution > only > * when it is necessary. */ > unsigned int next_cond_seqno = > - update_sb_monitors(ovnsb_idl, NULL, NULL, NULL, NULL, true); > + update_sb_monitors(ovnsb_idl, chassis, NULL, NULL, NULL, > true); > if (sb_cond_seqno) { > *sb_cond_seqno = next_cond_seqno; > } > @@ -5504,7 +5517,8 @@ main(int argc, char *argv[]) > > update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl, > &sb_monitor_all, > &reset_ovnsb_idl_min_index, > - &ctrl_engine_ctx, &ovnsb_expected_cond_seqno); > + &ctrl_engine_ctx, &ovnsb_expected_cond_seqno, > + sbrec_chassis_by_name); > update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl)); > > struct ovsdb_idl_txn *ovnsb_idl_txn > @@ -5974,7 +5988,7 @@ loop_done: > bool done = !ovsdb_idl_has_ever_connected(ovnsb_idl_loop.idl); > while (!done) { > update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl, > - NULL, NULL, NULL, NULL); > + NULL, NULL, NULL, NULL, sbrec_chassis_by_name); > nit: No need to pass the sbrec_chassis_by_name here, the controller is exiting anyway. > update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl)); > > struct ovsdb_idl_txn *ovs_idl_txn > diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at > index 988de7bb3..03766a1b8 100644 > --- a/tests/ovn-controller.at > +++ b/tests/ovn-controller.at > @@ -484,6 +484,51 @@ OVN_CLEANUP([hv]) > AT_CLEANUP > ]) > > +# Checks that ovn-controller increments the nb_cfg value in the > Chassis_Private table > +# and each hv doesn't exchange messages of update of Chassis_Private table > +OVN_FOR_EACH_NORTHD([ > +AT_SETUP([ovn-controller - Bump Chassis_Private nb_cfg value with Monitor > All]) > +AT_KEYWORDS([ovn]) > +ovn_start > + > +net_add n1 > +sim_add hv1 > +as hv1 > +check ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.11 > + > +sim_add hv2 > +as hv2 > +check ovs-vsctl add-br br-phys > +ovn_attach n1 br-phys 192.168.0.12 > + > + > +as hv1 ovs-vsctl set open . external_ids:ovn-monitor-all=true > +as hv2 ovs-vsctl set open . external_ids:ovn-monitor-all=true > + > +# Wait until ovn-monitor-all is processed by ovn-controller. > +wait_row_count Chassis 1 name=hv1 other_config:ovn-monitor-all=true > +wait_row_count Chassis 1 name=hv2 other_config:ovn-monitor-all=true > + > +# enable debug for check the received messages > +as hv1 ovn-appctl -t ovn-controller vlog/set dbg > +as hv2 ovn-appctl -t ovn-controller vlog/set dbg > + > +# Bump the NB_Global nb_cfg value > +nb_global_id=$(ovn-nbctl --columns _uuid --bare find nb_global) > +check ovn-nbctl set NB_Global ${nb_global_id} nb_cfg=999 > + > +# ovn-controller should bump the nb_cfg in the chassis_private table > +OVS_WAIT_UNTIL([test x999 = x`ovn-sbctl --columns nb_cfg --bare find > chassis_private name=hv1`]) > +OVS_WAIT_UNTIL([test x999 = x`ovn-sbctl --columns nb_cfg --bare find > chassis_private name=hv2`]) > + > +AT_CHECK([test 1 = `cat hv1/ovn-controller.log | grep > 'Chassis_Private.*nb_cfg\":999' | wc -l`]) > Please remove the '\' otherwise grep will complain: grep: warning: stray \ before " > +AT_CHECK([test 1 = `cat hv2/ovn-controller.log | grep > 'Chassis_Private.*nb_cfg\":999' | wc -l`]) > Same + > +OVN_CLEANUP([hv1],[hv2]) > +AT_CLEANUP > +]) > + > # check that nb_cfg overflow cases handled properly > AT_SETUP([ovn-controller - overflow the nb_cfg value across the tables]) > AT_KEYWORDS([ovn]) > -- > 2.34.1 > > > -- > > > > > _'Esta mensagem é direcionada apenas para os endereços constantes no > cabeçalho inicial. Se você não está listado nos endereços constantes no > cabeçalho, pedimos-lhe que desconsidere completamente o conteúdo dessa > mensagem e cuja cópia, encaminhamento e/ou execução das ações citadas > estão > imediatamente anuladas e proibidas'._ > > > * **'Apesar do Magazine Luiza tomar > todas as precauções razoáveis para assegurar que nenhum vírus esteja > presente nesse e-mail, a empresa não poderá aceitar a responsabilidade por > quaisquer perdas ou danos causados por esse e-mail ou por seus anexos'.* > > > > _______________________________________________ > dev mailing list > [email protected] > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > > Thanks, Ales _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
