On Mon, Jan 27, 2025 at 8:23 PM Lucas Vargas Dias via dev <
[email protected]> wrote:

> CMS like neutron uses NB Global nb_cfg to liveness check
> of chassis. It will increment nb_cfg of Chassis_Private
> and with monitor all enabled, all chassis exchange between them
> messages of update Chassis_Private. So, CPU load of ovn-controller
> will be high in scenario with many chassis.
> To fix it, each chassis monitor its Chassis_Private only.
>
> Signed-off-by: Lucas Vargas Dias <[email protected]>
> ---
>

Hello Lucas,

thank you for the patch and sorry for the delays in reviews.


Of course the test should be kept too.

 controller/ovn-controller.c | 24 +++++++++++++++-----
>  tests/ovn-controller.at     | 45 +++++++++++++++++++++++++++++++++++++
>  2 files changed, 64 insertions(+), 5 deletions(-)
>
> diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
> index 854e54854..a34966802 100644
> --- a/controller/ovn-controller.c
> +++ b/controller/ovn-controller.c
> @@ -264,7 +264,13 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
>          ovsdb_idl_condition_add_clause_true(&ce);
>          ovsdb_idl_condition_add_clause_true(&ip_mcast);
>          ovsdb_idl_condition_add_clause_true(&igmp);
> -        ovsdb_idl_condition_add_clause_true(&chprv);
> +        if (chassis) {
> +            /* Monitors Chassis_Private record for current chassis only.
> */
> +            sbrec_chassis_private_add_clause_name(&chprv, OVSDB_F_EQ,
> +                                                  chassis->name);
> +        } else {
> +            ovsdb_idl_condition_add_clause_true(&chprv);
> +        }
>

With this the condition becomes the same for both monitor_all and
!monitor_all, what do you think about the following instead?

@@ -258,6 +258,16 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
      * avoid the unnecessarily extra wake-ups of ovn-controller. */
     ovsdb_idl_condition_add_clause_true(&ldpg);

+    if (chassis) {
+        /* Always monitor Chassis_Private record for current chassis only.
*/
+        sbrec_chassis_private_add_clause_name(&chprv, OVSDB_F_EQ,
+                                              chassis->name);
+    } else {
+        /* During initialization, we monitor all records in
Chassis_Private so
+         * that we don't try to recreate existing ones. */
+        ovsdb_idl_condition_add_clause_true(&chprv);
+    }
+
     if (monitor_all) {
         ovsdb_idl_condition_add_clause_true(&pb);
         ovsdb_idl_condition_add_clause_true(&lf);
@@ -268,7 +278,6 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
         ovsdb_idl_condition_add_clause_true(&ce);
         ovsdb_idl_condition_add_clause_true(&ip_mcast);
         ovsdb_idl_condition_add_clause_true(&igmp);
-        ovsdb_idl_condition_add_clause_true(&chprv);
         ovsdb_idl_condition_add_clause_true(&tv);
         ovsdb_idl_condition_add_clause_true(&nh);
         goto out;
@@ -306,16 +315,9 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
         sbrec_igmp_group_add_clause_chassis(&igmp, OVSDB_F_EQ,
                                             &chassis->header_.uuid);

-        /* Monitors Chassis_Private record for current chassis only. */
-        sbrec_chassis_private_add_clause_name(&chprv, OVSDB_F_EQ,
-                                              chassis->name);
-
         sbrec_chassis_template_var_add_clause_chassis(&tv, OVSDB_F_EQ,
                                                       chassis->name);
     } else {
-        /* During initialization, we monitor all records in
Chassis_Private so
-         * that we don't try to recreate existing ones. */
-        ovsdb_idl_condition_add_clause_true(&chprv);
         /* Also, to avoid traffic disruption (e.g., conntrack flushing for
          * zones that are used by OVN but not yet known due to the SB
initial
          * contents not being available), monitor all port bindings


>          ovsdb_idl_condition_add_clause_true(&tv);
>          goto out;
>      }
> @@ -603,7 +609,8 @@ static void
>  update_sb_db(struct ovsdb_idl *ovs_idl, struct ovsdb_idl *ovnsb_idl,
>               bool *monitor_all_p, bool *reset_ovnsb_idl_min_index,
>               struct controller_engine_ctx *ctx,
> -             unsigned int *sb_cond_seqno)
> +             unsigned int *sb_cond_seqno,
> +             struct ovsdb_idl_index *sbrec_chassis_by_name)
>  {
>      const struct ovsrec_open_vswitch *cfg =
> ovsrec_open_vswitch_first(ovs_idl);
>      if (!cfg) {
> @@ -629,12 +636,18 @@ update_sb_db(struct ovsdb_idl *ovs_idl, struct
> ovsdb_idl *ovnsb_idl,
>          get_chassis_external_id_value_bool(
>              &cfg->external_ids, chassis_id, "ovn-monitor-all", false);
>      if (monitor_all) {
> +        const struct sbrec_chassis *chassis = NULL;
> +        if (chassis_id && sbrec_chassis_by_name) {
> +            chassis =
> +                chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
> +        }
> +
>          /* Always call update_sb_monitors when monitor_all is true.
>           * Otherwise, don't call it here, because there would be
> unnecessary
>           * extra cost. Instead, it is called after the engine execution
> only
>           * when it is necessary. */
>          unsigned int next_cond_seqno =
> -            update_sb_monitors(ovnsb_idl, NULL, NULL, NULL, NULL, true);
> +            update_sb_monitors(ovnsb_idl, chassis, NULL, NULL, NULL,
> true);
>          if (sb_cond_seqno) {
>              *sb_cond_seqno = next_cond_seqno;
>          }
> @@ -5504,7 +5517,8 @@ main(int argc, char *argv[])
>
>          update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl,
> &sb_monitor_all,
>                       &reset_ovnsb_idl_min_index,
> -                     &ctrl_engine_ctx, &ovnsb_expected_cond_seqno);
> +                     &ctrl_engine_ctx, &ovnsb_expected_cond_seqno,
> +                     sbrec_chassis_by_name);
>          update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl));
>
>          struct ovsdb_idl_txn *ovnsb_idl_txn
> @@ -5974,7 +5988,7 @@ loop_done:
>          bool done = !ovsdb_idl_has_ever_connected(ovnsb_idl_loop.idl);
>          while (!done) {
>              update_sb_db(ovs_idl_loop.idl, ovnsb_idl_loop.idl,
> -                         NULL, NULL, NULL, NULL);
> +                         NULL, NULL, NULL, NULL, sbrec_chassis_by_name);
>

nit: No need to pass the sbrec_chassis_by_name here, the controller is
exiting anyway.


>              update_ssl_config(ovsrec_ssl_table_get(ovs_idl_loop.idl));
>
>              struct ovsdb_idl_txn *ovs_idl_txn
> diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at
> index 988de7bb3..03766a1b8 100644
> --- a/tests/ovn-controller.at
> +++ b/tests/ovn-controller.at
> @@ -484,6 +484,51 @@ OVN_CLEANUP([hv])
>  AT_CLEANUP
>  ])
>
> +# Checks that ovn-controller increments the nb_cfg value in the
> Chassis_Private table
> +# and each hv doesn't exchange messages of update of Chassis_Private table
> +OVN_FOR_EACH_NORTHD([
> +AT_SETUP([ovn-controller - Bump Chassis_Private nb_cfg value with Monitor
> All])
> +AT_KEYWORDS([ovn])
> +ovn_start
> +
> +net_add n1
> +sim_add hv1
> +as hv1
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.11
> +
> +sim_add hv2
> +as hv2
> +check ovs-vsctl add-br br-phys
> +ovn_attach n1 br-phys 192.168.0.12
> +
> +
> +as hv1 ovs-vsctl set open . external_ids:ovn-monitor-all=true
>
+as hv2 ovs-vsctl set open . external_ids:ovn-monitor-all=true
> +
> +# Wait until ovn-monitor-all is processed by ovn-controller.
> +wait_row_count Chassis 1 name=hv1 other_config:ovn-monitor-all=true
> +wait_row_count Chassis 1 name=hv2 other_config:ovn-monitor-all=true
> +
> +# enable debug for check the received messages
> +as hv1 ovn-appctl -t ovn-controller vlog/set dbg
> +as hv2 ovn-appctl -t ovn-controller vlog/set dbg
> +
> +# Bump the NB_Global nb_cfg value
> +nb_global_id=$(ovn-nbctl --columns _uuid --bare find nb_global)
> +check ovn-nbctl set NB_Global ${nb_global_id} nb_cfg=999
> +
> +# ovn-controller should bump the nb_cfg in the chassis_private table
> +OVS_WAIT_UNTIL([test x999 = x`ovn-sbctl --columns nb_cfg --bare find
> chassis_private name=hv1`])
> +OVS_WAIT_UNTIL([test x999 = x`ovn-sbctl --columns nb_cfg --bare find
> chassis_private name=hv2`])
> +
> +AT_CHECK([test 1 = `cat hv1/ovn-controller.log | grep
> 'Chassis_Private.*nb_cfg\":999' | wc -l`])
>

Please remove the '\' otherwise grep will complain: grep: warning: stray \
before "


> +AT_CHECK([test 1 = `cat hv2/ovn-controller.log | grep
> 'Chassis_Private.*nb_cfg\":999' | wc -l`])
>

Same

+
> +OVN_CLEANUP([hv1],[hv2])
> +AT_CLEANUP
> +])
> +
>  # check that nb_cfg overflow cases handled properly
>  AT_SETUP([ovn-controller - overflow the nb_cfg value across the tables])
>  AT_KEYWORDS([ovn])
> --
> 2.34.1
>
>
> --
>
>
>
>
> _'Esta mensagem é direcionada apenas para os endereços constantes no
> cabeçalho inicial. Se você não está listado nos endereços constantes no
> cabeçalho, pedimos-lhe que desconsidere completamente o conteúdo dessa
> mensagem e cuja cópia, encaminhamento e/ou execução das ações citadas
> estão
> imediatamente anuladas e proibidas'._
>
>
> * **'Apesar do Magazine Luiza tomar
> todas as precauções razoáveis para assegurar que nenhum vírus esteja
> presente nesse e-mail, a empresa não poderá aceitar a responsabilidade por
> quaisquer perdas ou danos causados por esse e-mail ou por seus anexos'.*
>
>
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
Thanks,
Ales
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to