Re: [ovs-dev] [PATCH ovn branch-23.03] northd: Fix recompute of referenced chassis in HA chassis groups.

2023-08-29 Thread Mark Michelson
Thank you Ilya, I applied this patch to 23.03 and all branches back to 
22.03.


On 8/25/23 14:03, Ilya Maximets wrote:

Current implementation of 'ref_chassis' recompute is horribly slow.
For each port binding it finds LR group, then it adds a chassis of the
current port binding to each HA chassis group of this LR group.

The number of ports is much greater than number of chassis.  And the
number of distinct LR groups is even smaller than a number of chassis
in a typical setup.  Chances are, the setup only has one LR group.
So, northd today is trying to add the same chassis to the same
HA chassis group over and over again.

In an OpenStack setup with ~20 chassis, 40K ports and 5K routers we
end up with 33+ million calls to add_to_ha_ref_chassis_info() that
also performs a linear search for duplicates leading to about 270
million operations, while we only have 5K Sb HA chassis groups with
15 ref_chassis in each.  I.e. only 5000 * 15 = 75.000 operations out
of 270 million are actually useful.

In practice that leads to 80 second recompute:

   inc_proc_eng|INFO|node: sync_from_sb, recompute (forced) took 80074ms

Fix that by accumulating unique chassis per unique LR groups first
and then updating ref_chassis with these unique values avoiding any
unnecessary calls.  Result is ~1000x performance improvement:

   inc_proc_eng|DBG|node: sync_from_sb, recompute (forced) took 75ms

Considering this as a performance bug, because it makes OVN practically
unusable in certain (default?) OpenStack configurations at scale.

Fixes: b82d351976a1 ("ovn: Add generic HA chassis group")
Reported-at: 
https://mail.openvswitch.org/pipermail/ovs-discuss/2023-August/052614.html
Reported-by: Roberto Bartzen Acosta 
Acked-by: Mark Michelson 
Signed-off-by: Ilya Maximets 
---

cherry-picked from commit 4023d6a5fa573021a6218ac49796f3f7688227d7

This version is also applicable all the way down to 22.03.

  northd/northd.c | 119 +++-
  1 file changed, 76 insertions(+), 43 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index 4d3646d10..7b91a94cb 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -857,6 +857,8 @@ struct lrouter_group {
  int n_router_dps;
  /* Set of ha_chassis_groups which are associated with the router dps. */
  struct sset ha_chassis_groups;
+/* Temporary storage for chassis references while computing HA groups. */
+struct hmapx tmp_ha_chassis;
  };
  
  static struct ovn_datapath *

@@ -7874,6 +7876,7 @@ build_lrouter_groups(struct hmap *ports, struct ovs_list 
*lr_list)
  od->lr_group->router_dps[0] = od;
  od->lr_group->n_router_dps = 1;
  sset_init(>lr_group->ha_chassis_groups);
+hmapx_init(>lr_group->tmp_ha_chassis);
  build_lrouter_groups__(ports, od);
  }
  }
@@ -15783,6 +15786,7 @@ destroy_datapaths_and_ports(struct hmap *datapaths, 
struct hmap *ports,
  
  free(lr_group->router_dps);

  sset_destroy(_group->ha_chassis_groups);
+hmapx_destroy(_group->tmp_ha_chassis);
  free(lr_group);
  }
  }
@@ -16319,38 +16323,27 @@ ovnnb_db_run(struct northd_input *input_data,
  smap_destroy();
  }
  
-/* Stores the list of chassis which references an ha_chassis_group.

+/* Stores the set of chassis which references an ha_chassis_group.
   */
  struct ha_ref_chassis_info {
  const struct sbrec_ha_chassis_group *ha_chassis_group;
-struct sbrec_chassis **ref_chassis;
-size_t n_ref_chassis;
-size_t free_slots;
+struct hmapx ref_chassis;
  };
  
  static void

  add_to_ha_ref_chassis_info(struct ha_ref_chassis_info *ref_ch_info,
-   const struct sbrec_chassis *chassis)
+   const struct hmapx *chassis)
  {
-for (size_t j = 0; j < ref_ch_info->n_ref_chassis; j++) {
-if (ref_ch_info->ref_chassis[j] == chassis) {
-   return;
-}
-}
+if (!hmapx_count(_ch_info->ref_chassis)) {
+hmapx_destroy(_ch_info->ref_chassis);
+hmapx_clone(_ch_info->ref_chassis, chassis);
+} else {
+struct hmapx_node *node;
  
-/* Allocate space for 3 chassis at a time. */

-if (!ref_ch_info->free_slots) {
-ref_ch_info->ref_chassis =
-xrealloc(ref_ch_info->ref_chassis,
- sizeof *ref_ch_info->ref_chassis *
- (ref_ch_info->n_ref_chassis + 3));
-ref_ch_info->free_slots = 3;
+HMAPX_FOR_EACH (node, chassis) {
+hmapx_add(_ch_info->ref_chassis, node->data);
+}
  }
-
-ref_ch_info->ref_chassis[ref_ch_info->n_ref_chassis] =
-CONST_CAST(struct sbrec_chassis *, chassis);
-ref_ch_info->n_ref_chassis++;
-ref_ch_info->free_slots--;
  }
  
  struct ha_chassis_group_node {

@@ -16379,9 +16372,20 @@ update_sb_ha_group_ref_chassis(struct northd_input 
*input_data,
  struct shash_node *node;
  

[ovs-dev] [PATCH ovn branch-23.03] northd: Fix recompute of referenced chassis in HA chassis groups.

2023-08-25 Thread Ilya Maximets
Current implementation of 'ref_chassis' recompute is horribly slow.
For each port binding it finds LR group, then it adds a chassis of the
current port binding to each HA chassis group of this LR group.

The number of ports is much greater than number of chassis.  And the
number of distinct LR groups is even smaller than a number of chassis
in a typical setup.  Chances are, the setup only has one LR group.
So, northd today is trying to add the same chassis to the same
HA chassis group over and over again.

In an OpenStack setup with ~20 chassis, 40K ports and 5K routers we
end up with 33+ million calls to add_to_ha_ref_chassis_info() that
also performs a linear search for duplicates leading to about 270
million operations, while we only have 5K Sb HA chassis groups with
15 ref_chassis in each.  I.e. only 5000 * 15 = 75.000 operations out
of 270 million are actually useful.

In practice that leads to 80 second recompute:

  inc_proc_eng|INFO|node: sync_from_sb, recompute (forced) took 80074ms

Fix that by accumulating unique chassis per unique LR groups first
and then updating ref_chassis with these unique values avoiding any
unnecessary calls.  Result is ~1000x performance improvement:

  inc_proc_eng|DBG|node: sync_from_sb, recompute (forced) took 75ms

Considering this as a performance bug, because it makes OVN practically
unusable in certain (default?) OpenStack configurations at scale.

Fixes: b82d351976a1 ("ovn: Add generic HA chassis group")
Reported-at: 
https://mail.openvswitch.org/pipermail/ovs-discuss/2023-August/052614.html
Reported-by: Roberto Bartzen Acosta 
Acked-by: Mark Michelson 
Signed-off-by: Ilya Maximets 
---

cherry-picked from commit 4023d6a5fa573021a6218ac49796f3f7688227d7

This version is also applicable all the way down to 22.03.

 northd/northd.c | 119 +++-
 1 file changed, 76 insertions(+), 43 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index 4d3646d10..7b91a94cb 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -857,6 +857,8 @@ struct lrouter_group {
 int n_router_dps;
 /* Set of ha_chassis_groups which are associated with the router dps. */
 struct sset ha_chassis_groups;
+/* Temporary storage for chassis references while computing HA groups. */
+struct hmapx tmp_ha_chassis;
 };
 
 static struct ovn_datapath *
@@ -7874,6 +7876,7 @@ build_lrouter_groups(struct hmap *ports, struct ovs_list 
*lr_list)
 od->lr_group->router_dps[0] = od;
 od->lr_group->n_router_dps = 1;
 sset_init(>lr_group->ha_chassis_groups);
+hmapx_init(>lr_group->tmp_ha_chassis);
 build_lrouter_groups__(ports, od);
 }
 }
@@ -15783,6 +15786,7 @@ destroy_datapaths_and_ports(struct hmap *datapaths, 
struct hmap *ports,
 
 free(lr_group->router_dps);
 sset_destroy(_group->ha_chassis_groups);
+hmapx_destroy(_group->tmp_ha_chassis);
 free(lr_group);
 }
 }
@@ -16319,38 +16323,27 @@ ovnnb_db_run(struct northd_input *input_data,
 smap_destroy();
 }
 
-/* Stores the list of chassis which references an ha_chassis_group.
+/* Stores the set of chassis which references an ha_chassis_group.
  */
 struct ha_ref_chassis_info {
 const struct sbrec_ha_chassis_group *ha_chassis_group;
-struct sbrec_chassis **ref_chassis;
-size_t n_ref_chassis;
-size_t free_slots;
+struct hmapx ref_chassis;
 };
 
 static void
 add_to_ha_ref_chassis_info(struct ha_ref_chassis_info *ref_ch_info,
-   const struct sbrec_chassis *chassis)
+   const struct hmapx *chassis)
 {
-for (size_t j = 0; j < ref_ch_info->n_ref_chassis; j++) {
-if (ref_ch_info->ref_chassis[j] == chassis) {
-   return;
-}
-}
+if (!hmapx_count(_ch_info->ref_chassis)) {
+hmapx_destroy(_ch_info->ref_chassis);
+hmapx_clone(_ch_info->ref_chassis, chassis);
+} else {
+struct hmapx_node *node;
 
-/* Allocate space for 3 chassis at a time. */
-if (!ref_ch_info->free_slots) {
-ref_ch_info->ref_chassis =
-xrealloc(ref_ch_info->ref_chassis,
- sizeof *ref_ch_info->ref_chassis *
- (ref_ch_info->n_ref_chassis + 3));
-ref_ch_info->free_slots = 3;
+HMAPX_FOR_EACH (node, chassis) {
+hmapx_add(_ch_info->ref_chassis, node->data);
+}
 }
-
-ref_ch_info->ref_chassis[ref_ch_info->n_ref_chassis] =
-CONST_CAST(struct sbrec_chassis *, chassis);
-ref_ch_info->n_ref_chassis++;
-ref_ch_info->free_slots--;
 }
 
 struct ha_chassis_group_node {
@@ -16379,9 +16372,20 @@ update_sb_ha_group_ref_chassis(struct northd_input 
*input_data,
 struct shash_node *node;
 SHASH_FOR_EACH_SAFE (node, ha_ref_chassis_map) {
 struct ha_ref_chassis_info *ha_ref_info = node->data;
+size_t n = hmapx_count(_ref_info->ref_chassis);
+