Re: [ovs-dev] [PATCH ovn v7 1/4] northd: Handle load balancer changes for a logical switch.

2023-09-11 Thread Han Zhou
On Mon, Sep 11, 2023 at 9:01 AM  wrote:
>
> From: Numan Siddique 
>
> 'lb_data' engine node now also handles logical switch changes.
> Its data maintains ls-to-lb related information, i.e., if a
> logical switch sw0 has lb1, lb2 and lb3 associated, then
> it stores this info in its data.  And when a new load balancer
> lb4 is associated with it, it stores this information in its
> tracked data so that the 'northd' engine node can handle it
> accordingly.  Tracked data will have information like:
>   changed ls -> {sw0 : {associated_lbs: [lb4]}}
>
> The first handler 'northd_lb_data_handler_pre_od' is called before the
> 'northd_nb_logical_switch_handler' handler and it just creates or
> deletes the lb_datapaths hmap for the tracked lbs.
>
> The northd handler 'northd_lb_data_handler' updates the
> ovn_lb_datapaths's 'nb_ls_map' bitmap accordingly.
>
> Eg.  If the lb_data has the below tracked data:
>
> tracked_data = {'crupdated_lbs': [lb1, lb2],
> 'deleted_lbs': [lb3],
> 'crupdated_lb_groups': [lbg1, lbg2],
> 'crupdated_ls_lbs': [{ls: sw0, assoc_lbs: [lb1]},
>  {ls: sw1, assoc_lbs: [lb1, lb2]}]}
>
> The handler northd_lb_data_handler() creates the
> ovn_lb_datapaths objects for lb1 and lb2 and deletes lb3 from
> the ovn_lb_datapaths hmap.  It does the same for the created or updated lb
> groups lbg1 and lbg2 in the ovn_lbgrp_datapaths map.  It also updates the
> nb_ls_map bitmap of lb1 for sw0 and sw1, and of lb2 for sw1.
>
> Reviewed-by: Ales Musil 
> Acked-by: Mark Michelson 
> Signed-off-by: Numan Siddique 
> ---
>  lib/lb.c |   5 +-
>  northd/en-lb-data.c  | 176 +++
>  northd/en-lb-data.h  |  17 
>  northd/en-lflow.c|   6 ++
>  northd/en-northd.c   |   6 +-
>  northd/inc-proc-northd.c |   2 +
>  northd/northd.c  |  83 +++---
>  northd/northd.h  |   4 +-
>  tests/ovn-northd.at  |  56 +
>  9 files changed, 322 insertions(+), 33 deletions(-)
>
> diff --git a/lib/lb.c b/lib/lb.c
> index 6fd67e2218..e6c9dc2be2 100644
> --- a/lib/lb.c
> +++ b/lib/lb.c
> @@ -1088,7 +1088,10 @@ ovn_lb_datapaths_add_ls(struct ovn_lb_datapaths
*lb_dps, size_t n,
>  struct ovn_datapath **ods)
>  {
>  for (size_t i = 0; i < n; i++) {
> -bitmap_set1(lb_dps->nb_ls_map, ods[i]->index);
> +if (!bitmap_is_set(lb_dps->nb_ls_map, ods[i]->index)) {
> +bitmap_set1(lb_dps->nb_ls_map, ods[i]->index);
> +lb_dps->n_nb_ls++;
> +}
>  }
>  }
>
> diff --git a/northd/en-lb-data.c b/northd/en-lb-data.c
> index 8acd9c8cb2..02b1bfd7a4 100644
> --- a/northd/en-lb-data.c
> +++ b/northd/en-lb-data.c
> @@ -39,6 +39,14 @@ static void lb_data_destroy(struct ed_type_lb_data *);
>  static void build_lbs(const struct nbrec_load_balancer_table *,
>const struct nbrec_load_balancer_group_table *,
>struct hmap *lbs, struct hmap *lb_groups);
> +static void build_od_lb_map(const struct nbrec_logical_switch_table *,
> + struct hmap *od_lb_map);
> +static struct od_lb_data *find_od_lb_data(struct hmap *od_lb_map,
> +  const struct uuid *od_uuid);
> +static void destroy_od_lb_data(struct od_lb_data *od_lb_data);
> +static struct od_lb_data *create_od_lb_data(struct hmap *od_lb_map,
> +const struct uuid *od_uuid);
> +
>  static struct ovn_lb_group *create_lb_group(
>  const struct nbrec_load_balancer_group *, struct hmap *lbs,
>  struct hmap *lb_groups);
> @@ -54,6 +62,7 @@ static struct crupdated_lbgrp *
> struct tracked_lb_data *);
>  static void add_deleted_lbgrp_to_tracked_data(
>  struct ovn_lb_group *, struct tracked_lb_data *);
> +static bool is_ls_lbs_changed(const struct nbrec_logical_switch *nbs);
>
>  /* 'lb_data' engine node manages the NB load balancers and load balancer
>   * groups.  For each NB LB, it creates 'struct ovn_northd_lb' and
> @@ -80,9 +89,13 @@ en_lb_data_run(struct engine_node *node, void *data)
>  EN_OVSDB_GET(engine_get_input("NB_load_balancer", node));
>  const struct nbrec_load_balancer_group_table *nb_lbg_table =
>  EN_OVSDB_GET(engine_get_input("NB_load_balancer_group", node));
> +const struct nbrec_logical_switch_table *nb_ls_table =
> +EN_OVSDB_GET(engine_get_input("NB_logical_switch", node));
>
>  lb_data->tracked = false;
>  build_lbs(nb_lb_table, nb_lbg_table, &lb_data->lbs, &lb_data->lbgrps);
> +build_od_lb_map(nb_ls_table, &lb_data->ls_lb_map);
> +
>  engine_set_node_state(node, EN_UPDATED);
>  }
>
> @@ -230,18 +243,104 @@ lb_data_load_balancer_group_handler(struct
engine_node *node, void *data)
>  return true;
>  }
>
> +struct od_lb_data {
> +struct hmap_node hmap_node;
> +struct uuid 

Re: [ovs-dev] [PATCH ovn v6 05/16] northd: Handle load balancer changes for a logical switch.

2023-09-11 Thread Numan Siddique
On Mon, Sep 11, 2023 at 5:58 PM Han Zhou  wrote:
>
> On Mon, Sep 11, 2023 at 2:15 PM Numan Siddique  wrote:
> >
> > On Mon, Sep 11, 2023 at 2:18 PM Han Zhou  wrote:
> > >
> > > On Mon, Sep 11, 2023 at 9:48 AM Numan Siddique  wrote:
> > > >
> > > > On Fri, Sep 1, 2023 at 12:52 AM Han Zhou  wrote:
> > > > >
> > > > > On Thu, Aug 31, 2023 at 7:14 PM Han Zhou  wrote:
> > > > > >
> > > > > >
> > > > > >
> > > > > > On Fri, Aug 18, 2023 at 1:58 AM  wrote:
> > > > > > >
> > > > > > > From: Numan Siddique 
> > > > > > >
> > > > > > > 'lb_data' engine node now also handles logical switch changes.
> > > > > > > Its data maintains ls to lb related information. i.e if a
> > > > > > > logical switch sw0 has lb1, lb2 and lb3 associated then
> > > > > > > it stores this info in its data.  And when a new load balancer
> > > > > > > lb4 is associated to it, it stores this information in its
> > > > > > > tracked data so that 'northd' engine node can handle it
> > > > > > > accordingly.  Tracked data will have information like:
> > > > > > >   changed ls -> {sw0 : {associated_lbs: [lb4]}
> > > > > > >
> > > > > > > In the 'northd' engine node, an additional handler for 'lb_data'
> > > > > > > is added after the 'nbrec_logical_switch' changes.  With this
> patch
> > > > > > > we now have 2 handlers for 'lb_data' in 'northd' engine node.
> > > > > > >
> > > > > > > 
> > > > > > > engine_add_input(&en_northd, &en_lb_data,
> > > > > > >  northd_lb_data_handler_pre_od);
> > > > > > > engine_add_input(&en_northd, &en_nb_logical_switch,
> > > > > > >  northd_nb_logical_switch_handler);
> > > > > > > engine_add_input(&en_northd, &en_lb_data,
> > > > > > >  northd_lb_data_handler_post_od);
> > > > > > > 
> > > > > > >
> > > > > > > The first handler 'northd_lb_data_handler_pre_od' is called
> before
> > > the
> > > > > > > 'northd_nb_logical_switch_handler' handler and it just creates
> or
> > > > > > > deletes the lb_datapaths hmap for the tracked lbs.
> > > > > > >
> > > > > > > The second handler 'northd_lb_data_handler_post_od' is called
> after
> > > > > > > the 'northd_nb_logical_switch_handler' handler and it updates
> the
> > > > > > > ovn_lb_datapaths's 'nb_ls_map' bitmap.
> > > > > > >
> > > > > > > Eg.  If the lb_data has the below tracked data:
> > > > > > >
> > > > > > > tracked_data = {'crupdated_lbs': [lb1, lb2],
> > > > > > > 'deleted_lbs': [lb3],
> > > > > > > 'crupdated_lb_groups': [lbg1, lbg2],
> > > > > > > 'crupdated_ls_lbs': [{ls: sw0, assoc_lbs: [lb1],
> > > > > > >  {ls: sw1, assoc_lbs: [lb1,
> > > lb2]}
> > > > > > >
> > > > > > > then the first handler northd_lb_data_handler_pre_od(), creates
> the
> > > > > > > ovn_lb_datapaths object for lb1 and lb2 and deletes lb3 from
> > > > > > > the ovn_lb_datapaths hmap.  Similarly for the created or
> updated lb
> > > > > > > groups lbg1 and lbg2.
> > > > > > >
> > > > > > > The second handler northd_lb_data_handler_post_od() updates the
> > > > > > > nb_ls_bitmap of lb1 for sw0 and sw1 and nb_ls_bitmap of lb2 for
> sw1.
> > > > > > >
> > > > > > > This second handler is added mainly so that the actual logical
> > > switch
> > > > > > > handler northd_nb_logical_switch_handler() has done modifying
> the
> > > > > > > 'od' struct.
> > > > > > >
> > > > > >
> > > >
> > > > Hi Han and others,
> > > >
> > > > Thanks for the reviews.
> > > >
> > > >
> > > > > > Thanks for explaining the pre_od and post_od approach here! If we
> do
> > > want
> > > > > to go with this approach I'd suggest adding comments also in the
> code so
> > > > > that people can understand the code easily without having to look
> at the
> > > > > commit history.
> > > > > > However, I am concerned with this approach. Here are the
> reasons:
> > > > > >
> > > > > > 1. The I-P engine implementation didn't consider such usage that
> add
> > > > > dependency between two nodes twice, although it didn't explicitly
> call
> > > this
> > > > > out in the documentation. If this is considered valid and necessary
> use
> > > > > case, we should call this out in the I-P engine documentation so
> that
> > > > > people modifying I-P engine implementation is aware of this and
> won't
> > > break
> > > > > it, and of course we should carefully evaluate the side-effects that
> > > can be
> > > > > caused by this.
> > > > > >
> > > > > > 2. For the current use case I think it will lead to dangling
> pointer
> > > in
> > > > > this _pre_od handling, because a LS may have been deleted from IDL,
> > > while
> > > > > this function still tries to access od->nbs, e.g. in
> > > > > init_lb_for_datapaths(). Because of this, we should handle LB data
> only
> > > > > after handling LS changes.
> > > > > >
> > > > > > 3. From the explanation above I can see what is done by pre_od and
> > > > > post_od but still don't understand why can't we do both steps after
> > > > > handling LS changes. Could you explain why? And is it possible to
> move
> 

Re: [ovs-dev] [PATCH ovn v6 05/16] northd: Handle load balancer changes for a logical switch.

2023-09-11 Thread Han Zhou
On Mon, Sep 11, 2023 at 2:15 PM Numan Siddique  wrote:
>
> On Mon, Sep 11, 2023 at 2:18 PM Han Zhou  wrote:
> >
> > On Mon, Sep 11, 2023 at 9:48 AM Numan Siddique  wrote:
> > >
> > > On Fri, Sep 1, 2023 at 12:52 AM Han Zhou  wrote:
> > > >
> > > > On Thu, Aug 31, 2023 at 7:14 PM Han Zhou  wrote:
> > > > >
> > > > >
> > > > >
> > > > > On Fri, Aug 18, 2023 at 1:58 AM  wrote:
> > > > > >
> > > > > > From: Numan Siddique 
> > > > > >
> > > > > > 'lb_data' engine node now also handles logical switch changes.
> > > > > > Its data maintains ls to lb related information. i.e if a
> > > > > > logical switch sw0 has lb1, lb2 and lb3 associated then
> > > > > > it stores this info in its data.  And when a new load balancer
> > > > > > lb4 is associated to it, it stores this information in its
> > > > > > tracked data so that 'northd' engine node can handle it
> > > > > > accordingly.  Tracked data will have information like:
> > > > > >   changed ls -> {sw0 : {associated_lbs: [lb4]}
> > > > > >
> > > > > > In the 'northd' engine node, an additional handler for 'lb_data'
> > > > > > is added after the 'nbrec_logical_switch' changes.  With this
patch
> > > > > > we now have 2 handlers for 'lb_data' in 'northd' engine node.
> > > > > >
> > > > > > 
> > > > > > engine_add_input(&en_northd, &en_lb_data,
> > > > > >  northd_lb_data_handler_pre_od);
> > > > > > engine_add_input(&en_northd, &en_nb_logical_switch,
> > > > > >  northd_nb_logical_switch_handler);
> > > > > > engine_add_input(&en_northd, &en_lb_data,
> > > > > >  northd_lb_data_handler_post_od);
> > > > > > 
> > > > > >
> > > > > > The first handler 'northd_lb_data_handler_pre_od' is called
before
> > the
> > > > > > 'northd_nb_logical_switch_handler' handler and it just creates
or
> > > > > > deletes the lb_datapaths hmap for the tracked lbs.
> > > > > >
> > > > > > The second handler 'northd_lb_data_handler_post_od' is called
after
> > > > > > the 'northd_nb_logical_switch_handler' handler and it updates
the
> > > > > > ovn_lb_datapaths's 'nb_ls_map' bitmap.
> > > > > >
> > > > > > Eg.  If the lb_data has the below tracked data:
> > > > > >
> > > > > > tracked_data = {'crupdated_lbs': [lb1, lb2],
> > > > > > 'deleted_lbs': [lb3],
> > > > > > 'crupdated_lb_groups': [lbg1, lbg2],
> > > > > > 'crupdated_ls_lbs': [{ls: sw0, assoc_lbs: [lb1],
> > > > > >  {ls: sw1, assoc_lbs: [lb1,
> > lb2]}
> > > > > >
> > > > > > then the first handler northd_lb_data_handler_pre_od(), creates
the
> > > > > > ovn_lb_datapaths object for lb1 and lb2 and deletes lb3 from
> > > > > > the ovn_lb_datapaths hmap.  Similarly for the created or
updated lb
> > > > > > groups lbg1 and lbg2.
> > > > > >
> > > > > > The second handler northd_lb_data_handler_post_od() updates the
> > > > > > nb_ls_bitmap of lb1 for sw0 and sw1 and nb_ls_bitmap of lb2 for
sw1.
> > > > > >
> > > > > > This second handler is added mainly so that the actual logical
> > switch
> > > > > > handler northd_nb_logical_switch_handler() has done modifying
the
> > > > > > 'od' struct.
> > > > > >
> > > > >
> > >
> > > Hi Han and others,
> > >
> > > Thanks for the reviews.
> > >
> > >
> > > > > Thanks for explaining the pre_od and post_od approach here! If we
do
> > want
> > > > to go with this approach I'd suggest adding comments also in the
code so
> > > > that people can understand the code easily without having to look
at the
> > > > commit history.
> > > > > However, I am concerned with this approach. Here are the
reasons:
> > > > >
> > > > > 1. The I-P engine implementation didn't consider such usage that
add
> > > > dependency between two nodes twice, although it didn't explicitly
call
> > this
> > > > out in the documentation. If this is considered valid and necessary
use
> > > > case, we should call this out in the I-P engine documentation so
that
> > > > people modifying I-P engine implementation is aware of this and
won't
> > break
> > > > it, and of course we should carefully evaluate the side-effects that
> > can be
> > > > caused by this.
> > > > >
> > > > > 2. For the current use case I think it will lead to dangling
pointer
> > in
> > > > this _pre_od handling, because a LS may have been deleted from IDL,
> > while
> > > > this function still tries to access od->nbs, e.g. in
> > > > init_lb_for_datapaths(). Because of this, we should handle LB data
only
> > > > after handling LS changes.
> > > > >
> > > > > 3. From the explanation above I can see what is done by pre_od and
> > > > post_od but still don't understand why can't we do both steps after
> > > > handling LS changes. Could you explain why? And is it possible to
move
> > both
> > > > steps to post_od?
> > >
> > >
> > > In the earlier versions , lb_data engine node was not handling logical
> > > switch or logical router changes.
> > > It was only handling the load balancer changes.
> > > northd engine node handler for lb_data  had to determine the
> > > associated or 

Re: [ovs-dev] [PATCH ovn v6 05/16] northd: Handle load balancer changes for a logical switch.

2023-09-11 Thread Numan Siddique
On Mon, Sep 11, 2023 at 2:18 PM Han Zhou  wrote:
>
> On Mon, Sep 11, 2023 at 9:48 AM Numan Siddique  wrote:
> >
> > On Fri, Sep 1, 2023 at 12:52 AM Han Zhou  wrote:
> > >
> > > On Thu, Aug 31, 2023 at 7:14 PM Han Zhou  wrote:
> > > >
> > > >
> > > >
> > > > On Fri, Aug 18, 2023 at 1:58 AM  wrote:
> > > > >
> > > > > From: Numan Siddique 
> > > > >
> > > > > 'lb_data' engine node now also handles logical switch changes.
> > > > > Its data maintains ls to lb related information. i.e if a
> > > > > logical switch sw0 has lb1, lb2 and lb3 associated then
> > > > > it stores this info in its data.  And when a new load balancer
> > > > > lb4 is associated to it, it stores this information in its
> > > > > tracked data so that 'northd' engine node can handle it
> > > > > accordingly.  Tracked data will have information like:
> > > > >   changed ls -> {sw0 : {associated_lbs: [lb4]}
> > > > >
> > > > > In the 'northd' engine node, an additional handler for 'lb_data'
> > > > > is added after the 'nbrec_logical_switch' changes.  With this patch
> > > > > we now have 2 handlers for 'lb_data' in 'northd' engine node.
> > > > >
> > > > > 
> > > > > engine_add_input(&en_northd, &en_lb_data,
> > > > >  northd_lb_data_handler_pre_od);
> > > > > engine_add_input(&en_northd, &en_nb_logical_switch,
> > > > >  northd_nb_logical_switch_handler);
> > > > > engine_add_input(&en_northd, &en_lb_data,
> > > > >  northd_lb_data_handler_post_od);
> > > > > 
> > > > >
> > > > > The first handler 'northd_lb_data_handler_pre_od' is called before
> the
> > > > > 'northd_nb_logical_switch_handler' handler and it just creates or
> > > > > deletes the lb_datapaths hmap for the tracked lbs.
> > > > >
> > > > > The second handler 'northd_lb_data_handler_post_od' is called after
> > > > > the 'northd_nb_logical_switch_handler' handler and it updates the
> > > > > ovn_lb_datapaths's 'nb_ls_map' bitmap.
> > > > >
> > > > > Eg.  If the lb_data has the below tracked data:
> > > > >
> > > > > tracked_data = {'crupdated_lbs': [lb1, lb2],
> > > > > 'deleted_lbs': [lb3],
> > > > > 'crupdated_lb_groups': [lbg1, lbg2],
> > > > > 'crupdated_ls_lbs': [{ls: sw0, assoc_lbs: [lb1],
> > > > >  {ls: sw1, assoc_lbs: [lb1,
> lb2]}
> > > > >
> > > > > then the first handler northd_lb_data_handler_pre_od(), creates the
> > > > > ovn_lb_datapaths object for lb1 and lb2 and deletes lb3 from
> > > > > the ovn_lb_datapaths hmap.  Similarly for the created or updated lb
> > > > > groups lbg1 and lbg2.
> > > > >
> > > > > The second handler northd_lb_data_handler_post_od() updates the
> > > > > nb_ls_bitmap of lb1 for sw0 and sw1 and nb_ls_bitmap of lb2 for sw1.
> > > > >
> > > > > This second handler is added mainly so that the actual logical
> switch
> > > > > handler northd_nb_logical_switch_handler() has done modifying the
> > > > > 'od' struct.
> > > > >
> > > >
> >
> > Hi Han and others,
> >
> > Thanks for the reviews.
> >
> >
> > > > Thanks for explaining the pre_od and post_od approach here! If we do
> want
> > > to go with this approach I'd suggest adding comments also in the code so
> > > that people can understand the code easily without having to look at the
> > > commit history.
> > > > However, I am concerned with this approach. Here are the reasons:
> > > >
> > > > 1. The I-P engine implementation didn't consider such usage that add
> > > dependency between two nodes twice, although it didn't explicitly call
> this
> > > out in the documentation. If this is considered valid and necessary use
> > > case, we should call this out in the I-P engine documentation so that
> > > people modifying I-P engine implementation is aware of this and won't
> break
> > > it, and of course we should carefully evaluate the side-effects that
> can be
> > > caused by this.
> > > >
> > > > 2. For the current use case I think it will lead to dangling pointer
> in
> > > this _pre_od handling, because a LS may have been deleted from IDL,
> while
> > > this function still tries to access od->nbs, e.g. in
> > > init_lb_for_datapaths(). Because of this, we should handle LB data only
> > > after handling LS changes.
> > > >
> > > > 3. From the explanation above I can see what is done by pre_od and
> > > post_od but still don't understand why can't we do both steps after
> > > handling LS changes. Could you explain why? And is it possible to move
> both
> > > steps to post_od?
> >
> >
> > In the earlier versions , lb_data engine node was not handling logical
> > switch or logical router changes.
> > It was only handling the load balancer changes.
> > northd engine node handler for lb_data  had to determine the
> > associated or disassociated load balancers/lb groups
> > for a logical switch or router.  And I could that properly using 2
> > handlers (pre_od and post_od).
> > In later versions,  I moved that logic to the lb_data engine node
> > itself.  But I thought I'll keep 2 handlers, the pre_od() to

Re: [ovs-dev] [PATCH ovn v6 05/16] northd: Handle load balancer changes for a logical switch.

2023-09-11 Thread Han Zhou
On Mon, Sep 11, 2023 at 9:48 AM Numan Siddique  wrote:
>
> On Fri, Sep 1, 2023 at 12:52 AM Han Zhou  wrote:
> >
> > On Thu, Aug 31, 2023 at 7:14 PM Han Zhou  wrote:
> > >
> > >
> > >
> > > On Fri, Aug 18, 2023 at 1:58 AM  wrote:
> > > >
> > > > From: Numan Siddique 
> > > >
> > > > 'lb_data' engine node now also handles logical switch changes.
> > > > Its data maintains ls to lb related information. i.e if a
> > > > logical switch sw0 has lb1, lb2 and lb3 associated then
> > > > it stores this info in its data.  And when a new load balancer
> > > > lb4 is associated to it, it stores this information in its
> > > > tracked data so that 'northd' engine node can handle it
> > > > accordingly.  Tracked data will have information like:
> > > >   changed ls -> {sw0 : {associated_lbs: [lb4]}
> > > >
> > > > In the 'northd' engine node, an additional handler for 'lb_data'
> > > > is added after the 'nbrec_logical_switch' changes.  With this patch
> > > > we now have 2 handlers for 'lb_data' in 'northd' engine node.
> > > >
> > > > 
> > > > engine_add_input(&en_northd, &en_lb_data,
> > > >  northd_lb_data_handler_pre_od);
> > > > engine_add_input(&en_northd, &en_nb_logical_switch,
> > > >  northd_nb_logical_switch_handler);
> > > > engine_add_input(&en_northd, &en_lb_data,
> > > >  northd_lb_data_handler_post_od);
> > > > 
> > > >
> > > > The first handler 'northd_lb_data_handler_pre_od' is called before
the
> > > > 'northd_nb_logical_switch_handler' handler and it just creates or
> > > > deletes the lb_datapaths hmap for the tracked lbs.
> > > >
> > > > The second handler 'northd_lb_data_handler_post_od' is called after
> > > > the 'northd_nb_logical_switch_handler' handler and it updates the
> > > > ovn_lb_datapaths's 'nb_ls_map' bitmap.
> > > >
> > > > Eg.  If the lb_data has the below tracked data:
> > > >
> > > > tracked_data = {'crupdated_lbs': [lb1, lb2],
> > > > 'deleted_lbs': [lb3],
> > > > 'crupdated_lb_groups': [lbg1, lbg2],
> > > > 'crupdated_ls_lbs': [{ls: sw0, assoc_lbs: [lb1],
> > > >  {ls: sw1, assoc_lbs: [lb1,
lb2]}
> > > >
> > > > then the first handler northd_lb_data_handler_pre_od(), creates the
> > > > ovn_lb_datapaths object for lb1 and lb2 and deletes lb3 from
> > > > the ovn_lb_datapaths hmap.  Similarly for the created or updated lb
> > > > groups lbg1 and lbg2.
> > > >
> > > > The second handler northd_lb_data_handler_post_od() updates the
> > > > nb_ls_bitmap of lb1 for sw0 and sw1 and nb_ls_bitmap of lb2 for sw1.
> > > >
> > > > This second handler is added mainly so that the actual logical
switch
> > > > handler northd_nb_logical_switch_handler() has done modifying the
> > > > 'od' struct.
> > > >
> > >
>
> Hi Han and others,
>
> Thanks for the reviews.
>
>
> > > Thanks for explaining the pre_od and post_od approach here! If we do
want
> > to go with this approach I'd suggest adding comments also in the code so
> > that people can understand the code easily without having to look at the
> > commit history.
> > > However, I am concerned with this approach. Here are the reasons:
> > >
> > > 1. The I-P engine implementation didn't consider such usage that add
> > dependency between two nodes twice, although it didn't explicitly call
this
> > out in the documentation. If this is considered valid and necessary use
> > case, we should call this out in the I-P engine documentation so that
> > people modifying I-P engine implementation is aware of this and won't
break
> > it, and of course we should carefully evaluate the side-effects that
can be
> > caused by this.
> > >
> > > 2. For the current use case I think it will lead to dangling pointer
in
> > this _pre_od handling, because a LS may have been deleted from IDL,
while
> > this function still tries to access od->nbs, e.g. in
> > init_lb_for_datapaths(). Because of this, we should handle LB data only
> > after handling LS changes.
> > >
> > > 3. From the explanation above I can see what is done by pre_od and
> > post_od but still don't understand why can't we do both steps after
> > handling LS changes. Could you explain why? And is it possible to move
both
> > steps to post_od?
>
>
> In the earlier versions , lb_data engine node was not handling logical
> switch or logical router changes.
> It was only handling the load balancer changes.
> northd engine node handler for lb_data  had to determine the
> associated or disassociated load balancers/lb groups
> for a logical switch or router.  And I could that properly using 2
> handlers (pre_od and post_od).
> In later versions,  I moved that logic to the lb_data engine node
> itself.  But I thought I'll keep 2 handlers, the pre_od() to
> just handle the load balancer/group changes and the post_od for
> updating the datapath bitmap of the lb_dps.
> IMO that seemed cleaner.
>
> But as you mentioned we can do all these in one handler.  So in v7 I
> just moved to one handler.
>
>
>
> > >
> > > Please find some 

Re: [ovs-dev] [PATCH ovn v6 07/16] northd: Sync SB Port bindings NAT column in a separate engine node.

2023-09-11 Thread Numan Siddique
On Fri, Sep 1, 2023 at 2:41 AM Han Zhou  wrote:
>
> On Fri, Aug 18, 2023 at 1:58 AM  wrote:
> >
> > From: Numan Siddique 
> >
> > A new engine node 'sync_to_sb_pb' is added within 'sync_to_sb'
> > node to sync NAT column of Port bindings table.  This separation
> > is required in order to add load balancer group I-P handling
> > in 'northd' engine node (which is handled in the next commit).
> >
> > 'sync_to_sb_pb' engine node can be later expanded to sync other
> > Port binding columns if required.
> >
> > Signed-off-by: Numan Siddique 
> > ---
> >  northd/en-sync-sb.c  |  31 +
> >  northd/en-sync-sb.h  |   4 +
> >  northd/inc-proc-northd.c |   8 +-
> >  northd/northd.c  | 243 +--
> >  northd/northd.h  |   2 +
> >  5 files changed, 174 insertions(+), 114 deletions(-)
> >
> > diff --git a/northd/en-sync-sb.c b/northd/en-sync-sb.c
> > index 9832fce30a..552ed56452 100644
> > --- a/northd/en-sync-sb.c
> > +++ b/northd/en-sync-sb.c
> > @@ -254,6 +254,37 @@ sync_to_sb_lb_northd_handler(struct engine_node
> *node, void *data OVS_UNUSED)
> >  return false;
> >  }
> >
> > +/* sync_to_sb_pb engine node functions.
> > + * This engine node syncs the SB Port Bindings (partly).
> > + * The en_northd engine creates the SB Port binding rows and
> > + * updates most of the columns.
> > + * This engine node updates the port binding columns that
> > + * need to be updated after the northd engine is run.
> > + */
> > +
> > +void *
> > +en_sync_to_sb_pb_init(struct engine_node *node OVS_UNUSED,
> > +  struct engine_arg *arg OVS_UNUSED)
> > +{
> > +return NULL;
> > +}
> > +
> > +void
> > +en_sync_to_sb_pb_run(struct engine_node *node, void *data OVS_UNUSED)
> > +{
> > +const struct engine_context *eng_ctx = engine_get_context();
> > +struct northd_data *northd_data = engine_get_input_data("northd",
> node);
> > +
> > +sync_pbs(eng_ctx->ovnsb_idl_txn, &northd_data->ls_ports);
> > +engine_set_node_state(node, EN_UPDATED);
> > +}
> > +
> > +void
> > +en_sync_to_sb_pb_cleanup(void *data OVS_UNUSED)
> > +{
> > +
> > +}
> > +
> >  /* static functions. */
> >  static void
> >  sync_addr_set(struct ovsdb_idl_txn *ovnsb_txn, const char *name,
> > diff --git a/northd/en-sync-sb.h b/northd/en-sync-sb.h
> > index 06d2a57710..700d3340e4 100644
> > --- a/northd/en-sync-sb.h
> > +++ b/northd/en-sync-sb.h
> > @@ -22,4 +22,8 @@ void en_sync_to_sb_lb_run(struct engine_node *, void
> *data);
> >  void en_sync_to_sb_lb_cleanup(void *data);
> >  bool sync_to_sb_lb_northd_handler(struct engine_node *, void *data
> OVS_UNUSED);
> >
> > +void *en_sync_to_sb_pb_init(struct engine_node *, struct engine_arg *);
> > +void en_sync_to_sb_pb_run(struct engine_node *, void *data);
> > +void en_sync_to_sb_pb_cleanup(void *data);
> > +
> >  #endif /* end of EN_SYNC_SB_H */
> > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> > index 402c94e88c..dc8b880fd8 100644
> > --- a/northd/inc-proc-northd.c
> > +++ b/northd/inc-proc-northd.c
> > @@ -141,6 +141,7 @@ static ENGINE_NODE(sync_to_sb_addr_set,
> "sync_to_sb_addr_set");
> >  static ENGINE_NODE(fdb_aging, "fdb_aging");
> >  static ENGINE_NODE(fdb_aging_waker, "fdb_aging_waker");
> >  static ENGINE_NODE(sync_to_sb_lb, "sync_to_sb_lb");
> > +static ENGINE_NODE(sync_to_sb_pb, "sync_to_sb_pb");
> >  static ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lb_data, "lb_data");
> >
> >  void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> > @@ -215,13 +216,16 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> >   sync_to_sb_lb_northd_handler);
> >  engine_add_input(&en_sync_to_sb_lb, &en_sb_load_balancer, NULL);
> >
> > +engine_add_input(&en_sync_to_sb_pb, &en_northd, NULL);
>
> NULL handler here always triggers recompute for any en_northd change, which
> causes a performance regression for VIF I-P. Before this change, only
> tracked LSP changes will have related SB port-binding synced to SB, but now
> it iterates through all the ls_ports and resync each of them even if there
> is only one LSP in tracked_changes.
>
> I ran the scale test of the simulated ovn-k8s topology of 500 chassis x 50
> lsp, for each single VIF creation/deletion the "ovn-northd completion" time
> increased from 25ms to 40ms - nearly doubled.
>
> I think a simple handler can be implemented so that only tracked LSPs are
> synced, falling back to recompute only if the changes are not tracked.

Thanks for the reviews.  I've added a handler in v7.  Please take a look.

Thanks
Numan
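
For reference, the v7 handler (sync_to_sb_pb_northd_handler, quoted later in
this digest) follows the shape suggested above: sync only the port bindings
reported in northd's tracked data, and return false so the I-P engine falls
back to a full recompute when the changes are not tracked.  A minimal,
self-contained sketch of that shape, with hypothetical toy names:

#include <stdbool.h>
#include <stdio.h>

/* Toy version of an incremental-processing handler: 'change_tracked' says
 * whether northd produced a tracked list of changed switch ports. */
static bool
toy_pb_northd_handler(bool change_tracked, const char *const *tracked_lsps,
                      size_t n_tracked)
{
    if (!change_tracked) {
        return false;                /* fall back to a full recompute */
    }
    for (size_t i = 0; i < n_tracked; i++) {
        /* Only the changed LSPs get their SB port binding resynced,
         * instead of iterating all of ls_ports. */
        printf("resyncing SB port binding for %s\n", tracked_lsps[i]);
    }
    return true;                     /* handled incrementally */
}

int
main(void)
{
    const char *tracked[] = { "lsp3" };   /* the one VIF that changed */

    toy_pb_northd_handler(true, tracked, 1);    /* incremental path */
    toy_pb_northd_handler(false, NULL, 0);      /* forces recompute */
    return 0;
}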

>
> Thanks,
> Han
>
> > +
> >  /* en_sync_to_sb engine node syncs the SB database tables from
> >   * the NB database tables.
> > - * Right now this engine syncs the SB Address_Set table and
> > - * SB Load_Balancer table.
> > + * Right now this engine syncs the SB Address_Set table,
> > + * SB Load_Balancer table and (partly) SB Port_Binding table.
> >   */
> >  engine_add_input(&en_sync_to_sb, &en_sync_to_sb_addr_set, NULL);
> >  

Re: [ovs-dev] [PATCH ovn v6 05/16] northd: Handle load balancer changes for a logical switch.

2023-09-11 Thread Numan Siddique
On Fri, Sep 1, 2023 at 12:52 AM Han Zhou  wrote:
>
> On Thu, Aug 31, 2023 at 7:14 PM Han Zhou  wrote:
> >
> >
> >
> > On Fri, Aug 18, 2023 at 1:58 AM  wrote:
> > >
> > > From: Numan Siddique 
> > >
> > > 'lb_data' engine node now also handles logical switch changes.
> > > Its data maintains ls to lb related information. i.e if a
> > > logical switch sw0 has lb1, lb2 and lb3 associated then
> > > it stores this info in its data.  And when a new load balancer
> > > lb4 is associated to it, it stores this information in its
> > > tracked data so that 'northd' engine node can handle it
> > > accordingly.  Tracked data will have information like:
> > >   changed ls -> {sw0 : {associated_lbs: [lb4]}
> > >
> > > In the 'northd' engine node, an additional handler for 'lb_data'
> > > is added after the 'nbrec_logical_switch' changes.  With this patch
> > > we now have 2 handlers for 'lb_data' in 'northd' engine node.
> > >
> > > 
> > > engine_add_input(&en_northd, &en_lb_data,
> > >  northd_lb_data_handler_pre_od);
> > > engine_add_input(&en_northd, &en_nb_logical_switch,
> > >  northd_nb_logical_switch_handler);
> > > engine_add_input(&en_northd, &en_lb_data,
> > >  northd_lb_data_handler_post_od);
> > > 
> > >
> > > The first handler 'northd_lb_data_handler_pre_od' is called before the
> > > 'northd_nb_logical_switch_handler' handler and it just creates or
> > > deletes the lb_datapaths hmap for the tracked lbs.
> > >
> > > The second handler 'northd_lb_data_handler_post_od' is called after
> > > the 'northd_nb_logical_switch_handler' handler and it updates the
> > > ovn_lb_datapaths's 'nb_ls_map' bitmap.
> > >
> > > Eg.  If the lb_data has the below tracked data:
> > >
> > > tracked_data = {'crupdated_lbs': [lb1, lb2],
> > > 'deleted_lbs': [lb3],
> > > 'crupdated_lb_groups': [lbg1, lbg2],
> > > 'crupdated_ls_lbs': [{ls: sw0, assoc_lbs: [lb1],
> > >  {ls: sw1, assoc_lbs: [lb1, lb2]}
> > >
> > > then the first handler northd_lb_data_handler_pre_od(), creates the
> > > ovn_lb_datapaths object for lb1 and lb2 and deletes lb3 from
> > > the ovn_lb_datapaths hmap.  Similarly for the created or updated lb
> > > groups lbg1 and lbg2.
> > >
> > > The second handler northd_lb_data_handler_post_od() updates the
> > > nb_ls_bitmap of lb1 for sw0 and sw1 and nb_ls_bitmap of lb2 for sw1.
> > >
> > > This second handler is added mainly so that it runs only after the actual
> > > logical switch handler northd_nb_logical_switch_handler() has finished
> > > modifying the 'od' struct.
> > >
> >

Hi Han and others,

Thanks for the reviews.


> > Thanks for explaining the pre_od and post_od approach here! If we do want
> to go with this approach I'd suggest adding comments also in the code so
> that people can understand the code easily without having to look at the
> commit history.
> > However, I am concerned with this approach. Here are the reasons:
> >
> > 1. The I-P engine implementation didn't consider this kind of usage of
> > adding a dependency between two nodes twice, although it didn't explicitly
> > call this out in the documentation. If this is considered a valid and
> > necessary use case, we should call this out in the I-P engine documentation
> > so that people modifying the I-P engine implementation are aware of this
> > and won't break it, and of course we should carefully evaluate the
> > side-effects that can be caused by this.
> >
> > 2. For the current use case I think it will lead to a dangling pointer in
> > this _pre_od handling, because a LS may have been deleted from the IDL,
> > while this function still tries to access od->nbs, e.g. in
> > init_lb_for_datapaths(). Because of this, we should handle LB data only
> > after handling LS changes.
> >
> > 3. From the explanation above I can see what is done by pre_od and
> > post_od, but I still don't understand why we can't do both steps after
> > handling LS changes. Could you explain why? And is it possible to move
> > both steps to post_od?


In the earlier versions, the lb_data engine node was not handling logical
switch or logical router changes.
It was only handling the load balancer changes.
The northd engine node handler for lb_data had to determine the
associated or disassociated load balancers/lb groups
for a logical switch or router.  And I could do that properly using 2
handlers (pre_od and post_od).
In later versions, I moved that logic to the lb_data engine node
itself.  But I thought I'd keep 2 handlers, the pre_od() to
just handle the load balancer/group changes and the post_od for
updating the datapath bitmap of the lb_dps.
IMO that seemed cleaner.

But as you mentioned, we can do all of this in one handler.  So in v7 I
just moved to one handler.



> >
> > Please find some more comments below.
> >
> > > Signed-off-by: Numan Siddique 
> > > ---
> > >  lib/lb.c |   5 +-
> > >  northd/en-lb-data.c  | 176 +++
> > >  northd/en-lb-data.h  |  17 
> > >  northd/en-lflow.c|   6 ++
> 

Re: [ovs-dev] [PATCH ovn v7 2/4] northd: Handle load balancer group changes for a logical switch.

2023-09-11 Thread 0-day Robot
Bleep bloop.  Greetings Numan Siddique, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 80 characters long (recommended limit is 79)
#323 FILE: northd/northd.c:5587:
HMAP_FOR_EACH (crupdated_lbgrp, hmap_node, &trk_lb_data->crupdated_lbgrps) {

Lines checked: 447, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot


Re: [ovs-dev] [PATCH ovn v7 1/4] northd: Handle load balancer changes for a logical switch.

2023-09-11 Thread 0-day Robot
Bleep bloop.  Greetings Numan Siddique, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line has trailing whitespace
#509 FILE: northd/northd.c:5432:
 * For every tracked 'lb' and 'lb_group' 

WARNING: Line has trailing whitespace
#520 FILE: northd/northd.c:5443:
 *re-evaluates 'od->has_lb_vip' to reflect any changes to the lb vips. 

Lines checked: 787, Warnings: 2, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot


[ovs-dev] [PATCH ovn v7 4/4] northd: Handle load balancer/group changes for a logical router.

2023-09-11 Thread numans
From: Numan Siddique 

When a logical router gets updated due to load balancer or load balancer
group changes, it is now handled incrementally first in the 'lb_data'
engine node, similar to how logical switch changes are handled.  The
tracked data of 'lb_data' is updated similarly so that the northd engine
handler - northd_handle_lb_data_changes() - handles it.

A new handler northd_handle_lr_changes() is added in the 'northd' engine
node for logical router changes.  This handler returns true if only
load balancer or load balancer group columns are changed.  It returns
false for any other changes.

northd_handle_lb_data_changes() also sets the logical router
od's lb_ips accordingly.
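
A minimal sketch of that "handle it only if the change is limited to the LB
columns, otherwise ask for a recompute" pattern, with toy types (the real
handler inspects the tracked NB Logical_Router rows):

#include <stdbool.h>
#include <stdio.h>

/* Toy summary of which columns of a logical router row changed. */
struct toy_lr_change {
    const char *name;
    bool lb_changed;       /* load_balancer column */
    bool lbgrp_changed;    /* load_balancer_group column */
    bool other_changed;    /* any other column */
};

/* Returns true when every tracked change touches only the LB / LB group
 * columns and can therefore be handled incrementally.  Returning false
 * tells the I-P engine to fall back to a full recompute of the node. */
static bool
toy_handle_lr_changes(const struct toy_lr_change *changes, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        if (changes[i].other_changed) {
            return false;
        }
        if (changes[i].lb_changed || changes[i].lbgrp_changed) {
            printf("incrementally handling LB changes on %s\n",
                   changes[i].name);
        }
    }
    return true;
}

int
main(void)
{
    struct toy_lr_change only_lb[] = {{ "lr0", true, false, false }};
    struct toy_lr_change mixed[]   = {{ "lr1", true, false, true }};

    printf("only LB columns changed -> %s\n",
           toy_handle_lr_changes(only_lb, 1) ? "handled" : "recompute");
    printf("other columns changed   -> %s\n",
           toy_handle_lr_changes(mixed, 1) ? "handled" : "recompute");
    return 0;
}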

Below are the scale testing results done with these patches applied
using ovn-heater.  The test ran the scenario  -
ocp-500-density-heavy.yml [1].

With these patches applied (with load balancer I-P handling in the northd
engine node) the results are:

------------------------------------------------------------------------------------------------------------
                      Min (s)   Median (s) 90%ile (s) 99%ile (s) Max (s)   Mean (s)  Total (s)   Count Failed
------------------------------------------------------------------------------------------------------------
Iteration Total       0.131363  1.189994   3.213526   4.308134   4.394562  1.385713  173.214153  125   0
Namespace.add_ports   0.005176  0.005611   0.006476   0.019867   0.024206  0.006058  0.757188    125   0
WorkerNode.bind_port  0.033776  0.046343   0.054414   0.061719   0.063613  0.046815  11.703773   250   0
WorkerNode.ping_port  0.005156  0.006959   2.044939   3.655328   4.241496  0.627103  156.775832  250   0
------------------------------------------------------------------------------------------------------------

The results with the present main are:

------------------------------------------------------------------------------------------------------------
                      Min (s)   Median (s) 90%ile (s) 99%ile (s) Max (s)   Mean (s)  Total (s)   Count Failed
------------------------------------------------------------------------------------------------------------
Iteration Total       3.233795  4.364926   5.400982   6.412803   7.409757  4.792270  599.033790  125   0
Namespace.add_ports   0.005230  0.006564   0.007379   0.019060   0.037490  0.007223  0.902930    125   0
WorkerNode.bind_port  0.033864  0.044052   0.049608   0.054849   0.056196  0.044005  11.001231   250   0
WorkerNode.ping_port  0.005334  2.060477   5.222422   6.267332   7.284001  2.323020  580.754964  250   0
------------------------------------------------------------------------------------------------------------

Few observations:

 - The total time taken has come down significantly from 599 seconds to 173.
 - 99%ile with these patches is 4.3 seconds compared to 6.4 seconds for the
   main.
 - 90%ile with these patches is 3.2 seconds compared to 5.4 seconds for the
   main.
 - CPU utilization of northd during the test with these patches
   is between 100% and 300%, which is almost the same as main.
   The main difference is that, with these patches, the test duration is
   shorter and hence the overall CPU utilization is lower.

[1] - 
https://github.com/ovn-org/ovn-heater/blob/main/test-scenarios/ocp-500-density-heavy.yml

Reviewed-by: Ales Musil 
Acked-by: Mark Michelson 
Signed-off-by: Numan Siddique 
---
 lib/lb.c |  46 +-
 lib/lb.h |   9 ++
 northd/en-lb-data.c  | 327 +++
 northd/en-lb-data.h  |  14 ++
 northd/en-northd.c   |  20 +++
 northd/en-northd.h   |   1 +
 northd/inc-proc-northd.c |   5 +-
 northd/northd.c  | 245 ++---
 northd/northd.h  |   2 +
 tests/ovn-northd.at  |  42 ++---
 10 files changed, 598 insertions(+), 113 deletions(-)

diff --git a/lib/lb.c b/lib/lb.c
index e6c9dc2be2..d0d562b6fb 100644
--- a/lib/lb.c
+++ b/lib/lb.c
@@ -794,6 +794,7 @@ ovn_lb_group_init(struct ovn_lb_group *lb_group,
 const struct uuid *lb_uuid =
 &nbrec_lb_group->load_balancer[i]->header_.uuid;
 lb_group->lbs[i] = ovn_northd_lb_find(lbs, lb_uuid);
+lb_group->has_routable_lb |= lb_group->lbs[i]->routable;
 }
 }
 
@@ -815,6 +816,7 @@ ovn_lb_group_cleanup(struct ovn_lb_group 

[ovs-dev] [PATCH ovn v7 3/4] northd: Sync SB Port bindings NAT column in a separate engine node.

2023-09-11 Thread numans
From: Numan Siddique 

A new engine node 'sync_to_sb_pb' is added within 'sync_to_sb'
node to sync NAT column of Port bindings table.  This separation
is required in order to add load balancer group I-P handling
in 'northd' engine node (which is handled in the next commit).

'sync_to_sb_pb' engine node can be later expanded to sync other
Port binding columns if required.

Reviewed-by: Ales Musil 
Acked-by: Mark Michelson 
Signed-off-by: Numan Siddique 
---
 northd/en-sync-sb.c  |  52 
 northd/en-sync-sb.h  |   5 +
 northd/inc-proc-northd.c |   8 +-
 northd/northd.c  | 274 +++
 northd/northd.h  |   3 +
 tests/ovn-northd.at  |  17 ++-
 6 files changed, 239 insertions(+), 120 deletions(-)

diff --git a/northd/en-sync-sb.c b/northd/en-sync-sb.c
index f6dbd7b2b6..47ace03417 100644
--- a/northd/en-sync-sb.c
+++ b/northd/en-sync-sb.c
@@ -244,6 +244,58 @@ sync_to_sb_lb_northd_handler(struct engine_node *node, 
void *data OVS_UNUSED)
 return false;
 }
 
+/* sync_to_sb_pb engine node functions.
+ * This engine node syncs the SB Port Bindings (partly).
+ * The en_northd engine creates the SB Port binding rows and
+ * updates most of the columns.
+ * This engine node updates the port binding columns that
+ * need to be updated after the northd engine is run.
+ */
+
+void *
+en_sync_to_sb_pb_init(struct engine_node *node OVS_UNUSED,
+  struct engine_arg *arg OVS_UNUSED)
+{
+return NULL;
+}
+
+void
+en_sync_to_sb_pb_run(struct engine_node *node, void *data OVS_UNUSED)
+{
+const struct engine_context *eng_ctx = engine_get_context();
+struct northd_data *northd_data = engine_get_input_data("northd", node);
+
+sync_pbs(eng_ctx->ovnsb_idl_txn, &northd_data->ls_ports);
+engine_set_node_state(node, EN_UPDATED);
+}
+
+void
+en_sync_to_sb_pb_cleanup(void *data OVS_UNUSED)
+{
+
+}
+
+bool
+sync_to_sb_pb_northd_handler(struct engine_node *node, void *data OVS_UNUSED)
+{
+const struct engine_context *eng_ctx = engine_get_context();
+if (!eng_ctx->ovnsb_idl_txn) {
+return false;
+}
+
+struct northd_data *nd = engine_get_input_data("northd", node);
+if (!nd->change_tracked) {
+return false;
+}
+
+if (!sync_pbs_for_northd_ls_changes(&nd->tracked_ls_changes)) {
+return false;
+}
+
+engine_set_node_state(node, EN_UPDATED);
+return true;
+}
+
 /* static functions. */
 static void
 sync_addr_set(struct ovsdb_idl_txn *ovnsb_txn, const char *name,
diff --git a/northd/en-sync-sb.h b/northd/en-sync-sb.h
index 06d2a57710..f08565eee1 100644
--- a/northd/en-sync-sb.h
+++ b/northd/en-sync-sb.h
@@ -22,4 +22,9 @@ void en_sync_to_sb_lb_run(struct engine_node *, void *data);
 void en_sync_to_sb_lb_cleanup(void *data);
 bool sync_to_sb_lb_northd_handler(struct engine_node *, void *data OVS_UNUSED);
 
+void *en_sync_to_sb_pb_init(struct engine_node *, struct engine_arg *);
+void en_sync_to_sb_pb_run(struct engine_node *, void *data);
+void en_sync_to_sb_pb_cleanup(void *data);
+bool sync_to_sb_pb_northd_handler(struct engine_node *, void *data OVS_UNUSED);
+
 #endif /* end of EN_SYNC_SB_H */
diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
index 303b58d43f..e9e28c4bea 100644
--- a/northd/inc-proc-northd.c
+++ b/northd/inc-proc-northd.c
@@ -144,6 +144,7 @@ static ENGINE_NODE_WITH_CLEAR_TRACK_DATA(port_group, 
"port_group");
 static ENGINE_NODE(fdb_aging, "fdb_aging");
 static ENGINE_NODE(fdb_aging_waker, "fdb_aging_waker");
 static ENGINE_NODE(sync_to_sb_lb, "sync_to_sb_lb");
+static ENGINE_NODE(sync_to_sb_pb, "sync_to_sb_pb");
 static ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lb_data, "lb_data");
 
 void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
@@ -228,15 +229,20 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
  sync_to_sb_lb_northd_handler);
 engine_add_input(&en_sync_to_sb_lb, &en_sb_load_balancer, NULL);
 
+engine_add_input(&en_sync_to_sb_pb, &en_northd,
+ sync_to_sb_pb_northd_handler);
+
 /* en_sync_to_sb engine node syncs the SB database tables from
  * the NB database tables.
  * Right now this engine syncs the SB Address_Set table, Port_Group table
- * SB Meter/Meter_Band tables and SB Load_Balancer table.
+ * SB Meter/Meter_Band tables and SB Load_Balancer table and
+ * (partly) SB Port_Binding table.
  */
 engine_add_input(&en_sync_to_sb, &en_sync_to_sb_addr_set, NULL);
 engine_add_input(&en_sync_to_sb, &en_port_group, NULL);
 engine_add_input(&en_sync_to_sb, &en_sync_meters, NULL);
 engine_add_input(&en_sync_to_sb, &en_sync_to_sb_lb, NULL);
+engine_add_input(&en_sync_to_sb, &en_sync_to_sb_pb, NULL);
 
 engine_add_input(&en_sync_from_sb, &en_northd,
  sync_from_sb_northd_handler);
diff --git a/northd/northd.c b/northd/northd.c
index c5e1d1c5a4..5651ee38c5 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -3527,8 +3527,6 @@ ovn_port_update_sbrec(struct ovsdb_idl_txn *ovnsb_txn,
 

[ovs-dev] [PATCH ovn v7 2/4] northd: Handle load balancer group changes for a logical switch.

2023-09-11 Thread numans
From: Numan Siddique 

For a given load balancer group 'A', the northd engine data maintains
a bitmap of datapaths associated with this lb group.  So when lb group 'A'
gets associated with a logical switch 's1', the bit at the index of 's1' is
set in its bitmap.

In order to handle the load balancer group changes incrementally for a
logical switch, we need to set and clear the bitmap bits accordingly.
And this patch does it.
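
The en-lb-data.c hunk below detects those set/clear cases roughly as
follows: it rebuilds the per-switch uuidset of LB groups and compares it
against the previous one, so entries only in the new set were just
associated and leftovers in the old set were disassociated.  A simplified,
self-contained sketch of that old-vs-new comparison, using plain string
arrays instead of OVN's uuidset:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Return true if 'name' is present in 'set' (an array of 'n' strings). */
static bool
set_contains(const char *const *set, size_t n, const char *name)
{
    for (size_t i = 0; i < n; i++) {
        if (!strcmp(set[i], name)) {
            return true;
        }
    }
    return false;
}

int
main(void)
{
    /* LB groups referenced by a logical switch before and after an NB
     * update (hypothetical names). */
    const char *old_grps[] = { "lbg1", "lbg2" };
    const char *new_grps[] = { "lbg2", "lbg3" };
    size_t n_old = 2, n_new = 2;

    /* Newly associated: in the new set but not the old one -> set the
     * switch's bit in that group's datapath bitmap. */
    for (size_t i = 0; i < n_new; i++) {
        if (!set_contains(old_grps, n_old, new_grps[i])) {
            printf("associated:    %s -> set bit for this switch\n",
                   new_grps[i]);
        }
    }

    /* Disassociated: left over in the old set -> clear the bit. */
    for (size_t i = 0; i < n_old; i++) {
        if (!set_contains(new_grps, n_new, old_grps[i])) {
            printf("disassociated: %s -> clear bit for this switch\n",
                   old_grps[i]);
        }
    }
    return 0;
}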

Reviewed-by: Ales Musil 
Acked-by: Mark Michelson 
Signed-off-by: Numan Siddique 
---
 northd/en-lb-data.c | 102 
 northd/en-lb-data.h |   4 ++
 northd/northd.c |  62 +++
 tests/ovn-northd.at |  30 +
 4 files changed, 154 insertions(+), 44 deletions(-)

diff --git a/northd/en-lb-data.c b/northd/en-lb-data.c
index 02b1bfd7a4..3604ba8226 100644
--- a/northd/en-lb-data.c
+++ b/northd/en-lb-data.c
@@ -63,6 +63,7 @@ static struct crupdated_lbgrp *
 static void add_deleted_lbgrp_to_tracked_data(
 struct ovn_lb_group *, struct tracked_lb_data *);
 static bool is_ls_lbs_changed(const struct nbrec_logical_switch *nbs);
+static bool is_ls_lbgrps_changed(const struct nbrec_logical_switch *nbs);
 
 /* 'lb_data' engine node manages the NB load balancers and load balancer
  * groups.  For each NB LB, it creates 'struct ovn_northd_lb' and
@@ -271,12 +272,15 @@ lb_data_logical_switch_handler(struct engine_node *node, 
void *data)
 destroy_od_lb_data(od_lb_data);
 }
 } else {
-if (!is_ls_lbs_changed(nbs)) {
+bool ls_lbs_changed = is_ls_lbs_changed(nbs);
+bool ls_lbgrps_changed = is_ls_lbgrps_changed(nbs);
+if (!ls_lbs_changed && !ls_lbgrps_changed) {
 continue;
 }
 struct crupdated_od_lb_data *codlb = xzalloc(sizeof *codlb);
 codlb->od_uuid = nbs->header_.uuid;
 uuidset_init(&codlb->assoc_lbs);
+uuidset_init(&codlb->assoc_lbgrps);
 
 struct od_lb_data *od_lb_data =
 find_od_lb_data(&lb_data->ls_lb_map, &nbs->header_.uuid);
@@ -285,38 +289,66 @@ lb_data_logical_switch_handler(struct engine_node *node, void *data)
 &nbs->header_.uuid);
 }
 
-struct uuidset *pre_lb_uuids = od_lb_data->lbs;
-od_lb_data->lbs = xzalloc(sizeof *od_lb_data->lbs);
-uuidset_init(od_lb_data->lbs);
-
-for (size_t i = 0; i < nbs->n_load_balancer; i++) {
-const struct uuid *lb_uuid =
-&nbs->load_balancer[i]->header_.uuid;
-uuidset_insert(od_lb_data->lbs, lb_uuid);
+if (ls_lbs_changed) {
+struct uuidset *pre_lb_uuids = od_lb_data->lbs;
+od_lb_data->lbs = xzalloc(sizeof *od_lb_data->lbs);
+uuidset_init(od_lb_data->lbs);
+
+for (size_t i = 0; i < nbs->n_load_balancer; i++) {
+const struct uuid *lb_uuid =
+&nbs->load_balancer[i]->header_.uuid;
+uuidset_insert(od_lb_data->lbs, lb_uuid);
+
+struct uuidset_node *unode = uuidset_find(pre_lb_uuids,
+lb_uuid);
+
+if (!unode || (nbrec_load_balancer_row_get_seqno(
+nbs->load_balancer[i],
+OVSDB_IDL_CHANGE_MODIFY) > 0)) {
+/* Add this lb to the tracked data. */
+uuidset_insert(&codlb->assoc_lbs, lb_uuid);
+changed = true;
+}
+
+if (unode) {
+uuidset_delete(pre_lb_uuids, unode);
+}
+}
+if (!uuidset_is_empty(pre_lb_uuids)) {
+trk_lb_data->has_dissassoc_lbs_from_od = true;
+changed = true;
+}
 
-struct uuidset_node *unode = uuidset_find(pre_lb_uuids,
-  lb_uuid);
+uuidset_destroy(pre_lb_uuids);
+free(pre_lb_uuids);
+}
 
-if (!unode || (nbrec_load_balancer_row_get_seqno(
-nbs->load_balancer[i], OVSDB_IDL_CHANGE_MODIFY) > 0)) {
-/* Add this lb to the tracked data. */
-uuidset_insert(&codlb->assoc_lbs, lb_uuid);
-changed = true;
+if (ls_lbgrps_changed) {
+struct uuidset *pre_lbgrp_uuids = od_lb_data->lbgrps;
+od_lb_data->lbgrps = xzalloc(sizeof *od_lb_data->lbgrps);
+uuidset_init(od_lb_data->lbgrps);
+for (size_t i = 0; i < nbs->n_load_balancer_group; i++) {
+const struct uuid *lbg_uuid =
+&nbs->load_balancer_group[i]->header_.uuid;
+

[ovs-dev] [PATCH ovn v7 1/4] northd: Handle load balancer changes for a logical switch.

2023-09-11 Thread numans
From: Numan Siddique 

'lb_data' engine node now also handles logical switch changes.
Its data maintains ls-to-lb related information, i.e., if a
logical switch sw0 has lb1, lb2 and lb3 associated, then
it stores this info in its data.  And when a new load balancer
lb4 is associated with it, it stores this information in its
tracked data so that the 'northd' engine node can handle it
accordingly.  Tracked data will have information like:
  changed ls -> {sw0 : {associated_lbs: [lb4]}}

The first handler 'northd_lb_data_handler_pre_od' is called before the
'northd_nb_logical_switch_handler' handler and it just creates or
deletes the lb_datapaths hmap for the tracked lbs.

The northd handler 'northd_lb_data_handler' updates the
ovn_lb_datapaths's 'nb_ls_map' bitmap accordingly.

Eg.  If the lb_data has the below tracked data:

tracked_data = {'crupdated_lbs': [lb1, lb2],
'deleted_lbs': [lb3],
'crupdated_lb_groups': [lbg1, lbg2],
'crupdated_ls_lbs': [{ls: sw0, assoc_lbs: [lb1]},
 {ls: sw1, assoc_lbs: [lb1, lb2]}]}

The handler northd_lb_data_handler() creates the
ovn_lb_datapaths objects for lb1 and lb2 and deletes lb3 from
the ovn_lb_datapaths hmap.  It does the same for the created or updated lb
groups lbg1 and lbg2 in the ovn_lbgrp_datapaths map.  It also updates the
nb_ls_map bitmap of lb1 for sw0 and sw1, and of lb2 for sw1.
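
For readers new to the nb_ls_map bookkeeping: the lib/lb.c hunk below makes
the bitmap update idempotent, so re-adding an already-associated switch
neither sets the bit twice nor over-counts n_nb_ls.  A minimal,
self-contained sketch of that idea, using toy types and helpers rather than
the OVN ones:

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for 'struct ovn_lb_datapaths': a bitmap of logical switch
 * indexes plus a count of bits set, kept in sync with the bitmap. */
struct toy_lb_datapaths {
    unsigned long nb_ls_map;   /* one bit per datapath index (toy: max 64) */
    unsigned int n_nb_ls;      /* number of bits currently set */
};

static bool
toy_bitmap_is_set(unsigned long map, unsigned int idx)
{
    return (map >> idx) & 1UL;
}

/* Idempotent add: only set the bit and bump the counter if the switch was
 * not already associated, mirroring the guarded ovn_lb_datapaths_add_ls(). */
static void
toy_lb_datapaths_add_ls(struct toy_lb_datapaths *dps, unsigned int ls_index)
{
    if (!toy_bitmap_is_set(dps->nb_ls_map, ls_index)) {
        dps->nb_ls_map |= 1UL << ls_index;
        dps->n_nb_ls++;
    }
}

int
main(void)
{
    struct toy_lb_datapaths dps = { .nb_ls_map = 0, .n_nb_ls = 0 };

    toy_lb_datapaths_add_ls(&dps, 3);   /* sw0 */
    toy_lb_datapaths_add_ls(&dps, 7);   /* sw1 */
    toy_lb_datapaths_add_ls(&dps, 3);   /* sw0 again: no double count */

    printf("n_nb_ls = %u (expected 2)\n", dps.n_nb_ls);
    return 0;
}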

Reviewed-by: Ales Musil 
Acked-by: Mark Michelson 
Signed-off-by: Numan Siddique 
---
 lib/lb.c |   5 +-
 northd/en-lb-data.c  | 176 +++
 northd/en-lb-data.h  |  17 
 northd/en-lflow.c|   6 ++
 northd/en-northd.c   |   6 +-
 northd/inc-proc-northd.c |   2 +
 northd/northd.c  |  83 +++---
 northd/northd.h  |   4 +-
 tests/ovn-northd.at  |  56 +
 9 files changed, 322 insertions(+), 33 deletions(-)

diff --git a/lib/lb.c b/lib/lb.c
index 6fd67e2218..e6c9dc2be2 100644
--- a/lib/lb.c
+++ b/lib/lb.c
@@ -1088,7 +1088,10 @@ ovn_lb_datapaths_add_ls(struct ovn_lb_datapaths *lb_dps, 
size_t n,
 struct ovn_datapath **ods)
 {
 for (size_t i = 0; i < n; i++) {
-bitmap_set1(lb_dps->nb_ls_map, ods[i]->index);
+if (!bitmap_is_set(lb_dps->nb_ls_map, ods[i]->index)) {
+bitmap_set1(lb_dps->nb_ls_map, ods[i]->index);
+lb_dps->n_nb_ls++;
+}
 }
 }
 
diff --git a/northd/en-lb-data.c b/northd/en-lb-data.c
index 8acd9c8cb2..02b1bfd7a4 100644
--- a/northd/en-lb-data.c
+++ b/northd/en-lb-data.c
@@ -39,6 +39,14 @@ static void lb_data_destroy(struct ed_type_lb_data *);
 static void build_lbs(const struct nbrec_load_balancer_table *,
   const struct nbrec_load_balancer_group_table *,
   struct hmap *lbs, struct hmap *lb_groups);
+static void build_od_lb_map(const struct nbrec_logical_switch_table *,
+ struct hmap *od_lb_map);
+static struct od_lb_data *find_od_lb_data(struct hmap *od_lb_map,
+  const struct uuid *od_uuid);
+static void destroy_od_lb_data(struct od_lb_data *od_lb_data);
+static struct od_lb_data *create_od_lb_data(struct hmap *od_lb_map,
+const struct uuid *od_uuid);
+
 static struct ovn_lb_group *create_lb_group(
 const struct nbrec_load_balancer_group *, struct hmap *lbs,
 struct hmap *lb_groups);
@@ -54,6 +62,7 @@ static struct crupdated_lbgrp *
struct tracked_lb_data *);
 static void add_deleted_lbgrp_to_tracked_data(
 struct ovn_lb_group *, struct tracked_lb_data *);
+static bool is_ls_lbs_changed(const struct nbrec_logical_switch *nbs);
 
 /* 'lb_data' engine node manages the NB load balancers and load balancer
  * groups.  For each NB LB, it creates 'struct ovn_northd_lb' and
@@ -80,9 +89,13 @@ en_lb_data_run(struct engine_node *node, void *data)
 EN_OVSDB_GET(engine_get_input("NB_load_balancer", node));
 const struct nbrec_load_balancer_group_table *nb_lbg_table =
 EN_OVSDB_GET(engine_get_input("NB_load_balancer_group", node));
+const struct nbrec_logical_switch_table *nb_ls_table =
+EN_OVSDB_GET(engine_get_input("NB_logical_switch", node));
 
 lb_data->tracked = false;
 build_lbs(nb_lb_table, nb_lbg_table, &lb_data->lbs, &lb_data->lbgrps);
+build_od_lb_map(nb_ls_table, &lb_data->ls_lb_map);
+
 engine_set_node_state(node, EN_UPDATED);
 }
 
@@ -230,18 +243,104 @@ lb_data_load_balancer_group_handler(struct engine_node 
*node, void *data)
 return true;
 }
 
+struct od_lb_data {
+struct hmap_node hmap_node;
+struct uuid od_uuid;
+struct uuidset *lbs;
+struct uuidset *lbgrps;
+};
+
+bool
+lb_data_logical_switch_handler(struct engine_node *node, void *data)
+{
+struct ed_type_lb_data *lb_data = (struct ed_type_lb_data *) data;
+const struct 

[ovs-dev] [PATCH ovn v7 0/4] northd: I-P for load balancer and lb groups

2023-09-11 Thread numans
From: Numan Siddique 

This patch series adds support for handling load balancer and
load balancer group changes incrementally in the "northd" engine
node.  Changes to a logical switch's or logical router's load
balancer and load balancer group columns are also handled incrementally,
provided other columns do not change.

v6 of the series also included lflow I-P handling.  But v7 drops these
patches as there are some concerns with the obj dep mgr usage. lflow I-P
handling patches will be submitted separately.

Below are the scale testing results done with these patches applied
using ovn-heater.  The test ran the scenario  -
ocp-500-density-heavy.yml [1].

With these patches applied (with load balancer I-P handling only in
northd engine node) the results are:

------------------------------------------------------------------------------------------------------------
                      Min (s)   Median (s) 90%ile (s) 99%ile (s) Max (s)   Mean (s)  Total (s)   Count Failed
------------------------------------------------------------------------------------------------------------
Iteration Total       0.131363  1.189994   3.213526   4.308134   4.394562  1.385713  173.214153  125   0
Namespace.add_ports   0.005176  0.005611   0.006476   0.019867   0.024206  0.006058  0.757188    125   0
WorkerNode.bind_port  0.033776  0.046343   0.054414   0.061719   0.063613  0.046815  11.703773   250   0
WorkerNode.ping_port  0.005156  0.006959   2.044939   3.655328   4.241496  0.627103  156.775832  250   0
------------------------------------------------------------------------------------------------------------


The results with the present main (Result 3) are:


------------------------------------------------------------------------------------------------------------
                      Min (s)   Median (s) 90%ile (s) 99%ile (s) Max (s)   Mean (s)  Total (s)   Count Failed
------------------------------------------------------------------------------------------------------------
Iteration Total       3.233795  4.364926   5.400982   6.412803   7.409757  4.792270  599.033790  125   0
Namespace.add_ports   0.005230  0.006564   0.007379   0.019060   0.037490  0.007223  0.902930    125   0
WorkerNode.bind_port  0.033864  0.044052   0.049608   0.054849   0.056196  0.044005  11.001231   250   0
WorkerNode.ping_port  0.005334  2.060477   5.222422   6.267332   7.284001  2.323020  580.754964  250   0
------------------------------------------------------------------------------------------------------------

v6 -> v7
---
  * First 4 patches of v6 are merged in main and branch-23.09 and
patches 9 to 16 are dropped.
  * v7 only has 4 patches now.
  * Addressed review comments.  There is now only one handler for the
    lb_data engine input in the northd engine node -
    northd_handle_lb_data_changes().  In v6 and earlier there were two
    handler functions - northd_handle_lb_data_changes_pre_od() and
    northd_handle_lb_data_changes_post_od().

v5 -> v6
---
  * Rebased.  Added 2 more patches (p15 and p16) for LR NAT I-P handling.

v4 -> v5
---
  * 6 new patches are added to the series which handles the LB changes
in the lflow engine node.
v3 -> v4
---
  * Covered more test scenarios.
  * Found few issues and fixed them.  v3 was not handling the scenario of
a vip getting added or removed from a load balancer.

v2 -> v3

  * v2 was very inefficient in handling the load balancer group changes
and in associating the load balancers of the lb group to the
datapaths. This was the main reason for the regression in the full
recompute time taken.
v3 addressed these by more efficiently handling the lb group changes
incrementally.



Numan Siddique (4):
  northd: Handle load balancer changes for a logical switch.
  northd: Handle load balancer group changes for a logical switch.
  northd: Sync SB Port bindings NAT column in a separate engine node.
  northd: Handle load balancer/group changes for a logical router.

 lib/lb.c |  51 ++-
 lib/lb.h |   9 +
 northd/en-lb-data.c  | 431 -
 northd/en-lb-data.h  |  35 +++
 northd/en-lflow.c|   6 +
 northd/en-northd.c   |  26 +-
 northd/en-northd.h   |   1 +
 northd/en-sync-sb.c  |  52 +++
 northd/en-sync-sb.h  

Re: [ovs-dev] [PATCH v6 2/3] dpif: Probe support for OVS_ACTION_ATTR_DROP.

2023-09-11 Thread Eric Garver
On Sat, Sep 09, 2023 at 01:41:05AM +0200, Ilya Maximets wrote:
> On 9/1/23 21:42, Eric Garver wrote:
> > Kernel support is being added for this action. As such, we need to probe
> > the datapath for support.
> > 
> > Signed-off-by: Eric Garver 
> > ---
> >  include/linux/openvswitch.h |  2 +-
> >  lib/dpif.c  |  6 -
> >  lib/dpif.h  |  1 -
> >  ofproto/ofproto-dpif.c  | 52 +++--
> >  4 files changed, 51 insertions(+), 10 deletions(-)
> > 
> > diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
> > index a265e05ad253..fce6456f47c8 100644
> > --- a/include/linux/openvswitch.h
> > +++ b/include/linux/openvswitch.h
> > @@ -1086,11 +1086,11 @@ enum ovs_action_attr {
> > OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
> > OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */
> > OVS_ACTION_ATTR_DEC_TTL,  /* Nested OVS_DEC_TTL_ATTR_*. */
> > +   OVS_ACTION_ATTR_DROP, /* u32 xlate_error. */
> >  
> >  #ifndef __KERNEL__
> > OVS_ACTION_ATTR_TUNNEL_PUSH,   /* struct ovs_action_push_tnl*/
> > OVS_ACTION_ATTR_TUNNEL_POP,/* u32 port number. */
> > -   OVS_ACTION_ATTR_DROP,  /* u32 xlate_error. */
> > OVS_ACTION_ATTR_LB_OUTPUT, /* u32 bond-id. */
> >  #endif
> > __OVS_ACTION_ATTR_MAX,/* Nothing past this will be accepted
> > diff --git a/lib/dpif.c b/lib/dpif.c
> > index 0f480bec48d0..28fa40879655 100644
> > --- a/lib/dpif.c
> > +++ b/lib/dpif.c
> > @@ -1934,12 +1934,6 @@ dpif_supports_tnl_push_pop(const struct dpif *dpif)
> >  return dpif_is_netdev(dpif);
> >  }
> >  
> > -bool
> > -dpif_supports_explicit_drop_action(const struct dpif *dpif)
> > -{
> > -return dpif_is_netdev(dpif);
> > -}
> > -
> >  bool
> >  dpif_supports_lb_output_action(const struct dpif *dpif)
> >  {
> > diff --git a/lib/dpif.h b/lib/dpif.h
> > index 0f2dc2ef3c55..8dc5da45b3ef 100644
> > --- a/lib/dpif.h
> > +++ b/lib/dpif.h
> > @@ -940,7 +940,6 @@ int dpif_get_pmds_for_port(const struct dpif * dpif, 
> > odp_port_t port_no,
> >  
> >  char *dpif_get_dp_version(const struct dpif *);
> >  bool dpif_supports_tnl_push_pop(const struct dpif *);
> > -bool dpif_supports_explicit_drop_action(const struct dpif *);
> >  bool dpif_synced_dp_layers(struct dpif *);
> >  
> >  /* Log functions. */
> > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> > index e22ca757ac35..5ed849e463c2 100644
> > --- a/ofproto/ofproto-dpif.c
> > +++ b/ofproto/ofproto-dpif.c
> > @@ -72,6 +72,8 @@
> >  #include "util.h"
> >  #include "uuid.h"
> >  #include "vlan-bitmap.h"
> > +#include "dpif-netdev.h"
> > +#include "netdev-offload.h"
> >  
> >  VLOG_DEFINE_THIS_MODULE(ofproto_dpif);
> >  
> > @@ -220,6 +222,7 @@ static void ofproto_unixctl_init(void);
> >  static void ct_zone_config_init(struct dpif_backer *backer);
> >  static void ct_zone_config_uninit(struct dpif_backer *backer);
> >  static void ct_zone_timeout_policy_sweep(struct dpif_backer *backer);
> > +static bool check_drop_action(struct dpif_backer *backer);
> >  
> >  static inline struct ofproto_dpif *
> >  ofproto_dpif_cast(const struct ofproto *ofproto)
> > @@ -390,6 +393,15 @@ type_run(const char *type)
> >  udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
> >  }
> >  
> > +/* For TC, if vswitchd started with other_config:hw-offload set to 
> > "false",
> > + * and the configuration is now "true", then we need to re-probe 
> > datapath
> > + * support for the "drop" action. */
> > +if (backer->rt_support.explicit_drop_action
> > +&& netdev_is_flow_api_enabled() && !dpif_is_netdev(backer->dpif)) {
> > +backer->rt_support.explicit_drop_action = 
> > check_drop_action(backer);
> 
> This seems dangerous to me.  The value is accessed concurrently from
> the other threads, so the update will have to be atomic.
> 
> Also, this check is so specific and re-checks one particular thing.
> Might be hard to maintain.
> 
> OVS requires re-start after enabling HW offload, so the feature
> checking and all other potential quirks can be taken care of.
> If users really don't want to restart OVS, we can do a soft restart
> by recreating the bridge when the value changes, similarly how
> the bridge is getting re-created if the datapath type changes.
> I posted an RFC here with an idea of the implementation:
>   
> https://patchwork.ozlabs.org/project/openvswitch/patch/20230908234009.892803-1-i.maxim...@ovn.org/
> 
> This way we will re-probe all the features and re-configure everything
> that needs to be re-configured.  I can continue working on that RFC
> once I'm back from PTO.
> 
> What do you think?  Eelco? Simon?

I agree that it would be better to fix this generically, i.e. re-probe
everything.
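
(As an illustration of the atomicity concern above, and not of the preferred
fix: a minimal sketch of how such a support flag could be made safe for
concurrent readers using OVS' ovs-atomic.h helpers.  The variable and helper
names below are hypothetical; today 'rt_support.explicit_drop_action' is a
plain bool.)

    #include "ovs-atomic.h"

    /* Hypothetical atomic wrapper around the datapath support flag. */
    static atomic_bool drop_action_support = ATOMIC_VAR_INIT(false);

    /* Writer, e.g. the main thread after re-probing the datapath. */
    static void
    set_drop_action_support(bool supported)
    {
        atomic_store_relaxed(&drop_action_support, supported);
    }

    /* Readers, e.g. translation/revalidator threads. */
    static bool
    drop_action_supported(void)
    {
        bool supported;

        atomic_read_relaxed(&drop_action_support, &supported);
        return supported;
    }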

> Best regards, Ilya Maximets.
> 
> > +backer->need_revalidate = REV_RECONFIGURE;
> > +}
> > +
> >  if (backer->need_revalidate) {
> >  struct 

Re: [ovs-dev] [PATCH ovn] northd: Always ct commit ECMP symmetric traffic in the original direction.

2023-09-11 Thread Dumitru Ceara
On 9/7/23 15:59, Ales Musil wrote:
> On Fri, Sep 1, 2023 at 12:56 PM Dumitru Ceara  wrote:
> 
>> Commit 506f7d4bcfbc ("northd: rely on new actions for ecmp-symmetric
>> routing") relied on commit_ecmp_nh() to learn the mapping between a
>> traffic session's 5-tuple and the MAC of the next-hop to be used for
>> that session.  This logical action translates to OVS learn() action.
>>
>> While in theory this is the most correct way of tracking the mapping
>> between ECMP symmetric reply sessions and their next hop's MAC address,
>> it also creates additional load in ovs-vswitchd (to learn the new flows)
>> and introduces latency and affects traffic throughput.
>>
>> An alternative is to instead re-commit the 5-tuple (along with the
>> next-hop MAC and port information) to conntrack every time traffic
>> received from the next-hop side is forwarded by the OVN router.
>>
>> Testing shows that in a scenario with 4 next-hops and ECMP symmetric
>> replies enabled, with traffic running for 600 seconds, latency, throughput
>> and ovs-vswitchd CPU usage are significantly better with this change:
>>
>> - Before:
>>   - ovs-vswitchd ~1200% CPU (distributed across 17 revalidator threads)
>>   - Sent: 638.72MiB, 1.06MiB/s
>>   - Recv: 7.17GiB, 12.24MiB/s
>>
>> - After:
>>   - ovs-vswitchd ~7% CPU (distributed across 17 revalidator threads)
>>   - Sent: 892.69MiB, 1.49MiB/s
>>   - Recv: 8.63GiB, 14.72MiB/s
>>
>> The only downside of not using learn() flows is that OVN cannot
>> determine on its own when a next-hop goes away (without using BFD).
>> This scenario however can probably be handled by the CMS which has more
>> knowledge about the rest of the network (outside OVN), e.g.,
>> ovn-kubernetes currently flushes conntrack entries created for ECMP
>> symmetric reply sessions when it detects that the next-hop went away.
>>
>> If a next-hop changes its MAC address, that's handled by OVN gracefully and
>> the conntrack entry corresponding to that session gets updated
>> accordingly.
>>
>> NOTE: we don't remove the logical actions' implementation as
>> ovn-controller needs to be able to translate logical flows generated by
>> older versions of ovn-northd.
>>
>> Fixes: 506f7d4bcfbc ("northd: rely on new actions for ecmp-symmetric
>> routing")
>> Signed-off-by: Dumitru Ceara 
>> ---
>>  northd/northd.c | 52 ++---
>>  tests/ovn-northd.at | 20 ++---
>>  2 files changed, 27 insertions(+), 45 deletions(-)
>>
>> diff --git a/northd/northd.c b/northd/northd.c
>> index 459aaafb1c..e67d34cd28 100644
>> --- a/northd/northd.c
>> +++ b/northd/northd.c
>> @@ -262,7 +262,6 @@ enum ovn_stage {
>>  #define REGBIT_LOOKUP_NEIGHBOR_RESULT "reg9[2]"
>>  #define REGBIT_LOOKUP_NEIGHBOR_IP_RESULT "reg9[3]"
>>  #define REGBIT_DST_NAT_IP_LOCAL "reg9[4]"
>> -#define REGBIT_KNOWN_ECMP_NH"reg9[5]"
>>  #define REGBIT_KNOWN_LB_SESSION "reg9[6]"
>>
>>  /* Register to store the eth address associated to a router port for
>> packets
>> @@ -370,8 +369,7 @@ enum ovn_stage {
>>   * | |   EGRESS_LOOPBACK/| G | UNUSED  |
>>   * | R9  |   PKT_LARGER/ | 4 | |
>>   * | |   LOOKUP_NEIGHBOR_RESULT/ |   | |
>> - * | |   SKIP_LOOKUP_NEIGHBOR/   |   | |
>> - * | |   KNOWN_ECMP_NH}  |   | |
>> + * | |   SKIP_LOOKUP_NEIGHBOR}   |   | |
>>   * | |   |   | |
>>   * | | REG_ORIG_TP_DPORT_ROUTER  |   | |
>>   * | |   |   | |
>> @@ -10846,15 +10844,13 @@ add_ecmp_symmetric_reply_flows(struct hmap
>> *lflows,
>>cidr);
>>  free(cidr);
>>  ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, 100,
>> -ds_cstr(_match),
>> -REGBIT_KNOWN_ECMP_NH" = chk_ecmp_nh_mac(); ct_next;",
>> -_route->header_);
>> + ds_cstr(_match), "ct_next;",
>> + _route->header_);
>>
>>  /* And packets that go out over an ECMP route need conntrack */
>>  ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, 100,
>> -ds_cstr(route_match),
>> -REGBIT_KNOWN_ECMP_NH" = chk_ecmp_nh(); ct_next;",
>> -_route->header_);
>> + ds_cstr(route_match), "ct_next;",
>> + _route->header_);
>>
>>  /* Save src eth and inport in ct_label for packets that arrive over
>>   * an ECMP route.
>> @@ -10867,9 +10863,8 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
>>  ds_put_format(,
>>  "ct_commit { ct_label.ecmp_reply_eth = eth.src; "
>>  " %s = %" PRId64 ";}; "
>> -"commit_ecmp_nh(ipv6 = %s, proto = tcp); next;",
>> -ct_ecmp_reply_port_match, out_port->sb->tunnel_key,
>> -IN6_IS_ADDR_V4MAPPED(>prefix) ? "false" : "true");
>> +

Re: [ovs-dev] [PATCH ovn v2] ci, tests: Use parallelization permutations for few jobs

2023-09-11 Thread Dumitru Ceara
On 9/7/23 20:52, Han Zhou wrote:
> On Thu, Sep 7, 2023 at 7:48 AM Ales Musil  wrote:
>>
>> Single test needs to run 4 times to check all
>> the permutations, this is taking a lot of CI time
>> and job space.
>>
>> Remove the parallel permutation and leave parallelization
>> enabled for all tests, as this use case is more complex.
>> Only exception are 10 tests in ovn-northd.at that still run
>> parallelization permutations. This should be enough to cover
>> the code with parallelization disabled.
>>
>> This allows us to greatly reduce the number of test cases
>> (almost by half) and we can also remove 6 jobs from the CI
>> pipeline. The time reduction is very noticeable going down
>> from ~30 min to ~20 min.
>>
>> Signed-off-by: Ales Musil 
>> ---
>>  .github/workflows/test.yml | 28 +++-
>>  tests/ovn-macros.at| 15 +++
>>  tests/ovn-northd.at| 20 ++--
>>  3 files changed, 32 insertions(+), 31 deletions(-)
>>
>> diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
>> index fe2a14c40..5c5ce6ed1 100644
>> --- a/.github/workflows/test.yml
>> +++ b/.github/workflows/test.yml
>> @@ -102,29 +102,23 @@ jobs:
>>  cfg:
>>  - { compiler: gcc, opts: --disable-ssl }
>>  - { compiler: clang, opts: --disable-ssl }
>> -- { compiler: gcc, testsuite: test, test_range: "-500" }
>> -- { compiler: gcc, testsuite: test, test_range: "501-1000" }
>> -- { compiler: gcc, testsuite: test, test_range: "1001-" }
>> +- { compiler: gcc, testsuite: test, test_range: "-300" }
>> +- { compiler: gcc, testsuite: test, test_range: "301-600" }
>> +- { compiler: gcc, testsuite: test, test_range: "601-" }
>>  - { compiler: clang, testsuite: test, sanitizers: sanitizers,
> test_range: "-300" }
>>  - { compiler: clang, testsuite: test, sanitizers: sanitizers,
> test_range: "301-600" }
>> -- { compiler: clang, testsuite: test, sanitizers: sanitizers,
> test_range: "601-900" }
>> -- { compiler: clang, testsuite: test, sanitizers: sanitizers,
> test_range: "901-1200" }
>> -- { compiler: clang, testsuite: test, sanitizers: sanitizers,
> test_range: "1201-" }
>> -- { compiler: gcc, testsuite: test, libs: -ljemalloc,
> test_range: "-500" }
>> -- { compiler: gcc, testsuite: test, libs: -ljemalloc,
> test_range: "501-1000" }
>> -- { compiler: gcc, testsuite: test, libs: -ljemalloc,
> test_range: "1001-" }
>> +- { compiler: clang, testsuite: test, sanitizers: sanitizers,
> test_range: "601-" }
>> +- { compiler: gcc, testsuite: test, libs: -ljemalloc,
> test_range: "-300" }
>> +- { compiler: gcc, testsuite: test, libs: -ljemalloc,
> test_range: "301-600" }
>> +- { compiler: gcc, testsuite: test, libs: -ljemalloc,
> test_range: "601-" }
>>  - { compiler: gcc, testsuite: system-test-dpdk, dpdk: dpdk,
> test_range: "-100" }
>> -- { compiler: gcc, testsuite: system-test-dpdk, dpdk: dpdk,
> test_range: "101-200" }
>> -- { compiler: gcc, testsuite: system-test-dpdk, dpdk: dpdk,
> test_range: "201-" }
>> +- { compiler: gcc, testsuite: system-test-dpdk, dpdk: dpdk,
> test_range: "101-" }
>>  - { compiler: gcc, testsuite: system-test-userspace, test_range:
> "-100" }
>> -- { compiler: gcc, testsuite: system-test-userspace, test_range:
> "101-200" }
>> -- { compiler: gcc, testsuite: system-test-userspace, test_range:
> "201-" }
>> +- { compiler: gcc, testsuite: system-test-userspace, test_range:
> "101-" }
>>  - { compiler: gcc, testsuite: system-test, test_range: "-100" }
>> -- { compiler: gcc, testsuite: system-test, test_range: "101-200"
> }
>> -- { compiler: gcc, testsuite: system-test, test_range: "201-" }
>> +- { compiler: gcc, testsuite: system-test, test_range: "101-" }
>>  - { compiler: clang, testsuite: system-test, sanitizers:
> sanitizers, test_range: "-100" }
>> -- { compiler: clang, testsuite: system-test, sanitizers:
> sanitizers, test_range: "101-200" }
>> -- { compiler: clang, testsuite: system-test, sanitizers:
> sanitizers, test_range: "201-" }
>> +- { compiler: clang, testsuite: system-test, sanitizers:
> sanitizers, test_range: "101-" }
>>  - { arch: x86, compiler: gcc, opts: --disable-ssl }
>>
>>  steps:
>> diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at
>> index 13d5dc3d4..7c0abdece 100644
>> --- a/tests/ovn-macros.at
>> +++ b/tests/ovn-macros.at
>> @@ -887,21 +887,28 @@ OVS_END_SHELL_HELPERS
>>  m4_define([OVN_POPULATE_ARP], [AT_CHECK(ovn_populate_arp__, [0],
> [ignore])])
>>
>>  # Defines versions of the test with all combinations of northd,
>> -# parallelization on/off and conditional monitoring on/off.
>> +# parallelization enabled and conditional monitoring on/off.
>>  m4_define([OVN_FOR_EACH_NORTHD],
>>[m4_foreach([NORTHD_TYPE], [ovn-northd],

[ovs-dev] [PATCH v2] checkpatch: Add checks for the subject line.

2023-09-11 Thread Eelco Chaudron
This patch adds WARNINGs for the subject line length and the format,
i.e., the sentence should start with a capital and end with a dot.

Signed-off-by: Eelco Chaudron 

---
v2: - Add test cases
- Made it work on files and git patches
- Changed it from 50 chars on the summary, to 70 chars on area
  and summary. This way git log --oneline will fit on a 80 char
  line.

 tests/checkpatch.at |   27 
 utilities/checkpatch.py |   53 ++-
 2 files changed, 70 insertions(+), 10 deletions(-)

diff --git a/tests/checkpatch.at b/tests/checkpatch.at
index fdcdb846e..6689ce905 100755
--- a/tests/checkpatch.at
+++ b/tests/checkpatch.at
@@ -8,7 +8,12 @@ OVS_START_SHELL_HELPERS
 try_checkpatch() {
 # Take the patch to test from $1.  Remove an initial four-space indent
 # from it and, if it is just headers with no body, add a null body.
+# If it does not have a 'Subject', add a valid one.
 echo "$1" | sed 's/^//' > test.patch
+if grep 'Subject\:' test.patch >/dev/null 2>&1; then :
+else
+sed -i '1 i\Subject: Patch this is.' test.patch
+fi
 if grep '---' expout >/dev/null 2>&1; then :
 else
 printf '\n---\n' >> test.patch
@@ -560,3 +565,25 @@ try_checkpatch \
 "
 
 AT_CLEANUP
+
+AT_SETUP([checkpatch - subject])
+try_checkpatch \
+   "Author: A
+Commit: A
+Subject: netdev: invalid case and dot ending
+
+Signed-off-by: A" \
+"WARNING: The subject summary should start with a capital.
+WARNING: The subject summary should end with a dot.
+Subject: netdev: invalid case and dot ending"
+
+try_checkpatch \
+   "Author: A
+Commit: A
+Subject: netdev: This is a way to long commit summary and therefor it 
should report a WARNING!
+
+Signed-off-by: A" \
+"WARNING: The subject, ': ', is over 70 characters, i.e., 
85.
+Subject: netdev: This is a way to long commit summary and therefor it 
should report a WARNING!"
+
+AT_CLEANUP
diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py
index 5c4aaefb3..3f42c44f2 100755
--- a/utilities/checkpatch.py
+++ b/utilities/checkpatch.py
@@ -792,6 +792,36 @@ def run_file_checks(text):
 check['check'](text)
 
 
+def run_subject_checks(subject, spellcheck=False):
+warnings = False
+
+if spellcheck and check_spelling(subject, False):
+warnings = True
+
+summary = subject[subject.rindex(': ') + 2:]
+area_summary = subject[subject.index(': ') + 2:]
+area_summary_len = len(area_summary)
+if area_summary_len > 70:
+print_warning("The subject, ': ', is over 70 "
+  "characters, i.e., %u." % area_summary_len)
+warnings = True
+
+if summary[0].isalpha() and summary[0].islower():
+print_warning(
+"The subject summary should start with a capital.")
+warnings = True
+
+if subject[-1] not in [".", "?", "!"]:
+print_warning(
+"The subject summary should end with a dot.")
+warnings = True
+
+if warnings:
+print(subject)
+
+return warnings
+
+
 def ovs_checkpatch_parse(text, filename, author=None, committer=None):
 global print_file_name, total_line, checking_file, \
 empty_return_check_state
@@ -812,6 +842,7 @@ def ovs_checkpatch_parse(text, filename, author=None, 
committer=None):
 r'^@@ ([0-9-+]+),([0-9-+]+) ([0-9-+]+),([0-9-+]+) @@')
 is_author = re.compile(r'^(Author|From): (.*)$', re.I | re.M | re.S)
 is_committer = re.compile(r'^(Commit: )(.*)$', re.I | re.M | re.S)
+is_subject = re.compile(r'^(Subject: )(.*)$', re.I | re.M | re.S)
 is_signature = re.compile(r'^(Signed-off-by: )(.*)$',
   re.I | re.M | re.S)
 is_co_author = re.compile(r'^(Co-authored-by: )(.*)$',
@@ -911,6 +942,8 @@ def ovs_checkpatch_parse(text, filename, author=None, 
committer=None):
 committer = is_committer.match(line).group(2)
 elif is_author.match(line):
 author = is_author.match(line).group(2)
+elif is_subject.match(line):
+run_subject_checks(line, spellcheck)
 elif is_signature.match(line):
 m = is_signature.match(line)
 signatures.append(m.group(2))
@@ -1029,18 +1062,18 @@ def ovs_checkpatch_file(filename):
 result = ovs_checkpatch_parse(part.get_payload(decode=False), filename,
   mail.get('Author', mail['From']),
   mail['Commit'])
-if spellcheck:
-if not mail['Subject'] or not mail['Subject'].strip():
-if mail['Subject']:
-mail.replace_header('Subject', sys.argv[-1])
-else:
-mail.add_header('Subject', sys.argv[-1])
 
-print("Subject missing! Your provisional subject is",
-  mail['Subject'])
+if not mail['Subject'] or not 

Re: [ovs-dev] [PATCH ovn v6 00/16] northd: I-P for load balancer and lb groups

2023-09-11 Thread Numan Siddique
On Thu, Aug 31, 2023 at 2:47 PM Mark Michelson  wrote:
>
> I had a look at patches 1-8. I have no additional notes beyond what Han
> and Ales have already mentioned.
>
> For patches 1-8:
>
> Acked-by: Mark Michelson 

Thanks Mark, Han and Ales for the reviews.

I applied the first 4 patches of this series to main and backported to
branch-23.09.   I'll submit v7 soon with the patches 5-8 addressing
all the review comments.

Thanks
Numan



>
> On 8/18/23 04:56, num...@ovn.org wrote:
> > From: Numan Siddique 
> >
> > This patch series adds the support to handle load balancer and
> > load balancer group changes incrementally in the "northd" engine
> > node and "lflow" engine node.  Changes to logical switches and router's load
> > balancer and load balancer group columns are also handled incrementally
> > provided other columns do not change.
> >
> > V4 of this series did not include LB I-P handling in the lflow engine
> > node.  V5 adds 6 more patches to handle the LB changes in the lflow
> > engine node.  V6 added 2 additional patches to handle router NAT I-P
> > handling.
> >
> > This patch series can be divided into 3 parts
> >
> > Part 1.  Patches 1 to 8  (LB I-P only in northd)
> > Part 2.  Patches 9 to 14 (LB I-P in lflow engine too)
> > Part 3.  Patches 15 and 16  (LR NAT I-P handling in both northd and
> > lflow)
> >
> > Submitting all these patches as one series.  These patches can also be
> > found here - https://github.com/numansiddique/ovn/tree/northd_ip_lb_ip_v6
> >
> > If there are any conflicts,  request the reviewers to use this branch
> > for reviewing.
> >
> > Below are the scale testing results done with these patches applied
> > using ovn-heater.  The test ran the scenario  -
> > ocp-500-density-heavy.yml [1].
> >
> > With these patches applied (with load balancer I-P handling in both
> > northd and lflow engine nodes) the results (Result 1) are:
> >
> > ------------------------------------------------------------------------------------------------------------------------
> >                       Min (s)    Median (s)  90%ile (s)  99%ile (s)  Max (s)    Mean (s)   Total (s)    Count  Failed
> > ------------------------------------------------------------------------------------------------------------------------
> > Iteration Total       0.135651   1.130527    1.179357    1.201410    2.180203   0.674606   84.325717    125    0
> > Namespace.add_ports   0.005218   0.005678    0.006457    0.018936    0.020812   0.006182   0.772796     125    0
> > WorkerNode.bind_port  0.033631   0.043287    0.051171    0.058223    0.062819   0.043839   10.959757    250    0
> > WorkerNode.ping_port  0.005460   0.006791    1.041434    1.064807    1.069957   0.274352   68.587878    250    0
> > ------------------------------------------------------------------------------------------------------------------------
> >
> >
> > With only the first 8 patches applied (with load balancer I-P handling
> > only in northd engine node) the results (Result 2) are:
> >
> >
> > ------------------------------------------------------------------------------------------------------------------------
> >                       Min (s)    Median (s)  90%ile (s)  99%ile (s)  Max (s)    Mean (s)   Total (s)    Count  Failed
> > ------------------------------------------------------------------------------------------------------------------------
> > Iteration Total       0.132929   2.157103    3.314847    3.331561    4.378626   1.581889   197.736147   125    0
> > Namespace.add_ports   0.005217   0.005760    0.006565    0.013348    0.021014   0.006106   0.763214     125    0
> > WorkerNode.bind_port  0.035205   0.045458    0.052278    0.059804    0.063941   0.045652   11.413122    250    0
> > WorkerNode.ping_port  0.005075   0.006814    3.088548    3.192577    4.242026   0.726453   181.613284   250    0
> > ------------------------------------------------------------------------------------------------------------------------
> >
> >
> > The results with the present main (Result 3) are:
> >
> > ------------------------------------------------------------------------------------------------------------------------
> >                       Min (s)    Median (s)  90%ile (s)  99%ile (s)  Max (s)    Mean (s)   Total (s)    Count  Failed
> > 

Re: [ovs-dev] [PATCH ovn] controller: disable OpenFlow inactivity probing

2023-09-11 Thread Vladislav Odintsov
Hi Dumitru,

if this patch eventually gets merged, please remove the following lines from
its commit message:

"Disablement for ovs-vswitchd -> ovn-controller OF inacivity probing will
be done in the next patch."

Unfortunately, I was unsuccessful in disabling probing in this direction
(default 60s).  Maybe you can give a hint on how to solve this [1].

1: https://mail.openvswitch.org/pipermail/ovs-dev/2023-June/405445.html

> On 11 Sep 2023, at 15:57, Dumitru Ceara  wrote:
> 
> On 9/11/23 14:54, Dumitru Ceara wrote:
>> On 6/14/23 20:02, Vladislav Odintsov wrote:
>>> Hi Mark,
>>> 
>>> thanks for taking time to look on this!
>>> 
>>> Your point with a hung OVS is really an interesting case.
>>> 
>>> From one hand it’s a possible situation, and from another I guess it’s much 
>>> higher probability for OVS to be busy by some other work rather than to 
>>> hang in a loop. In my installation the first happening sometimes (OVS 
>>> business by real work). The reasons can be different but I can’t say that 
>>> it’s better to drop OF connection in such a moment (possibly triggering ha 
>>> chassis-redirect ports failover).
>>> 
>>> At the same time if we’re talking about L3GW node, which has a gateway 
>>> chassis redirect port, the hung OVS should be detected by other nodes with 
>>> BFD probes (if it’s a ha chassis-redirect port).
>>> And if it’s a normal hypervisor (with just a vif ports), so what can we do 
>>> with it? I guess if we drop OF connection, it won’t make any positive 
>>> changes. Maybe just release memory needed for handling this connection and 
>>> not allocate buffers…? Unfortunately I can’t evaluate this.
>>> 
>>> Moreover, the OVN default is a disabled probing. What can be a possible 
>>> recommended values to set probes if to leave it as is? How should users 
>>> find out that they have to configure this? Currently in docs there is only 
>>> information that this option configures probe interval. But it’s not 
>>> obvious when to configure it and which value to set.
>>> 
>> 
>> In support of this, quoting the original commit that added the probe
>> config knob:
>> 
>> "The main intention of this patch is to fix this recompuation issue for
>> older versions (there by requesting backport), it still would be
>> beneficial with the incremental processing engine."
>> 
>> That was back in 2019:
>> 
>> https://github.com/ovn-org/ovn/commit/c99069c8934c9ea55d310a8b6d48fb66aa477589
>> 
>> Since then lots of performance improvements landed in OVN
>> (ovn-controller too), we're probably fine.  Checking
>> ovn-org/ovn-kubernetes code I see they were also setting
>> ovn-openflow-probe-interval "just in case" so they probably won't be
>> affected by it getting removed:
>> 
>> https://github.com/ovn-org/ovn-kubernetes/commit/be1eb843852cd4c490514cec60553cca4feaf18e
>> 
>>> I hope this makes sense.
>>> 
>> 
>> The patch in its current form looks OK to me, I'm OK to merge it if Mark
>> doesn't have anything against it; therefore:
>> 
>> Acked-by: Dumitru Ceara 
>> 
> 
> I forgot to mention this earlier but, since this patch was posted way
> before soft freeze and since the changes seem contained, it's probably
> fair to apply this to the 23.09 branch too.  What do you think, Mark?
> 
> Thanks!
> 
>> Regards,
>> Dumitru
>> 
>>> Also, which Han’s patch to OVS you’re talking about? I looked through open 
>>> OVS patches and haven’t seen any. Please point me out.
>>> 
>>> regards,
>>> Vladislav Odintsov
>>> 
 On 9 Jun 2023, at 18:28, Mark Michelson  wrote:
 
 Hi,
 
 Thanks for linking the email thread, since I had the exact same concern 
 that Numan did about detecting if ovs-vswitchd goes down. It sounds like 
 because of the nature of unix sockets, this disconnection is still 
 detected by OVN and failover can occur as expected.
 
 But what about if ovs-vswitchd is still running but in a "compromised" 
 state. For instance, what if a coding error in ovs-vswitchd results in it 
 running an infinite loop? In such a case, the unix connection would still 
 be alive, but OVN would not be able to communicate with the ovs-vswitchd 
 process.
 
 Does this situation eventually lead to OVN disconnecting anyway after it 
 fills up the unix socket's buffer? If so, how long does that take? Or does 
 it lead to OVN slowly growing in memory usage while it waits forever to be 
 able to write to the socket?
 
 I think removing the hard-coded 5 second probe intervals from pinctrl.c 
 and features.c is a good idea. But I think we should leave the 
 configurable probe interval used by ofctrl.c for the case I described 
 before. Since Han's patch to OVS should disable probe intervals from the 
 OVS side by default, we would not trigger bad behavior simply because OVN 
 has a lot of work to do during a recompute. What do you think?
> On 6/8/23 10:15, Vladislav Odintsov wrote:
> OpenFlow connection is established over unix socket, 

Re: [ovs-dev] [PATCH ovn] controller: disable OpenFlow inactivity probing

2023-09-11 Thread Dumitru Ceara
On 9/11/23 14:54, Dumitru Ceara wrote:
> On 6/14/23 20:02, Vladislav Odintsov wrote:
>> Hi Mark,
>>
>> thanks for taking time to look on this!
>>
>> Your point with a hung OVS is really an interesting case.
>>
>> From one hand it’s a possible situation, and from another I guess it’s much 
>> higher probability for OVS to be busy by some other work rather than to hang 
>> in a loop. In my installation the first happening sometimes (OVS business by 
>> real work). The reasons can be different but I can’t say that it’s better to 
>> drop OF connection in such a moment (possibly triggering ha chassis-redirect 
>> ports failover).
>>
>> At the same time if we’re talking about L3GW node, which has a gateway 
>> chassis redirect port, the hung OVS should be detected by other nodes with 
>> BFD probes (if it’s a ha chassis-redirect port).
>> And if it’s a normal hypervisor (with just a vif ports), so what can we do 
>> with it? I guess if we drop OF connection, it won’t make any positive 
>> changes. Maybe just release memory needed for handling this connection and 
>> not allocate buffers…? Unfortunately I can’t evaluate this.
>>
>> Moreover, the OVN default is a disabled probing. What can be a possible 
>> recommended values to set probes if to leave it as is? How should users find 
>> out that they have to configure this? Currently in docs there is only 
>> information that this option configures probe interval. But it’s not obvious 
>> when to configure it and which value to set.
>>
> 
> In support of this, quoting the original commit that added the probe
> config knob:
> 
> "The main intention of this patch is to fix this recompuation issue for
> older versions (there by requesting backport), it still would be
> beneficial with the incremental processing engine."
> 
> That was back in 2019:
> 
> https://github.com/ovn-org/ovn/commit/c99069c8934c9ea55d310a8b6d48fb66aa477589
> 
> Since then lots of performance improvements landed in OVN
> (ovn-controller too), we're probably fine.  Checking
> ovn-org/ovn-kubernetes code I see they were also setting
> ovn-openflow-probe-interval "just in case" so they probably won't be
> affected by it getting removed:
> 
> https://github.com/ovn-org/ovn-kubernetes/commit/be1eb843852cd4c490514cec60553cca4feaf18e
> 
>> I hope this makes sense.
>>
> 
> The patch in its current form looks OK to me, I'm OK to merge it if Mark
> doesn't have anything against it; therefore:
> 
> Acked-by: Dumitru Ceara 
> 

I forgot to mention this earlier but, since this patch was posted way
before soft freeze and since the changes seem contained, it's probably
fair to apply this to the 23.09 branch too.  What do you think, Mark?

Thanks!

> Regards,
> Dumitru
> 
>> Also, which Han’s patch to OVS you’re talking about? I looked through open 
>> OVS patches and haven’t seen any. Please point me out.
>>
>> regards,
>> Vladislav Odintsov
>>
>>> On 9 Jun 2023, at 18:28, Mark Michelson  wrote:
>>>
>>> Hi,
>>>
>>> Thanks for linking the email thread, since I had the exact same concern 
>>> that Numan did about detecting if ovs-vswitchd goes down. It sounds like 
>>> because of the nature of unix sockets, this disconnection is still detected 
>>> by OVN and failover can occur as expected.
>>>
>>> But what about if ovs-vswitchd is still running but in a "compromised" 
>>> state. For instance, what if a coding error in ovs-vswitchd results in it 
>>> running an infinite loop? In such a case, the unix connection would still 
>>> be alive, but OVN would not be able to communicate with the ovs-vswitchd 
>>> process.
>>>
>>> Does this situation eventually lead to OVN disconnecting anyway after it 
>>> fills up the unix socket's buffer? If so, how long does that take? Or does 
>>> it lead to OVN slowly growing in memory usage while it waits forever to be 
>>> able to write to the socket?
>>>
>>> I think removing the hard-coded 5 second probe intervals from pinctrl.c and 
>>> features.c is a good idea. But I think we should leave the configurable 
>>> probe interval used by ofctrl.c for the case I described before. Since 
>>> Han's patch to OVS should disable probe intervals from the OVS side by 
>>> default, we would not trigger bad behavior simply because OVN has a lot of 
>>> work to do during a recompute. What do you think?
 On 6/8/23 10:15, Vladislav Odintsov wrote:
 OpenFlow connection is established over unix socket, which is a reliable
 connection and doesn't require additional probing.
 With this patch openflow probing in ovn-controller -> ovs-vswitchd
 direction is disabled for all three connections:
   - OF flows management connection,
   - OF features negotiation connection,
   - pinctrl connection.
 ovn-controller external_ids:ovn-openflow-probe-interval is removed as
 non-needed anymore.
 Disablement for ovs-vswitchd -> ovn-controller OF inacivity probing will
 be done in the next patch.
 Reported-at: 
 

Re: [ovs-dev] [PATCH ovn] controller: disable OpenFlow inactivity probing

2023-09-11 Thread Dumitru Ceara
On 6/14/23 20:02, Vladislav Odintsov wrote:
> Hi Mark,
> 
> thanks for taking time to look on this!
> 
> Your point with a hung OVS is really an interesting case.
> 
> From one hand it’s a possible situation, and from another I guess it’s much 
> higher probability for OVS to be busy by some other work rather than to hang 
> in a loop. In my installation the first happening sometimes (OVS business by 
> real work). The reasons can be different but I can’t say that it’s better to 
> drop OF connection in such a moment (possibly triggering ha chassis-redirect 
> ports failover).
> 
> At the same time if we’re talking about L3GW node, which has a gateway 
> chassis redirect port, the hung OVS should be detected by other nodes with 
> BFD probes (if it’s a ha chassis-redirect port).
> And if it’s a normal hypervisor (with just a vif ports), so what can we do 
> with it? I guess if we drop OF connection, it won’t make any positive 
> changes. Maybe just release memory needed for handling this connection and 
> not allocate buffers…? Unfortunately I can’t evaluate this.
> 
> Moreover, the OVN default is a disabled probing. What can be a possible 
> recommended values to set probes if to leave it as is? How should users find 
> out that they have to configure this? Currently in docs there is only 
> information that this option configures probe interval. But it’s not obvious 
> when to configure it and which value to set.
> 

In support of this, quoting the original commit that added the probe
config knob:

"The main intention of this patch is to fix this recompuation issue for
older versions (there by requesting backport), it still would be
beneficial with the incremental processing engine."

That was back in 2019:

https://github.com/ovn-org/ovn/commit/c99069c8934c9ea55d310a8b6d48fb66aa477589

Since then lots of performance improvements landed in OVN
(ovn-controller too), we're probably fine.  Checking
ovn-org/ovn-kubernetes code I see they were also setting
ovn-openflow-probe-interval "just in case" so they probably won't be
affected by it getting removed:

https://github.com/ovn-org/ovn-kubernetes/commit/be1eb843852cd4c490514cec60553cca4feaf18e

> I hope this makes sense.
> 

The patch in its current form looks OK to me, I'm OK to merge it if Mark
doesn't have anything against it; therefore:

Acked-by: Dumitru Ceara 

Regards,
Dumitru

> Also, which Han’s patch to OVS you’re talking about? I looked through open 
> OVS patches and haven’t seen any. Please point me out.
> 
> regards,
> Vladislav Odintsov
> 
>> On 9 Jun 2023, at 18:28, Mark Michelson  wrote:
>>
>> Hi,
>>
>> Thanks for linking the email thread, since I had the exact same concern that 
>> Numan did about detecting if ovs-vswitchd goes down. It sounds like because 
>> of the nature of unix sockets, this disconnection is still detected by OVN 
>> and failover can occur as expected.
>>
>> But what about if ovs-vswitchd is still running but in a "compromised" 
>> state. For instance, what if a coding error in ovs-vswitchd results in it 
>> running an infinite loop? In such a case, the unix connection would still be 
>> alive, but OVN would not be able to communicate with the ovs-vswitchd 
>> process.
>>
>> Does this situation eventually lead to OVN disconnecting anyway after it 
>> fills up the unix socket's buffer? If so, how long does that take? Or does 
>> it lead to OVN slowly growing in memory usage while it waits forever to be 
>> able to write to the socket?
>>
>> I think removing the hard-coded 5 second probe intervals from pinctrl.c and 
>> features.c is a good idea. But I think we should leave the configurable 
>> probe interval used by ofctrl.c for the case I described before. Since Han's 
>> patch to OVS should disable probe intervals from the OVS side by default, we 
>> would not trigger bad behavior simply because OVN has a lot of work to do 
>> during a recompute. What do you think?
>>> On 6/8/23 10:15, Vladislav Odintsov wrote:
>>> OpenFlow connection is established over unix socket, which is a reliable
>>> connection and doesn't require additional probing.
>>> With this patch openflow probing in ovn-controller -> ovs-vswitchd
>>> direction is disabled for all three connections:
>>>   - OF flows management connection,
>>>   - OF features negotiation connection,
>>>   - pinctrl connection.
>>> ovn-controller external_ids:ovn-openflow-probe-interval is removed as
>>> non-needed anymore.
>>> Disablement for ovs-vswitchd -> ovn-controller OF inacivity probing will
>>> be done in the next patch.
>>> Reported-at: 
>>> https://mail.openvswitch.org/pipermail/ovs-dev/2023-May/404625.html
>>> Signed-off-by: Vladislav Odintsov 
>>> ---
>>>  NEWS|  5 +
>>>  controller/ofctrl.c | 14 ++
>>>  controller/ofctrl.h |  4 +---
>>>  controller/ovn-controller.8.xml | 14 --
>>>  controller/ovn-controller.c | 21 +
>>>  controller/pinctrl.c|  2 

Re: [ovs-dev] [PATCH ovn v2 2/2] ofctrl: Prevent conjunction duplication

2023-09-11 Thread Dumitru Ceara
On 9/6/23 16:39, Mark Michelson wrote:
> Thanks Ales,
> 

Hi Ales, Mark,

> Acked-by: Mark Michelson 
> 
> Thankfully the extra overhead is in the much less frequently-hit case.
> 

We still do O(N^2) processing; I wonder if it would be a lot of
work/complexity to add the map to all 'struct desired_flow'.

But yes, in general, it's likely fine to do things as this patch
proposes.  However, to make sure we catch it in the future, should we
log/count whenever we have a lot of entries in this specific hmap
('existing_conj')?
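
(A minimal sketch of the kind of check that could be added, assuming the
usual hmap/vlog helpers; the threshold and function name are illustrative,
'existing_conj' being the local hmap built in ofctrl_add_or_append_flow():)

    /* Illustrative threshold, not from the patch. */
    #define EXISTING_CONJ_WARN_THRESHOLD 1000

    static void
    maybe_warn_large_conj_set(const struct hmap *existing_conj)
    {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        size_t n = hmap_count(existing_conj);

        if (n > EXISTING_CONJ_WARN_THRESHOLD) {
            VLOG_WARN_RL(&rl, "appending to a flow with %"PRIuSIZE
                         " conjunction actions; this path is O(n^2)", n);
        }
    }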

Thanks,
Dumitru

> On 8/29/23 04:47, Ales Musil wrote:
>> During ofctrl_add_or_append_flow we are able to combine
>> two flows with same match but different conjunctions.
>> However, the function didn't check if the conjunctions already
>> exist in the installed flow, which could result in conjunction
>> duplication and the flow would grow infinitely e.g.
>> actions=conjunction(1,1/2), conjunction(1,1/2)
>>
>> Make sure that we add only conjunctions that are not present
>> in the already existing flow.
>>
>> Reported-at: https://bugzilla.redhat.com/2175928
>> Signed-off-by: Ales Musil 
>> ---
>>   controller/ofctrl.c | 56 -
>>   1 file changed, 55 insertions(+), 1 deletion(-)
>>
>> diff --git a/controller/ofctrl.c b/controller/ofctrl.c
>> index 9de8a145f..e39cef7aa 100644
>> --- a/controller/ofctrl.c
>> +++ b/controller/ofctrl.c
>> @@ -1273,6 +1273,37 @@ ofctrl_add_flow_metered(struct
>> ovn_desired_flow_table *desired_flows,
>>     meter_id, as_info, true);
>>   }
>>   +struct ofpact_ref {
>> +    struct hmap_node hmap_node;
>> +    struct ofpact *ofpact;
>> +};
>> +
>> +static struct ofpact_ref *
>> +ofpact_ref_find(const struct hmap *refs, const struct ofpact *ofpact)
>> +{
>> +    uint32_t hash = hash_bytes(ofpact, ofpact->len, 0);
>> +
>> +    struct ofpact_ref *ref;
>> +    HMAP_FOR_EACH_WITH_HASH (ref, hmap_node, hash, refs) {
>> +    if (ofpacts_equal(ref->ofpact, ref->ofpact->len,
>> +  ofpact, ofpact->len)) {
>> +    return ref;
>> +    }
>> +    }
>> +
>> +    return NULL;
>> +}
>> +
>> +static void
>> +ofpact_refs_destroy(struct hmap *refs)
>> +{
>> +    struct ofpact_ref *ref;
>> +    HMAP_FOR_EACH_POP (ref, hmap_node, refs) {
>> +    free(ref);
>> +    }
>> +    hmap_destroy(refs);
>> +}
>> +
>>   /* Either add a new flow, or append actions on an existing flow. If the
>>    * flow existed, a new link will also be created between the new
>> sb_uuid
>>    * and the existing flow. */
>> @@ -1292,6 +1323,21 @@ ofctrl_add_or_append_flow(struct
>> ovn_desired_flow_table *desired_flows,
>>  meter_id);
>>   existing = desired_flow_lookup_conjunctive(desired_flows,
>> >flow);
>>   if (existing) {
>> +    struct hmap existing_conj = HMAP_INITIALIZER(_conj);
>> +
>> +    struct ofpact *ofpact;
>> +    OFPACT_FOR_EACH (ofpact, existing->flow.ofpacts,
>> + existing->flow.ofpacts_len) {
>> +    if (ofpact->type != OFPACT_CONJUNCTION) {
>> +    continue;
>> +    }
>> +
>> +    struct ofpact_ref *ref = xmalloc(sizeof *ref);
>> +    ref->ofpact = ofpact;
>> +    uint32_t hash = hash_bytes(ofpact, ofpact->len, 0);
>> +    hmap_insert(_conj, >hmap_node, hash);
>> +    }
>> +
>>   /* There's already a flow with this particular match and action
>>    * 'conjunction'. Append the action to that flow rather than
>>    * adding a new flow.
>> @@ -1301,7 +1347,15 @@ ofctrl_add_or_append_flow(struct
>> ovn_desired_flow_table *desired_flows,
>>   ofpbuf_use_stub(, compound_stub,
>> sizeof(compound_stub));
>>   ofpbuf_put(, existing->flow.ofpacts,
>>  existing->flow.ofpacts_len);
>> -    ofpbuf_put(, f->flow.ofpacts, f->flow.ofpacts_len);
>> +
>> +    OFPACT_FOR_EACH (ofpact, f->flow.ofpacts, f->flow.ofpacts_len) {
>> +    if (ofpact->type != OFPACT_CONJUNCTION ||
>> +    !ofpact_ref_find(_conj, ofpact)) {
>> +    ofpbuf_put(, ofpact,
>> OFPACT_ALIGN(ofpact->len));
>> +    }
>> +    }
>> +
>> +    ofpact_refs_destroy(_conj);
>>     mem_stats.desired_flow_usage -= desired_flow_size(existing);
>>   free(existing->flow.ofpacts);
> 
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] utilities: Add kernel_delay.py script to debug a busy Linux kernel.

2023-09-11 Thread Adrian Moreno



On 8/21/23 17:41, Eelco Chaudron wrote:

This patch adds a utility that can be used to determine if
an issue is related to a lack of Linux kernel resources.

This tool is also featured in a Red Hat developers blog article:

   
https://developers.redhat.com/articles/2023/07/24/troubleshooting-open-vswitch-kernel-blame

Signed-off-by: Eelco Chaudron 
---
  utilities/automake.mk   |4
  utilities/usdt-scripts/kernel_delay.py  | 1397 +++
  utilities/usdt-scripts/kernel_delay.rst |  594 +
  3 files changed, 1995 insertions(+)
  create mode 100755 utilities/usdt-scripts/kernel_delay.py
  create mode 100644 utilities/usdt-scripts/kernel_delay.rst



I have some comments below but overall an awesome script! Thanks Eelco.


diff --git a/utilities/automake.mk b/utilities/automake.mk
index 37d679f82..9a2114df4 100644
--- a/utilities/automake.mk
+++ b/utilities/automake.mk
@@ -23,6 +23,8 @@ scripts_DATA += utilities/ovs-lib
  usdt_SCRIPTS += \
utilities/usdt-scripts/bridge_loop.bt \
utilities/usdt-scripts/dpif_nl_exec_monitor.py \
+   utilities/usdt-scripts/kernel_delay.py \
+   utilities/usdt-scripts/kernel_delay.rst \
utilities/usdt-scripts/reval_monitor.py \
utilities/usdt-scripts/upcall_cost.py \
utilities/usdt-scripts/upcall_monitor.py
@@ -70,6 +72,8 @@ EXTRA_DIST += \
utilities/docker/debian/build-kernel-modules.sh \
utilities/usdt-scripts/bridge_loop.bt \
utilities/usdt-scripts/dpif_nl_exec_monitor.py \
+   utilities/usdt-scripts/kernel_delay.py \
+   utilities/usdt-scripts/kernel_delay.rst \
utilities/usdt-scripts/reval_monitor.py \
utilities/usdt-scripts/upcall_cost.py \
utilities/usdt-scripts/upcall_monitor.py
diff --git a/utilities/usdt-scripts/kernel_delay.py 
b/utilities/usdt-scripts/kernel_delay.py
new file mode 100755
index 0..bba7bc044
--- /dev/null
+++ b/utilities/usdt-scripts/kernel_delay.py
@@ -0,0 +1,1397 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2022 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Script information:
+# ---
+# This script allows a developer to quickly identify if the issue at hand
+# might be related to the kernel running out of resources or if it really is
+# an Open vSwitch issue.
+#
+# For documentation see the kernel_delay.rst file.
+#
+try:
+from bcc import BPF, USDT, USDTException
+from bcc.syscall import syscalls, syscall_name
+except ModuleNotFoundError:
+print("ERROR: Can't find the BPF Compiler Collection (BCC) tools!")
+
+from enum import IntEnum
+
+import argparse
+import datetime
+import os
+import pytz


IIRC it's the first time we add this dependency.
I think usdt-script dependencies are starting to grow. Have you considered 
adding a requirements.txt to document them all and make it easier for users to 
consume these scripts? Alternatively, we should maybe add it to the p



+import psutil
+import re
+import sys
+import time
+
+import ctypes as ct
+
+try:
+from time import time_ns
+except ImportError:
+# For compatibility with Python <= v3.6.
+def time_ns():
+now = datetime.datetime.now()
+return int(now.timestamp() * 1e9)
+
+
+#
+# Actual eBPF source code
+#
+EBPF_SOURCE = """
+#include 
+#include 
+
+#define MONITOR_PID 
+
+enum {
+
+};
+
+struct event_t {
+u64 ts;
+u32 tid;
+u32 id;
+
+int user_stack_id;
+int kernel_stack_id;
+
+u32 syscall;
+u64 entry_ts;
+
+};
+
+BPF_RINGBUF_OUTPUT(events, );
+BPF_STACK_TRACE(stack_traces, );
+BPF_TABLE("percpu_array", uint32_t, uint64_t, dropcnt, 1);
+BPF_TABLE("percpu_array", uint32_t, uint64_t, trigger_miss, 1);
+
+BPF_ARRAY(capture_on, u64, 1);
+static bool capture_enabled(u64 pid_tgid) {


nit: inline.


+int key = 0;
+u64 *ret;
+
+if ((pid_tgid >> 32) != MONITOR_PID)
+return false;
+
+ret = capture_on.lookup();
+return ret && *ret == 1;
+}
+
+static inline bool capture_enabled__() {
+int key = 0;
+u64 *ret;
+
+ret = capture_on.lookup();
+return ret && *ret == 1;
+}
+
+


nit: extra newline


+static struct event_t *get_event(uint32_t id) {
+struct event_t *event = events.ringbuf_reserve(sizeof(struct event_t));
+
+if (!event) {
+dropcnt.increment(0);
+return NULL;
+}
+
+event->id = id;
+event->ts = bpf_ktime_get_ns();
+event->tid = 

Re: [ovs-dev] [PATCH ovn v2 1/2] ofctrl: Do not try to program long flows

2023-09-11 Thread Dumitru Ceara
On 9/6/23 16:39, Mark Michelson wrote:
> Thanks Ales,
> 
> Acked-by: Mark Michelson 
> 

Hi Ales, Mark,

> I think this change should be applied as-is. I also wanted to contribute
> some thoughts.
> 

I shared some thoughts too, below.

> As you mentioned in the commit message, this is not a self-healing
> process. I was looking at the OpenFlow specification, and I am having
> trouble figuring out how to properly handle this. It doesn't appear that
> OpenFlow provides a way to append new actions to an existing flow or to
> add new match conditions to an existing flow. The best you can do is to
> replace the actions or replace the match. This doesn't help when the
> match or actions are too long to send.
> 
> The best I could think of was to break the flow up and evaluate across
> multiple tables. This would work, but it would also completely break the
> model for how OVN correlates OpenFlow table numbers with specific
> logical stages. Are there any alternatives?
> 

Not a real alternative solution but should we also try a (single)
recompute and also clear the installed OpenFlows and install the new
set?  That's just in case it "fixes" the problem due to desired flows
being built in a different (equivalent) way.

> On 8/29/23 04:47, Ales Musil wrote:
>> If the flow size is bigger than UINT16_MAX it doesn't
>> fit into openflow message. Programming of such flow will
>> fail which results in disconnect of ofctrl. After reconnect
>> we program everything from scratch, in case the long flow still
>> remains the cycle continues. This causes the node to be almost
>> unusable as there will be massive traffic disruptions.
>>
>> To prevent that check if the flow is within the allowed size.
>> If not log the flow that would trigger this problem and do not program
>> it. This isn't a self-healing process, but the chance of this happening
>> are very slim. Also, any flow that is bigger than allowed size is OVN
>> bug, and it should be fixed.
>>
>> Reported-at: https://bugzilla.redhat.com/1955167
>> Signed-off-by: Ales Musil 
>> ---
>> v2: Fix the formatting error.
>> ---
>>   controller/ofctrl.c | 40 
>>   1 file changed, 36 insertions(+), 4 deletions(-)
>>
>> diff --git a/controller/ofctrl.c b/controller/ofctrl.c
>> index 64a444ff6..9de8a145f 100644
>> --- a/controller/ofctrl.c
>> +++ b/controller/ofctrl.c
>> @@ -1769,6 +1769,16 @@ ovn_flow_log(const struct ovn_flow *f, const
>> char *action)
>>   }
>>   }
>>   +static void
>> +ovn_flow_log_size_err(const struct ovn_flow *f)
>> +{
>> +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
>> +
>> +    char *s = ovn_flow_to_string(f);
>> +    VLOG_ERR_RL(, "The FLOW_MOD message is too big: %s", s);
>> +    free(s);

It might be useful to add a coverage counter too.
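
(For reference, a minimal sketch of what that could look like, reusing the
patch's ovn_flow_log_size_err() and assuming the usual lib/coverage.h macros;
the counter name is illustrative:)

    #include "coverage.h"

    COVERAGE_DEFINE(ofctrl_flow_mod_too_big);

    static void
    ovn_flow_log_size_err(const struct ovn_flow *f)
    {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);

        /* Count every oversized FLOW_MOD in addition to logging it. */
        COVERAGE_INC(ofctrl_flow_mod_too_big);

        char *s = ovn_flow_to_string(f);
        VLOG_ERR_RL(&rl, "The FLOW_MOD message is too big: %s", s);
        free(s);
    }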

Regards,
Dumitru

>> +}
>> +
>>   static void
>>   ovn_flow_uninit(struct ovn_flow *f)
>>   {
>> @@ -1888,15 +1898,27 @@ encode_bundle_add(struct ofpbuf *msg, struct
>> ofputil_bundle_ctrl_msg *bc)
>>   return ofputil_encode_bundle_add(OFP15_VERSION, );
>>   }
>>   -static void
>> +static bool
>>   add_flow_mod(struct ofputil_flow_mod *fm,
>>    struct ofputil_bundle_ctrl_msg *bc,
>>    struct ovs_list *msgs)
>>   {
>>   struct ofpbuf *msg = encode_flow_mod(fm);
>>   struct ofpbuf *bundle_msg = encode_bundle_add(msg, bc);
>> +
>> +    uint32_t flow_mod_len = msg->size;
>> +    uint32_t bundle_len = bundle_msg->size;
>> +
>>   ofpbuf_delete(msg);
>> +
>> +    if (flow_mod_len > UINT16_MAX || bundle_len > UINT16_MAX) {
>> +    ofpbuf_delete(bundle_msg);
>> +
>> +    return false;
>> +    }
>> +
>>   ovs_list_push_back(msgs, _msg->list_node);
>> +    return true;
>>   }
>>   
>>   /* group_table. */
>> @@ -2235,7 +2257,10 @@ installed_flow_add(struct ovn_flow *d,
>>   .new_cookie = htonll(d->cookie),
>>   .command = OFPFC_ADD,
>>   };
>> -    add_flow_mod(, bc, msgs);
>> +
>> +    if (!add_flow_mod(, bc, msgs)) {
>> +    ovn_flow_log_size_err(d);
>> +    }
>>   }
>>     static void
>> @@ -2259,7 +2284,7 @@ installed_flow_mod(struct ovn_flow *i, struct
>> ovn_flow *d,
>>   /* Use OFPFC_ADD so that cookie can be updated. */
>>   fm.command = OFPFC_ADD;
>>   }
>> -    add_flow_mod(, bc, msgs);
>> +    bool result = add_flow_mod(, bc, msgs);
>>     /* Replace 'i''s actions and cookie by 'd''s. */
>>   mem_stats.installed_flow_usage -= i->ofpacts_len - d->ofpacts_len;
>> @@ -2267,6 +2292,10 @@ installed_flow_mod(struct ovn_flow *i, struct
>> ovn_flow *d,
>>   i->ofpacts = xmemdup(d->ofpacts, d->ofpacts_len);
>>   i->ofpacts_len = d->ofpacts_len;
>>   i->cookie = d->cookie;
>> +
>> +    if (!result) {
>> +    ovn_flow_log_size_err(i);
>> +    }
>>   }
>>     static void
>> @@ -2280,7 +2309,10 @@ installed_flow_del(struct ovn_flow *i,
>>   .table_id = i->table_id,
>>   .command = OFPFC_DELETE_STRICT,
>>   };
>> -    

Re: [ovs-dev] [PATCH] dpdk: expose cpu usage stats on telemetry socket

2023-09-11 Thread Robin Jarry
Hey Kevin,

Kevin Traynor, Sep 07, 2023 at 15:37:
> This came up in conversation with other maintainers as I mentioned I was 
> reviewing and the question raised was - Why add this ? if you want these 
> values exposed, wouldn't it be better to to add to ovsdb ?

That's a good point. I had considered using ovsdb but it seemed to me
less suitable for a few reasons:

* I had understood that ovsdb is a configuration database, not a state
  reporting database.

* To have reliable and up to date numbers, ovs would need to push them
  at a high rate to the database so that clients do not get outdated cpu
  usage. The DPDK telemetry socket is real-time, the current numbers are
  returned on every request.

* I would need to define a custom schema / table to store structured
  information in the db. The DPDK telemetry socket already has a schema
  defined for this.

* Accessing ovsdb requires a library making it more complex to use for
  telemetry scrapers. The DPDK telemetry socket can be accessed with
  a standalone python script with no external dependencies[1].

[1]: 
https://github.com/rjarry/dpdk/blob/main/usertools/prometheus-dpdk-exporter.py#L135-L143

Maybe my observations are wrong, please do correct me if they are.

> Are you looking for individual lcore usage with identification of that 
> pmd? or overall aggregate usage ?
>
> I ask because it will report lcore id's which would need to be mapped to 
> pmd core id's for anything regarding individual pmds.
>
> That can be found in ovs-vswitchd.log or checked locally with 
> 'ovs-appctl dpdk/lcore-list' but assuming if they were available, then 
> user would not be using dpdk telemetry anyway.

I would assume that the important data is the aggregate usage for
overall monitoring and resource planning. Individual pmd usage can be
accessed for fine tuning and debugging via appctl.

> These stats are cumulative so in the absence of 'ovs-appctl 
> dpif-netdev/pmd-stats-clear'  that would need to be taken care of with 
> some post-processing by whatever is pulling these stats - otherwise 
> you'll get cumulative stats for an unknown time period and unknown 
> traffic profile (e.g. it would be counting before any traffic started).
>
> These might also be reset with pmd-stats-clear independently, so that 
> would need to be accounted for too.

The only important data point that we need is the ratio
busy/(busy + idle) over a specified time delta, which any scraper can compute.
I consider these numbers like any other counter that can eventually be
reset.

See this reply from Morten Brørup on dpdk-dev for more context:

https://lore.kernel.org/dpdk-dev/98cbd80474fa8b44bf855df32c47dc35d87...@smartserver.smartshare.dk/
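
(A minimal sketch of that post-processing, in C for illustration: the usage
ratio from two samples of the busy_cycles/total_cycles counters shown in the
/eal/lcore/usage output further below.  The handling of counter resets is an
assumption of the sketch, not something the telemetry endpoint provides.)

    #include <stdint.h>

    /* Usage ratio of one lcore between two telemetry samples:
     * delta busy cycles over delta total cycles. */
    static double
    lcore_usage_ratio(uint64_t busy0, uint64_t total0,
                      uint64_t busy1, uint64_t total1)
    {
        if (busy1 < busy0 || total1 <= total0) {
            /* Counters were reset (e.g. pmd-stats-clear) or no time
             * elapsed; discard this sample pair. */
            return -1.0;
        }
        return (double) (busy1 - busy0) / (double) (total1 - total0);
    }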

> Another thing I noticed is that without the pmd-sleep info the stats in 
> isolation can be misleading. Example below:
>
> With low rate traffic and clearing stats between 10 sec runs
>
> 2023-09-07T13:14:56Z|00158|dpif_netdev|INFO|PMD max sleep request is 0 
> usecs.
> 2023-09-07T13:14:56Z|00159|dpif_netdev|INFO|PMD load based sleeps are 
> disabled.
>
> Time: 13:15:06.842
> Measurement duration: 10.009 s
>
> pmd thread numa_id 0 core_id 8:
>
>Iterations: 51712564  (0.19 us/it)
>- Used TSC cycles:   26021354654  (100.0 % of total cycles)
>- idle iterations:  51710963  ( 99.9 % of used cycles)
>- busy iterations:  1601  (  0.1 % of used cycles)
>- sleep iterations:0  (  0.0 % of iterations)
> ^^^ can see here that pmd does not sleep and is 0.1% busy
>
>Sleep time (us):   0  (  0 us/iteration avg.)
>Rx packets:37250  (4 Kpps, 866 cycles/pkt)
>Datapath passes:   37250  (1.00 passes/pkt)
>- PHWOL hits:  0  (  0.0 %)
>- MFEX Opt hits:   0  (  0.0 %)
>- Simple Match hits:   37250  (100.0 %)
>- EMC hits:0  (  0.0 %)
>- SMC hits:0  (  0.0 %)
>- Megaflow hits:   0  (  0.0 %, 0.00 subtbl lookups/hit)
>- Upcalls: 0  (  0.0 %, 0.0 us/upcall)
>- Lost upcalls:0  (  0.0 %)
>Tx packets:0
>
> {
>"/eal/lcore/usage": {
>  "lcore_ids": [
>1
>  ],
>  "total_cycles": [
>26127284389
>  ],
>  "busy_cycles": [
>32370313
>  ]
>}
> }
>
> ^^^ This in isolation implies pmd is 32370313/26127284389 0.12% busy 
> which is true
>
> 2023-09-07T13:15:06Z|00160|dpif_netdev|INFO|PMD max sleep request is 500 
> usecs.
> 2023-09-07T13:15:06Z|00161|dpif_netdev|INFO|PMD load based sleeps are 
> enabled.
>
> Time: 13:15:16.908
> Measurement duration: 10.008 s
>
> pmd thread numa_id 0 core_id 8:
>
>Iterations:75197  (133.09 us/it)
>- Used TSC cycles: 237910969  (  0.9 % of total cycles)
>- idle iterations: 73782  ( 74.4 % of used cycles)
>- busy iterations:  1415  ( 25.6 % of used cycles)
>- sleep iterations:

Re: [ovs-dev] [PATCH] checkpatch: Add checks for the subject line.

2023-09-11 Thread Eelco Chaudron



On 9 Sep 2023, at 1:11, Ilya Maximets wrote:

> On 9/5/23 14:40, Eelco Chaudron wrote:
>> This patch adds WARNINGs for the subject line length and the format,
>> i.e., the sentence should start with a capital and end with a dot.
>>
>> Signed-off-by: Eelco Chaudron 
>> ---
>>  utilities/checkpatch.py |   30 ++
>>  1 file changed, 30 insertions(+)
>>
>> diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py
>> index 5c4aaefb3..57959595f 100755
>> --- a/utilities/checkpatch.py
>> +++ b/utilities/checkpatch.py
>> @@ -792,6 +792,33 @@ def run_file_checks(text):
>>  check['check'](text)
>>
>>
>> +def run_subject_checks(subject, spellcheck=False):
>> +print_subject = False
>> +
>> +if spellcheck and check_spelling(subject, False):
>> +print_subject = True
>> +
>> +summary = subject[subject.rindex(': ') + 2:]
>> +summary_len = len(summary)
>> +if summary_len > 50:
>> +print_warning("The subject summary is over 50 characters, i.e., 
>> %u." %
>
> Also, 50 seems like too low of a number.  It looks like about
> 25% of all commits in OVS will fail this check.  If we make it
> 65, for example, it will be only 2%.  7% with 60.

Yes, I noticed, but took the 50 from our documentation. Maybe we should limit 
the total length to 70 like they do in the kernel?

> What do you think?

Will add the test cases and send out a v2.

//Eelco

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3] utilities: Add kernel_delay.py script to debug a busy Linux kernel.

2023-09-11 Thread 0-day Robot
Bleep bloop.  Greetings Eelco Chaudron, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 106 characters long (recommended limit is 79)
#1511 FILE: utilities/usdt-scripts/kernel_delay.rst:54:
  --  


WARNING: Line is 93 characters long (recommended limit is 79)
#1516 FILE: utilities/usdt-scripts/kernel_delay.rst:59:
  NAME NUMBER   COUNT  TOTAL ns 
   MAX ns

WARNING: Line is 93 characters long (recommended limit is 79)
#1517 FILE: utilities/usdt-scripts/kernel_delay.rst:60:
  poll  7   5   184,193,176 
  184,191,520

WARNING: Line is 93 characters long (recommended limit is 79)
#1518 FILE: utilities/usdt-scripts/kernel_delay.rst:61:
  recvmsg  47 494   125,208,756 
  310,331

WARNING: Line is 93 characters long (recommended limit is 79)
#1519 FILE: utilities/usdt-scripts/kernel_delay.rst:62:
  futex   202   818,768,758 
4,023,039

WARNING: Line is 93 characters long (recommended limit is 79)
#1520 FILE: utilities/usdt-scripts/kernel_delay.rst:63:
  sendto   44  10   375,861 
  266,867

WARNING: Line is 93 characters long (recommended limit is 79)
#1521 FILE: utilities/usdt-scripts/kernel_delay.rst:64:
  sendmsg  46   443,294 
   11,213

WARNING: Line is 93 characters long (recommended limit is 79)
#1522 FILE: utilities/usdt-scripts/kernel_delay.rst:65:
  write 1   1 5,949 
5,949

WARNING: Line is 93 characters long (recommended limit is 79)
#1523 FILE: utilities/usdt-scripts/kernel_delay.rst:66:
  getrusage98   1 1,424 
1,424

WARNING: Line is 93 characters long (recommended limit is 79)
#1524 FILE: utilities/usdt-scripts/kernel_delay.rst:67:
  read  0   1 1,292 
1,292

WARNING: Line is 82 characters long (recommended limit is 79)
#1528 FILE: utilities/usdt-scripts/kernel_delay.rst:71:
  SCHED_CNT   TOTAL nsMIN nsMAX 
ns

WARNING: Line is 86 characters long (recommended limit is 79)
#1536 FILE: utilities/usdt-scripts/kernel_delay.rst:79:
  NAME   COUNT  TOTAL ns
MAX ns

WARNING: Line is 86 characters long (recommended limit is 79)
#1537 FILE: utilities/usdt-scripts/kernel_delay.rst:80:
  eno8303-rx-1   1 3,586
 3,586

WARNING: Line is 94 characters long (recommended limit is 79)
#1541 FILE: utilities/usdt-scripts/kernel_delay.rst:84:
  NAME VECT_NR   COUNT  TOTAL ns
MAX ns

WARNING: Line is 94 characters long (recommended limit is 79)
#1542 FILE: utilities/usdt-scripts/kernel_delay.rst:85:
  net_rx 3   117,699
17,699

WARNING: Line is 94 characters long (recommended limit is 79)
#1543 FILE: utilities/usdt-scripts/kernel_delay.rst:86:
  sched  7   613,820
 3,226

WARNING: Line is 94 characters long (recommended limit is 79)
#1544 FILE: utilities/usdt-scripts/kernel_delay.rst:87:
  rcu9  1613,586
 1,554

WARNING: Line is 94 characters long (recommended limit is 79)
#1545 FILE: utilities/usdt-scripts/kernel_delay.rst:88:
  timer  1   310,259
 3,815

WARNING: Line is 106 characters long (recommended limit is 79)
#1646 FILE: utilities/usdt-scripts/kernel_delay.rst:189:
  --  


WARNING: Line is 87 characters long (recommended limit is 79)
#1661 FILE: utilities/usdt-scripts/kernel_delay.rst:204:
 ENTRY (ns)   EXIT (ns)TID COMM DELTA (us)  
SYSCALL

WARNING: Line is 100 characters long (recommended limit is 79)
#1662 FILE: utilities/usdt-scripts/kernel_delay.rst:205:
--- --- --  
--  

WARNING: Line is 89 characters long (recommended limit is 79)
#1663 FILE: utilities/usdt-scripts/kernel_delay.rst:206:
   216182169493548621618216950312013359699 revalidator14
95  futex

WARNING: Line is 89 characters long (recommended limit is 79)
#1672 FILE: 

Re: [ovs-dev] [PATCH v6 7/7] system-dpdk: Run traffic tests.

2023-09-11 Thread Eelco Chaudron


On 11 Sep 2023, at 11:36, David Marchand wrote:

> On Fri, Sep 8, 2023 at 4:28 PM Eelco Chaudron  wrote:
>> On 6 Sep 2023, at 8:20, David Marchand wrote:
>>
>>> Integrate system-traffic.at tests as part of check-dpdk.
>>>
>>> Some tests that can't work with the userspace datapath are skipped by
>>> overriding some OVS_CHECK_* macros.
>>>
>>> ADD_VETH is implemented using the net/af_xdp DPDK driver.
>>>
>>> Signed-off-by: David Marchand 
>>
>> As discussed offline, this patch looks good; however, I have a sync issue
>> between testpmd and OVS-DPDK which is causing the following log message:
>>
>> 2023-09-08T13:15:18.160Z|00163|dpdk|INFO|VHOST_CONFIG: 
>> (/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0)
>>  vhost peer closed
>> 2023-09-08T13:15:18.160Z|00164|netdev_dpdk|INFO|vHost Device 
>> '/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0'
>>  connection has been destroyed
>> 2023-09-08T13:15:18.160Z|00165|dpdk|INFO|VHOST_CONFIG: 
>> (/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0)
>>  vhost-user client: socket created, fd: 24
>> 2023-09-08T13:15:18.160Z|00166|dpdk|WARN|VHOST_CONFIG: 
>> (/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0)
>>  failed to connect: Connection refused
>> 2023-09-08T13:15:18.160Z|00085|dpif_netdev|INFO|Performing pmd to rx queue 
>> assignment using cycles algorithm.
>> 2023-09-08T13:15:18.160Z|00167|dpdk|INFO|VHOST_CONFIG: 
>> (/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0)
>>  reconnecting...
>> 2023-09-08T13:15:18.160Z|00086|dpif_netdev|INFO|Core 58 on numa node 0 
>> assigned port 'dpdkvhostuserclient0' rx queue 0 (measured processing cycles 
>> 263044).
>>
>> The traffic test itself passes, but the test case fails because the
>> “WARN|VHOST_CONFIG” log message is present.
>>
>> I’ll let you figure out whether just adding the message to the allowed log
>> output is sufficient, or whether some synchronisation is needed.  Other than
>> that, the code looks good to me!
>
> It looks like testpmd is too quick to quit, and OVS tries to reconnect
> on the vhost user port socket once, before the port is deleted from
> the bridge.
> Inverting OVS_DPDK_STOP_TESTPMD() and AT_CHECK([ovs-vsctl del-port
> br10 dpdkvhostuserclient0], [], [stdout], [stderr]) does the job.
> But other tests have the same pattern... the difference is that only
> this test 5 has an ADD_VETH(), so something in this last patch of mine
> is changing the OVS shutdown timing.
>
> I will be away a few days, I'll have a look again when I am back.

Thanks David for doing the investigation. I’ll wait for a v7 once you get back 
:)

Cheers,

Eelco

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3] utilities: Add kernel_delay.py script to debug a busy Linux kernel.

2023-09-11 Thread Eelco Chaudron
This patch adds a utility that can be used to determine if
an issue is related to a lack of Linux kernel resources.

This tool is also featured in a Red Hat developers blog article:

  
https://developers.redhat.com/articles/2023/07/24/troubleshooting-open-vswitch-kernel-blame

Signed-off-by: Eelco Chaudron 

---
v2: Addressed review comments from Aaron.
v3: Changed wording in documentation.

 utilities/automake.mk   |4 
 utilities/usdt-scripts/kernel_delay.py  | 1402 +++
 utilities/usdt-scripts/kernel_delay.rst |  596 +
 3 files changed, 2002 insertions(+)
 create mode 100755 utilities/usdt-scripts/kernel_delay.py
 create mode 100644 utilities/usdt-scripts/kernel_delay.rst

diff --git a/utilities/automake.mk b/utilities/automake.mk
index 37d679f82..9a2114df4 100644
--- a/utilities/automake.mk
+++ b/utilities/automake.mk
@@ -23,6 +23,8 @@ scripts_DATA += utilities/ovs-lib
 usdt_SCRIPTS += \
utilities/usdt-scripts/bridge_loop.bt \
utilities/usdt-scripts/dpif_nl_exec_monitor.py \
+   utilities/usdt-scripts/kernel_delay.py \
+   utilities/usdt-scripts/kernel_delay.rst \
utilities/usdt-scripts/reval_monitor.py \
utilities/usdt-scripts/upcall_cost.py \
utilities/usdt-scripts/upcall_monitor.py
@@ -70,6 +72,8 @@ EXTRA_DIST += \
utilities/docker/debian/build-kernel-modules.sh \
utilities/usdt-scripts/bridge_loop.bt \
utilities/usdt-scripts/dpif_nl_exec_monitor.py \
+   utilities/usdt-scripts/kernel_delay.py \
+   utilities/usdt-scripts/kernel_delay.rst \
utilities/usdt-scripts/reval_monitor.py \
utilities/usdt-scripts/upcall_cost.py \
utilities/usdt-scripts/upcall_monitor.py
diff --git a/utilities/usdt-scripts/kernel_delay.py 
b/utilities/usdt-scripts/kernel_delay.py
new file mode 100755
index 0..afecca68f
--- /dev/null
+++ b/utilities/usdt-scripts/kernel_delay.py
@@ -0,0 +1,1402 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2022,2023 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Script information:
+# ---
+# This script allows a developer to quickly identify if the issue at hand
+# might be related to the kernel running out of resources or if it really is
+# an Open vSwitch issue.
+#
+# For documentation see the kernel_delay.rst file.
+#
+import argparse
+import datetime
+import os
+import pytz
+import psutil
+import re
+import sys
+import time
+
+import ctypes as ct
+
+try:
+from bcc import BPF, USDT, USDTException
+from bcc.syscall import syscalls, syscall_name
+except ModuleNotFoundError:
+print("ERROR: Can't find the BPF Compiler Collection (BCC) tools!")
+sys.exit(os.EX_OSFILE)
+
+from enum import IntEnum
+
+
+#
+# Actual eBPF source code
+#
+EBPF_SOURCE = """
+#include 
+#include 
+
+#define MONITOR_PID 
+
+enum {
+
+};
+
+struct event_t {
+u64 ts;
+u32 tid;
+u32 id;
+
+int user_stack_id;
+int kernel_stack_id;
+
+u32 syscall;
+u64 entry_ts;
+
+};
+
+BPF_RINGBUF_OUTPUT(events, );
+BPF_STACK_TRACE(stack_traces, );
+BPF_TABLE("percpu_array", uint32_t, uint64_t, dropcnt, 1);
+BPF_TABLE("percpu_array", uint32_t, uint64_t, trigger_miss, 1);
+
+BPF_ARRAY(capture_on, u64, 1);
+static bool capture_enabled(u64 pid_tgid) {
+int key = 0;
+u64 *ret;
+
+if ((pid_tgid >> 32) != MONITOR_PID)
+return false;
+
+ret = capture_on.lookup(&key);
+return ret && *ret == 1;
+}
+
+static inline bool capture_enabled__() {
+int key = 0;
+u64 *ret;
+
+ret = capture_on.lookup(&key);
+return ret && *ret == 1;
+}
+
+
+static struct event_t *get_event(uint32_t id) {
+struct event_t *event = events.ringbuf_reserve(sizeof(struct event_t));
+
+if (!event) {
+dropcnt.increment(0);
+return NULL;
+}
+
+event->id = id;
+event->ts = bpf_ktime_get_ns();
+event->tid = bpf_get_current_pid_tgid();
+
+return event;
+}
+
+static int start_trigger() {
+int key = 0;
+u64 *val = capture_on.lookup(&key);
+
+/* If the value is -1 we can't start as we are still processing the
+ * results in userspace. */
+if (!val || *val != 0) {
+trigger_miss.increment(0);
+return 0;
+}
+
+struct event_t *event = get_event(EVENT_START_TRIGGER);
+if (event) {
+   events.ringbuf_submit(event, 0);
+   *val = 1;
+} else {
+trigger_miss.increment(0);
+}
+return 0;
+}
+
+static 
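As a side note, the ring-buffer plumbing above follows the usual BCC pattern:
ringbuf_reserve()/ringbuf_submit() on the eBPF side and
open_ring_buffer()/ring_buffer_poll() on the Python side. Below is a minimal,
self-contained sketch of that pattern; it traces a different probe, is not part
of this patch, and assumes a kernel with BPF ring-buffer support plus a
reasonably recent BCC:

    from bcc import BPF

    EBPF_SRC = r"""
    #include <uapi/linux/ptrace.h>

    struct event_t {
        u64 ts;
        u32 tid;
    };

    BPF_RINGBUF_OUTPUT(events, 8);          /* 8 pages of ring buffer. */

    int kprobe__ksys_sync(struct pt_regs *ctx) {
        struct event_t *event = events.ringbuf_reserve(sizeof(struct event_t));
        if (!event)
            return 0;                       /* Ring buffer full; drop the event. */
        event->ts = bpf_ktime_get_ns();
        event->tid = bpf_get_current_pid_tgid();
        events.ringbuf_submit(event, 0);
        return 0;
    }
    """

    b = BPF(text=EBPF_SRC)

    def handle_event(ctx, data, size):
        event = b["events"].event(data)
        print("tid %u called sync() at %u ns" % (event.tid, event.ts))

    b["events"].open_ring_buffer(handle_event)
    while True:
        try:
            b.ring_buffer_poll()
        except KeyboardInterrupt:
            break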

Re: [ovs-dev] [PATCH v6 7/7] system-dpdk: Run traffic tests.

2023-09-11 Thread David Marchand
On Fri, Sep 8, 2023 at 4:28 PM Eelco Chaudron  wrote:
> On 6 Sep 2023, at 8:20, David Marchand wrote:
>
> > Integrate system-traffic.at tests as part of check-dpdk.
> >
> > Some tests that can't work with the userspace datapath are skipped by
> > overriding some OVS_CHECK_* macros.
> >
> > ADD_VETH is implemented using the net/af_xdp DPDK driver.
> >
> > Signed-off-by: David Marchand 
>
> As discussed offline, this patch looks good; however, I have a sync issue
> between testpmd and OVS-DPDK which is causing the following log message:
>
> 2023-09-08T13:15:18.160Z|00163|dpdk|INFO|VHOST_CONFIG: 
> (/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0)
>  vhost peer closed
> 2023-09-08T13:15:18.160Z|00164|netdev_dpdk|INFO|vHost Device 
> '/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0'
>  connection has been destroyed
> 2023-09-08T13:15:18.160Z|00165|dpdk|INFO|VHOST_CONFIG: 
> (/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0)
>  vhost-user client: socket created, fd: 24
> 2023-09-08T13:15:18.160Z|00166|dpdk|WARN|VHOST_CONFIG: 
> (/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0)
>  failed to connect: Connection refused
> 2023-09-08T13:15:18.160Z|00085|dpif_netdev|INFO|Performing pmd to rx queue 
> assignment using cycles algorithm.
> 2023-09-08T13:15:18.160Z|00167|dpdk|INFO|VHOST_CONFIG: 
> (/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0)
>  reconnecting...
> 2023-09-08T13:15:18.160Z|00086|dpif_netdev|INFO|Core 58 on numa node 0 
> assigned port 'dpdkvhostuserclient0' rx queue 0 (measured processing cycles 
> 263044).
>
> The traffic test itself passes, but the test case fails because the
> “WARN|VHOST_CONFIG” log message is present.
>
> I’ll let you figure out whether just adding the message to the allowed log
> output is sufficient, or whether some synchronisation is needed.  Other than
> that, the code looks good to me!

It looks like testpmd is too quick to quit, and OVS tries to reconnect
on the vhost user port socket once, before the port is deleted from
the bridge.
Inverting OVS_DPDK_STOP_TESTPMD() and AT_CHECK([ovs-vsctl del-port
br10 dpdkvhostuserclient0], [], [stdout], [stderr]) does the job.
But other tests have the same pattern... the difference is that only
this test 5 has an ADD_VETH(), so something in this last patch of mine
is changing the OVS shutdown timing.

I will be away a few days, I'll have a look again when I am back.


-- 
David Marchand

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4] hash: Add explicit typecasts to fix C++ compilation issues

2023-09-11 Thread Eelco Chaudron



On 9 Sep 2023, at 10:48, James Raphael Tiovalen wrote:

> C++ does not allow implicit conversion from void pointer to a specific
> pointer type. This change removes the cast from uint32_t* to void* in
> `hash_words_32aligned` and adds an explicit typecast from uint32_t* to
> uint64_t* in `hash_words_inline`.
>
> This issue was initially discovered on G++ v9.2.0 when a downstream C++
> application included the hash.h header file and was compiled on an AMD
> Ryzen Zen 2 CPU (__SSE4_2__ && __x86_64__). On the latest G++ version,
> it would throw an error. On the latest GCC version with `-Wc++-compat`,
> it would throw a warning.
>
> Acked-by: Mike Pattrick 
> Signed-off-by: James Raphael Tiovalen 
> ---
> Revisions:
>
> v1 -> v2: Fix build issue due to `-Wcast-align=strict` warning.
> v2 -> v3: Fix `-Warray-bounds=1` warning.
> v3 -> v4: Use `ALIGNED_CAST` as per Ilya's suggestion.
> ---
>  lib/hash.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/lib/hash.h b/lib/hash.h
> index 7b7f70c11..27ad782ea 100644
> --- a/lib/hash.h
> +++ b/lib/hash.h
> @@ -200,7 +200,7 @@ hash_finish32(uint64_t hash, uint32_t final, uint32_t 
> semifinal)
>  static inline uint32_t
>  hash_words_32aligned(const uint32_t *p_, size_t n_words, uint32_t basis)
>  {
> -const uint32_t *p = (const void *) p_;
> +const uint32_t *p = p_;

If the casting is no longer needed, we might as well rename the input parameter
from p_ to p and be done with it.

Including Mike in this conversation as he did the original implementation, so 
it would be good to understand his intention.

>  uint32_t hash1 = basis;
>  uint32_t hash2 = 0;
>  uint32_t hash3 = n_words;
> @@ -254,7 +254,7 @@ hash_words_32aligned(const uint32_t *p_, size_t n_words, 
> uint32_t basis)
>  static inline uint32_t
>  hash_words_inline(const uint32_t *p_, size_t n_words, uint32_t basis)
>  {
> -const uint64_t *p = (const void *)p_;
> +const uint64_t *p =  ALIGNED_CAST(const uint64_t *, p_);
  ^^
One space too many.

>  uint64_t hash1 = basis;
>  uint64_t hash2 = 0;
>  uint64_t hash3 = n_words;
> --
> 2.42.0
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] vswitchd, ofproto-dpif: Add support for CT limit

2023-09-11 Thread Ales Musil
On Sat, Sep 9, 2023 at 12:33 AM Ilya Maximets  wrote:

> On 8/10/23 14:48, Ales Musil wrote:
> > Add support for setting CT zone limit via ovs-vswitchd
> > database CT_Zone entry. The limit is propagated
> > into corresponding datapath.
>

Hi Ilya,


>
> Thanks for the patch!  I didn't try it, but see some comments inline.
>

thank you for the review.


>
> > In order to keep
> > backward compatibility the dpctl/ct-set-limits command
> > can overwrite the database settings.
>
> This doesn't seem like a good option.  If anything, the behavior
> should be the opposite, i.e. if the value is set in the database,
> it should overwrite whatever the user has set before via appctl.
> We can treat a zero as a 'whatever is currently set' value, and
> all other values set in the database have to be enforced.  Or the
> column in the database may be optional; in this case, we may treat
> an empty column as not configured and zero as unlimited.
>

In this version, 0 means the default system behavior, so I guess for backward
compatibility it would make sense to leave the setting untouched for 0.


>
> The dpctl/ct-set-limits may warn or fail in case of conflicting data.
> We also need a NEWS record about the new functionality.
>

Failing would be better IMO if we go with the database value being enforced
unless it is 0. WDYT?
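To make the semantics concrete, a small sketch of the precedence I have in mind
(purely illustrative Python pseudologic, not the ofproto code; None stands for
"no dpctl request"):

    def resolve_ct_zone_limit(db_limit, dpctl_limit):
        """Return the limit to program for a zone, or None to leave it alone.

        db_limit    -- value from the CT_Zone database column (0 = not set).
        dpctl_limit -- value requested via dpctl/ct-set-limits, or None.
        """
        if db_limit == 0:
            # The database does not enforce anything; keep whatever dpctl set.
            return dpctl_limit
        if dpctl_limit is not None and dpctl_limit != db_limit:
            # Conflicting request: fail instead of silently overriding the DB.
            raise ValueError("CT zone limit is already enforced by the database")
        return db_limit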


>
> >
> > Signed-off-by: Ales Musil 
> > ---
> >  ofproto/ofproto-dpif.c | 42 +++
> >  ofproto/ofproto-dpif.h |  5 
> >  ofproto/ofproto-provider.h |  8 ++
> >  ofproto/ofproto.c  | 23 +
> >  ofproto/ofproto.h  |  3 +++
> >  tests/system-traffic.at| 51 +-
> >  vswitchd/bridge.c  | 10 
> >  vswitchd/vswitch.ovsschema |  8 --
> >  vswitchd/vswitch.xml   |  5 
> >  9 files changed, 152 insertions(+), 3 deletions(-)
> >
> > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> > index e22ca757a..0c2818a5a 100644
> > --- a/ofproto/ofproto-dpif.c
> > +++ b/ofproto/ofproto-dpif.c
> > @@ -5522,6 +5522,8 @@ ct_zone_config_init(struct dpif_backer *backer)
> >  cmap_init(&backer->ct_zones);
> >  hmap_init(&backer->ct_tps);
> >  ovs_list_init(&backer->ct_tp_kill_list);
> > +ovs_list_init(&backer->ct_zone_limits_to_add);
> > +ovs_list_init(&backer->ct_zone_limits_to_del);
> >  clear_existing_ct_timeout_policies(backer);
> >  }
> >
> > @@ -5545,6 +5547,8 @@ ct_zone_config_uninit(struct dpif_backer *backer)
> >  id_pool_destroy(backer->tp_ids);
> >  cmap_destroy(&backer->ct_zones);
> >  hmap_destroy(&backer->ct_tps);
> > +ct_dpif_free_zone_limits(&backer->ct_zone_limits_to_add);
> > +ct_dpif_free_zone_limits(&backer->ct_zone_limits_to_del);
> >  }
> >
> >  static void
> > @@ -5625,6 +5629,42 @@ ct_del_zone_timeout_policy(const char
> *datapath_type, uint16_t zone_id)
> >  }
> >  }
> >
> > +static void
> > +ct_zone_limit_queue_update(const char *datapath_type, uint16_t zone_id,
> > +   uint32_t limit)
> > +{
> > +struct dpif_backer *backer = shash_find_data(&all_dpif_backers,
> > + datapath_type);
> > +if (!backer) {
> > +return;
> > +}
> > +
> > +struct ovs_list *queue = limit ?
> > +&backer->ct_zone_limits_to_add :
> > +&backer->ct_zone_limits_to_del;
> > +
> > +ct_dpif_push_zone_limit(queue, zone_id, limit, 0);
> > +}
> > +
> > +static void
> > +ct_zone_limits_commit(const char *datapath_type)
> > +{
> > +struct dpif_backer *backer = shash_find_data(&all_dpif_backers,
> > + datapath_type);
> > +if (!backer) {
> > +return;
> > +}
> > +
> > +if (!ovs_list_is_empty(&backer->ct_zone_limits_to_add)) {
> > +ct_dpif_set_limits(backer->dpif, NULL,
> > +   &backer->ct_zone_limits_to_add);
> > +ct_dpif_free_zone_limits(&backer->ct_zone_limits_to_add);
> > +}
> > +
> > +if (!ovs_list_is_empty(&backer->ct_zone_limits_to_del)) {
> > +ct_dpif_del_limits(backer->dpif,
> > +   &backer->ct_zone_limits_to_del);
> > +ct_dpif_free_zone_limits(&backer->ct_zone_limits_to_del);
> > +}
> > +}
> > +
> >  static void
> >  get_datapath_cap(const char *datapath_type, struct smap *cap)
> >  {
> > @@ -6914,4 +6954,6 @@ const struct ofproto_class ofproto_dpif_class = {
> >  ct_flush,   /* ct_flush */
> >  ct_set_zone_timeout_policy,
> >  ct_del_zone_timeout_policy,
> > +ct_zone_limit_queue_update,
> > +ct_zone_limits_commit
>
> Please, keep the trailing comma.
>
> >  };
> > diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
> > index d8e0cd37a..b863dd6fc 100644
> > --- a/ofproto/ofproto-dpif.h
> > +++ b/ofproto/ofproto-dpif.h
> > @@ -284,6 +284,11 @@ struct dpif_backer {
> >  feature than
> 'bt_support'. */
> >
> >  struct atomic_count tnl_count;
> > +
> > +struct ovs_list ct_zone_limits_to_add;  /* CT zone limits queued for
> > + * addition into datapath.