Re: [ovs-dev] [PATCH ovn v7 1/4] northd: Handle load balancer changes for a logical switch.
On Mon, Sep 11, 2023 at 9:01 AM wrote: > > From: Numan Siddique > > 'lb_data' engine node now also handles logical switch changes. > Its data maintains ls to lb related information. i.e if a > logical switch sw0 has lb1, lb2 and lb3 associated then > it stores this info in its data. And when a new load balancer > lb4 is associated to it, it stores this information in its > tracked data so that 'northd' engine node can handle it > accordingly. Tracked data will have information like: > changed ls -> {sw0 : {associated_lbs: [lb4]} > > The first handler 'northd_lb_data_handler_pre_od' is called before the > 'northd_nb_logical_switch_handler' handler and it just creates or > deletes the lb_datapaths hmap for the tracked lbs. > > The northd handler 'northd_lb_data_handler' updates the > ovn_lb_datapaths's 'nb_ls_map' bitmap accordingly. > > Eg. If the lb_data has the below tracked data: > > tracked_data = {'crupdated_lbs': [lb1, lb2], > 'deleted_lbs': [lb3], > 'crupdated_lb_groups': [lbg1, lbg2], > 'crupdated_ls_lbs': [{ls: sw0, assoc_lbs: [lb1], > {ls: sw1, assoc_lbs: [lb1, lb2]} > > The handler northd_lb_data_handler(), creates the > ovn_lb_datapaths object for lb1 and lb2 and deletes lb3 from > the ovn_lb_datapaths hmap. It does the same for the created or updated lb > groups lbg1 and lbg2 in the ovn_lbgrp_datapaths map. It also updates the > nb_ls_bitmap of lb1 for sw0 and sw1 and nb_ls_bitmap of lb2 for sw1. 
> > Reviewed-by: Ales Musil > Acked-by: Mark Michelson > Signed-off-by: Numan Siddique > --- > lib/lb.c | 5 +- > northd/en-lb-data.c | 176 +++ > northd/en-lb-data.h | 17 > northd/en-lflow.c| 6 ++ > northd/en-northd.c | 6 +- > northd/inc-proc-northd.c | 2 + > northd/northd.c | 83 +++--- > northd/northd.h | 4 +- > tests/ovn-northd.at | 56 + > 9 files changed, 322 insertions(+), 33 deletions(-) > > diff --git a/lib/lb.c b/lib/lb.c > index 6fd67e2218..e6c9dc2be2 100644 > --- a/lib/lb.c > +++ b/lib/lb.c > @@ -1088,7 +1088,10 @@ ovn_lb_datapaths_add_ls(struct ovn_lb_datapaths *lb_dps, size_t n, > struct ovn_datapath **ods) > { > for (size_t i = 0; i < n; i++) { > -bitmap_set1(lb_dps->nb_ls_map, ods[i]->index); > +if (!bitmap_is_set(lb_dps->nb_ls_map, ods[i]->index)) { > +bitmap_set1(lb_dps->nb_ls_map, ods[i]->index); > +lb_dps->n_nb_ls++; > +} > } > } > > diff --git a/northd/en-lb-data.c b/northd/en-lb-data.c > index 8acd9c8cb2..02b1bfd7a4 100644 > --- a/northd/en-lb-data.c > +++ b/northd/en-lb-data.c > @@ -39,6 +39,14 @@ static void lb_data_destroy(struct ed_type_lb_data *); > static void build_lbs(const struct nbrec_load_balancer_table *, >const struct nbrec_load_balancer_group_table *, >struct hmap *lbs, struct hmap *lb_groups); > +static void build_od_lb_map(const struct nbrec_logical_switch_table *, > + struct hmap *od_lb_map); > +static struct od_lb_data *find_od_lb_data(struct hmap *od_lb_map, > + const struct uuid *od_uuid); > +static void destroy_od_lb_data(struct od_lb_data *od_lb_data); > +static struct od_lb_data *create_od_lb_data(struct hmap *od_lb_map, > +const struct uuid *od_uuid); > + > static struct ovn_lb_group *create_lb_group( > const struct nbrec_load_balancer_group *, struct hmap *lbs, > struct hmap *lb_groups); > @@ -54,6 +62,7 @@ static struct crupdated_lbgrp * > struct tracked_lb_data *); > static void add_deleted_lbgrp_to_tracked_data( > struct ovn_lb_group *, struct tracked_lb_data *); > +static bool is_ls_lbs_changed(const struct 
nbrec_logical_switch *nbs); > > /* 'lb_data' engine node manages the NB load balancers and load balancer > * groups. For each NB LB, it creates 'struct ovn_northd_lb' and > @@ -80,9 +89,13 @@ en_lb_data_run(struct engine_node *node, void *data) > EN_OVSDB_GET(engine_get_input("NB_load_balancer", node)); > const struct nbrec_load_balancer_group_table *nb_lbg_table = > EN_OVSDB_GET(engine_get_input("NB_load_balancer_group", node)); > +const struct nbrec_logical_switch_table *nb_ls_table = > +EN_OVSDB_GET(engine_get_input("NB_logical_switch", node)); > > lb_data->tracked = false; > build_lbs(nb_lb_table, nb_lbg_table, _data->lbs, _data->lbgrps); > +build_od_lb_map(nb_ls_table, _data->ls_lb_map); > + > engine_set_node_state(node, EN_UPDATED); > } > > @@ -230,18 +243,104 @@ lb_data_load_balancer_group_handler(struct engine_node *node, void *data) > return true; > } > > +struct od_lb_data { > +struct hmap_node hmap_node; > +struct uuid
Re: [ovs-dev] [PATCH ovn v6 05/16] northd: Handle load balancer changes for a logical switch.
On Mon, Sep 11, 2023 at 5:58 PM Han Zhou wrote: > > On Mon, Sep 11, 2023 at 2:15 PM Numan Siddique wrote: > > > > On Mon, Sep 11, 2023 at 2:18 PM Han Zhou wrote: > > > > > > On Mon, Sep 11, 2023 at 9:48 AM Numan Siddique wrote: > > > > > > > > On Fri, Sep 1, 2023 at 12:52 AM Han Zhou wrote: > > > > > > > > > > On Thu, Aug 31, 2023 at 7:14 PM Han Zhou wrote: > > > > > > > > > > > > > > > > > > > > > > > > On Fri, Aug 18, 2023 at 1:58 AM wrote: > > > > > > > > > > > > > > From: Numan Siddique > > > > > > > > > > > > > > 'lb_data' engine node now also handles logical switch changes. > > > > > > > Its data maintains ls to lb related information. i.e if a > > > > > > > logical switch sw0 has lb1, lb2 and lb3 associated then > > > > > > > it stores this info in its data. And when a new load balancer > > > > > > > lb4 is associated to it, it stores this information in its > > > > > > > tracked data so that 'northd' engine node can handle it > > > > > > > accordingly. Tracked data will have information like: > > > > > > > changed ls -> {sw0 : {associated_lbs: [lb4]} > > > > > > > > > > > > > > In the 'northd' engne node, an additional handler for 'lb_data' > > > > > > > is added after the 'nbrec_logical_switch' changes. With this > patch > > > > > > > we now have 2 handlers for 'lb_data' in 'northd' engine node. > > > > > > > > > > > > > > > > > > > > > engine_add_input(_northd, _lb_data, > > > > > northd_lb_data_handler_pre_od); > > > > > > > engine_add_input(_northd, _nb_logical_switch, > > > > > > > northd_nb_logical_switch_handler); > > > > > > > engine_add_input(_northd, _lb_data, > > > > > northd_lb_data_handler_post_od); > > > > > > > > > > > > > > > > > > > > > The first handler 'northd_lb_data_handler_pre_od' is called > before > > > the > > > > > > > 'northd_nb_logical_switch_handler' handler and it just creates > or > > > > > > > deletes the lb_datapaths hmap for the tracked lbs. 
> > > > > > > > > > > > > > The second handler 'northd_lb_data_handler_post_od' is called > after > > > > > > > the 'northd_nb_logical_switch_handler' handler and it updates > the > > > > > > > ovn_lb_datapaths's 'nb_ls_map' bitmap. > > > > > > > > > > > > > > Eg. If the lb_data has the below tracked data: > > > > > > > > > > > > > > tracked_data = {'crupdated_lbs': [lb1, lb2], > > > > > > > 'deleted_lbs': [lb3], > > > > > > > 'crupdated_lb_groups': [lbg1, lbg2], > > > > > > > 'crupdated_ls_lbs': [{ls: sw0, assoc_lbs: [lb1], > > > > > > > {ls: sw1, assoc_lbs: [lb1, > > > lb2]} > > > > > > > > > > > > > > then the first handler northd_lb_data_handler_pre_od(), creates > the > > > > > > > ovn_lb_datapaths object for lb1 and lb2 and deletes lb3 from > > > > > > > the ovn_lb_datapaths hmap. Similarly for the created or > updated lb > > > > > > > groups lbg1 and lbg2. > > > > > > > > > > > > > > The second handler northd_lb_data_handler_post_od() updates the > > > > > > > nb_ls_bitmap of lb1 for sw0 and sw1 and nb_ls_bitmap of lb2 for > sw1. > > > > > > > > > > > > > > This second handler is added mainly so that the actual logical > > > switch > > > > > > > handler northd_nb_logical_switch_handler() has done modifying > the > > > > > > > 'od' struct. > > > > > > > > > > > > > > > > > > > > > Hi Han and others, > > > > > > > > Thanks for the reviews. > > > > > > > > > > > > > > Thanks for explaining the pre_od and post_od approach here! If we > do > > > want > > > > > to go with this approach I'd suggest adding comments also in the > code so > > > > > that people can understand the code easily without having to look > at the > > > > > commit history. > > > > > > However, I am concerned with the this approach. Here are the > reasons: > > > > > > > > > > > > 1. The I-P engine implementation didn't consider such usage that > add > > > > > dependency between two nodes twice, although it didn't explicitly > call > > > this > > > > > out in the documentation. 
If this is considered valid and necessary > use > > > > > case, we should call this out in the I-P engine documentation so > that > > > > > people modifying I-P engine implementation is aware of this and > won't > > > break > > > > > it, and of course we should carefully evaluate the side-effects that > > > can be > > > > > caused by this. > > > > > > > > > > > > 2. For the current use case I think it will lead to dangling > pointer > > > in > > > > > this _pre_od handling, because a LS may have been deleted from IDL, > > > while > > > > > this function still tries to access od->nbs, e.g. in > > > > > init_lb_for_datapaths(). Because of this, we should handle LB data > only > > > > > after handling LS changes. > > > > > > > > > > > > 3. From the explanation above I can see what is done by pre_od and > > > > > post_od but still don't understand why can't we do both steps after > > > > > handling LS changes. Could you explain why? And is it possible to > move >
Re: [ovs-dev] [PATCH ovn v6 05/16] northd: Handle load balancer changes for a logical switch.
On Mon, Sep 11, 2023 at 2:15 PM Numan Siddique wrote: > > On Mon, Sep 11, 2023 at 2:18 PM Han Zhou wrote: > > > > On Mon, Sep 11, 2023 at 9:48 AM Numan Siddique wrote: > > > > > > On Fri, Sep 1, 2023 at 12:52 AM Han Zhou wrote: > > > > > > > > On Thu, Aug 31, 2023 at 7:14 PM Han Zhou wrote: > > > > > > > > > > > > > > > > > > > > On Fri, Aug 18, 2023 at 1:58 AM wrote: > > > > > > > > > > > > From: Numan Siddique > > > > > > > > > > > > 'lb_data' engine node now also handles logical switch changes. > > > > > > Its data maintains ls to lb related information. i.e if a > > > > > > logical switch sw0 has lb1, lb2 and lb3 associated then > > > > > > it stores this info in its data. And when a new load balancer > > > > > > lb4 is associated to it, it stores this information in its > > > > > > tracked data so that 'northd' engine node can handle it > > > > > > accordingly. Tracked data will have information like: > > > > > > changed ls -> {sw0 : {associated_lbs: [lb4]} > > > > > > > > > > > > In the 'northd' engne node, an additional handler for 'lb_data' > > > > > > is added after the 'nbrec_logical_switch' changes. With this patch > > > > > > we now have 2 handlers for 'lb_data' in 'northd' engine node. > > > > > > > > > > > > > > > > > > engine_add_input(_northd, _lb_data, > > > > northd_lb_data_handler_pre_od); > > > > > > engine_add_input(_northd, _nb_logical_switch, > > > > > > northd_nb_logical_switch_handler); > > > > > > engine_add_input(_northd, _lb_data, > > > > northd_lb_data_handler_post_od); > > > > > > > > > > > > > > > > > > The first handler 'northd_lb_data_handler_pre_od' is called before > > the > > > > > > 'northd_nb_logical_switch_handler' handler and it just creates or > > > > > > deletes the lb_datapaths hmap for the tracked lbs. > > > > > > > > > > > > The second handler 'northd_lb_data_handler_post_od' is called after > > > > > > the 'northd_nb_logical_switch_handler' handler and it updates the > > > > > > ovn_lb_datapaths's 'nb_ls_map' bitmap. 
> > > > > > > > > > > > Eg. If the lb_data has the below tracked data: > > > > > > > > > > > > tracked_data = {'crupdated_lbs': [lb1, lb2], > > > > > > 'deleted_lbs': [lb3], > > > > > > 'crupdated_lb_groups': [lbg1, lbg2], > > > > > > 'crupdated_ls_lbs': [{ls: sw0, assoc_lbs: [lb1], > > > > > > {ls: sw1, assoc_lbs: [lb1, > > lb2]} > > > > > > > > > > > > then the first handler northd_lb_data_handler_pre_od(), creates the > > > > > > ovn_lb_datapaths object for lb1 and lb2 and deletes lb3 from > > > > > > the ovn_lb_datapaths hmap. Similarly for the created or updated lb > > > > > > groups lbg1 and lbg2. > > > > > > > > > > > > The second handler northd_lb_data_handler_post_od() updates the > > > > > > nb_ls_bitmap of lb1 for sw0 and sw1 and nb_ls_bitmap of lb2 for sw1. > > > > > > > > > > > > This second handler is added mainly so that the actual logical > > switch > > > > > > handler northd_nb_logical_switch_handler() has done modifying the > > > > > > 'od' struct. > > > > > > > > > > > > > > > > > Hi Han and others, > > > > > > Thanks for the reviews. > > > > > > > > > > > Thanks for explaining the pre_od and post_od approach here! If we do > > want > > > > to go with this approach I'd suggest adding comments also in the code so > > > > that people can understand the code easily without having to look at the > > > > commit history. > > > > > However, I am concerned with the this approach. Here are the reasons: > > > > > > > > > > 1. The I-P engine implementation didn't consider such usage that add > > > > dependency between two nodes twice, although it didn't explicitly call > > this > > > > out in the documentation. If this is considered valid and necessary use > > > > case, we should call this out in the I-P engine documentation so that > > > > people modifying I-P engine implementation is aware of this and won't > > break > > > > it, and of course we should carefully evaluate the side-effects that > > can be > > > > caused by this. > > > > > > > > > > 2. 
For the current use case I think it will lead to dangling pointer > > in > > > > this _pre_od handling, because a LS may have been deleted from IDL, > > while > > > > this function still tries to access od->nbs, e.g. in > > > > init_lb_for_datapaths(). Because of this, we should handle LB data only > > > > after handling LS changes. > > > > > > > > > > 3. From the explanation above I can see what is done by pre_od and > > > > post_od but still don't understand why can't we do both steps after > > > > handling LS changes. Could you explain why? And is it possible to move > > both > > > > steps to post_od? > > > > > > > > > In the earlier versions , lb_data engine node was not handling logical > > > switch or logical router changes. > > > It was only handling the load balancer changes. > > > northd engine node handler for lb_data had to determine the > > > associated or
Re: [ovs-dev] [PATCH ovn v6 05/16] northd: Handle load balancer changes for a logical switch.
On Mon, Sep 11, 2023 at 2:18 PM Han Zhou wrote: > > On Mon, Sep 11, 2023 at 9:48 AM Numan Siddique wrote: > > > > On Fri, Sep 1, 2023 at 12:52 AM Han Zhou wrote: > > > > > > On Thu, Aug 31, 2023 at 7:14 PM Han Zhou wrote: > > > > > > > > > > > > > > > > On Fri, Aug 18, 2023 at 1:58 AM wrote: > > > > > > > > > > From: Numan Siddique > > > > > > > > > > 'lb_data' engine node now also handles logical switch changes. > > > > > Its data maintains ls to lb related information. i.e if a > > > > > logical switch sw0 has lb1, lb2 and lb3 associated then > > > > > it stores this info in its data. And when a new load balancer > > > > > lb4 is associated to it, it stores this information in its > > > > > tracked data so that 'northd' engine node can handle it > > > > > accordingly. Tracked data will have information like: > > > > > changed ls -> {sw0 : {associated_lbs: [lb4]} > > > > > > > > > > In the 'northd' engne node, an additional handler for 'lb_data' > > > > > is added after the 'nbrec_logical_switch' changes. With this patch > > > > > we now have 2 handlers for 'lb_data' in 'northd' engine node. > > > > > > > > > > > > > > > engine_add_input(_northd, _lb_data, > > > northd_lb_data_handler_pre_od); > > > > > engine_add_input(_northd, _nb_logical_switch, > > > > > northd_nb_logical_switch_handler); > > > > > engine_add_input(_northd, _lb_data, > > > northd_lb_data_handler_post_od); > > > > > > > > > > > > > > > The first handler 'northd_lb_data_handler_pre_od' is called before > the > > > > > 'northd_nb_logical_switch_handler' handler and it just creates or > > > > > deletes the lb_datapaths hmap for the tracked lbs. > > > > > > > > > > The second handler 'northd_lb_data_handler_post_od' is called after > > > > > the 'northd_nb_logical_switch_handler' handler and it updates the > > > > > ovn_lb_datapaths's 'nb_ls_map' bitmap. > > > > > > > > > > Eg. 
If the lb_data has the below tracked data: > > > > > > > > > > tracked_data = {'crupdated_lbs': [lb1, lb2], > > > > > 'deleted_lbs': [lb3], > > > > > 'crupdated_lb_groups': [lbg1, lbg2], > > > > > 'crupdated_ls_lbs': [{ls: sw0, assoc_lbs: [lb1], > > > > > {ls: sw1, assoc_lbs: [lb1, > lb2]} > > > > > > > > > > then the first handler northd_lb_data_handler_pre_od(), creates the > > > > > ovn_lb_datapaths object for lb1 and lb2 and deletes lb3 from > > > > > the ovn_lb_datapaths hmap. Similarly for the created or updated lb > > > > > groups lbg1 and lbg2. > > > > > > > > > > The second handler northd_lb_data_handler_post_od() updates the > > > > > nb_ls_bitmap of lb1 for sw0 and sw1 and nb_ls_bitmap of lb2 for sw1. > > > > > > > > > > This second handler is added mainly so that the actual logical > switch > > > > > handler northd_nb_logical_switch_handler() has done modifying the > > > > > 'od' struct. > > > > > > > > > > > > > Hi Han and others, > > > > Thanks for the reviews. > > > > > > > > Thanks for explaining the pre_od and post_od approach here! If we do > want > > > to go with this approach I'd suggest adding comments also in the code so > > > that people can understand the code easily without having to look at the > > > commit history. > > > > However, I am concerned with the this approach. Here are the reasons: > > > > > > > > 1. The I-P engine implementation didn't consider such usage that add > > > dependency between two nodes twice, although it didn't explicitly call > this > > > out in the documentation. If this is considered valid and necessary use > > > case, we should call this out in the I-P engine documentation so that > > > people modifying I-P engine implementation is aware of this and won't > break > > > it, and of course we should carefully evaluate the side-effects that > can be > > > caused by this. > > > > > > > > 2. 
For the current use case I think it will lead to dangling pointer > in > > > this _pre_od handling, because a LS may have been deleted from IDL, > while > > > this function still tries to access od->nbs, e.g. in > > > init_lb_for_datapaths(). Because of this, we should handle LB data only > > > after handling LS changes. > > > > > > > > 3. From the explanation above I can see what is done by pre_od and > > > post_od but still don't understand why can't we do both steps after > > > handling LS changes. Could you explain why? And is it possible to move > both > > > steps to post_od? > > > > > > In the earlier versions , lb_data engine node was not handling logical > > switch or logical router changes. > > It was only handling the load balancer changes. > > northd engine node handler for lb_data had to determine the > > associated or disassociated load balancers/lb groups > > for a logical switch or router. And I could that properly using 2 > > handlers (pre_od and post_od). > > In later versions, I moved that logic to the lb_data engine node > > itself. But I thought I'll keep 2 handlers, the pre_od() to
Re: [ovs-dev] [PATCH ovn v6 05/16] northd: Handle load balancer changes for a logical switch.
On Mon, Sep 11, 2023 at 9:48 AM Numan Siddique wrote: > > On Fri, Sep 1, 2023 at 12:52 AM Han Zhou wrote: > > > > On Thu, Aug 31, 2023 at 7:14 PM Han Zhou wrote: > > > > > > > > > > > > On Fri, Aug 18, 2023 at 1:58 AM wrote: > > > > > > > > From: Numan Siddique > > > > > > > > 'lb_data' engine node now also handles logical switch changes. > > > > Its data maintains ls to lb related information. i.e if a > > > > logical switch sw0 has lb1, lb2 and lb3 associated then > > > > it stores this info in its data. And when a new load balancer > > > > lb4 is associated to it, it stores this information in its > > > > tracked data so that 'northd' engine node can handle it > > > > accordingly. Tracked data will have information like: > > > > changed ls -> {sw0 : {associated_lbs: [lb4]} > > > > > > > > In the 'northd' engne node, an additional handler for 'lb_data' > > > > is added after the 'nbrec_logical_switch' changes. With this patch > > > > we now have 2 handlers for 'lb_data' in 'northd' engine node. > > > > > > > > > > > > engine_add_input(_northd, _lb_data, > > northd_lb_data_handler_pre_od); > > > > engine_add_input(_northd, _nb_logical_switch, > > > > northd_nb_logical_switch_handler); > > > > engine_add_input(_northd, _lb_data, > > northd_lb_data_handler_post_od); > > > > > > > > > > > > The first handler 'northd_lb_data_handler_pre_od' is called before the > > > > 'northd_nb_logical_switch_handler' handler and it just creates or > > > > deletes the lb_datapaths hmap for the tracked lbs. > > > > > > > > The second handler 'northd_lb_data_handler_post_od' is called after > > > > the 'northd_nb_logical_switch_handler' handler and it updates the > > > > ovn_lb_datapaths's 'nb_ls_map' bitmap. > > > > > > > > Eg. 
If the lb_data has the below tracked data: > > > > > > > > tracked_data = {'crupdated_lbs': [lb1, lb2], > > > > 'deleted_lbs': [lb3], > > > > 'crupdated_lb_groups': [lbg1, lbg2], > > > > 'crupdated_ls_lbs': [{ls: sw0, assoc_lbs: [lb1], > > > > {ls: sw1, assoc_lbs: [lb1, lb2]} > > > > > > > > then the first handler northd_lb_data_handler_pre_od(), creates the > > > > ovn_lb_datapaths object for lb1 and lb2 and deletes lb3 from > > > > the ovn_lb_datapaths hmap. Similarly for the created or updated lb > > > > groups lbg1 and lbg2. > > > > > > > > The second handler northd_lb_data_handler_post_od() updates the > > > > nb_ls_bitmap of lb1 for sw0 and sw1 and nb_ls_bitmap of lb2 for sw1. > > > > > > > > This second handler is added mainly so that the actual logical switch > > > > handler northd_nb_logical_switch_handler() has done modifying the > > > > 'od' struct. > > > > > > > > > Hi Han and others, > > Thanks for the reviews. > > > > > Thanks for explaining the pre_od and post_od approach here! If we do want > > to go with this approach I'd suggest adding comments also in the code so > > that people can understand the code easily without having to look at the > > commit history. > > > However, I am concerned with the this approach. Here are the reasons: > > > > > > 1. The I-P engine implementation didn't consider such usage that add > > dependency between two nodes twice, although it didn't explicitly call this > > out in the documentation. If this is considered valid and necessary use > > case, we should call this out in the I-P engine documentation so that > > people modifying I-P engine implementation is aware of this and won't break > > it, and of course we should carefully evaluate the side-effects that can be > > caused by this. > > > > > > 2. For the current use case I think it will lead to dangling pointer in > > this _pre_od handling, because a LS may have been deleted from IDL, while > > this function still tries to access od->nbs, e.g. 
in > > init_lb_for_datapaths(). Because of this, we should handle LB data only > > after handling LS changes. > > > > > > 3. From the explanation above I can see what is done by pre_od and > > post_od but still don't understand why can't we do both steps after > > handling LS changes. Could you explain why? And is it possible to move both > > steps to post_od? > > > In the earlier versions , lb_data engine node was not handling logical > switch or logical router changes. > It was only handling the load balancer changes. > northd engine node handler for lb_data had to determine the > associated or disassociated load balancers/lb groups > for a logical switch or router. And I could that properly using 2 > handlers (pre_od and post_od). > In later versions, I moved that logic to the lb_data engine node > itself. But I thought I'll keep 2 handlers, the pre_od() to > just handle the load balancer/group changes and the post_od for > updating the datapath bitmap of the lb_dps. > IMO that seemed cleaner. > > But as you mentioned we can do all these in one handler. So in v7 I > just moved to one handler. > > > > > > > > > Please find some
Re: [ovs-dev] [PATCH ovn v6 07/16] northd: Sync SB Port bindings NAT column in a separate engine node.
On Fri, Sep 1, 2023 at 2:41 AM Han Zhou wrote: > > On Fri, Aug 18, 2023 at 1:58 AM wrote: > > > > From: Numan Siddique > > > > A new engine node 'sync_to_sb_pb' is added within 'sync_to_sb' > > node to sync NAT column of Port bindings table. This separation > > is required in order to add load balancer group I-P handling > > in 'northd' engine node (which is handled in the next commit). > > > > 'sync_to_sb_pb' engine node can be later expanded to sync other > > Port binding columns if required. > > > > Signed-off-by: Numan Siddique > > --- > > northd/en-sync-sb.c | 31 + > > northd/en-sync-sb.h | 4 + > > northd/inc-proc-northd.c | 8 +- > > northd/northd.c | 243 +-- > > northd/northd.h | 2 + > > 5 files changed, 174 insertions(+), 114 deletions(-) > > > > diff --git a/northd/en-sync-sb.c b/northd/en-sync-sb.c > > index 9832fce30a..552ed56452 100644 > > --- a/northd/en-sync-sb.c > > +++ b/northd/en-sync-sb.c > > @@ -254,6 +254,37 @@ sync_to_sb_lb_northd_handler(struct engine_node > *node, void *data OVS_UNUSED) > > return false; > > } > > > > +/* sync_to_sb_pb engine node functions. > > + * This engine node syncs the SB Port Bindings (partly). > > + * en_northd engine create the SB Port binding rows and > > + * updates most of the columns. > > + * This engine node updates the port binding columns which > > + * needs to be updated after northd engine is run. 
> > + */ > > + > > +void * > > +en_sync_to_sb_pb_init(struct engine_node *node OVS_UNUSED, > > + struct engine_arg *arg OVS_UNUSED) > > +{ > > +return NULL; > > +} > > + > > +void > > +en_sync_to_sb_pb_run(struct engine_node *node, void *data OVS_UNUSED) > > +{ > > +const struct engine_context *eng_ctx = engine_get_context(); > > +struct northd_data *northd_data = engine_get_input_data("northd", > node); > > + > > +sync_pbs(eng_ctx->ovnsb_idl_txn, _data->ls_ports); > > +engine_set_node_state(node, EN_UPDATED); > > +} > > + > > +void > > +en_sync_to_sb_pb_cleanup(void *data OVS_UNUSED) > > +{ > > + > > +} > > + > > /* static functions. */ > > static void > > sync_addr_set(struct ovsdb_idl_txn *ovnsb_txn, const char *name, > > diff --git a/northd/en-sync-sb.h b/northd/en-sync-sb.h > > index 06d2a57710..700d3340e4 100644 > > --- a/northd/en-sync-sb.h > > +++ b/northd/en-sync-sb.h > > @@ -22,4 +22,8 @@ void en_sync_to_sb_lb_run(struct engine_node *, void > *data); > > void en_sync_to_sb_lb_cleanup(void *data); > > bool sync_to_sb_lb_northd_handler(struct engine_node *, void *data > OVS_UNUSED); > > > > +void *en_sync_to_sb_pb_init(struct engine_node *, struct engine_arg *); > > +void en_sync_to_sb_pb_run(struct engine_node *, void *data); > > +void en_sync_to_sb_pb_cleanup(void *data); > > + > > #endif /* end of EN_SYNC_SB_H */ > > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c > > index 402c94e88c..dc8b880fd8 100644 > > --- a/northd/inc-proc-northd.c > > +++ b/northd/inc-proc-northd.c > > @@ -141,6 +141,7 @@ static ENGINE_NODE(sync_to_sb_addr_set, > "sync_to_sb_addr_set"); > > static ENGINE_NODE(fdb_aging, "fdb_aging"); > > static ENGINE_NODE(fdb_aging_waker, "fdb_aging_waker"); > > static ENGINE_NODE(sync_to_sb_lb, "sync_to_sb_lb"); > > +static ENGINE_NODE(sync_to_sb_pb, "sync_to_sb_pb"); > > static ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lb_data, "lb_data"); > > > > void inc_proc_northd_init(struct ovsdb_idl_loop *nb, > > @@ -215,13 +216,16 @@ void 
inc_proc_northd_init(struct ovsdb_idl_loop *nb, > > sync_to_sb_lb_northd_handler); > > engine_add_input(_sync_to_sb_lb, _sb_load_balancer, NULL); > > > > +engine_add_input(_sync_to_sb_pb, _northd, NULL); > > NULL handler here always triggers recompute for any en_northd change, which > causes a performance regression for VIF I-P. Before this change, only > tracked LSP changes will have related SB port-binding synced to SB, but now > it iterates through all the ls_ports and resync each of them even if there > is only one LSP in tracked_changes. > > I ran the scale test of the simulated ovn-k8s topology of 500 chassis x 50 > lsp, for each single VIF creation/deletion the "ovn-northd completion" time > increased from 25ms to 40ms - nearly doubled. > > I think a simple handler can be implemented so that only sync tracked LSPs, > and fallback to recompute only if changes are not tracked. Thanks for the reviews. I've added a handler in v7. Please take a look. Thanks Numan > > Thanks, > Han > > > + > > /* en_sync_to_sb engine node syncs the SB database tables from > > * the NB database tables. > > - * Right now this engine syncs the SB Address_Set table and > > - * SB Load_Balancer table. > > + * Right now this engine syncs the SB Address_Set table, > > + * SB Load_Balancer table and (partly) SB Port_Binding table. > > */ > > engine_add_input(_sync_to_sb, _sync_to_sb_addr_set, NULL); > >
Re: [ovs-dev] [PATCH ovn v6 05/16] northd: Handle load balancer changes for a logical switch.
On Fri, Sep 1, 2023 at 12:52 AM Han Zhou wrote: > > On Thu, Aug 31, 2023 at 7:14 PM Han Zhou wrote: > > > > > > > > On Fri, Aug 18, 2023 at 1:58 AM wrote: > > > > > > From: Numan Siddique > > > > > > 'lb_data' engine node now also handles logical switch changes. > > > Its data maintains ls to lb related information. i.e if a > > > logical switch sw0 has lb1, lb2 and lb3 associated then > > > it stores this info in its data. And when a new load balancer > > > lb4 is associated to it, it stores this information in its > > > tracked data so that 'northd' engine node can handle it > > > accordingly. Tracked data will have information like: > > > changed ls -> {sw0 : {associated_lbs: [lb4]} > > > > > > In the 'northd' engne node, an additional handler for 'lb_data' > > > is added after the 'nbrec_logical_switch' changes. With this patch > > > we now have 2 handlers for 'lb_data' in 'northd' engine node. > > > > > > > > > engine_add_input(_northd, _lb_data, > northd_lb_data_handler_pre_od); > > > engine_add_input(_northd, _nb_logical_switch, > > > northd_nb_logical_switch_handler); > > > engine_add_input(_northd, _lb_data, > northd_lb_data_handler_post_od); > > > > > > > > > The first handler 'northd_lb_data_handler_pre_od' is called before the > > > 'northd_nb_logical_switch_handler' handler and it just creates or > > > deletes the lb_datapaths hmap for the tracked lbs. > > > > > > The second handler 'northd_lb_data_handler_post_od' is called after > > > the 'northd_nb_logical_switch_handler' handler and it updates the > > > ovn_lb_datapaths's 'nb_ls_map' bitmap. > > > > > > Eg. 
If the lb_data has the below tracked data: > > > > > > tracked_data = {'crupdated_lbs': [lb1, lb2], > > > 'deleted_lbs': [lb3], > > > 'crupdated_lb_groups': [lbg1, lbg2], > > > 'crupdated_ls_lbs': [{ls: sw0, assoc_lbs: [lb1], > > > {ls: sw1, assoc_lbs: [lb1, lb2]} > > > > > > then the first handler northd_lb_data_handler_pre_od(), creates the > > > ovn_lb_datapaths object for lb1 and lb2 and deletes lb3 from > > > the ovn_lb_datapaths hmap. Similarly for the created or updated lb > > > groups lbg1 and lbg2. > > > > > > The second handler northd_lb_data_handler_post_od() updates the > > > nb_ls_bitmap of lb1 for sw0 and sw1 and nb_ls_bitmap of lb2 for sw1. > > > > > > This second handler is added mainly so that the actual logical switch > > > handler northd_nb_logical_switch_handler() has done modifying the > > > 'od' struct. > > > > > Hi Han and others, Thanks for the reviews. > > Thanks for explaining the pre_od and post_od approach here! If we do want > to go with this approach I'd suggest adding comments also in the code so > that people can understand the code easily without having to look at the > commit history. > > However, I am concerned with the this approach. Here are the reasons: > > > > 1. The I-P engine implementation didn't consider such usage that add > dependency between two nodes twice, although it didn't explicitly call this > out in the documentation. If this is considered valid and necessary use > case, we should call this out in the I-P engine documentation so that > people modifying I-P engine implementation is aware of this and won't break > it, and of course we should carefully evaluate the side-effects that can be > caused by this. > > > > 2. For the current use case I think it will lead to dangling pointer in > this _pre_od handling, because a LS may have been deleted from IDL, while > this function still tries to access od->nbs, e.g. in > init_lb_for_datapaths(). Because of this, we should handle LB data only > after handling LS changes. 
> > > > 3. From the explanation above I can see what is done by pre_od and > post_od but still don't understand why can't we do both steps after > handling LS changes. Could you explain why? And is it possible to move both > steps to post_od? In the earlier versions , lb_data engine node was not handling logical switch or logical router changes. It was only handling the load balancer changes. northd engine node handler for lb_data had to determine the associated or disassociated load balancers/lb groups for a logical switch or router. And I could that properly using 2 handlers (pre_od and post_od). In later versions, I moved that logic to the lb_data engine node itself. But I thought I'll keep 2 handlers, the pre_od() to just handle the load balancer/group changes and the post_od for updating the datapath bitmap of the lb_dps. IMO that seemed cleaner. But as you mentioned we can do all these in one handler. So in v7 I just moved to one handler. > > > > Please find some more comments below. > > > > > Signed-off-by: Numan Siddique > > > --- > > > lib/lb.c | 5 +- > > > northd/en-lb-data.c | 176 +++ > > > northd/en-lb-data.h | 17 > > > northd/en-lflow.c| 6 ++ >
Re: [ovs-dev] [PATCH ovn v7 2/4] northd: Handle load balancer group changes for a logical switch.
Bleep bloop. Greetings Numan Siddique, I am a robot and I have tried out your patch. Thanks for your contribution. I encountered some error that I wasn't expecting. See the details below. checkpatch: WARNING: Line is 80 characters long (recommended limit is 79) #323 FILE: northd/northd.c:5587: HMAP_FOR_EACH (crupdated_lbgrp, hmap_node, _lb_data->crupdated_lbgrps) { Lines checked: 447, Warnings: 1, Errors: 0 Please check this out. If you feel there has been an error, please email acon...@redhat.com Thanks, 0-day Robot ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH ovn v7 1/4] northd: Handle load balancer changes for a logical switch.
Bleep bloop. Greetings Numan Siddique, I am a robot and I have tried out your patch. Thanks for your contribution. I encountered some error that I wasn't expecting. See the details below. checkpatch: WARNING: Line has trailing whitespace #509 FILE: northd/northd.c:5432: * For every tracked 'lb' and 'lb_group' WARNING: Line has trailing whitespace #520 FILE: northd/northd.c:5443: *re-evaluates 'od->has_lb_vip' to reflect any changes to the lb vips. Lines checked: 787, Warnings: 2, Errors: 0 Please check this out. If you feel there has been an error, please email acon...@redhat.com Thanks, 0-day Robot ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH ovn v7 4/4] northd: Handle load balancer/group changes for a logical router.
From: Numan Siddique When a logical router gets updated due to load balancer or load balancer groups changes, it is now incrementally handled first in 'lb_data' engine node similar to how logical switch changes are handled. The tracking data of 'lb_data' is updated similarly so that northd engine handler - northd_handle_lb_data_changes() handles it. A new handler northd_handle_lr_changes() is added in the 'northd' engine node for logical router changes. This handler returns true if only load balancer or load balancer group columns are changed. It returns false for any other changes. northd_handle_lb_data_changes() also sets the logical router od's lb_ips accordingly. Below are the scale testing results done with these patches applied using ovn-heater. The test ran the scenario - ocp-500-density-heavy.yml [1]. With these patches applied (with load balancer I-P handling in northd engine node) the resuts are: --- Min (s) Median (s) 90%ile (s) 99%ile (s) Max (s) Mean (s)Total (s) Count Failed --- Iteration Total 0.1313631.1899943.213526 4.3081344.3945621.385713173.214153 125 0 Namespace.add_ports 0.0051760.0056110.006476 0.0198670.0242060.0060580.757188125 0 WorkerNode.bind_port0.0337760.0463430.054414 0.0617190.0636130.04681511.703773 250 0 WorkerNode.ping_port0.0051560.0069592.044939 3.6553284.2414960.627103156.775832 250 0 --- The results with the present main are: --- Min (s) Median (s) 90%ile (s) 99%ile (s) Max (s) Mean (s)Total (s) Count Failed --- Iteration Total 3.2337954.3649265.400982 6.4128037.4097574.792270599.033790 125 0 Namespace.add_ports 0.0052300.0065640.007379 0.0190600.0374900.0072230.902930125 0 WorkerNode.bind_port0.0338640.0440520.049608 0.0548490.0561960.04400511.001231 250 0 WorkerNode.ping_port0.0053342.0604775.222422 6.2673327.2840012.323020580.754964 250 0 --- Few observations: - The total time taken has come down significantly from 599 seconds to 173. - 99%ile with these patches is 4.3 seconds compared to 6.4 seconds for the main. 
- 90%ile with these patches is 3.2 seconds compared to 5.4 seconds for the main.
[ovs-dev] [PATCH ovn v7 3/4] northd: Sync SB Port bindings NAT column in a separate engine node.
From: Numan Siddique A new engine node 'sync_to_sb_pb' is added within 'sync_to_sb' node to sync NAT column of Port bindings table. This separation is required in order to add load balancer group I-P handling in 'northd' engine node (which is handled in the next commit). 'sync_to_sb_pb' engine node can be later expanded to sync other Port binding columns if required. Reviewed-by: Ales Musil Acked-by: Mark Michelson Signed-off-by: Numan Siddique --- northd/en-sync-sb.c | 52 northd/en-sync-sb.h | 5 + northd/inc-proc-northd.c | 8 +- northd/northd.c | 274 +++ northd/northd.h | 3 + tests/ovn-northd.at | 17 ++- 6 files changed, 239 insertions(+), 120 deletions(-) diff --git a/northd/en-sync-sb.c b/northd/en-sync-sb.c index f6dbd7b2b6..47ace03417 100644 --- a/northd/en-sync-sb.c +++ b/northd/en-sync-sb.c @@ -244,6 +244,58 @@ sync_to_sb_lb_northd_handler(struct engine_node *node, void *data OVS_UNUSED) return false; } +/* sync_to_sb_pb engine node functions. + * This engine node syncs the SB Port Bindings (partly). + * en_northd engine create the SB Port binding rows and + * updates most of the columns. + * This engine node updates the port binding columns which + * needs to be updated after northd engine is run. 
+ */ + +void * +en_sync_to_sb_pb_init(struct engine_node *node OVS_UNUSED, + struct engine_arg *arg OVS_UNUSED) +{ +return NULL; +} + +void +en_sync_to_sb_pb_run(struct engine_node *node, void *data OVS_UNUSED) +{ +const struct engine_context *eng_ctx = engine_get_context(); +struct northd_data *northd_data = engine_get_input_data("northd", node); + +sync_pbs(eng_ctx->ovnsb_idl_txn, _data->ls_ports); +engine_set_node_state(node, EN_UPDATED); +} + +void +en_sync_to_sb_pb_cleanup(void *data OVS_UNUSED) +{ + +} + +bool +sync_to_sb_pb_northd_handler(struct engine_node *node, void *data OVS_UNUSED) +{ +const struct engine_context *eng_ctx = engine_get_context(); +if (!eng_ctx->ovnsb_idl_txn) { +return false; +} + +struct northd_data *nd = engine_get_input_data("northd", node); +if (!nd->change_tracked) { +return false; +} + +if (!sync_pbs_for_northd_ls_changes(>tracked_ls_changes)) { +return false; +} + +engine_set_node_state(node, EN_UPDATED); +return true; +} + /* static functions. */ static void sync_addr_set(struct ovsdb_idl_txn *ovnsb_txn, const char *name, diff --git a/northd/en-sync-sb.h b/northd/en-sync-sb.h index 06d2a57710..f08565eee1 100644 --- a/northd/en-sync-sb.h +++ b/northd/en-sync-sb.h @@ -22,4 +22,9 @@ void en_sync_to_sb_lb_run(struct engine_node *, void *data); void en_sync_to_sb_lb_cleanup(void *data); bool sync_to_sb_lb_northd_handler(struct engine_node *, void *data OVS_UNUSED); +void *en_sync_to_sb_pb_init(struct engine_node *, struct engine_arg *); +void en_sync_to_sb_pb_run(struct engine_node *, void *data); +void en_sync_to_sb_pb_cleanup(void *data); +bool sync_to_sb_pb_northd_handler(struct engine_node *, void *data OVS_UNUSED); + #endif /* end of EN_SYNC_SB_H */ diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c index 303b58d43f..e9e28c4bea 100644 --- a/northd/inc-proc-northd.c +++ b/northd/inc-proc-northd.c @@ -144,6 +144,7 @@ static ENGINE_NODE_WITH_CLEAR_TRACK_DATA(port_group, "port_group"); static ENGINE_NODE(fdb_aging, 
"fdb_aging"); static ENGINE_NODE(fdb_aging_waker, "fdb_aging_waker"); static ENGINE_NODE(sync_to_sb_lb, "sync_to_sb_lb"); +static ENGINE_NODE(sync_to_sb_pb, "sync_to_sb_pb"); static ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lb_data, "lb_data"); void inc_proc_northd_init(struct ovsdb_idl_loop *nb, @@ -228,15 +229,20 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, sync_to_sb_lb_northd_handler); engine_add_input(_sync_to_sb_lb, _sb_load_balancer, NULL); +engine_add_input(_sync_to_sb_pb, _northd, + sync_to_sb_pb_northd_handler); + /* en_sync_to_sb engine node syncs the SB database tables from * the NB database tables. * Right now this engine syncs the SB Address_Set table, Port_Group table - * SB Meter/Meter_Band tables and SB Load_Balancer table. + * SB Meter/Meter_Band tables and SB Load_Balancer table and + * (partly) SB Port_Binding table. */ engine_add_input(_sync_to_sb, _sync_to_sb_addr_set, NULL); engine_add_input(_sync_to_sb, _port_group, NULL); engine_add_input(_sync_to_sb, _sync_meters, NULL); engine_add_input(_sync_to_sb, _sync_to_sb_lb, NULL); +engine_add_input(_sync_to_sb, _sync_to_sb_pb, NULL); engine_add_input(_sync_from_sb, _northd, sync_from_sb_northd_handler); diff --git a/northd/northd.c b/northd/northd.c index c5e1d1c5a4..5651ee38c5 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -3527,8 +3527,6 @@ ovn_port_update_sbrec(struct ovsdb_idl_txn *ovnsb_txn,
[ovs-dev] [PATCH ovn v7 2/4] northd: Handle load balancer group changes for a logical switch.
From: Numan Siddique For a given load balancer group 'A', northd engine data maintains a bitmap of datapaths associated to this lb group. So when lb group 'A' gets associated to a logical switch 's1', the bitmap index of 's1' is set in its bitmap. In order to handle the load balancer group changes incrementally for a logical switch, we need to set and clear the bitmap bits accordingly. And this patch does it. Reviewed-by: Ales Musil Acked-by: Mark Michelson Signed-off-by: Numan Siddique --- northd/en-lb-data.c | 102 northd/en-lb-data.h | 4 ++ northd/northd.c | 62 +++ tests/ovn-northd.at | 30 + 4 files changed, 154 insertions(+), 44 deletions(-) diff --git a/northd/en-lb-data.c b/northd/en-lb-data.c index 02b1bfd7a4..3604ba8226 100644 --- a/northd/en-lb-data.c +++ b/northd/en-lb-data.c @@ -63,6 +63,7 @@ static struct crupdated_lbgrp * static void add_deleted_lbgrp_to_tracked_data( struct ovn_lb_group *, struct tracked_lb_data *); static bool is_ls_lbs_changed(const struct nbrec_logical_switch *nbs); +static bool is_ls_lbgrps_changed(const struct nbrec_logical_switch *nbs); /* 'lb_data' engine node manages the NB load balancers and load balancer * groups. 
For each NB LB, it creates 'struct ovn_northd_lb' and @@ -271,12 +272,15 @@ lb_data_logical_switch_handler(struct engine_node *node, void *data) destroy_od_lb_data(od_lb_data); } } else { -if (!is_ls_lbs_changed(nbs)) { +bool ls_lbs_changed = is_ls_lbs_changed(nbs); +bool ls_lbgrps_changed = is_ls_lbgrps_changed(nbs); +if (!ls_lbs_changed && !ls_lbgrps_changed) { continue; } struct crupdated_od_lb_data *codlb = xzalloc(sizeof *codlb); codlb->od_uuid = nbs->header_.uuid; uuidset_init(>assoc_lbs); +uuidset_init(>assoc_lbgrps); struct od_lb_data *od_lb_data = find_od_lb_data(_data->ls_lb_map, >header_.uuid); @@ -285,38 +289,66 @@ lb_data_logical_switch_handler(struct engine_node *node, void *data) >header_.uuid); } -struct uuidset *pre_lb_uuids = od_lb_data->lbs; -od_lb_data->lbs = xzalloc(sizeof *od_lb_data->lbs); -uuidset_init(od_lb_data->lbs); - -for (size_t i = 0; i < nbs->n_load_balancer; i++) { -const struct uuid *lb_uuid = ->load_balancer[i]->header_.uuid; -uuidset_insert(od_lb_data->lbs, lb_uuid); +if (ls_lbs_changed) { +struct uuidset *pre_lb_uuids = od_lb_data->lbs; +od_lb_data->lbs = xzalloc(sizeof *od_lb_data->lbs); +uuidset_init(od_lb_data->lbs); + +for (size_t i = 0; i < nbs->n_load_balancer; i++) { +const struct uuid *lb_uuid = +>load_balancer[i]->header_.uuid; +uuidset_insert(od_lb_data->lbs, lb_uuid); + +struct uuidset_node *unode = uuidset_find(pre_lb_uuids, +lb_uuid); + +if (!unode || (nbrec_load_balancer_row_get_seqno( +nbs->load_balancer[i], +OVSDB_IDL_CHANGE_MODIFY) > 0)) { +/* Add this lb to the tracked data. 
*/ +uuidset_insert(>assoc_lbs, lb_uuid); +changed = true; +} + +if (unode) { +uuidset_delete(pre_lb_uuids, unode); +} +} +if (!uuidset_is_empty(pre_lb_uuids)) { +trk_lb_data->has_dissassoc_lbs_from_od = true; +changed = true; +} -struct uuidset_node *unode = uuidset_find(pre_lb_uuids, - lb_uuid); +uuidset_destroy(pre_lb_uuids); +free(pre_lb_uuids); +} -if (!unode || (nbrec_load_balancer_row_get_seqno( -nbs->load_balancer[i], OVSDB_IDL_CHANGE_MODIFY) > 0)) { -/* Add this lb to the tracked data. */ -uuidset_insert(>assoc_lbs, lb_uuid); -changed = true; +if (ls_lbgrps_changed) { +struct uuidset *pre_lbgrp_uuids = od_lb_data->lbgrps; +od_lb_data->lbgrps = xzalloc(sizeof *od_lb_data->lbgrps); +uuidset_init(od_lb_data->lbgrps); +for (size_t i = 0; i < nbs->n_load_balancer_group; i++) { +const struct uuid *lbg_uuid = +>load_balancer_group[i]->header_.uuid; +
[ovs-dev] [PATCH ovn v7 1/4] northd: Handle load balancer changes for a logical switch.
From: Numan Siddique 'lb_data' engine node now also handles logical switch changes. Its data maintains ls to lb related information. i.e if a logical switch sw0 has lb1, lb2 and lb3 associated then it stores this info in its data. And when a new load balancer lb4 is associated to it, it stores this information in its tracked data so that 'northd' engine node can handle it accordingly. Tracked data will have information like: changed ls -> {sw0 : {associated_lbs: [lb4]} The first handler 'northd_lb_data_handler_pre_od' is called before the 'northd_nb_logical_switch_handler' handler and it just creates or deletes the lb_datapaths hmap for the tracked lbs. The northd handler 'northd_lb_data_handler' updates the ovn_lb_datapaths's 'nb_ls_map' bitmap accordingly. Eg. If the lb_data has the below tracked data: tracked_data = {'crupdated_lbs': [lb1, lb2], 'deleted_lbs': [lb3], 'crupdated_lb_groups': [lbg1, lbg2], 'crupdated_ls_lbs': [{ls: sw0, assoc_lbs: [lb1], {ls: sw1, assoc_lbs: [lb1, lb2]} The handler northd_lb_data_handler(), creates the ovn_lb_datapaths object for lb1 and lb2 and deletes lb3 from the ovn_lb_datapaths hmap. It does the same for the created or updated lb groups lbg1 and lbg2 in the ovn_lbgrp_datapaths map. It also updates the nb_ls_bitmap of lb1 for sw0 and sw1 and nb_ls_bitmap of lb2 for sw1. 
Reviewed-by: Ales Musil Acked-by: Mark Michelson Signed-off-by: Numan Siddique --- lib/lb.c | 5 +- northd/en-lb-data.c | 176 +++ northd/en-lb-data.h | 17 northd/en-lflow.c| 6 ++ northd/en-northd.c | 6 +- northd/inc-proc-northd.c | 2 + northd/northd.c | 83 +++--- northd/northd.h | 4 +- tests/ovn-northd.at | 56 + 9 files changed, 322 insertions(+), 33 deletions(-) diff --git a/lib/lb.c b/lib/lb.c index 6fd67e2218..e6c9dc2be2 100644 --- a/lib/lb.c +++ b/lib/lb.c @@ -1088,7 +1088,10 @@ ovn_lb_datapaths_add_ls(struct ovn_lb_datapaths *lb_dps, size_t n, struct ovn_datapath **ods) { for (size_t i = 0; i < n; i++) { -bitmap_set1(lb_dps->nb_ls_map, ods[i]->index); +if (!bitmap_is_set(lb_dps->nb_ls_map, ods[i]->index)) { +bitmap_set1(lb_dps->nb_ls_map, ods[i]->index); +lb_dps->n_nb_ls++; +} } } diff --git a/northd/en-lb-data.c b/northd/en-lb-data.c index 8acd9c8cb2..02b1bfd7a4 100644 --- a/northd/en-lb-data.c +++ b/northd/en-lb-data.c @@ -39,6 +39,14 @@ static void lb_data_destroy(struct ed_type_lb_data *); static void build_lbs(const struct nbrec_load_balancer_table *, const struct nbrec_load_balancer_group_table *, struct hmap *lbs, struct hmap *lb_groups); +static void build_od_lb_map(const struct nbrec_logical_switch_table *, + struct hmap *od_lb_map); +static struct od_lb_data *find_od_lb_data(struct hmap *od_lb_map, + const struct uuid *od_uuid); +static void destroy_od_lb_data(struct od_lb_data *od_lb_data); +static struct od_lb_data *create_od_lb_data(struct hmap *od_lb_map, +const struct uuid *od_uuid); + static struct ovn_lb_group *create_lb_group( const struct nbrec_load_balancer_group *, struct hmap *lbs, struct hmap *lb_groups); @@ -54,6 +62,7 @@ static struct crupdated_lbgrp * struct tracked_lb_data *); static void add_deleted_lbgrp_to_tracked_data( struct ovn_lb_group *, struct tracked_lb_data *); +static bool is_ls_lbs_changed(const struct nbrec_logical_switch *nbs); /* 'lb_data' engine node manages the NB load balancers and load balancer * groups. 
For each NB LB, it creates 'struct ovn_northd_lb' and @@ -80,9 +89,13 @@ en_lb_data_run(struct engine_node *node, void *data) EN_OVSDB_GET(engine_get_input("NB_load_balancer", node)); const struct nbrec_load_balancer_group_table *nb_lbg_table = EN_OVSDB_GET(engine_get_input("NB_load_balancer_group", node)); +const struct nbrec_logical_switch_table *nb_ls_table = +EN_OVSDB_GET(engine_get_input("NB_logical_switch", node)); lb_data->tracked = false; build_lbs(nb_lb_table, nb_lbg_table, _data->lbs, _data->lbgrps); +build_od_lb_map(nb_ls_table, _data->ls_lb_map); + engine_set_node_state(node, EN_UPDATED); } @@ -230,18 +243,104 @@ lb_data_load_balancer_group_handler(struct engine_node *node, void *data) return true; } +struct od_lb_data { +struct hmap_node hmap_node; +struct uuid od_uuid; +struct uuidset *lbs; +struct uuidset *lbgrps; +}; + +bool +lb_data_logical_switch_handler(struct engine_node *node, void *data) +{ +struct ed_type_lb_data *lb_data = (struct ed_type_lb_data *) data; +const struct
[ovs-dev] [PATCH ovn v7 0/4] northd: I-P for load balancer and lb groups
From: Numan Siddique This patch series adds the support to handle load balancer and load balancer group changes incrementally in the "northd" engine node. Changes to logical switches and router's load balancer and load balancer group columns are also handled incrementally provided other columns do not change. v6 of the series also included lflow I-P handling. But v7 drops these patches as there are some concerns with the obj dep mgr usage. lflow I-P handling patches will be submitted separately. Below are the scale testing results done with these patches applied using ovn-heater. The test ran the scenario - ocp-500-density-heavy.yml [1]. With these patches applied (with load balancer I-P handling only in northd engine node) the resuts are: --- Min (s) Median (s) 90%ile (s) 99%ile (s) Max (s) Mean (s)Total (s) Count Failed --- Iteration Total 0.1313631.1899943.213526 4.3081344.3945621.385713173.214153 125 0 Namespace.add_ports 0.0051760.0056110.006476 0.0198670.0242060.0060580.757188125 0 WorkerNode.bind_port0.0337760.0463430.054414 0.0617190.0636130.04681511.703773 250 0 WorkerNode.ping_port0.0051560.0069592.044939 3.6553284.2414960.627103156.775832 250 0 --- The results with the present main (Result 3) are: --- Min (s) Median (s) 90%ile (s) 99%ile (s) Max (s) Mean (s)Total (s) Count Failed --- Iteration Total 3.2337954.3649265.400982 6.4128037.4097574.792270599.033790 125 0 Namespace.add_ports 0.0052300.0065640.007379 0.0190600.0374900.0072230.902930125 0 WorkerNode.bind_port0.0338640.0440520.049608 0.0548490.0561960.04400511.001231 250 0 WorkerNode.ping_port0.0053342.0604775.222422 6.2673327.2840012.323020580.754964 250 0 --- v6 -> v7 --- * First 4 patches of v6 are merged in main and branch-23.09 and patches 9 to 16 are dropped. * v7 only has 4 patches now. * Addressed review comments. There is only one handler for lb_data engine input in northd engine node - northd_handle_lb_data_changes(). 
In v6 and earlier there were 2 handle functions - northd_handle_lb_data_changes_pre_od() and northd_handle_lb_data_changes_post_od(). v5 -> v6 --- * Rebased. Added 2 more patches (p15 and p16) for LR NAT I-P handling. v4 -> v5 --- * 6 new patches are added to the series which handles the LB changes in the lflow engine node. v3 -> v4 --- * Covered more test scearios. * Found few issues and fixed them. v3 was not handling the scenario of a vip getting added or removed from a load balancer. v2 -> v3 * v2 was very inefficient in handling the load balancer group changes and in associating the load balancers of the lb group to the datapaths. This was the main reason for the regression in the full recompute time taken. v3 addressed these by more efficiently handling the lb group changes incrementally. Numan Siddique (4): northd: Handle load balancer changes for a logical switch. northd: Handle load balancer group changes for a logical switch. northd: Sync SB Port bindings NAT column in a separate engine node. northd: Handle load balancer/group changes for a logical router. lib/lb.c | 51 ++- lib/lb.h | 9 + northd/en-lb-data.c | 431 - northd/en-lb-data.h | 35 +++ northd/en-lflow.c| 6 + northd/en-northd.c | 26 +- northd/en-northd.h | 1 + northd/en-sync-sb.c | 52 +++ northd/en-sync-sb.h
Re: [ovs-dev] [PATCH v6 2/3] dpif: Probe support for OVS_ACTION_ATTR_DROP.
On Sat, Sep 09, 2023 at 01:41:05AM +0200, Ilya Maximets wrote: > On 9/1/23 21:42, Eric Garver wrote: > > Kernel support is being added for this action. As such, we need to probe > > the datapath for support. > > > > Signed-off-by: Eric Garver > > --- > > include/linux/openvswitch.h | 2 +- > > lib/dpif.c | 6 - > > lib/dpif.h | 1 - > > ofproto/ofproto-dpif.c | 52 +++-- > > 4 files changed, 51 insertions(+), 10 deletions(-) > > > > diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h > > index a265e05ad253..fce6456f47c8 100644 > > --- a/include/linux/openvswitch.h > > +++ b/include/linux/openvswitch.h > > @@ -1086,11 +1086,11 @@ enum ovs_action_attr { > > OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */ > > OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */ > > OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */ > > + OVS_ACTION_ATTR_DROP, /* u32 xlate_error. */ > > > > #ifndef __KERNEL__ > > OVS_ACTION_ATTR_TUNNEL_PUSH, /* struct ovs_action_push_tnl*/ > > OVS_ACTION_ATTR_TUNNEL_POP,/* u32 port number. */ > > - OVS_ACTION_ATTR_DROP, /* u32 xlate_error. */ > > OVS_ACTION_ATTR_LB_OUTPUT, /* u32 bond-id. 
*/ > > #endif > > __OVS_ACTION_ATTR_MAX,/* Nothing past this will be accepted > > diff --git a/lib/dpif.c b/lib/dpif.c > > index 0f480bec48d0..28fa40879655 100644 > > --- a/lib/dpif.c > > +++ b/lib/dpif.c > > @@ -1934,12 +1934,6 @@ dpif_supports_tnl_push_pop(const struct dpif *dpif) > > return dpif_is_netdev(dpif); > > } > > > > -bool > > -dpif_supports_explicit_drop_action(const struct dpif *dpif) > > -{ > > -return dpif_is_netdev(dpif); > > -} > > - > > bool > > dpif_supports_lb_output_action(const struct dpif *dpif) > > { > > diff --git a/lib/dpif.h b/lib/dpif.h > > index 0f2dc2ef3c55..8dc5da45b3ef 100644 > > --- a/lib/dpif.h > > +++ b/lib/dpif.h > > @@ -940,7 +940,6 @@ int dpif_get_pmds_for_port(const struct dpif * dpif, > > odp_port_t port_no, > > > > char *dpif_get_dp_version(const struct dpif *); > > bool dpif_supports_tnl_push_pop(const struct dpif *); > > -bool dpif_supports_explicit_drop_action(const struct dpif *); > > bool dpif_synced_dp_layers(struct dpif *); > > > > /* Log functions. 
*/ > > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c > > index e22ca757ac35..5ed849e463c2 100644 > > --- a/ofproto/ofproto-dpif.c > > +++ b/ofproto/ofproto-dpif.c > > @@ -72,6 +72,8 @@ > > #include "util.h" > > #include "uuid.h" > > #include "vlan-bitmap.h" > > +#include "dpif-netdev.h" > > +#include "netdev-offload.h" > > > > VLOG_DEFINE_THIS_MODULE(ofproto_dpif); > > > > @@ -220,6 +222,7 @@ static void ofproto_unixctl_init(void); > > static void ct_zone_config_init(struct dpif_backer *backer); > > static void ct_zone_config_uninit(struct dpif_backer *backer); > > static void ct_zone_timeout_policy_sweep(struct dpif_backer *backer); > > +static bool check_drop_action(struct dpif_backer *backer); > > > > static inline struct ofproto_dpif * > > ofproto_dpif_cast(const struct ofproto *ofproto) > > @@ -390,6 +393,15 @@ type_run(const char *type) > > udpif_set_threads(backer->udpif, n_handlers, n_revalidators); > > } > > > > +/* For TC, if vswitchd started with other_config:hw-offload set to > > "false", > > + * and the configuration is now "true", then we need to re-probe > > datapath > > + * support for the "drop" action. */ > > +if (backer->rt_support.explicit_drop_action > > +&& netdev_is_flow_api_enabled() && !dpif_is_netdev(backer->dpif)) { > > +backer->rt_support.explicit_drop_action = > > check_drop_action(backer); > > This seems dangerous to me. The value is accessed concurrently from > the other threads, so the update will have to be atomic. > > Also, this check is so specific and re-checks one particular thing. > Might be hard to maintain. > > OVS requires re-start after enabling HW offload, so the feature > checking and ll other potential quirks can be taken care of. > If users really don't want to restart OVS, we can do a soft restart > by recreating the bridge when the value changes, similarly how > the bridge is getting re-created if the datapath type changes. 
> I posted an RFC here with an idea of the implementation: > > https://patchwork.ozlabs.org/project/openvswitch/patch/20230908234009.892803-1-i.maxim...@ovn.org/ > > This way we will re-probe all the features and re-configure everything > that needs to be re-configured. I can continue working on that RFC > once I'm back from PTO. > > What do you think? Eelco? Simon? I agree that it would be better to fix this generically, i.e. re-probe everything. > Best regards, Ilya Maximets. > > > +backer->need_revalidate = REV_RECONFIGURE; > > +} > > + > > if (backer->need_revalidate) { > > struct
Re: [ovs-dev] [PATCH ovn] northd: Always ct commit ECMP symmetric traffic in the original direction.
On 9/7/23 15:59, Ales Musil wrote: > On Fri, Sep 1, 2023 at 12:56 PM Dumitru Ceara wrote: > >> Commit 506f7d4bcfbc ("northd: rely on new actions for ecmp-symmetric >> routing") relied on commit_ecmp_nh() to learn the mapping between a >> traffic session's 5-tuple and the MAC of the next-hop to be used for >> that session. This logical action translates to OVS learn() action. >> >> While in theory this is the most correct way of tracking the mapping >> between ECMP symmetric reply sessions and their next hop's MAC address, >> it also creates additional load in ovs-vswitchd (to learn the new flows) >> and introduces latency and affects traffic throughput. >> >> An alternative is to instead re-commit the 5-tuple (along with the >> next-hop MAC and port information) to conntrack every time traffic >> received from the next-hop side is forwarded by the OVN router. >> >> Testing shows that in a scenario with 4 next-hops and ECMP symmetric >> replies enabled with traffic running for 600 seconds latency, throughput >> and ovs-vswitchd CPU usage are significantly better with this change: >> >> - Before: >> - ovs-vswitchd ~1200% CPU (distributed across 17 revalidator threads) >> - Sent: 638.72MiB, 1.06MiB/s >> - Recv: 7.17GiB, 12.24MiB/s >> >> - After: >> - ovs-vswitchd ~7% CPU (distributed across 17 revalidator threads) >> - Sent: 892.69MiB, 1.49MiB/s >> - Recv: 8.63GiB, 14.72MiB/s >> >> The only downside of not using learn() flows is that OVN cannot >> determine on its own when a next-hop goes away (without using BFD). >> This scenario however can probably be handled by the CMS which has more >> knowledge about the rest of the network (outside OVN), e.g., >> ovn-kubernetes currently flushes conntrack entries created for ECMP >> symmetric reply sessions when it detects that the next-hop went away. >> >> If a next-hops changes MAC address that's handled by OVN gracefully and >> the conntrack entry corresponding to that session gets updated >> accordingly. 
>> >> NOTE: we don't remove the logical actions' implementation as >> ovn-controller needs to be able to translate logical flows generated by >> older versions of ovn-northd. >> >> Fixes: 506f7d4bcfbc ("northd: rely on new actions for ecmp-symmetric >> routing") >> Signed-off-by: Dumitru Ceara >> --- >> northd/northd.c | 52 ++--- >> tests/ovn-northd.at | 20 ++--- >> 2 files changed, 27 insertions(+), 45 deletions(-) >> >> diff --git a/northd/northd.c b/northd/northd.c >> index 459aaafb1c..e67d34cd28 100644 >> --- a/northd/northd.c >> +++ b/northd/northd.c >> @@ -262,7 +262,6 @@ enum ovn_stage { >> #define REGBIT_LOOKUP_NEIGHBOR_RESULT "reg9[2]" >> #define REGBIT_LOOKUP_NEIGHBOR_IP_RESULT "reg9[3]" >> #define REGBIT_DST_NAT_IP_LOCAL "reg9[4]" >> -#define REGBIT_KNOWN_ECMP_NH"reg9[5]" >> #define REGBIT_KNOWN_LB_SESSION "reg9[6]" >> >> /* Register to store the eth address associated to a router port for >> packets >> @@ -370,8 +369,7 @@ enum ovn_stage { >> * | | EGRESS_LOOPBACK/| G | UNUSED | >> * | R9 | PKT_LARGER/ | 4 | | >> * | | LOOKUP_NEIGHBOR_RESULT/ | | | >> - * | | SKIP_LOOKUP_NEIGHBOR/ | | | >> - * | | KNOWN_ECMP_NH} | | | >> + * | | SKIP_LOOKUP_NEIGHBOR} | | | >> * | | | | | >> * | | REG_ORIG_TP_DPORT_ROUTER | | | >> * | | | | | >> @@ -10846,15 +10844,13 @@ add_ecmp_symmetric_reply_flows(struct hmap >> *lflows, >>cidr); >> free(cidr); >> ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, 100, >> -ds_cstr(_match), >> -REGBIT_KNOWN_ECMP_NH" = chk_ecmp_nh_mac(); ct_next;", >> -_route->header_); >> + ds_cstr(_match), "ct_next;", >> + _route->header_); >> >> /* And packets that go out over an ECMP route need conntrack */ >> ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_DEFRAG, 100, >> -ds_cstr(route_match), >> -REGBIT_KNOWN_ECMP_NH" = chk_ecmp_nh(); ct_next;", >> -_route->header_); >> + ds_cstr(route_match), "ct_next;", >> + _route->header_); >> >> /* Save src eth and inport in ct_label for packets that arrive over >> * an ECMP route. 
>> @@ -10867,9 +10863,8 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows, >> ds_put_format(, >> "ct_commit { ct_label.ecmp_reply_eth = eth.src; " >> " %s = %" PRId64 ";}; " >> -"commit_ecmp_nh(ipv6 = %s, proto = tcp); next;", >> -ct_ecmp_reply_port_match, out_port->sb->tunnel_key, >> -IN6_IS_ADDR_V4MAPPED(>prefix) ? "false" : "true"); >> +
Re: [ovs-dev] [PATCH ovn v2] ci, tests: Use parallelization permutations for few jobs
On 9/7/23 20:52, Han Zhou wrote: > On Thu, Sep 7, 2023 at 7:48 AM Ales Musil wrote: >> >> Single test needs to run 4 times to check all >> the permutations, this is taking a lot of CI time >> and job space. >> >> Remove the parallel permutation and leave parallelization >> enabled for all tests, as this use case is more complex. >> Only exception are 10 tests in ovn-northd.at that still run >> parallelization permutations. This should be enough to cover >> the code with parallelization disabled. >> >> This allows us to greatly reduce the number of test cases >> (almost by half) and we can also remove 6 jobs from the CI >> pipeline. The time reduction is very noticeable going down >> from ~30 min to ~20 min. >> >> Signed-off-by: Ales Musil >> --- >> .github/workflows/test.yml | 28 +++- >> tests/ovn-macros.at| 15 +++ >> tests/ovn-northd.at| 20 ++-- >> 3 files changed, 32 insertions(+), 31 deletions(-) >> >> diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml >> index fe2a14c40..5c5ce6ed1 100644 >> --- a/.github/workflows/test.yml >> +++ b/.github/workflows/test.yml >> @@ -102,29 +102,23 @@ jobs: >> cfg: >> - { compiler: gcc, opts: --disable-ssl } >> - { compiler: clang, opts: --disable-ssl } >> -- { compiler: gcc, testsuite: test, test_range: "-500" } >> -- { compiler: gcc, testsuite: test, test_range: "501-1000" } >> -- { compiler: gcc, testsuite: test, test_range: "1001-" } >> +- { compiler: gcc, testsuite: test, test_range: "-300" } >> +- { compiler: gcc, testsuite: test, test_range: "301-600" } >> +- { compiler: gcc, testsuite: test, test_range: "601-" } >> - { compiler: clang, testsuite: test, sanitizers: sanitizers, > test_range: "-300" } >> - { compiler: clang, testsuite: test, sanitizers: sanitizers, > test_range: "301-600" } >> -- { compiler: clang, testsuite: test, sanitizers: sanitizers, > test_range: "601-900" } >> -- { compiler: clang, testsuite: test, sanitizers: sanitizers, > test_range: "901-1200" } >> -- { compiler: clang, testsuite: 
test, sanitizers: sanitizers, > test_range: "1201-" } >> -- { compiler: gcc, testsuite: test, libs: -ljemalloc, > test_range: "-500" } >> -- { compiler: gcc, testsuite: test, libs: -ljemalloc, > test_range: "501-1000" } >> -- { compiler: gcc, testsuite: test, libs: -ljemalloc, > test_range: "1001-" } >> +- { compiler: clang, testsuite: test, sanitizers: sanitizers, > test_range: "601-" } >> +- { compiler: gcc, testsuite: test, libs: -ljemalloc, > test_range: "-300" } >> +- { compiler: gcc, testsuite: test, libs: -ljemalloc, > test_range: "301-600" } >> +- { compiler: gcc, testsuite: test, libs: -ljemalloc, > test_range: "601-" } >> - { compiler: gcc, testsuite: system-test-dpdk, dpdk: dpdk, > test_range: "-100" } >> -- { compiler: gcc, testsuite: system-test-dpdk, dpdk: dpdk, > test_range: "101-200" } >> -- { compiler: gcc, testsuite: system-test-dpdk, dpdk: dpdk, > test_range: "201-" } >> +- { compiler: gcc, testsuite: system-test-dpdk, dpdk: dpdk, > test_range: "101-" } >> - { compiler: gcc, testsuite: system-test-userspace, test_range: > "-100" } >> -- { compiler: gcc, testsuite: system-test-userspace, test_range: > "101-200" } >> -- { compiler: gcc, testsuite: system-test-userspace, test_range: > "201-" } >> +- { compiler: gcc, testsuite: system-test-userspace, test_range: > "101-" } >> - { compiler: gcc, testsuite: system-test, test_range: "-100" } >> -- { compiler: gcc, testsuite: system-test, test_range: "101-200" > } >> -- { compiler: gcc, testsuite: system-test, test_range: "201-" } >> +- { compiler: gcc, testsuite: system-test, test_range: "101-" } >> - { compiler: clang, testsuite: system-test, sanitizers: > sanitizers, test_range: "-100" } >> -- { compiler: clang, testsuite: system-test, sanitizers: > sanitizers, test_range: "101-200" } >> -- { compiler: clang, testsuite: system-test, sanitizers: > sanitizers, test_range: "201-" } >> +- { compiler: clang, testsuite: system-test, sanitizers: > sanitizers, test_range: "101-" } >> - { arch: x86, compiler: 
gcc, opts: --disable-ssl } >> >> steps: >> diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at >> index 13d5dc3d4..7c0abdece 100644 >> --- a/tests/ovn-macros.at >> +++ b/tests/ovn-macros.at >> @@ -887,21 +887,28 @@ OVS_END_SHELL_HELPERS >> m4_define([OVN_POPULATE_ARP], [AT_CHECK(ovn_populate_arp__, [0], > [ignore])]) >> >> # Defines versions of the test with all combinations of northd, >> -# parallelization on/off and conditional monitoring on/off. >> +# parallelization enabled and conditional monitoring on/off. >> m4_define([OVN_FOR_EACH_NORTHD], >>[m4_foreach([NORTHD_TYPE], [ovn-northd],
[ovs-dev] [PATCH v2] checkpatch: Add checks for the subject line.
This patch adds WARNINGs for the subject line length and the format, i.e., the sentence should start with a capital and end with a dot. Signed-off-by: Eelco Chaudron --- v2: - Add test cases - Made it work on files and git patches - Changed it from 50 chars on the summary, to 70 chars on area and summary. This way git log --oneline will fit on a 80 char line. tests/checkpatch.at | 27 utilities/checkpatch.py | 53 ++- 2 files changed, 70 insertions(+), 10 deletions(-) diff --git a/tests/checkpatch.at b/tests/checkpatch.at index fdcdb846e..6689ce905 100755 --- a/tests/checkpatch.at +++ b/tests/checkpatch.at @@ -8,7 +8,12 @@ OVS_START_SHELL_HELPERS try_checkpatch() { # Take the patch to test from $1. Remove an initial four-space indent # from it and, if it is just headers with no body, add a null body. +# If it does not have a 'Subject', add a valid one. echo "$1" | sed 's/^//' > test.patch +if grep 'Subject\:' test.patch >/dev/null 2>&1; then : +else +sed -i '1 i\Subject: Patch this is.' test.patch +fi if grep '---' expout >/dev/null 2>&1; then : else printf '\n---\n' >> test.patch @@ -560,3 +565,25 @@ try_checkpatch \ " AT_CLEANUP + +AT_SETUP([checkpatch - subject]) +try_checkpatch \ + "Author: A +Commit: A +Subject: netdev: invalid case and dot ending + +Signed-off-by: A" \ +"WARNING: The subject summary should start with a capital. +WARNING: The subject summary should end with a dot. +Subject: netdev: invalid case and dot ending" + +try_checkpatch \ + "Author: A +Commit: A +Subject: netdev: This is a way to long commit summary and therefor it should report a WARNING! + +Signed-off-by: A" \ +"WARNING: The subject, ': ', is over 70 characters, i.e., 85. +Subject: netdev: This is a way to long commit summary and therefor it should report a WARNING!" 
+ +AT_CLEANUP diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py index 5c4aaefb3..3f42c44f2 100755 --- a/utilities/checkpatch.py +++ b/utilities/checkpatch.py @@ -792,6 +792,36 @@ def run_file_checks(text): check['check'](text) +def run_subject_checks(subject, spellcheck=False): +warnings = False + +if spellcheck and check_spelling(subject, False): +warnings = True + +summary = subject[subject.rindex(': ') + 2:] +area_summary = subject[subject.index(': ') + 2:] +area_summary_len = len(area_summary) +if area_summary_len > 70: +print_warning("The subject, ': ', is over 70 " + "characters, i.e., %u." % area_summary_len) +warnings = True + +if summary[0].isalpha() and summary[0].islower(): +print_warning( +"The subject summary should start with a capital.") +warnings = True + +if subject[-1] not in [".", "?", "!"]: +print_warning( +"The subject summary should end with a dot.") +warnings = True + +if warnings: +print(subject) + +return warnings + + def ovs_checkpatch_parse(text, filename, author=None, committer=None): global print_file_name, total_line, checking_file, \ empty_return_check_state @@ -812,6 +842,7 @@ def ovs_checkpatch_parse(text, filename, author=None, committer=None): r'^@@ ([0-9-+]+),([0-9-+]+) ([0-9-+]+),([0-9-+]+) @@') is_author = re.compile(r'^(Author|From): (.*)$', re.I | re.M | re.S) is_committer = re.compile(r'^(Commit: )(.*)$', re.I | re.M | re.S) +is_subject = re.compile(r'^(Subject: )(.*)$', re.I | re.M | re.S) is_signature = re.compile(r'^(Signed-off-by: )(.*)$', re.I | re.M | re.S) is_co_author = re.compile(r'^(Co-authored-by: )(.*)$', @@ -911,6 +942,8 @@ def ovs_checkpatch_parse(text, filename, author=None, committer=None): committer = is_committer.match(line).group(2) elif is_author.match(line): author = is_author.match(line).group(2) +elif is_subject.match(line): +run_subject_checks(line, spellcheck) elif is_signature.match(line): m = is_signature.match(line) signatures.append(m.group(2)) @@ -1029,18 +1062,18 @@ def 
ovs_checkpatch_file(filename): result = ovs_checkpatch_parse(part.get_payload(decode=False), filename, mail.get('Author', mail['From']), mail['Commit']) -if spellcheck: -if not mail['Subject'] or not mail['Subject'].strip(): -if mail['Subject']: -mail.replace_header('Subject', sys.argv[-1]) -else: -mail.add_header('Subject', sys.argv[-1]) -print("Subject missing! Your provisional subject is", - mail['Subject']) +if not mail['Subject'] or not
Re: [ovs-dev] [PATCH ovn v6 00/16] northd: I-P for load balancer and lb groups
On Thu, Aug 31, 2023 at 2:47 PM Mark Michelson wrote: > > I had a look at patches 1-8. I have no additional notes beyond what Han > and Ales have already mentioned. > > For patches 1-8: > > Acked-by: Mark Michelson Thanks Mark, Han and Ales for the reviews. I applied the first 4 patches of this series to main and backported to branch-23.09. I'll submit v7 soon with the patches 5-8 addressing all the review comments. Thanks, Numan > > On 8/18/23 04:56, num...@ovn.org wrote: > > From: Numan Siddique > > > > This patch series adds the support to handle load balancer and > > load balancer group changes incrementally in the "northd" engine > > node and "lflow" engine node. Changes to logical switches and router's load > > balancer and load balancer group columns are also handled incrementally > > provided other columns do not change. > > > > V4 of this series did not include LB I-P handling in the lflow engine > > node. V5 adds 6 more patches to handle the LB changes in the lflow > > engine node. V6 added 2 additional patches to handle router NAT I-P > > handling. > > > > This patch series can be divided into 3 parts > > > > Part 1. Patches 1 to 8 (LB I-P only in northd) > > Part 2. Patches 9 to 14 (LB I-P in lflow engine too) > > Part 3. Patches 15 and 16 (LR NAT I-P handling in both northd and > > lflow) > > > > Submitting all these patches as one series. These patches can also be > > found here - https://github.com/numansiddique/ovn/tree/northd_ip_lb_ip_v6 > > > > If there are any conflicts, request the reviewers to use this branch > > for reviewing. > > > > Below are the scale testing results done with these patches applied > > using ovn-heater. The test ran the scenario - > > ocp-500-density-heavy.yml [1]. 
> > > > With these patches applied (with load balancer I-P handling in both > > northd and lflow engine nodes) the resuts (Result 1) are: > > > > --- > > Min (s) Median (s) 90%ile (s) > > 99%ile (s) Max (s) Mean (s)Total (s) Count > > Failed > > --- > > Iteration Total 0.1356511.1305271.179357 > > 1.2014102.1802030.67460684.325717 125 0 > > Namespace.add_ports 0.0052180.0056780.006457 > > 0.0189360.0208120.0061820.772796125 0 > > WorkerNode.bind_port0.0336310.0432870.051171 > > 0.0582230.0628190.04383910.959757 250 0 > > WorkerNode.ping_port0.0054600.0067911.041434 > > 1.0648071.0699570.27435268.587878 250 0 > > --- > > > > > > With only the first 8 patches applied (with load balancer I-P handling > > only in northd engine node) the results (Result 2) are: > > > > > > --- > > Min (s) Median (s) 90%ile (s) > > 99%ile (s) Max (s) Mean (s)Total (s) Count > > Failed > > --- > > Iteration Total 0.1329292.1571033.314847 > > 3.3315614.3786261.581889197.736147 125 0 > > Namespace.add_ports 0.0052170.0057600.006565 > > 0.0133480.0210140.0061060.763214125 0 > > WorkerNode.bind_port0.0352050.0454580.052278 > > 0.0598040.0639410.04565211.413122 250 0 > > WorkerNode.ping_port0.0050750.0068143.088548 > > 3.1925774.2420260.726453181.613284 250 0 > > --- > > > > > > The results with the present main (Result 3) are: > > > > --- > > Min (s) Median (s) 90%ile (s) > > 99%ile (s) Max (s) Mean (s)Total (s) Count > > Failed > >
Re: [ovs-dev] [PATCH ovn] controller: disable OpenFlow inactivity probing
Hi Dumitru, if eventually this patch got merged, please remove next lines from its commit message: "Disablement for ovs-vswitchd -> ovn-controller OF inacivity probing will be done in the next patch." Unfortunately I was unsuccessful in removing these probing direction (default 60s). Maybe you can give any hint how to solve this [1]. 1: https://mail.openvswitch.org/pipermail/ovs-dev/2023-June/405445.html > On 11 Sep 2023, at 15:57, Dumitru Ceara wrote: > > On 9/11/23 14:54, Dumitru Ceara wrote: >> On 6/14/23 20:02, Vladislav Odintsov wrote: >>> Hi Mark, >>> >>> thanks for taking time to look on this! >>> >>> Your point with a hung OVS is really an interesting case. >>> >>> From one hand it’s a possible situation, and from another I guess it’s much >>> higher probability for OVS to be busy by some other work rather than to >>> hang in a loop. In my installation the first happening sometimes (OVS >>> business by real work). The reasons can be different but I can’t say that >>> it’s better to drop OF connection in such a moment (possibly triggering ha >>> chassis-redirect ports failover). >>> >>> At the same time if we’re talking about L3GW node, which has a gateway >>> chassis redirect port, the hung OVS should be detected by other nodes with >>> BFD probes (if it’s a ha chassis-redirect port). >>> And if it’s a normal hypervisor (with just a vif ports), so what can we do >>> with it? I guess if we drop OF connection, it won’t make any positive >>> changes. Maybe just release memory needed for handling this connection and >>> not allocate buffers…? Unfortunately I can’t evaluate this. >>> >>> Moreover, the OVN default is a disabled probing. What can be a possible >>> recommended values to set probes if to leave it as is? How should users >>> find out that they have to configure this? Currently in docs there is only >>> information that this option configures probe interval. But it’s not >>> obvious when to configure it and which value to set. 
>>> >> >> In support of this, quoting the original commit that added the probe >> config knob: >> >> "The main intention of this patch is to fix this recompuation issue for >> older versions (there by requesting backport), it still would be >> beneficial with the incremental processing engine." >> >> That was back in 2019: >> >> https://github.com/ovn-org/ovn/commit/c99069c8934c9ea55d310a8b6d48fb66aa477589 >> >> Since then lots of performance improvements landed in OVN >> (ovn-controller too), we're probably fine. Checking >> ovn-org/ovn-kubernetes code I see they were also setting >> ovn-openflow-probe-interval "just in case" so they probably won't be >> affected by it getting removed: >> >> https://github.com/ovn-org/ovn-kubernetes/commit/be1eb843852cd4c490514cec60553cca4feaf18e >> >>> I hope this makes sense. >>> >> >> The patch in its current form looks OK to me, I'm OK to merge it if Mark >> doesn't have anything against it; therefore: >> >> Acked-by: Dumitru Ceara >> > > I forgot to mention this earlier but, since this patch was posted way > before soft freeze and since the changes seem contained, it's probably > fair to apply this to the 23.09 branch too. What do you think, Mark? > > Thanks! > >> Regards, >> Dumitru >> >>> Also, which Han’s patch to OVS you’re talking about? I looked through open >>> OVS patches and haven’t seen any. Please point me out. >>> >>> regards, >>> Vladislav Odintsov >>> On 9 Jun 2023, at 18:28, Mark Michelson wrote: Hi, Thanks for linking the email thread, since I had the exact same concern that Numan did about detecting if ovs-vswitchd goes down. It sounds like because of the nature of unix sockets, this disconnection is still detected by OVN and failover can occur as expected. But what about if ovs-vswitchd is still running but in a "compromised" state. For instance, what if a coding error in ovs-vswitchd results in it running an infinite loop? 
In such a case, the unix connection would still be alive, but OVN would not be able to communicate with the ovs-vswitchd process. Does this situation eventually lead to OVN disconnecting anyway after it fills up the unix socket's buffer? If so, how long does that take? Or does it lead to OVN slowly growing in memory usage while it waits forever to be able to write to the socket? I think removing the hard-coded 5 second probe intervals from pinctrl.c and features.c is a good idea. But I think we should leave the configurable probe interval used by ofctrl.c for the case I described before. Since Han's patch to OVS should disable probe intervals from the OVS side by default, we would not trigger bad behavior simply because OVN has a lot of work to do during a recompute. What do you think? > On 6/8/23 10:15, Vladislav Odintsov wrote: > OpenFlow connection is established over unix socket,
Re: [ovs-dev] [PATCH ovn] controller: disable OpenFlow inactivity probing
On 9/11/23 14:54, Dumitru Ceara wrote: > On 6/14/23 20:02, Vladislav Odintsov wrote: >> Hi Mark, >> >> thanks for taking time to look on this! >> >> Your point with a hung OVS is really an interesting case. >> >> From one hand it’s a possible situation, and from another I guess it’s much >> higher probability for OVS to be busy by some other work rather than to hang >> in a loop. In my installation the first happening sometimes (OVS business by >> real work). The reasons can be different but I can’t say that it’s better to >> drop OF connection in such a moment (possibly triggering ha chassis-redirect >> ports failover). >> >> At the same time if we’re talking about L3GW node, which has a gateway >> chassis redirect port, the hung OVS should be detected by other nodes with >> BFD probes (if it’s a ha chassis-redirect port). >> And if it’s a normal hypervisor (with just a vif ports), so what can we do >> with it? I guess if we drop OF connection, it won’t make any positive >> changes. Maybe just release memory needed for handling this connection and >> not allocate buffers…? Unfortunately I can’t evaluate this. >> >> Moreover, the OVN default is a disabled probing. What can be a possible >> recommended values to set probes if to leave it as is? How should users find >> out that they have to configure this? Currently in docs there is only >> information that this option configures probe interval. But it’s not obvious >> when to configure it and which value to set. >> > > In support of this, quoting the original commit that added the probe > config knob: > > "The main intention of this patch is to fix this recompuation issue for > older versions (there by requesting backport), it still would be > beneficial with the incremental processing engine." > > That was back in 2019: > > https://github.com/ovn-org/ovn/commit/c99069c8934c9ea55d310a8b6d48fb66aa477589 > > Since then lots of performance improvements landed in OVN > (ovn-controller too), we're probably fine. 
Checking > ovn-org/ovn-kubernetes code I see they were also setting > ovn-openflow-probe-interval "just in case" so they probably won't be > affected by it getting removed: > > https://github.com/ovn-org/ovn-kubernetes/commit/be1eb843852cd4c490514cec60553cca4feaf18e > >> I hope this makes sense. >> > > The patch in its current form looks OK to me, I'm OK to merge it if Mark > doesn't have anything against it; therefore: > > Acked-by: Dumitru Ceara > I forgot to mention this earlier but, since this patch was posted way before soft freeze and since the changes seem contained, it's probably fair to apply this to the 23.09 branch too. What do you think, Mark? Thanks! > Regards, > Dumitru > >> Also, which Han’s patch to OVS you’re talking about? I looked through open >> OVS patches and haven’t seen any. Please point me out. >> >> regards, >> Vladislav Odintsov >> >>> On 9 Jun 2023, at 18:28, Mark Michelson wrote: >>> >>> Hi, >>> >>> Thanks for linking the email thread, since I had the exact same concern >>> that Numan did about detecting if ovs-vswitchd goes down. It sounds like >>> because of the nature of unix sockets, this disconnection is still detected >>> by OVN and failover can occur as expected. >>> >>> But what about if ovs-vswitchd is still running but in a "compromised" >>> state. For instance, what if a coding error in ovs-vswitchd results in it >>> running an infinite loop? In such a case, the unix connection would still >>> be alive, but OVN would not be able to communicate with the ovs-vswitchd >>> process. >>> >>> Does this situation eventually lead to OVN disconnecting anyway after it >>> fills up the unix socket's buffer? If so, how long does that take? Or does >>> it lead to OVN slowly growing in memory usage while it waits forever to be >>> able to write to the socket? >>> >>> I think removing the hard-coded 5 second probe intervals from pinctrl.c and >>> features.c is a good idea. 
But I think we should leave the configurable >>> probe interval used by ofctrl.c for the case I described before. Since >>> Han's patch to OVS should disable probe intervals from the OVS side by >>> default, we would not trigger bad behavior simply because OVN has a lot of >>> work to do during a recompute. What do you think? On 6/8/23 10:15, Vladislav Odintsov wrote: OpenFlow connection is established over unix socket, which is a reliable connection and doesn't require additional probing. With this patch openflow probing in ovn-controller -> ovs-vswitchd direction is disabled for all three connections: - OF flows management connection, - OF features negotiation connection, - pinctrl connection. ovn-controller external_ids:ovn-openflow-probe-interval is removed as non-needed anymore. Disablement for ovs-vswitchd -> ovn-controller OF inactivity probing will be done in the next patch. Reported-at:
Re: [ovs-dev] [PATCH ovn] controller: disable OpenFlow inactivity probing
On 6/14/23 20:02, Vladislav Odintsov wrote: > Hi Mark, > > thanks for taking time to look on this! > > Your point with a hung OVS is really an interesting case. > > From one hand it’s a possible situation, and from another I guess it’s much > higher probability for OVS to be busy by some other work rather than to hang > in a loop. In my installation the first happening sometimes (OVS business by > real work). The reasons can be different but I can’t say that it’s better to > drop OF connection in such a moment (possibly triggering ha chassis-redirect > ports failover). > > At the same time if we’re talking about L3GW node, which has a gateway > chassis redirect port, the hung OVS should be detected by other nodes with > BFD probes (if it’s a ha chassis-redirect port). > And if it’s a normal hypervisor (with just a vif ports), so what can we do > with it? I guess if we drop OF connection, it won’t make any positive > changes. Maybe just release memory needed for handling this connection and > not allocate buffers…? Unfortunately I can’t evaluate this. > > Moreover, the OVN default is a disabled probing. What can be a possible > recommended values to set probes if to leave it as is? How should users find > out that they have to configure this? Currently in docs there is only > information that this option configures probe interval. But it’s not obvious > when to configure it and which value to set. > In support of this, quoting the original commit that added the probe config knob: "The main intention of this patch is to fix this recompuation issue for older versions (there by requesting backport), it still would be beneficial with the incremental processing engine." That was back in 2019: https://github.com/ovn-org/ovn/commit/c99069c8934c9ea55d310a8b6d48fb66aa477589 Since then lots of performance improvements landed in OVN (ovn-controller too), we're probably fine. 
Checking ovn-org/ovn-kubernetes code I see they were also setting ovn-openflow-probe-interval "just in case" so they probably won't be affected by it getting removed: https://github.com/ovn-org/ovn-kubernetes/commit/be1eb843852cd4c490514cec60553cca4feaf18e > I hope this makes sense. > The patch in its current form looks OK to me, I'm OK to merge it if Mark doesn't have anything against it; therefore: Acked-by: Dumitru Ceara Regards, Dumitru > Also, which Han’s patch to OVS you’re talking about? I looked through open > OVS patches and haven’t seen any. Please point me out. > > regards, > Vladislav Odintsov > >> On 9 Jun 2023, at 18:28, Mark Michelson wrote: >> >> Hi, >> >> Thanks for linking the email thread, since I had the exact same concern that >> Numan did about detecting if ovs-vswitchd goes down. It sounds like because >> of the nature of unix sockets, this disconnection is still detected by OVN >> and failover can occur as expected. >> >> But what about if ovs-vswitchd is still running but in a "compromised" >> state. For instance, what if a coding error in ovs-vswitchd results in it >> running an infinite loop? In such a case, the unix connection would still be >> alive, but OVN would not be able to communicate with the ovs-vswitchd >> process. >> >> Does this situation eventually lead to OVN disconnecting anyway after it >> fills up the unix socket's buffer? If so, how long does that take? Or does >> it lead to OVN slowly growing in memory usage while it waits forever to be >> able to write to the socket? >> >> I think removing the hard-coded 5 second probe intervals from pinctrl.c and >> features.c is a good idea. But I think we should leave the configurable >> probe interval used by ofctrl.c for the case I described before. Since Han's >> patch to OVS should disable probe intervals from the OVS side by default, we >> would not trigger bad behavior simply because OVN has a lot of work to do >> during a recompute. What do you think? 
>>> On 6/8/23 10:15, Vladislav Odintsov wrote: >>> OpenFlow connection is established over unix socket, which is a reliable >>> connection and doesn't require additional probing. >>> With this patch openflow probing in ovn-controller -> ovs-vswitchd >>> direction is disabled for all three connections: >>> - OF flows management connection, >>> - OF features negotiation connection, >>> - pinctrl connection. >>> ovn-controller external_ids:ovn-openflow-probe-interval is removed as >>> non-needed anymore. >>> Disablement for ovs-vswitchd -> ovn-controller OF inactivity probing will >>> be done in the next patch. >>> Reported-at: >>> https://mail.openvswitch.org/pipermail/ovs-dev/2023-May/404625.html >>> Signed-off-by: Vladislav Odintsov >>> --- >>> NEWS| 5 + >>> controller/ofctrl.c | 14 ++ >>> controller/ofctrl.h | 4 +--- >>> controller/ovn-controller.8.xml | 14 -- >>> controller/ovn-controller.c | 21 + >>> controller/pinctrl.c| 2
Re: [ovs-dev] [PATCH ovn v2 2/2] ofctrl: Prevent conjunction duplication
On 9/6/23 16:39, Mark Michelson wrote: > Thanks Ales, > Hi Ales, Mark, > Acked-by: Mark Michelson > > Thankfully the extra overhead is in the much less frequently-hit case. > We still do an O(N^2) processing, I wonder if it's a lot of work/complexity to add the map to all 'struct desired_flow'. But yes, in general, it's likely fine to do things as this patch proposes. However, to make sure we catch it in the future, should we log/count whenever we have a lot of entries in this specific hmap ('existing_conj')? Thanks, Dumitru > On 8/29/23 04:47, Ales Musil wrote: >> During ofctrl_add_or_append_flow we are able to combine >> two flows with same match but different conjunctions. >> However, the function didn't check if the conjunctions already >> exist in the installed flow, which could result in conjunction >> duplication and the flow would grow infinitely e.g. >> actions=conjunction(1,1/2), conjunction(1,1/2) >> >> Make sure that we add only conjunctions that are not present >> in the already existing flow. 
>> >> Reported-at: https://bugzilla.redhat.com/2175928 >> Signed-off-by: Ales Musil >> --- >> controller/ofctrl.c | 56 - >> 1 file changed, 55 insertions(+), 1 deletion(-) >> >> diff --git a/controller/ofctrl.c b/controller/ofctrl.c >> index 9de8a145f..e39cef7aa 100644 >> --- a/controller/ofctrl.c >> +++ b/controller/ofctrl.c >> @@ -1273,6 +1273,37 @@ ofctrl_add_flow_metered(struct >> ovn_desired_flow_table *desired_flows, >> meter_id, as_info, true); >> } >> +struct ofpact_ref { >> + struct hmap_node hmap_node; >> + struct ofpact *ofpact; >> +}; >> + >> +static struct ofpact_ref * >> +ofpact_ref_find(const struct hmap *refs, const struct ofpact *ofpact) >> +{ >> + uint32_t hash = hash_bytes(ofpact, ofpact->len, 0); >> + >> + struct ofpact_ref *ref; >> + HMAP_FOR_EACH_WITH_HASH (ref, hmap_node, hash, refs) { >> + if (ofpacts_equal(ref->ofpact, ref->ofpact->len, >> + ofpact, ofpact->len)) { >> + return ref; >> + } >> + } >> + >> + return NULL; >> +} >> + >> +static void >> +ofpact_refs_destroy(struct hmap *refs) >> +{ >> + struct ofpact_ref *ref; >> + HMAP_FOR_EACH_POP (ref, hmap_node, refs) { >> + free(ref); >> + } >> + hmap_destroy(refs); >> +} >> + >> /* Either add a new flow, or append actions on an existing flow. If the >> * flow existed, a new link will also be created between the new >> sb_uuid >> * and the existing flow. 
*/ >> @@ -1292,6 +1323,21 @@ ofctrl_add_or_append_flow(struct >> ovn_desired_flow_table *desired_flows, >> meter_id); >> existing = desired_flow_lookup_conjunctive(desired_flows, >> >flow); >> if (existing) { >> + struct hmap existing_conj = HMAP_INITIALIZER(_conj); >> + >> + struct ofpact *ofpact; >> + OFPACT_FOR_EACH (ofpact, existing->flow.ofpacts, >> + existing->flow.ofpacts_len) { >> + if (ofpact->type != OFPACT_CONJUNCTION) { >> + continue; >> + } >> + >> + struct ofpact_ref *ref = xmalloc(sizeof *ref); >> + ref->ofpact = ofpact; >> + uint32_t hash = hash_bytes(ofpact, ofpact->len, 0); >> + hmap_insert(_conj, >hmap_node, hash); >> + } >> + >> /* There's already a flow with this particular match and action >> * 'conjunction'. Append the action to that flow rather than >> * adding a new flow. >> @@ -1301,7 +1347,15 @@ ofctrl_add_or_append_flow(struct >> ovn_desired_flow_table *desired_flows, >> ofpbuf_use_stub(, compound_stub, >> sizeof(compound_stub)); >> ofpbuf_put(, existing->flow.ofpacts, >> existing->flow.ofpacts_len); >> - ofpbuf_put(, f->flow.ofpacts, f->flow.ofpacts_len); >> + >> + OFPACT_FOR_EACH (ofpact, f->flow.ofpacts, f->flow.ofpacts_len) { >> + if (ofpact->type != OFPACT_CONJUNCTION || >> + !ofpact_ref_find(_conj, ofpact)) { >> + ofpbuf_put(, ofpact, >> OFPACT_ALIGN(ofpact->len)); >> + } >> + } >> + >> + ofpact_refs_destroy(_conj); >> mem_stats.desired_flow_usage -= desired_flow_size(existing); >> free(existing->flow.ofpacts); > > ___ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH] utilities: Add kernel_delay.py script to debug a busy Linux kernel.
On 8/21/23 17:41, Eelco Chaudron wrote: This patch adds an utility that can be used to determine if an issue is related to a lack of Linux kernel resources. This tool is also featured in a Red Hat developers blog article: https://developers.redhat.com/articles/2023/07/24/troubleshooting-open-vswitch-kernel-blame Signed-off-by: Eelco Chaudron --- utilities/automake.mk |4 utilities/usdt-scripts/kernel_delay.py | 1397 +++ utilities/usdt-scripts/kernel_delay.rst | 594 + 3 files changed, 1995 insertions(+) create mode 100755 utilities/usdt-scripts/kernel_delay.py create mode 100644 utilities/usdt-scripts/kernel_delay.rst I have some comments below but overall an awesome script! Thanks Eelco. diff --git a/utilities/automake.mk b/utilities/automake.mk index 37d679f82..9a2114df4 100644 --- a/utilities/automake.mk +++ b/utilities/automake.mk @@ -23,6 +23,8 @@ scripts_DATA += utilities/ovs-lib usdt_SCRIPTS += \ utilities/usdt-scripts/bridge_loop.bt \ utilities/usdt-scripts/dpif_nl_exec_monitor.py \ + utilities/usdt-scripts/kernel_delay.py \ + utilities/usdt-scripts/kernel_delay.rst \ utilities/usdt-scripts/reval_monitor.py \ utilities/usdt-scripts/upcall_cost.py \ utilities/usdt-scripts/upcall_monitor.py @@ -70,6 +72,8 @@ EXTRA_DIST += \ utilities/docker/debian/build-kernel-modules.sh \ utilities/usdt-scripts/bridge_loop.bt \ utilities/usdt-scripts/dpif_nl_exec_monitor.py \ + utilities/usdt-scripts/kernel_delay.py \ + utilities/usdt-scripts/kernel_delay.rst \ utilities/usdt-scripts/reval_monitor.py \ utilities/usdt-scripts/upcall_cost.py \ utilities/usdt-scripts/upcall_monitor.py diff --git a/utilities/usdt-scripts/kernel_delay.py b/utilities/usdt-scripts/kernel_delay.py new file mode 100755 index 0..bba7bc044 --- /dev/null +++ b/utilities/usdt-scripts/kernel_delay.py @@ -0,0 +1,1397 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2022 Red Hat, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Script information: +# --- +# This script allows a developer to quickly identify if the issue at hand +# might be related to the kernel running out of resources or if it really is +# an Open vSwitch issue. +# +# For documentation see the kernel_delay.rst file. +# +try: +from bcc import BPF, USDT, USDTException +from bcc.syscall import syscalls, syscall_name +except ModuleNotFoundError: +print("ERROR: Can't find the BPF Compiler Collection (BCC) tools!") + +from enum import IntEnum + +import argparse +import datetime +import os +import pytz IIRC it's the first time we add this dependency. I think usdt-script dependencies are starting to grow. Have you considered adding a requirements.txt to document them all and make it easier for users to consume these scripts? Alternatively, we should maybe add it to the p +import psutil +import re +import sys +import time + +import ctypes as ct + +try: +from time import time_ns +except ImportError: +# For compatibility with Python <= v3.6. 
+def time_ns(): +now = datetime.datetime.now() +return int(now.timestamp() * 1e9) + + +# +# Actual eBPF source code +# +EBPF_SOURCE = """ +#include +#include + +#define MONITOR_PID + +enum { + +}; + +struct event_t { +u64 ts; +u32 tid; +u32 id; + +int user_stack_id; +int kernel_stack_id; + +u32 syscall; +u64 entry_ts; + +}; + +BPF_RINGBUF_OUTPUT(events, ); +BPF_STACK_TRACE(stack_traces, ); +BPF_TABLE("percpu_array", uint32_t, uint64_t, dropcnt, 1); +BPF_TABLE("percpu_array", uint32_t, uint64_t, trigger_miss, 1); + +BPF_ARRAY(capture_on, u64, 1); +static bool capture_enabled(u64 pid_tgid) { nit: inline. +int key = 0; +u64 *ret; + +if ((pid_tgid >> 32) != MONITOR_PID) +return false; + +ret = capture_on.lookup(); +return ret && *ret == 1; +} + +static inline bool capture_enabled__() { +int key = 0; +u64 *ret; + +ret = capture_on.lookup(); +return ret && *ret == 1; +} + + nit: extra newline +static struct event_t *get_event(uint32_t id) { +struct event_t *event = events.ringbuf_reserve(sizeof(struct event_t)); + +if (!event) { +dropcnt.increment(0); +return NULL; +} + +event->id = id; +event->ts = bpf_ktime_get_ns(); +event->tid =
Re: [ovs-dev] [PATCH ovn v2 1/2] ofctrl: Do not try to program long flows
On 9/6/23 16:39, Mark Michelson wrote: > Thanks Ales, > > Acked-by: Mark Michelson > Hi Ales, Mark, > I think this change should be applied as-is. I also wanted to contribute > some thoughts. > I shared some thoughts too, below. > As you mentioned in the commit message, this is not a self-healing > process. I was looking at the OpenFlow specification, and I am having > trouble figuring out how to properly handle this. It doesn't appear that > OpenFlow provides a way to append new actions to an existing flow or to > add new match conditions to an existing flow. The best you can do is to > replace the actions or replace the match. This doesn't help when the > match or actions are too long to send. > > The best I could think of was to break the flow up and evaluate across > multiple tables. This would work, but it would also completely break the > model for how OVN correlates OpenFlow table numbers with specific > logical stages. Are there any alternatives? > Not a real alternative solution but should we also try a (single) recompute and also clear the installed OpenFlows and install the new set? That's just in case it "fixes" the problem due to desired flows being built in a different (equivalent) way. > On 8/29/23 04:47, Ales Musil wrote: >> If the flow size is bigger than UINT16_MAX it doesn't >> fit into openflow message. Programming of such flow will >> fail which results in disconnect of ofctrl. After reconnect >> we program everything from scratch, in case the long flow still >> remains the cycle continues. This causes the node to be almost >> unusable as there will be massive traffic disruptions. >> >> To prevent that check if the flow is within the allowed size. >> If not log the flow that would trigger this problem and do not program >> it. This isn't a self-healing process, but the chance of this happening >> are very slim. Also, any flow that is bigger than allowed size is OVN >> bug, and it should be fixed. 
>> >> Reported-at: https://bugzilla.redhat.com/1955167 >> Signed-off-by: Ales Musil >> --- >> v2: Fix the formatting error. >> --- >> controller/ofctrl.c | 40 >> 1 file changed, 36 insertions(+), 4 deletions(-) >> >> diff --git a/controller/ofctrl.c b/controller/ofctrl.c >> index 64a444ff6..9de8a145f 100644 >> --- a/controller/ofctrl.c >> +++ b/controller/ofctrl.c >> @@ -1769,6 +1769,16 @@ ovn_flow_log(const struct ovn_flow *f, const >> char *action) >> } >> } >> +static void >> +ovn_flow_log_size_err(const struct ovn_flow *f) >> +{ >> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); >> + >> + char *s = ovn_flow_to_string(f); >> + VLOG_ERR_RL(, "The FLOW_MOD message is too big: %s", s); >> + free(s); It might be useful to add a coverage counter too. Regards, Dumitru >> +} >> + >> static void >> ovn_flow_uninit(struct ovn_flow *f) >> { >> @@ -1888,15 +1898,27 @@ encode_bundle_add(struct ofpbuf *msg, struct >> ofputil_bundle_ctrl_msg *bc) >> return ofputil_encode_bundle_add(OFP15_VERSION, ); >> } >> -static void >> +static bool >> add_flow_mod(struct ofputil_flow_mod *fm, >> struct ofputil_bundle_ctrl_msg *bc, >> struct ovs_list *msgs) >> { >> struct ofpbuf *msg = encode_flow_mod(fm); >> struct ofpbuf *bundle_msg = encode_bundle_add(msg, bc); >> + >> + uint32_t flow_mod_len = msg->size; >> + uint32_t bundle_len = bundle_msg->size; >> + >> ofpbuf_delete(msg); >> + >> + if (flow_mod_len > UINT16_MAX || bundle_len > UINT16_MAX) { >> + ofpbuf_delete(bundle_msg); >> + >> + return false; >> + } >> + >> ovs_list_push_back(msgs, _msg->list_node); >> + return true; >> } >> >> /* group_table. 
*/ >> @@ -2235,7 +2257,10 @@ installed_flow_add(struct ovn_flow *d, >> .new_cookie = htonll(d->cookie), >> .command = OFPFC_ADD, >> }; >> - add_flow_mod(, bc, msgs); >> + >> + if (!add_flow_mod(, bc, msgs)) { >> + ovn_flow_log_size_err(d); >> + } >> } >> static void >> @@ -2259,7 +2284,7 @@ installed_flow_mod(struct ovn_flow *i, struct >> ovn_flow *d, >> /* Use OFPFC_ADD so that cookie can be updated. */ >> fm.command = OFPFC_ADD; >> } >> - add_flow_mod(, bc, msgs); >> + bool result = add_flow_mod(, bc, msgs); >> /* Replace 'i''s actions and cookie by 'd''s. */ >> mem_stats.installed_flow_usage -= i->ofpacts_len - d->ofpacts_len; >> @@ -2267,6 +2292,10 @@ installed_flow_mod(struct ovn_flow *i, struct >> ovn_flow *d, >> i->ofpacts = xmemdup(d->ofpacts, d->ofpacts_len); >> i->ofpacts_len = d->ofpacts_len; >> i->cookie = d->cookie; >> + >> + if (!result) { >> + ovn_flow_log_size_err(i); >> + } >> } >> static void >> @@ -2280,7 +2309,10 @@ installed_flow_del(struct ovn_flow *i, >> .table_id = i->table_id, >> .command = OFPFC_DELETE_STRICT, >> }; >> -
Re: [ovs-dev] [PATCH] dpdk: expose cpu usage stats on telemetry socket
Hey Kevin, Kevin Traynor, Sep 07, 2023 at 15:37: > This came up in conversation with other maintainers as I mentioned I was > reviewing and the question raised was - Why add this ? if you want these > values exposed, wouldn't it be better to add to ovsdb ? That's a good point. I had considered using ovsdb but it seemed to me less suitable for a few reasons: * I had understood that ovsdb is a configuration database, not a state reporting database. * To have reliable and up to date numbers, ovs would need to push them at high rate to the database so that clients to get outdated cpu usage. The DPDK telemetry socket is real-time, the current numbers are returned on every request. * I would need to define a custom schema / table to store structured information in the db. The DPDK telemetry socket already has a schema defined for this. * Accessing ovsdb requires a library making it more complex to use for telemetry scrapers. The DPDK telemetry socket can be accessed with a standalone python script with no external dependencies[1]. [1]: https://github.com/rjarry/dpdk/blob/main/usertools/prometheus-dpdk-exporter.py#L135-L143 Maybe my observations are wrong, please do correct me if they are. > Are you looking for individual lcore usage with identification of that > pmd? or overall aggregate usage ? > > I ask because it will report lcore id's which would need to be mapped to > pmd core id's for anything regarding individual pmds. > > That can be found in ovs-vswitchd.log or checked locally with > 'ovs-appctl dpdk/lcore-list' but assuming if they were available, then > user would not be using dpdk telemetry anyway. I would assume that the important data is the aggregate usage for overall monitoring and resource planning. Individual pmd usage can be accessed for fine tuning and debugging via appctl. 
> These stats are cumulative so in the absence of 'ovs-appctl > dpif-netdev/pmd-stats-clear' that would need to be taken care of with > some post-processing by whatever is pulling these stats - otherwise > you'll get cumulative stats for an unknown time period and unknown > traffic profile (e.g. it would be counting before any traffic started). > > These might also be reset with pmd-stats-clear independently, so that > would need to be accounted for too. The only important data point that we need is the ratio between busy/(busy + idle) over a specified delta which any scraper can do. I consider these numbers like any other counter that can eventually be reset. See this reply from Morten Brørup on dpdk-dev for more context: https://lore.kernel.org/dpdk-dev/98cbd80474fa8b44bf855df32c47dc35d87...@smartserver.smartshare.dk/ > Another thing I noticed is that without the pmd-sleep info the stats in > isolation can be misleading. Example below: > > With low rate traffic and clearing stats between 10 sec runs > > 2023-09-07T13:14:56Z|00158|dpif_netdev|INFO|PMD max sleep request is 0 > usecs. > 2023-09-07T13:14:56Z|00159|dpif_netdev|INFO|PMD load based sleeps are > disabled. > > Time: 13:15:06.842 > Measurement duration: 10.009 s > > pmd thread numa_id 0 core_id 8: > >Iterations: 51712564 (0.19 us/it) >- Used TSC cycles: 26021354654 (100.0 % of total cycles) >- idle iterations: 51710963 ( 99.9 % of used cycles) >- busy iterations: 1601 ( 0.1 % of used cycles) >- sleep iterations:0 ( 0.0 % of iterations) > ^^^ can see here that pmd does not sleep and is 0.1% busy > >Sleep time (us): 0 ( 0 us/iteration avg.) 
>Rx packets:37250 (4 Kpps, 866 cycles/pkt) >Datapath passes: 37250 (1.00 passes/pkt) >- PHWOL hits: 0 ( 0.0 %) >- MFEX Opt hits: 0 ( 0.0 %) >- Simple Match hits: 37250 (100.0 %) >- EMC hits:0 ( 0.0 %) >- SMC hits:0 ( 0.0 %) >- Megaflow hits: 0 ( 0.0 %, 0.00 subtbl lookups/hit) >- Upcalls: 0 ( 0.0 %, 0.0 us/upcall) >- Lost upcalls:0 ( 0.0 %) >Tx packets:0 > > { >"/eal/lcore/usage": { > "lcore_ids": [ >1 > ], > "total_cycles": [ >26127284389 > ], > "busy_cycles": [ >32370313 > ] >} > } > > ^^^ This in isolation implies pmd is 32370313/26127284389 0.12% busy > which is true > > 2023-09-07T13:15:06Z|00160|dpif_netdev|INFO|PMD max sleep request is 500 > usecs. > 2023-09-07T13:15:06Z|00161|dpif_netdev|INFO|PMD load based sleeps are > enabled. > > Time: 13:15:16.908 > Measurement duration: 10.008 s > > pmd thread numa_id 0 core_id 8: > >Iterations:75197 (133.09 us/it) >- Used TSC cycles: 237910969 ( 0.9 % of total cycles) >- idle iterations: 73782 ( 74.4 % of used cycles) >- busy iterations: 1415 ( 25.6 % of used cycles) >- sleep iterations:
Re: [ovs-dev] [PATCH] checkpatch: Add checks for the subject line.
On 9 Sep 2023, at 1:11, Ilya Maximets wrote: > On 9/5/23 14:40, Eelco Chaudron wrote: >> This patch adds WARNINGs for the subject line length and the format, >> i.e., the sentence should start with a capital and end with a dot. >> >> Signed-off-by: Eelco Chaudron >> --- >> utilities/checkpatch.py | 30 ++ >> 1 file changed, 30 insertions(+) >> >> diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py >> index 5c4aaefb3..57959595f 100755 >> --- a/utilities/checkpatch.py >> +++ b/utilities/checkpatch.py >> @@ -792,6 +792,33 @@ def run_file_checks(text): >> check['check'](text) >> >> >> +def run_subject_checks(subject, spellcheck=False): >> +print_subject = False >> + >> +if spellcheck and check_spelling(subject, False): >> +print_subject = True >> + >> +summary = subject[subject.rindex(': ') + 2:] >> +summary_len = len(summary) >> +if summary_len > 50: >> +print_warning("The subject summary is over 50 characters, i.e., >> %u." % > > Also, 50 seems like too low of a number. It looks like about > 25% of all commits in OVS will fail this check. If we make it > 65, for example, it will be only 2%. 7% with 60. Yes, I noticed, but took the 50 from our documentation. Maybe we should limit the total length to 70 like they do in the kernel? > What do you think? Will add the test cases and send out a v2. //Eelco ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v3] utilities: Add kernel_delay.py script to debug a busy Linux kernel.
Bleep bloop. Greetings Eelco Chaudron, I am a robot and I have tried out your patch. Thanks for your contribution. I encountered some error that I wasn't expecting. See the details below. checkpatch: WARNING: Line is 106 characters long (recommended limit is 79) #1511 FILE: utilities/usdt-scripts/kernel_delay.rst:54: -- WARNING: Line is 93 characters long (recommended limit is 79) #1516 FILE: utilities/usdt-scripts/kernel_delay.rst:59: NAME NUMBER COUNT TOTAL ns MAX ns WARNING: Line is 93 characters long (recommended limit is 79) #1517 FILE: utilities/usdt-scripts/kernel_delay.rst:60: poll 7 5 184,193,176 184,191,520 WARNING: Line is 93 characters long (recommended limit is 79) #1518 FILE: utilities/usdt-scripts/kernel_delay.rst:61: recvmsg 47 494 125,208,756 310,331 WARNING: Line is 93 characters long (recommended limit is 79) #1519 FILE: utilities/usdt-scripts/kernel_delay.rst:62: futex 202 818,768,758 4,023,039 WARNING: Line is 93 characters long (recommended limit is 79) #1520 FILE: utilities/usdt-scripts/kernel_delay.rst:63: sendto 44 10 375,861 266,867 WARNING: Line is 93 characters long (recommended limit is 79) #1521 FILE: utilities/usdt-scripts/kernel_delay.rst:64: sendmsg 46 443,294 11,213 WARNING: Line is 93 characters long (recommended limit is 79) #1522 FILE: utilities/usdt-scripts/kernel_delay.rst:65: write 1 1 5,949 5,949 WARNING: Line is 93 characters long (recommended limit is 79) #1523 FILE: utilities/usdt-scripts/kernel_delay.rst:66: getrusage98 1 1,424 1,424 WARNING: Line is 93 characters long (recommended limit is 79) #1524 FILE: utilities/usdt-scripts/kernel_delay.rst:67: read 0 1 1,292 1,292 WARNING: Line is 82 characters long (recommended limit is 79) #1528 FILE: utilities/usdt-scripts/kernel_delay.rst:71: SCHED_CNT TOTAL nsMIN nsMAX ns WARNING: Line is 86 characters long (recommended limit is 79) #1536 FILE: utilities/usdt-scripts/kernel_delay.rst:79: NAME COUNT TOTAL ns MAX ns WARNING: Line is 86 characters long (recommended limit is 79) 
#1537 FILE: utilities/usdt-scripts/kernel_delay.rst:80: eno8303-rx-1 1 3,586 3,586 WARNING: Line is 94 characters long (recommended limit is 79) #1541 FILE: utilities/usdt-scripts/kernel_delay.rst:84: NAME VECT_NR COUNT TOTAL ns MAX ns WARNING: Line is 94 characters long (recommended limit is 79) #1542 FILE: utilities/usdt-scripts/kernel_delay.rst:85: net_rx 3 117,699 17,699 WARNING: Line is 94 characters long (recommended limit is 79) #1543 FILE: utilities/usdt-scripts/kernel_delay.rst:86: sched 7 613,820 3,226 WARNING: Line is 94 characters long (recommended limit is 79) #1544 FILE: utilities/usdt-scripts/kernel_delay.rst:87: rcu9 1613,586 1,554 WARNING: Line is 94 characters long (recommended limit is 79) #1545 FILE: utilities/usdt-scripts/kernel_delay.rst:88: timer 1 310,259 3,815 WARNING: Line is 106 characters long (recommended limit is 79) #1646 FILE: utilities/usdt-scripts/kernel_delay.rst:189: -- WARNING: Line is 87 characters long (recommended limit is 79) #1661 FILE: utilities/usdt-scripts/kernel_delay.rst:204: ENTRY (ns) EXIT (ns)TID COMM DELTA (us) SYSCALL WARNING: Line is 100 characters long (recommended limit is 79) #1662 FILE: utilities/usdt-scripts/kernel_delay.rst:205: --- --- -- -- WARNING: Line is 89 characters long (recommended limit is 79) #1663 FILE: utilities/usdt-scripts/kernel_delay.rst:206: 216182169493548621618216950312013359699 revalidator14 95 futex WARNING: Line is 89 characters long (recommended limit is 79) #1672 FILE:
Re: [ovs-dev] [PATCH v6 7/7] system-dpdk: Run traffic tests.
On 11 Sep 2023, at 11:36, David Marchand wrote: > On Fri, Sep 8, 2023 at 4:28 PM Eelco Chaudron wrote: >> On 6 Sep 2023, at 8:20, David Marchand wrote: >> >>> Integrate system-traffic.at tests as part of check-dpdk. >>> >>> Some tests that can't work with the userspace datapath are skipped by >>> overriding some OVS_CHECK_* macros. >>> >>> ADD_VETH is implemented using the net/af_xdp DPDK driver. >>> >>> Signed-off-by: David Marchand >> >> As discussed offline, this patch looks good, however I have some sync issue >> between testpmd and OVS-DPDK which is causing the following log message: >> >> 2023-09-08T13:15:18.160Z|00163|dpdk|INFO|VHOST_CONFIG: >> (/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0) >> vhost peer closed >> 2023-09-08T13:15:18.160Z|00164|netdev_dpdk|INFO|vHost Device >> '/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0' >> connection has been destroyed >> 2023-09-08T13:15:18.160Z|00165|dpdk|INFO|VHOST_CONFIG: >> (/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0) >> vhost-user client: socket created, fd: 24 >> 2023-09-08T13:15:18.160Z|00166|dpdk|WARN|VHOST_CONFIG: >> (/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0) >> failed to connect: Connection refused >> 2023-09-08T13:15:18.160Z|00085|dpif_netdev|INFO|Performing pmd to rx queue >> assignment using cycles algorithm. >> 2023-09-08T13:15:18.160Z|00167|dpdk|INFO|VHOST_CONFIG: >> (/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0) >> reconnecting... >> 2023-09-08T13:15:18.160Z|00086|dpif_netdev|INFO|Core 58 on numa node 0 >> assigned port 'dpdkvhostuserclient0' rx queue 0 (measured processing cycles >> 263044). 
>> >> The test is passing, but fails due to the “WARN|VHOST_CONFIG” log message >> being present. >> >> I’ll let you figure out if just adding the message is sufficient or need >> some synchronisation. Other than that the code looks good to me! > > It looks like testpmd is too quick to quit, and OVS tries to reconnect > on the vhost user port socket once, before the port is deleted from > the bridge. > Inverting OVS_DPDK_STOP_TESTPMD() and AT_CHECK([ovs-vsctl del-port > br10 dpdkvhostuserclient0], [], [stdout], [stderr]) does the job. > But other tests have the same pattern... the difference is that only > this test 5 has a ADD_VETH() so something is changing OVS shutdown > timing with this last patch of mine. > > I will be away a few days, I'll have a look again when I am back. Thanks David for doing the investigation. I’ll wait for a v7 once you get back :) Cheers, Eelco ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v3] utilities: Add kernel_delay.py script to debug a busy Linux kernel.
This patch adds an utility that can be used to determine if an issue is related to a lack of Linux kernel resources. This tool is also featured in a Red Hat developers blog article: https://developers.redhat.com/articles/2023/07/24/troubleshooting-open-vswitch-kernel-blame Signed-off-by: Eelco Chaudron --- v2: Addressed review comments from Aaron. v3: Changed wording in documentation. utilities/automake.mk |4 utilities/usdt-scripts/kernel_delay.py | 1402 +++ utilities/usdt-scripts/kernel_delay.rst | 596 + 3 files changed, 2002 insertions(+) create mode 100755 utilities/usdt-scripts/kernel_delay.py create mode 100644 utilities/usdt-scripts/kernel_delay.rst diff --git a/utilities/automake.mk b/utilities/automake.mk index 37d679f82..9a2114df4 100644 --- a/utilities/automake.mk +++ b/utilities/automake.mk @@ -23,6 +23,8 @@ scripts_DATA += utilities/ovs-lib usdt_SCRIPTS += \ utilities/usdt-scripts/bridge_loop.bt \ utilities/usdt-scripts/dpif_nl_exec_monitor.py \ + utilities/usdt-scripts/kernel_delay.py \ + utilities/usdt-scripts/kernel_delay.rst \ utilities/usdt-scripts/reval_monitor.py \ utilities/usdt-scripts/upcall_cost.py \ utilities/usdt-scripts/upcall_monitor.py @@ -70,6 +72,8 @@ EXTRA_DIST += \ utilities/docker/debian/build-kernel-modules.sh \ utilities/usdt-scripts/bridge_loop.bt \ utilities/usdt-scripts/dpif_nl_exec_monitor.py \ + utilities/usdt-scripts/kernel_delay.py \ + utilities/usdt-scripts/kernel_delay.rst \ utilities/usdt-scripts/reval_monitor.py \ utilities/usdt-scripts/upcall_cost.py \ utilities/usdt-scripts/upcall_monitor.py diff --git a/utilities/usdt-scripts/kernel_delay.py b/utilities/usdt-scripts/kernel_delay.py new file mode 100755 index 0..afecca68f --- /dev/null +++ b/utilities/usdt-scripts/kernel_delay.py @@ -0,0 +1,1402 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2022,2023 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Script information: +# --- +# This script allows a developer to quickly identify if the issue at hand +# might be related to the kernel running out of resources or if it really is +# an Open vSwitch issue. +# +# For documentation see the kernel_delay.rst file. +# +import argparse +import datetime +import os +import pytz +import psutil +import re +import sys +import time + +import ctypes as ct + +try: +from bcc import BPF, USDT, USDTException +from bcc.syscall import syscalls, syscall_name +except ModuleNotFoundError: +print("ERROR: Can't find the BPF Compiler Collection (BCC) tools!") +sys.exit(os.EX_OSFILE) + +from enum import IntEnum + + +# +# Actual eBPF source code +# +EBPF_SOURCE = """ +#include +#include + +#define MONITOR_PID + +enum { + +}; + +struct event_t { +u64 ts; +u32 tid; +u32 id; + +int user_stack_id; +int kernel_stack_id; + +u32 syscall; +u64 entry_ts; + +}; + +BPF_RINGBUF_OUTPUT(events, ); +BPF_STACK_TRACE(stack_traces, ); +BPF_TABLE("percpu_array", uint32_t, uint64_t, dropcnt, 1); +BPF_TABLE("percpu_array", uint32_t, uint64_t, trigger_miss, 1); + +BPF_ARRAY(capture_on, u64, 1); +static bool capture_enabled(u64 pid_tgid) { +int key = 0; +u64 *ret; + +if ((pid_tgid >> 32) != MONITOR_PID) +return false; + +ret = capture_on.lookup(); +return ret && *ret == 1; +} + +static inline bool capture_enabled__() { +int key = 0; +u64 *ret; + +ret = capture_on.lookup(); +return ret && *ret == 1; +} + + +static struct event_t *get_event(uint32_t id) { +struct event_t *event = events.ringbuf_reserve(sizeof(struct event_t)); + +if (!event) { 
+dropcnt.increment(0); +return NULL; +} + +event->id = id; +event->ts = bpf_ktime_get_ns(); +event->tid = bpf_get_current_pid_tgid(); + +return event; +} + +static int start_trigger() { +int key = 0; +u64 *val = capture_on.lookup(); + +/* If the value is -1 we can't start as we are still processing the + * results in userspace. */ +if (!val || *val != 0) { +trigger_miss.increment(0); +return 0; +} + +struct event_t *event = get_event(EVENT_START_TRIGGER); +if (event) { + events.ringbuf_submit(event, 0); + *val = 1; +} else { +trigger_miss.increment(0); +} +return 0; +} + +static
Re: [ovs-dev] [PATCH v6 7/7] system-dpdk: Run traffic tests.
On Fri, Sep 8, 2023 at 4:28 PM Eelco Chaudron wrote: > On 6 Sep 2023, at 8:20, David Marchand wrote: > > > Integrate system-traffic.at tests as part of check-dpdk. > > > > Some tests that can't work with the userspace datapath are skipped by > > overriding some OVS_CHECK_* macros. > > > > ADD_VETH is implemented using the net/af_xdp DPDK driver. > > > > Signed-off-by: David Marchand > > As discussed offline, this patch looks good, however I have some sync issue > between testpmd and OVS-DPDK which is causing the following log message: > > 2023-09-08T13:15:18.160Z|00163|dpdk|INFO|VHOST_CONFIG: > (/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0) > vhost peer closed > 2023-09-08T13:15:18.160Z|00164|netdev_dpdk|INFO|vHost Device > '/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0' > connection has been destroyed > 2023-09-08T13:15:18.160Z|00165|dpdk|INFO|VHOST_CONFIG: > (/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0) > vhost-user client: socket created, fd: 24 > 2023-09-08T13:15:18.160Z|00166|dpdk|WARN|VHOST_CONFIG: > (/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0) > failed to connect: Connection refused > 2023-09-08T13:15:18.160Z|00085|dpif_netdev|INFO|Performing pmd to rx queue > assignment using cycles algorithm. > 2023-09-08T13:15:18.160Z|00167|dpdk|INFO|VHOST_CONFIG: > (/root/Documents/reviews/ovs_david_dpdk_tests/ovs_github/tests/system-dpdk-testsuite.dir/005/dpdkvhostclient0) > reconnecting... > 2023-09-08T13:15:18.160Z|00086|dpif_netdev|INFO|Core 58 on numa node 0 > assigned port 'dpdkvhostuserclient0' rx queue 0 (measured processing cycles > 263044). > > The test is passing, but fails due to the “WARN|VHOST_CONFIG” log message > being present. 
> > I’ll let you figure out if just adding the message is sufficient or need some > synchronisation. Other than that the code looks good to me! It looks like testpmd is too quick to quit, and OVS tries to reconnect on the vhost user port socket once, before the port is deleted from the bridge. Inverting OVS_DPDK_STOP_TESTPMD() and AT_CHECK([ovs-vsctl del-port br10 dpdkvhostuserclient0], [], [stdout], [stderr]) does the job. But other tests have the same pattern... the difference is that only this test 5 has a ADD_VETH() so something is changing OVS shutdown timing with this last patch of mine. I will be away a few days, I'll have a look again when I am back. -- David Marchand ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v4] hash: Add explicit typecasts to fix C++ compilation issues
On 9 Sep 2023, at 10:48, James Raphael Tiovalen wrote: > C++ does not allow implicit conversion from void pointer to a specific > pointer type. This change removes the cast from uint32_t* to void* in > `hash_words_32aligned` and adds an explicit typecast from uint32_t* to > uint64_t* in `hash_words_inline`. > > This issue was initially discovered on G++ v9.2.0 when a downstream C++ > application included the hash.h header file and was compiled on an AMD > Ryzen Zen 2 CPU (__SSE4_2__ && __x86_64__). On the latest G++ version, > it would throw an error. On the latest GCC version with `-Wc++-compat`, > it would throw a warning. > > Acked-by: Mike Pattrick > Signed-off-by: James Raphael Tiovalen > --- > Revisions: > > v1 -> v2: Fix build issue due to `-Wcast-align=strict` warning. > v2 -> v3: Fix `-Warray-bounds=1` warning. > v3 -> v4: Use `ALIGNED_CAST` as per Ilya's suggestion. > --- > lib/hash.h | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/lib/hash.h b/lib/hash.h > index 7b7f70c11..27ad782ea 100644 > --- a/lib/hash.h > +++ b/lib/hash.h > @@ -200,7 +200,7 @@ hash_finish32(uint64_t hash, uint32_t final, uint32_t > semifinal) > static inline uint32_t > hash_words_32aligned(const uint32_t *p_, size_t n_words, uint32_t basis) > { > -const uint32_t *p = (const void *) p_; > +const uint32_t *p = p_; If the casting is no longer needed, we might as well rename the input parameter from p_ to p and we might be done. Including Mike in this conversation as he did the original implementation, so it would be good to understand his intention. > uint32_t hash1 = basis; > uint32_t hash2 = 0; > uint32_t hash3 = n_words; > @@ -254,7 +254,7 @@ hash_words_32aligned(const uint32_t *p_, size_t n_words, > uint32_t basis) > static inline uint32_t > hash_words_inline(const uint32_t *p_, size_t n_words, uint32_t basis) > { > -const uint64_t *p = (const void *)p_; > +const uint64_t *p = ALIGNED_CAST(const uint64_t *, p_); ^^ One space too many. 
> uint64_t hash1 = basis; > uint64_t hash2 = 0; > uint64_t hash3 = n_words; > -- > 2.42.0 > > ___ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH] vswitchd, ofproto-dpif: Add support for CT limit
On Sat, Sep 9, 2023 at 12:33 AM Ilya Maximets wrote: > On 8/10/23 14:48, Ales Musil wrote: > > Add support for setting CT zone limit via ovs-vswitchd > > database CT_Zone entry. The limit is propagated > > into corresponding datapath. > Hi Ilya, > > Thanks for the patch! I didn't try it, but see some comments inline. > thank you for the review. > > > In order to keep > > backward compatibility the dpctl/ct-set-limits command > > can overwrite the database settings. > > This doesn't seem like a good option. If anything, the behavior > should be the opposite, i.e. if the value is set in the database, > it should overwrite whatever user have set before via appctl. > We can treat a zero as a 'whatever currently is set' value, and > all other values set in the database has to be enforced. Or the > column in the database may be optional, in this case we may treat > an empty column as not configured and zero as unlimited. > In this version 0 is default system behavior I guess for backward compatibility it would make sense to leave the setting untouched for 0. > > The dpctl/ct-set-limits may warn or fail in case of conflicting data. > We also need a NEWS record about the new functionality. > Fail would be better IMO if we go with the database value being enforced unless 0. WDYT? 
> > > > > Signed-off-by: Ales Musil > > --- > > ofproto/ofproto-dpif.c | 42 +++ > > ofproto/ofproto-dpif.h | 5 > > ofproto/ofproto-provider.h | 8 ++ > > ofproto/ofproto.c | 23 + > > ofproto/ofproto.h | 3 +++ > > tests/system-traffic.at| 51 +- > > vswitchd/bridge.c | 10 > > vswitchd/vswitch.ovsschema | 8 -- > > vswitchd/vswitch.xml | 5 > > 9 files changed, 152 insertions(+), 3 deletions(-) > > > > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c > > index e22ca757a..0c2818a5a 100644 > > --- a/ofproto/ofproto-dpif.c > > +++ b/ofproto/ofproto-dpif.c > > @@ -5522,6 +5522,8 @@ ct_zone_config_init(struct dpif_backer *backer) > > cmap_init(>ct_zones); > > hmap_init(>ct_tps); > > ovs_list_init(>ct_tp_kill_list); > > +ovs_list_init(>ct_zone_limits_to_add); > > +ovs_list_init(>ct_zone_limits_to_del); > > clear_existing_ct_timeout_policies(backer); > > } > > > > @@ -5545,6 +5547,8 @@ ct_zone_config_uninit(struct dpif_backer *backer) > > id_pool_destroy(backer->tp_ids); > > cmap_destroy(>ct_zones); > > hmap_destroy(>ct_tps); > > +ct_dpif_free_zone_limits(>ct_zone_limits_to_add); > > +ct_dpif_free_zone_limits(>ct_zone_limits_to_del); > > } > > > > static void > > @@ -5625,6 +5629,42 @@ ct_del_zone_timeout_policy(const char > *datapath_type, uint16_t zone_id) > > } > > } > > > > +static void > > +ct_zone_limit_queue_update(const char *datapath_type, uint16_t zone_id, > > + uint32_t limit) > > +{ > > +struct dpif_backer *backer = shash_find_data(_dpif_backers, > > + datapath_type); > > +if (!backer) { > > +return; > > +} > > + > > +struct ovs_list *queue = limit ? 
> > +>ct_zone_limits_to_add : > >ct_zone_limits_to_del; > > + > > +ct_dpif_push_zone_limit(queue, zone_id, limit, 0); > > +} > > + > > +static void > > +ct_zone_limits_commit(const char *datapath_type) > > +{ > > +struct dpif_backer *backer = shash_find_data(_dpif_backers, > > + datapath_type); > > +if (!backer) { > > +return; > > +} > > + > > +if (!ovs_list_is_empty(>ct_zone_limits_to_add)) { > > +ct_dpif_set_limits(backer->dpif, NULL, > >ct_zone_limits_to_add); > > +ct_dpif_free_zone_limits(>ct_zone_limits_to_add); > > +} > > + > > +if (!ovs_list_is_empty(>ct_zone_limits_to_del)) { > > +ct_dpif_del_limits(backer->dpif, > >ct_zone_limits_to_del); > > +ct_dpif_free_zone_limits(>ct_zone_limits_to_del); > > +} > > +} > > + > > static void > > get_datapath_cap(const char *datapath_type, struct smap *cap) > > { > > @@ -6914,4 +6954,6 @@ const struct ofproto_class ofproto_dpif_class = { > > ct_flush, /* ct_flush */ > > ct_set_zone_timeout_policy, > > ct_del_zone_timeout_policy, > > +ct_zone_limit_queue_update, > > +ct_zone_limits_commit > > Please, keep the trailing comma. > > > }; > > diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h > > index d8e0cd37a..b863dd6fc 100644 > > --- a/ofproto/ofproto-dpif.h > > +++ b/ofproto/ofproto-dpif.h > > @@ -284,6 +284,11 @@ struct dpif_backer { > > feature than > 'bt_support'. */ > > > > struct atomic_count tnl_count; > > + > > +struct ovs_list ct_zone_limits_to_add; /* CT zone limits queued for > > + * addition into datapath.