On 1/26/24 01:16, Numan Siddique wrote: > On Wed, Jan 17, 2024 at 7:38 AM Dumitru Ceara <dce...@redhat.com> wrote: >> >> On 1/11/24 16:29, num...@ovn.org wrote: >>> From: Numan Siddique <num...@ovn.org> >>> >>> This new engine now maintains the load balancer and NAT data of a >>> logical router which was earlier part of northd engine node data. >>> The main inputs to this engine are: >>> - northd node >>> - lr_nat node >>> - lb_data node >>> >>> A record for each logical router is maintained in the 'lr_stateful_table' >>> hmap table and this record >>> - stores the lb related data >>> - embeds the 'lr_nat' record. >>> >>> This engine node becomes an input to 'lflow' node. >>> >>> Signed-off-by: Numan Siddique <num...@ovn.org> >>> --- >> >> Hi Numan, >> >> Most of my comments in this patch are minor and can be easily addressed, >> the code looks correct overall to me. >> >> There is one thing though so I'll wait for feedback on the comment I >> made in build_ip_routing_flows_for_router_type_lsp() before acking the >> patch. > > Hi Dumitru, > > PSB for the build_ip_routing_flows_for_router_type_lsp() comment replies. > > Thanks for the reviews. > > Numan > >> >> Thanks, >> Dumitru >> >>> lib/stopwatch-names.h | 1 + >>> northd/automake.mk | 2 + >>> northd/en-lflow.c | 4 + >>> northd/en-lr-nat.h | 3 + >>> northd/en-lr-stateful.c | 641 +++++++++++++++++++++++++++++++++++ >>> northd/en-lr-stateful.h | 105 ++++++ >>> northd/en-sync-sb.c | 49 +-- >>> northd/inc-proc-northd.c | 13 +- >>> northd/northd.c | 711 ++++++++++++--------------------------- >>> northd/northd.h | 139 +++++++- >>> northd/ovn-northd.c | 1 + >>> tests/ovn-northd.at | 62 ++++ >>> 12 files changed, 1213 insertions(+), 518 deletions(-) >>> create mode 100644 northd/en-lr-stateful.c >>> create mode 100644 northd/en-lr-stateful.h >>> >>> diff --git a/lib/stopwatch-names.h b/lib/stopwatch-names.h >>> index 782d64320a..e5e41fbfd8 100644 >>> --- a/lib/stopwatch-names.h >>> +++ b/lib/stopwatch-names.h >>> @@ -30,5 +30,6 @@ >>> #define PORT_GROUP_RUN_STOPWATCH_NAME "port_group_run" >>> #define SYNC_METERS_RUN_STOPWATCH_NAME "sync_meters_run" >>> #define LR_NAT_RUN_STOPWATCH_NAME "lr_nat_run" >>> +#define LR_STATEFUL_RUN_STOPWATCH_NAME "lr_stateful" >>> >>> #endif >>> diff --git a/northd/automake.mk b/northd/automake.mk >>> index a477105470..b886356c9c 100644 >>> --- a/northd/automake.mk >>> +++ b/northd/automake.mk >>> @@ -26,6 +26,8 @@ northd_ovn_northd_SOURCES = \ >>> northd/en-lb-data.h \ >>> northd/en-lr-nat.c \ >>> northd/en-lr-nat.h \ >>> + northd/en-lr-stateful.c \ >>> + northd/en-lr-stateful.h \ >>> northd/inc-proc-northd.c \ >>> northd/inc-proc-northd.h \ >>> northd/ipam.c \ >>> diff --git a/northd/en-lflow.c b/northd/en-lflow.c >>> index e4f875ef7c..bb9f78a6a1 100644 >>> --- a/northd/en-lflow.c >>> +++ b/northd/en-lflow.c >>> @@ -20,6 +20,7 @@ >>> >>> #include "en-lflow.h" >>> #include "en-lr-nat.h" >>> +#include "en-lr-stateful.h" >>> #include "en-northd.h" >>> #include "en-meters.h" >>> >>> @@ -43,6 +44,8 @@ lflow_get_input_data(struct engine_node *node, >>> engine_get_input_data("sync_meters", node); >>> struct ed_type_lr_nat_data *lr_nat_data = >>> engine_get_input_data("lr_nat", node); >>> + struct ed_type_lr_stateful *lr_stateful_data = >>> + engine_get_input_data("lr_stateful", node); >>> >>> lflow_input->nbrec_bfd_table = >>> EN_OVSDB_GET(engine_get_input("NB_bfd", node)); >>> @@ -66,6 +69,7 @@ lflow_get_input_data(struct engine_node *node, >>> lflow_input->lr_ports = &northd_data->lr_ports; >>> lflow_input->ls_port_groups = &pg_data->ls_port_groups; >>> lflow_input->lr_nats = &lr_nat_data->lr_nats; >>> + lflow_input->lr_stateful_table = &lr_stateful_data->table; >>> lflow_input->meter_groups = &sync_meters_data->meter_groups; >>> lflow_input->lb_datapaths_map = &northd_data->lb_datapaths_map; >>> lflow_input->svc_monitor_map = &northd_data->svc_monitor_map; >>> diff --git a/northd/en-lr-nat.h b/northd/en-lr-nat.h >>> index 3ec4c7b506..4d8e142ae6 100644 >>> --- a/northd/en-lr-nat.h >>> +++ b/northd/en-lr-nat.h >>> @@ -86,6 +86,9 @@ struct lr_nat_table { >>> const struct lr_nat_record * lr_nat_table_find_by_index( >>> const struct lr_nat_table *, size_t od_index); >>> >>> +#define LR_NAT_TABLE_FOR_EACH(LR_NAT_REC, TABLE) \ >>> + HMAP_FOR_EACH (LR_NAT_REC, key_node, &(TABLE)->entries) >>> + >>> struct ed_type_lr_nat_data { >>> struct lr_nat_table lr_nats; >>> >>> diff --git a/northd/en-lr-stateful.c b/northd/en-lr-stateful.c >>> new file mode 100644 >>> index 0000000000..dedfdf6747 >>> --- /dev/null >>> +++ b/northd/en-lr-stateful.c >>> @@ -0,0 +1,641 @@ >>> +/* >>> + * Copyright (c) 2023, Red Hat, Inc. >> >> 2024 >> >>> + * >>> + * Licensed under the Apache License, Version 2.0 (the "License"); >>> + * you may not use this file except in compliance with the License. >>> + * You may obtain a copy of the License at: >>> + * >>> + * http://www.apache.org/licenses/LICENSE-2.0 >>> + * >>> + * Unless required by applicable law or agreed to in writing, software >>> + * distributed under the License is distributed on an "AS IS" BASIS, >>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. >>> + * See the License for the specific language governing permissions and >>> + * limitations under the License. >>> + */ >>> + >>> +#include <config.h> >>> + >>> +#include <getopt.h> >>> +#include <stdlib.h> >>> +#include <stdio.h> >>> + >>> +/* OVS includes */ >>> +#include "include/openvswitch/hmap.h" >>> +#include "lib/bitmap.h" >>> +#include "lib/socket-util.h" >>> +#include "lib/uuidset.h" >>> +#include "openvswitch/util.h" >>> +#include "openvswitch/vlog.h" >>> +#include "stopwatch.h" >>> + >>> +/* OVN includes */ >>> +#include "en-lb-data.h" >>> +#include "en-lr-nat.h" >>> +#include "en-lr-stateful.h" >>> +#include "lib/inc-proc-eng.h" >>> +#include "lib/lb.h" >>> +#include "lib/ovn-nb-idl.h" >>> +#include "lib/ovn-sb-idl.h" >>> +#include "lib/ovn-util.h" >>> +#include "lib/stopwatch-names.h" >>> +#include "northd.h" >>> + >>> +VLOG_DEFINE_THIS_MODULE(en_lr_stateful); >>> + >>> +/* Static function declarations. */ >>> +static void lr_stateful_table_init(struct lr_stateful_table *); >>> +static void lr_stateful_table_clear(struct lr_stateful_table *); >>> +static void lr_stateful_table_destroy(struct lr_stateful_table *); >>> +static struct lr_stateful_record *lr_stateful_table_find_( >>> + const struct lr_stateful_table *, const struct nbrec_logical_router *); >>> +static struct lr_stateful_record *lr_stateful_table_find_by_index_( >>> + const struct lr_stateful_table *table, size_t od_index); >> >> 'table' can be omitted. >> >>> + >>> +static void lr_stateful_table_build(struct lr_stateful_table *, >>> + const struct lr_nat_table *, >>> + const struct ovn_datapaths >>> *lr_datapaths, >>> + const struct hmap *lb_datapaths_map, >>> + const struct hmap *lbgrp_datapaths_map); >>> + >>> +static struct lr_stateful_input lr_stateful_get_input_data( >>> + struct engine_node *); >>> + >>> +static struct lr_stateful_record *lr_stateful_record_create( >>> + struct lr_stateful_table *, const struct lr_nat_record *, >>> + const struct hmap *lb_datapaths_map, >>> + const struct hmap *lbgrp_datapaths_map); >>> +static void lr_stateful_record_destroy(struct lr_stateful_record *); >>> +static void lr_stateful_record_init( >>> + struct lr_stateful_record *, >>> + const struct hmap *lb_datapaths_map, >>> + const struct hmap *lbgrp_datapaths_map); >>> + >>> +static void build_lrouter_lb_reachable_ips(struct lr_stateful_record *, >>> + const struct ovn_northd_lb *); >>> +static void add_neigh_ips_to_lrouter(struct lr_stateful_record *, >>> + enum lb_neighbor_responder_mode, >>> + const struct sset *lb_ips_v4, >>> + const struct sset *lb_ips_v6); >>> +static void remove_lrouter_lb_reachable_ips(struct lr_stateful_record *, >>> + enum >>> lb_neighbor_responder_mode, >>> + const struct sset *lb_ips_v4, >>> + const struct sset *lb_ips_v6); >>> +static void lr_stateful_build_vip_nats(struct lr_stateful_record *); >>> + >>> +/* 'lr_stateful' engine node manages the NB logical router LB data. >>> + */ >>> +void * >>> +en_lr_stateful_init(struct engine_node *node OVS_UNUSED, >>> + struct engine_arg *arg OVS_UNUSED) >>> +{ >>> + struct ed_type_lr_stateful *data = xzalloc(sizeof *data); >>> + lr_stateful_table_init(&data->table); >>> + hmapx_init(&data->trk_data.crupdated); >>> + return data; >>> +} >>> + >>> +void >>> +en_lr_stateful_cleanup(void *data_) >>> +{ >>> + struct ed_type_lr_stateful *data = data_; >>> + lr_stateful_table_destroy(&data->table); >>> + hmapx_destroy(&data->trk_data.crupdated); >>> +} >>> + >>> +void >>> +en_lr_stateful_clear_tracked_data(void *data_) >>> +{ >>> + struct ed_type_lr_stateful *data = data_; >>> + >>> + hmapx_clear(&data->trk_data.crupdated); >>> +} >>> + >>> +void >>> +en_lr_stateful_run(struct engine_node *node, void *data_) >>> +{ >>> + struct lr_stateful_input input_data = lr_stateful_get_input_data(node); >>> + struct ed_type_lr_stateful *data = data_; >>> + >>> + stopwatch_start(LR_STATEFUL_RUN_STOPWATCH_NAME, time_msec()); >>> + >>> + lr_stateful_table_clear(&data->table); >>> + lr_stateful_table_build(&data->table, input_data.lr_nats, >>> + input_data.lr_datapaths, >>> + input_data.lb_datapaths_map, >>> + input_data.lbgrp_datapaths_map); >>> + >>> + stopwatch_stop(LR_STATEFUL_RUN_STOPWATCH_NAME, time_msec()); >>> + engine_set_node_state(node, EN_UPDATED); >>> +} >>> + >>> +bool >>> +lr_stateful_northd_handler(struct engine_node *node, void *data OVS_UNUSED) >>> +{ >>> + struct northd_data *northd_data = engine_get_input_data("northd", >>> node); >>> + if (!northd_has_tracked_data(&northd_data->trk_data)) { >>> + return false; >>> + } >>> + >>> + /* lr_stateful node needs inputs for any changes to NAT and load >>> balancers. >>> + * Changes to NAT is provided by the lr_nat tracked data and changes >>> + * to lbs and lb grps is provided by lb_data's tracked data. >>> + * So we don't need to do anything here for northd changes. >>> + * But we do need to access the datapaths and lb_datapaths from the >>> + * northd engine node and hence its an input. >>> + * */ >>> + return true; >>> +} >>> + >>> +bool >>> +lr_stateful_lb_data_handler(struct engine_node *node, void *data_) >>> +{ >>> + struct ed_type_lb_data *lb_data = engine_get_input_data("lb_data", >>> node); >>> + if (!lb_data->tracked) { >>> + return false; >>> + } >>> + >>> + struct ed_type_lr_stateful *data = data_; >>> + struct lr_stateful_input input_data = >>> + lr_stateful_get_input_data(node); >> >> Nit: Fits on one line. And there's no need for an empty line afterwards. >> >>> + >>> + const struct tracked_lb_data *trk_lb_data = &lb_data->tracked_lb_data; >>> + const struct crupdated_od_lb_data *codlb; >>> + >>> + LIST_FOR_EACH (codlb, list_node, &trk_lb_data->crupdated_lr_lbs) { >>> + const struct ovn_datapath *od = ovn_datapath_find( >>> + &input_data.lr_datapaths->datapaths, &codlb->od_uuid); >>> + ovs_assert(od); >>> + >>> + struct lr_stateful_record *lr_stateful_rec >>> =lr_stateful_table_find_( >>> + &data->table, od->nbr); >> >> Missing space after '='. I'd align this as: >> >> struct lr_stateful_record *lr_stateful_rec = >> lr_stateful_table_find_(&data->table, od->nbr); >> >>> + if (!lr_stateful_rec) { >>> + const struct lr_nat_record *lrnat_rec = >>> lr_nat_table_find_by_index( >>> + input_data.lr_nats, od->index); >>> + ovs_assert(lrnat_rec); >>> + >>> + lr_stateful_rec = lr_stateful_record_create(&data->table, >>> + lrnat_rec, >>> + input_data.lb_datapaths_map, >>> + >>> input_data.lbgrp_datapaths_map); >> >> Nit indentation; is this better? >> >> lr_stateful_rec = >> lr_stateful_record_create(&data->table, lrnat_rec, >> input_data.lb_datapaths_map, >> input_data.lbgrp_datapaths_map); >> >>> + >>> + /* Add the lr_stateful_rec rec to the tracking data. */ >>> + hmapx_add(&data->trk_data.crupdated, lr_stateful_rec); >>> + continue; >>> + } >>> + >>> + /* Update. */ >>> + struct uuidset_node *uuidnode; >>> + UUIDSET_FOR_EACH (uuidnode, &codlb->assoc_lbs) { >>> + const struct ovn_lb_datapaths *lb_dps = ovn_lb_datapaths_find( >>> + input_data.lb_datapaths_map, &uuidnode->uuid); >>> + ovs_assert(lb_dps); >>> + >>> + /* Add the lb_ips of lb_dps to the od. */ >>> + build_lrouter_lb_ips(lr_stateful_rec->lb_ips, lb_dps->lb); >>> + build_lrouter_lb_reachable_ips(lr_stateful_rec, lb_dps->lb); >> >> Hmm, is this actually correct though? If the LB update was a VIP >> removal we're not going to remove the old VIP from the >> lr_stateful_rec->lb_ips set. Or am I missing something? >> >> Is it just a matter of reinitializing lr_stateful_rec->lb_ips before this? >> >> Later edit: I just realized we do that in the block below, where we call >> remove_ips_from_lb_ip_set()/remove_lrouter_lb_reachable_ips(); it used >> to be like that before you moved the code here and it's probably correct. >> >>> + } >>> + >>> + UUIDSET_FOR_EACH (uuidnode, &codlb->assoc_lbgrps) { >>> + const struct ovn_lb_group_datapaths *lbgrp_dps = >>> + ovn_lb_group_datapaths_find(input_data.lbgrp_datapaths_map, >>> + &uuidnode->uuid); >>> + ovs_assert(lbgrp_dps); >>> + >>> + for (size_t j = 0; j < lbgrp_dps->lb_group->n_lbs; j++) { >>> + const struct uuid *lb_uuid >>> + = &lbgrp_dps->lb_group->lbs[j]->nlb->header_.uuid; >>> + const struct ovn_lb_datapaths *lb_dps = >>> ovn_lb_datapaths_find( >>> + input_data.lb_datapaths_map, lb_uuid); >>> + ovs_assert(lb_dps); >>> + >>> + /* Add the lb_ips of lb_dps to the od. */ >>> + build_lrouter_lb_ips(lr_stateful_rec->lb_ips, lb_dps->lb); >>> + build_lrouter_lb_reachable_ips(lr_stateful_rec, >>> lb_dps->lb); >>> + } >>> + } >>> + >>> + /* Add the lr_stateful_rec rec to the tracking data. */ >>> + hmapx_add(&data->trk_data.crupdated, lr_stateful_rec); >>> + } >>> + >>> + const struct crupdated_lb *clb; >>> + HMAP_FOR_EACH (clb, hmap_node, &trk_lb_data->crupdated_lbs) { >>> + const struct uuid *lb_uuid = &clb->lb->nlb->header_.uuid; >>> + const struct ovn_northd_lb *lb = clb->lb; >>> + >>> + const struct ovn_lb_datapaths *lb_dps = ovn_lb_datapaths_find( >>> + input_data.lb_datapaths_map, lb_uuid); >>> + ovs_assert(lb_dps); >>> + >>> + size_t index; >>> + BITMAP_FOR_EACH_1 (index, ods_size(input_data.lr_datapaths), >>> + lb_dps->nb_lr_map) { >>> + const struct ovn_datapath *od = >>> + input_data.lr_datapaths->array[index]; >>> + >>> + struct lr_stateful_record *lr_stateful_rec = >>> + lr_stateful_table_find_(&data->table, od->nbr); >>> + ovs_assert(lr_stateful_rec); >>> + >>> + /* Update the od->lb_ips with the deleted and inserted >>> + * vips (if any). */ >>> + remove_ips_from_lb_ip_set(lr_stateful_rec->lb_ips, >>> lb->routable, >>> + &clb->deleted_vips_v4, >>> + &clb->deleted_vips_v6); >>> + add_ips_to_lb_ip_set(lr_stateful_rec->lb_ips, lb->routable, >>> + &clb->inserted_vips_v4, >>> + &clb->inserted_vips_v6); >>> + >>> + remove_lrouter_lb_reachable_ips(lr_stateful_rec, >>> lb->neigh_mode, >>> + &clb->deleted_vips_v4, >>> + &clb->deleted_vips_v6); >>> + add_neigh_ips_to_lrouter(lr_stateful_rec, lb->neigh_mode, >>> + &clb->inserted_vips_v4, >>> + &clb->inserted_vips_v6); >>> + >>> + /* Add the lr_stateful_rec rec to the tracking data. */ >>> + hmapx_add(&data->trk_data.crupdated, lr_stateful_rec); >>> + } >>> + } >>> + >>> + const struct crupdated_lbgrp *crupdated_lbgrp; >>> + HMAP_FOR_EACH (crupdated_lbgrp, hmap_node, >>> + &trk_lb_data->crupdated_lbgrps) { >>> + const struct uuid *lb_uuid = &crupdated_lbgrp->lbgrp->uuid; >>> + const struct ovn_lb_group_datapaths *lbgrp_dps = >>> + ovn_lb_group_datapaths_find(input_data.lbgrp_datapaths_map, >>> + lb_uuid); >>> + ovs_assert(lbgrp_dps); >>> + >>> + struct hmapx_node *hnode; >>> + HMAPX_FOR_EACH (hnode, &crupdated_lbgrp->assoc_lbs) { >>> + const struct ovn_northd_lb *lb = hnode->data; >>> + lb_uuid = &lb->nlb->header_.uuid; >>> + const struct ovn_lb_datapaths *lb_dps = ovn_lb_datapaths_find( >>> + input_data.lb_datapaths_map, lb_uuid); >>> + ovs_assert(lb_dps); >>> + for (size_t i = 0; i < lbgrp_dps->n_lr; i++) { >>> + const struct ovn_datapath *od = lbgrp_dps->lr[i]; >>> + struct lr_stateful_record *lr_stateful_rec = >>> + lr_stateful_table_find_(&data->table, od->nbr); >>> + ovs_assert(lr_stateful_rec); >>> + /* Add the lb_ips of lb_dps to the lr lb data. */ >>> + build_lrouter_lb_ips(lr_stateful_rec->lb_ips, lb_dps->lb); >>> + build_lrouter_lb_reachable_ips(lr_stateful_rec, >>> lb_dps->lb); >>> + >>> + /* Add the lr_stateful_rec rec to the tracking data. */ >>> + hmapx_add(&data->trk_data.crupdated, lr_stateful_rec); >>> + } >>> + } >>> + } >>> + >>> + if (lr_stateful_has_tracked_data(&data->trk_data)) { >>> + struct hmapx_node *hmapx_node; >>> + /* For all the modified lr_stateful records (re)build the >>> + * vip nats. */ >> >> Nit: the comment should go above the variable declaration and I think we >> could use an empty line before HMAPX_FOR_EACH. Also, the comment fits >> fine on a single line. >> >>> + HMAPX_FOR_EACH (hmapx_node, &data->trk_data.crupdated) { >>> + lr_stateful_build_vip_nats(hmapx_node->data); >>> + } >>> + >>> + engine_set_node_state(node, EN_UPDATED); >>> + } >>> + >>> + return true; >>> +} >>> + >>> +bool >>> +lr_stateful_lr_nat_handler(struct engine_node *node, void *data_) >>> +{ >>> + struct ed_type_lr_nat_data *lr_nat_data = >>> + engine_get_input_data("lr_nat", node); >>> + >>> + if (!lr_nat_has_tracked_data(&lr_nat_data->trk_data)) { >>> + return false; >>> + } >>> + >>> + struct ed_type_lr_stateful *data = data_; >>> + struct lr_stateful_input input_data = >>> + lr_stateful_get_input_data(node); >> >> Nit: fits fine on a single line and we could do reverse xmas tree: >> >> struct lr_stateful_input input_data = lr_stateful_get_input_data(node); >> struct ed_type_lr_stateful *data = data_; >> struct hmapx_node *hmapx_node; >> >>> + struct hmapx_node *hmapx_node; >>> + >>> + HMAPX_FOR_EACH (hmapx_node, &lr_nat_data->trk_data.crupdated) { >>> + const struct lr_nat_record *lrnat_rec = hmapx_node->data; >>> + struct lr_stateful_record *lr_stateful_rec = >>> + lr_stateful_table_find_(&data->table, lrnat_rec->od->nbr); >>> + if (!lr_stateful_rec) { >>> + lr_stateful_rec = lr_stateful_record_create(&data->table, >>> + lrnat_rec, >>> + input_data.lb_datapaths_map, >>> + >>> input_data.lbgrp_datapaths_map); >> >> Nit: indentation, wdyt of: >> >> lr_stateful_rec = >> lr_stateful_record_create(&data->table, lrnat_rec, >> input_data.lb_datapaths_map, >> input_data.lbgrp_datapaths_map); >> >>> + } else { >>> + lr_stateful_build_vip_nats(lr_stateful_rec); >>> + } >>> + >>> + /* Add the lr_stateful_rec rec to the tracking data. */ >>> + hmapx_add(&data->trk_data.crupdated, lr_stateful_rec); >>> + } >>> + >>> + if (lr_stateful_has_tracked_data(&data->trk_data)) { >>> + engine_set_node_state(node, EN_UPDATED); >>> + } >>> + >>> + return true; >>> +} >>> + >>> +const struct lr_stateful_record * >>> +lr_stateful_table_find_by_index(const struct lr_stateful_table *table, >>> + size_t od_index) >>> +{ >>> + return lr_stateful_table_find_by_index_(table, od_index); >>> +} >>> + >>> +/* static functions. */ >>> +static void >>> +lr_stateful_table_init(struct lr_stateful_table *table) >>> +{ >>> + *table = (struct lr_stateful_table) { >>> + .entries = HMAP_INITIALIZER(&table->entries), >>> + }; >>> +} >>> + >>> +static void >>> +lr_stateful_table_destroy(struct lr_stateful_table *table) >>> +{ >>> + lr_stateful_table_clear(table); >>> + hmap_destroy(&table->entries); >>> +} >>> + >>> +static void >>> +lr_stateful_table_clear(struct lr_stateful_table *table) >>> +{ >>> + struct lr_stateful_record *lr_stateful_rec; >>> + HMAP_FOR_EACH_POP (lr_stateful_rec, key_node, &table->entries) { >>> + lr_stateful_record_destroy(lr_stateful_rec); >>> + } >>> + >>> + free(table->array); >>> + table->array = NULL; >>> +} >>> + >>> +static void >>> +lr_stateful_table_build(struct lr_stateful_table *table, >>> + const struct lr_nat_table *lr_nats, >>> + const struct ovn_datapaths *lr_datapaths, >>> + const struct hmap *lb_datapaths_map, >>> + const struct hmap *lbgrp_datapaths_map) >>> +{ >>> + table->array = xrealloc(table->array, >>> + ods_size(lr_datapaths) * sizeof *table->array); >>> + const struct lr_nat_record *lrnat_rec; >>> + LR_NAT_TABLE_FOR_EACH (lrnat_rec, lr_nats) { >>> + lr_stateful_record_create(table, lrnat_rec, lb_datapaths_map, >>> + lbgrp_datapaths_map); >>> + } >>> +} >>> + >>> +static struct lr_stateful_record * >>> +lr_stateful_table_find_(const struct lr_stateful_table *table, >>> + const struct nbrec_logical_router *nbr) >>> +{ >>> + struct lr_stateful_record *lr_stateful_rec; >>> + >>> + HMAP_FOR_EACH_WITH_HASH (lr_stateful_rec, key_node, >>> + uuid_hash(&nbr->header_.uuid), >>> &table->entries) { >>> + if (nbr == lr_stateful_rec->od->nbr) { >>> + return lr_stateful_rec; >>> + } >>> + } >>> + return NULL; >>> +} >>> + >>> +static struct lr_stateful_record * >>> +lr_stateful_table_find_by_index_(const struct lr_stateful_table *table, >>> + size_t od_index) >>> +{ >>> + ovs_assert(od_index <= hmap_count(&table->entries)); >>> + return table->array[od_index]; >>> +} >>> + >>> +static struct lr_stateful_record * >>> +lr_stateful_record_create(struct lr_stateful_table *table, >>> + const struct lr_nat_record *lrnat_rec, >>> + const struct hmap *lb_datapaths_map, >>> + const struct hmap *lbgrp_datapaths_map) >>> +{ >>> + struct lr_stateful_record *lr_stateful_rec = >>> + xzalloc(sizeof *lr_stateful_rec); >>> + lr_stateful_rec->lrnat_rec = lrnat_rec; >>> + lr_stateful_rec->od = lrnat_rec->od; >>> + lr_stateful_record_init(lr_stateful_rec, lb_datapaths_map, >>> + lbgrp_datapaths_map); >> >> Nit: indentation >> >>> + >>> + hmap_insert(&table->entries, &lr_stateful_rec->key_node, >>> + uuid_hash(&lr_stateful_rec->od->nbr->header_.uuid)); >>> + >>> + table->array[lr_stateful_rec->od->index] = lr_stateful_rec; >>> + return lr_stateful_rec; >>> +} >>> + >>> +static void >>> +lr_stateful_record_destroy(struct lr_stateful_record *lr_stateful_rec) >>> +{ >>> + ovn_lb_ip_set_destroy(lr_stateful_rec->lb_ips); >>> + lr_stateful_rec->lb_ips = NULL; >> >> No need to set to NULL, we free lr_stateful_rec below. >> >>> + sset_destroy(&lr_stateful_rec->vip_nats); >>> + free(lr_stateful_rec); >>> +} >>> + >>> +static void >>> +lr_stateful_record_init(struct lr_stateful_record *lr_stateful_rec, >>> + const struct hmap *lb_datapaths_map, >>> + const struct hmap *lbgrp_datapaths_map) >> >> IMO we don't need this separate function, we can just do everything in >> lr_stateful_record_create(). >> >>> +{ >>> + /* Checking load balancer groups first, starting from the largest one, >>> + * to more efficiently copy IP sets. */ >>> + size_t largest_group = 0; >>> + >>> + const struct nbrec_logical_router *nbr = lr_stateful_rec->od->nbr; >>> + for (size_t i = 1; i < nbr->n_load_balancer_group; i++) { >>> + if (nbr->load_balancer_group[i]->n_load_balancer > >>> + nbr->load_balancer_group[largest_group]->n_load_balancer) { >>> + largest_group = i; >>> + } >>> + } >>> + >>> + for (size_t i = 0; i < nbr->n_load_balancer_group; i++) { >>> + size_t idx = (i + largest_group) % nbr->n_load_balancer_group; >>> + >>> + const struct nbrec_load_balancer_group *nbrec_lb_group = >>> + nbr->load_balancer_group[idx]; >>> + const struct uuid *lbgrp_uuid = &nbrec_lb_group->header_.uuid; >>> + >>> + const struct ovn_lb_group_datapaths *lbgrp_dps = >>> + ovn_lb_group_datapaths_find(lbgrp_datapaths_map, >>> + lbgrp_uuid); >>> + ovs_assert(lbgrp_dps); >>> + >>> + if (!lr_stateful_rec->lb_ips) { >>> + lr_stateful_rec->lb_ips = >>> + ovn_lb_ip_set_clone(lbgrp_dps->lb_group->lb_ips); >>> + } else { >>> + for (size_t j = 0; j < lbgrp_dps->lb_group->n_lbs; j++) { >>> + build_lrouter_lb_ips(lr_stateful_rec->lb_ips, >>> + lbgrp_dps->lb_group->lbs[j]); >>> + } >>> + } >>> + >>> + for (size_t j = 0; j < lbgrp_dps->lb_group->n_lbs; j++) { >>> + build_lrouter_lb_reachable_ips(lr_stateful_rec, >>> + lbgrp_dps->lb_group->lbs[j]); >>> + } >>> + } >>> + >>> + if (!lr_stateful_rec->lb_ips) { >>> + lr_stateful_rec->lb_ips = ovn_lb_ip_set_create(); >>> + } >>> + >>> + for (size_t i = 0; i < nbr->n_load_balancer; i++) { >>> + const struct uuid *lb_uuid = >>> + &nbr->load_balancer[i]->header_.uuid; >> >> Fits on one line now. >> >>> + const struct ovn_lb_datapaths *lb_dps = >>> + ovn_lb_datapaths_find(lb_datapaths_map, lb_uuid); >>> + ovs_assert(lb_dps); >>> + build_lrouter_lb_ips(lr_stateful_rec->lb_ips, lb_dps->lb); >>> + build_lrouter_lb_reachable_ips(lr_stateful_rec, lb_dps->lb); >>> + } >>> + >>> + sset_init(&lr_stateful_rec->vip_nats); >> >> This should move up into lr_stateful_record_create(), but OK. >> >>> + >>> + if (nbr->n_nat) { >>> + lr_stateful_build_vip_nats(lr_stateful_rec); >>> + } >>> +} >>> + >>> +static struct lr_stateful_input >>> +lr_stateful_get_input_data(struct engine_node *node) >>> +{ >>> + struct northd_data *northd_data = engine_get_input_data("northd", >>> node); >>> + struct ed_type_lr_nat_data *lr_nat_data = >>> + engine_get_input_data("lr_nat", node); >>> + >>> + return (struct lr_stateful_input) { >>> + .lr_datapaths = &northd_data->lr_datapaths, >>> + .lb_datapaths_map = &northd_data->lb_datapaths_map, >>> + .lbgrp_datapaths_map = &northd_data->lb_group_datapaths_map, >>> + .lr_nats = &lr_nat_data->lr_nats, >>> + }; >>> +} >>> + >>> +static void >>> +build_lrouter_lb_reachable_ips(struct lr_stateful_record *lr_stateful_rec, >>> + const struct ovn_northd_lb *lb) >>> +{ >>> + add_neigh_ips_to_lrouter(lr_stateful_rec, lb->neigh_mode, &lb->ips_v4, >>> + &lb->ips_v6); >>> +} >>> + >>> +static void >>> +add_neigh_ips_to_lrouter(struct lr_stateful_record *lr_stateful_rec, >>> + enum lb_neighbor_responder_mode neigh_mode, >>> + const struct sset *lb_ips_v4, >>> + const struct sset *lb_ips_v6) >>> +{ >>> + /* If configured to not reply to any neighbor requests for all VIPs >>> + * return early. >>> + */ >>> + if (neigh_mode == LB_NEIGH_RESPOND_NONE) { >>> + return; >>> + } >>> + >>> + const char *ip_address; >>> + >>> + /* If configured to reply to neighbor requests for all VIPs force them >>> + * all to be considered "reachable". >>> + */ >>> + if (neigh_mode == LB_NEIGH_RESPOND_ALL) { >>> + SSET_FOR_EACH (ip_address, lb_ips_v4) { >>> + sset_add(&lr_stateful_rec->lb_ips->ips_v4_reachable, >>> ip_address); >>> + } >>> + SSET_FOR_EACH (ip_address, lb_ips_v6) { >>> + sset_add(&lr_stateful_rec->lb_ips->ips_v6_reachable, >>> ip_address); >>> + } >>> + >>> + return; >>> + } >>> + >>> + /* Otherwise, a VIP is reachable if there's at least one router >>> + * subnet that includes it. >>> + */ >>> + ovs_assert(neigh_mode == LB_NEIGH_RESPOND_REACHABLE); >>> + >>> + SSET_FOR_EACH (ip_address, lb_ips_v4) { >>> + struct ovn_port *op; >>> + ovs_be32 vip_ip4; >>> + if (ip_parse(ip_address, &vip_ip4)) { >>> + HMAP_FOR_EACH (op, dp_node, &lr_stateful_rec->od->ports) { >>> + if (lrouter_port_ipv4_reachable(op, vip_ip4)) { >>> + sset_add(&lr_stateful_rec->lb_ips->ips_v4_reachable, >>> + ip_address); >> >> Maybe not for this patch, but would it make sense to also start tracking >> what LB IPs are reachable on each port? That would allow us to avoid >> the additional lookup in build_lswitch_rport_arp_req_flows() where we >> check if the IP is reachable on a given router port. >> >>> + break; >>> + } >>> + } >>> + } >>> + } >>> + >>> + SSET_FOR_EACH (ip_address, lb_ips_v6) { >>> + struct ovn_port *op; >>> + struct in6_addr vip; >>> + if (ipv6_parse(ip_address, &vip)) { >>> + HMAP_FOR_EACH (op, dp_node, &lr_stateful_rec->od->ports) { >>> + if (lrouter_port_ipv6_reachable(op, &vip)) { >>> + sset_add(&lr_stateful_rec->lb_ips->ips_v6_reachable, >>> + ip_address); >>> + break; >>> + } >>> + } >>> + } >>> + } >>> +} >>> + >>> +static void >>> +remove_lrouter_lb_reachable_ips(struct lr_stateful_record *lr_stateful_rec, >>> + enum lb_neighbor_responder_mode neigh_mode, >>> + const struct sset *lb_ips_v4, >>> + const struct sset *lb_ips_v6) >>> +{ >>> + if (neigh_mode == LB_NEIGH_RESPOND_NONE) { >>> + return; >>> + } >>> + >>> + const char *ip_address; >>> + SSET_FOR_EACH (ip_address, lb_ips_v4) { >>> + sset_find_and_delete(&lr_stateful_rec->lb_ips->ips_v4_reachable, >>> + ip_address); >>> + } >>> + SSET_FOR_EACH (ip_address, lb_ips_v6) { >>> + sset_find_and_delete(&lr_stateful_rec->lb_ips->ips_v6_reachable, >>> + ip_address); >>> + } >>> +} >>> + >>> +static void >>> +lr_stateful_build_vip_nats(struct lr_stateful_record *lr_stateful_rec) >> >> This rebuilds the VIP nats set. I think I'd call it >> lr_stateful_rebuild_vip_nats(). >> >>> +{ >>> + sset_clear(&lr_stateful_rec->vip_nats); >>> + const char *external_ip; >>> + SSET_FOR_EACH (external_ip, &lr_stateful_rec->lrnat_rec->external_ips) >>> { >>> + bool is_vip_nat = false; >>> + if (addr_is_ipv6(external_ip)) { >>> + is_vip_nat = sset_contains(&lr_stateful_rec->lb_ips->ips_v6, >>> + external_ip); >>> + } else { >>> + is_vip_nat = sset_contains(&lr_stateful_rec->lb_ips->ips_v4, >>> + external_ip); >>> + } >>> + >>> + if (is_vip_nat) { >>> + sset_add(&lr_stateful_rec->vip_nats, external_ip); >>> + } >>> + } >>> +} >>> diff --git a/northd/en-lr-stateful.h b/northd/en-lr-stateful.h >>> new file mode 100644 >>> index 0000000000..0bc1f4ee75 >>> --- /dev/null >>> +++ b/northd/en-lr-stateful.h >>> @@ -0,0 +1,105 @@ >>> +/* >>> + * Copyright (c) 2023, Red Hat, Inc. >> >> 2024 >> >>> + * >>> + * Licensed under the Apache License, Version 2.0 (the "License"); >>> + * you may not use this file except in compliance with the License. >>> + * You may obtain a copy of the License at: >>> + * >>> + * http://www.apache.org/licenses/LICENSE-2.0 >>> + * >>> + * Unless required by applicable law or agreed to in writing, software >>> + * distributed under the License is distributed on an "AS IS" BASIS, >>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. >>> + * See the License for the specific language governing permissions and >>> + * limitations under the License. >>> + */ >>> +#ifndef EN_LR_STATEFUL_H >>> +#define EN_LR_STATEFUL_H 1 >>> + >>> +#include <stdint.h> >>> + >>> +/* OVS includes. */ >>> +#include "lib/hmapx.h" >>> +#include "openvswitch/hmap.h" >>> +#include "sset.h" >>> + >>> +/* OVN includes. */ >>> +#include "lib/inc-proc-eng.h" >>> +#include "lib/lb.h" >>> +#include "lib/ovn-nb-idl.h" >>> +#include "lib/ovn-sb-idl.h" >>> +#include "lib/ovn-util.h" >>> + >>> +struct ovn_datapath; >>> +struct lr_nat_record; >>> + >>> +/* lr_stateful_table: This represents a table of logical routers with >>> + * stateful related data. >>> + * stateful related data has two main components >>> + * - NAT and >>> + * - Load balancers. >>> + * >>> + * lr_stateful_record: It is a record in the lr_stateful_table for each >>> + * logical router. >>> + */ >>> + >>> +struct lr_stateful_record { >>> + struct hmap_node key_node; /* Index on 'nbr->header_.uuid'. */ >>> + >>> + const struct ovn_datapath *od; >>> + const struct lr_nat_record *lrnat_rec; >>> + >>> + /* Load Balancer vIPs relevant for this datapath. */ >>> + struct ovn_lb_ip_set *lb_ips; >>> + >>> + /* sset of vips which are also part of lr nats. */ >>> + struct sset vip_nats; >>> +}; >>> + >>> +struct lr_stateful_table { >>> + struct hmap entries; >>> + >>> + /* The array index of each element in 'entries'. */ >>> + struct lr_stateful_record **array; >>> +}; >>> + >>> +#define LR_STATEFUL_TABLE_FOR_EACH(LR_LB_NAT_REC, TABLE) \ >>> + HMAP_FOR_EACH (LR_LB_NAT_REC, key_node, &(TABLE)->entries) >>> + >>> +struct lr_stateful_tracked_data { >>> + /* Created or updated logical router with LB and/or NAT data. */ >>> + struct hmapx crupdated; /* Stores 'struct lr_stateful_record'. */ >>> +}; >>> + >>> +struct ed_type_lr_stateful { >>> + struct lr_stateful_table table; >>> + >>> + /* Node's tracked data. */ >>> + struct lr_stateful_tracked_data trk_data; >>> +}; >>> + >>> +struct lr_stateful_input { >>> + const struct ovn_datapaths *lr_datapaths; >>> + const struct hmap *lb_datapaths_map; >>> + const struct hmap *lbgrp_datapaths_map; >>> + const struct lr_nat_table *lr_nats; >>> +}; >>> + >>> +void *en_lr_stateful_init(struct engine_node *, struct engine_arg *); >>> +void en_lr_stateful_cleanup(void *data); >>> +void en_lr_stateful_clear_tracked_data(void *data); >>> +void en_lr_stateful_run(struct engine_node *, void *data); >>> + >>> +bool lr_stateful_northd_handler(struct engine_node *, void *data); >>> +bool lr_stateful_lr_nat_handler(struct engine_node *, void *data); >>> +bool lr_stateful_lb_data_handler(struct engine_node *, void *data); >>> + >>> +const struct lr_stateful_record *lr_stateful_table_find_by_index( >>> + const struct lr_stateful_table *, size_t od_index); >>> + >>> +static inline bool >>> +lr_stateful_has_tracked_data(struct lr_stateful_tracked_data *trk_data) { >>> + return !hmapx_is_empty(&trk_data->crupdated); >>> +} >>> + >>> +#endif /* EN_lr_stateful_H */ >>> diff --git a/northd/en-sync-sb.c b/northd/en-sync-sb.c >>> index 11e12428f7..d39cbbf2e6 100644 >>> --- a/northd/en-sync-sb.c >>> +++ b/northd/en-sync-sb.c >>> @@ -22,6 +22,7 @@ >>> #include "openvswitch/util.h" >>> >>> #include "en-lr-nat.h" >>> +#include "en-lr-stateful.h" >>> #include "en-sync-sb.h" >>> #include "lib/inc-proc-eng.h" >>> #include "lib/lb.h" >>> @@ -41,7 +42,7 @@ static void sync_addr_sets(struct ovsdb_idl_txn >>> *ovnsb_txn, >>> const struct nbrec_address_set_table *, >>> const struct nbrec_port_group_table *, >>> const struct sbrec_address_set_table *, >>> - const struct ovn_datapaths *lr_datapaths); >>> + const struct lr_stateful_table *); >>> static const struct sbrec_address_set *sb_address_set_lookup_by_name( >>> struct ovsdb_idl_index *, const char *name); >>> static void update_sb_addr_set(struct sorted_array *, >>> @@ -87,11 +88,11 @@ en_sync_to_sb_addr_set_run(struct engine_node *node, >>> void *data OVS_UNUSED) >>> EN_OVSDB_GET(engine_get_input("SB_address_set", node)); >>> >>> const struct engine_context *eng_ctx = engine_get_context(); >>> - struct northd_data *northd_data = engine_get_input_data("northd", >>> node); >>> - >>> + const struct ed_type_lr_stateful *lr_stateful_data = >>> + engine_get_input_data("lr_stateful", node); >>> sync_addr_sets(eng_ctx->ovnsb_idl_txn, nb_address_set_table, >>> nb_port_group_table, sb_address_set_table, >>> - &northd_data->lr_datapaths); >>> + &lr_stateful_data->table); >>> >>> engine_set_node_state(node, EN_UPDATED); >>> } >>> @@ -288,10 +289,12 @@ en_sync_to_sb_pb_run(struct engine_node *node, void >>> *data OVS_UNUSED) >>> { >>> const struct engine_context *eng_ctx = engine_get_context(); >>> struct northd_data *northd_data = engine_get_input_data("northd", >>> node); >>> - struct ed_type_lr_nat_data *lr_nat_data = >>> - engine_get_input_data("lr_nat", node); >>> + struct ed_type_lr_stateful *lr_stateful_data = >>> + engine_get_input_data("lr_stateful", node); >>> + >>> sync_pbs(eng_ctx->ovnsb_idl_txn, &northd_data->ls_ports, >>> - &northd_data->lr_ports, &lr_nat_data->lr_nats); >>> + &northd_data->lr_ports, >>> + &lr_stateful_data->table); >>> engine_set_node_state(node, EN_UPDATED); >>> } >>> >>> @@ -316,7 +319,11 @@ sync_to_sb_pb_northd_handler(struct engine_node *node, >>> void *data OVS_UNUSED) >>> return false; >>> } >>> >>> - if (!sync_pbs_for_northd_changed_ovn_ports(&nd->trk_data.trk_lsps)) { >>> + struct ed_type_lr_stateful *lr_stateful_data = >>> + engine_get_input_data("lr_stateful", node); >>> + >>> + if (!sync_pbs_for_northd_changed_ovn_ports(&nd->trk_data.trk_lsps, >>> + &lr_stateful_data->table)) { >>> return false; >>> } >>> >>> @@ -362,7 +369,7 @@ sync_addr_sets(struct ovsdb_idl_txn *ovnsb_txn, >>> const struct nbrec_address_set_table *nb_address_set_table, >>> const struct nbrec_port_group_table *nb_port_group_table, >>> const struct sbrec_address_set_table *sb_address_set_table, >>> - const struct ovn_datapaths *lr_datapaths) >>> + const struct lr_stateful_table *lr_statefuls) >>> { >>> struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets); >>> >>> @@ -406,16 +413,14 @@ sync_addr_sets(struct ovsdb_idl_txn *ovnsb_txn, >>> } >>> >>> /* Sync router load balancer VIP generated address sets. */ >>> - struct ovn_datapath *od; >>> - HMAP_FOR_EACH (od, key_node, &lr_datapaths->datapaths) { >>> - ovs_assert(od->nbr); >>> - >>> - if (sset_count(&od->lb_ips->ips_v4_reachable)) { >>> - char *ipv4_addrs_name = lr_lb_address_set_name(od->tunnel_key, >>> - AF_INET); >>> + const struct lr_stateful_record *lr_sful_rec; >>> + LR_STATEFUL_TABLE_FOR_EACH (lr_sful_rec, lr_statefuls) { >>> + if (sset_count(&lr_sful_rec->lb_ips->ips_v4_reachable)) { >>> + char *ipv4_addrs_name = >>> + lr_lb_address_set_name(lr_sful_rec->od->tunnel_key, >>> AF_INET); >>> >>> struct sorted_array ipv4_addrs_sorted = >>> - sorted_array_from_sset(&od->lb_ips->ips_v4_reachable); >>> + >>> sorted_array_from_sset(&lr_sful_rec->lb_ips->ips_v4_reachable); >>> >>> sync_addr_set(ovnsb_txn, ipv4_addrs_name, >>> &ipv4_addrs_sorted, &sb_address_sets); >>> @@ -423,11 +428,11 @@ sync_addr_sets(struct ovsdb_idl_txn *ovnsb_txn, >>> free(ipv4_addrs_name); >>> } >>> >>> - if (sset_count(&od->lb_ips->ips_v6_reachable)) { >>> - char *ipv6_addrs_name = lr_lb_address_set_name(od->tunnel_key, >>> - AF_INET6); >>> - struct sorted_array ipv6_addrs_sorted = >>> - sorted_array_from_sset(&od->lb_ips->ips_v6_reachable); >>> + if (sset_count(&lr_sful_rec->lb_ips->ips_v6_reachable)) { >>> + char *ipv6_addrs_name = >>> + lr_lb_address_set_name(lr_sful_rec->od->tunnel_key, >>> AF_INET6); >>> + struct sorted_array ipv6_addrs_sorted = sorted_array_from_sset( >>> + &lr_sful_rec->lb_ips->ips_v6_reachable); >>> >>> sync_addr_set(ovnsb_txn, ipv6_addrs_name, >>> &ipv6_addrs_sorted, &sb_address_sets); >>> diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c >>> index 1f211b278e..97bcce9655 100644 >>> --- a/northd/inc-proc-northd.c >>> +++ b/northd/inc-proc-northd.c >>> @@ -31,6 +31,7 @@ >>> #include "openvswitch/vlog.h" >>> #include "inc-proc-northd.h" >>> #include "en-lb-data.h" >>> +#include "en-lr-stateful.h" >>> #include "en-lr-nat.h" >>> #include "en-northd.h" >>> #include "en-lflow.h" >>> @@ -148,6 +149,7 @@ static ENGINE_NODE(sync_to_sb_lb, "sync_to_sb_lb"); >>> static ENGINE_NODE(sync_to_sb_pb, "sync_to_sb_pb"); >>> static ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lb_data, "lb_data"); >>> static ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lr_nat, "lr_nat"); >>> +static ENGINE_NODE_WITH_CLEAR_TRACK_DATA(lr_stateful, "lr_stateful"); >>> >>> void inc_proc_northd_init(struct ovsdb_idl_loop *nb, >>> struct ovsdb_idl_loop *sb) >>> @@ -193,6 +195,11 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, >>> >>> engine_add_input(&en_lr_nat, &en_northd, lr_nat_northd_handler); >>> >>> + engine_add_input(&en_lr_stateful, &en_northd, >>> lr_stateful_northd_handler); >>> + engine_add_input(&en_lr_stateful, &en_lr_nat, >>> lr_stateful_lr_nat_handler); >>> + engine_add_input(&en_lr_stateful, &en_lb_data, >>> + lr_stateful_lb_data_handler); >>> + >>> engine_add_input(&en_mac_binding_aging, &en_nb_nb_global, NULL); >>> engine_add_input(&en_mac_binding_aging, &en_sb_mac_binding, NULL); >>> engine_add_input(&en_mac_binding_aging, &en_northd, NULL); >>> @@ -214,15 +221,17 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, >>> engine_add_input(&en_lflow, &en_sb_logical_flow, NULL); >>> engine_add_input(&en_lflow, &en_sb_multicast_group, NULL); >>> engine_add_input(&en_lflow, &en_sb_igmp_group, NULL); >>> + engine_add_input(&en_lflow, &en_lr_nat, NULL); >>> + engine_add_input(&en_lflow, &en_lr_stateful, NULL); >>> engine_add_input(&en_lflow, &en_northd, lflow_northd_handler); >>> engine_add_input(&en_lflow, &en_port_group, lflow_port_group_handler); >>> - engine_add_input(&en_lflow, &en_lr_nat, NULL); >>> >>> engine_add_input(&en_sync_to_sb_addr_set, &en_nb_address_set, >>> sync_to_sb_addr_set_nb_address_set_handler); >>> engine_add_input(&en_sync_to_sb_addr_set, &en_nb_port_group, >>> sync_to_sb_addr_set_nb_port_group_handler); >>> engine_add_input(&en_sync_to_sb_addr_set, &en_northd, NULL); >>> + engine_add_input(&en_sync_to_sb_addr_set, &en_lr_stateful, NULL); >>> engine_add_input(&en_sync_to_sb_addr_set, &en_sb_address_set, NULL); >>> >>> engine_add_input(&en_port_group, &en_nb_port_group, >>> @@ -240,7 +249,7 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb, >>> >>> engine_add_input(&en_sync_to_sb_pb, &en_northd, >>> sync_to_sb_pb_northd_handler); >>> - engine_add_input(&en_sync_to_sb_pb, &en_lr_nat, NULL); >>> + engine_add_input(&en_sync_to_sb_pb, &en_lr_stateful, NULL); >>> >>> /* en_sync_to_sb engine node syncs the SB database tables from >>> * the NB database tables. >>> diff --git a/northd/northd.c b/northd/northd.c >>> index e5e86326e3..fc2fc835b2 100644 >>> --- a/northd/northd.c >>> +++ b/northd/northd.c >>> @@ -45,6 +45,7 @@ >>> #include "northd.h" >>> #include "en-lb-data.h" >>> #include "en-lr-nat.h" >>> +#include "en-lr-stateful.h" >>> #include "lib/ovn-parallel-hmap.h" >>> #include "ovn/actions.h" >>> #include "ovn/features.h" >>> @@ -618,13 +619,6 @@ init_lb_for_datapath(struct ovn_datapath *od) >>> } >>> } >>> >>> -static void >>> -destroy_lb_for_datapath(struct ovn_datapath *od) >>> -{ >>> - ovn_lb_ip_set_destroy(od->lb_ips); >>> - od->lb_ips = NULL; >>> -} >>> - >>> /* A group of logical router datapaths which are connected - either >>> * directly or indirectly. >>> * Each logical router can belong to only one group. */ >>> @@ -677,7 +671,6 @@ ovn_datapath_destroy(struct hmap *datapaths, struct >>> ovn_datapath *od) >>> destroy_ipam_info(&od->ipam_info); >>> free(od->router_ports); >>> free(od->ls_peers); >>> - destroy_lb_for_datapath(od); >>> free(od->localnet_ports); >>> free(od->l3dgw_ports); >>> destroy_mcast_info_for_datapath(od); >>> @@ -1318,124 +1311,6 @@ struct lflow_ref_node { >>> struct ovn_lflow *lflow; >>> }; >>> >>> -/* A logical switch port or logical router port. >>> - * >>> - * In steady state, an ovn_port points to a northbound Logical_Switch_Port >>> - * record (via 'nbsp') *or* a Logical_Router_Port record (via 'nbrp'), and >>> to a >>> - * southbound Port_Binding record (via 'sb'). As the state of the system >>> - * changes, join_logical_ports() may determine that there is a new LSP or >>> LRP >>> - * that has no corresponding Port_Binding record (in which case >>> build_ports()) >>> - * will create the missing Port_Binding) or that a Port_Binding record >>> exists >>> - * that has no coresponding LSP (in which case build_ports() will delete >>> the >>> - * spurious Port_Binding). Thus, after build_ports() runs, any given >>> ovn_port >>> - * will have 'sb' nonnull, and 'nbsp' xor 'nbrp' nonnull. >>> - * >>> - * Ordinarily there is only one ovn_port that points to a given LSP or LRP >>> (but >>> - * distributed gateway ports point a "derived" ovn_port to a duplicate >>> LRP). >>> - */ >>> -struct ovn_port { >>> - /* Port name aka key. >>> - * >>> - * This is ordinarily the same as nbsp->name or nbrp->name and >>> - * sb->logical_port. (A distributed gateway port creates a "derived" >>> - * ovn_port with key "cr-%s" % nbrp->name.) */ >>> - struct hmap_node key_node; /* Index on 'key'. */ >>> - char *key; /* nbsp->name, nbrp->name, >>> sb->logical_port. */ >>> - char *json_key; /* 'key', quoted for use in JSON. */ >>> - >>> - const struct sbrec_port_binding *sb; /* May be NULL. */ >>> - >>> - uint32_t tunnel_key; >>> - >>> - /* Logical switch port data. */ >>> - const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */ >>> - >>> - struct lport_addresses *lsp_addrs; /* Logical switch port addresses. >>> */ >>> - unsigned int n_lsp_addrs; /* Total length of lsp_addrs. */ >>> - unsigned int n_lsp_non_router_addrs; /* Number of elements from the >>> - * beginning of 'lsp_addrs' >>> extracted >>> - * directly from LSP 'addresses'. >>> */ >>> - >>> - struct lport_addresses *ps_addrs; /* Port security addresses. */ >>> - unsigned int n_ps_addrs; >>> - >>> - bool lsp_can_be_inc_processed; /* If it can be incrementally processed >>> when >>> - the port changes. */ >>> - >>> - /* Logical router port data. */ >>> - const struct nbrec_logical_router_port *nbrp; /* May be NULL. */ >>> - >>> - struct lport_addresses lrp_networks; >>> - >>> - struct ovn_port_routable_addresses routables; >>> - >>> - /* Logical port multicast data. */ >>> - struct mcast_port_info mcast_info; >>> - >>> - /* At most one of l3dgw_port and cr_port can be not NULL. */ >>> - >>> - /* This is set to a distributed gateway port if and only if this >>> ovn_port >>> - * is "derived" from it. Otherwise this is set to NULL. The derived >>> - * ovn_port represents the instance of distributed gateway port on the >>> - * gateway chassis.*/ >>> - struct ovn_port *l3dgw_port; >>> - >>> - /* This is set to the "derived" chassis-redirect port of this port if >>> and >>> - * only if this port is a distributed gateway port. Otherwise this is >>> set >>> - * to NULL. */ >>> - struct ovn_port *cr_port; >>> - >>> - bool has_unknown; /* If the addresses have 'unknown' defined. */ >>> - >>> - bool has_bfd; >>> - >>> - /* The port's peer: >>> - * >>> - * - A switch port S of type "router" has a router port R as a >>> peer, >>> - * and R in turn has S has its peer. >>> - * >>> - * - Two connected logical router ports have each other as peer. >>> - * >>> - * - Other kinds of ports have no peer. */ >>> - struct ovn_port *peer; >>> - >>> - struct ovn_datapath *od; >>> - >>> - struct ovs_list list; /* In list of similar records. */ >>> - >>> - struct hmap_node dp_node; /* Node in od->ports. */ >>> - >>> - struct lport_addresses proxy_arp_addrs; >>> - >>> - /* Temporarily used for traversing a list (or hmap) of ports. */ >>> - bool visited; >>> - >>> - /* List of struct lflow_ref_node that points to the lflows generated by >>> - * this ovn_port. >>> - * >>> - * This data is initialized and destroyed by the en_northd node, but >>> - * populated and used only by the en_lflow node. Ideally this data >>> should >>> - * be maintained as part of en_lflow's data (struct lflow_data): a hash >>> - * index from ovn_port key to lflows. However, it would be less >>> efficient >>> - * and more complex: >>> - * >>> - * 1. It would require an extra search (using the index) to find the >>> - * lflows. >>> - * >>> - * 2. Building the index needs to be thread-safe, using either a global >>> - * lock which is obviously less efficient, or hash-based lock array >>> which >>> - * is more complex. >>> - * >>> - * Adding the list here is more straightforward. The drawback is that >>> we >>> - * need to keep in mind that this data belongs to en_lflow node, so >>> never >>> - * access it from any other nodes. >>> - */ >>> - struct ovs_list lflows; >>> - >>> - /* Only used for the router type LSP whose peer is l3dgw_port */ >>> - bool enable_router_port_acl; >>> -}; >>> - >>> static bool lsp_can_be_inc_processed(const struct >>> nbrec_logical_switch_port *); >>> >>> static bool >>> @@ -1460,16 +1335,21 @@ destroy_routable_addresses(struct >>> ovn_port_routable_addresses *ra) >>> } >>> >>> static char **get_nat_addresses(const struct ovn_port *op, size_t *n, >>> - bool routable_only, bool include_lb_ips); >>> + bool routable_only, bool include_lb_ips, >>> + const struct lr_stateful_record *); >>> >>> -static void >>> -assign_routable_addresses(struct ovn_port *op) >>> +static struct ovn_port_routable_addresses >>> +get_op_routable_addresses(struct ovn_port *op, >>> + const struct lr_stateful_record *lr_stateful_rec) >>> { >>> size_t n; >>> - char **nats = get_nat_addresses(op, &n, true, true); >>> + char **nats = get_nat_addresses(op, &n, true, true, lr_stateful_rec); >>> >>> if (!nats) { >>> - return; >>> + return (struct ovn_port_routable_addresses) { >>> + .laddrs = NULL, >>> + .n_addrs = 0, >>> + }; >>> } >>> >>> struct lport_addresses *laddrs = xcalloc(n, sizeof(*laddrs)); >>> @@ -1485,9 +1365,15 @@ assign_routable_addresses(struct ovn_port *op) >>> } >>> free(nats); >>> >>> - /* Everything seems to have worked out */ >>> - op->routables.laddrs = laddrs; >>> - op->routables.n_addrs = n_addrs; >>> + if (!n_addrs) { >>> + free(laddrs); >>> + laddrs = NULL; >>> + } >>> + >>> + return (struct ovn_port_routable_addresses) { >>> + .laddrs = laddrs, >>> + .n_addrs = n_addrs, >>> + }; >>> } >>> >>> >>> @@ -1547,8 +1433,6 @@ ovn_port_destroy_orphan(struct ovn_port *port) >>> } >>> free(port->ps_addrs); >>> >>> - destroy_routable_addresses(&port->routables); >>> - >>> destroy_lport_addresses(&port->lrp_networks); >>> destroy_lport_addresses(&port->proxy_arp_addrs); >>> free(port->json_key); >>> @@ -2590,9 +2474,7 @@ join_logical_ports(const struct >>> sbrec_port_binding_table *sbrec_pb_table, >>> sizeof *od->l3dgw_ports); >>> } >>> od->l3dgw_ports[od->n_l3dgw_ports++] = op; >>> - >>> - assign_routable_addresses(op); >>> - } >>> + } >>> } >>> } >>> >>> @@ -2695,7 +2577,8 @@ join_logical_ports(const struct >>> sbrec_port_binding_table *sbrec_pb_table, >>> * and must free the returned array when it is no longer needed. */ >>> static char ** >>> get_nat_addresses(const struct ovn_port *op, size_t *n, bool routable_only, >>> - bool include_lb_ips) >>> + bool include_lb_ips, >>> + const struct lr_stateful_record *lr_stateful_rec) >>> { >>> size_t n_nats = 0; >>> struct eth_addr mac; >>> @@ -2780,23 +2663,25 @@ get_nat_addresses(const struct ovn_port *op, size_t >>> *n, bool routable_only, >>> } >>> } >>> >>> - if (include_lb_ips) { >>> + if (include_lb_ips && lr_stateful_rec) { >>> const char *ip_address; >>> if (routable_only) { >>> - SSET_FOR_EACH (ip_address, &op->od->lb_ips->ips_v4_routable) { >>> + SSET_FOR_EACH (ip_address, >>> + &lr_stateful_rec->lb_ips->ips_v4_routable) { >>> ds_put_format(&c_addresses, " %s", ip_address); >>> central_ip_address = true; >>> } >>> - SSET_FOR_EACH (ip_address, &op->od->lb_ips->ips_v6_routable) { >>> + SSET_FOR_EACH (ip_address, >>> + &lr_stateful_rec->lb_ips->ips_v6_routable) { >>> ds_put_format(&c_addresses, " %s", ip_address); >>> central_ip_address = true; >>> } >>> } else { >>> - SSET_FOR_EACH (ip_address, &op->od->lb_ips->ips_v4) { >>> + SSET_FOR_EACH (ip_address, &lr_stateful_rec->lb_ips->ips_v4) { >>> ds_put_format(&c_addresses, " %s", ip_address); >>> central_ip_address = true; >>> } >>> - SSET_FOR_EACH (ip_address, &op->od->lb_ips->ips_v6) { >>> + SSET_FOR_EACH (ip_address, &lr_stateful_rec->lb_ips->ips_v6) { >>> ds_put_format(&c_addresses, " %s", ip_address); >>> central_ip_address = true; >>> } >>> @@ -3867,21 +3752,8 @@ build_lb_datapaths(const struct hmap *lbs, const >>> struct hmap *lb_groups, >>> HMAP_FOR_EACH (od, key_node, &lr_datapaths->datapaths) { >>> ovs_assert(od->nbr); >>> >>> - /* Checking load balancer groups first, starting from the largest >>> one, >>> - * to more efficiently copy IP sets. */ >>> - size_t largest_group = 0; >>> - >>> - for (size_t i = 1; i < od->nbr->n_load_balancer_group; i++) { >>> - if (od->nbr->load_balancer_group[i]->n_load_balancer > >>> - >>> od->nbr->load_balancer_group[largest_group]->n_load_balancer) { >>> - largest_group = i; >>> - } >>> - } >>> - >>> for (size_t i = 0; i < od->nbr->n_load_balancer_group; i++) { >>> - size_t idx = (i + largest_group) % >>> od->nbr->n_load_balancer_group; >>> - >>> - nbrec_lb_group = od->nbr->load_balancer_group[idx]; >>> + nbrec_lb_group = od->nbr->load_balancer_group[i]; >>> const struct uuid *lb_group_uuid = >>> &nbrec_lb_group->header_.uuid; >>> >>> lb_group_dps = >>> @@ -3889,20 +3761,6 @@ build_lb_datapaths(const struct hmap *lbs, const >>> struct hmap *lb_groups, >>> lb_group_uuid); >>> ovs_assert(lb_group_dps); >>> ovn_lb_group_datapaths_add_lr(lb_group_dps, od); >>> - >>> - if (!od->lb_ips) { >>> - od->lb_ips = >>> - ovn_lb_ip_set_clone(lb_group_dps->lb_group->lb_ips); >>> - } else { >>> - for (size_t j = 0; j < lb_group_dps->lb_group->n_lbs; j++) >>> { >>> - build_lrouter_lb_ips(od->lb_ips, >>> - lb_group_dps->lb_group->lbs[j]); >>> - } >>> - } >>> - } >>> - >>> - if (!od->lb_ips) { >>> - od->lb_ips = ovn_lb_ip_set_create(); >>> } >>> >>> for (size_t i = 0; i < od->nbr->n_load_balancer; i++) { >>> @@ -3911,7 +3769,6 @@ build_lb_datapaths(const struct hmap *lbs, const >>> struct hmap *lb_groups, >>> lb_dps = ovn_lb_datapaths_find(lb_datapaths_map, lb_uuid); >>> ovs_assert(lb_dps); >>> ovn_lb_datapaths_add_lr(lb_dps, 1, &od); >>> - build_lrouter_lb_ips(od->lb_ips, lb_dps->lb); >>> } >>> } >>> >>> @@ -3965,102 +3822,6 @@ build_lb_svcs( >>> } >>> } >>> >>> -static bool lrouter_port_ipv4_reachable(const struct ovn_port *op, >>> - ovs_be32 addr); >>> -static bool lrouter_port_ipv6_reachable(const struct ovn_port *op, >>> - const struct in6_addr *addr); >>> - >>> -static void >>> -add_neigh_ips_to_lrouter(struct ovn_datapath *od, >>> - enum lb_neighbor_responder_mode neigh_mode, >>> - const struct sset *lb_ips_v4, >>> - const struct sset *lb_ips_v6) >>> -{ >>> - /* If configured to not reply to any neighbor requests for all VIPs >>> - * return early. >>> - */ >>> - if (neigh_mode == LB_NEIGH_RESPOND_NONE) { >>> - return; >>> - } >>> - >>> - const char *ip_address; >>> - >>> - /* If configured to reply to neighbor requests for all VIPs force them >>> - * all to be considered "reachable". >>> - */ >>> - if (neigh_mode == LB_NEIGH_RESPOND_ALL) { >>> - SSET_FOR_EACH (ip_address, lb_ips_v4) { >>> - sset_add(&od->lb_ips->ips_v4_reachable, ip_address); >>> - } >>> - SSET_FOR_EACH (ip_address, lb_ips_v6) { >>> - sset_add(&od->lb_ips->ips_v6_reachable, ip_address); >>> - } >>> - >>> - return; >>> - } >>> - >>> - /* Otherwise, a VIP is reachable if there's at least one router >>> - * subnet that includes it. >>> - */ >>> - ovs_assert(neigh_mode == LB_NEIGH_RESPOND_REACHABLE); >>> - >>> - SSET_FOR_EACH (ip_address, lb_ips_v4) { >>> - struct ovn_port *op; >>> - ovs_be32 vip_ip4; >>> - if (ip_parse(ip_address, &vip_ip4)) { >>> - HMAP_FOR_EACH (op, dp_node, &od->ports) { >>> - if (lrouter_port_ipv4_reachable(op, vip_ip4)) { >>> - sset_add(&od->lb_ips->ips_v4_reachable, >>> - ip_address); >>> - break; >>> - } >>> - } >>> - } >>> - } >>> - >>> - SSET_FOR_EACH (ip_address, lb_ips_v6) { >>> - struct ovn_port *op; >>> - struct in6_addr vip; >>> - if (ipv6_parse(ip_address, &vip)) { >>> - HMAP_FOR_EACH (op, dp_node, &od->ports) { >>> - if (lrouter_port_ipv6_reachable(op, &vip)) { >>> - sset_add(&od->lb_ips->ips_v6_reachable, >>> - ip_address); >>> - break; >>> - } >>> - } >>> - } >>> - } >>> -} >>> - >>> -static void >>> -remove_lrouter_lb_reachable_ips(struct ovn_datapath *od, >>> - enum lb_neighbor_responder_mode neigh_mode, >>> - const struct sset *lb_ips_v4, >>> - const struct sset *lb_ips_v6) >>> -{ >>> - if (neigh_mode == LB_NEIGH_RESPOND_NONE) { >>> - return; >>> - } >>> - >>> - const char *ip_address; >>> - SSET_FOR_EACH (ip_address, lb_ips_v4) { >>> - sset_find_and_delete(&od->lb_ips->ips_v4_reachable, ip_address); >>> - } >>> - SSET_FOR_EACH (ip_address, lb_ips_v6) { >>> - sset_find_and_delete(&od->lb_ips->ips_v6_reachable, ip_address); >>> - } >>> -} >>> - >>> -static void >>> -build_lrouter_lb_reachable_ips(struct ovn_datapath *od, >>> - const struct ovn_northd_lb *lb) >>> -{ >>> - add_neigh_ips_to_lrouter(od, lb->neigh_mode, &lb->ips_v4, >>> - &lb->ips_v6); >>> -} >>> - >>> - >>> static void >>> build_lrouter_lbs_check(const struct ovn_datapaths *lr_datapaths) >>> { >>> @@ -4082,43 +3843,6 @@ build_lrouter_lbs_check(const struct ovn_datapaths >>> *lr_datapaths) >>> } >>> } >>> >>> -static void >>> -build_lrouter_lbs_reachable_ips(struct ovn_datapaths *lr_datapaths, >>> - struct hmap *lb_dps_map, >>> - struct hmap *lb_group_dps_map) >>> -{ >>> - struct ovn_datapath *od; >>> - >>> - HMAP_FOR_EACH (od, key_node, &lr_datapaths->datapaths) { >>> - if (!od->nbr) { >>> - continue; >>> - } >>> - >>> - for (size_t i = 0; i < od->nbr->n_load_balancer; i++) { >>> - struct ovn_lb_datapaths *lb_dps = >>> - ovn_lb_datapaths_find(lb_dps_map, >>> - &od->nbr->load_balancer[i]->header_.uuid); >>> - ovs_assert(lb_dps); >>> - build_lrouter_lb_reachable_ips(od, lb_dps->lb); >>> - } >>> - >>> - for (size_t i = 0; i < od->nbr->n_load_balancer_group; i++) { >>> - const struct nbrec_load_balancer_group *nbrec_lb_group = >>> - od->nbr->load_balancer_group[i]; >>> - struct ovn_lb_group_datapaths *lb_group_dps; >>> - >>> - lb_group_dps = >>> - ovn_lb_group_datapaths_find(lb_group_dps_map, >>> - &nbrec_lb_group->header_.uuid); >>> - ovs_assert(lb_group_dps); >>> - for (size_t j = 0; j < lb_group_dps->lb_group->n_lbs; j++) { >>> - build_lrouter_lb_reachable_ips(od, >>> - >>> lb_group_dps->lb_group->lbs[j]); >>> - } >>> - } >>> - } >>> -} >>> - >>> static void >>> build_lswitch_lbs_from_lrouter(struct ovn_datapaths *lr_datapaths, >>> struct hmap *lb_dps_map, >>> @@ -4182,8 +3906,6 @@ build_lb_port_related_data( >>> struct hmap *svc_monitor_map) >>> { >>> build_lrouter_lbs_check(lr_datapaths); >>> - build_lrouter_lbs_reachable_ips(lr_datapaths, lb_dps_map, >>> - lb_group_dps_map); >>> build_lb_svcs(ovnsb_txn, sbrec_service_monitor_table, ls_ports, >>> lb_dps_map, >>> svc_monitor_lsps, svc_monitor_map); >>> build_lswitch_lbs_from_lrouter(lr_datapaths, lb_dps_map, >>> lb_group_dps_map); >>> @@ -4549,7 +4271,8 @@ check_sb_lb_duplicates(const struct >>> sbrec_load_balancer_table *table) >>> * Caller should make sure that the OVN SB IDL txn is not NULL. Presently >>> it >>> * only syncs the nat column of port binding corresponding to the >>> 'op->nbsp' */ >>> static void >>> -sync_pb_for_lsp(struct ovn_port *op) >>> +sync_pb_for_lsp(struct ovn_port *op, >>> + const struct lr_stateful_table *lr_stateful_table) >>> { >>> ovs_assert(op->nbsp); >>> >>> @@ -4568,10 +4291,17 @@ sync_pb_for_lsp(struct ovn_port *op) >>> if (nat_addresses && !strcmp(nat_addresses, "router")) { >>> if (op->peer && op->peer->od >>> && (chassis || op->peer->od->n_l3dgw_ports)) { >>> - bool exclude_lb_vips = smap_get_bool(&op->nbsp->options, >>> + bool include_lb_vips = !smap_get_bool(&op->nbsp->options, >>> "exclude-lb-vips-from-garp", false); >>> + >>> + const struct lr_stateful_record *lr_stateful_rec = NULL; >>> + >>> + if (include_lb_vips) { >>> + lr_stateful_rec = lr_stateful_table_find_by_index( >>> + lr_stateful_table, op->peer->od->index); >>> + } >>> nats = get_nat_addresses(op->peer, &n_nats, false, >>> - !exclude_lb_vips); >>> + include_lb_vips, lr_stateful_rec); >>> } >>> } else if (nat_addresses && (chassis || l3dgw_ports)) { >>> struct lport_addresses laddrs; >>> @@ -4678,7 +4408,8 @@ sync_pb_for_lsp(struct ovn_port *op) >>> * Caller should make sure that the OVN SB IDL txn is not NULL. Presently >>> it >>> * only sets the port binding options column for the router ports */ >>> static void >>> -sync_pb_for_lrp(struct ovn_port *op, const struct lr_nat_table *lr_nats) >>> +sync_pb_for_lrp(struct ovn_port *op, >>> + const struct lr_stateful_table *lr_stateful_table) >>> { >>> ovs_assert(op->nbrp); >>> >>> @@ -4687,14 +4418,14 @@ sync_pb_for_lrp(struct ovn_port *op, const struct >>> lr_nat_table *lr_nats) >>> >>> const char *chassis_name = smap_get(&op->od->nbr->options, "chassis"); >>> if (is_cr_port(op)) { >>> - const struct lr_nat_record *lrnat_rec = >>> - lr_nat_table_find_by_index(lr_nats, op->od->index); >>> - ovs_assert(lrnat_rec); >>> + const struct lr_stateful_record *lr_stateful_rec = >>> + lr_stateful_table_find_by_index(lr_stateful_table, >>> op->od->index); >>> + ovs_assert(lr_stateful_rec); >>> >>> smap_add(&new, "distributed-port", op->nbrp->name); >>> >>> bool always_redirect = >>> - !lrnat_rec->has_distributed_nat && >>> + !lr_stateful_rec->lrnat_rec->has_distributed_nat && >>> !l3dgw_port_has_associated_vtep_lports(op->l3dgw_port); >>> >>> const char *redirect_type = smap_get(&op->nbrp->options, >>> @@ -4744,17 +4475,18 @@ static void ovn_update_ipv6_opt_for_op(struct >>> ovn_port *op); >>> * the logical switch ports. */ >>> void >>> sync_pbs(struct ovsdb_idl_txn *ovnsb_idl_txn, struct hmap *ls_ports, >>> - struct hmap *lr_ports, const struct lr_nat_table *lr_nats) >>> + struct hmap *lr_ports, >>> + const struct lr_stateful_table *lr_stateful_table) >>> { >>> ovs_assert(ovnsb_idl_txn); >>> >>> struct ovn_port *op; >>> HMAP_FOR_EACH (op, key_node, ls_ports) { >>> - sync_pb_for_lsp(op); >>> + sync_pb_for_lsp(op, lr_stateful_table); >>> } >>> >>> HMAP_FOR_EACH (op, key_node, lr_ports) { >>> - sync_pb_for_lrp(op, lr_nats); >>> + sync_pb_for_lrp(op, lr_stateful_table); >>> } >>> >>> ovn_update_ipv6_options(lr_ports); >>> @@ -4763,20 +4495,22 @@ sync_pbs(struct ovsdb_idl_txn *ovnsb_idl_txn, >>> struct hmap *ls_ports, >>> /* Sync the SB Port bindings for the added and updated logical switch ports >>> * of the tracked northd engine data. */ >>> bool >>> -sync_pbs_for_northd_changed_ovn_ports(struct tracked_ovn_ports >>> *trk_ovn_ports) >>> +sync_pbs_for_northd_changed_ovn_ports( >>> + struct tracked_ovn_ports *trk_ovn_ports, >>> + const struct lr_stateful_table *lr_stateful_table) >>> { >>> struct hmapx_node *hmapx_node; >>> struct ovn_port *op; >>> HMAPX_FOR_EACH (hmapx_node, &trk_ovn_ports->created) { >>> op = hmapx_node->data; >>> ovs_assert(op->nbsp); >>> - sync_pb_for_lsp(op); >>> + sync_pb_for_lsp(op, lr_stateful_table); >>> } >>> >>> HMAPX_FOR_EACH (hmapx_node, &trk_ovn_ports->updated) { >>> op = hmapx_node->data; >>> ovs_assert(op->nbsp); >>> - sync_pb_for_lsp(op); >>> + sync_pb_for_lsp(op, lr_stateful_table); >>> } >>> >>> return true; >>> @@ -5439,14 +5173,13 @@ fail: >>> } >>> >>> /* Returns true if the logical router has changes which can be >>> - * incrementally handled. >>> + * incrementally handled or the changes can be ignored. >> >> Nit: I don't think we need this. Deciding that input changes can be >> ignored is part of handling those changes incrementally. I'd just leave >> the comment as it was. >> >>> * Presently supports i-p for the below changes: >>> * - load balancers and load balancer groups. >>> * - NAT changes >>> */ >>> static bool >>> -lr_changes_can_be_handled( >>> - const struct nbrec_logical_router *lr) >>> +lr_changes_can_be_handled(const struct nbrec_logical_router *lr) >>> { >>> /* Check if the columns are changed in this row. */ >>> enum nbrec_logical_router_column_id col; >>> @@ -5510,17 +5243,6 @@ is_lr_nats_changed(const struct nbrec_logical_router >>> *nbr) { >>> || is_lr_nats_seqno_changed(nbr)); >>> } >>> >>> -static bool >>> -lr_has_routable_nats(const struct nbrec_logical_router *nbr) { >>> - for (size_t i = 0; i < nbr->n_nat; i++) { >>> - if (smap_get_bool(&nbr->nat[i]->options, "add_route", false)) { >>> - return true; >>> - } >>> - } >>> - >>> - return false; >>> -} >>> - >>> /* Return true if changes are handled incrementally, false otherwise. >>> * >>> * Note: Changes to load balancer and load balancer groups associated with >>> @@ -5547,12 +5269,6 @@ northd_handle_lr_changes(const struct northd_input >>> *ni, >>> } >>> >>> if (is_lr_nats_changed(changed_lr)) { >>> - if (lr_has_routable_nats(changed_lr)) { >>> - /* router has routable NATs. We can't handle these changes >>> - * incrementally yet. Fall back to recompute. */ >>> - goto fail; >>> - } >>> - >>> struct ovn_datapath *od = ovn_datapath_find_( >>> &nd->lr_datapaths.datapaths, >>> &changed_lr->header_.uuid); >>> @@ -5834,10 +5550,6 @@ northd_handle_lb_data_changes(struct tracked_lb_data >>> *trk_lb_data, >>> ovs_assert(lb_dps); >>> ovn_lb_datapaths_add_lr(lb_dps, 1, &od); >>> >>> - /* Add the lb_ips of lb_dps to the od. */ >>> - build_lrouter_lb_ips(od->lb_ips, lb_dps->lb); >>> - build_lrouter_lb_reachable_ips(od, lb_dps->lb); >>> - >>> /* Add the lb to the northd tracked data. */ >>> hmapx_add(&nd_changes->trk_lbs.crupdated, lb_dps); >>> } >>> @@ -5856,10 +5568,6 @@ northd_handle_lb_data_changes(struct tracked_lb_data >>> *trk_lb_data, >>> ovs_assert(lb_dps); >>> ovn_lb_datapaths_add_lr(lb_dps, 1, &od); >>> >>> - /* Add the lb_ips of lb_dps to the od. */ >>> - build_lrouter_lb_ips(od->lb_ips, lb_dps->lb); >>> - build_lrouter_lb_reachable_ips(od, lb_dps->lb); >>> - >>> /* Add the lb to the northd tracked data. */ >>> hmapx_add(&nd_changes->trk_lbs.crupdated, lb_dps); >>> } >>> @@ -5888,22 +5596,6 @@ northd_handle_lb_data_changes(struct tracked_lb_data >>> *trk_lb_data, >>> od = lr_datapaths->array[index]; >>> /* Re-evaluate 'od->has_lb_vip' */ >>> init_lb_for_datapath(od); >>> - >>> - /* Update the od->lb_ips with the deleted and inserted >>> - * vips (if any). */ >>> - remove_ips_from_lb_ip_set(od->lb_ips, lb->routable, >>> - &clb->deleted_vips_v4, >>> - &clb->deleted_vips_v6); >>> - add_ips_to_lb_ip_set(od->lb_ips, lb->routable, >>> - &clb->inserted_vips_v4, >>> - &clb->inserted_vips_v6); >>> - >>> - remove_lrouter_lb_reachable_ips(od, lb->neigh_mode, >>> - &clb->deleted_vips_v4, >>> - &clb->deleted_vips_v6); >>> - add_neigh_ips_to_lrouter(od, lb->neigh_mode, >>> - &clb->inserted_vips_v4, >>> - &clb->inserted_vips_v6); >>> } >>> } >>> >>> @@ -5928,9 +5620,6 @@ northd_handle_lb_data_changes(struct tracked_lb_data >>> *trk_lb_data, >>> >>> /* Re-evaluate 'od->has_lb_vip' */ >>> init_lb_for_datapath(od); >>> - >>> - /* Add the lb_ips of lb_dps to the od. */ >>> - build_lrouter_lb_ips(od->lb_ips, lb_dps->lb); >>> } >>> >>> for (size_t i = 0; i < lbgrp_dps->n_ls; i++) { >>> @@ -9228,7 +8917,7 @@ arp_nd_ns_match(const char *ips, int addr_family, >>> struct ds *match) >>> /* Returns 'true' if the IPv4 'addr' is on the same subnet with one of the >>> * IPs configured on the router port. >>> */ >>> -static bool >>> +bool >>> lrouter_port_ipv4_reachable(const struct ovn_port *op, ovs_be32 addr) >>> { >>> for (size_t i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) { >>> @@ -9244,7 +8933,7 @@ lrouter_port_ipv4_reachable(const struct ovn_port >>> *op, ovs_be32 addr) >>> /* Returns 'true' if the IPv6 'addr' is on the same subnet with one of the >>> * IPs configured on the router port. >>> */ >>> -static bool >>> +bool >>> lrouter_port_ipv6_reachable(const struct ovn_port *op, >>> const struct in6_addr *addr) >>> { >>> @@ -9306,12 +8995,11 @@ build_lswitch_rport_arp_req_flow(const char *ips, >>> * - 75: ARP requests to router owned IPs (interface IP/LB/NAT). >>> */ >>> static void >>> -build_lswitch_rport_arp_req_flows(struct ovn_port *op, >>> - struct ovn_datapath *sw_od, >>> - struct ovn_port *sw_op, >>> - const struct lr_nat_table *lr_nats, >>> - struct hmap *lflows, >>> - const struct ovsdb_idl_row *stage_hint) >>> +build_lswitch_rport_arp_req_flows( >>> + struct ovn_port *op, struct ovn_datapath *sw_od, >>> + struct ovn_port *sw_op, const struct lr_nat_table *lr_nats, >>> + const struct lr_stateful_table *lr_stateful_table, >>> + struct hmap *lflows, const struct ovsdb_idl_row *stage_hint) >>> { >>> if (!op || !op->nbrp) { >>> return; >>> @@ -9325,32 +9013,38 @@ build_lswitch_rport_arp_req_flows(struct ovn_port >>> *op, >>> * router port. >>> * Priority: 80. >>> */ >>> - >>> - const char *ip_addr; >>> - SSET_FOR_EACH (ip_addr, &op->od->lb_ips->ips_v4_reachable) { >>> - ovs_be32 ipv4_addr; >>> - >>> - /* Check if the ovn port has a network configured on which we could >>> - * expect ARP requests for the LB VIP. >>> - */ >>> - if (ip_parse(ip_addr, &ipv4_addr) && >>> - lrouter_port_ipv4_reachable(op, ipv4_addr)) { >>> - build_lswitch_rport_arp_req_flow( >>> - ip_addr, AF_INET, sw_op, sw_od, 80, lflows, >>> - stage_hint); >>> + const struct lr_stateful_record *lr_stateful_rec = NULL; >>> + if (op->od->nbr->n_load_balancer || >>> op->od->nbr->n_load_balancer_group) { >>> + lr_stateful_rec = >>> lr_stateful_table_find_by_index(lr_stateful_table, >>> + op->od->index); >> >> Nit: indentation >> >>> + ovs_assert(lr_stateful_rec); >>> + >>> + const char *ip_addr; >>> + SSET_FOR_EACH (ip_addr, >>> &lr_stateful_rec->lb_ips->ips_v4_reachable) { >>> + ovs_be32 ipv4_addr; >>> + >>> + /* Check if the ovn port has a network configured on which we >>> could >>> + * expect ARP requests for the LB VIP. >>> + */ >> >> Nit: indentation, the two '*' above need to be one column to the right. >> >>> + if (ip_parse(ip_addr, &ipv4_addr) && >>> + lrouter_port_ipv4_reachable(op, ipv4_addr)) { >>> + build_lswitch_rport_arp_req_flow( >>> + ip_addr, AF_INET, sw_op, sw_od, 80, lflows, >>> + stage_hint); >>> + } >>> } >>> - } >>> - SSET_FOR_EACH (ip_addr, &op->od->lb_ips->ips_v6_reachable) { >>> - struct in6_addr ipv6_addr; >>> + SSET_FOR_EACH (ip_addr, >>> &lr_stateful_rec->lb_ips->ips_v6_reachable) { >>> + struct in6_addr ipv6_addr; >>> >>> - /* Check if the ovn port has a network configured on which we could >>> - * expect NS requests for the LB VIP. >>> - */ >>> - if (ipv6_parse(ip_addr, &ipv6_addr) && >>> - lrouter_port_ipv6_reachable(op, &ipv6_addr)) { >>> - build_lswitch_rport_arp_req_flow( >>> - ip_addr, AF_INET6, sw_op, sw_od, 80, lflows, >>> - stage_hint); >>> + /* Check if the ovn port has a network configured on which we >>> could >>> + * expect NS requests for the LB VIP. >>> + */ >>> + if (ipv6_parse(ip_addr, &ipv6_addr) && >>> + lrouter_port_ipv6_reachable(op, &ipv6_addr)) { >>> + build_lswitch_rport_arp_req_flow( >>> + ip_addr, AF_INET6, sw_op, sw_od, 80, lflows, >>> + stage_hint); >>> + } >>> } >>> } >>> >>> @@ -9400,13 +9094,17 @@ build_lswitch_rport_arp_req_flows(struct ovn_port >>> *op, >>> * expect ARP requests/NS for the DNAT external_ip. >>> */ >>> if (nat_entry_is_v6(nat_entry)) { >>> - if (!sset_contains(&op->od->lb_ips->ips_v6, nat->external_ip)) >>> { >>> + if (!lr_stateful_rec || >>> + !sset_contains(&lr_stateful_rec->lb_ips->ips_v6, >>> + nat->external_ip)) { >>> build_lswitch_rport_arp_req_flow( >>> nat->external_ip, AF_INET6, sw_op, sw_od, 80, lflows, >>> stage_hint); >>> } >>> } else { >>> - if (!sset_contains(&op->od->lb_ips->ips_v4, nat->external_ip)) >>> { >>> + if (!lr_stateful_rec || >>> + !sset_contains(&lr_stateful_rec->lb_ips->ips_v4, >>> + nat->external_ip)) { >>> build_lswitch_rport_arp_req_flow( >>> nat->external_ip, AF_INET, sw_op, sw_od, 80, lflows, >>> stage_hint); >>> @@ -9431,13 +9129,17 @@ build_lswitch_rport_arp_req_flows(struct ovn_port >>> *op, >>> * expect ARP requests/NS for the SNAT external_ip. >>> */ >>> if (nat_entry_is_v6(nat_entry)) { >>> - if (!sset_contains(&op->od->lb_ips->ips_v6, nat->external_ip)) >>> { >>> + if (!lr_stateful_rec || >>> + !sset_contains(&lr_stateful_rec->lb_ips->ips_v6, >>> + nat->external_ip)) { >>> build_lswitch_rport_arp_req_flow( >>> nat->external_ip, AF_INET6, sw_op, sw_od, 80, lflows, >>> stage_hint); >>> } >>> } else { >>> - if (!sset_contains(&op->od->lb_ips->ips_v4, nat->external_ip)) >>> { >>> + if (!lr_stateful_rec || >>> + !sset_contains(&lr_stateful_rec->lb_ips->ips_v4, >>> + nat->external_ip)) { >>> build_lswitch_rport_arp_req_flow( >>> nat->external_ip, AF_INET, sw_op, sw_od, 80, lflows, >>> stage_hint); >>> @@ -10476,11 +10178,11 @@ build_lswitch_ip_mcast_igmp_mld(struct >>> ovn_igmp_group *igmp_group, >>> >>> /* Ingress table 25: Destination lookup, unicast handling (priority 50), */ >>> static void >>> -build_lswitch_ip_unicast_lookup(struct ovn_port *op, >>> - const struct lr_nat_table *lr_nats, >>> - struct hmap *lflows, >>> - struct ds *actions, >>> - struct ds *match) >>> +build_lswitch_ip_unicast_lookup( >>> + struct ovn_port *op, const struct lr_nat_table *lr_nats, >>> + const struct lr_stateful_table *lr_stateful_table, >>> + struct hmap *lflows, struct ds *actions, >>> + struct ds *match) >>> { >>> ovs_assert(op->nbsp); >>> if (lsp_is_external(op->nbsp)) { >>> @@ -10493,7 +10195,8 @@ build_lswitch_ip_unicast_lookup(struct ovn_port *op, >>> */ >>> if (lsp_is_router(op->nbsp)) { >>> build_lswitch_rport_arp_req_flows(op->peer, op->od, op, lr_nats, >>> - lflows, &op->nbsp->header_); >>> + lr_stateful_table, lflows, >>> + &op->nbsp->header_); >>> } >>> >>> for (size_t i = 0; i < op->nbsp->n_addresses; i++) { >>> @@ -12683,6 +12386,7 @@ build_lrouter_port_nat_arp_nd_flow(struct ovn_port >>> *op, >>> static void >>> build_lrouter_drop_own_dest(struct ovn_port *op, >>> const struct lr_nat_record *lrnat_rec, >>> + const struct lr_stateful_record >>> *lr_stateful_rec, >>> enum ovn_stage stage, >>> uint16_t priority, bool drop_snat_ip, >>> struct hmap *lflows) >>> @@ -12695,8 +12399,8 @@ build_lrouter_drop_own_dest(struct ovn_port *op, >>> >>> bool router_ip_in_snat_ips = !!shash_find(&lrnat_rec->snat_ips, >>> ip); >>> - bool router_ip_in_lb_ips = >>> - !!sset_find(&op->od->lb_ips->ips_v4, ip); >>> + bool router_ip_in_lb_ips = (lr_stateful_rec && >> >> Nit: no need for parenthesis. >> >>> + !!sset_find(&lr_stateful_rec->lb_ips->ips_v4, ip)); >>> bool drop_router_ip = (drop_snat_ip == (router_ip_in_snat_ips >>> || >>> router_ip_in_lb_ips)); >>> >>> @@ -12725,8 +12429,8 @@ build_lrouter_drop_own_dest(struct ovn_port *op, >>> >>> bool router_ip_in_snat_ips = !!shash_find(&lrnat_rec->snat_ips, >>> ip); >>> - bool router_ip_in_lb_ips = >>> - !!sset_find(&op->od->lb_ips->ips_v6, ip); >>> + bool router_ip_in_lb_ips = (lr_stateful_rec && >>> + !!sset_find(&lr_stateful_rec->lb_ips->ips_v6, ip)); >> >> Same here. >> >>> bool drop_router_ip = (drop_snat_ip == (router_ip_in_snat_ips >>> || >>> router_ip_in_lb_ips)); >>> >>> @@ -13438,7 +13142,8 @@ build_ip_routing_flows_for_lrp( >>> */ >>> static void >>> build_ip_routing_flows_for_router_type_lsp( >>> - struct ovn_port *op, const struct hmap *lr_ports, struct hmap >>> *lflows) >>> + struct ovn_port *op, const struct lr_stateful_table >>> *lr_stateful_table, >>> + const struct hmap *lr_ports, struct hmap *lflows) >>> { >>> ovs_assert(op->nbsp); >>> if (!lsp_is_router(op->nbsp)) { >>> @@ -13446,7 +13151,8 @@ build_ip_routing_flows_for_router_type_lsp( >>> } >>> >>> struct ovn_port *peer = ovn_port_get_peer(lr_ports, op); >>> - if (!peer || !peer->nbrp || !peer->lrp_networks.n_ipv4_addrs) { >>> + if (!peer || !peer->nbrp || !peer->lrp_networks.n_ipv4_addrs >>> + || !op->od->n_router_ports) { >>> return; >>> } >>> >>> @@ -13457,19 +13163,29 @@ build_ip_routing_flows_for_router_type_lsp( >>> continue; >>> } >>> >>> - struct ovn_port_routable_addresses *ra = &router_port->routables; >>> - for (size_t j = 0; j < ra->n_addrs; j++) { >>> - struct lport_addresses *laddrs = &ra->laddrs[j]; >>> - for (size_t k = 0; k < laddrs->n_ipv4_addrs; k++) { >>> - add_route(lflows, peer->od, peer, >>> - peer->lrp_networks.ipv4_addrs[0].addr_s, >>> - laddrs->ipv4_addrs[k].network_s, >>> - laddrs->ipv4_addrs[k].plen, NULL, false, 0, >>> - &peer->nbrp->header_, false, >>> - ROUTE_PRIO_OFFSET_CONNECTED); >>> + const struct lr_stateful_record *lr_stateful_rec = >>> + lr_stateful_table_find_by_index(lr_stateful_table, >>> + router_port->od->index); >>> + >>> + if (router_port->nbrp->ha_chassis_group || >>> + router_port->nbrp->n_gateway_chassis) { >> >> Why do we do this now only for distributed gateway ports? Isn't this >> useful for gw routers too? > > Before this patch, the routable addresses for the router ports were > initialized > only if the logical router was a distributed router with gateway router ports. > > - https://github.com/ovn-org/ovn/blob/branch-23.09/northd/northd.c#L2765 >
This explains it, thanks! > And this was done in the northd engine node. > > In this patch, we generate these routable addressed at the time of > lflow generation. > So no functional changes are introduced in this patch. > > Please let me know if you have further questions. > No, it looks good to me now: Acked-by: Dumitru Ceara <dce...@redhat.com> Regards, Dumitru _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev