On Fri, Jan 26, 2024 at 1:42 PM Han Zhou <hz...@ovn.org> wrote:
>
> On Thu, Jan 25, 2024 at 8:22 AM Numan Siddique <num...@ovn.org> wrote:
> >
> > On Tue, Jan 23, 2024 at 2:40 AM Han Zhou <hz...@ovn.org> wrote:
> > >
> > > On Mon, Jan 22, 2024 at 7:11 PM Numan Siddique <num...@ovn.org> wrote:
> > > >
> > > > On Mon, Jan 22, 2024 at 4:03 PM Han Zhou <hz...@ovn.org> wrote:
> > > > >
> > > > > On Mon, Jan 22, 2024 at 9:18 AM Numan Siddique <num...@ovn.org>
> wrote:
> > > > > >
> > > > > > Hi Han,
> > > > > >
> > > > > > Thanks for the reviews.
> > > > > > PSB.
> > > > > >
> > > > > > Thanks
> > > > > > Numan
> > > > > >
> > > > > > On Mon, Jan 22, 2024 at 2:23 AM Han Zhou <hz...@ovn.org> wrote:
> > > > > > >
> > > > > > > On Thu, Jan 11, 2024 at 7:30 AM <num...@ovn.org> wrote:
> > > > > > > >
> > > > > > > > From: Numan Siddique <num...@ovn.org>
> > > > > > > >
> > > > > > > > This new engine now maintains the load balancer and NAT data
> of a
> > > > > > > > logical router which was earlier part of northd engine node
> data.
> > > > > > > > The main inputs to this engine are:
> > > > > > > >    - northd node
> > > > > > > >    - lr_nat node
> > > > > > > >    - lb_data node
> > > > > > > >
> > > > > > > > A record for each logical router is maintained in the
> > > > > 'lr_stateful_table'
> > > > > > > > hmap table and this record
> > > > > > > >    - stores the lb related data
> > > > > > > >    - embeds the 'lr_nat' record.
> > > > > > > >
> > > > > > > > This engine node becomes an input to 'lflow' node.
> > > > > > > >
> > > > > > > > Signed-off-by: Numan Siddique <num...@ovn.org>
> > > > > > > > ---
> > > > > > > >  lib/stopwatch-names.h    |   1 +
> > > > > > > >  northd/automake.mk       |   2 +
> > > > > > > >  northd/en-lflow.c        |   4 +
> > > > > > > >  northd/en-lr-nat.h       |   3 +
> > > > > > > >  northd/en-lr-stateful.c  | 641
> > > +++++++++++++++++++++++++++++++++++
> > > > > > > >  northd/en-lr-stateful.h  | 105 ++++++
> > > > > > > >  northd/en-sync-sb.c      |  49 +--
> > > > > > > >  northd/inc-proc-northd.c |  13 +-
> > > > > > > >  northd/northd.c          | 711
> > > > > ++++++++++++---------------------------
> > > > > > > >  northd/northd.h          | 139 +++++++-
> > > > > > > >  northd/ovn-northd.c      |   1 +
> > > > > > > >  tests/ovn-northd.at      |  62 ++++
> > > > > > > >  12 files changed, 1213 insertions(+), 518 deletions(-)
> > > > > > > >  create mode 100644 northd/en-lr-stateful.c
> > > > > > > >  create mode 100644 northd/en-lr-stateful.h
> > > > > > > >
> > > > > > > > diff --git a/lib/stopwatch-names.h b/lib/stopwatch-names.h
> > > > > > > > index 782d64320a..e5e41fbfd8 100644
> > > > > > > > --- a/lib/stopwatch-names.h
> > > > > > > > +++ b/lib/stopwatch-names.h
> > > > > > > > @@ -30,5 +30,6 @@
> > > > > > > >  #define PORT_GROUP_RUN_STOPWATCH_NAME "port_group_run"
> > > > > > > >  #define SYNC_METERS_RUN_STOPWATCH_NAME "sync_meters_run"
> > > > > > > >  #define LR_NAT_RUN_STOPWATCH_NAME "lr_nat_run"
> > > > > > > > +#define LR_STATEFUL_RUN_STOPWATCH_NAME "lr_stateful"
> > > > > > > >
> > > > > > > >  #endif
> > > > > > > > diff --git a/northd/automake.mk b/northd/automake.mk
> > > > > > > > index a477105470..b886356c9c 100644
> > > > > > > > --- a/northd/automake.mk
> > > > > > > > +++ b/northd/automake.mk
> > > > > > > > @@ -26,6 +26,8 @@ northd_ovn_northd_SOURCES = \
> > > > > > > >         northd/en-lb-data.h \
> > > > > > > >         northd/en-lr-nat.c \
> > > > > > > >         northd/en-lr-nat.h \
> > > > > > > > +       northd/en-lr-stateful.c \
> > > > > > > > +       northd/en-lr-stateful.h \
> > > > > > > >         northd/inc-proc-northd.c \
> > > > > > > >         northd/inc-proc-northd.h \
> > > > > > > >         northd/ipam.c \
> > > > > > > > diff --git a/northd/en-lflow.c b/northd/en-lflow.c
> > > > > > > > index e4f875ef7c..bb9f78a6a1 100644
> > > > > > > > --- a/northd/en-lflow.c
> > > > > > > > +++ b/northd/en-lflow.c
> > > > > > > > @@ -20,6 +20,7 @@
> > > > > > > >
> > > > > > > >  #include "en-lflow.h"
> > > > > > > >  #include "en-lr-nat.h"
> > > > > > > > +#include "en-lr-stateful.h"
> > > > > > > >  #include "en-northd.h"
> > > > > > > >  #include "en-meters.h"
> > > > > > > >
> > > > > > > > @@ -43,6 +44,8 @@ lflow_get_input_data(struct engine_node
> *node,
> > > > > > > >          engine_get_input_data("sync_meters", node);
> > > > > > > >      struct ed_type_lr_nat_data *lr_nat_data =
> > > > > > > >          engine_get_input_data("lr_nat", node);
> > > > > > > > +    struct ed_type_lr_stateful *lr_stateful_data =
> > > > > > > > +        engine_get_input_data("lr_stateful", node);
> > > > > > > >
> > > > > > > >      lflow_input->nbrec_bfd_table =
> > > > > > > >          EN_OVSDB_GET(engine_get_input("NB_bfd", node));
> > > > > > > > @@ -66,6 +69,7 @@ lflow_get_input_data(struct engine_node
> *node,
> > > > > > > >      lflow_input->lr_ports = &northd_data->lr_ports;
> > > > > > > >      lflow_input->ls_port_groups = &pg_data->ls_port_groups;
> > > > > > > >      lflow_input->lr_nats = &lr_nat_data->lr_nats;
> > > > > > > > +    lflow_input->lr_stateful_table =
> &lr_stateful_data->table;
> > > > > > > >      lflow_input->meter_groups =
> &sync_meters_data->meter_groups;
> > > > > > > >      lflow_input->lb_datapaths_map =
> > > &northd_data->lb_datapaths_map;
> > > > > > > >      lflow_input->svc_monitor_map =
> &northd_data->svc_monitor_map;
> > > > > > > > diff --git a/northd/en-lr-nat.h b/northd/en-lr-nat.h
> > > > > > > > index 3ec4c7b506..4d8e142ae6 100644
> > > > > > > > --- a/northd/en-lr-nat.h
> > > > > > > > +++ b/northd/en-lr-nat.h
> > > > > > > > @@ -86,6 +86,9 @@ struct lr_nat_table {
> > > > > > > >  const struct lr_nat_record * lr_nat_table_find_by_index(
> > > > > > > >      const struct lr_nat_table *, size_t od_index);
> > > > > > > >
> > > > > > > > +#define LR_NAT_TABLE_FOR_EACH(LR_NAT_REC, TABLE) \
> > > > > > > > +    HMAP_FOR_EACH (LR_NAT_REC, key_node, &(TABLE)->entries)
> > > > > > > > +
> > > > > > > >  struct ed_type_lr_nat_data {
> > > > > > > >      struct lr_nat_table lr_nats;
> > > > > > > >
> > > > > > > > diff --git a/northd/en-lr-stateful.c b/northd/en-lr-stateful.c
> > > > > > > > new file mode 100644
> > > > > > > > index 0000000000..dedfdf6747
> > > > > > > > --- /dev/null
> > > > > > > > +++ b/northd/en-lr-stateful.c
> > > > > > > > @@ -0,0 +1,641 @@
> > > > > > > > +/*
> > > > > > > > + * Copyright (c) 2023, Red Hat, Inc.
> > > > > > > > + *
> > > > > > > > + * Licensed under the Apache License, Version 2.0 (the
> > > "License");
> > > > > > > > + * you may not use this file except in compliance with the
> > > License.
> > > > > > > > + * You may obtain a copy of the License at:
> > > > > > > > + *
> > > > > > > > + *     http://www.apache.org/licenses/LICENSE-2.0
> > > > > > > > + *
> > > > > > > > + * Unless required by applicable law or agreed to in writing,
> > > > > software
> > > > > > > > + * distributed under the License is distributed on an "AS IS"
> > > BASIS,
> > > > > > > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
> express
> > > or
> > > > > > > implied.
> > > > > > > > + * See the License for the specific language governing
> > > permissions
> > > > > and
> > > > > > > > + * limitations under the License.
> > > > > > > > + */
> > > > > > > > +
> > > > > > > > +#include <config.h>
> > > > > > > > +
> > > > > > > > +#include <getopt.h>
> > > > > > > > +#include <stdlib.h>
> > > > > > > > +#include <stdio.h>
> > > > > > > > +
> > > > > > > > +/* OVS includes */
> > > > > > > > +#include "include/openvswitch/hmap.h"
> > > > > > > > +#include "lib/bitmap.h"
> > > > > > > > +#include "lib/socket-util.h"
> > > > > > > > +#include "lib/uuidset.h"
> > > > > > > > +#include "openvswitch/util.h"
> > > > > > > > +#include "openvswitch/vlog.h"
> > > > > > > > +#include "stopwatch.h"
> > > > > > > > +
> > > > > > > > +/* OVN includes */
> > > > > > > > +#include "en-lb-data.h"
> > > > > > > > +#include "en-lr-nat.h"
> > > > > > > > +#include "en-lr-stateful.h"
> > > > > > > > +#include "lib/inc-proc-eng.h"
> > > > > > > > +#include "lib/lb.h"
> > > > > > > > +#include "lib/ovn-nb-idl.h"
> > > > > > > > +#include "lib/ovn-sb-idl.h"
> > > > > > > > +#include "lib/ovn-util.h"
> > > > > > > > +#include "lib/stopwatch-names.h"
> > > > > > > > +#include "northd.h"
> > > > > > > > +
> > > > > > > > +VLOG_DEFINE_THIS_MODULE(en_lr_stateful);
> > > > > > > > +
> > > > > > > > +/* Static function declarations. */
> > > > > > > > +static void lr_stateful_table_init(struct lr_stateful_table
> *);
> > > > > > > > +static void lr_stateful_table_clear(struct lr_stateful_table
> *);
> > > > > > > > +static void lr_stateful_table_destroy(struct
> lr_stateful_table
> > > *);
> > > > > > > > +static struct lr_stateful_record *lr_stateful_table_find_(
> > > > > > > > +    const struct lr_stateful_table *, const struct
> > > > > nbrec_logical_router
> > > > > > > *);
> > > > > > > > +static struct lr_stateful_record
> > > *lr_stateful_table_find_by_index_(
> > > > > > > > +    const struct lr_stateful_table *table, size_t od_index);
> > > > > > > > +
> > > > > > > > +static void lr_stateful_table_build(struct lr_stateful_table
> *,
> > > > > > > > +                                   const struct lr_nat_table
> *,
> > > > > > > > +                                   const struct ovn_datapaths
> > > > > > > *lr_datapaths,
> > > > > > > > +                                   const struct hmap
> > > > > *lb_datapaths_map,
> > > > > > > > +                                   const struct hmap
> > > > > > > *lbgrp_datapaths_map);
> > > > > > > > +
> > > > > > > > +static struct lr_stateful_input lr_stateful_get_input_data(
> > > > > > > > +    struct engine_node *);
> > > > > > > > +
> > > > > > > > +static struct lr_stateful_record *lr_stateful_record_create(
> > > > > > > > +    struct lr_stateful_table *, const struct lr_nat_record *,
> > > > > > > > +    const struct hmap *lb_datapaths_map,
> > > > > > > > +    const struct hmap *lbgrp_datapaths_map);
> > > > > > > > +static void lr_stateful_record_destroy(struct
> lr_stateful_record
> > > *);
> > > > > > > > +static void lr_stateful_record_init(
> > > > > > > > +    struct lr_stateful_record *,
> > > > > > > > +    const struct hmap *lb_datapaths_map,
> > > > > > > > +    const struct hmap *lbgrp_datapaths_map);
> > > > > > > > +
> > > > > > > > +static void build_lrouter_lb_reachable_ips(struct
> > > lr_stateful_record
> > > > > *,
> > > > > > > > +                                           const struct
> > > > > ovn_northd_lb *);
> > > > > > > > +static void add_neigh_ips_to_lrouter(struct
> lr_stateful_record *,
> > > > > > > > +                                     enum
> > > lb_neighbor_responder_mode,
> > > > > > > > +                                     const struct sset
> > > *lb_ips_v4,
> > > > > > > > +                                     const struct sset
> > > *lb_ips_v6);
> > > > > > > > +static void remove_lrouter_lb_reachable_ips(struct
> > > > > lr_stateful_record *,
> > > > > > > > +                                            enum
> > > > > > > lb_neighbor_responder_mode,
> > > > > > > > +                                            const struct sset
> > > > > *lb_ips_v4,
> > > > > > > > +                                            const struct sset
> > > > > > > *lb_ips_v6);
> > > > > > > > +static void lr_stateful_build_vip_nats(struct
> lr_stateful_record
> > > *);
> > > > > > > > +
> > > > > > > > +/* 'lr_stateful' engine node manages the NB logical router LB
> > > data.
> > > > > > > > + */
> > > > > > > > +void *
> > > > > > > > +en_lr_stateful_init(struct engine_node *node OVS_UNUSED,
> > > > > > > > +                    struct engine_arg *arg OVS_UNUSED)
> > > > > > > > +{
> > > > > > > > +    struct ed_type_lr_stateful *data = xzalloc(sizeof *data);
> > > > > > > > +    lr_stateful_table_init(&data->table);
> > > > > > > > +    hmapx_init(&data->trk_data.crupdated);
> > > > > > > > +    return data;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +void
> > > > > > > > +en_lr_stateful_cleanup(void *data_)
> > > > > > > > +{
> > > > > > > > +    struct ed_type_lr_stateful *data = data_;
> > > > > > > > +    lr_stateful_table_destroy(&data->table);
> > > > > > > > +    hmapx_destroy(&data->trk_data.crupdated);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +void
> > > > > > > > +en_lr_stateful_clear_tracked_data(void *data_)
> > > > > > > > +{
> > > > > > > > +    struct ed_type_lr_stateful *data = data_;
> > > > > > > > +
> > > > > > > > +    hmapx_clear(&data->trk_data.crupdated);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +void
> > > > > > > > +en_lr_stateful_run(struct engine_node *node, void *data_)
> > > > > > > > +{
> > > > > > > > +    struct lr_stateful_input input_data =
> > > > > > > lr_stateful_get_input_data(node);
> > > > > > > > +    struct ed_type_lr_stateful *data = data_;
> > > > > > > > +
> > > > > > > > +    stopwatch_start(LR_STATEFUL_RUN_STOPWATCH_NAME,
> time_msec());
> > > > > > > > +
> > > > > > > > +    lr_stateful_table_clear(&data->table);
> > > > > > > > +    lr_stateful_table_build(&data->table, input_data.lr_nats,
> > > > > > > > +                            input_data.lr_datapaths,
> > > > > > > > +                            input_data.lb_datapaths_map,
> > > > > > > > +                            input_data.lbgrp_datapaths_map);
> > > > > > > > +
> > > > > > > > +    stopwatch_stop(LR_STATEFUL_RUN_STOPWATCH_NAME,
> time_msec());
> > > > > > > > +    engine_set_node_state(node, EN_UPDATED);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +bool
> > > > > > > > +lr_stateful_northd_handler(struct engine_node *node, void
> *data
> > > > > > > OVS_UNUSED)
> > > > > > > > +{
> > > > > > > > +    struct northd_data *northd_data =
> > > engine_get_input_data("northd",
> > > > > > > node);
> > > > > > > > +    if (!northd_has_tracked_data(&northd_data->trk_data)) {
> > > > > > > > +        return false;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    /* lr_stateful node needs inputs for any changes to NAT
> and
> > > load
> > > > > > > balancers.
> > > > > > > > +     * Changes to NAT is provided by the lr_nat tracked data
> and
> > > > > changes
> > > > > > > > +     * to lbs and lb grps is provided by lb_data's tracked
> data.
> > > > > > > > +     * So we don't need to do anything here for northd
> changes.
> > > > > > > > +     * But we do need to access the datapaths and
> lb_datapaths
> > > from
> > > > > the
> > > > > > > > +     * northd engine node and hence its an input.
> > > > > > > > +     * */
> > > > >
> > > > > I wonder if this is sufficient. It is true that NAT and LB/LB grp
> > > changes
> > > > > are handled in _lr_nat_handler and _lb_data_handler, but what if the
> > > > > NAT/LB/LB grp didn't change, but the northd data depended by this
> node
> > > > > change:
> > > > > * northd_data->lr_datapaths
> > > > > * northd_data->lb_datapaths_map
> > > > > * northd_data->lb_group_datapaths_map
> > > > >
> > > >
> > > > I think it is sufficient.  Please see below.
> > > >
> > > > > Since these are used for data compute of the node, would it be a
> > > problem if
> > > > > any of these changes but ignored here?
> > > > > In general, if node A depends on B, we can ignore B's change only
> if we
> > > can
> > > > > prove that:
> > > > > * B's changes triggers C's change, and
> > > > > * C's change that reflects/covers B's change is handled by A
> > > > >
> > > >
> > > > We could also argue that,  if node A depends on both B and C directly
> > > > and node B also depends on C,
> > > > then it is sufficient if A handles changes to C.
> > > >
> > > > If node B changes its state due to some node D, then A can ignore B's
> > > > changes (if those changes don't
> > > > relate to the functionality of A).
> > > >
> > > > > At least the code is not obvious on this part and I can't see it
> from
> > > the
> > > > > above comment, either.
> > > >
> > > > Let's take each of the above fields you mentioned
> > > >
> > > > 1. northd_data->lr_datapaths   :  If this data structure changes,
> > > > northd engine node results in a recompute.
> > > >     If in the future we add I-P for add/delete of logical
> > > > switches/routers,  we need to include that in the northd tracked data.
> > > >     lr_stateful_northd_handler() can either handle these changes or
> > > > fall back to recompute.
> > > >
> > >
> > > It would be better to call this out in the comment.
> > >
> > > > 2.  For the other 2 data (northd_data->lb_datapaths_map  and
> > > > northd_data->lb_group_datapaths_map)
> > > >      northd tracking data already captures the changes (in
> > > nd_changes->trk_lbs).
> > > >     In the lr_stateful since we also depend on _lb_data we are o.k to
> > > > ignore this in northd handler.
> > > >     The above comment documents that.
> > > >
> > >
> > > Checked the code again, and I see where I was confused. I was under the
> > > impression that the mapping between LB and datapaths are maintained and
> > > tracked in northd_data only, but I now realized that it actually
> existing
> > > in both northd_data and lb_data. The LB -> datapath maping is in
> > > northd_data and the datapath -> LB mapping is in lb_data. So it means
> > > whenever northd_data->lb_datapaths_map changes, the
> > > lb_data->ls_lb_map/lr_lb_map will also change, and since
> > > lb_data->ls_lb_map/lr_lb_map changes are handled in lr_stateful, it is
> > > sufficient.
> > >
> > > I would be helpful to clarify this in the comments above. I also forgot
> > > (should have recalled during the LB I-P review) why the mappings be
> split
> > > in two nodes.
> > >
> > > > I can document this with more details in the next version.
> > >
> > > Thanks!
> > >
> > > >
> > > >
> > > > >
> > > > > > > > +    return true;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +bool
> > > > > > > > +lr_stateful_lb_data_handler(struct engine_node *node, void
> > > *data_)
> > > > > > > > +{
> > > > > > > > +    struct ed_type_lb_data *lb_data =
> > > > > engine_get_input_data("lb_data",
> > > > > > > node);
> > > > > > > > +    if (!lb_data->tracked) {
> > > > > > > > +        return false;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    struct ed_type_lr_stateful *data = data_;
> > > > > > > > +    struct lr_stateful_input input_data =
> > > > > > > > +        lr_stateful_get_input_data(node);
> > > > > > > > +
> > > > > > > > +    const struct tracked_lb_data *trk_lb_data =
> > > > > > > &lb_data->tracked_lb_data;
> > > > > > > > +    const struct crupdated_od_lb_data *codlb;
> > > > > > > > +
> > > > > > > > +    LIST_FOR_EACH (codlb, list_node,
> > > &trk_lb_data->crupdated_lr_lbs)
> > > > > {
> > > > > > > > +        const struct ovn_datapath *od = ovn_datapath_find(
> > > > > > > > +            &input_data.lr_datapaths->datapaths,
> > > &codlb->od_uuid);
> > > > > > > > +        ovs_assert(od);
> > > > > > > > +
> > > > > > > > +        struct lr_stateful_record *lr_stateful_rec
> > > > > > > =lr_stateful_table_find_(
> > > > > > > > +            &data->table, od->nbr);
> > > > > > > > +        if (!lr_stateful_rec) {
> > > > > > > > +            const struct lr_nat_record *lrnat_rec =
> > > > > > > lr_nat_table_find_by_index(
> > > > > > > > +                input_data.lr_nats, od->index);
> > > > > > > > +            ovs_assert(lrnat_rec);
> > > > > > > > +
> > > > > > > > +            lr_stateful_rec =
> > > lr_stateful_record_create(&data->table,
> > > > > > > > +                                            lrnat_rec,
> > > > > > > > +
> > > > >  input_data.lb_datapaths_map,
> > > > > > > > +
> > > > > > >  input_data.lbgrp_datapaths_map);
> > > > > > > > +
> > > > > > > > +            /* Add the lr_stateful_rec rec to the tracking
> data.
> > > */
> > > > > > > > +            hmapx_add(&data->trk_data.crupdated,
> > > lr_stateful_rec);
> > > > > > > > +            continue;
> > > > > > > > +        }
> > > > > > > > +
> > > > > > > > +        /* Update. */
> > > > > > > > +        struct uuidset_node *uuidnode;
> > > > > > > > +        UUIDSET_FOR_EACH (uuidnode, &codlb->assoc_lbs) {
> > > > > > > > +            const struct ovn_lb_datapaths *lb_dps =
> > > > > > > ovn_lb_datapaths_find(
> > > > > > > > +                input_data.lb_datapaths_map,
> &uuidnode->uuid);
> > > > > > > > +            ovs_assert(lb_dps);
> > > > > > > > +
> > > > > > > > +            /* Add the lb_ips of lb_dps to the od. */
> > > > > > > > +            build_lrouter_lb_ips(lr_stateful_rec->lb_ips,
> > > > > lb_dps->lb);
> > > > > > > > +            build_lrouter_lb_reachable_ips(lr_stateful_rec,
> > > > > lb_dps->lb);
> > > > > > > > +        }
> > > > > > > > +
> > > > > > > > +        UUIDSET_FOR_EACH (uuidnode, &codlb->assoc_lbgrps) {
> > > > > > > > +            const struct ovn_lb_group_datapaths *lbgrp_dps =
> > > > > > > > +
> > > > > > >  ovn_lb_group_datapaths_find(input_data.lbgrp_datapaths_map,
> > > > > > > > +                                            &uuidnode->uuid);
> > > > > > > > +            ovs_assert(lbgrp_dps);
> > > > > > > > +
> > > > > > > > +            for (size_t j = 0; j <
> lbgrp_dps->lb_group->n_lbs;
> > > j++) {
> > > > > > > > +                const struct uuid *lb_uuid
> > > > > > > > +                    =
> > > > > &lbgrp_dps->lb_group->lbs[j]->nlb->header_.uuid;
> > > > > > > > +                const struct ovn_lb_datapaths *lb_dps =
> > > > > > > ovn_lb_datapaths_find(
> > > > > > > > +                    input_data.lb_datapaths_map, lb_uuid);
> > > > > > > > +                ovs_assert(lb_dps);
> > > > > > > > +
> > > > > > > > +                /* Add the lb_ips of lb_dps to the od. */
> > > > > > > > +                build_lrouter_lb_ips(lr_stateful_rec->lb_ips,
> > > > > > > lb_dps->lb);
> > > > > > > > +
>  build_lrouter_lb_reachable_ips(lr_stateful_rec,
> > > > > > > lb_dps->lb);
> > > > > > > > +            }
> > > > > > > > +        }
> > > > > > > > +
> > > > > > > > +        /* Add the lr_stateful_rec rec to the tracking data.
> */
> > > > > > > > +        hmapx_add(&data->trk_data.crupdated,
> lr_stateful_rec);
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    const struct crupdated_lb *clb;
> > > > > > > > +    HMAP_FOR_EACH (clb, hmap_node,
> &trk_lb_data->crupdated_lbs) {
> > > > > > > > +        const struct uuid *lb_uuid =
> &clb->lb->nlb->header_.uuid;
> > > > > > > > +        const struct ovn_northd_lb *lb = clb->lb;
> > > > > > > > +
> > > > > > > > +        const struct ovn_lb_datapaths *lb_dps =
> > > > > ovn_lb_datapaths_find(
> > > > > > > > +            input_data.lb_datapaths_map, lb_uuid);
> > > > > > > > +        ovs_assert(lb_dps);
> > > > > > > > +
> > > > > > > > +        size_t index;
> > > > > > > > +        BITMAP_FOR_EACH_1 (index,
> > > ods_size(input_data.lr_datapaths),
> > > > > > > > +                           lb_dps->nb_lr_map) {
> > > > > > > > +            const struct ovn_datapath *od =
> > > > > > > > +                input_data.lr_datapaths->array[index];
> > > > > > > > +
> > > > > > > > +            struct lr_stateful_record *lr_stateful_rec =
> > > > > > > > +                lr_stateful_table_find_(&data->table,
> od->nbr);
> > > > > > > > +            ovs_assert(lr_stateful_rec);
> > > > > > > > +
> > > > > > > > +            /* Update the od->lb_ips with the deleted and
> > > inserted
> > > > > > > > +             * vips (if any). */
> > > > > > > > +
>  remove_ips_from_lb_ip_set(lr_stateful_rec->lb_ips,
> > > > > > > lb->routable,
> > > > > > > > +                                      &clb->deleted_vips_v4,
> > > > > > > > +                                      &clb->deleted_vips_v6);
> > > > > > > > +            add_ips_to_lb_ip_set(lr_stateful_rec->lb_ips,
> > > > > lb->routable,
> > > > > > > > +                                 &clb->inserted_vips_v4,
> > > > > > > > +                                 &clb->inserted_vips_v6);
> > > > > > > > +
> > > > > > > > +            remove_lrouter_lb_reachable_ips(lr_stateful_rec,
> > > > > > > lb->neigh_mode,
> > > > > > > > +
> > >  &clb->deleted_vips_v4,
> > > > > > > > +
> > >  &clb->deleted_vips_v6);
> > > > > > > > +            add_neigh_ips_to_lrouter(lr_stateful_rec,
> > > lb->neigh_mode,
> > > > > > > > +                                     &clb->inserted_vips_v4,
> > > > > > > > +                                     &clb->inserted_vips_v6);
> > > > > > > > +
> > > > > > > > +            /* Add the lr_stateful_rec rec to the tracking
> data.
> > > */
> > > > > > > > +            hmapx_add(&data->trk_data.crupdated,
> > > lr_stateful_rec);
> > > > > > > > +        }
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    const struct crupdated_lbgrp *crupdated_lbgrp;
> > > > > > > > +    HMAP_FOR_EACH (crupdated_lbgrp, hmap_node,
> > > > > > > > +                   &trk_lb_data->crupdated_lbgrps) {
> > > > > > > > +        const struct uuid *lb_uuid =
> > > &crupdated_lbgrp->lbgrp->uuid;
> > > > > > > > +        const struct ovn_lb_group_datapaths *lbgrp_dps =
> > > > > > > > +
> > > > >  ovn_lb_group_datapaths_find(input_data.lbgrp_datapaths_map,
> > > > > > > > +                                        lb_uuid);
> > > > > > > > +        ovs_assert(lbgrp_dps);
> > > > > > > > +
> > > > > > > > +        struct hmapx_node *hnode;
> > > > > > > > +        HMAPX_FOR_EACH (hnode, &crupdated_lbgrp->assoc_lbs) {
> > > > > > > > +            const struct ovn_northd_lb *lb = hnode->data;
> > > > > > > > +            lb_uuid = &lb->nlb->header_.uuid;
> > > > > > > > +            const struct ovn_lb_datapaths *lb_dps =
> > > > > > > ovn_lb_datapaths_find(
> > > > > > > > +                input_data.lb_datapaths_map, lb_uuid);
> > > > > > > > +            ovs_assert(lb_dps);
> > > > > > > > +            for (size_t i = 0; i < lbgrp_dps->n_lr; i++) {
> > > > > > > > +                const struct ovn_datapath *od =
> lbgrp_dps->lr[i];
> > > > > > > > +                struct lr_stateful_record *lr_stateful_rec =
> > > > > > > > +                    lr_stateful_table_find_(&data->table,
> > > od->nbr);
> > > > > > > > +                ovs_assert(lr_stateful_rec);
> > > > > > > > +                /* Add the lb_ips of lb_dps to the lr lb
> data. */
> > > > > > > > +                build_lrouter_lb_ips(lr_stateful_rec->lb_ips,
> > > > > > > lb_dps->lb);
> > > > > > > > +
>  build_lrouter_lb_reachable_ips(lr_stateful_rec,
> > > > > > > lb_dps->lb);
> > > > > > > > +
> > > > > > > > +                /* Add the lr_stateful_rec rec to the
> tracking
> > > data.
> > > > > */
> > > > > > > > +                hmapx_add(&data->trk_data.crupdated,
> > > > > lr_stateful_rec);
> > > > > > > > +            }
> > > > > > > > +        }
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    if (lr_stateful_has_tracked_data(&data->trk_data)) {
> > > > > > > > +        struct hmapx_node *hmapx_node;
> > > > > > > > +        /* For all the modified lr_stateful records
> (re)build the
> > > > > > > > +         * vip nats. */
> > > > > > > > +        HMAPX_FOR_EACH (hmapx_node,
> &data->trk_data.crupdated) {
> > > > > > > > +            lr_stateful_build_vip_nats(hmapx_node->data);
> > > > > > > > +        }
> > > > > > > > +
> > > > > > > > +        engine_set_node_state(node, EN_UPDATED);
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    return true;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +bool
> > > > > > > > +lr_stateful_lr_nat_handler(struct engine_node *node, void
> *data_)
> > > > > > > > +{
> > > > > > > > +    struct ed_type_lr_nat_data *lr_nat_data =
> > > > > > > > +        engine_get_input_data("lr_nat", node);
> > > > > > > > +
> > > > > > > > +    if (!lr_nat_has_tracked_data(&lr_nat_data->trk_data)) {
> > > > > > > > +        return false;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    struct ed_type_lr_stateful *data = data_;
> > > > > > > > +    struct lr_stateful_input input_data =
> > > > > > > > +        lr_stateful_get_input_data(node);
> > > > > > > > +    struct hmapx_node *hmapx_node;
> > > > > > > > +
> > > > > > > > +    HMAPX_FOR_EACH (hmapx_node,
> > > &lr_nat_data->trk_data.crupdated) {
> > > > > > > > +        const struct lr_nat_record *lrnat_rec =
> hmapx_node->data;
> > > > > > > > +        struct lr_stateful_record *lr_stateful_rec =
> > > > > > > > +            lr_stateful_table_find_(&data->table,
> > > > > lrnat_rec->od->nbr);
> > > > > > > > +        if (!lr_stateful_rec) {
> > > > > > > > +            lr_stateful_rec =
> > > lr_stateful_record_create(&data->table,
> > > > > > > > +                                            lrnat_rec,
> > > > > > > > +
> > > > >  input_data.lb_datapaths_map,
> > > > > > > > +
> > > > > > >  input_data.lbgrp_datapaths_map);
> > > > > > > > +        } else {
> > > > > > > > +            lr_stateful_build_vip_nats(lr_stateful_rec);
> > > > > > > > +        }
> > > > > > > > +
> > > > > > > > +        /* Add the lr_stateful_rec rec to the tracking data.
> */
> > > > > > > > +        hmapx_add(&data->trk_data.crupdated,
> lr_stateful_rec);
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    if (lr_stateful_has_tracked_data(&data->trk_data)) {
> > > > > > > > +        engine_set_node_state(node, EN_UPDATED);
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    return true;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +const struct lr_stateful_record *
> > > > > > > > +lr_stateful_table_find_by_index(const struct
> lr_stateful_table
> > > > > *table,
> > > > > > > > +                                   size_t od_index)
> > > > > > > > +{
> > > > > > > > +    return lr_stateful_table_find_by_index_(table, od_index);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +/* static functions. */
> > > > > > > > +static void
> > > > > > > > +lr_stateful_table_init(struct lr_stateful_table *table)
> > > > > > > > +{
> > > > > > > > +    *table = (struct lr_stateful_table) {
> > > > > > > > +        .entries = HMAP_INITIALIZER(&table->entries),
> > > > > > > > +    };
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void
> > > > > > > > +lr_stateful_table_destroy(struct lr_stateful_table *table)
> > > > > > > > +{
> > > > > > > > +    lr_stateful_table_clear(table);
> > > > > > > > +    hmap_destroy(&table->entries);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void
> > > > > > > > +lr_stateful_table_clear(struct lr_stateful_table *table)
> > > > > > > > +{
> > > > > > > > +    struct lr_stateful_record *lr_stateful_rec;
> > > > > > > > +    HMAP_FOR_EACH_POP (lr_stateful_rec, key_node,
> > > &table->entries) {
> > > > > > > > +        lr_stateful_record_destroy(lr_stateful_rec);
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    free(table->array);
> > > > > > > > +    table->array = NULL;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void
> > > > > > > > +lr_stateful_table_build(struct lr_stateful_table *table,
> > > > > > > > +                       const struct lr_nat_table *lr_nats,
> > > > > > > > +                       const struct ovn_datapaths
> *lr_datapaths,
> > > > > > > > +                       const struct hmap *lb_datapaths_map,
> > > > > > > > +                       const struct hmap
> *lbgrp_datapaths_map)
> > > > > > > > +{
> > > > > > > > +    table->array = xrealloc(table->array,
> > > > > > > > +                            ods_size(lr_datapaths) * sizeof
> > > > > > > *table->array);
> > > > > > > > +    const struct lr_nat_record *lrnat_rec;
> > > > > > > > +    LR_NAT_TABLE_FOR_EACH (lrnat_rec, lr_nats) {
> > > > > > > > +        lr_stateful_record_create(table, lrnat_rec,
> > > lb_datapaths_map,
> > > > > > > > +                                     lbgrp_datapaths_map);
> > > > > > > > +    }
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static struct lr_stateful_record *
> > > > > > > > +lr_stateful_table_find_(const struct lr_stateful_table
> *table,
> > > > > > > > +                  const struct nbrec_logical_router *nbr)
> > > > > > > > +{
> > > > > > > > +    struct lr_stateful_record *lr_stateful_rec;
> > > > > > > > +
> > > > > > > > +    HMAP_FOR_EACH_WITH_HASH (lr_stateful_rec, key_node,
> > > > > > > > +                             uuid_hash(&nbr->header_.uuid),
> > > > > > > &table->entries) {
> > > > > > > > +        if (nbr == lr_stateful_rec->od->nbr) {
> > > > > > > > +            return lr_stateful_rec;
> > > > > > > > +        }
> > > > > > > > +    }
> > > > > > > > +    return NULL;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static struct lr_stateful_record *
> > > > > > > > +lr_stateful_table_find_by_index_(const struct
> lr_stateful_table
> > > > > *table,
> > > > > > > > +                                   size_t od_index)
> > > > > > > > +{
> > > > > > > > +    ovs_assert(od_index <= hmap_count(&table->entries));
> > > > > > > > +    return table->array[od_index];
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static struct lr_stateful_record *
> > > > > > > > +lr_stateful_record_create(struct lr_stateful_table *table,
> > > > > > > > +                         const struct lr_nat_record
> *lrnat_rec,
> > > > > > > > +                         const struct hmap *lb_datapaths_map,
> > > > > > > > +                         const struct hmap
> *lbgrp_datapaths_map)
> > > > > > > > +{
> > > > > > > > +    struct lr_stateful_record *lr_stateful_rec =
> > > > > > > > +        xzalloc(sizeof *lr_stateful_rec);
> > > > > > > > +    lr_stateful_rec->lrnat_rec = lrnat_rec;
> > > > > > > > +    lr_stateful_rec->od = lrnat_rec->od;
> > > > > > > > +    lr_stateful_record_init(lr_stateful_rec,
> lb_datapaths_map,
> > > > > > > > +                               lbgrp_datapaths_map);
> > > > > > > > +
> > > > > > > > +    hmap_insert(&table->entries, &lr_stateful_rec->key_node,
> > > > > > > > +
> > >  uuid_hash(&lr_stateful_rec->od->nbr->header_.uuid));
> > > > > > > > +
> > > > > > > > +    table->array[lr_stateful_rec->od->index] =
> lr_stateful_rec;
> > > > > > > > +    return lr_stateful_rec;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void
> > > > > > > > +lr_stateful_record_destroy(struct lr_stateful_record
> > > > > *lr_stateful_rec)
> > > > > > > > +{
> > > > > > > > +    ovn_lb_ip_set_destroy(lr_stateful_rec->lb_ips);
> > > > > > > > +    lr_stateful_rec->lb_ips = NULL;
> > > > > > > > +    sset_destroy(&lr_stateful_rec->vip_nats);
> > > > > > > > +    free(lr_stateful_rec);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void
> > > > > > > > +lr_stateful_record_init(struct lr_stateful_record
> > > *lr_stateful_rec,
> > > > > > > > +                           const struct hmap
> *lb_datapaths_map,
> > > > > > > > +                           const struct hmap
> > > *lbgrp_datapaths_map)
> > > > > > > > +{
> > > > > > > > +    /* Checking load balancer groups first, starting from the
> > > largest
> > > > > > > one,
> > > > > > > > +     * to more efficiently copy IP sets. */
> > > > > > > > +    size_t largest_group = 0;
> > > > > > > > +
> > > > > > > > +    const struct nbrec_logical_router *nbr =
> > > > > lr_stateful_rec->od->nbr;
> > > > > > > > +    for (size_t i = 1; i < nbr->n_load_balancer_group; i++) {
> > > > > > > > +        if (nbr->load_balancer_group[i]->n_load_balancer >
> > > > > > > > +
> > > > > > >  nbr->load_balancer_group[largest_group]->n_load_balancer) {
> > > > > > > > +            largest_group = i;
> > > > > > > > +        }
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    for (size_t i = 0; i < nbr->n_load_balancer_group; i++) {
> > > > > > > > +        size_t idx = (i + largest_group) %
> > > > > nbr->n_load_balancer_group;
> > > > > > > > +
> > > > > > > > +        const struct nbrec_load_balancer_group
> *nbrec_lb_group =
> > > > > > > > +            nbr->load_balancer_group[idx];
> > > > > > > > +        const struct uuid *lbgrp_uuid =
> > > > > &nbrec_lb_group->header_.uuid;
> > > > > > > > +
> > > > > > > > +         const struct ovn_lb_group_datapaths *lbgrp_dps =
> > > > > > > > +            ovn_lb_group_datapaths_find(lbgrp_datapaths_map,
> > > > > > > > +                                        lbgrp_uuid);
> > > > > > > > +        ovs_assert(lbgrp_dps);
> > > > > > > > +
> > > > > > > > +        if (!lr_stateful_rec->lb_ips) {
> > > > > > > > +            lr_stateful_rec->lb_ips =
> > > > > > > > +
>  ovn_lb_ip_set_clone(lbgrp_dps->lb_group->lb_ips);
> > > > > > > > +        } else {
> > > > > > > > +            for (size_t j = 0; j <
> lbgrp_dps->lb_group->n_lbs;
> > > j++) {
> > > > > > > > +                build_lrouter_lb_ips(lr_stateful_rec->lb_ips,
> > > > > > > > +
> > > lbgrp_dps->lb_group->lbs[j]);
> > > > > > > > +            }
> > > > > > > > +        }
> > > > > > > > +
> > > > > > > > +        for (size_t j = 0; j < lbgrp_dps->lb_group->n_lbs;
> j++) {
> > > > > > > > +            build_lrouter_lb_reachable_ips(lr_stateful_rec,
> > > > > > > > +
> > > > > lbgrp_dps->lb_group->lbs[j]);
> > > > > > > > +        }
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    if (!lr_stateful_rec->lb_ips) {
> > > > > > > > +        lr_stateful_rec->lb_ips = ovn_lb_ip_set_create();
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    for (size_t i = 0; i < nbr->n_load_balancer; i++) {
> > > > > > > > +        const struct uuid *lb_uuid =
> > > > > > > > +            &nbr->load_balancer[i]->header_.uuid;
> > > > > > > > +        const struct ovn_lb_datapaths *lb_dps =
> > > > > > > > +            ovn_lb_datapaths_find(lb_datapaths_map, lb_uuid);
> > > > > > > > +        ovs_assert(lb_dps);
> > > > > > > > +        build_lrouter_lb_ips(lr_stateful_rec->lb_ips,
> > > lb_dps->lb);
> > > > > > > > +        build_lrouter_lb_reachable_ips(lr_stateful_rec,
> > > lb_dps->lb);
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    sset_init(&lr_stateful_rec->vip_nats);
> > > > > > > > +
> > > > > > > > +    if (nbr->n_nat) {
> > > > > > > > +        lr_stateful_build_vip_nats(lr_stateful_rec);
> > > > > > > > +    }
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static struct lr_stateful_input
> > > > > > > > +lr_stateful_get_input_data(struct engine_node *node)
> > > > > > > > +{
> > > > > > > > +    struct northd_data *northd_data =
> > > engine_get_input_data("northd",
> > > > > > > node);
> > > > > > > > +    struct ed_type_lr_nat_data *lr_nat_data =
> > > > > > > > +        engine_get_input_data("lr_nat", node);
> > > > > > > > +
> > > > > > > > +    return (struct lr_stateful_input) {
> > > > > > > > +        .lr_datapaths = &northd_data->lr_datapaths,
> > > > > > > > +        .lb_datapaths_map = &northd_data->lb_datapaths_map,
> > > > > > > > +        .lbgrp_datapaths_map =
> > > &northd_data->lb_group_datapaths_map,
> > > > > > > > +        .lr_nats = &lr_nat_data->lr_nats,
> > > > > > > > +    };
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void
> > > > > > > > +build_lrouter_lb_reachable_ips(struct lr_stateful_record
> > > > > > > *lr_stateful_rec,
> > > > > > > > +                               const struct ovn_northd_lb
> *lb)
> > > > > > > > +{
> > > > > > > > +    add_neigh_ips_to_lrouter(lr_stateful_rec, lb->neigh_mode,
> > > > > > > &lb->ips_v4,
> > > > > > > > +                             &lb->ips_v6);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void
> > > > > > > > +add_neigh_ips_to_lrouter(struct lr_stateful_record
> > > *lr_stateful_rec,
> > > > > > > > +                         enum lb_neighbor_responder_mode
> > > neigh_mode,
> > > > > > > > +                         const struct sset *lb_ips_v4,
> > > > > > > > +                         const struct sset *lb_ips_v6)
> > > > > > > > +{
> > > > > > > > +    /* If configured to not reply to any neighbor requests
> for
> > > all
> > > > > VIPs
> > > > > > > > +     * return early.
> > > > > > > > +     */
> > > > > > > > +    if (neigh_mode == LB_NEIGH_RESPOND_NONE) {
> > > > > > > > +        return;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    const char *ip_address;
> > > > > > > > +
> > > > > > > > +    /* If configured to reply to neighbor requests for all
> VIPs
> > > force
> > > > > > > them
> > > > > > > > +     * all to be considered "reachable".
> > > > > > > > +     */
> > > > > > > > +    if (neigh_mode == LB_NEIGH_RESPOND_ALL) {
> > > > > > > > +        SSET_FOR_EACH (ip_address, lb_ips_v4) {
> > > > > > > > +
>  sset_add(&lr_stateful_rec->lb_ips->ips_v4_reachable,
> > > > > > > ip_address);
> > > > > > > > +        }
> > > > > > > > +        SSET_FOR_EACH (ip_address, lb_ips_v6) {
> > > > > > > > +
>  sset_add(&lr_stateful_rec->lb_ips->ips_v6_reachable,
> > > > > > > ip_address);
> > > > > > > > +        }
> > > > > > > > +
> > > > > > > > +        return;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    /* Otherwise, a VIP is reachable if there's at least one
> > > router
> > > > > > > > +     * subnet that includes it.
> > > > > > > > +     */
> > > > > > > > +    ovs_assert(neigh_mode == LB_NEIGH_RESPOND_REACHABLE);
> > > > > > > > +
> > > > > > > > +    SSET_FOR_EACH (ip_address, lb_ips_v4) {
> > > > > > > > +        struct ovn_port *op;
> > > > > > > > +        ovs_be32 vip_ip4;
> > > > > > > > +        if (ip_parse(ip_address, &vip_ip4)) {
> > > > > > > > +            HMAP_FOR_EACH (op, dp_node,
> > > &lr_stateful_rec->od->ports)
> > > > > {
> > > > > > >
> > > > > > > Following my comment for the previous patch regarding the
> implicit
> > > > > > > dependency by storing "od" in lr_nat, here the
> lr_stateful_rec->od
> > > > > actually
> > > > > > > came from the lr_nat_rec->od, and the logic here means the
> handling
> > > > > depends
> > > > > > > on od->ports. However, if the od->ports change, there is no
> clear
> > > change
> > > > > > > propagation from NB_Logical_Router to this engine node, and the
> > > change
> > > > > > > seems not handled. Would this be a problem?
> > > > > >
> > > > > > It won't be a problem because od->ports come from northd engine
> node
> > > and
> > > > > we have
> > > > > > a handler for that - lr_stateful_northd_handler().
> > > > > >
> > > > >
> > > > > Yes, I noticed that there is a handler, but the handler doesn't do
> > > anything
> > > > > other than checking if the northd data is tracked. I  added more
> > > comment to
> > > > > that handler itself.
> > > >
> > > >
> > > > >
> > > > > > Right now since northd engine node results in recompute when
> logical
> > > > > > router ports change,
> > > > > > this node would also result in recompute.
> > > > > >
> > > > > This explains. But from my point of view this is still very risky,
> > > because
> > > > > when someone add I-P for logical router ports in the future, it is
> hard
> > > for
> > > > > them to figure out that something needs to be done here, because
> when
> > > > > looking at the input dependency, they will see this lr_stateful node
> > > > > depends on northd data, and then follow the recompute code to see
> where
> > > > > those data are used, but it wouldn't lead to this place because the
> > > > > reference here is not through any of the northd data members, but
> from
> > > > > lr_stateful_rec->od which came from lr_nat->od instead of northd
> data's
> > > > > lr_datapath.
> > > >
> > > > I'm not sure.   I'd expect the user to handle it regardless of where
> > > > the od came from.
> > > > It is obvious that 'od' is part of the northd engine data.
> > > >
> > > It seems not that obvious unless you know where to look at.
> > >
> > > When trying to figure out the how the input northd_data is used in this
> > > node, we follow the logic starting from the recompute function, we see
> how
> > > the input is retrieved, assigned to local variables, and follow the
> > > reference of the variables, but it would never lead us to this line:
> > > HMAP_FOR_EACH (op, dp_node, &lr_stateful_rec->od->ports)
> > >
> > > I figured out this dependency only because I checked lr_nat node's
> > > implementation and saw the reference of "od" there, and lr_nat is an
> input
> > > of lr_stateful, and then understand that there may be indirect access of
> > > northd_data's lr_datapaths in lr_stateful through the input lr_nat, and
> > > then follow the recompute function again, this time focusing on the "od"
> > > member of lr_nat. This is obviously a much longer process and to some
> > > extent by luck. If we want to be reliably check the dependency this
> way, to
> > > find how an input A1 is used in node B, we need to check all the inputs
> of
> > > B: A1, A2, A3, A4 ... and see if there is any reference of A1 in A2, A3,
> > > A4, ..., and then check how A2->A1, A3->A1, ... are used in the node B.
> It
> > > is also possible that there exist a reference of A2->C->A1, which means
> we
> > > need to explore the whole dependency graph to be sure we identified all
> > > possible dependencies between A1 -> B. The other way may be to just
> check
> > > every line of code in B's recompute function, and we will definitely see
> > > the above line, and with careful examination we will realize the
> > > lr_stateful_rec->od came from northd_data->lr_datapaths. However, this
> is
> > > also way more complex than just following the reference of a single
> direct
> > > input. These would just compromise the value of the I-P engine.
> > >
> > > > Let's say we do something like :
> > > > lr_stateful_rec->od  = get_the_od_from_northd_lr_datapaths(...)  in
> the
> > > > lr_stateful_record_create().
> > > >
> > > > Will the above lead to this place to handle the logical router port
> > > changes ?
> > > >
> > > I wouldn't store od in lr_stateful_rec, so it would be something like:
> > >
> > > od  = get_the_od_from_northd_lr_datapaths(...,
> northd_data->lr_datapaths)
> > > // then use od as needed.
> > >
> > > This way it is much easier to see where and how the input
> > > northd_data->lr_datapaths is used by following the input northd_data,
> and
> > > reliably reasoning about how we should handle the changes of
> > > northd_data->lr_datapaths.
> > >
> > > >
> > > > >
> > > > > > >
> > > > > > > Similarly, this node stores reference to lr_nat_rec, and again
> "od",
> > > > > which
> > > > > > > will be used in en_lflow node later, which creates implicit
> > > dependency
> > > > > > > between en_lflow and lr_nat and northd's "od". With this
> approach,
> > > we
> > > > > will
> > > > > > > need to review the correctness of all the indirect reference's
> usage
> > > > > > > carefully, and what's more challenging is to maintain the
> > > correctness in
> > > > > > > the future when someone adds code to access such implicitly
> depended
> > > > > > > objects.
> > > > > > >
> > > > > >
> > > > > > Actually it's not an indirect reference anymore because after your
> > > > > comments
> > > > > > in the previous version,  I added northd engine node as a direct
> > > > > dependency.
> > > > > >
> > > > > > So we have now the below
> > > > > >
> > > > > > struct lr_stateful_record {
> > > > > >    ....
> > > > > >    const struct ovn_datapath *od;
> > > > > >    const struct lr_nat_record *lrnat_rec;
> > > > > >    ...
> > > > > > }
> > > > > >
> > > > > > en_lflow engine will access lrnat_rec via lr_stateful.  I think
> that
> > > > > > should be fine.
> > > > > > Any changes to NAT and load balancer related stuff in the NB
> logical
> > > > > router gets
> > > > > > propagated properly down.  And I don't see any implicit
> dependencies
> > > > > >
> > > > > The implicit dependencies I mentioned are:
> > > > > * northd_data (lr_datapath) -> lr_nat -> lr_stateful, which is
> caused by
> > > > > storing "od" directly in lr_nat_record and accessed in lr_stateful.
> > > Adding
> > > > > northd_data as a direct dependency in lr_stateful doesn't avoid this
> > > > > implicit dependency because the access of lr_datapath in
> lr_stateful is
> > > > > through lr_nat->od.
> > > >
> > > > Well, technically it can be solved by initializing lr_stateful->od =
> > > > get_the_od_from_northd_lr_datapaths(...)
> > > > in the function lr_stateful_record_create() and then argue that
> > > > accessing 'lr_stateful->od'  came from northd data
> > > > and hence there is no indirect dependency.
> > > >
> > > > But IMO this doesn't justify the extra cost required to do the lookup
> > > > when we can access the same data via lr_nat->od.
> > > >
> > > > Going by this logic,  en_flow engine node should not even access
> > > > od->nbr->header_ when adding a logical flow (ovn_lflow_add())
> > > > because there is an indirect dependency here too:
> > > >
> > > > NB_Logical_Router -> en_northd -> en_lflow.
> > > >
> > > > In my opinion,  this should NOT be considered as indirect dependency,
> > > > because for the en_lflow engine node, "od" comes from northd data and
> > > > so are its internal fields.
> > > > Why should en_lflow bother or be aware of the fact that "od->nbr" is
> > > indirect ?
> > >
> > > This is indeed a good point. I think the main difference here is that
> "od"
> > > is more like a container of "nbr". A "od" is a full representation of a
> LR
> > > here, so the reference is more like a harmless "shortcut"; while in the
> > > other case, a node uses a specific part of an input stores the
> reference of
> > > the whole input object, which is more of a reference than a container.
> But
> > > in theory, you are right, this is still indirect dependency. In theory,
> > > NB_Logical_Router could change, and in some corner cases maybe the
> change
> > > would impact en_lflow but not captured in en_northd's handling for the
> > > NB_Logical_Router change.
> > >
> > > >
> > > > If en_lflow handler is accessing some indirect dependency fields
> > > > (for eg,  lr_stateful->lrnat_rec->has_distributed_nat),  then we need
> > > > to make sure that
> > > > when NB Logical router NAT changes
> > > >    - en_lrnat handles it and updates its node and provide the changed
> > > > lr_nat record in its tracking data
> > > >     - en_lr_stateful node handles the en_lrnat changes, updates its
> > > > node and provide the changed lr_stateful record in  its tracking data
> > > >    - en_lflow node handles the lr_stateful changes and recalculate the
> > > > logical flows for the changed lr_stateful record.
> > > >
> > >
> > > What you described is correct for sure. But the problem is how do we
> > > effectively and reliably identify and expose such chains of dependency,
> so
> > > that we know (and the future developers and code readers know) what
> changes
> > > need to be handled at which node to ensure the correctness.
> > >
> > > >
> > > > We also have many  places in the logical flow generation,  where
> > > > en_lflow node functions
> > > > access op->nbsp->* .  In order to avoid these implicit dependencies,
> > > > en_lflow engine node
> > > > should also have NB Logical_Switch/Logical_Router as input nodes and
> > > > then access them.
> > > > Which is impractical IMO and it gets complicated to solve these
> problems.
> > > >
> > >
> > > I agree such case of indrect dependency should be ok, because of the
> > > "container" relationship between "op" and LSP/LRP, or "od" and LS/LR. I
> > > should have added comments on these special cases, too. This is my tech
> > > debt.
> > >
> > > > These are my 2 cents on this implicit dependency argument.
> > > >
> > > > The reason I created another node "lr_nat" and then made it as input
> > > > to "lr_stateful" is
> > > > mainly to update the vip_nats field.  i.e  a load balancer VIP which
> > > > is also a NAT external_ip.
> > > > This can also be solved by having 2 handlers for northd_engine data .
>  i.e
> > > >
> > > > engine_add_input(&en_lr_stateful, &en_northd,
> > > > lr_stateful_northd_handler_pre_lb);
> > > > engine_add_input(&en_lr_stateful, &en_lr_nat,
> lr_stateful_lr_nat_handler);
> > > > engine_add_input(&en_lr_stateful, &en_northd,
> > > > lr_stateful_northd_handler_post_lb);
> > > >
> > > > In the lr_stateful_northd_handler_post_lb() we can just update the
> > > vip_nats.
> > > >
> > > > This way,  we could get rid of the lr_nat engine node .  Let me know
> wdyt.
> > > >
> > >
> > > Sorry that I didn't understand this. You are trying to get rid to the
> > > lr_nat node but why do you still have "engine_add_input(&en_lr_stateful,
> > > &en_lr_nat, lr_stateful_lr_nat_handler);"?
> >
> > I mean that was a proposal.  Get rid of en_lr_nat node and move the data
> of
> > lr_nat to lr_stateful and have 2 handlers for the en_northd engine
> > input in en_lr_stateful.
> >
> > That was in my mind earlier.
> >
> > > I forgot some details but I remember that you went with the 2 handlers
> > > approach months ago and we decided to move away from it. I am inclined
> to
> > > split data with more clear dependency than adding dependency on the
> order
> > > of handlers, especially avoid adding two handlers for the same input.
> >
> > Ack.
> >
> > >
> > > >
> > > >
> > > > > * lr_nat -> lr_stateful -> en_lflow, which is caused by storing
> > > > > lr_nat_record directly in lr_stateful_record and accessed in
> en_lflow.
> > > > > lr_nat is not even added as an input to en_lflow. But again, even if
> > > adding
> > > > > lr_nat as an input to en_flow doesn't make it explict, because one
> can
> > > > > simply remove the input and the code still works.
> > > > > * northd_data (lr_datapath) -> lr_stateful -> en_lflow., which is
> > > caused by
> > > > > storing "od" directly in lr_stateful_record and accessed in
> en_lflow.
> > > > > Although northd_data is an input to en_lflow, it is used only for
> > > handling
> > > > > other changes such as LSPs. In functions such as
> > > > > build_lrouter_nat_defrag_and_lb(), the "od" is accessed through
> > > > > lr_stateful_rec->od, instead of from the northd_data.
> > > > >
> > > > > The problem of such implicit dependency is that it makes the I-P
> > > > > correctness very hard to be reasoned about. It basically makes the
> > > > > dependency graph of the I-P engine unreliable. Consequently, it may
> > > lead to
> > > > > hidden problems and increase a lot of mental burden when reviewing
> and
> > > > > maintaining the code for longer term.
> > > >
> > > > IMO the problem here is not the implicit (or mult-level) dependency.
> > > > The main problem is the huge "northd" engine data.  Unless we split
> into
> > > > smaller nodes, we would still have the problem.
> > > >
> > > The huge "northd" node is a problem, but I think it is a separate
> problem,
> > > another tech debt. With more and more I-P, the negative impacts would
> > > surface.
> > >
> > > > The en_lrnat engine node which has direct northd engine node input,
> > > > could still access other northd data (not related to NAT) in the
> future
> > > patches
> > > > and it may not handle changes to that data and the reviewers may still
> > > miss it.
> > > >
> > > Yes, but at least when doing code reviews, it is still reasonably
> realistic
> > > to check only en_lr_nat's implementation and identify how the en_northd
> > > input's each fields are used by this node. It is a different story when
> > > there is implict dependency.
> > >
> > > > If we have smaller nodes, then a dependency like A->B->C can be
> > > > handled relatively easier if we can ensure that changes to A would
> result
> > > in
> > > > changes to 'B' and when 'C' handles changes to 'B'  it can access
> > > > B->A without the need for 'A' to be an input to 'C'.
> > > >
> > >
> > > This is the ideal case when B fully contains A.
> >
> > In the next version,  I'll remove 'od' from both 'lr_stateful_record'
> > and 'lr_nat_record'
> > and will be storing the od->index instead.  And get the 'od'  whenever
> > required using the
> > below helper function in northd.h
> >
> > --------
> > static inline struct ovn_datapath *
> > ovn_datapaths_find_by_index(const struct ovn_datapaths *ovn_datapaths,
> >                                                  size_t od_index)
> > {
> >     ovs_assert(od_index <= hmap_count(&ovn_datapaths->datapaths));
> >     return ovn_datapaths->array[od_index];
> > }
> > --------
> >
> > Thanks
> > Numan
> >
>
> Sounds good to me.
> I assume you will take care of removing 'od' from 'ls_stateful_record',
> too, although you didn't mention it :)

No.

Just kidding :).  Yes I'll.

Numan

>
> Thanks,
> Han
> _______________________________________________
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to