> On Tue, Jul 23, 2024 at 10:17 AM Lorenzo Bianconi <
> lorenzo.bianc...@redhat.com> wrote:
> 
> > Introduce ECMP_Nexthop table in the SB db in order to track active
> > ecmp-symmetric-reply connections and flush stale ones.
> >
> > Signed-off-by: Lorenzo Bianconi <lorenzo.bianc...@redhat.com>
> > ---
> >
> 
> Hi Lorenzo,
> 
> thank you for the patch, I have one comment down below.

Hi Ales,

thx for the review.

> 
>  northd/en-northd.c       | 35 ++++++++++++++++
> >  northd/en-northd.h       |  4 ++
> >  northd/inc-proc-northd.c |  7 +++-
> >  northd/northd.c          | 87 ++++++++++++++++++++++++++++++++++++++++
> >  northd/northd.h          | 10 +++++
> >  ovn-sb.ovsschema         | 18 ++++++++-
> >  ovn-sb.xml               | 31 ++++++++++++++
> >  tests/ovn-northd.at      |  4 ++
> >  8 files changed, 193 insertions(+), 3 deletions(-)
> >
> > diff --git a/northd/en-northd.c b/northd/en-northd.c
> > index 817a4736c..0d86fcb60 100644
> > --- a/northd/en-northd.c
> > +++ b/northd/en-northd.c
> > @@ -402,6 +402,25 @@ en_bfd_sync_run(struct engine_node *node OVS_UNUSED,
> >      engine_set_node_state(node, EN_UPDATED);
> >  }
> >
> > +void
> > +en_ecmp_nexthop_run(struct engine_node *node, void *data)
> > +{
> > +    const struct engine_context *eng_ctx = engine_get_context();
> > +    struct static_routes_data *static_routes_data =
> > +        engine_get_input_data("static_routes", node);
> > +    struct ecmp_nexthop_data *enh_data = data;
> > +    const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table =
> > +        EN_OVSDB_GET(engine_get_input("SB_ecmp_nexthop", node));
> > +
> > +    ecmp_nexthop_destroy(data);
> > +    ecmp_nexthop_init(data);
> > +    build_ecmp_nexthop_table(eng_ctx->ovnsb_idl_txn,
> > +                             &static_routes_data->parsed_routes,
> > +                             &enh_data->nexthops,
> > +                             sbrec_ecmp_nexthop_table);
> > +    engine_set_node_state(node, EN_UPDATED);
> > +}
> > +
> >  void
> >  *en_northd_init(struct engine_node *node OVS_UNUSED,
> >                  struct engine_arg *arg OVS_UNUSED)
> > @@ -449,6 +468,16 @@ void *en_bfd_sync_init(struct engine_node *node
> > OVS_UNUSED,
> >      return NULL;
> >  }
> >
> > +void
> > +*en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> > +                      struct engine_arg *arg OVS_UNUSED)
> > +{
> > +    struct ecmp_nexthop_data *data = xzalloc(sizeof *data);
> > +
> > +    ecmp_nexthop_init(data);
> > +    return data;
> > +}
> > +
> >  void
> >  en_northd_cleanup(void *data)
> >  {
> > @@ -484,3 +513,9 @@ void
> >  en_bfd_sync_cleanup(void *data OVS_UNUSED)
> >  {
> >  }
> > +
> > +void
> > +en_ecmp_nexthop_cleanup(void *data)
> > +{
> > +    ecmp_nexthop_destroy(data);
> > +}
> > diff --git a/northd/en-northd.h b/northd/en-northd.h
> > index 68f7755f8..4f30ac335 100644
> > --- a/northd/en-northd.h
> > +++ b/northd/en-northd.h
> > @@ -42,5 +42,9 @@ void *en_bfd_sync_init(struct engine_node *node
> > OVS_UNUSED,
> >  void en_bfd_sync_run(struct engine_node *node OVS_UNUSED,
> >                       void *data OVS_UNUSED);
> >  void en_bfd_sync_cleanup(void *data OVS_UNUSED);
> > +void en_ecmp_nexthop_run(struct engine_node *node, void *data);
> > +void *en_ecmp_nexthop_init(struct engine_node *node OVS_UNUSED,
> > +                           struct engine_arg *arg OVS_UNUSED);
> > +void en_ecmp_nexthop_cleanup(void *data);
> >
> >  #endif /* EN_NORTHD_H */
> > diff --git a/northd/inc-proc-northd.c b/northd/inc-proc-northd.c
> > index b3baecaff..7416b1c43 100644
> > --- a/northd/inc-proc-northd.c
> > +++ b/northd/inc-proc-northd.c
> > @@ -101,7 +101,8 @@ static unixctl_cb_func chassis_features_list;
> >      SB_NODE(fdb, "fdb") \
> >      SB_NODE(static_mac_binding, "static_mac_binding") \
> >      SB_NODE(chassis_template_var, "chassis_template_var") \
> > -    SB_NODE(logical_dp_group, "logical_dp_group")
> > +    SB_NODE(logical_dp_group, "logical_dp_group") \
> > +    SB_NODE(ecmp_nexthop, "ecmp_nexthop")
> >
> >  enum sb_engine_node {
> >  #define SB_NODE(NAME, NAME_STR) SB_##NAME,
> > @@ -159,6 +160,7 @@ static ENGINE_NODE(route_policies, "route_policies");
> >  static ENGINE_NODE(static_routes, "static_routes");
> >  static ENGINE_NODE(bfd, "bfd");
> >  static ENGINE_NODE(bfd_sync, "bfd_sync");
> > +static ENGINE_NODE(ecmp_nexthop, "ecmp_nexthop");
> >
> >  void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> >                            struct ovsdb_idl_loop *sb)
> > @@ -258,6 +260,9 @@ void inc_proc_northd_init(struct ovsdb_idl_loop *nb,
> >      engine_add_input(&en_bfd_sync, &en_static_routes, NULL);
> >      engine_add_input(&en_bfd_sync, &en_route_policies, NULL);
> >
> > +    engine_add_input(&en_ecmp_nexthop, &en_sb_ecmp_nexthop, NULL);
> > +    engine_add_input(&en_ecmp_nexthop, &en_static_routes, NULL);
> > +
> >      engine_add_input(&en_sync_meters, &en_nb_acl, NULL);
> >      engine_add_input(&en_sync_meters, &en_nb_meter, NULL);
> >      engine_add_input(&en_sync_meters, &en_sb_meter, NULL);
> > diff --git a/northd/northd.c b/northd/northd.c
> > index 70bfb144e..b5b332536 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -9998,6 +9998,81 @@ build_bfd_map(const struct nbrec_bfd_table
> > *nbrec_bfd_table,
> >      }
> >  }
> >
> > +#define NEXTHOP_IDS_LEN        65535
> > +void
> > +build_ecmp_nexthop_table(
> > +        struct ovsdb_idl_txn *ovnsb_txn,
> > +        struct hmap *routes,
> > +        struct simap *nexthops,
> > +        const struct sbrec_ecmp_nexthop_table *sbrec_ecmp_nexthop_table)
> > +{
> > +    unsigned long *nexthop_ids = bitmap_allocate(NEXTHOP_IDS_LEN);
> > +
> > +    if (!ovnsb_txn) {
> > +        return;
> > +    }
> > +
> > +    const struct sbrec_ecmp_nexthop *sb_ecmp_nexthop;
> > +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH (sb_ecmp_nexthop,
> > +                                       sbrec_ecmp_nexthop_table) {
> > +        simap_put(nexthops, sb_ecmp_nexthop->nexthop,
> > +                  sb_ecmp_nexthop->id);
> > +        bitmap_set1(nexthop_ids, sb_ecmp_nexthop->id);
> > +    }
> > +
> > +    struct parsed_route *pr;
> > +    HMAP_FOR_EACH (pr, key_node, routes) {
> > +        if (!pr->ecmp_symmetric_reply) {
> > +            continue;
> > +        }
> > +
> > +        const struct nbrec_logical_router_static_route *r = pr->route;
> > +        if (!simap_contains(nexthops, r->nexthop)) {
> > +            int id = bitmap_scan(nexthop_ids, 0, 1, NEXTHOP_IDS_LEN);
> > +            if (id == NEXTHOP_IDS_LEN) {
> > +                static struct vlog_rate_limit rl =
> > VLOG_RATE_LIMIT_INIT(1, 1);
> > +                VLOG_WARN_RL(&rl, "nexthop id address space is
> > exhausted");
> > +                continue;
> > +            }
> > +            bitmap_set1(nexthop_ids, id);
> > +            simap_put(nexthops, r->nexthop, id);
> > +
> > +            sb_ecmp_nexthop = sbrec_ecmp_nexthop_insert(ovnsb_txn);
> > +            sbrec_ecmp_nexthop_set_nexthop(sb_ecmp_nexthop, r->nexthop);
> > +            sbrec_ecmp_nexthop_set_id(sb_ecmp_nexthop, id);
> > +        }
> > +    }
> > +
> > +    struct simap_node *n;
> > +    SIMAP_FOR_EACH_SAFE (n, nexthops) {
> > +        bool stale = true;
> > +        HMAP_FOR_EACH (pr, key_node, routes) {
> > +            if (!pr->ecmp_symmetric_reply) {
> > +                continue;
> > +            }
> > +
> > +            const struct nbrec_logical_router_static_route *r = pr->route;
> > +            if (!strcmp(r->nexthop, n->name)) {
> > +                stale = false;
> > +                break;
> > +            }
> > +        }
> > +
> > +        if (stale) {
> > +            simap_delete(nexthops, n);
> > +        }
> > +    }
> > +
> > +    SBREC_ECMP_NEXTHOP_TABLE_FOR_EACH_SAFE (sb_ecmp_nexthop,
> > +                                            sbrec_ecmp_nexthop_table) {
> > +        if (!simap_contains(nexthops, sb_ecmp_nexthop->nexthop)) {
> > +                sbrec_ecmp_nexthop_delete(sb_ecmp_nexthop);
> > +        }
> > +    }
> > +
> > +    bitmap_free(nexthop_ids);
> > +}
> >
> 
> The amount of loops in this could be greatly reduced by using sset as
> representation what is in sync or not.
> 
> HMAP_FOR_EACH (pr, key_node, routes) would add everything into sset.
> Then you could walk all SB entries, what is in sset would end up in the
> simap
> and bitmap. If it's not present in the sset we can remove it from the DB.
> Last step would be to walk the sset and add an SB entry for everything that
> is
> in the sset but not in the simap WDYT?

ack, I guess we can get rid of the third loop here (the one to look for stale
nexthops entries). I will fix it in the next version.

Regards,
Lorenzo

> 
> 
> > +
> >  /* Returns a string of the IP address of the router port 'op' that
> >   * overlaps with 'ip_s".  If one is not found, returns NULL.
> >   *
> > @@ -17791,6 +17866,12 @@ bfd_init(struct bfd_data *data)
> >      hmap_init(&data->bfd_connections);
> >  }
> >
> > +void
> > +ecmp_nexthop_init(struct ecmp_nexthop_data *data)
> > +{
> > +    simap_init(&data->nexthops);
> > +}
> > +
> >  void
> >  northd_destroy(struct northd_data *data)
> >  {
> > @@ -17872,6 +17953,12 @@ static_routes_destroy(struct static_routes_data
> > *data)
> >      __bfd_destroy(&data->bfd_active_connections);
> >  }
> >
> > +void
> > +ecmp_nexthop_destroy(struct ecmp_nexthop_data *data)
> > +{
> > +    simap_destroy(&data->nexthops);
> > +}
> > +
> >  void
> >  ovnnb_db_run(struct northd_input *input_data,
> >               struct northd_data *data,
> > diff --git a/northd/northd.h b/northd/northd.h
> > index 7ddb72eb7..205793e56 100644
> > --- a/northd/northd.h
> > +++ b/northd/northd.h
> > @@ -185,6 +185,10 @@ struct bfd_data {
> >      struct hmap bfd_connections;
> >  };
> >
> > +struct ecmp_nexthop_data {
> > +    struct simap nexthops;
> > +};
> > +
> >  struct lr_nat_table;
> >
> >  struct lflow_input {
> > @@ -724,6 +728,12 @@ void static_routes_destroy(struct static_routes_data
> > *);
> >  void bfd_init(struct bfd_data *);
> >  void bfd_destroy(struct bfd_data *);
> >
> > +void build_ecmp_nexthop_table(struct ovsdb_idl_txn *,
> > +                              struct hmap *, struct simap *,
> > +                              const struct sbrec_ecmp_nexthop_table *);
> > +void ecmp_nexthop_init(struct ecmp_nexthop_data *);
> > +void ecmp_nexthop_destroy(struct ecmp_nexthop_data *);
> > +
> >  struct lflow_table;
> >  struct lr_stateful_tracked_data;
> >  struct ls_stateful_tracked_data;
> > diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
> > index b6c051ae6..a4d1630b0 100644
> > --- a/ovn-sb.ovsschema
> > +++ b/ovn-sb.ovsschema
> > @@ -1,7 +1,7 @@
> >  {
> >      "name": "OVN_Southbound",
> > -    "version": "20.34.0",
> > -    "cksum": "2786607656 31376",
> > +    "version": "20.35.0",
> > +    "cksum": "1887835491 32037",
> >      "tables": {
> >          "SB_Global": {
> >              "columns": {
> > @@ -608,6 +608,20 @@
> >                                            "refTable":
> > "Datapath_Binding"}}}},
> >              "indexes": [["logical_port", "ip"]],
> >              "isRoot": true},
> > +        "ECMP_Nexthop": {
> > +            "columns": {
> > +                "nexthop": {"type": "string"},
> > +                "id": {"type": {"key": {"type": "integer",
> > +                                        "minInteger": 0,
> > +                                        "maxInteger": 65535}}},
> > +                "external_ids": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}},
> > +                "options": {
> > +                    "type": {"key": "string", "value": "string",
> > +                             "min": 0, "max": "unlimited"}}},
> > +            "indexes": [["nexthop"]],
> > +            "isRoot": true},
> >          "Chassis_Template_Var": {
> >              "columns": {
> >                  "chassis": {"type": "string"},
> > diff --git a/ovn-sb.xml b/ovn-sb.xml
> > index 73a1be5ed..d84e956c5 100644
> > --- a/ovn-sb.xml
> > +++ b/ovn-sb.xml
> > @@ -5179,4 +5179,35 @@ tcp.flags = RST;
> >        The set of variable values for a given chassis.
> >      </column>
> >    </table>
> > +
> > +  <table name="ECMP_Nexthop">
> > +    <p>
> > +      Each record in this table represents an active ECMP route committed
> > by
> > +      <code>ovn-northd</code> to <code>ovs</code> connection-tracking
> > table.
> > +      <code>ECMP_Nexthop</code> table is used by
> > <code>ovn-controller</code>
> > +      to track active ct entries and to flush stale ones.
> > +    </p>
> > +    <column name="nexthop">
> > +      <p>
> > +        Nexthop IP address for this ECMP route. Nexthop IP address should
> > +        be the IP address of a connected router port or the IP address of
> > +        an external device used as nexthop for the given destination.
> > +      </p>
> > +    </column>
> > +
> > +    <column name="id">
> > +      <p>
> > +        Nexthop unique identifier. Nexthop ID is used to track active
> > +        ecmp-symmetric-reply connections and flush stale ones.
> > +      </p>
> > +    </column>
> > +
> > +    <column name="options">
> > +      Reserved for future use.
> > +    </column>
> > +
> > +    <column name="external_ids">
> > +      See <em>External IDs</em> at the beginning of this document.
> > +    </column>
> > +  </table>
> >  </database>
> > diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> > index a84fb6fab..b815eb321 100644
> > --- a/tests/ovn-northd.at
> > +++ b/tests/ovn-northd.at
> > @@ -6775,6 +6775,7 @@ check ovn-nbctl lsp-set-addresses public-lr0 router
> >  check ovn-nbctl lsp-set-options public-lr0 router-port=lr0-public
> >
> >  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
> > 192.168.0.10
> > +check_row_count ECMP_Nexthop 1
> >
> >  ovn-sbctl dump-flows lr0 > lr0flows
> >
> > @@ -6786,6 +6787,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows |
> > ovn_strip_lflows], [0], [dn
> >  ])
> >
> >  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
> > 192.168.0.20
> > +check_row_count ECMP_Nexthop 2
> >
> >  ovn-sbctl dump-flows lr0 > lr0flows
> >  AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows |
> > ovn_strip_lflows], [0], [dnl
> > @@ -6822,6 +6824,7 @@ AT_CHECK([grep -e "lr_in_arp_resolve.*ecmp" lr0flows
> > | ovn_strip_lflows], [0], [
> >
> >  # add ecmp route with wrong nexthop
> >  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1
> > 192.168.1.20
> > +check_row_count ECMP_Nexthop 2
> >
> >  ovn-sbctl dump-flows lr0 > lr0flows
> >  AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows |
> > ovn_strip_lflows], [0], [dnl
> > @@ -6836,6 +6839,7 @@ AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows |
> > sed 's/192\.168\.0\..0/192.
> >
> >  check ovn-nbctl lr-route-del lr0
> >  wait_row_count nb:Logical_Router_Static_Route 0
> > +check_row_count ECMP_Nexthop 0
> >
> >  check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
> >  ovn-sbctl dump-flows lr0 > lr0flows
> > --
> > 2.45.2
> >
> >
> Thanks,
> Ales
> 
> -- 
> 
> Ales Musil
> 
> Senior Software Engineer - OVN Core
> 
> Red Hat EMEA <https://www.redhat.com>
> 
> amu...@redhat.com
> <https://red.ht/sig>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to